commit b3f2c0230e
Chang Lan, 2025-05-29 14:21:31 +08:00, committed by GitHub
7 changed files with 121 additions and 43 deletions

File: RULER evaluation config (path not shown in this view)

@@ -5,24 +5,21 @@ from opencompass.runners import LocalRunner
 from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask
 
 with read_base():
-    from opencompass.configs.datasets.ruler.ruler_cwe_gen import \
-        cwe_datasets  # CWE
-    from opencompass.configs.datasets.ruler.ruler_fwe_gen import \
-        fwe_datasets  # FWE
-    from opencompass.configs.datasets.ruler.ruler_niah_gen import \
-        niah_datasets  # Niah
-    from opencompass.configs.datasets.ruler.ruler_qa_gen import \
-        qa_datasets  # QA
-    from opencompass.configs.datasets.ruler.ruler_vt_gen import \
-        vt_datasets  # VT
-    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat_1m import \
-        models as internlm2_5_7b_chat_1m
-    from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b_instruct import \
-        models as llama3_8b_instruct_model
-    from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import \
-        models as qwen2_7b_instruct_model
-    from opencompass.configs.summarizers.groups.ruler import \
-        ruler_summary_groups
+    from opencompass.configs.datasets.ruler.ruler_cwe_gen import cwe_datasets  # CWE
+    from opencompass.configs.datasets.ruler.ruler_fwe_gen import fwe_datasets  # FWE
+    from opencompass.configs.datasets.ruler.ruler_niah_gen import niah_datasets  # Niah
+    from opencompass.configs.datasets.ruler.ruler_qa_gen import qa_datasets  # QA
+    from opencompass.configs.datasets.ruler.ruler_vt_gen import vt_datasets  # VT
+    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat_1m import (
+        models as internlm2_5_7b_chat_1m,
+    )
+    from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b_instruct import (
+        models as llama3_8b_instruct_model,
+    )
+    from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import (
+        models as qwen2_7b_instruct_model,
+    )
+    from opencompass.configs.summarizers.groups.ruler import ruler_summary_groups
 
 import_datasets = sum(
     [niah_datasets, vt_datasets, fwe_datasets, cwe_datasets, qa_datasets], [])
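
Note on the sum(..., []) idiom used throughout these configs: starting sum() with an empty list concatenates the sub-lists left to right, flattening the per-task dataset lists into one list of dataset configs. A minimal standalone sketch, with toy dicts standing in for the real OpenCompass dataset configs:

# Toy stand-ins for the imported per-task dataset lists; the real entries
# are full OpenCompass dataset config dicts.
niah_datasets = [{'abbr': 'ruler_niah_single_1'}, {'abbr': 'ruler_niah_single_2'}]
vt_datasets = [{'abbr': 'ruler_vt'}]
qa_datasets = [{'abbr': 'ruler_qa_squad'}]

# sum() with an empty-list start value concatenates the sub-lists in order.
import_datasets = sum([niah_datasets, vt_datasets, qa_datasets], [])
assert [d['abbr'] for d in import_datasets] == [
    'ruler_niah_single_1', 'ruler_niah_single_2', 'ruler_vt', 'ruler_qa_squad']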

File: opencompass/configs/datasets/ruler/ruler_256k_gen.py (new file)

@@ -0,0 +1,32 @@
+import os
+
+from mmengine.config import read_base
+
+with read_base():
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT
+
+import_ds = sum((cwe, fwe, niah, qa, vt), [])
+
+# Evaluation config
+NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
+
+# Change the context lengths to be tested
+max_seq_lens = [1024 * 256]
+abbr_suffixs = ['256k']
+
+ruler_datasets = []
+
+# Different seq length
+for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
+    for dataset in import_ds:
+        tmp_dataset = dataset.deepcopy()
+        tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
+        tmp_dataset['num_samples'] = NUM_SAMPLES
+        tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
+        ruler_datasets.append(tmp_dataset)
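
The loop clones every imported base dataset once per target length: the clone's abbr gains a length suffix, and the sampling, context-length, and tokenizer settings are attached. A self-contained sketch of that transformation, using copy.deepcopy on plain dicts in place of the mmengine config objects' .deepcopy():

import copy

base_datasets = [{'abbr': 'ruler_niah_single_1'}, {'abbr': 'ruler_vt'}]  # toy stand-ins
max_seq_lens = [1024 * 256]
abbr_suffixs = ['256k']

ruler_datasets = []
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
    for dataset in base_datasets:
        tmp = copy.deepcopy(dataset)  # copy so the shared base config is not mutated
        tmp['abbr'] += '_' + abbr_suffix
        tmp['num_samples'] = 100
        tmp['max_seq_length'] = max_seq_len
        tmp['tokenizer_model'] = 'gpt-4'
        ruler_datasets.append(tmp)

assert ruler_datasets[0]['abbr'] == 'ruler_niah_single_1_256k'
assert ruler_datasets[0]['max_seq_length'] == 262144  # 1024 * 256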

File: opencompass/configs/datasets/ruler/ruler_512k_gen.py (new file)

@@ -0,0 +1,32 @@
+import os
+
+from mmengine.config import read_base
+
+with read_base():
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT
+
+import_ds = sum((cwe, fwe, niah, qa, vt), [])
+
+# Evaluation config
+NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
+
+# Change the context lengths to be tested
+max_seq_lens = [1024 * 512]
+abbr_suffixs = ['512k']
+
+ruler_datasets = []
+
+# Different seq length
+for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
+    for dataset in import_ds:
+        tmp_dataset = dataset.deepcopy()
+        tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
+        tmp_dataset['num_samples'] = NUM_SAMPLES
+        tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
+        ruler_datasets.append(tmp_dataset)
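
This file is the 256k config with a single pair swapped in max_seq_lens/abbr_suffixs. Because the two lists are zipped pairwise, one config could cover several lengths at once; the commit instead keeps one file per length so each can be imported and summarized independently. A hypothetical multi-length variant of the two lines that differ:

# Hypothetical: both new lengths in one config (the commit uses one file per length).
max_seq_lens = [1024 * 256, 1024 * 512]
abbr_suffixs = ['256k', '512k']

for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
    print(abbr_suffix, max_seq_len)  # 256k 262144, then 512k 524288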

File: opencompass/configs/datasets/ruler/ruler_combined_gen.py (name inferred)

@@ -8,5 +8,7 @@ with read_base():
     from .ruler_32k_gen import ruler_datasets as ruler_32k_ds
     from .ruler_64k_gen import ruler_datasets as ruler_64k_ds
     from .ruler_128k_gen import ruler_datasets as ruler_128k_ds
+    from .ruler_256k_gen import ruler_datasets as ruler_256k_ds
+    from .ruler_512k_gen import ruler_datasets as ruler_512k_ds
 
 ruler_combined_datasets = sum((v for k, v in locals().items() if k.endswith('_ds')), [])
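
ruler_combined_datasets relies on a naming convention rather than an explicit list: every per-length list imported under read_base() is bound to a name ending in _ds, so scanning the module namespace picks up the new 256k and 512k entries without touching the aggregation line. A minimal sketch of the pattern (at module scope, locals() is the module namespace):

# Stand-ins for the per-length dataset lists imported above.
ruler_4k_ds = [{'abbr': 'ruler_niah_single_1_4k'}]
ruler_256k_ds = [{'abbr': 'ruler_niah_single_1_256k'}]
ruler_512k_ds = [{'abbr': 'ruler_niah_single_1_512k'}]

# Any module-level name ending in '_ds' is collected automatically.
ruler_combined_datasets = sum(
    (v for k, v in locals().items() if k.endswith('_ds')), [])
assert len(ruler_combined_datasets) == 3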

File: opencompass/configs/datasets/ruler/ruler_vt_gen.py

@@ -1,8 +1,7 @@
+from opencompass.datasets.ruler.ruler_vt import RulerVtDataset, RulerVtEvaluator
+from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
-from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets.ruler.ruler_vt import RulerVtDataset
-from opencompass.datasets.ruler.ruler_vt import RulerVtEvaluator
 
 # VT Dataset
 vt_datasets = [

File: opencompass/configs/summarizers/groups/ruler.py

@@ -1,19 +1,21 @@
+"""RULER summary groups"""
 default_ruler_tasks = [
-    'ruler_niah_single_1',
-    'ruler_niah_single_2',
-    'ruler_niah_single_3',
-    'ruler_niah_multikey_1',
-    'ruler_niah_multikey_2',
-    'ruler_niah_multikey_3',
-    'ruler_niah_multivalue',
-    'ruler_niah_multiquery',
-    'ruler_vt',
-    'ruler_fwe',
-    'ruler_cwe',
-    'ruler_qa_squad',
-    'ruler_qa_hotpotqa',
+    "ruler_niah_single_1",
+    "ruler_niah_single_2",
+    "ruler_niah_single_3",
+    "ruler_niah_multikey_1",
+    "ruler_niah_multikey_2",
+    "ruler_niah_multikey_3",
+    "ruler_niah_multivalue",
+    "ruler_niah_multiquery",
+    "ruler_vt",
+    "ruler_fwe",
+    "ruler_cwe",
+    "ruler_qa_squad",
+    "ruler_qa_hotpotqa",
 ]
 
-context_window_sizes = ['4k', '8k', '16k', '32k', '64k', '128k', '1m']
+context_window_sizes = ["4k", "8k", "16k", "32k", "64k", "128k", "256k", "512k", "1m"]
+
 ruler_summary_groups = []
 for context_window_size in context_window_sizes:
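
The loop body falls outside this hunk's context lines, so only its inputs are visible here. Based on OpenCompass summary-group conventions and the <task>_<suffix> abbrs produced by the per-length dataset configs, each iteration presumably appends a group that averages the per-task scores at one context length. A hedged reconstruction, not the verbatim source:

# Hedged reconstruction of the (unshown) loop body: OpenCompass summary
# groups are dicts pairing a group 'name' with the 'subsets' it averages.
default_ruler_tasks = ['ruler_vt', 'ruler_fwe', 'ruler_cwe']  # trimmed for the sketch
context_window_sizes = ['256k', '512k']

ruler_summary_groups = []
for context_window_size in context_window_sizes:
    ruler_summary_groups.append({
        'name': f'ruler_{context_window_size}',
        'subsets': [f'{t}_{context_window_size}' for t in default_ruler_tasks],
    })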

File: opencompass/configs/summarizers/ruler.py (path inferred)

@@ -47,6 +47,18 @@ ruler_128k_summarizer = dict(
         [v for k, v in locals().items() if k.endswith('_summary_groups')], []
     ),
 )
+ruler_256k_summarizer = dict(
+    dataset_abbrs=["ruler_256k"],
+    summary_groups=sum(
+        [v for k, v in locals().items() if k.endswith("_summary_groups")], []
+    ),
+)
+ruler_512k_summarizer = dict(
+    dataset_abbrs=["ruler_512k"],
+    summary_groups=sum(
+        [v for k, v in locals().items() if k.endswith("_summary_groups")], []
+    ),
+)
 
 ruler_1m_summarizer = dict(
     dataset_abbrs=['ruler_1m'],
@@ -57,15 +69,17 @@ ruler_1m_summarizer = dict(
 
 ruler_combined_summarizer = dict(
     dataset_abbrs=[
-        'ruler_4k',
-        'ruler_8k',
-        'ruler_16k',
-        'ruler_32k',
-        'ruler_64k',
-        'ruler_128k',
-        'ruler_1m',
+        "ruler_4k",
+        "ruler_8k",
+        "ruler_16k",
+        "ruler_32k",
+        "ruler_64k",
+        "ruler_128k",
+        "ruler_256k",
+        "ruler_512k",
+        "ruler_1m",
     ],
     summary_groups=sum(
-        [v for k, v in locals().items() if k.endswith('_summary_groups')], []
+        [v for k, v in locals().items() if k.endswith("_summary_groups")], []
     ),
 )
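
The 256k and 512k summarizers repeat the existing per-length block verbatim, which keeps the diff mechanical but grows linearly with each new length. If more lengths accumulate, a small helper could generate them; a sketch of that refactor (not part of the commit):

def make_ruler_summarizer(size, summary_groups):
    """Build the per-length summarizer dict used above (sketch only)."""
    return dict(dataset_abbrs=[f'ruler_{size}'], summary_groups=summary_groups)

_groups = []  # stands in for the aggregated *_summary_groups lists
ruler_256k_summarizer = make_ruler_summarizer('256k', _groups)
ruler_512k_summarizer = make_ruler_summarizer('512k', _groups)
assert ruler_512k_summarizer['dataset_abbrs'] == ['ruler_512k']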