mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
Merge 56d49d034b
into d572761cef
This commit is contained in:
commit
b3f2c0230e
@ -5,24 +5,21 @@ from opencompass.runners import LocalRunner
|
|||||||
from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask
|
from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
from opencompass.configs.datasets.ruler.ruler_cwe_gen import \
|
from opencompass.configs.datasets.ruler.ruler_cwe_gen import cwe_datasets # CWE
|
||||||
cwe_datasets # CWE
|
from opencompass.configs.datasets.ruler.ruler_fwe_gen import fwe_datasets # FWE
|
||||||
from opencompass.configs.datasets.ruler.ruler_fwe_gen import \
|
from opencompass.configs.datasets.ruler.ruler_niah_gen import niah_datasets # Niah
|
||||||
fwe_datasets # FWE
|
from opencompass.configs.datasets.ruler.ruler_qa_gen import qa_datasets # QA
|
||||||
from opencompass.configs.datasets.ruler.ruler_niah_gen import \
|
from opencompass.configs.datasets.ruler.ruler_vt_gen import vt_datasets # VT
|
||||||
niah_datasets # Niah
|
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat_1m import (
|
||||||
from opencompass.configs.datasets.ruler.ruler_qa_gen import \
|
models as internlm2_5_7b_chat_1m,
|
||||||
qa_datasets # QA
|
)
|
||||||
from opencompass.configs.datasets.ruler.ruler_vt_gen import \
|
from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b_instruct import (
|
||||||
vt_datasets # VT
|
models as llama3_8b_instruct_model,
|
||||||
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat_1m import \
|
)
|
||||||
models as internlm2_5_7b_chat_1m
|
from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import (
|
||||||
from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b_instruct import \
|
models as qwen2_7b_instruct_model,
|
||||||
models as llama3_8b_instruct_model
|
)
|
||||||
from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import \
|
from opencompass.configs.summarizers.groups.ruler import ruler_summary_groups
|
||||||
models as qwen2_7b_instruct_model
|
|
||||||
from opencompass.configs.summarizers.groups.ruler import \
|
|
||||||
ruler_summary_groups
|
|
||||||
|
|
||||||
import_datasets = sum(
|
import_datasets = sum(
|
||||||
[niah_datasets, vt_datasets, fwe_datasets, cwe_datasets, qa_datasets], [])
|
[niah_datasets, vt_datasets, fwe_datasets, cwe_datasets, qa_datasets], [])
|
||||||
|
32
opencompass/configs/datasets/ruler/ruler_256k_gen.py
Normal file
32
opencompass/configs/datasets/ruler/ruler_256k_gen.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
|
||||||
|
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||||
|
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||||
|
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||||
|
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||||
|
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||||
|
|
||||||
|
|
||||||
|
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||||
|
|
||||||
|
# Evaluation config
|
||||||
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
|
# Change the context lengths to be tested
|
||||||
|
max_seq_lens = [1024 * 256]
|
||||||
|
abbr_suffixs = ['256k']
|
||||||
|
|
||||||
|
ruler_datasets = []
|
||||||
|
|
||||||
|
# Different seq length
|
||||||
|
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
||||||
|
for dataset in import_ds:
|
||||||
|
tmp_dataset = dataset.deepcopy()
|
||||||
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
|
ruler_datasets.append(tmp_dataset)
|
32
opencompass/configs/datasets/ruler/ruler_512k_gen.py
Normal file
32
opencompass/configs/datasets/ruler/ruler_512k_gen.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from mmengine.config import read_base
|
||||||
|
|
||||||
|
with read_base():
|
||||||
|
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||||
|
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||||
|
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||||
|
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||||
|
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||||
|
|
||||||
|
|
||||||
|
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||||
|
|
||||||
|
# Evaluation config
|
||||||
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
|
# Change the context lengths to be tested
|
||||||
|
max_seq_lens = [1024 * 512]
|
||||||
|
abbr_suffixs = ['512k']
|
||||||
|
|
||||||
|
ruler_datasets = []
|
||||||
|
|
||||||
|
# Different seq length
|
||||||
|
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
||||||
|
for dataset in import_ds:
|
||||||
|
tmp_dataset = dataset.deepcopy()
|
||||||
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
|
ruler_datasets.append(tmp_dataset)
|
@ -8,5 +8,7 @@ with read_base():
|
|||||||
from .ruler_32k_gen import ruler_datasets as ruler_32k_ds
|
from .ruler_32k_gen import ruler_datasets as ruler_32k_ds
|
||||||
from .ruler_64k_gen import ruler_datasets as ruler_64k_ds
|
from .ruler_64k_gen import ruler_datasets as ruler_64k_ds
|
||||||
from .ruler_128k_gen import ruler_datasets as ruler_128k_ds
|
from .ruler_128k_gen import ruler_datasets as ruler_128k_ds
|
||||||
|
from .ruler_256k_gen import ruler_datasets as ruler_256k_ds
|
||||||
|
from .ruler_512k_gen import ruler_datasets as ruler_512k_ds
|
||||||
|
|
||||||
ruler_combined_datasets = sum((v for k, v in locals().items() if k.endswith('_ds')), [])
|
ruler_combined_datasets = sum((v for k, v in locals().items() if k.endswith('_ds')), [])
|
||||||
|
@ -1,8 +1,7 @@
|
|||||||
|
from opencompass.datasets.ruler.ruler_vt import RulerVtDataset, RulerVtEvaluator
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
|
||||||
from opencompass.datasets.ruler.ruler_vt import RulerVtDataset
|
|
||||||
from opencompass.datasets.ruler.ruler_vt import RulerVtEvaluator
|
|
||||||
|
|
||||||
# VT Dataset
|
# VT Dataset
|
||||||
vt_datasets = [
|
vt_datasets = [
|
||||||
|
@ -1,19 +1,21 @@
|
|||||||
|
"""RULER summary groups"""
|
||||||
|
|
||||||
default_ruler_tasks = [
|
default_ruler_tasks = [
|
||||||
'ruler_niah_single_1',
|
"ruler_niah_single_1",
|
||||||
'ruler_niah_single_2',
|
"ruler_niah_single_2",
|
||||||
'ruler_niah_single_3',
|
"ruler_niah_single_3",
|
||||||
'ruler_niah_multikey_1',
|
"ruler_niah_multikey_1",
|
||||||
'ruler_niah_multikey_2',
|
"ruler_niah_multikey_2",
|
||||||
'ruler_niah_multikey_3',
|
"ruler_niah_multikey_3",
|
||||||
'ruler_niah_multivalue',
|
"ruler_niah_multivalue",
|
||||||
'ruler_niah_multiquery',
|
"ruler_niah_multiquery",
|
||||||
'ruler_vt',
|
"ruler_vt",
|
||||||
'ruler_fwe',
|
"ruler_fwe",
|
||||||
'ruler_cwe',
|
"ruler_cwe",
|
||||||
'ruler_qa_squad',
|
"ruler_qa_squad",
|
||||||
'ruler_qa_hotpotqa',
|
"ruler_qa_hotpotqa",
|
||||||
]
|
]
|
||||||
context_window_sizes = ['4k', '8k', '16k', '32k', '64k', '128k', '1m']
|
context_window_sizes = ["4k", "8k", "16k", "32k", "64k", "128k", "256k", "512k", "1m"]
|
||||||
|
|
||||||
ruler_summary_groups = []
|
ruler_summary_groups = []
|
||||||
for context_window_size in context_window_sizes:
|
for context_window_size in context_window_sizes:
|
||||||
|
@ -47,6 +47,18 @@ ruler_128k_summarizer = dict(
|
|||||||
[v for k, v in locals().items() if k.endswith('_summary_groups')], []
|
[v for k, v in locals().items() if k.endswith('_summary_groups')], []
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
ruler_256k_summarizer = dict(
|
||||||
|
dataset_abbrs=["ruler_256k"],
|
||||||
|
summary_groups=sum(
|
||||||
|
[v for k, v in locals().items() if k.endswith("_summary_groups")], []
|
||||||
|
),
|
||||||
|
)
|
||||||
|
ruler_512k_summarizer = dict(
|
||||||
|
dataset_abbrs=["ruler_512k"],
|
||||||
|
summary_groups=sum(
|
||||||
|
[v for k, v in locals().items() if k.endswith("_summary_groups")], []
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
ruler_1m_summarizer = dict(
|
ruler_1m_summarizer = dict(
|
||||||
dataset_abbrs=['ruler_1m'],
|
dataset_abbrs=['ruler_1m'],
|
||||||
@ -57,15 +69,17 @@ ruler_1m_summarizer = dict(
|
|||||||
|
|
||||||
ruler_combined_summarizer = dict(
|
ruler_combined_summarizer = dict(
|
||||||
dataset_abbrs=[
|
dataset_abbrs=[
|
||||||
'ruler_4k',
|
"ruler_4k",
|
||||||
'ruler_8k',
|
"ruler_8k",
|
||||||
'ruler_16k',
|
"ruler_16k",
|
||||||
'ruler_32k',
|
"ruler_32k",
|
||||||
'ruler_64k',
|
"ruler_64k",
|
||||||
'ruler_128k',
|
"ruler_128k",
|
||||||
'ruler_1m',
|
"ruler_256k",
|
||||||
|
"ruler_512k",
|
||||||
|
"ruler_1m",
|
||||||
],
|
],
|
||||||
summary_groups=sum(
|
summary_groups=sum(
|
||||||
[v for k, v in locals().items() if k.endswith('_summary_groups')], []
|
[v for k, v in locals().items() if k.endswith("_summary_groups")], []
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user