# Patch summary: add a 64k context-length variant to the RULER benchmark configs.
#
# Files touched (each change appears twice with identical blob hashes: once under
# configs/ and once under opencompass/configs/):
#   * datasets/ruler/ruler_64k_gen.py (new file)
#       Concatenates the imported CWE, FWE, NIAH, QA and VT dataset lists into
#       `import_ds`, then deep-copies every entry into `ruler_datasets`, appending
#       "_64k" to `abbr` and setting `num_samples` to NUM_SAMPLES (100) and
#       `max_seq_length` to 1024 * 64.
#   * datasets/ruler/ruler_combined_gen.py
#       Imports the new list as `ruler_64k_ds`; the existing `locals()` scan for
#       names ending in "_ds" then includes it in `ruler_combined_datasets`.
#   * summarizers/groups/ruler.py
#       Inserts '64k' between '32k' and '128k' in `context_window_sizes`.
#   * summarizers/ruler.py
#       Adds `ruler_64k_summarizer` (dataset_abbrs=['ruler_64k']) and lists
#       'ruler_64k' in the `ruler_combined_summarizer` hunk.
#
# NOTE(review): the first summarizers/ruler.py hunk removes the blank line that
# followed `ruler_32k_summarizer` and adds none around the new dict, so the 32k,
# 64k and 128k summarizers end up with no blank line between them -- confirm
# this is intended.
# NOTE(review): in this copy the hunks sit on a few long physical lines although
# the hunk headers declare multi-line ranges (e.g. "+1,28"); presumably line
# breaks were collapsed in transit -- regenerate the patch from the commit
# before trying to apply it.
diff --git a/configs/datasets/ruler/ruler_64k_gen.py b/configs/datasets/ruler/ruler_64k_gen.py new file mode 100644 index 00000000..709260d6 --- /dev/null +++ b/configs/datasets/ruler/ruler_64k_gen.py @@ -0,0 +1,28 @@ +from mmengine.config import read_base + +with read_base(): + from .ruler_cwe_gen import cwe_datasets as cwe # CWE + from .ruler_fwe_gen import fwe_datasets as fwe # FWE + from .ruler_niah_gen import niah_datasets as niah # Niah + from .ruler_qa_gen import qa_datasets as qa # QA + from .ruler_vt_gen import vt_datasets as vt # VT + + +import_ds = sum((cwe, fwe, niah, qa, vt), []) + +# Evaluation config +NUM_SAMPLES = 100 # Change to the number of samples you need +# Change the context lengths to be tested +max_seq_lens = [1024 * 64] +abbr_suffixs: list[str] = ['64k'] + +ruler_datasets = [] + +# Different seq length +for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs): + for dataset in import_ds: + tmp_dataset = dataset.deepcopy() + tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix + tmp_dataset['num_samples'] = NUM_SAMPLES + tmp_dataset['max_seq_length'] = max_seq_len + ruler_datasets.append(tmp_dataset) diff --git a/configs/datasets/ruler/ruler_combined_gen.py b/configs/datasets/ruler/ruler_combined_gen.py index 0b9ebe19..077c4f10 100644 --- a/configs/datasets/ruler/ruler_combined_gen.py +++ b/configs/datasets/ruler/ruler_combined_gen.py @@ -6,6 +6,7 @@ with read_base(): from .ruler_8k_gen import ruler_datasets as ruler_8k_ds from .ruler_16k_gen import ruler_datasets as ruler_16k_ds from .ruler_32k_gen import ruler_datasets as ruler_32k_ds + from .ruler_64k_gen import ruler_datasets as ruler_64k_ds from .ruler_128k_gen import ruler_datasets as ruler_128k_ds ruler_combined_datasets = sum((v for k, v in locals().items() if k.endswith('_ds')), []) diff --git a/configs/summarizers/groups/ruler.py b/configs/summarizers/groups/ruler.py index 3bac0414..49a76567 100644 --- a/configs/summarizers/groups/ruler.py +++ 
b/configs/summarizers/groups/ruler.py @@ -13,7 +13,7 @@ default_ruler_tasks = [ 'ruler_qa_squad', 'ruler_qa_hotpotqa', ] -context_window_sizes = ['4k', '8k', '16k', '32k', '128k', '1m'] +context_window_sizes = ['4k', '8k', '16k', '32k', '64k', '128k', '1m'] ruler_summary_groups = [] for context_window_size in context_window_sizes: diff --git a/configs/summarizers/ruler.py b/configs/summarizers/ruler.py index 90da3e4c..cb35ac2e 100644 --- a/configs/summarizers/ruler.py +++ b/configs/summarizers/ruler.py @@ -35,7 +35,12 @@ ruler_32k_summarizer = dict( [v for k, v in locals().items() if k.endswith('_summary_groups')], [] ), ) - +ruler_64k_summarizer = dict( + dataset_abbrs=['ruler_64k'], + summary_groups=sum( + [v for k, v in locals().items() if k.endswith('_summary_groups')], [] + ), +) ruler_128k_summarizer = dict( dataset_abbrs=['ruler_128k'], summary_groups=sum( @@ -56,6 +61,7 @@ ruler_combined_summarizer = dict( 'ruler_8k', 'ruler_16k', 'ruler_32k', + 'ruler_64k', 'ruler_128k', 'ruler_1m', ], diff --git a/opencompass/configs/datasets/ruler/ruler_64k_gen.py b/opencompass/configs/datasets/ruler/ruler_64k_gen.py new file mode 100644 index 00000000..709260d6 --- /dev/null +++ b/opencompass/configs/datasets/ruler/ruler_64k_gen.py @@ -0,0 +1,28 @@ +from mmengine.config import read_base + +with read_base(): + from .ruler_cwe_gen import cwe_datasets as cwe # CWE + from .ruler_fwe_gen import fwe_datasets as fwe # FWE + from .ruler_niah_gen import niah_datasets as niah # Niah + from .ruler_qa_gen import qa_datasets as qa # QA + from .ruler_vt_gen import vt_datasets as vt # VT + + +import_ds = sum((cwe, fwe, niah, qa, vt), []) + +# Evaluation config +NUM_SAMPLES = 100 # Change to the number of samples you need +# Change the context lengths to be tested +max_seq_lens = [1024 * 64] +abbr_suffixs: list[str] = ['64k'] + +ruler_datasets = [] + +# Different seq length +for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs): + for dataset in import_ds: + tmp_dataset = 
dataset.deepcopy() + tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix + tmp_dataset['num_samples'] = NUM_SAMPLES + tmp_dataset['max_seq_length'] = max_seq_len + ruler_datasets.append(tmp_dataset) diff --git a/opencompass/configs/datasets/ruler/ruler_combined_gen.py b/opencompass/configs/datasets/ruler/ruler_combined_gen.py index 0b9ebe19..077c4f10 100644 --- a/opencompass/configs/datasets/ruler/ruler_combined_gen.py +++ b/opencompass/configs/datasets/ruler/ruler_combined_gen.py @@ -6,6 +6,7 @@ with read_base(): from .ruler_8k_gen import ruler_datasets as ruler_8k_ds from .ruler_16k_gen import ruler_datasets as ruler_16k_ds from .ruler_32k_gen import ruler_datasets as ruler_32k_ds + from .ruler_64k_gen import ruler_datasets as ruler_64k_ds from .ruler_128k_gen import ruler_datasets as ruler_128k_ds ruler_combined_datasets = sum((v for k, v in locals().items() if k.endswith('_ds')), []) diff --git a/opencompass/configs/summarizers/groups/ruler.py b/opencompass/configs/summarizers/groups/ruler.py index 3bac0414..49a76567 100644 --- a/opencompass/configs/summarizers/groups/ruler.py +++ b/opencompass/configs/summarizers/groups/ruler.py @@ -13,7 +13,7 @@ default_ruler_tasks = [ 'ruler_qa_squad', 'ruler_qa_hotpotqa', ] -context_window_sizes = ['4k', '8k', '16k', '32k', '128k', '1m'] +context_window_sizes = ['4k', '8k', '16k', '32k', '64k', '128k', '1m'] ruler_summary_groups = [] for context_window_size in context_window_sizes: diff --git a/opencompass/configs/summarizers/ruler.py b/opencompass/configs/summarizers/ruler.py index 90da3e4c..cb35ac2e 100644 --- a/opencompass/configs/summarizers/ruler.py +++ b/opencompass/configs/summarizers/ruler.py @@ -35,7 +35,12 @@ ruler_32k_summarizer = dict( [v for k, v in locals().items() if k.endswith('_summary_groups')], [] ), ) - +ruler_64k_summarizer = dict( + dataset_abbrs=['ruler_64k'], + summary_groups=sum( + [v for k, v in locals().items() if k.endswith('_summary_groups')], [] + ), +) ruler_128k_summarizer = dict( 
dataset_abbrs=['ruler_128k'], summary_groups=sum( @@ -56,6 +61,7 @@ ruler_combined_summarizer = dict( 'ruler_8k', 'ruler_16k', 'ruler_32k', + 'ruler_64k', 'ruler_128k', 'ruler_1m', ],