From a927bba1cf80beefc316a9d3c5df053942437c58 Mon Sep 17 00:00:00 2001
From: Chang Lan
Date: Mon, 21 Oct 2024 20:59:02 -0700
Subject: [PATCH] [Fix] Fix RULER datasets (#1628)

We need to ensure that we don't import anything that ends with
"_datasets", or they will be picked up by the runner, leading to
duplicate / unwanted datasets being evaluated.
---
 configs/datasets/ruler/ruler_128k_gen.py         | 14 +++++++-------
 configs/datasets/ruler/ruler_16k_gen.py          | 12 ++++++------
 configs/datasets/ruler/ruler_1m_gen.py           | 14 +++++++-------
 configs/datasets/ruler/ruler_32k_gen.py          | 14 +++++++-------
 configs/datasets/ruler/ruler_4k_gen.py           | 14 +++++++-------
 configs/datasets/ruler/ruler_8k_gen.py           | 14 +++++++-------
 configs/datasets/ruler/ruler_combined_gen.py     | 16 +++++++---------
 .../configs/datasets/ruler/ruler_128k_gen.py     | 14 +++++++-------
 .../configs/datasets/ruler/ruler_16k_gen.py      | 12 ++++++------
 .../configs/datasets/ruler/ruler_1m_gen.py       | 14 +++++++-------
 .../configs/datasets/ruler/ruler_32k_gen.py      | 14 +++++++-------
 .../configs/datasets/ruler/ruler_4k_gen.py       | 14 +++++++-------
 .../configs/datasets/ruler/ruler_8k_gen.py       | 14 +++++++-------
 .../configs/datasets/ruler/ruler_combined_gen.py | 16 +++++++---------
 14 files changed, 96 insertions(+), 100 deletions(-)

diff --git a/configs/datasets/ruler/ruler_128k_gen.py b/configs/datasets/ruler/ruler_128k_gen.py
index 8d7deae9..4f302ad2 100644
--- a/configs/datasets/ruler/ruler_128k_gen.py
+++ b/configs/datasets/ruler/ruler_128k_gen.py
@@ -1,14 +1,14 @@
 from mmengine.config import read_base

 with read_base():
-    from .ruler_niah_gen import niah_datasets  # Niah
-    from .ruler_vt_gen import vt_datasets  # VT
-    from .ruler_fwe_gen import fwe_datasets  # FWE
-    from .ruler_cwe_gen import cwe_datasets  # CWE
-    from .ruler_qa_gen import qa_datasets  # QA
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT

-import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
+import_ds = sum((cwe, fwe, niah, qa, vt), [])

 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
@@ -20,7 +20,7 @@ ruler_datasets = []

 # Different seq length
 for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
-    for dataset in import_datasets:
+    for dataset in import_ds:
         tmp_dataset = dataset.deepcopy()
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
diff --git a/configs/datasets/ruler/ruler_16k_gen.py b/configs/datasets/ruler/ruler_16k_gen.py
index 4ec069d8..5a8a9c47 100644
--- a/configs/datasets/ruler/ruler_16k_gen.py
+++ b/configs/datasets/ruler/ruler_16k_gen.py
@@ -2,14 +2,14 @@ from mmengine.config import read_base

 with read_base():
-    from .ruler_niah_gen import niah_datasets  # Niah
-    from .ruler_vt_gen import vt_datasets  # VT
-    from .ruler_fwe_gen import fwe_datasets  # FWE
-    from .ruler_cwe_gen import cwe_datasets  # CWE
-    from .ruler_qa_gen import qa_datasets  # QA
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT

-import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
+import_ds = sum((cwe, fwe, niah, qa, vt), [])

 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
diff --git a/configs/datasets/ruler/ruler_1m_gen.py b/configs/datasets/ruler/ruler_1m_gen.py
index b3d951fc..0b20375c 100644
--- a/configs/datasets/ruler/ruler_1m_gen.py
+++ b/configs/datasets/ruler/ruler_1m_gen.py
@@ -2,14 +2,14 @@ from mmengine.config import read_base

 with read_base():
-    from .ruler_niah_gen import niah_datasets  # Niah
-    from .ruler_vt_gen import vt_datasets  # VT
-    from .ruler_fwe_gen import fwe_datasets  # FWE
-    from .ruler_cwe_gen import cwe_datasets  # CWE
-    from .ruler_qa_gen import qa_datasets  # QA
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT

-import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
+import_ds = sum((cwe, fwe, niah, qa, vt), [])

 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
@@ -21,7 +21,7 @@ ruler_datasets = []

 # Different seq length
 for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
-    for dataset in import_datasets:
+    for dataset in import_ds:
         tmp_dataset = dataset.deepcopy()
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
diff --git a/configs/datasets/ruler/ruler_32k_gen.py b/configs/datasets/ruler/ruler_32k_gen.py
index 1a353d52..ab02cb4a 100644
--- a/configs/datasets/ruler/ruler_32k_gen.py
+++ b/configs/datasets/ruler/ruler_32k_gen.py
@@ -2,14 +2,14 @@ from mmengine.config import read_base

 with read_base():
-    from .ruler_niah_gen import niah_datasets  # Niah
-    from .ruler_vt_gen import vt_datasets  # VT
-    from .ruler_fwe_gen import fwe_datasets  # FWE
-    from .ruler_cwe_gen import cwe_datasets  # CWE
-    from .ruler_qa_gen import qa_datasets  # QA
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT

-import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
+import_ds = sum((cwe, fwe, niah, qa, vt), [])

 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
@@ -21,7 +21,7 @@ ruler_datasets = []

 # Different seq length
 for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
-    for dataset in import_datasets:
+    for dataset in import_ds:
         tmp_dataset = dataset.deepcopy()
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
diff --git a/configs/datasets/ruler/ruler_4k_gen.py b/configs/datasets/ruler/ruler_4k_gen.py
index ca5f037c..f0031507 100644
--- a/configs/datasets/ruler/ruler_4k_gen.py
+++ b/configs/datasets/ruler/ruler_4k_gen.py
@@ -1,14 +1,14 @@
 from mmengine.config import read_base

 with read_base():
-    from .ruler_niah_gen import niah_datasets  # Niah
-    from .ruler_vt_gen import vt_datasets  # VT
-    from .ruler_fwe_gen import fwe_datasets  # FWE
-    from .ruler_cwe_gen import cwe_datasets  # CWE
-    from .ruler_qa_gen import qa_datasets  # QA
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT

-import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
+import_ds = sum((cwe, fwe, niah, qa, vt), [])

 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
@@ -20,7 +20,7 @@ ruler_datasets = []

 # Different seq length
 for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
-    for dataset in import_datasets:
+    for dataset in import_ds:
         tmp_dataset = dataset.deepcopy()
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
diff --git a/configs/datasets/ruler/ruler_8k_gen.py b/configs/datasets/ruler/ruler_8k_gen.py
index 53200a91..8c9a4ad8 100644
--- a/configs/datasets/ruler/ruler_8k_gen.py
+++ b/configs/datasets/ruler/ruler_8k_gen.py
@@ -2,14 +2,14 @@ from mmengine.config import read_base

 with read_base():
-    from .ruler_niah_gen import niah_datasets  # Niah
-    from .ruler_vt_gen import vt_datasets  # VT
-    from .ruler_fwe_gen import fwe_datasets  # FWE
-    from .ruler_cwe_gen import cwe_datasets  # CWE
-    from .ruler_qa_gen import qa_datasets  # QA
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT

-import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
+import_ds = sum((cwe, fwe, niah, qa, vt), [])

 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
@@ -21,7 +21,7 @@ ruler_datasets = []

 # Different seq length
 for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
-    for dataset in import_datasets:
+    for dataset in import_ds:
         tmp_dataset = dataset.deepcopy()
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
diff --git a/configs/datasets/ruler/ruler_combined_gen.py b/configs/datasets/ruler/ruler_combined_gen.py
index 438be707..0b9ebe19 100644
--- a/configs/datasets/ruler/ruler_combined_gen.py
+++ b/configs/datasets/ruler/ruler_combined_gen.py
@@ -1,13 +1,11 @@
 from mmengine.config import read_base

 with read_base():
-    from .ruler_4k_gen import ruler_datasets as ruler_4k_datasets
-    from .ruler_8k_gen import ruler_datasets as ruler_8k_datasets
-    from .ruler_16k_gen import ruler_datasets as ruler_16k_datasets
-    from .ruler_32k_gen import ruler_datasets as ruler_32k_datasets
-    from .ruler_128k_gen import ruler_datasets as ruler_128k_datasets
-    from .ruler_1m_gen import ruler_datasets as ruler_1m_datasets
+    from .ruler_1m_gen import ruler_datasets as ruler_1m_ds
+    from .ruler_4k_gen import ruler_datasets as ruler_4k_ds
+    from .ruler_8k_gen import ruler_datasets as ruler_8k_ds
+    from .ruler_16k_gen import ruler_datasets as ruler_16k_ds
+    from .ruler_32k_gen import ruler_datasets as ruler_32k_ds
+    from .ruler_128k_gen import ruler_datasets as ruler_128k_ds

-ruler_combined_datasets = sum(
-    (v for k, v in locals().items() if k.endswith('_datasets')), []
-)
+ruler_combined_datasets = sum((v for k, v in locals().items() if k.endswith('_ds')), [])
diff --git a/opencompass/configs/datasets/ruler/ruler_128k_gen.py b/opencompass/configs/datasets/ruler/ruler_128k_gen.py
index 8d7deae9..4f302ad2 100644
--- a/opencompass/configs/datasets/ruler/ruler_128k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_128k_gen.py
@@ -1,14 +1,14 @@
 from mmengine.config import read_base

 with read_base():
-    from .ruler_niah_gen import niah_datasets  # Niah
-    from .ruler_vt_gen import vt_datasets  # VT
-    from .ruler_fwe_gen import fwe_datasets  # FWE
-    from .ruler_cwe_gen import cwe_datasets  # CWE
-    from .ruler_qa_gen import qa_datasets  # QA
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT

-import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
+import_ds = sum((cwe, fwe, niah, qa, vt), [])

 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
@@ -20,7 +20,7 @@ ruler_datasets = []

 # Different seq length
 for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
-    for dataset in import_datasets:
+    for dataset in import_ds:
         tmp_dataset = dataset.deepcopy()
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
diff --git a/opencompass/configs/datasets/ruler/ruler_16k_gen.py b/opencompass/configs/datasets/ruler/ruler_16k_gen.py
index 4ec069d8..5a8a9c47 100644
--- a/opencompass/configs/datasets/ruler/ruler_16k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_16k_gen.py
@@ -2,14 +2,14 @@ from mmengine.config import read_base

 with read_base():
-    from .ruler_niah_gen import niah_datasets  # Niah
-    from .ruler_vt_gen import vt_datasets  # VT
-    from .ruler_fwe_gen import fwe_datasets  # FWE
-    from .ruler_cwe_gen import cwe_datasets  # CWE
-    from .ruler_qa_gen import qa_datasets  # QA
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT

-import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
+import_ds = sum((cwe, fwe, niah, qa, vt), [])

 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
diff --git a/opencompass/configs/datasets/ruler/ruler_1m_gen.py b/opencompass/configs/datasets/ruler/ruler_1m_gen.py
index b3d951fc..0b20375c 100644
--- a/opencompass/configs/datasets/ruler/ruler_1m_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_1m_gen.py
@@ -2,14 +2,14 @@ from mmengine.config import read_base

 with read_base():
-    from .ruler_niah_gen import niah_datasets  # Niah
-    from .ruler_vt_gen import vt_datasets  # VT
-    from .ruler_fwe_gen import fwe_datasets  # FWE
-    from .ruler_cwe_gen import cwe_datasets  # CWE
-    from .ruler_qa_gen import qa_datasets  # QA
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT

-import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
+import_ds = sum((cwe, fwe, niah, qa, vt), [])

 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
@@ -21,7 +21,7 @@ ruler_datasets = []

 # Different seq length
 for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
-    for dataset in import_datasets:
+    for dataset in import_ds:
         tmp_dataset = dataset.deepcopy()
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
diff --git a/opencompass/configs/datasets/ruler/ruler_32k_gen.py b/opencompass/configs/datasets/ruler/ruler_32k_gen.py
index 1a353d52..ab02cb4a 100644
--- a/opencompass/configs/datasets/ruler/ruler_32k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_32k_gen.py
@@ -2,14 +2,14 @@ from mmengine.config import read_base

 with read_base():
-    from .ruler_niah_gen import niah_datasets  # Niah
-    from .ruler_vt_gen import vt_datasets  # VT
-    from .ruler_fwe_gen import fwe_datasets  # FWE
-    from .ruler_cwe_gen import cwe_datasets  # CWE
-    from .ruler_qa_gen import qa_datasets  # QA
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT

-import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
+import_ds = sum((cwe, fwe, niah, qa, vt), [])

 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
@@ -21,7 +21,7 @@ ruler_datasets = []

 # Different seq length
 for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
-    for dataset in import_datasets:
+    for dataset in import_ds:
         tmp_dataset = dataset.deepcopy()
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
diff --git a/opencompass/configs/datasets/ruler/ruler_4k_gen.py b/opencompass/configs/datasets/ruler/ruler_4k_gen.py
index ca5f037c..f0031507 100644
--- a/opencompass/configs/datasets/ruler/ruler_4k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_4k_gen.py
@@ -1,14 +1,14 @@
 from mmengine.config import read_base

 with read_base():
-    from .ruler_niah_gen import niah_datasets  # Niah
-    from .ruler_vt_gen import vt_datasets  # VT
-    from .ruler_fwe_gen import fwe_datasets  # FWE
-    from .ruler_cwe_gen import cwe_datasets  # CWE
-    from .ruler_qa_gen import qa_datasets  # QA
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT

-import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
+import_ds = sum((cwe, fwe, niah, qa, vt), [])

 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
@@ -20,7 +20,7 @@ ruler_datasets = []

 # Different seq length
 for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
-    for dataset in import_datasets:
+    for dataset in import_ds:
         tmp_dataset = dataset.deepcopy()
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
diff --git a/opencompass/configs/datasets/ruler/ruler_8k_gen.py b/opencompass/configs/datasets/ruler/ruler_8k_gen.py
index 53200a91..8c9a4ad8 100644
--- a/opencompass/configs/datasets/ruler/ruler_8k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_8k_gen.py
@@ -2,14 +2,14 @@ from mmengine.config import read_base

 with read_base():
-    from .ruler_niah_gen import niah_datasets  # Niah
-    from .ruler_vt_gen import vt_datasets  # VT
-    from .ruler_fwe_gen import fwe_datasets  # FWE
-    from .ruler_cwe_gen import cwe_datasets  # CWE
-    from .ruler_qa_gen import qa_datasets  # QA
+    from .ruler_cwe_gen import cwe_datasets as cwe  # CWE
+    from .ruler_fwe_gen import fwe_datasets as fwe  # FWE
+    from .ruler_niah_gen import niah_datasets as niah  # Niah
+    from .ruler_qa_gen import qa_datasets as qa  # QA
+    from .ruler_vt_gen import vt_datasets as vt  # VT

-import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
+import_ds = sum((cwe, fwe, niah, qa, vt), [])

 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
@@ -21,7 +21,7 @@ ruler_datasets = []

 # Different seq length
 for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
-    for dataset in import_datasets:
+    for dataset in import_ds:
         tmp_dataset = dataset.deepcopy()
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
diff --git a/opencompass/configs/datasets/ruler/ruler_combined_gen.py b/opencompass/configs/datasets/ruler/ruler_combined_gen.py
index 438be707..0b9ebe19 100644
--- a/opencompass/configs/datasets/ruler/ruler_combined_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_combined_gen.py
@@ -1,13 +1,11 @@
 from mmengine.config import read_base

 with read_base():
-    from .ruler_4k_gen import ruler_datasets as ruler_4k_datasets
-    from .ruler_8k_gen import ruler_datasets as ruler_8k_datasets
-    from .ruler_16k_gen import ruler_datasets as ruler_16k_datasets
-    from .ruler_32k_gen import ruler_datasets as ruler_32k_datasets
-    from .ruler_128k_gen import ruler_datasets as ruler_128k_datasets
-    from .ruler_1m_gen import ruler_datasets as ruler_1m_datasets
+    from .ruler_1m_gen import ruler_datasets as ruler_1m_ds
+    from .ruler_4k_gen import ruler_datasets as ruler_4k_ds
+    from .ruler_8k_gen import ruler_datasets as ruler_8k_ds
+    from .ruler_16k_gen import ruler_datasets as ruler_16k_ds
+    from .ruler_32k_gen import ruler_datasets as ruler_32k_ds
+    from .ruler_128k_gen import ruler_datasets as ruler_128k_ds

-ruler_combined_datasets = sum(
-    (v for k, v in locals().items() if k.endswith('_datasets')), []
-)
+ruler_combined_datasets = sum((v for k, v in locals().items() if k.endswith('_ds')), [])
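
Background on the failure mode, for readers outside the project: OpenCompass-style
configs are collected by scanning a config module's variables for names ending in
"_datasets", so an un-aliased import such as niah_datasets gets picked up alongside
the derived ruler_datasets, and the raw building-block datasets are evaluated on
their own. The snippet below is a minimal, hypothetical sketch of that suffix-based
pick-up; collect_datasets() and the sample entries are illustrative stand-ins, not
the actual runner code. It shows why aliasing the imports, as this patch does,
leaves only the intended datasets.

# Hypothetical stand-in for the runner's dataset discovery. The real
# OpenCompass implementation differs, but the suffix check is the point.
def collect_datasets(config_vars: dict) -> list:
    """Gather every list bound to a name ending in '_datasets'."""
    collected = []
    for name, value in config_vars.items():
        if name.endswith('_datasets') and isinstance(value, list):
            collected.extend(value)
    return collected


# Before this patch: the un-aliased import keeps its '_datasets' suffix,
# so both the raw import and the derived entries are picked up.
niah_datasets = [{'abbr': 'ruler_niah'}]        # imported building block
ruler_datasets = [{'abbr': 'ruler_niah_128k'}]  # derived, the one we want
before = collect_datasets({'niah_datasets': niah_datasets,
                           'ruler_datasets': ruler_datasets})
assert len(before) == 2  # duplicate / unwanted dataset would be evaluated

# After this patch: the import is aliased (here to 'niah'), so only the
# derived 'ruler_datasets' list matches the suffix check.
after = collect_datasets({'niah': niah_datasets,
                          'ruler_datasets': ruler_datasets})
assert len(after) == 1  # only the intended datasets remain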