mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Fix] Fix RULER datasets (#1628)
We need to ensure that these configs do not import any name ending in "_datasets"; otherwise the runner picks those names up as well, and duplicate or unwanted datasets end up being evaluated.
This commit is contained in:
parent
a4d5a6c81b
commit
a927bba1cf
@ -1,14 +1,14 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_niah_gen import niah_datasets # Niah
|
||||
from .ruler_vt_gen import vt_datasets # VT
|
||||
from .ruler_fwe_gen import fwe_datasets # FWE
|
||||
from .ruler_cwe_gen import cwe_datasets # CWE
|
||||
from .ruler_qa_gen import qa_datasets # QA
|
||||
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||
|
||||
|
||||
import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||
|
||||
# Evaluation config
|
||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||
@ -20,7 +20,7 @@ ruler_datasets = []
|
||||
|
||||
# Different seq length
|
||||
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
||||
for dataset in import_datasets:
|
||||
for dataset in import_ds:
|
||||
tmp_dataset = dataset.deepcopy()
|
||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||
|
@ -2,14 +2,14 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_niah_gen import niah_datasets # Niah
|
||||
from .ruler_vt_gen import vt_datasets # VT
|
||||
from .ruler_fwe_gen import fwe_datasets # FWE
|
||||
from .ruler_cwe_gen import cwe_datasets # CWE
|
||||
from .ruler_qa_gen import qa_datasets # QA
|
||||
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||
|
||||
|
||||
import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||
|
||||
# Evaluation config
|
||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||
|
@ -2,14 +2,14 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_niah_gen import niah_datasets # Niah
|
||||
from .ruler_vt_gen import vt_datasets # VT
|
||||
from .ruler_fwe_gen import fwe_datasets # FWE
|
||||
from .ruler_cwe_gen import cwe_datasets # CWE
|
||||
from .ruler_qa_gen import qa_datasets # QA
|
||||
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||
|
||||
|
||||
import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||
|
||||
# Evaluation config
|
||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||
@ -21,7 +21,7 @@ ruler_datasets = []
|
||||
|
||||
# Different seq length
|
||||
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
||||
for dataset in import_datasets:
|
||||
for dataset in import_ds:
|
||||
tmp_dataset = dataset.deepcopy()
|
||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||
|
@ -2,14 +2,14 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_niah_gen import niah_datasets # Niah
|
||||
from .ruler_vt_gen import vt_datasets # VT
|
||||
from .ruler_fwe_gen import fwe_datasets # FWE
|
||||
from .ruler_cwe_gen import cwe_datasets # CWE
|
||||
from .ruler_qa_gen import qa_datasets # QA
|
||||
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||
|
||||
|
||||
import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||
|
||||
# Evaluation config
|
||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||
@ -21,7 +21,7 @@ ruler_datasets = []
|
||||
|
||||
# Different seq length
|
||||
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
||||
for dataset in import_datasets:
|
||||
for dataset in import_ds:
|
||||
tmp_dataset = dataset.deepcopy()
|
||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||
|
@ -1,14 +1,14 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_niah_gen import niah_datasets # Niah
|
||||
from .ruler_vt_gen import vt_datasets # VT
|
||||
from .ruler_fwe_gen import fwe_datasets # FWE
|
||||
from .ruler_cwe_gen import cwe_datasets # CWE
|
||||
from .ruler_qa_gen import qa_datasets # QA
|
||||
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||
|
||||
|
||||
import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||
|
||||
# Evaluation config
|
||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||
@ -20,7 +20,7 @@ ruler_datasets = []
|
||||
|
||||
# Different seq length
|
||||
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
||||
for dataset in import_datasets:
|
||||
for dataset in import_ds:
|
||||
tmp_dataset = dataset.deepcopy()
|
||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||
|
@ -2,14 +2,14 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_niah_gen import niah_datasets # Niah
|
||||
from .ruler_vt_gen import vt_datasets # VT
|
||||
from .ruler_fwe_gen import fwe_datasets # FWE
|
||||
from .ruler_cwe_gen import cwe_datasets # CWE
|
||||
from .ruler_qa_gen import qa_datasets # QA
|
||||
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||
|
||||
|
||||
import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||
|
||||
# Evaluation config
|
||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||
@ -21,7 +21,7 @@ ruler_datasets = []
|
||||
|
||||
# Different seq length
|
||||
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
||||
for dataset in import_datasets:
|
||||
for dataset in import_ds:
|
||||
tmp_dataset = dataset.deepcopy()
|
||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||
|
@ -1,13 +1,11 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_4k_gen import ruler_datasets as ruler_4k_datasets
|
||||
from .ruler_8k_gen import ruler_datasets as ruler_8k_datasets
|
||||
from .ruler_16k_gen import ruler_datasets as ruler_16k_datasets
|
||||
from .ruler_32k_gen import ruler_datasets as ruler_32k_datasets
|
||||
from .ruler_128k_gen import ruler_datasets as ruler_128k_datasets
|
||||
from .ruler_1m_gen import ruler_datasets as ruler_1m_datasets
|
||||
from .ruler_1m_gen import ruler_datasets as ruler_1m_ds
|
||||
from .ruler_4k_gen import ruler_datasets as ruler_4k_ds
|
||||
from .ruler_8k_gen import ruler_datasets as ruler_8k_ds
|
||||
from .ruler_16k_gen import ruler_datasets as ruler_16k_ds
|
||||
from .ruler_32k_gen import ruler_datasets as ruler_32k_ds
|
||||
from .ruler_128k_gen import ruler_datasets as ruler_128k_ds
|
||||
|
||||
ruler_combined_datasets = sum(
|
||||
(v for k, v in locals().items() if k.endswith('_datasets')), []
|
||||
)
|
||||
ruler_combined_datasets = sum((v for k, v in locals().items() if k.endswith('_ds')), [])
|
||||
|
@ -1,14 +1,14 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_niah_gen import niah_datasets # Niah
|
||||
from .ruler_vt_gen import vt_datasets # VT
|
||||
from .ruler_fwe_gen import fwe_datasets # FWE
|
||||
from .ruler_cwe_gen import cwe_datasets # CWE
|
||||
from .ruler_qa_gen import qa_datasets # QA
|
||||
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||
|
||||
|
||||
import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||
|
||||
# Evaluation config
|
||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||
@ -20,7 +20,7 @@ ruler_datasets = []
|
||||
|
||||
# Different seq length
|
||||
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
||||
for dataset in import_datasets:
|
||||
for dataset in import_ds:
|
||||
tmp_dataset = dataset.deepcopy()
|
||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||
|
@ -2,14 +2,14 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_niah_gen import niah_datasets # Niah
|
||||
from .ruler_vt_gen import vt_datasets # VT
|
||||
from .ruler_fwe_gen import fwe_datasets # FWE
|
||||
from .ruler_cwe_gen import cwe_datasets # CWE
|
||||
from .ruler_qa_gen import qa_datasets # QA
|
||||
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||
|
||||
|
||||
import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||
|
||||
# Evaluation config
|
||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||
|
@ -2,14 +2,14 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_niah_gen import niah_datasets # Niah
|
||||
from .ruler_vt_gen import vt_datasets # VT
|
||||
from .ruler_fwe_gen import fwe_datasets # FWE
|
||||
from .ruler_cwe_gen import cwe_datasets # CWE
|
||||
from .ruler_qa_gen import qa_datasets # QA
|
||||
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||
|
||||
|
||||
import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||
|
||||
# Evaluation config
|
||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||
@ -21,7 +21,7 @@ ruler_datasets = []
|
||||
|
||||
# Different seq length
|
||||
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
||||
for dataset in import_datasets:
|
||||
for dataset in import_ds:
|
||||
tmp_dataset = dataset.deepcopy()
|
||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||
|
@ -2,14 +2,14 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_niah_gen import niah_datasets # Niah
|
||||
from .ruler_vt_gen import vt_datasets # VT
|
||||
from .ruler_fwe_gen import fwe_datasets # FWE
|
||||
from .ruler_cwe_gen import cwe_datasets # CWE
|
||||
from .ruler_qa_gen import qa_datasets # QA
|
||||
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||
|
||||
|
||||
import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||
|
||||
# Evaluation config
|
||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||
@ -21,7 +21,7 @@ ruler_datasets = []
|
||||
|
||||
# Different seq length
|
||||
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
||||
for dataset in import_datasets:
|
||||
for dataset in import_ds:
|
||||
tmp_dataset = dataset.deepcopy()
|
||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||
|
@ -1,14 +1,14 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_niah_gen import niah_datasets # Niah
|
||||
from .ruler_vt_gen import vt_datasets # VT
|
||||
from .ruler_fwe_gen import fwe_datasets # FWE
|
||||
from .ruler_cwe_gen import cwe_datasets # CWE
|
||||
from .ruler_qa_gen import qa_datasets # QA
|
||||
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||
|
||||
|
||||
import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||
|
||||
# Evaluation config
|
||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||
@ -20,7 +20,7 @@ ruler_datasets = []
|
||||
|
||||
# Different seq length
|
||||
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
||||
for dataset in import_datasets:
|
||||
for dataset in import_ds:
|
||||
tmp_dataset = dataset.deepcopy()
|
||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||
|
@ -2,14 +2,14 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_niah_gen import niah_datasets # Niah
|
||||
from .ruler_vt_gen import vt_datasets # VT
|
||||
from .ruler_fwe_gen import fwe_datasets # FWE
|
||||
from .ruler_cwe_gen import cwe_datasets # CWE
|
||||
from .ruler_qa_gen import qa_datasets # QA
|
||||
from .ruler_cwe_gen import cwe_datasets as cwe # CWE
|
||||
from .ruler_fwe_gen import fwe_datasets as fwe # FWE
|
||||
from .ruler_niah_gen import niah_datasets as niah # Niah
|
||||
from .ruler_qa_gen import qa_datasets as qa # QA
|
||||
from .ruler_vt_gen import vt_datasets as vt # VT
|
||||
|
||||
|
||||
import_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
|
||||
import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
||||
|
||||
# Evaluation config
|
||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||
@ -21,7 +21,7 @@ ruler_datasets = []
|
||||
|
||||
# Different seq length
|
||||
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
||||
for dataset in import_datasets:
|
||||
for dataset in import_ds:
|
||||
tmp_dataset = dataset.deepcopy()
|
||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||
|
@ -1,13 +1,11 @@
|
||||
from mmengine.config import read_base
|
||||
|
||||
with read_base():
|
||||
from .ruler_4k_gen import ruler_datasets as ruler_4k_datasets
|
||||
from .ruler_8k_gen import ruler_datasets as ruler_8k_datasets
|
||||
from .ruler_16k_gen import ruler_datasets as ruler_16k_datasets
|
||||
from .ruler_32k_gen import ruler_datasets as ruler_32k_datasets
|
||||
from .ruler_128k_gen import ruler_datasets as ruler_128k_datasets
|
||||
from .ruler_1m_gen import ruler_datasets as ruler_1m_datasets
|
||||
from .ruler_1m_gen import ruler_datasets as ruler_1m_ds
|
||||
from .ruler_4k_gen import ruler_datasets as ruler_4k_ds
|
||||
from .ruler_8k_gen import ruler_datasets as ruler_8k_ds
|
||||
from .ruler_16k_gen import ruler_datasets as ruler_16k_ds
|
||||
from .ruler_32k_gen import ruler_datasets as ruler_32k_ds
|
||||
from .ruler_128k_gen import ruler_datasets as ruler_128k_ds
|
||||
|
||||
ruler_combined_datasets = sum(
|
||||
(v for k, v in locals().items() if k.endswith('_datasets')), []
|
||||
)
|
||||
ruler_combined_datasets = sum((v for k, v in locals().items() if k.endswith('_ds')), [])
|
||||
|
Loading…
Reference in New Issue
Block a user