diff --git a/configs/datasets/ruler/ruler_128k_gen.py b/configs/datasets/ruler/ruler_128k_gen.py
index 4f302ad2..02dea12a 100644
--- a/configs/datasets/ruler/ruler_128k_gen.py
+++ b/configs/datasets/ruler/ruler_128k_gen.py
@@ -1,3 +1,5 @@
+import os
+
 from mmengine.config import read_base
 
 with read_base():
@@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 128]
 abbr_suffixs = ['128k']
@@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_16k_gen.py b/configs/datasets/ruler/ruler_16k_gen.py
index faab3cca..e6841fff 100644
--- a/configs/datasets/ruler/ruler_16k_gen.py
+++ b/configs/datasets/ruler/ruler_16k_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 16]
 abbr_suffixs = ['16k']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_1m_gen.py b/configs/datasets/ruler/ruler_1m_gen.py
index 0b20375c..bf4aa75e 100644
--- a/configs/datasets/ruler/ruler_1m_gen.py
+++ b/configs/datasets/ruler/ruler_1m_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 1024]
 abbr_suffixs = ['1m']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_32k_gen.py b/configs/datasets/ruler/ruler_32k_gen.py
index ab02cb4a..1d0ac63a 100644
--- a/configs/datasets/ruler/ruler_32k_gen.py
+++ b/configs/datasets/ruler/ruler_32k_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 32]
 abbr_suffixs = ['32k']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_4k_gen.py b/configs/datasets/ruler/ruler_4k_gen.py
index f0031507..75186b41 100644
--- a/configs/datasets/ruler/ruler_4k_gen.py
+++ b/configs/datasets/ruler/ruler_4k_gen.py
@@ -1,3 +1,5 @@
+import os
+
 from mmengine.config import read_base
 
 with read_base():
@@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 4]
 abbr_suffixs = ['4k']
@@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_64k_gen.py b/configs/datasets/ruler/ruler_64k_gen.py
index 709260d6..8ba25c1e 100644
--- a/configs/datasets/ruler/ruler_64k_gen.py
+++ b/configs/datasets/ruler/ruler_64k_gen.py
@@ -1,3 +1,5 @@
+import os
+
 from mmengine.config import read_base
 
 with read_base():
@@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 64]
 abbr_suffixs = ['64k']
@@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_8k_gen.py b/configs/datasets/ruler/ruler_8k_gen.py
index 8c9a4ad8..bcbfb533 100644
--- a/configs/datasets/ruler/ruler_8k_gen.py
+++ b/configs/datasets/ruler/ruler_8k_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 8]
 abbr_suffixs = ['8k']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_niah_gen.py b/configs/datasets/ruler/ruler_niah_gen.py
index bb6e79a5..b338b03d 100644
--- a/configs/datasets/ruler/ruler_niah_gen.py
+++ b/configs/datasets/ruler/ruler_niah_gen.py
@@ -1,9 +1,7 @@
+from opencompass.datasets.ruler.ruler_niah import RulerNiahDataset, RulerNiahEvaluator
+from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
-from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets.ruler.ruler_niah import RulerNiahDataset
-from opencompass.datasets.ruler.ruler_niah import RulerNiahEvaluator
-
 
 # Ruler Dataset settings
 niah_configurations = [
@@ -92,10 +90,7 @@ for index, config in enumerate(niah_configurations):
         'type': RulerNiahDataset,
         'base_path': base_path,
         'file_path': file_path,
-        # 'tokenizer_model': model_path,
         'tokens_to_generate': 128,
-        # 'max_seq_length': max_seq_len,
-        # 'num_samples': NUM_SAMPLES,
         'type_haystack': config['type_haystack'],
         'type_needle_k': config['type_needle_k'],
         'type_needle_v': config['type_needle_v'],
diff --git a/opencompass/configs/datasets/ruler/ruler_128k_gen.py b/opencompass/configs/datasets/ruler/ruler_128k_gen.py
index 4f302ad2..02dea12a 100644
--- a/opencompass/configs/datasets/ruler/ruler_128k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_128k_gen.py
@@ -1,3 +1,5 @@
+import os
+
 from mmengine.config import read_base
 
 with read_base():
@@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 128]
 abbr_suffixs = ['128k']
@@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_16k_gen.py b/opencompass/configs/datasets/ruler/ruler_16k_gen.py
index faab3cca..e6841fff 100644
--- a/opencompass/configs/datasets/ruler/ruler_16k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_16k_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 16]
 abbr_suffixs = ['16k']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_1m_gen.py b/opencompass/configs/datasets/ruler/ruler_1m_gen.py
index 0b20375c..bf4aa75e 100644
--- a/opencompass/configs/datasets/ruler/ruler_1m_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_1m_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 1024]
 abbr_suffixs = ['1m']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_32k_gen.py b/opencompass/configs/datasets/ruler/ruler_32k_gen.py
index ab02cb4a..1d0ac63a 100644
--- a/opencompass/configs/datasets/ruler/ruler_32k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_32k_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 32]
 abbr_suffixs = ['32k']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_4k_gen.py b/opencompass/configs/datasets/ruler/ruler_4k_gen.py
index f0031507..75186b41 100644
--- a/opencompass/configs/datasets/ruler/ruler_4k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_4k_gen.py
@@ -1,3 +1,5 @@
+import os
+
 from mmengine.config import read_base
 
 with read_base():
@@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 4]
 abbr_suffixs = ['4k']
@@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_64k_gen.py b/opencompass/configs/datasets/ruler/ruler_64k_gen.py
index 709260d6..8ba25c1e 100644
--- a/opencompass/configs/datasets/ruler/ruler_64k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_64k_gen.py
@@ -1,3 +1,5 @@
+import os
+
 from mmengine.config import read_base
 
 with read_base():
@@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 64]
 abbr_suffixs = ['64k']
@@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_8k_gen.py b/opencompass/configs/datasets/ruler/ruler_8k_gen.py
index 8c9a4ad8..bcbfb533 100644
--- a/opencompass/configs/datasets/ruler/ruler_8k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_8k_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 8]
 abbr_suffixs = ['8k']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_niah_gen.py b/opencompass/configs/datasets/ruler/ruler_niah_gen.py
index bb6e79a5..b338b03d 100644
--- a/opencompass/configs/datasets/ruler/ruler_niah_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_niah_gen.py
@@ -1,9 +1,7 @@
+from opencompass.datasets.ruler.ruler_niah import RulerNiahDataset, RulerNiahEvaluator
+from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
-from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets.ruler.ruler_niah import RulerNiahDataset
-from opencompass.datasets.ruler.ruler_niah import RulerNiahEvaluator
-
 
 # Ruler Dataset settings
 niah_configurations = [
@@ -92,10 +90,7 @@ for index, config in enumerate(niah_configurations):
         'type': RulerNiahDataset,
         'base_path': base_path,
         'file_path': file_path,
-        # 'tokenizer_model': model_path,
         'tokens_to_generate': 128,
-        # 'max_seq_length': max_seq_len,
-        # 'num_samples': NUM_SAMPLES,
         'type_haystack': config['type_haystack'],
         'type_needle_k': config['type_needle_k'],
         'type_needle_v': config['type_needle_v'],
diff --git a/requirements/runtime.txt b/requirements/runtime.txt
index fb87ca1a..47133f21 100644
--- a/requirements/runtime.txt
+++ b/requirements/runtime.txt
@@ -2,7 +2,7 @@ absl-py
 accelerate>=0.19.0
 cpm_kernels
 datasets>=2.12.0
-einops==0.5.0
+einops>=0.5.0
 evaluate>=0.3.0
 func_timeout
 fuzzywuzzy
@@ -16,7 +16,7 @@ jieba
 json5
 jsonlines
 mmengine-lite
-nltk==3.8
+nltk>=3.7
 numpy>=1.23.4,<2.0.0
 openai
 OpenCC
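
Taken together, every `ruler_*_gen.py` change above is the same pattern applied fourteen times (once per context length, in both the `configs/` and `opencompass/configs/` trees): read an optional `TOKENIZER_MODEL` environment variable, defaulting to `'gpt-4'`, and stamp the value onto each generated dataset dict next to `num_samples` and `max_seq_length`. The commented-out per-dataset `tokenizer_model` entries in `ruler_niah_gen.py` are removed because the value now flows in from the length-specific configs. Below is a minimal, runnable sketch of that pattern in isolation; the two-entry `import_ds` is a hypothetical stand-in for the dataset lists the real configs pull in via `read_base()`.

```python
import os

# Tokenizer identifier attached to every RULER dataset config; set
# TOKENIZER_MODEL in the environment once instead of editing each file.
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')

NUM_SAMPLES = 100
max_seq_lens = [1024 * 4, 1024 * 8]
abbr_suffixs = ['4k', '8k']

# Hypothetical stand-ins for the subsets imported with read_base().
import_ds = [{'abbr': 'ruler_niah'}, {'abbr': 'ruler_vt'}]

ruler_datasets = []
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
    for dataset in import_ds:
        tmp_dataset = dict(dataset)  # the real configs copy the imported config
        tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
        tmp_dataset['num_samples'] = NUM_SAMPLES
        tmp_dataset['max_seq_length'] = max_seq_len
        tmp_dataset['tokenizer_model'] = tokenizer_model
        ruler_datasets.append(tmp_dataset)

print(ruler_datasets[0])
# {'abbr': 'ruler_niah_4k', 'num_samples': 100,
#  'max_seq_length': 4096, 'tokenizer_model': 'gpt-4'}
```

With this in place, exporting `TOKENIZER_MODEL` before launching an evaluation switches the tokenizer for all context lengths at once, and the fourteen near-identical config files stay in sync without hand edits.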