diff --git a/configs/datasets/ruler/ruler_128k_gen.py b/configs/datasets/ruler/ruler_128k_gen.py
index 4f302ad2..02dea12a 100644
--- a/configs/datasets/ruler/ruler_128k_gen.py
+++ b/configs/datasets/ruler/ruler_128k_gen.py
@@ -1,3 +1,5 @@
+import os
+
 from mmengine.config import read_base
 
 with read_base():
@@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 128]
 abbr_suffixs = ['128k']
@@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_16k_gen.py b/configs/datasets/ruler/ruler_16k_gen.py
index faab3cca..e6841fff 100644
--- a/configs/datasets/ruler/ruler_16k_gen.py
+++ b/configs/datasets/ruler/ruler_16k_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 16]
 abbr_suffixs = ['16k']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_1m_gen.py b/configs/datasets/ruler/ruler_1m_gen.py
index 0b20375c..bf4aa75e 100644
--- a/configs/datasets/ruler/ruler_1m_gen.py
+++ b/configs/datasets/ruler/ruler_1m_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 1024]
 abbr_suffixs = ['1m']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_32k_gen.py b/configs/datasets/ruler/ruler_32k_gen.py
index ab02cb4a..1d0ac63a 100644
--- a/configs/datasets/ruler/ruler_32k_gen.py
+++ b/configs/datasets/ruler/ruler_32k_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 32]
 abbr_suffixs = ['32k']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_4k_gen.py b/configs/datasets/ruler/ruler_4k_gen.py
index f0031507..75186b41 100644
--- a/configs/datasets/ruler/ruler_4k_gen.py
+++ b/configs/datasets/ruler/ruler_4k_gen.py
@@ -1,3 +1,5 @@
+import os
+
 from mmengine.config import read_base
 
 with read_base():
@@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 4]
 abbr_suffixs = ['4k']
@@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_64k_gen.py b/configs/datasets/ruler/ruler_64k_gen.py
index 709260d6..8ba25c1e 100644
--- a/configs/datasets/ruler/ruler_64k_gen.py
+++ b/configs/datasets/ruler/ruler_64k_gen.py
@@ -1,3 +1,5 @@
+import os
+
 from mmengine.config import read_base
 
 with read_base():
@@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 64]
 abbr_suffixs = ['64k']
@@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_8k_gen.py b/configs/datasets/ruler/ruler_8k_gen.py
index 8c9a4ad8..bcbfb533 100644
--- a/configs/datasets/ruler/ruler_8k_gen.py
+++ b/configs/datasets/ruler/ruler_8k_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 8]
 abbr_suffixs = ['8k']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/configs/datasets/ruler/ruler_niah_gen.py b/configs/datasets/ruler/ruler_niah_gen.py
index bb6e79a5..b338b03d 100644
--- a/configs/datasets/ruler/ruler_niah_gen.py
+++ b/configs/datasets/ruler/ruler_niah_gen.py
@@ -1,9 +1,7 @@
+from opencompass.datasets.ruler.ruler_niah import RulerNiahDataset, RulerNiahEvaluator
+from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
-from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets.ruler.ruler_niah import RulerNiahDataset
-from opencompass.datasets.ruler.ruler_niah import RulerNiahEvaluator
-
 
 # Ruler Dataset settings
 niah_configurations = [
@@ -92,10 +90,7 @@ for index, config in enumerate(niah_configurations):
         'type': RulerNiahDataset,
         'base_path': base_path,
         'file_path': file_path,
-        # 'tokenizer_model': model_path,
         'tokens_to_generate': 128,
-        # 'max_seq_length': max_seq_len,
-        # 'num_samples': NUM_SAMPLES,
         'type_haystack': config['type_haystack'],
         'type_needle_k': config['type_needle_k'],
         'type_needle_v': config['type_needle_v'],
diff --git a/opencompass/configs/datasets/ruler/ruler_128k_gen.py b/opencompass/configs/datasets/ruler/ruler_128k_gen.py
index 4f302ad2..02dea12a 100644
--- a/opencompass/configs/datasets/ruler/ruler_128k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_128k_gen.py
@@ -1,3 +1,5 @@
+import os
+
 from mmengine.config import read_base
 
 with read_base():
@@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 128]
 abbr_suffixs = ['128k']
@@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_16k_gen.py b/opencompass/configs/datasets/ruler/ruler_16k_gen.py
index faab3cca..e6841fff 100644
--- a/opencompass/configs/datasets/ruler/ruler_16k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_16k_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 16]
 abbr_suffixs = ['16k']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_1m_gen.py b/opencompass/configs/datasets/ruler/ruler_1m_gen.py
index 0b20375c..bf4aa75e 100644
--- a/opencompass/configs/datasets/ruler/ruler_1m_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_1m_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 1024]
 abbr_suffixs = ['1m']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_32k_gen.py b/opencompass/configs/datasets/ruler/ruler_32k_gen.py
index ab02cb4a..1d0ac63a 100644
--- a/opencompass/configs/datasets/ruler/ruler_32k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_32k_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 32]
 abbr_suffixs = ['32k']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_4k_gen.py b/opencompass/configs/datasets/ruler/ruler_4k_gen.py
index f0031507..75186b41 100644
--- a/opencompass/configs/datasets/ruler/ruler_4k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_4k_gen.py
@@ -1,3 +1,5 @@
+import os
+
 from mmengine.config import read_base
 
 with read_base():
@@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 4]
 abbr_suffixs = ['4k']
@@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_64k_gen.py b/opencompass/configs/datasets/ruler/ruler_64k_gen.py
index 709260d6..8ba25c1e 100644
--- a/opencompass/configs/datasets/ruler/ruler_64k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_64k_gen.py
@@ -1,3 +1,5 @@
+import os
+
 from mmengine.config import read_base
 
 with read_base():
@@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 64]
 abbr_suffixs = ['64k']
@@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_8k_gen.py b/opencompass/configs/datasets/ruler/ruler_8k_gen.py
index 8c9a4ad8..bcbfb533 100644
--- a/opencompass/configs/datasets/ruler/ruler_8k_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_8k_gen.py
@@ -1,3 +1,4 @@
+import os
 from mmengine.config import read_base
 
 
@@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
 
 # Evaluation config
 NUM_SAMPLES = 100  # Change to the number of samples you need
+tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
 # Change the context lengths to be tested
 max_seq_lens = [1024 * 8]
 abbr_suffixs = ['8k']
@@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
         tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
         tmp_dataset['num_samples'] = NUM_SAMPLES
         tmp_dataset['max_seq_length'] = max_seq_len
+        tmp_dataset['tokenizer_model'] = tokenizer_model
         ruler_datasets.append(tmp_dataset)
diff --git a/opencompass/configs/datasets/ruler/ruler_niah_gen.py b/opencompass/configs/datasets/ruler/ruler_niah_gen.py
index bb6e79a5..b338b03d 100644
--- a/opencompass/configs/datasets/ruler/ruler_niah_gen.py
+++ b/opencompass/configs/datasets/ruler/ruler_niah_gen.py
@@ -1,9 +1,7 @@
+from opencompass.datasets.ruler.ruler_niah import RulerNiahDataset, RulerNiahEvaluator
+from opencompass.openicl.icl_inferencer import GenInferencer
 from opencompass.openicl.icl_prompt_template import PromptTemplate
 from opencompass.openicl.icl_retriever import ZeroRetriever
-from opencompass.openicl.icl_inferencer import GenInferencer
-from opencompass.datasets.ruler.ruler_niah import RulerNiahDataset
-from opencompass.datasets.ruler.ruler_niah import RulerNiahEvaluator
-
 
 # Ruler Dataset settings
 niah_configurations = [
@@ -92,10 +90,7 @@ for index, config in enumerate(niah_configurations):
         'type': RulerNiahDataset,
         'base_path': base_path,
         'file_path': file_path,
-        # 'tokenizer_model': model_path,
         'tokens_to_generate': 128,
-        # 'max_seq_length': max_seq_len,
-        # 'num_samples': NUM_SAMPLES,
         'type_haystack': config['type_haystack'],
         'type_needle_k': config['type_needle_k'],
         'type_needle_v': config['type_needle_v'],
diff --git a/requirements/runtime.txt b/requirements/runtime.txt
index fb87ca1a..47133f21 100644
--- a/requirements/runtime.txt
+++ b/requirements/runtime.txt
@@ -2,7 +2,7 @@ absl-py
 accelerate>=0.19.0
 cpm_kernels
 datasets>=2.12.0
-einops==0.5.0
+einops>=0.5.0
 evaluate>=0.3.0
 func_timeout
 fuzzywuzzy
@@ -16,7 +16,7 @@ jieba
 json5
 jsonlines
 mmengine-lite
-nltk==3.8
+nltk>=3.7
 numpy>=1.23.4,<2.0.0
 openai
 OpenCC
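
Taken together, every `ruler_*_gen.py` change above is the same pattern applied fourteen times (once per context length, in both the `configs/` and `opencompass/configs/` trees): read an optional `TOKENIZER_MODEL` environment variable, defaulting to `'gpt-4'`, and stamp the value onto each generated dataset dict next to `num_samples` and `max_seq_length`. The commented-out per-dataset `tokenizer_model` entries in `ruler_niah_gen.py` are removed because the value now flows in from the length-specific configs. Below is a minimal, runnable sketch of that pattern in isolation; the two-entry `import_ds` is a hypothetical stand-in for the dataset lists the real configs pull in via `read_base()`.

```python
import os

# Tokenizer identifier attached to every RULER dataset config; set
# TOKENIZER_MODEL in the environment once instead of editing each file.
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')

NUM_SAMPLES = 100
max_seq_lens = [1024 * 4, 1024 * 8]
abbr_suffixs = ['4k', '8k']

# Hypothetical stand-ins for the subsets imported with read_base().
import_ds = [{'abbr': 'ruler_niah'}, {'abbr': 'ruler_vt'}]

ruler_datasets = []
for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
    for dataset in import_ds:
        tmp_dataset = dict(dataset)  # the real configs copy the imported config
        tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
        tmp_dataset['num_samples'] = NUM_SAMPLES
        tmp_dataset['max_seq_length'] = max_seq_len
        tmp_dataset['tokenizer_model'] = tokenizer_model
        ruler_datasets.append(tmp_dataset)

print(ruler_datasets[0])
# {'abbr': 'ruler_niah_4k', 'num_samples': 100,
#  'max_seq_length': 4096, 'tokenizer_model': 'gpt-4'}
```

With this in place, exporting `TOKENIZER_MODEL` before launching an evaluation switches the tokenizer for all context lengths at once, and the fourteen near-identical config files stay in sync without hand edits.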