mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Update] Customizable tokenizer for RULER (#1731)
* Customizable tokenizer for RULER
* Relax requirements
This commit is contained in:
parent
499302857f
commit
d70100cdf2
@ -1,3 +1,5 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 128]
|
max_seq_lens = [1024 * 128]
|
||||||
abbr_suffixs = ['128k']
|
abbr_suffixs = ['128k']
|
||||||
@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 16]
|
max_seq_lens = [1024 * 16]
|
||||||
abbr_suffixs = ['16k']
|
abbr_suffixs = ['16k']
|
||||||
@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 1024]
|
max_seq_lens = [1024 * 1024]
|
||||||
abbr_suffixs = ['1m']
|
abbr_suffixs = ['1m']
|
||||||
@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 32]
|
max_seq_lens = [1024 * 32]
|
||||||
abbr_suffixs = ['32k']
|
abbr_suffixs = ['32k']
|
||||||
@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 4]
|
max_seq_lens = [1024 * 4]
|
||||||
abbr_suffixs = ['4k']
|
abbr_suffixs = ['4k']
|
||||||
@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 64]
|
max_seq_lens = [1024 * 64]
|
||||||
abbr_suffixs: list[str] = ['64k']
|
abbr_suffixs: list[str] = ['64k']
|
||||||
@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 8]
|
max_seq_lens = [1024 * 8]
|
||||||
abbr_suffixs = ['8k']
|
abbr_suffixs = ['8k']
|
||||||
@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,9 +1,7 @@
|
|||||||
|
from opencompass.datasets.ruler.ruler_niah import RulerNiahDataset, RulerNiahEvaluator
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
|
||||||
from opencompass.datasets.ruler.ruler_niah import RulerNiahDataset
|
|
||||||
from opencompass.datasets.ruler.ruler_niah import RulerNiahEvaluator
|
|
||||||
|
|
||||||
|
|
||||||
# Ruler Dataset settings
|
# Ruler Dataset settings
|
||||||
niah_configurations = [
|
niah_configurations = [
|
||||||
@ -92,10 +90,7 @@ for index, config in enumerate(niah_configurations):
|
|||||||
'type': RulerNiahDataset,
|
'type': RulerNiahDataset,
|
||||||
'base_path': base_path,
|
'base_path': base_path,
|
||||||
'file_path': file_path,
|
'file_path': file_path,
|
||||||
# 'tokenizer_model': model_path,
|
|
||||||
'tokens_to_generate': 128,
|
'tokens_to_generate': 128,
|
||||||
# 'max_seq_length': max_seq_len,
|
|
||||||
# 'num_samples': NUM_SAMPLES,
|
|
||||||
'type_haystack': config['type_haystack'],
|
'type_haystack': config['type_haystack'],
|
||||||
'type_needle_k': config['type_needle_k'],
|
'type_needle_k': config['type_needle_k'],
|
||||||
'type_needle_v': config['type_needle_v'],
|
'type_needle_v': config['type_needle_v'],
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 128]
|
max_seq_lens = [1024 * 128]
|
||||||
abbr_suffixs = ['128k']
|
abbr_suffixs = ['128k']
|
||||||
@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 16]
|
max_seq_lens = [1024 * 16]
|
||||||
abbr_suffixs = ['16k']
|
abbr_suffixs = ['16k']
|
||||||
@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 1024]
|
max_seq_lens = [1024 * 1024]
|
||||||
abbr_suffixs = ['1m']
|
abbr_suffixs = ['1m']
|
||||||
@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 32]
|
max_seq_lens = [1024 * 32]
|
||||||
abbr_suffixs = ['32k']
|
abbr_suffixs = ['32k']
|
||||||
@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 4]
|
max_seq_lens = [1024 * 4]
|
||||||
abbr_suffixs = ['4k']
|
abbr_suffixs = ['4k']
|
||||||
@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
with read_base():
|
with read_base():
|
||||||
@ -12,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 64]
|
max_seq_lens = [1024 * 64]
|
||||||
abbr_suffixs: list[str] = ['64k']
|
abbr_suffixs: list[str] = ['64k']
|
||||||
@ -25,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
from mmengine.config import read_base
|
from mmengine.config import read_base
|
||||||
|
|
||||||
@ -13,6 +14,7 @@ import_ds = sum((cwe, fwe, niah, qa, vt), [])
|
|||||||
|
|
||||||
# Evaluation config
|
# Evaluation config
|
||||||
NUM_SAMPLES = 100 # Change to the number of samples you need
|
NUM_SAMPLES = 100 # Change to the number of samples you need
|
||||||
|
tokenizer_model = os.environ.get('TOKENIZER_MODEL', 'gpt-4')
|
||||||
# Change the context lengths to be tested
|
# Change the context lengths to be tested
|
||||||
max_seq_lens = [1024 * 8]
|
max_seq_lens = [1024 * 8]
|
||||||
abbr_suffixs = ['8k']
|
abbr_suffixs = ['8k']
|
||||||
@ -26,4 +28,5 @@ for max_seq_len, abbr_suffix in zip(max_seq_lens, abbr_suffixs):
|
|||||||
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
tmp_dataset['abbr'] = tmp_dataset['abbr'] + '_' + abbr_suffix
|
||||||
tmp_dataset['num_samples'] = NUM_SAMPLES
|
tmp_dataset['num_samples'] = NUM_SAMPLES
|
||||||
tmp_dataset['max_seq_length'] = max_seq_len
|
tmp_dataset['max_seq_length'] = max_seq_len
|
||||||
|
tmp_dataset['tokenizer_model'] = tokenizer_model
|
||||||
ruler_datasets.append(tmp_dataset)
|
ruler_datasets.append(tmp_dataset)
|
||||||
|
@ -1,9 +1,7 @@
|
|||||||
|
from opencompass.datasets.ruler.ruler_niah import RulerNiahDataset, RulerNiahEvaluator
|
||||||
|
from opencompass.openicl.icl_inferencer import GenInferencer
|
||||||
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
||||||
from opencompass.openicl.icl_retriever import ZeroRetriever
|
from opencompass.openicl.icl_retriever import ZeroRetriever
|
||||||
from opencompass.openicl.icl_inferencer import GenInferencer
|
|
||||||
from opencompass.datasets.ruler.ruler_niah import RulerNiahDataset
|
|
||||||
from opencompass.datasets.ruler.ruler_niah import RulerNiahEvaluator
|
|
||||||
|
|
||||||
|
|
||||||
# Ruler Dataset settings
|
# Ruler Dataset settings
|
||||||
niah_configurations = [
|
niah_configurations = [
|
||||||
@ -92,10 +90,7 @@ for index, config in enumerate(niah_configurations):
|
|||||||
'type': RulerNiahDataset,
|
'type': RulerNiahDataset,
|
||||||
'base_path': base_path,
|
'base_path': base_path,
|
||||||
'file_path': file_path,
|
'file_path': file_path,
|
||||||
# 'tokenizer_model': model_path,
|
|
||||||
'tokens_to_generate': 128,
|
'tokens_to_generate': 128,
|
||||||
# 'max_seq_length': max_seq_len,
|
|
||||||
# 'num_samples': NUM_SAMPLES,
|
|
||||||
'type_haystack': config['type_haystack'],
|
'type_haystack': config['type_haystack'],
|
||||||
'type_needle_k': config['type_needle_k'],
|
'type_needle_k': config['type_needle_k'],
|
||||||
'type_needle_v': config['type_needle_v'],
|
'type_needle_v': config['type_needle_v'],
|
||||||
|
@ -2,7 +2,7 @@ absl-py
|
|||||||
accelerate>=0.19.0
|
accelerate>=0.19.0
|
||||||
cpm_kernels
|
cpm_kernels
|
||||||
datasets>=2.12.0
|
datasets>=2.12.0
|
||||||
einops==0.5.0
|
einops>=0.5.0
|
||||||
evaluate>=0.3.0
|
evaluate>=0.3.0
|
||||||
func_timeout
|
func_timeout
|
||||||
fuzzywuzzy
|
fuzzywuzzy
|
||||||
@ -16,7 +16,7 @@ jieba
|
|||||||
json5
|
json5
|
||||||
jsonlines
|
jsonlines
|
||||||
mmengine-lite
|
mmengine-lite
|
||||||
nltk==3.8
|
nltk>=3.7
|
||||||
numpy>=1.23.4,<2.0.0
|
numpy>=1.23.4,<2.0.0
|
||||||
openai
|
openai
|
||||||
OpenCC
|
OpenCC
|
||||||
|
Loading…
Reference in New Issue
Block a user