Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)

Commit a2b6e4af9b: Merge branch 'open-compass:main' into main
@@ -5,11 +5,18 @@ models = [
         type=TurboMindModelwithChatTemplate,
         abbr='mixtral-large-instruct-2407-turbomind',
         path='mistralai/Mistral-Large-Instruct-2407',
-        engine_config=dict(session_len=32768, max_batch_size=16, tp=4),
-        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        engine_config=dict(
+            session_len=32768,
+            max_batch_size=16,
+            tp=4,
+            cache_max_entry_count=0.7,
+        ),
+        gen_config=dict(
+            top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096
+        ),
         max_seq_len=32768,
         max_out_len=4096,
-        batch_size=16,
+        batch_size=8,
         run_cfg=dict(num_gpus=4),
     )
 ]
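For context, not part of this commit: in LMDeploy's TurboMind engine config, cache_max_entry_count caps the fraction of free GPU memory given to the k/v cache, so setting it to 0.7 leaves headroom for the 32k-token sessions configured above. A minimal standalone sketch of an equivalent LMDeploy pipeline, assuming lmdeploy is installed and the model weights are reachable; the prompt is a placeholder:

# Sketch only; mirrors the engine_config / gen_config values shown in the hunk above.
from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline

backend_config = TurbomindEngineConfig(
    session_len=32768,          # maximum context length per session
    max_batch_size=16,
    tp=4,                       # tensor parallelism across 4 GPUs
    cache_max_entry_count=0.7,  # fraction of free GPU memory used for the k/v cache
)
gen_config = GenerationConfig(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096)

pipe = pipeline('mistralai/Mistral-Large-Instruct-2407', backend_config=backend_config)
print(pipe(['Say hello.'], gen_config=gen_config))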
@@ -138,6 +138,10 @@ needlebench_256k_summarizer = create_summarizer(context_lengths_256k, depths_lis
 context_lengths_1000k = list([20000, 160000, 300000, 440000, 580000, 720000, 860000, 1000000])
 needlebench_1000k_summarizer = create_summarizer(context_lengths_1000k, depths_list_sparse, '1000k')
 
+depths_list_internal = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, ]
+needlebench_internal_32k_summarizer = create_summarizer([32000], depths_list_internal, '32000')
+needlebench_internal_100k_summarizer = create_summarizer([100000], depths_list_internal, '100000')
+needlebench_internal_200k_summarizer = create_summarizer([200000], depths_list_internal, '200000')
 
 _needlebench_8k_parallel_en_batch1 = []
 _needlebench_8k_parallel_en_batch5 = []
@@ -0,0 +1,8 @@
+from mmengine.config import read_base
+
+with read_base():
+
+    from .needlebench_single import needlebench_en_datasets as needlebench_origin_en_datasets
+    from .needlebench_single import needlebench_zh_datasets as needlebench_origin_zh_datasets
+
+needlebench_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
@@ -0,0 +1,111 @@
+from opencompass.openicl.icl_prompt_template import PromptTemplate
+from opencompass.openicl.icl_retriever import ZeroRetriever
+from opencompass.openicl.icl_inferencer import GenInferencer
+from opencompass.datasets.needlebench.origin import NeedleBenchOriginDataset
+from opencompass.datasets.needlebench.origin import NeedleBenchOriginEvaluator
+from opencompass.datasets.needlebench.origin import needlebench_postprocess
+from opencompass.datasets.needlebench.origin import needlebench_dataset_postprocess
+import math
+
+
+def logistic(x, L=100, x0=50, k=0.1):
+    return round(L / (1 + math.exp(-k * (x - x0))), 3)
+
+
+def generate_linear_space(start, end, num):
+    if num == 1:
+        return [start]
+    elif num < 1:
+        raise ValueError('num must be at least 1.')
+    step = (end - start) / (num - 1)
+    return [start + step * i for i in range(num)]
+
+
+def generate_depth_percents(intervals, interval_type):
+    if interval_type == 'linear':
+        return generate_linear_space(0, 100, intervals)
+    elif interval_type == 'sigmoid':
+        linear_space = generate_linear_space(0, 100, intervals)
+        return [logistic(x) for x in linear_space]
+    else:
+        raise ValueError('Unsupported interval type')
+
+
+needlebench_reader_cfg = dict(input_columns=['prompt'], output_column='answer')
+
+needlebench_infer_cfg = dict(
+    prompt_template=dict(
+        type=PromptTemplate,
+        template=dict(
+            round=[
+                dict(role='HUMAN', prompt='{prompt}'),
+                # dict(role='BOT', prompt='{answer}\n'),
+            ]
+        ),
+    ),
+    retriever=dict(type=ZeroRetriever),
+    inferencer=dict(type=GenInferencer),
+)
+
+needlebench_eval_cfg = dict(
+    evaluator=dict(type=NeedleBenchOriginEvaluator),
+    pred_postprocessor=dict(type=needlebench_postprocess),
+    dataset_postprocessor=dict(type=needlebench_dataset_postprocess),
+    pred_role='BOT',
+)
+
+context_lengths = [32000, 100000, 200000, ]  # 300000, 400000, 500000, 600000, 700000, 800000, 900000, 1000000
+depths_list = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, ]  #
+
+base_path = 'opencompass/needlebench'
+file_list = ['en_un_asr.jsonl']
+needlebench_en_datasets = []
+needle_file_name = 'needles.jsonl'
+
+for original_context_length in context_lengths:
+    for depth_percent in depths_list:
+        dataset_dict = {
+            'abbr': f'Length{original_context_length}'
+            f'Depth{int(depth_percent)}_origin_en_{original_context_length}',
+            'type': NeedleBenchOriginDataset,
+            'path': base_path,
+            'length': original_context_length,
+            'depth': int(depth_percent),
+            'tokenizer_model': 'gpt-4',
+            'file_list': file_list,
+            'num_repeats_per_file': 10,
+            'length_buffer': 600,
+            'guide': False,
+            'language': 'English',
+            'needle_file_name': needle_file_name,
+            'reader_cfg': needlebench_reader_cfg,
+            'infer_cfg': needlebench_infer_cfg,
+            'eval_cfg': needlebench_eval_cfg,
+        }
+        needlebench_en_datasets.append(dataset_dict)
+
+file_list = ['zh_all.jsonl']
+needlebench_zh_datasets = []
+needle_file_name = 'needles.jsonl'
+
+for original_context_length in context_lengths:
+    for depth_percent in depths_list:
+        dataset_dict = {
+            'abbr': f'Length{original_context_length}'
+            f'Depth{int(depth_percent)}_origin_zh_{original_context_length}',
+            'type': NeedleBenchOriginDataset,
+            'path': base_path,
+            'length': original_context_length,
+            'depth': int(depth_percent),
+            'tokenizer_model': 'gpt-4',
+            'file_list': file_list,
+            'num_repeats_per_file': 10,
+            'length_buffer': 200,
+            'guide': False,
+            'language': 'Chinese',
+            'needle_file_name': needle_file_name,
+            'reader_cfg': needlebench_reader_cfg,
+            'infer_cfg': needlebench_infer_cfg,
+            'eval_cfg': needlebench_eval_cfg,
+        }
+        needlebench_zh_datasets.append(dataset_dict)
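For context, not part of this commit: generate_depth_percents above either spaces the needle-insertion depths evenly ('linear') or pushes them through the logistic curve ('sigmoid'), which clusters the sampled depths toward the very top and bottom of the document. A quick illustration using only the helpers defined in this file; the sigmoid values are approximate:

# Quick check of the two interval types defined above.
print(generate_depth_percents(5, 'linear'))
# [0.0, 25.0, 50.0, 75.0, 100.0]
print(generate_depth_percents(5, 'sigmoid'))
# approximately [0.669, 7.586, 50.0, 92.414, 99.331]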
@@ -0,0 +1,12 @@
+from opencompass.models import HuggingFacewithChatTemplate
+
+models = [
+    dict(
+        type=HuggingFacewithChatTemplate,
+        abbr="ministral-8B-instruct-2410-hf",
+        path="mistralai/Ministral-8B-Instruct-2410",
+        max_out_len=1024,
+        batch_size=8,
+        run_cfg=dict(num_gpus=2),
+    )
+]
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr="ministral-8B-instruct-2410-turbomind",
+        path="mistralai/Ministral-8B-Instruct-2410",
+        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096),
+        max_seq_len=16384,
+        max_out_len=4096,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]
@@ -0,0 +1,18 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='nvidia-3_1-Nemotron-70b-instruct-HF-turbomind',
+        path='nvidia/Llama-3.1-Nemotron-70B-Instruct-HF',
+        engine_config=dict(max_batch_size=16, tp=4),
+        gen_config=dict(
+            top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=4096
+        ),
+        max_seq_len=16384,
+        max_out_len=4096,
+        batch_size=16,
+        run_cfg=dict(num_gpus=4),
+        stop_words=['<|end_of_text|>', '<|eot_id|>'],
+    )
+]
@@ -10,6 +10,7 @@ from datasets import Dataset
 
 from opencompass.openicl.icl_evaluator import BaseEvaluator
 from opencompass.registry import ICL_EVALUATORS, LOAD_DATASET
+from opencompass.utils import get_data_path
 
 from .base import BaseDataset
 
@@ -19,6 +20,7 @@ class DingoDataset(BaseDataset):
 
     @staticmethod
     def load(path: str):
+        path = get_data_path(path, local_mode=True)
         raw_data = []
         with open(path, encoding='utf-8') as f:
             reader = csv.reader(f, delimiter=';')
@@ -34,6 +36,7 @@ class DingoLongDataset(BaseDataset):
 
     @staticmethod
     def load(path: str):
+        path = get_data_path(path, local_mode=True)
         raw_data = []
         with open(path, 'r', encoding='utf-8') as f:
             for line in f:
@@ -46,7 +49,6 @@ class DingoEvaluator(BaseEvaluator):
 
     def score(self, origin_prompt: List, predictions: List) -> dict:
         try:
-            # from dingo.model.model import Model
             from dingo.exec import Executor
             from dingo.io import InputArgs
         except Exception:
@@ -58,27 +60,30 @@ class DingoEvaluator(BaseEvaluator):
         current_time = time.strftime('%Y%m%d_%H%M%S', time.localtime())
         file_data = [{'prompt': pmt, 'prediction': prd}
                      for pmt, prd in zip(origin_prompt, predictions)]
-        file_name = 'dingo_file_' + current_time + '.jsonl'
-
+        os.makedirs('tmp', exist_ok=True)
+        file_name = os.path.join('tmp', 'dingo_file_' + current_time + '.jsonl')  # noqa: E501
         with open(file_name, 'a', encoding='utf-8') as f:
             for d in file_data:
                 json.dump(d, f, ensure_ascii=False)
                 f.write('\n')
 
         input_data = {
-            'eval_models': ['llm_base'],
+            'eval_model': 'llm_base',
            'input_path': file_name,
            'output_path': './outputs/dingo/',
+            'save_data': True,
            'dataset': 'local',
-            'datasource': 'local',
            'data_format': 'jsonl',
-            'column_prompt': ['prompt'],
-            'column_content': ['prediction'],
+            'column_prompt': 'prompt',
+            'column_content': 'prediction',
        }
-        # Model.apply_config(input_data["custom_config_path"])
-        input_args = InputArgs(**input_data)
-        executor = Executor.exec_map['local'](input_args)
-        result = executor.execute()
-        summary = result[0].to_dict()
-        os.remove(file_name)
+        try:
+            input_args = InputArgs(**input_data)
+            executor = Executor.exec_map['local'](input_args)
+            result = executor.execute()
+            summary = result[0].to_dict()
+        except Exception:
+            raise
+        finally:
+            os.remove(file_name)
         return summary
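For context, not part of this commit: DingoEvaluator.score takes parallel lists of prompts and predictions, dumps them to a temporary JSONL under tmp/, and hands that file to dingo's local Executor with the 'llm_base' rule model; the finally block now guarantees the temp file is removed even when execution fails. A hedged usage sketch, assuming the module lives at opencompass.datasets.dingo and that dingo-python==1.1.2 is installed:

# Hypothetical usage sketch; the import path is assumed, not taken from the diff.
from opencompass.datasets.dingo import DingoEvaluator

evaluator = DingoEvaluator()
summary = evaluator.score(
    origin_prompt=['Summarize the passage.', 'Translate to French: good morning'],
    predictions=['The passage argues that ...', 'bonjour'],
)
print(summary)  # dict built from dingo's local executor summary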
@@ -81,10 +81,30 @@ class NeedleBenchOriginDataset(BaseDataset):
         else:
             raise ValueError(f"Language '{language}' is not supported.")
 
+        def _modify_retrieval_question_for_base(retrieval_question):
+            if language == 'Chinese':
+                parts = retrieval_question.split('请按照')
+                retrieval_question = (parts[0] + '在回答之前,请思考文档中与此问题'
+                                      '最相关的内容是什么。请按照' + parts[1])
+                return retrieval_question.replace("请按照'", '')[:-16]
+            elif language == 'English':
+                parts = retrieval_question.split('Please answer in the format')
+                retrieval_question = (
+                    parts[0] + 'Before answering, please consider'
+                    ' what in the document is most relevant to this question.'
+                    ' Please answer in the format' + parts[1])
+                return retrieval_question.replace(
+                    "Please answer in the format '", '')[:-10]
+            else:
+                raise ValueError(f"Language '{language}' is not supported.")
+
         def _generate_prompt(context, retrieval_question):
             if guide:
                 retrieval_question = _modify_retrieval_question(
                     retrieval_question)
+            else:
+                retrieval_question = _modify_retrieval_question_for_base(
+                    retrieval_question)
 
             if language == 'Chinese':
                 if position == 'End':
@@ -129,10 +149,10 @@ class NeedleBenchOriginDataset(BaseDataset):
             return prompt
 
         file_names = [
-            'PaulGrahamEssays.jsonl', 'multi_needle_reasoning_en.json',
-            'multi_needle_reasoning_zh.json', 'zh_finance.jsonl',
-            'zh_game.jsonl', 'zh_general.jsonl', 'zh_government.jsonl',
-            'zh_movie.jsonl', 'zh_tech.jsonl'
+            'en_un_asr.jsonl', 'zh_all.jsonl', 'PaulGrahamEssays.jsonl',
+            'multi_needle_reasoning_en.json', 'multi_needle_reasoning_zh.json',
+            'zh_finance.jsonl', 'zh_game.jsonl', 'zh_general.jsonl',
+            'zh_government.jsonl', 'zh_movie.jsonl', 'zh_tech.jsonl'
         ]
         path = get_data_path(path)
         if os.environ.get('DATASET_SOURCE') == 'HF':
@@ -518,6 +518,7 @@ class HuggingFaceBaseModel(HuggingFacewithChatTemplate):
                  max_seq_len: Optional[int] = None,
                  pad_token_id: Optional[int] = None,
                  stop_words: Optional[str] = [],
+                 drop_middle: bool = False,
                  **other_kwargs):
 
         self.logger = get_logger()
@@ -525,6 +526,7 @@ class HuggingFaceBaseModel(HuggingFacewithChatTemplate):
         self.tokenizer_only = tokenizer_only
         self.template_parser = LMTemplateParser()
         self.max_seq_len = _get_possible_max_seq_len(max_seq_len, path)
+        self.drop_middle = drop_middle
         self._load_tokenizer(tokenizer_path or path, tokenizer_kwargs, pad_token_id)
         if not tokenizer_only:
             self._load_model(path=path, kwargs=model_kwargs, peft_path=peft_path, peft_kwargs=peft_kwargs)
@@ -551,7 +553,17 @@ class HuggingFaceBaseModel(HuggingFacewithChatTemplate):
             add_special_tokens=True,
             max_length=self.max_seq_len
         )
-        tokens = self.tokenizer.batch_encode_plus(messages, **tokenize_kwargs)
+
+        if self.drop_middle:
+            assert len(inputs) == 1
+            input_ids = self.tokenizer(inputs, padding=False, truncation=False)['input_ids']
+            input_ids = torch.tensor(input_ids)
+            if input_ids.shape[-1] > self.max_seq_len:
+                input_ids = torch.cat([input_ids[:, : self.max_seq_len // 2], input_ids[:, - self.max_seq_len // 2:]], dim=-1)
+            tokens = {'input_ids': input_ids, }
+        else:
+            tokens = self.tokenizer.batch_encode_plus(messages, **tokenize_kwargs)
+
         tokens = {k: v.to(self.model.device) for k, v in tokens.items()}
 
         generation_kwargs = self.generation_kwargs.copy()
@@ -603,7 +615,17 @@ class HuggingFaceBaseModel(HuggingFacewithChatTemplate):
             add_special_tokens=True,
             max_length=self.max_seq_len
         )
-        tokens = self.tokenizer.batch_encode_plus(messages, **tokenize_kwargs)
+
+        if self.drop_middle:
+            assert len(inputs) == 1
+            input_ids = self.tokenizer(inputs, padding=False, truncation=False)['input_ids']
+            input_ids = torch.tensor(input_ids)
+            if input_ids.shape[-1] > self.max_seq_len:
+                input_ids = torch.cat([input_ids[:, : self.max_seq_len // 2], input_ids[:, - self.max_seq_len // 2:]], dim=-1)
+            tokens = {'input_ids': input_ids, }
+        else:
+            tokens = self.tokenizer.batch_encode_plus(messages, **tokenize_kwargs)
+
         tokens = {k: v.to(self.model.device) for k, v in tokens.items()}
         outputs = self.model(**tokens)[0]
 
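For context, not part of this commit: with drop_middle enabled the model keeps the first and last max_seq_len // 2 tokens of an over-long prompt and discards the middle, which preserves both the leading context and the trailing question in NeedleBench-style inputs. A small self-contained sketch of the same idea on a plain token list; the helper name is hypothetical:

# Hypothetical helper illustrating the head-plus-tail truncation used above.
def drop_middle_tokens(input_ids, max_seq_len):
    if len(input_ids) <= max_seq_len:
        return input_ids
    half = max_seq_len // 2
    return input_ids[:half] + input_ids[-half:]

print(drop_middle_tokens(list(range(10)), 6))  # [0, 1, 2, 7, 8, 9]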
@@ -32,16 +32,22 @@ class InternTrainManager:
 class CurrentInternTrainManager(InternTrainManager):
 
     def load_config(self, path, model_config=None):
-        from internlm.config import Config
         if model_config is None:
-            model_config = torch.load(os.path.join(path, 'model_config.pt'))
-        elif isinstance(model_config, dict):
-            model_config = Config(model_config)
-        elif isinstance(model_config, str):
-            model_config = Config.fromfile(model_config).model
+            from internlm.checkpoint.checkpoint_manager import try_load_config
+            model_config = try_load_config(
+                os.path.join(path, 'model_config.pt'))
+        elif isinstance(model_config, str) and model_config.endswith('.pt'):
+            from internlm.checkpoint.checkpoint_manager import try_load_config
+            model_config = try_load_config(model_config)
         else:
-            raise NotImplementedError(
-                'model_config should be None, dict or filename.')
+            from internlm.config import Config
+            if isinstance(model_config, dict):
+                model_config = Config(model_config)
+            elif isinstance(model_config, str):
+                model_config = Config.fromfile(model_config).model
+            else:
+                raise NotImplementedError(
+                    'model_config should be None, dict or filename.')
 
         return model_config
 
@@ -60,6 +66,8 @@ class LegacyInternTrainManager(InternTrainManager):
         from internlm.core.context import Config
         if model_config is None:
             model_config = torch.load(os.path.join(path, 'model_config.pt'))
+        elif isinstance(model_config, str) and model_config.endswith('.pt'):
+            model_config = torch.load(model_config)
         elif isinstance(model_config, dict):
             model_config = Config(model_config)
         elif isinstance(model_config, str):
@@ -132,6 +140,7 @@ class InternTrain(BaseModel):
                  tokenizer_path: Optional[str] = None,
                  tokenizer_type: str = 'INTERNLM',
                  model_config: Optional[Union[str, Dict]] = None,
+                 parallel_config: Optional[str] = None,
                  model_type: str = 'INTERNLM2',
                  ckpt_type: Optional[str] = None,
                  meta_template: Optional[Dict] = None,
@@ -140,11 +149,13 @@ class InternTrain(BaseModel):
                  sync_rank: bool = False,
                  mode='none',
                  end_str: Optional[str] = None):
+
         super().__init__(path=path,
                          max_seq_len=max_seq_len,
                          tokenizer_only=tokenizer_only,
                          meta_template=meta_template,
                          sync_rank=sync_rank)
+
         self.logger = get_logger()
         # insert interntrain module
         self.manager = InternTrainManager.build(module_path)
@@ -162,6 +173,7 @@ class InternTrain(BaseModel):
         if not tokenizer_only:
             self._load_model(path=path,
                              model_config=model_config,
+                             parallel_config=parallel_config,
                              model_type=model_type,
                              model_dtype=model_dtype,
                              ckpt_type=ckpt_type)
@@ -196,6 +208,7 @@ class InternTrain(BaseModel):
     def _load_model(self,
                     path: str,
                     model_config: Optional[str] = None,
+                    parallel_config: Optional[str] = None,
                    model_type: str = 'INTERNLM2',
                    model_dtype: Optional[str] = None,
                    ckpt_type: Optional[str] = None):
@@ -216,10 +229,11 @@ class InternTrain(BaseModel):
         world_size = int(os.getenv('WORLD_SIZE', '1'))
         tp_size = world_size  # TODO
         self.logger.info(f'world size: {world_size} tp: {tp_size}')
-        parallel_config = dict(zero1=dict(size=1, fsdp=False),
-                               pipeline=dict(size=1),
-                               tensor=dict(size=tp_size, mode='mtp'),
-                               sequence_parallel=False)
+        if parallel_config is None:
+            parallel_config = dict(zero1=dict(size=1, fsdp=False),
+                                   pipeline=dict(size=1),
+                                   tensor=dict(size=tp_size, mode='mtp'),
+                                   sequence_parallel=False)
         config = dict(model=model_config,
                       parallel=parallel_config,
                       data=dict(use_packed_dataset=False),
@@ -253,7 +267,10 @@ class InternTrain(BaseModel):
             load_func = LOAD_FUNC_DICT[ckpt_type]
             load_func(path, self.model)
 
-        self.model.to(model_config['dtype']).eval().cuda()
+        if 'moe' in model_type.lower():
+            self.model.eval().cuda()
+        else:
+            self.model.to(model_config['dtype']).eval().cuda()
 
     def _load_tokenizer(self, tokenizer_path: str, tokenizer_type: str):
         from internlm.core.context.registry import TOKENIZER_INITIALIZER
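For context, not part of this commit: when no parallel_config is passed, _load_model still builds the same layout as before, with tensor parallelism spanning every rank reported by WORLD_SIZE; the new argument only lets a caller substitute that dict. A small illustration of the default, using only values visible in the hunk above:

# Illustration of the default parallel layout built when parallel_config is None.
import os

world_size = int(os.getenv('WORLD_SIZE', '1'))  # e.g. 4 when launched on four ranks
default_parallel_config = dict(
    zero1=dict(size=1, fsdp=False),
    pipeline=dict(size=1),
    tensor=dict(size=world_size, mode='mtp'),   # tensor parallelism across all ranks
    sequence_parallel=False,
)
print(default_parallel_config)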
@@ -53,11 +53,13 @@ class TurboMindModel(BaseModel):
                  engine_config: Dict = {},
                  gen_config: Dict = {},
                  batch_padding: bool = False,
+                 drop_middle: bool = False,
                  end_str: Optional[str] = None):
         super().__init__(path=path,
                          max_seq_len=max_seq_len,
                          meta_template=meta_template)
         self.logger = get_logger()
+        self.drop_middle = drop_middle
         self.max_seq_len = _get_possible_max_seq_len(max_seq_len, path)
         from lmdeploy import version_info
         from transformers import AutoTokenizer
@@ -118,6 +120,21 @@ class TurboMindModel(BaseModel):
         }
         gen_config = GenerationConfig(**gen_config)
 
+        if self.drop_middle:
+            inputs_drop_middle = []
+            for input in inputs:
+                input_ids = self.tokenizer([input],
+                                           padding=False,
+                                           truncation=False)['input_ids'][0]
+                if len(input_ids) > self.max_seq_len:
+                    input_ids = input_ids[:self.max_seq_len //
+                                          2] + input_ids[-self.max_seq_len //
+                                                         2:]
+                input = self.tokenizer.decode(input_ids,
+                                              skip_special_tokens=True)
+                inputs_drop_middle.append(input)
+            inputs = inputs_drop_middle
+
         results = []
         outputs = self.pipe(inputs, gen_config=gen_config, do_preprocess=False)
         for output in outputs:
@@ -167,6 +167,10 @@ class DLCRunner(BaseRunner):
 
         # set priority to 1 as default
         task_priority = self.aliyun_cfg.get('priority', 1)
+        worker_cpu = self.aliyun_cfg.get('worker_cpu', 12)
+        worker_memory = self.aliyun_cfg.get('worker_memory', 192)
+        config_path = (f" --config {self.aliyun_cfg['dlc_config_path']}"
+                       if 'dlc_config_path' in self.aliyun_cfg else '')
 
         # Different dlc versions has different commands
         if self.aliyun_cfg.get('dlc_job_cmd') == 'create':
@@ -179,14 +183,14 @@ class DLCRunner(BaseRunner):
                 f'dlc {dlc_job_cmd}'
                 f" --command '{shell_cmd}'"
                 f' --name {task_name[:512]}'
-                f" --config {self.aliyun_cfg['dlc_config_path']}"
+                f'{config_path}'
                 f" --workspace_id {self.aliyun_cfg['workspace_id']}"
                 f" --resource_id={self.aliyun_cfg['resource_id']}"
                 f' --priority {task_priority}'
                 f'{worker_cmd}'
-                f' --worker_cpu {max(num_gpus * 8, 12)}'
+                f' --worker_cpu {max(num_gpus * 8, worker_cpu)}'
                 f' --worker_gpu {num_gpus}'
-                f' --worker_memory {max(num_gpus * 128, 192)}Gi'
+                f' --worker_memory {max(num_gpus * 128, worker_memory)}Gi'
                 f" --worker_image {self.aliyun_cfg['worker_image']}"
                 f" --data_sources={','.join(self.aliyun_cfg['data_sources'])}")
         get_cmd = partial(task.get_command,
@@ -253,8 +257,15 @@ class DLCRunner(BaseRunner):
         for retry_index in range(num_retry):
             time.sleep(2)
             try:
-                job_info = json.loads(
-                    subprocess.getoutput(f'dlc get job {job_id}'))
+                raw_job_info = subprocess.getoutput(
+                    f'dlc get job {job_id}{config_path}')
+                if raw_job_info.startswith(
+                        '/bin/bash') or raw_job_info.startswith(
+                            '[OK]') or raw_job_info.startswith(
+                                '[FAILED]'):
+                    raw_job_info = raw_job_info[raw_job_info.
+                                                index('\n') + 1:]
+                job_info = json.loads(raw_job_info)
                 break
             except:  # noqa: E722
                 if retry_index > num_retry // 3:
@@ -287,7 +298,7 @@ class DLCRunner(BaseRunner):
                 elasped_time).strftime('%Y-%m-%dT%H:%M:%SZ')
             logs_cmd = ('dlc logs'
                         f' {job_id} {job_id}-master-0'
-                        f" -c {self.aliyun_cfg['dlc_config_path']}"
+                        f'{config_path}'
                        f' --start_time {pri_time}'
                        f' --end_time {cur_time}')
            try:
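For context, not part of this commit: the runner now reads worker_cpu, worker_memory and dlc_config_path from aliyun_cfg instead of hard-coding them, and the configured values still act as floors, since the command uses max(num_gpus * 8, worker_cpu) and max(num_gpus * 128, worker_memory). A hedged sketch of the relevant part of a runner config; every value is a placeholder and other required aliyun_cfg keys are omitted:

# Hypothetical snippet; only the three highlighted keys are taken from the diff above.
aliyun_cfg = dict(
    dlc_config_path='/path/to/dlc.config',  # appended as ' --config ...' when present
    worker_cpu=16,                           # floor for --worker_cpu (default 12)
    worker_memory=256,                       # floor, in Gi, for --worker_memory (default 192)
    workspace_id='ws-xxxx',
    resource_id='rs-xxxx',
    worker_image='registry.example.com/worker:latest',
    data_sources=['d-xxxx'],
)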
@@ -15,7 +15,7 @@ from opencompass.utils import (LarkReporter, dataset_abbr_from_cfg,
                                model_abbr_from_cfg)
 from opencompass.utils.prompt import get_prompt_hash
 
-METRIC_WHITELIST = ['score', 'auc_score', 'accuracy', 'humaneval_pass@1', 'rouge1', 'avg_toxicity_score', 'bleurt_diff', 'matthews_correlation', 'truth']
+METRIC_WHITELIST = ['pass@1', 'score', 'auc_score', 'accuracy', 'humaneval_pass@1', 'rouge1', 'avg_toxicity_score', 'bleurt_diff', 'matthews_correlation', 'truth']
 METRIC_BLACKLIST = ['bp', 'sys_len', 'ref_len']
 
 class PretrainSummarizer:
@@ -256,14 +256,13 @@ class PretrainSummarizer:
             f.write('\n'.join([','.join(row) for row in table]) + '\n')
         self.logger.info(f'write csv to {osp.abspath(output_csv_path)}')
 
-
         summary_groups = summarizer_cfg.get('summary_groups', [])
         for sg in summary_groups:
             for model_abbr in model_abbrs:
                 results = {}
                 eval_modes = []
                 for dataset_abbr in sg['subsets']:
-                    if dataset_abbr in parsed_results[model_abbr]:
+                    if dataset_abbr in parsed_results[model_abbr] and len(parsed_results[model_abbr][dataset_abbr]) > 1:
                         results[dataset_abbr] = (parsed_results[model_abbr][dataset_abbr][-1],parsed_results[model_abbr][dataset_abbr][-2])
                         eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown'))
 
@@ -327,8 +326,9 @@ class PretrainSummarizer:
             for model_abbr in model_abbrs:
                 if dataset_abbr in parsed_results[model_abbr]:
                     if incorrect_bpb != -1 and correct_bpb != -1:
-                        row.append('{:.02f}/{:.02f}'.format(parsed_results[model_abbr][dataset_abbr][correct_bpb],
-                                                            parsed_results[model_abbr][dataset_abbr][incorrect_bpb]))
+                        right_bpb = parsed_results[model_abbr][dataset_abbr][correct_bpb]
+                        wrong_bpb = parsed_results[model_abbr][dataset_abbr][incorrect_bpb]
+                        row.append('{:.02f}/{:.02f}/{:.02f}'.format(right_bpb,wrong_bpb,wrong_bpb-right_bpb))
                     else:
                         row.append('{:.02f}'.format(-1))
                 else:
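For context, not part of this commit: the summarizer cell now packs correct bpb, incorrect bpb and their gap into one field, so a larger third number means the model separates correct from incorrect continuations more clearly. A tiny illustration with made-up values:

# Made-up numbers, only to show the new three-part cell format.
right_bpb, wrong_bpb = 0.85, 1.20
print('{:.02f}/{:.02f}/{:.02f}'.format(right_bpb, wrong_bpb, wrong_bpb - right_bpb))
# 0.85/1.20/0.35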
@@ -1,7 +1,8 @@
 # Alpaca-eval
 alpaca-eval==0.6
 cn2an
-dingo-python
+# Dingo
+dingo-python==1.1.2
 # Icl topk retriever
 faiss_gpu==1.7.2
 # Humaneval, Humaneval X