mirror of
https://github.com/open-compass/opencompass.git
synced 2025-05-30 16:03:24 +08:00
[Feature] Support LLaMA2-Accessory (#732)
* Support LLaMA2-Accessory * remove strip * clear imports * reformat * fix lint * fix lint * update readme * update readme * update readme * update readme
This commit is contained in:
parent
ba027eeeac
commit
3eb225a5e6
@ -50,6 +50,7 @@ Just like a compass guides us on our journey, OpenCompass will guide you through
|
||||
|
||||
## 🚀 What's New <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>
|
||||
|
||||
- **\[2023.12.28\]** We have enabled seamless evaluation of all models developed using [LLaMA2-Accessory](https://github.com/Alpha-VLLM/LLaMA2-Accessory), a powerful toolkit for comprehensive LLM development. 🔥🔥🔥.
|
||||
- **\[2023.12.22\]** We have released [T-Eval](https://github.com/open-compass/T-Eval), a step-by-step evaluation benchmark to gauge your LLMs on tool utilization. Welcome to our [Leaderboard](https://open-compass.github.io/T-Eval/leaderboard.html) for more details! 🔥🔥🔥.
|
||||
- **\[2023.12.10\]** We have released [VLMEvalKit](https://github.com/open-compass/VLMEvalKit), a toolkit for evaluating vision-language models (VLMs), which currently supports 20+ VLMs and 7 multi-modal benchmarks (including MMBench series). 🔥🔥🔥.
|
||||
- **\[2023.12.10\]** We have supported Mistral AI's MoE LLM: **Mixtral-8x7B-32K**. Welcome to [MixtralKit](https://github.com/open-compass/MixtralKit) for more details about inference and evaluation. 🔥🔥🔥.
|
||||
|
@ -50,6 +50,7 @@
|
||||
|
||||
## 🚀 最新进展 <a><img width="35" height="20" src="https://user-images.githubusercontent.com/12782558/212848161-5e783dd6-11e8-4fe0-bbba-39ffb77730be.png"></a>
|
||||
|
||||
- **\[2023.12.28\]** 我们支持了对使用[LLaMA2-Accessory](https://github.com/Alpha-VLLM/LLaMA2-Accessory)(一款强大的LLM开发工具箱)开发的所有模型的无缝评估! 🔥🔥🔥.
|
||||
- **\[2023.12.22\]** 我们开源了[T-Eval](https://github.com/open-compass/T-Eval)用于评测大语言模型工具调用能力。欢迎访问T-Eval的官方[Leaderboard](https://open-compass.github.io/T-Eval/leaderboard.html)获取更多信息! 🔥🔥🔥.
|
||||
- **\[2023.12.10\]** 我们开源了多模评测框架 [VLMEvalKit](https://github.com/open-compass/VLMEvalKit),目前已支持 20+ 个多模态大模型与包括 MMBench 系列在内的 7 个多模态评测集. 🔥🔥🔥.
|
||||
- **\[2023.12.10\]** 我们已经支持了Mistral AI的MoE模型 **Mixtral-8x7B-32K**。欢迎查阅[MixtralKit](https://github.com/open-compass/MixtralKit)以获取更多关于推理和评测的详细信息。🔥🔥🔥。
|
||||
|
34
configs/models/accessory/accessory_llama2_7b.py
Normal file
34
configs/models/accessory/accessory_llama2_7b.py
Normal file
@ -0,0 +1,34 @@
|
||||
from opencompass.models import LLaMA2AccessoryModel
|
||||
|
||||
# Please follow the LLaMA2-Accessory installation document
|
||||
# https://llama2-accessory.readthedocs.io/en/latest/install.html
|
||||
# to install LLaMA2-Accessory
|
||||
|
||||
models = [
    dict(
        abbr='Accessory_llama2_7b',
        type=LLaMA2AccessoryModel,

        # No extra stop symbols for this model. Models tuned with a chat
        # template would set additional_stop_symbols=['###'] instead.
        additional_stop_symbols=[],

        # <begin> kwargs for accessory.MetaModel.from_pretrained
        # Download https://huggingface.co/meta-llama/Llama-2-7b/tree/main to
        # 'path/to/Llama-2-7b/'; that directory should contain:
        #   - consolidated.00.pth
        #   - params.json
        #   - tokenizer.model
        pretrained_path='path/to/Llama-2-7b/',
        llama_type='llama',
        llama_config='path/to/Llama-2-7b/params.json',
        tokenizer_path='path/to/Llama-2-7b/tokenizer.model',
        with_visual=False,
        max_seq_len=4096,
        quant=False,
        # <end>

        batch_size=2,
        # LLaMA2-Accessory requires num_gpus == num_procs.
        run_cfg=dict(num_gpus=1, num_procs=1),
    ),
]
|
31
configs/models/accessory/accessory_mixtral_8x7b.py
Normal file
31
configs/models/accessory/accessory_mixtral_8x7b.py
Normal file
@ -0,0 +1,31 @@
|
||||
from opencompass.models import LLaMA2AccessoryModel
|
||||
|
||||
# Please follow the LLaMA2-Accessory installation document
|
||||
# https://llama2-accessory.readthedocs.io/en/latest/install.html
|
||||
# to install LLaMA2-Accessory
|
||||
|
||||
models = [
    dict(
        abbr='Accessory_mixtral_8x7b',
        type=LLaMA2AccessoryModel,

        # No extra stop symbols for this model. Models tuned with a chat
        # template would set additional_stop_symbols=['###'] instead.
        additional_stop_symbols=[],

        # <begin> kwargs for accessory.MetaModel.from_pretrained
        # Checkpoint download:
        # https://huggingface.co/Alpha-VLLM/MoE-Mixtral-7B-8Expert/tree/main/converted_sparse # noqa
        # Further details:
        # https://llama2-accessory.readthedocs.io/en/latest/projects/mixtral-8x7b.html # noqa
        pretrained_path='path/to/MoE-Mixtral-7B-8Expert/converted_sparse',
        # None means: probe the value automatically from pretrained_path.
        llama_type=None,
        llama_config=None,
        tokenizer_path=None,
        with_visual=False,
        max_seq_len=4096,
        quant=False,
        # <end>

        batch_size=2,
        # LLaMA2-Accessory requires num_gpus == num_procs.
        run_cfg=dict(num_gpus=2, num_procs=2),
    ),
]
|
29
configs/models/accessory/accessory_sphinx_v2_1k.py
Normal file
29
configs/models/accessory/accessory_sphinx_v2_1k.py
Normal file
@ -0,0 +1,29 @@
|
||||
from opencompass.models import LLaMA2AccessoryModel
|
||||
|
||||
# Please follow the LLaMA2-Accessory installation document
|
||||
# https://llama2-accessory.readthedocs.io/en/latest/install.html
|
||||
# to install LLaMA2-Accessory
|
||||
|
||||
models = [
    dict(
        abbr='Accessory_sphinx_v2_1k',
        type=LLaMA2AccessoryModel,

        # This model is tuned with a chat template, so '###' ends generation.
        additional_stop_symbols=['###'],

        # <begin> kwargs for accessory.MetaModel.from_pretrained
        # Checkpoint download:
        # https://huggingface.co/Alpha-VLLM/LLaMA2-Accessory/tree/main/finetune/mm/SPHINX/SPHINX-v2-1k # noqa
        pretrained_path='path/to/sphinx_v2_1k',
        # None means: probe the value automatically from pretrained_path.
        llama_type=None,
        llama_config=None,
        tokenizer_path=None,
        # Only single-modal evaluation is supported for now.
        with_visual=False,
        max_seq_len=4096,
        quant=False,
        # <end>

        batch_size=2,
        # LLaMA2-Accessory requires num_gpus == num_procs.
        run_cfg=dict(num_gpus=1, num_procs=1),
    ),
]
|
@ -1,3 +1,4 @@
|
||||
from .accessory import LLaMA2AccessoryModel # noqa: F401
|
||||
from .ai360_api import AI360GPT # noqa: F401
|
||||
from .alaya import AlayaLM # noqa: F401
|
||||
from .baichuan_api import BaiChuan # noqa: F401
|
||||
|
88
opencompass/models/accessory.py
Normal file
88
opencompass/models/accessory.py
Normal file
@ -0,0 +1,88 @@
|
||||
from typing import Dict, Iterable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import torch.distributed as dist
|
||||
|
||||
from opencompass.models.base import BaseModel
|
||||
from opencompass.models.base_api import APITemplateParser
|
||||
from opencompass.utils.logging import get_logger
|
||||
from opencompass.utils.prompt import PromptList
|
||||
|
||||
PromptType = Union[PromptList, str]
|
||||
|
||||
|
||||
class LLaMA2AccessoryModel(BaseModel):
    """LLaMA2-Accessory model wrapper.

    Project: https://github.com/Alpha-VLLM/LLaMA2-Accessory

    Args:
        tokenizer_only (bool): whether to load tokenizer only. When True,
            only ``self.tokenizer`` is created; ``generate`` and ``get_ppl``
            need ``self.model`` and are therefore unavailable.
        meta_template (dict): meta template for the model
        additional_stop_symbols: (Iterable[str]): additional symbols that mark
            the end of generation, e.g. the "###" symbol for separating turns
            in the chat template.
        from_pretrained_kwargs: kwargs that will be passed to
            `accessory.MetaModel.from_pretrained` for model instantiation.
            ``max_seq_len`` is additionally read from here (default 4096)
            and stored on the wrapper.
    """

    def __init__(self,
                 tokenizer_only: bool = False,
                 meta_template: Optional[Dict] = None,
                 additional_stop_symbols: Iterable[str] = (),
                 **from_pretrained_kwargs):
        # Create the logger before anything is loaded so that it is assigned
        # exactly once, whichever loading path is taken.
        self.logger = get_logger()

        if tokenizer_only:
            self._load_tokenizer(from_pretrained_kwargs)
        else:
            self._load_model(from_pretrained_kwargs)

        self.additional_stop_symbols = additional_stop_symbols
        self.max_seq_len = from_pretrained_kwargs.get('max_seq_len', 4096)
        self.template_parser = APITemplateParser(meta_template)

    def _load_model(self, from_pretrained_kwargs):
        """Instantiate the accessory ``MetaModel`` and expose its tokenizer.

        Args:
            from_pretrained_kwargs (dict): keyword arguments forwarded to
                ``MetaModel.from_pretrained``. The dict is not modified.
        """
        # Imported lazily so this module can be imported without
        # LLaMA2-Accessory being installed.
        from accessory.model.meta import MetaModel
        from accessory.util.misc import init_distributed_mode
        if not dist.is_initialized():
            init_distributed_mode()

        # The default (world) process group is handed over as the
        # model-parallel group. An `mp_group` supplied by the caller is
        # overridden, as before.
        model_kwargs = {
            **from_pretrained_kwargs,
            'mp_group': dist.GroupMember.WORLD,
        }

        self.model = MetaModel.from_pretrained(**model_kwargs)
        self.tokenizer = self.model.tokenizer

    def _load_tokenizer(self, from_pretrained_kwargs):
        """Load only the tokenizer.

        An explicit ``tokenizer_path`` is used when given. When it is missing
        or ``None`` (the configs use ``None`` to request automatic probing),
        the path is probed from the last entry of ``pretrained_path``.

        Args:
            from_pretrained_kwargs (dict): must contain ``pretrained_path``
                (a str or a sequence of str) unless a non-None
                ``tokenizer_path`` is supplied.
        """
        from accessory.model.tokenizer import (
            Tokenizer, probe_tokenizer_path_from_pretrained)

        tokenizer_path = from_pretrained_kwargs.get('tokenizer_path')
        if tokenizer_path is None:
            pretrained_path = from_pretrained_kwargs['pretrained_path']
            if isinstance(pretrained_path, str):
                pretrained_path = [pretrained_path]
            tokenizer_path = probe_tokenizer_path_from_pretrained(
                pretrained_path[-1])

        self.tokenizer = Tokenizer(tokenizer_path)

    def generate(self, inputs: List[str], max_out_len: int) -> List[str]:
        """Generate completions for a batch of prompts.

        Args:
            inputs (List[str]): prompts to complete.
            max_out_len (int): forwarded as ``max_gen_len``.

        Returns:
            List[str]: whatever ``self.model.generate`` returns for the
            prompts, passed through unchanged.
        """
        # Temperature is fixed at 0 for every call.
        results = self.model.generate(
            prompts=inputs,
            max_gen_len=max_out_len,
            temperature=0.,
            additional_stop_symbols=self.additional_stop_symbols)
        return results

    def get_ppl(self,
                inputs: List[str],
                mask_length: Optional[List[int]] = None):
        """Compute the perplexity of each input.

        Args:
            inputs (List[str]): texts to score.
            mask_length (Optional[List[int]]): not supported; must be None.

        Returns:
            np.ndarray: float32 array built from the ``'ppl'`` entry of the
            result of ``self.model.evaluate_examples``.
        """
        assert mask_length is None, 'mask_length is not supported'
        evaluation_results = self.model.evaluate_examples(examples=inputs)
        ppl = evaluation_results['ppl']
        return np.array(ppl, dtype=np.float32)

    def get_token_len(self, prompt: str) -> int:
        """Return the number of tokens the tokenizer produces for `prompt`.

        Args:
            prompt (str): text to tokenize.

        Returns:
            int: length of the encoded token sequence.
        """
        # NOTE(review): the two positional True flags presumably request BOS
        # and EOS tokens - confirm against accessory's Tokenizer.encode.
        return len(self.tokenizer.encode(prompt, True, True))
|
Loading…
Reference in New Issue
Block a user