diff --git a/README.md b/README.md index ead42464..15b0a46c 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ Just like a compass guides us on our journey, OpenCompass will guide you through ## 🚀 What's New +- **\[2023.12.28\]** We have enabled seamless evaluation of all models developed using [LLaMA2-Accessory](https://github.com/Alpha-VLLM/LLaMA2-Accessory), a powerful toolkit for comprehensive LLM development. 🔥🔥🔥. - **\[2023.12.22\]** We have released [T-Eval](https://github.com/open-compass/T-Eval), a step-by-step evaluation benchmark to gauge your LLMs on tool utilization. Welcome to our [Leaderboard](https://open-compass.github.io/T-Eval/leaderboard.html) for more details! 🔥🔥🔥. - **\[2023.12.10\]** We have released [VLMEvalKit](https://github.com/open-compass/VLMEvalKit), a toolkit for evaluating vision-language models (VLMs), currently support 20+ VLMs and 7 multi-modal benchmarks (including MMBench series). 🔥🔥🔥. - **\[2023.12.10\]** We have supported Mistral AI's MoE LLM: **Mixtral-8x7B-32K**. Welcome to [MixtralKit](https://github.com/open-compass/MixtralKit) for more details about inference and evaluation. 🔥🔥🔥. diff --git a/README_zh-CN.md b/README_zh-CN.md index 024b2145..437ad13b 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -50,6 +50,7 @@ ## 🚀 最新进展 +- **\[2023.12.28\]** 我们支持了对使用[LLaMA2-Accessory](https://github.com/Alpha-VLLM/LLaMA2-Accessory)(一款强大的LLM开发工具箱)开发的所有模型的无缝评估! 🔥🔥🔥. - **\[2023.12.22\]** 我们开源了[T-Eval](https://github.com/open-compass/T-Eval)用于评测大语言模型工具调用能力。欢迎访问T-Eval的官方[Leaderboard](https://open-compass.github.io/T-Eval/leaderboard.html)获取更多信息! 🔥🔥🔥. - **\[2023.12.10\]** 我们开源了多模评测框架 [VLMEvalKit](https://github.com/open-compass/VLMEvalKit),目前已支持 20+ 个多模态大模型与包括 MMBench 系列在内的 7 个多模态评测集. 🔥🔥🔥. - **\[2023.12.10\]** 我们已经支持了Mistral AI的MoE模型 **Mixtral-8x7B-32K**。欢迎查阅[MixtralKit](https://github.com/open-compass/MixtralKit)以获取更多关于推理和评测的详细信息。🔥🔥🔥。 diff --git a/configs/models/accessory/accessory_llama2_7b.py b/configs/models/accessory/accessory_llama2_7b.py new file mode 100644 index 00000000..35022bda --- /dev/null +++ b/configs/models/accessory/accessory_llama2_7b.py @@ -0,0 +1,34 @@ +from opencompass.models import LLaMA2AccessoryModel + +# Please follow the LLaMA2-Accessory installation document +# https://llama2-accessory.readthedocs.io/en/latest/install.html +# to install LLaMA2-Accessory + +models = [ + dict( + abbr="Accessory_llama2_7b", + type=LLaMA2AccessoryModel, + + # additional_stop_symbols=["###"], # for models tuned with chat template # noqa + additional_stop_symbols=[], + + # kwargs for accessory.MetaModel.from_pretrained + # download https://huggingface.co/meta-llama/Llama-2-7b/tree/main to + # 'path/to/Llama-2-7b/', which should contain: + # - consolidated.00.pth + # - params.json + # - tokenizer.model + pretrained_path="path/to/Llama-2-7b/", + llama_type="llama", + llama_config="path/to/Llama-2-7b/params.json", + tokenizer_path="path/to/Llama-2-7b/tokenizer.model", + with_visual=False, + max_seq_len=4096, + quant=False, + # + + batch_size=2, + # LLaMA2-Accessory needs num_gpus==num_procs + run_cfg=dict(num_gpus=1, num_procs=1), + ), +] diff --git a/configs/models/accessory/accessory_mixtral_8x7b.py b/configs/models/accessory/accessory_mixtral_8x7b.py new file mode 100644 index 00000000..c5de9242 --- /dev/null +++ b/configs/models/accessory/accessory_mixtral_8x7b.py @@ -0,0 +1,31 @@ +from opencompass.models import LLaMA2AccessoryModel + +# Please follow the LLaMA2-Accessory installation document +# https://llama2-accessory.readthedocs.io/en/latest/install.html +# to install LLaMA2-Accessory + +models = [ + dict( + abbr="Accessory_mixtral_8x7b", + type=LLaMA2AccessoryModel, + + # additional_stop_symbols=["###"], # for models tuned with chat template # noqa + additional_stop_symbols=[], + + # kwargs for accessory.MetaModel.from_pretrained + # download from https://huggingface.co/Alpha-VLLM/MoE-Mixtral-7B-8Expert/tree/main/converted_sparse # noqa + # see https://llama2-accessory.readthedocs.io/en/latest/projects/mixtral-8x7b.html for more details # noqa + pretrained_path="path/to/MoE-Mixtral-7B-8Expert/converted_sparse", + llama_type=None, # None for automatic probe from pretrained_path + llama_config=None, # None for automatic probe from pretrained_path + tokenizer_path=None, # None for automatic probe from pretrained_path + with_visual=False, + max_seq_len=4096, + quant=False, + # + + batch_size=2, + # LLaMA2-Accessory needs num_gpus==num_procs + run_cfg=dict(num_gpus=2, num_procs=2), + ), +] diff --git a/configs/models/accessory/accessory_sphinx_v2_1k.py b/configs/models/accessory/accessory_sphinx_v2_1k.py new file mode 100644 index 00000000..71e24342 --- /dev/null +++ b/configs/models/accessory/accessory_sphinx_v2_1k.py @@ -0,0 +1,29 @@ +from opencompass.models import LLaMA2AccessoryModel + +# Please follow the LLaMA2-Accessory installation document +# https://llama2-accessory.readthedocs.io/en/latest/install.html +# to install LLaMA2-Accessory + +models = [ + dict( + abbr="Accessory_sphinx_v2_1k", + type=LLaMA2AccessoryModel, + + additional_stop_symbols=["###"], # for models tuned with chat template + + # kwargs for accessory.MetaModel.from_pretrained + # download from https://huggingface.co/Alpha-VLLM/LLaMA2-Accessory/tree/main/finetune/mm/SPHINX/SPHINX-v2-1k # noqa + pretrained_path="path/to/sphinx_v2_1k", + llama_type=None, # None for automatic probe from pretrained_path + llama_config=None, # None for automatic probe from pretrained_path + tokenizer_path=None, # None for automatic probe from pretrained_path + with_visual=False, # currently only support single-modal evaluation + max_seq_len=4096, + quant=False, + # + + batch_size=2, + # LLaMA2-Accessory needs num_gpus==num_procs + run_cfg=dict(num_gpus=1, num_procs=1), + ), +] diff --git a/opencompass/models/__init__.py b/opencompass/models/__init__.py index b742e125..0755511a 100644 --- a/opencompass/models/__init__.py +++ b/opencompass/models/__init__.py @@ -1,3 +1,4 @@ +from .accessory import LLaMA2AccessoryModel # noqa: F401 from .ai360_api import AI360GPT # noqa: F401 from .alaya import AlayaLM # noqa: F401 from .baichuan_api import BaiChuan # noqa: F401 diff --git a/opencompass/models/accessory.py b/opencompass/models/accessory.py new file mode 100644 index 00000000..8fb80b37 --- /dev/null +++ b/opencompass/models/accessory.py @@ -0,0 +1,88 @@ +from typing import Dict, Iterable, List, Optional, Union + +import numpy as np +import torch.distributed as dist + +from opencompass.models.base import BaseModel +from opencompass.models.base_api import APITemplateParser +from opencompass.utils.logging import get_logger +from opencompass.utils.prompt import PromptList + +PromptType = Union[PromptList, str] + + +class LLaMA2AccessoryModel(BaseModel): + """LLaMA2-Accessory model wrapper. + + Project: https://github.com/Alpha-VLLM/LLaMA2-Accessory + + Args: + tokenizer_only (bool): whether to load tokenizer only + meta_template (dict): meta template for the model + additional_stop_symbols: (Iterable[str]): additional symbols that mark + the end of generation, e.g. the "###" symbol for separating turns + in the chat template. + from_pretrained_kwargs: kwargs that will be passed to + `accessory.MetaModel.from_pretrained` for model instantiation. + """ + + def __init__(self, + tokenizer_only: bool = False, + meta_template: Optional[Dict] = None, + additional_stop_symbols: Iterable[str] = (), + **from_pretrained_kwargs): + if tokenizer_only: + self._load_tokenizer(from_pretrained_kwargs) + else: + self._load_model(from_pretrained_kwargs) + + self.additional_stop_symbols = additional_stop_symbols + self.max_seq_len = from_pretrained_kwargs.get('max_seq_len', 4096) + self.template_parser = APITemplateParser(meta_template) + self.logger = get_logger() + + def _load_model(self, from_pretrained_kwargs): + from accessory.model.meta import MetaModel + from accessory.util.misc import init_distributed_mode + if not dist.is_initialized(): + init_distributed_mode() + + model_parallel_group = dist.GroupMember.WORLD + from_pretrained_kwargs['mp_group'] = model_parallel_group + + self.model = MetaModel.from_pretrained(**from_pretrained_kwargs) + self.tokenizer = self.model.tokenizer + self.logger = get_logger() + + def _load_tokenizer(self, from_pretrained_kwargs): + from accessory.model.tokenizer import ( + Tokenizer, probe_tokenizer_path_from_pretrained) + if 'tokenizer_path' in from_pretrained_kwargs: + tokenizer_path = from_pretrained_kwargs['tokenizer_path'] + else: + pretrained_path = from_pretrained_kwargs['pretrained_path'] + if isinstance(pretrained_path, str): + pretrained_path = [pretrained_path] + tokenizer_path = probe_tokenizer_path_from_pretrained( + pretrained_path[-1]) + + self.tokenizer = Tokenizer(tokenizer_path) + + def generate(self, inputs: List[str], max_out_len: int) -> List[str]: + results = self.model.generate( + prompts=inputs, + max_gen_len=max_out_len, + temperature=0., + additional_stop_symbols=self.additional_stop_symbols) + return results + + def get_ppl(self, + inputs: List[str], + mask_length: Optional[List[int]] = None): + assert mask_length is None, 'mask_length is not supported' + evaluation_results = self.model.evaluate_examples(examples=inputs) + ppl = evaluation_results['ppl'] + return np.array(ppl, dtype=np.float32) + + def get_token_len(self, prompt: str) -> int: + return len(self.tokenizer.encode(prompt, True, True))