[Feature] Add llama-2 models (#81)

* add llama-2 models

* update docs

---------

Co-authored-by: gaotongxiao <gaotongxiao@gmail.com>
Leymore 2023-07-19 19:51:29 +08:00 committed by GitHub
parent f83e125e5a
commit eea8b04417
12 changed files with 358 additions and 44 deletions


@ -27,6 +27,7 @@ Just like a compass guides us on our journey, OpenCompass will guide you through
## News
- **\[2023.07.19\]** We now support [Llama 2](https://ai.meta.com/llama/)! Its performance report will be available soon. \[[doc](./docs/en/get_started.md#Installation)\]
- **\[2023.07.13\]** We release [MMBench](https://opencompass.org.cn/MMBench), a meticulously curated dataset to comprehensively evaluate different abilities of multimodality models 🔥🔥🔥.
## Introduction


@ -25,6 +25,11 @@
Just like a compass guides us on our journey, we hope OpenCompass will help you navigate the fog of evaluating large language models. OpenCompass provides a rich set of algorithms and features, and we hope it helps the community evaluate the performance of NLP models more conveniently, fairly, and comprehensively.
## News
- **\[2023.07.19\]** [Llama 2](https://ai.meta.com/llama/) is now supported! Its evaluation results will be published soon. \[[doc](./docs/zh_cn/get_started.md#安装)\]
- **\[2023.07.13\]** We release [MMBench](https://opencompass.org.cn/MMBench), a meticulously curated dataset for comprehensively evaluating the abilities of multimodality models 🔥🔥🔥.
## Introduction
OpenCompass is a one-stop platform for evaluating large models. Its main features are as follows:


@ -0,0 +1,21 @@
from opencompass.models import HuggingFaceCausalLM
models = [
dict(
type=HuggingFaceCausalLM,
abbr='llama-2-13b-hf',
path="meta-llama/Llama-2-13b-hf",
tokenizer_path='meta-llama/Llama-2-13b-hf',
tokenizer_kwargs=dict(padding_side='left',
truncation_side='left',
use_fast=False,
),
max_out_len=100,
max_seq_len=2048,
batch_size=8,
model_kwargs=dict(device_map='auto'),
        batch_padding=False,  # if False, run inference sample by sample in a for-loop, without batch padding
run_cfg=dict(num_gpus=2, num_procs=1),
)
]
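
A model config like this one is not run on its own; it is normally pulled into a top-level evaluation config together with a dataset list via `read_base`. Below is a minimal sketch of such a config; the import paths are hypothetical and must be replaced with configs that actually exist in your checkout:

```python
from mmengine.config import read_base

with read_base():
    # Hypothetical config paths -- substitute real ones from
    # configs/models and configs/datasets.
    from .models.hf_llama2_13b import models
    from .datasets.piqa.piqa_ppl import piqa_datasets

datasets = [*piqa_datasets]
```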


@ -0,0 +1,21 @@
from opencompass.models import HuggingFaceCausalLM
models = [
dict(
type=HuggingFaceCausalLM,
abbr='llama-2-70b-hf',
path="meta-llama/Llama-2-70b-hf",
tokenizer_path='meta-llama/Llama-2-70b-hf',
tokenizer_kwargs=dict(padding_side='left',
truncation_side='left',
use_fast=False,
),
max_out_len=100,
max_seq_len=2048,
batch_size=8,
model_kwargs=dict(device_map='auto'),
        batch_padding=False,  # if False, run inference sample by sample in a for-loop, without batch padding
run_cfg=dict(num_gpus=8, num_procs=1),
)
]


@ -0,0 +1,21 @@
from opencompass.models import HuggingFaceCausalLM
models = [
dict(
type=HuggingFaceCausalLM,
abbr='llama-2-7b-hf',
path="meta-llama/Llama-2-7b-hf",
tokenizer_path='meta-llama/Llama-2-7b-hf',
tokenizer_kwargs=dict(padding_side='left',
truncation_side='left',
use_fast=False,
),
max_out_len=100,
max_seq_len=2048,
batch_size=8,
model_kwargs=dict(device_map='auto'),
        batch_padding=False,  # if False, run inference sample by sample in a for-loop, without batch padding
run_cfg=dict(num_gpus=1, num_procs=1),
)
]


@ -0,0 +1,31 @@
from opencompass.models import Llama2Chat
# Please follow the instructions on the Meta AI website
# (https://github.com/facebookresearch/llama) and download the
# Llama-2-Chat model and tokenizer to the path './models/llama2/llama/'.
#
# The `llama` package also needs to be installed:
#
#   git clone https://github.com/facebookresearch/llama.git
#   cd llama
#   pip install -e .
api_meta_template = dict(
round=[
dict(role="HUMAN", api_role="HUMAN"),
dict(role="BOT", api_role="BOT", generate=True),
],
)
models = [
dict(
abbr="llama-2-13b-chat",
type=Llama2Chat,
path="./models/llama2/llama/llama-2-13b-chat/",
tokenizer_path="./models/llama2/llama/tokenizer.model",
meta_template=api_meta_template,
max_out_len=100,
max_seq_len=2048,
batch_size=16,
run_cfg=dict(num_gpus=2, num_procs=2),
),
]
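
For reference, the `meta_template` above maps OpenCompass's `HUMAN`/`BOT` roles onto the `user`/`assistant` roles that Meta's `chat_completion` API expects; the conversion itself is implemented in `Llama2Chat.generate` later in this diff. A small illustration with made-up prompts:

```python
# A single-turn string prompt such as 'What is the capital of France?'
# is wrapped by Llama2Chat.generate into the dialog format consumed by
# llama.Llama.chat_completion:
dialog = [{'role': 'user', 'content': 'What is the capital of France?'}]

# A multi-round PromptList with HUMAN/BOT roles becomes alternating
# user/assistant messages:
dialog = [
    {'role': 'user', 'content': 'Hi!'},
    {'role': 'assistant', 'content': 'Hello! How can I help?'},
    {'role': 'user', 'content': 'Tell me a joke.'},
]
```

Note that `num_procs` in `run_cfg` follows the model-parallel layout of Meta's checkpoints: 1 process for 7B, 2 for 13B, and 8 for 70B.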


@ -0,0 +1,31 @@
from opencompass.models import Llama2Chat
# Please follow the instructions on the Meta AI website
# (https://github.com/facebookresearch/llama) and download the
# Llama-2-Chat model and tokenizer to the path './models/llama2/llama/'.
#
# The `llama` package also needs to be installed:
#
#   git clone https://github.com/facebookresearch/llama.git
#   cd llama
#   pip install -e .
api_meta_template = dict(
round=[
dict(role="HUMAN", api_role="HUMAN"),
dict(role="BOT", api_role="BOT", generate=True),
],
)
models = [
dict(
abbr="llama-2-70b-chat",
type=Llama2Chat,
path="./models/llama2/llama/llama-2-70b-chat/",
tokenizer_path="./models/llama2/llama/tokenizer.model",
meta_template=api_meta_template,
max_out_len=100,
max_seq_len=2048,
batch_size=16,
run_cfg=dict(num_gpus=8, num_procs=8),
),
]


@ -0,0 +1,31 @@
from opencompass.models import Llama2Chat
# Please follow the instructions on the Meta AI website
# (https://github.com/facebookresearch/llama) and download the
# Llama-2-Chat model and tokenizer to the path './models/llama2/llama/'.
#
# The `llama` package also needs to be installed:
#
#   git clone https://github.com/facebookresearch/llama.git
#   cd llama
#   pip install -e .
api_meta_template = dict(
round=[
dict(role="HUMAN", api_role="HUMAN"),
dict(role="BOT", api_role="BOT", generate=True),
],
)
models = [
dict(
abbr="llama-2-7b-chat",
type=Llama2Chat,
path="./models/llama2/llama/llama-2-7b-chat/",
tokenizer_path="./models/llama2/llama/tokenizer.model",
meta_template=api_meta_template,
max_out_len=100,
max_seq_len=2048,
batch_size=16,
run_cfg=dict(num_gpus=1, num_procs=1),
),
]


@ -2,39 +2,58 @@
1. Set up the OpenCompass environment:

   ```bash
   conda create --name opencompass python=3.10 pytorch torchvision pytorch-cuda -c nvidia -c pytorch -y
   conda activate opencompass
   ```

   If you want to customize the PyTorch version or the related CUDA version, please refer to the [official documentation](https://pytorch.org/get-started/locally/) to set up the PyTorch environment. Note that OpenCompass requires `pytorch>=1.13`.

2. Install OpenCompass:

   ```bash
   git clone https://github.com/InternLM/opencompass.git
   cd opencompass
   pip install -e .
   ```

3. Install humaneval (Optional)

   If you want to **evaluate your model's coding ability on the humaneval dataset**, follow this step.

   <details>
   <summary><b>click to show the details</b></summary>

   ```bash
   git clone https://github.com/openai/human-eval.git
   cd human-eval
   pip install -r requirements.txt
   pip install -e .
   cd ..
   ```

   Please read the comments in `human_eval/execution.py` **lines 48-57** to understand the potential risks of executing the model-generated code. If you accept these risks, uncomment **line 58** to enable code execution evaluation.

   </details>

4. Install Llama (Optional)

   If you want to **evaluate Llama / Llama-2 / Llama-2-chat with its official implementation**, follow this step.

   <details>
   <summary><b>click to show the details</b></summary>

   ```bash
   git clone https://github.com/facebookresearch/llama.git
   cd llama
   pip install -r requirements.txt
   pip install -e .
   cd ..
   ```

   You can find example configs in `configs/models`. ([example](https://github.com/InternLM/opencompass/blob/eb4822a94d624a4e16db03adeb7a59bbd10c2012/configs/models/llama2_7b_chat.py))

   </details>
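
A quick, hypothetical sanity check (not part of the official guide) is to import the installed packages:

```python
# Each import corresponds to one of the installation steps above;
# remove the lines for any optional steps you skipped.
import opencompass  # step 2
import human_eval   # step 3 (optional)
import llama        # step 4 (optional)
print('all requested packages import cleanly')
```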
# Dataset Preparation


@ -2,39 +2,58 @@
1. Set up the OpenCompass environment:

   ```bash
   conda create --name opencompass python=3.10 pytorch torchvision pytorch-cuda -c nvidia -c pytorch -y
   conda activate opencompass
   ```

   If you want to customize the PyTorch version or the related CUDA version, please refer to the [official documentation](https://pytorch.org/get-started/locally/) to prepare the PyTorch environment. Note that OpenCompass requires `pytorch>=1.13`.

2. Install OpenCompass:

   ```bash
   git clone https://github.com/InternLM/opencompass.git
   cd opencompass
   pip install -e .
   ```

3. Install humaneval (optional)

   If you want to **evaluate a model's coding ability on the humaneval dataset**, follow this step; otherwise skip it.

   <details>
   <summary><b>click to show the details</b></summary>

   ```bash
   git clone https://github.com/openai/human-eval.git
   cd human-eval
   pip install -r requirements.txt
   pip install -e .
   cd ..
   ```

   Please read the comments in `human_eval/execution.py` **lines 48-57** carefully to understand the risks of executing model-generated code. If you accept these risks, uncomment **line 58** to enable code execution evaluation.

   </details>

4. Install Llama (optional)

   If you want to **evaluate Llama / Llama-2 / Llama-2-chat models with the official implementation**, follow this step; otherwise skip it.

   <details>
   <summary><b>click to show the details</b></summary>

   ```bash
   git clone https://github.com/facebookresearch/llama.git
   cd llama
   pip install -r requirements.txt
   pip install -e .
   cd ..
   ```

   You can find example configuration files for all Llama / Llama-2 / Llama-2-chat models under `configs/models`. ([example](https://github.com/InternLM/opencompass/blob/eb4822a94d624a4e16db03adeb7a59bbd10c2012/configs/models/llama2_7b_chat.py))

   </details>

# Dataset Preparation


@ -3,4 +3,5 @@ from .base_api import APITemplateParser, BaseAPIModel # noqa
from .glm import GLM130B # noqa: F401, F403
from .huggingface import HuggingFace # noqa: F401, F403
from .huggingface import HuggingFaceCausalLM # noqa: F401, F403
from .llama2 import Llama2Chat # noqa: F401, F403
from .openai_api import OpenAI # noqa: F401


@ -0,0 +1,113 @@
from typing import Dict, List, Optional, Union
from opencompass.models.base import BaseModel
from opencompass.models.base_api import APITemplateParser
from opencompass.utils.logging import get_logger
from opencompass.utils.prompt import PromptList
PromptType = Union[PromptList, str]
class Llama2Chat(BaseModel):
"""LLaMA-2 chat model wrapper
https://github.com/facebookresearch/llama/tree/main.
Args:
path (str): path to the model directory
max_seq_len (int): max sequence length
max_batch_size (int): max batch size
tokenizer_only (bool): whether to load tokenizer only
tokenizer_path (str): path to the tokenizer directory
meta_template (dict): meta template for the model
"""
def __init__(
self,
path: str,
max_seq_len: int = 2048,
max_batch_size: int = 16,
tokenizer_only: bool = False,
tokenizer_path: Optional[str] = None,
meta_template: Optional[Dict] = None,
): # noqa
if tokenizer_only:
self._load_tokenizer(tokenizer_path=tokenizer_path)
else:
self._load_model(path=path,
max_seq_len=max_seq_len,
max_batch_size=max_batch_size,
tokenizer_path=tokenizer_path)
self.max_seq_len = max_seq_len
self.template_parser = APITemplateParser(meta_template)
self.logger = get_logger()
def _load_model(self,
path: str,
max_seq_len: int,
max_batch_size: int,
tokenizer_path: Optional[str] = None):
from llama import Llama
self.generator = Llama.build(path, tokenizer_path, max_seq_len,
max_batch_size)
self.tokenizer = self.generator.tokenizer
self.model = self.generator.model
def _load_tokenizer(self, tokenizer_path: str):
from llama import Tokenizer
self.tokenizer = Tokenizer(tokenizer_path)
    def generate(self,
                 inputs: List[PromptType],
                 max_out_len: int = 512,
                 temperature: float = 0.6) -> List[str]:
        """Generate responses from input prompts.

        Args:
            inputs (list): input prompts
            max_out_len (int): max output length
            temperature (float): temperature for sampling

        Returns:
            List[str]: generated responses, one per input prompt
        """
        dialogs = []
        for text in inputs:
            assert isinstance(text, (str, PromptList))
            if isinstance(text, str):
                dialog = [{'role': 'user', 'content': text}]
            else:
                dialog = []
                for item in text:
                    msg = {'content': item['prompt']}
                    if item['role'] == 'HUMAN':
                        msg['role'] = 'user'
                    elif item['role'] == 'BOT':
                        msg['role'] = 'assistant'
                    elif item['role'] == 'SYSTEM':
                        msg['role'] = 'system'
                    dialog.append(msg)
            dialogs.append(dialog)
try:
results = self.generator.chat_completion(
dialogs, # type: ignore
max_gen_len=max_out_len,
temperature=temperature,
)
return [r['generation']['content'] for r in results]
        except AssertionError:
            # Meta's implementation asserts when a batch exceeds
            # max_batch_size or max_seq_len; fall back to per-sample calls.
            self.logger.warning('Batched data max token limit exceeded, '
                                'try to run one by one...')
results = []
for dialog in dialogs:
try:
result = self.generator.chat_completion(
[dialog], # type: ignore
max_gen_len=max_out_len,
temperature=temperature,
)[0]
results.append(result['generation']['content'])
except AssertionError:
results.append('')
return results
    def get_token_len(self, prompt: str) -> int:
        # Encode with bos/eos and add a 100-token safety margin.
        return len(self.tokenizer.encode(prompt, bos=True, eos=True)) + 100
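
To close, a hypothetical smoke test for this wrapper, assuming the checkpoints were downloaded to the paths used in the chat configs above. Meta's `Llama.build` initializes `torch.distributed`, so the script must be launched with `torchrun` (one process per checkpoint shard, e.g. one for the 7B model):

```python
# smoke_test.py -- usage sketch, not part of this commit.
# Launch with: torchrun --nproc_per_node 1 smoke_test.py
from opencompass.models import Llama2Chat

model = Llama2Chat(
    path='./models/llama2/llama/llama-2-7b-chat/',
    tokenizer_path='./models/llama2/llama/tokenizer.model',
    max_seq_len=2048,
    max_batch_size=16,
)
print(model.generate(['Say hello in one sentence.'], max_out_len=64))
```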