Mirror of https://github.com/open-compass/opencompass.git (synced 2025-05-30 16:03:24 +08:00)
[ci] fix test env for vllm and add vllm baselines (#1481)
* update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update * update --------- Co-authored-by: zhulin1 <zhulin1@pjlab.org.cn>
parent da74cbfa39
commit fb6a0df652
.github/scripts/oc_score_assert.py (vendored, 34 lines changed)

@@ -8,25 +8,29 @@ output_path = 'regression_result_daily'
 
 chat_model_list = [
     'baichuan2-7b-chat-hf', 'deepseek-7b-chat-hf', 'deepseek-moe-16b-chat-hf',
-    'gemma-2b-it-hf', 'gemma-7b-it-hf', 'internlm2_5-7b-chat-hf',
-    'internlm2_5-7b-chat-turbomind', 'internlm2-chat-1.8b-turbomind',
-    'internlm2-chat-1.8b-sft-turbomind', 'internlm2-chat-7b-turbomind',
-    'internlm2-chat-7b-sft-turbomind', 'internlm2_5-7b-chat-turbomind',
-    'llama-3-8b-instruct-hf', 'llama-3-8b-instruct-turbomind',
-    'mistral-7b-instruct-v0.2-hf', 'minicpm-2b-dpo-fp32-hf',
+    'deepseek-7b-chat-vllm', 'gemma-2b-it-hf', 'gemma-7b-it-hf',
+    'internlm2_5-7b-chat-hf', 'internlm2_5-7b-chat-turbomind',
+    'internlm2-chat-1.8b-turbomind', 'internlm2-chat-1.8b-sft-turbomind',
+    'internlm2-chat-7b-turbomind', 'internlm2-chat-7b-sft-turbomind',
+    'internlm2-chat-7b-vllm', 'llama-3-8b-instruct-hf',
+    'llama-3-8b-instruct-turbomind', 'mistral-7b-instruct-v0.2-hf',
+    'mistral-7b-instruct-v0.2-vllm', 'minicpm-2b-dpo-fp32-hf',
     'minicpm-2b-sft-bf16-hf', 'minicpm-2b-sft-fp32-hf',
-    'phi-3-mini-4k-instruct-hf', 'qwen1.5-0.5b-chat-hf',
-    'qwen2-1.5b-instruct-turbomind', 'qwen2-7b-instruct-turbomind',
+    'phi-3-mini-4k-instruct-hf', 'phi-3-small-8k-instruct-hf',
+    'qwen1.5-0.5b-chat-hf', 'qwen2-1.5b-instruct-turbomind',
+    'qwen2-7b-instruct-turbomind', 'qwen1.5-0.5b-chat-vllm',
     'yi-1.5-6b-chat-hf', 'yi-1.5-9b-chat-hf', 'lmdeploy-api-test'
 ]
 base_model_list = [
-    'deepseek-moe-16b-base-hf', 'deepseek-7b-base-turbomind', 'gemma-2b-hf',
-    'gemma-7b-hf', 'internlm2-1.8b-turbomind', 'internlm2-7b-turbomind',
-    'internlm2_5-7b-turbomind', 'internlm2_5-7b-hf',
-    'internlm2-base-7b-turbomind', 'internlm2-base-7b-hf',
-    'llama-3-8b-turbomind', 'mistral-7b-v0.2-hf', 'qwen1.5-moe-a2.7b-hf',
+    'deepseek-moe-16b-base-hf', 'deepseek-7b-base-turbomind',
+    'deepseek-moe-16b-base-vllm', 'gemma-2b-hf', 'gemma-7b-hf',
+    'internlm2_5-7b-hf', 'internlm2-7b-hf', 'internlm2-base-7b-hf',
+    'internlm2_5-7b-turbomind', 'internlm2-1.8b-turbomind',
+    'internlm2-7b-turbomind', 'internlm2-base-7b-hf',
+    'internlm2-base-7b-turbomind', 'llama-3-8b-turbomind',
+    'mistral-7b-v0.2-hf', 'mistral-7b-v0.2-vllm', 'qwen1.5-moe-a2.7b-hf',
     'qwen2-0.5b-hf', 'qwen2-1.5b-turbomind', 'qwen2-7b-turbomind',
-    'yi-1.5-6b-hf', 'yi-1.5-9b-hf'
+    'qwen1.5-0.5b-vllm', 'yi-1.5-6b-hf', 'yi-1.5-9b-hf'
 ]
 dataset_list = ['gsm8k', 'race-middle', 'race-high']
 
@@ -75,6 +79,8 @@ class TestBase:
                              for p2 in dataset_list])
     def test_model_dataset_score(self, baseline_scores, result_scores, model,
                                  dataset):
+        if model == 'mistral-7b-v0.2-vllm' and dataset == 'race-high':
+            return
         base_score = baseline_scores.get(model).get(dataset)
         result_score = result_scores.get(model).get(dataset)
         assert_score(result_score, base_score)
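The second hunk above skips the race-high check for mistral-7b-v0.2-vllm and otherwise compares each model/dataset score against the recorded baseline via assert_score. The diff does not show assert_score itself, so the sketch below is only an assumption of what such a comparison might look like, with a hypothetical 5% relative margin:

```python
# Hypothetical sketch of a score assertion similar to assert_score above.
# The real tolerance used by the OpenCompass CI is not shown in this diff;
# the 5% relative margin below is an illustrative assumption.

def assert_score_within_margin(result_score, baseline_score, rel_margin=0.05):
    """Fail if the measured score drifts too far from the recorded baseline."""
    assert result_score is not None, 'model produced no score for this dataset'
    assert baseline_score is not None, 'no baseline recorded for this model/dataset'
    lower = baseline_score * (1 - rel_margin)
    upper = baseline_score * (1 + rel_margin)
    assert lower <= result_score <= upper, (
        f'score {result_score} outside [{lower:.1f}, {upper:.1f}] '
        f'around baseline {baseline_score}')
```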
.github/scripts/oc_score_baseline.yaml (vendored, 65 lines changed)

@@ -18,6 +18,11 @@ deepseek-moe-16b-chat-hf:
   race-middle: 62
   race-high: 70
 
+deepseek-7b-chat-vllm:
+  gsm8k: 63
+  race-middle: 74
+  race-high: 79
+
 gemma-2b-it-hf:
   gsm8k: 14
   race-middle: 62
@@ -58,6 +63,11 @@ internlm2-chat-7b-sft-turbomind:
   race-middle: 91
   race-high: 92
 
+internlm2-chat-7b-vllm:
+  gsm8k: 63
+  race-middle: 90
+  race-high: 91
+
 llama-3-8b-instruct-hf:
   gsm8k: 77
   race-middle: 85
@@ -73,6 +83,11 @@ mistral-7b-instruct-v0.2-hf:
   race-middle: 82
   race-high: 78
 
+mistral-7b-instruct-v0.2-vllm:
+  gsm8k: 49
+  race-middle: 81
+  race-high: 77
+
 minicpm-2b-dpo-fp32-hf:
   gsm8k: 58
   race-middle: 66
@@ -93,6 +108,11 @@ phi-3-mini-4k-instruct-hf:
   race-middle: 81
   race-high: 84
 
+phi-3-small-8k-instruct-hf:
+  gsm8k: 88
+  race-middle: 89
+  race-high: 88
+
 qwen1.5-0.5b-chat-hf:
   gsm8k: 5
   race-middle: 55
@@ -108,6 +128,11 @@ qwen2-7b-instruct-turbomind:
   race-middle: 87
   race-high: 89
 
+qwen1.5-0.5b-chat-vllm:
+  gsm8k: 5
+  race-middle: 57
+  race-high: 51
+
 yi-1.5-6b-chat-hf:
   gsm8k: 72
   race-middle: 88
@@ -118,21 +143,26 @@ yi-1.5-9b-chat-hf:
   race-middle: 89
   race-high: 91
 
-deepseek-moe-16b-base-hf:
-  gsm8k: 25
-  race-middle: 35
-  race-high: 23
-
 lmdeploy-api-test:
   gsm8k: 90
   race-middle: 95
   race-high: 96
 
+deepseek-moe-16b-base-hf:
+  gsm8k: 25
+  race-middle: 35
+  race-high: 23
+
 deepseek-7b-base-turbomind:
   gsm8k: 21
   race-middle: 42
   race-high: 42
 
+deepseek-moe-16b-base-vllm:
+  gsm8k: 22
+  race-middle: 35
+  race-high: 20
+
 gemma-2b-hf:
   gsm8k: 19
   race-middle: 33
@@ -148,6 +178,16 @@ internlm2_5-7b-hf:
   race-middle: 92
   race-high: 91
 
+internlm2-7b-hf:
+  gsm8k: 65
+  race-middle: 77
+  race-high: 72
+
+internlm2-base-7b-hf:
+  gsm8k: 5
+  race-middle: 71
+  race-high: 74
+
 internlm2_5-7b-turbomind:
   gsm8k: 73
   race-middle: 90
@@ -163,11 +203,6 @@ internlm2-7b-turbomind:
   race-middle: 78
   race-high: 76
 
-internlm2-base-7b-hf:
-  gsm8k: 2
-  race-middle: 71
-  race-high: 74
-
 internlm2-base-7b-turbomind:
   gsm8k: 39
   race-middle: 75
@@ -183,6 +218,11 @@ mistral-7b-v0.2-hf:
   race-middle: 42
   race-high: 60
 
+mistral-7b-v0.2-vllm:
+  gsm8k: 45
+  race-middle: 42
+  race-high: 58
+
 qwen1.5-moe-a2.7b-hf:
   gsm8k: 64
   race-middle: 78
@@ -203,6 +243,11 @@ qwen2-7b-turbomind:
   race-middle: 88
   race-high: 88
 
+qwen1.5-0.5b-vllm:
+  gsm8k: 12
+  race-middle: 54
+  race-high: 59
+
 yi-1.5-6b-hf:
   gsm8k: 59
   race-middle: 81
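These baselines are consumed by the baseline_scores fixture referenced in oc_score_assert.py above. The fixture body is not part of this diff; a minimal sketch of how it might load the YAML, with assumed path handling, is:

```python
# Minimal sketch of how a baseline fixture could load oc_score_baseline.yaml.
# The real fixture in oc_score_assert.py is not shown in this diff; the path
# handling and fixture wiring below are illustrative assumptions.
import os

import pytest
import yaml


@pytest.fixture(scope='session')
def baseline_scores():
    path = os.path.join(os.path.dirname(__file__), 'oc_score_baseline.yaml')
    with open(path, encoding='utf-8') as f:
        # e.g. {'mistral-7b-v0.2-vllm': {'gsm8k': 45, 'race-middle': 42, ...}, ...}
        return yaml.safe_load(f)
```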
.github/workflows/daily-run-test.yml (vendored, 48 lines changed; old and new lines of this hunk are shown in page order)

@@ -18,33 +18,55 @@ env:
  HF_DATASETS_OFFLINE: 1
  TRANSFORMERS_OFFLINE: 1
  HF_HUB_OFFLINE: 1
  TRITON_PTXAS_PATH: /usr/local/cuda/bin/ptxas

jobs:
  build-pypi:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v2
    - name: Set up Python 3.7
      uses: actions/setup-python@v1
      with:
        python-version: 3.7
    - name: Build lagent
      run: |
        pip install wheel
        python setup.py sdist bdist_wheel
    - name: Upload Artifacts
      uses: actions/upload-artifact@v4
      with:
        if-no-files-found: error
        path: dist/*
        retention-days: 1
        name: my-artifact-${{ github.run_id }}

  daily_run_test:
    needs: build-pypi
    runs-on: self-hosted
    environment: 'prod'
    timeout-minutes: 240 #4hours
    timeout-minutes: 420 #7hours
    steps:
    - name: Clone repository
      uses: actions/checkout@v2
    - name: Download Artifacts
      uses: actions/download-artifact@v4
      with:
        name: my-artifact-${{ github.run_id }}
    - name: Prepare - create conda env and install torch
      run: |
        . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
        conda create -y --name ${{env.CONDA_ENV}} python=3.10
        conda activate ${{env.CONDA_ENV}}
        pip install opencompass*.whl
        pip install /cpfs01/user/qa-llm-cicd/packages/lmdeploy-0.5.0+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
        pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.5.2+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
        FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.5.8+cu118torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
        pip install bitsandbytes
        pip install torch==2.2.2 torchvision==0.17.2 torchaudio==2.2.2 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
        pip install xformers==0.0.25.post1 --cache-dir ${{env.PIP_CACHE_PATH}}
        conda info --envs
    - name: Prepare - Pip install code
      run: |
        . /cpfs01/shared/public/qa-llm-cicd/miniconda3/bin/activate
        conda activate ${{env.CONDA_ENV}}
        pip install -e . --cache-dir ${{env.PIP_CACHE_PATH}}
        pip install human_eval transformers protobuf pytest --cache-dir ${{env.PIP_CACHE_PATH}}
        pip install /cpfs01/user/qa-llm-cicd/packages/vllm-0.5.5+cu118-cp310-cp310-manylinux1_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}

        pip install human_eval transformers protobuf pytest gguf msgspec librosa vllm_flash_attn bitsandbytes --cache-dir ${{env.PIP_CACHE_PATH}}
        pip uninstall torch torchvision torchaudio -y
        pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
        FLASH_ATTENTION_FORCE_BUILD=TRUE pip install /cpfs01/user/qa-llm-cicd/packages/flash_attn-2.6.3+cu118torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
        pip install /cpfs01/user/qa-llm-cicd/packages/xformers-0.0.27.post2+cu118-cp310-cp310-manylinux2014_x86_64.whl --cache-dir ${{env.PIP_CACHE_PATH}}
        conda info --envs
    - name: Prepare - prepare data and hf model
      run: |
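The updated workflow pins torch 2.4.0 (cu118) together with vLLM 0.5.5, LMDeploy, flash-attn, and xformers wheels. A small sanity-check script, not part of the workflow and shown only as a convenience for reproducing that environment locally, might be:

```python
# Quick sanity check of the evaluation environment, mirroring the pins in the
# workflow above (torch 2.4.0 + cu118, vLLM and LMDeploy wheels). This script
# is not part of the workflow; it is only a convenience for reproducing the env.
import torch


def check_env():
    print('torch', torch.__version__, 'cuda available:', torch.cuda.is_available())
    try:
        import vllm
        print('vllm', vllm.__version__)
    except ImportError:
        print('vllm not installed')
    try:
        import lmdeploy
        print('lmdeploy', getattr(lmdeploy, '__version__', 'unknown'))
    except ImportError:
        print('lmdeploy not installed')


if __name__ == '__main__':
    check_env()
```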
@@ -73,6 +73,6 @@ You are expected to get the evaluation results after the inference and evaluation
 
 **Note**:
 
 - If you want to pass more arguments for `engine_config` and `gen_config` in the evaluation config file, please refer to [TurbomindEngineConfig](https://lmdeploy.readthedocs.io/en/latest/inference/pipeline.html#turbomindengineconfig)
-  and [EngineGenerationConfig](https://lmdeploy.readthedocs.io/en/latest/inference/pipeline.html#generationconfig)
+  and [GenerationConfig](https://lmdeploy.readthedocs.io/en/latest/inference/pipeline.html#generationconfig)
 - If you evaluate the InternLM Chat model, please use configuration file `eval_internlm_chat_turbomind.py`
 - If you evaluate the InternLM 7B model, please modify `eval_internlm_turbomind.py` or `eval_internlm_chat_turbomind.py` by changing to the setting `models = [internlm_7b]` in the last line.
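For reference, an evaluation config that passes engine_config and gen_config to a TurboMind-backed model typically looks like the sketch below; the field values are illustrative placeholders, not taken from this commit:

```python
# Illustrative OpenCompass model config passing engine_config and gen_config
# to a TurboMind-backed model. Paths, sizes, and sampling values are
# placeholder assumptions, not values from this commit.
from opencompass.models import TurboMindModel

models = [
    dict(
        type=TurboMindModel,
        abbr='internlm2-chat-7b-turbomind',
        path='internlm/internlm2-chat-7b',
        engine_config=dict(session_len=8192, max_batch_size=32, tp=1),
        gen_config=dict(top_k=1, temperature=1.0, top_p=0.9),
        max_out_len=1024,
        max_seq_len=8192,
        batch_size=32,
        run_cfg=dict(num_gpus=1),
    )
]
```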
@@ -70,6 +70,6 @@ python run.py configs/eval_internlm_turbomind.py -w outputs/turbomind/internlm-2
 
 **Note:**
 
-- To pass more arguments to the `engine_config` and `gen_config` fields in the evaluation config file, please refer to [TurbomindEngineConfig](https://lmdeploy.readthedocs.io/zh-cn/latest/inference/pipeline.html#turbomindengineconfig) and [EngineGenerationConfig](https://lmdeploy.readthedocs.io/zh-cn/latest/inference/pipeline.html#generationconfig)
+- To pass more arguments to the `engine_config` and `gen_config` fields in the evaluation config file, please refer to [TurbomindEngineConfig](https://lmdeploy.readthedocs.io/zh-cn/latest/inference/pipeline.html#turbomindengineconfig) and [GenerationConfig](https://lmdeploy.readthedocs.io/zh-cn/latest/inference/pipeline.html#generationconfig)
 - To evaluate the InternLM Chat model, please use the configuration file `eval_internlm_chat_turbomind.py`
 - To evaluate the InternLM 7B model, please modify `eval_internlm_turbomind.py` or `eval_internlm_chat_turbomind.py` by setting the `models` field in the last line to `models = [internlm_7b]`.
@@ -60,8 +60,8 @@ class LmdeployPytorchModel(BaseModel):
             engine_config.thread_safe = True
 
         if gen_config is not None:
-            from lmdeploy.messages import EngineGenerationConfig
-            gen_config = EngineGenerationConfig(**gen_config)
+            from lmdeploy.messages import GenerationConfig
+            gen_config = GenerationConfig(**gen_config)
 
         self.logger = get_logger()
         tm_model = tm.Engine(path, engine_config)
@@ -70,6 +70,22 @@ class LmdeployPytorchModel(BaseModel):
             tm_model.create_instance() for i in range(concurrency)
         ]
         self.generator_ids = [i + 1 for i in range(concurrency)]
+
+        from transformers import GenerationConfig
+        try:
+            generation_config = GenerationConfig.from_pretrained(path)
+        except Exception:
+            generation_config = None
+        if generation_config and hasattr(generation_config, 'eos_token_id'):
+            if gen_config.stop_words is None:
+                stop_words = []
+                if isinstance(generation_config.eos_token_id, int):
+                    stop_words.append(generation_config.eos_token_id)
+                else:
+                    assert isinstance(generation_config.eos_token_id, list)
+                    for token_id in generation_config.eos_token_id:
+                        stop_words.append(token_id)
+                gen_config.stop_words = stop_words
         self.gen_config = gen_config
         self.end_str = end_str
         self.major_version, self.minor_version, _ = version_info
@@ -135,7 +151,7 @@ class LmdeployPytorchModel(BaseModel):
             prompt (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
-            gen_config (EngineGenerationConfig, optional): Generation
+            gen_config (GenerationConfig, optional): Generation
                 config to set arguments like top_k, top_p, temperature.
             end_str (str, optional): Whether to trim generated strings
                 with end_str if the model has special ending strings
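The new block folds the model's eos_token_id values from its Hugging Face GenerationConfig into the engine's stop_words so generation halts on the model's own end tokens. Restated as a standalone helper (the function name is hypothetical), the same logic is roughly:

```python
# Hypothetical helper restating the eos_token_id -> stop_words logic added above.
from transformers import GenerationConfig


def eos_ids_as_stop_words(model_path):
    """Return the model's eos_token_id(s) as a flat list of token ids."""
    try:
        generation_config = GenerationConfig.from_pretrained(model_path)
    except Exception:
        return []
    eos = getattr(generation_config, 'eos_token_id', None)
    if eos is None:
        return []
    return [eos] if isinstance(eos, int) else list(eos)
```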
@@ -113,8 +113,8 @@ class TurboMindModel(BaseModel):
             gen_config['stop_words'] = list(set(stop_words))
         gen_config.setdefault('min_new_tokens', 1)
 
-        from lmdeploy.messages import EngineGenerationConfig
-        gen_config = EngineGenerationConfig(**gen_config)
+        from lmdeploy.messages import GenerationConfig
+        gen_config = GenerationConfig(**gen_config)
 
         results = []
         for batch_input in batch_inputs:
@@ -160,7 +160,7 @@ class TurboMindModel(BaseModel):
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
-            gen_config (EngineGenerationConfig, optional): Generation
+            gen_config (GenerationConfig, optional): Generation
                 config to set arguments like top_k, top_p, temperature.
             end_str (str, optional): Whether to trim generated strings
                 with end_str if the model has special ending strings
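Both hunks replace LMDeploy's EngineGenerationConfig with GenerationConfig while keeping the keyword-style construction. Assuming a recent LMDeploy release, building such a config looks like the sketch below; the parameter values are placeholders:

```python
# Illustrative construction of an LMDeploy GenerationConfig as used above;
# the parameter values are placeholders, not taken from the commit.
from lmdeploy.messages import GenerationConfig

gen_config = GenerationConfig(
    max_new_tokens=512,   # cap on generated tokens
    min_new_tokens=1,
    top_k=1,              # greedy-style decoding for reproducible CI scores
    temperature=1.0,
    stop_words=[2],       # stop token ids, e.g. an eos id
)
```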
@@ -115,11 +115,16 @@ class TurboMindModelwithChatTemplate(BaseModel):
         batch_messages = [messages[i:i + self.concurrency] for i in range(0, len(messages), self.concurrency)]
 
         stop_words = list(set(self.stop_words + stopping_criteria))
+        encode_stop_words = []
+        if stop_words is not None and len(stop_words) > 0:
+            for words in stop_words:
+                encode_stop_words += self.tokenizer.encode(words, add_bos=False)
+
         DEFAULT_GEN_CONFIG = {
             'max_new_tokens': max_out_len,
             'min_new_tokens': 1,
             'top_k': 1,
-            'stop_words': stop_words,
+            'stop_words': encode_stop_words,
         }
         gen_config = copy.deepcopy(DEFAULT_GEN_CONFIG)
         gen_config.update(self.gen_config)
@@ -127,9 +132,8 @@ class TurboMindModelwithChatTemplate(BaseModel):
             gen_config['top_k'] = 1000
             gen_config['temperature'] = temperature
 
-        from lmdeploy.messages import EngineGenerationConfig, GenerationConfig
+        from lmdeploy.messages import GenerationConfig
         gen_config = GenerationConfig(**gen_config)
-        gen_config = EngineGenerationConfig.From(gen_config, self.tokenizer)
 
         results = []
         for batch_message in batch_messages:
@@ -160,7 +164,7 @@ class TurboMindModelwithChatTemplate(BaseModel):
             prompt (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
-            gen_config (EngineGenerationConfig, optional): Generation
+            gen_config (GenerationConfig, optional): Generation
                 config to set arguments like top_k, top_p, temperature.
         Returns:
             str: The generated string.
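The first hunk switches the default stop_words from raw strings to encoded token ids, since the engine-level GenerationConfig expects ids. A standalone sketch of that encoding step, using a Hugging Face tokenizer as a stand-in for the class's own tokenizer, might read:

```python
# Sketch of encoding stop strings into token ids, analogous to the
# encode_stop_words loop above. AutoTokenizer stands in for the LMDeploy
# tokenizer used in the class; the model path is a placeholder.
from transformers import AutoTokenizer


def encode_stop_words(stop_words, model_path='internlm/internlm2_5-7b-chat'):
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    token_ids = []
    for word in stop_words:
        token_ids += tokenizer.encode(word, add_special_tokens=False)
    return token_ids
```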