commit a232c01c8f3d427cb077db4c4b797b6e02904d11 Author: xuhengfu Date: Tue Apr 22 10:44:36 2025 +0800 上传文件至 / diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..4cc51eb --- /dev/null +++ b/config.yaml @@ -0,0 +1,22 @@ +model: "HaiRuo-7B-General" +model_version: "V1.0.0.0DEV" +conda_env: "agent-common" +tensor_parallel_size: "1" +visible_gpu_index: "1" +model_port: "20004" +gpu_memory_utilization: "0.9" +dtype: "float16" +model_server_ip: "100.200.128.151" +yellow_block_server_ip: "100.200.128.83" +yellow_block_server_port: "6233" +yellow_block_conda_env: "agent-common" + +# Constant definitions +media_download_path: "/data/media" +model_path: "/data/models" +model_name: "{{model}}-{{model_version}}" +model_name_path: "{{model_path}}/{{model_name}}" +model_tar: "{{model_name}}.tar.gz" +miniconda_path: "/data/miniconda3/envs/" +yellow_block_path: "/data/app/dev/ihp-model-ops/test/model_service_v2" +automated_deployment_path: "/data/jenkins_script/automated_deployment/script" \ No newline at end of file diff --git a/deploy_model_start_1.sh b/deploy_model_start_1.sh new file mode 100644 index 0000000..6c7660d --- /dev/null +++ b/deploy_model_start_1.sh @@ -0,0 +1,419 @@ +#!/bin/bash + +# Color definitions +RED='\033[0;31m' +GREEN='\033[0;32m' +CYAN='\033[36m' +RESET='\033[0m' + +:<<'COMMENT' +检测介质下载路径中是否包含以下介质:模型权重文件和conda环境tar包,如果不存在则退出。 +检测服务器节点目标conda环境是否存在,如果不存在则创建。 +检测模型服务是否已启动,如果模型服务已启动,则终止模型服务。 +启动模型服务 +检查模型服务是否启动成功 +执行curl命令验证模型服务 +将黄块注册脚本和黄块curl命令脚本scp到黄块服务ip +ssh登录黄块服务ip进行黄块注册,执行黄块curl命令 +MEDIA_DOWNLOAD_PATH="/data/media" +MODEL_PATH="/data/models" +#MODEL=$1 # "HaiRuo-7B-General" +#MODEL_VERSION=$2 # "V1.0.0.0" +MODEL_NAME="$MODEL-$MODEL_VERSION" # "HaiRuo-7B-General-V1.0.0.0" # 模型名称 外部传参 +MODEL_NAME_PATH="$MODEL_PATH/$MODEL_NAME" +MODEL_TAR="$MODEL_NAME.tar.gz" +MINICONDA_PATH="/data/miniconda3/envs/" +#CONDA_ENV=$3 # "conda-HaiRuo-7B-General-V1.0.0.0" # conda环境名称 外部传参 +#VISIBLE_GPU_INDEX=$4 +#MODEL_PORT=$5 +#MODEL_SERVER_IP=$6 +#YELLOW_BLOCK_SERVER_IP=$7 +#YELLOW_BLOCK_SERVER_PORT=$8 +YELLOW_BLOCK_PATH="/data/app/dev/ihp-model-ops/test/model_service_v2" +#YELLOW_BLOCK_CONDA_ENV=$9 +COMMENT + +# 定义帮助信息 +function print_help() { + echo "Usage: $0 [options]" + echo + echo "Options:" + echo " -h, --help 显示此帮助信息" + echo " --model 模型名称,例如 'HaiRuo-7B-General'" + echo " --model-version 模型版本,例如 'V1.0.0.0'" + echo " --conda-env Conda环境名称,例如 'conda-HaiRuo-7B-General-V1.0.0.0'" + echo " --tensor-parallel-size 张量并行的GPU卡数量,例如 '2'" + echo " --gpu-index 可用的GPU卡索引,例如 '0,1'" + echo " --model-port 模型服务端口,例如 '20004'" + echo " --model-server-ip 模型服务节点IP地址,例如 '127.0.0.1'" + echo " --yellow-block-server-ip 黄块服务节点IP地址,例如 '127.0.0.1'" + echo " --yellow-block-server-port 黄块服务节点SSH端口,例如 '22'" + echo " --yellow-block-conda-env 黄块Conda环境名称,例如 'agent-common'" + echo + echo "Example:" + echo " bash $0 --model 'HaiRuo-7B-General' --model-version 'V1.0.0.0' --conda-env 'conda-HaiRuo-7B-General-V1.0.0.0' --tensor-parallel-size 2 --gpu-index 0,1 --model-port 20004 --model-server-ip 127.0.0.1 --yellow-block-server-ip 127.0.0.1 --yellow-block-server-port 22 --yellow-block-conda-env 'conda-yellow-block'" +} + +# 初始化变量 +MODEL="" +MODEL_VERSION="" +CONDA_ENV="" +TENSOR_PARALLEL_SIZE="" +VISIBLE_GPU_INDEX="" +MODEL_PORT="" +MODEL_SERVER_IP="" +YELLOW_BLOCK_SERVER_IP="" +YELLOW_BLOCK_SERVER_PORT="" +YELLOW_BLOCK_CONDA_ENV="" + +# Constant definitions +MEDIA_DOWNLOAD_PATH="/data/media" +MODEL_PATH="/data/models" +MODEL_NAME="$MODEL-$MODEL_VERSION" +MODEL_NAME_PATH="$MODEL_PATH/$MODEL_NAME" +MODEL_TAR="$MODEL_NAME.tar.gz" +MINICONDA_PATH="/data/miniconda3/envs/" +YELLOW_BLOCK_PATH="/data/app/dev/ihp-model-ops/test/model_service_v2" + +# 检查是否有参数 +if [ $# -eq 0 ]; then + echo "No arguments provided. Use -h or --help for help." + exit 1 +fi + +# 处理参数 +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + print_help + exit 0 + ;; + --model) + shift + MODEL=$1 + MODEL_NAME="$MODEL-$MODEL_VERSION" + MODEL_NAME_PATH="$MODEL_PATH/$MODEL_NAME" + MODEL_TAR="$MODEL_NAME.tar.gz" + shift + ;; + --model-version) + shift + MODEL_VERSION=$1 + MODEL_NAME="$MODEL-$MODEL_VERSION" + MODEL_NAME_PATH="$MODEL_PATH/$MODEL_NAME" + MODEL_TAR="$MODEL_NAME.tar.gz" + shift + ;; + --conda-env) + shift + CONDA_ENV=$1 + shift + ;; + --tensor-parallel-size) + shift + TENSOR_PARALLEL_SIZE=$1 + shift + ;; + --gpu-index) + shift + VISIBLE_GPU_INDEX=$1 + shift + ;; + --model-port) + shift + MODEL_PORT=$1 + shift + ;; + --model-server-ip) + shift + MODEL_SERVER_IP=$1 + shift + ;; + --yellow-block-server-ip) + shift + YELLOW_BLOCK_SERVER_IP=$1 + shift + ;; + --yellow-block-server-port) + shift + YELLOW_BLOCK_SERVER_PORT=$1 + shift + ;; + --yellow-block-conda-env) + shift + YELLOW_BLOCK_CONDA_ENV=$1 + shift + ;; + *) # 未知选项 + echo "未知选项: $1" + print_help + exit 1 + ;; + esac +done + +# 打印参数值(调试用) +echo "MODEL: $MODEL" +echo "MODEL_VERSION: $MODEL_VERSION" +echo "MODEL_NAME: $MODEL_NAME" +echo "MODEL_NAME_PATH: $MODEL_NAME_PATH" +echo "MODEL_TAR: $MODEL_TAR" +echo "CONDA_ENV: $CONDA_ENV" +echo "TENSOR_PARALLEL_SIZE: $TENSOR_PARALLEL_SIZE" +echo "VISIBLE_GPU_INDEX: $VISIBLE_GPU_INDEX" +echo "MODEL_PORT: $MODEL_PORT" +echo "MODEL_SERVER_IP: $MODEL_SERVER_IP" +echo "YELLOW_BLOCK_SERVER_IP: $YELLOW_BLOCK_SERVER_IP" +echo "YELLOW_BLOCK_SERVER_PORT: $YELLOW_BLOCK_SERVER_PORT" +echo "YELLOW_BLOCK_CONDA_ENV: $YELLOW_BLOCK_CONDA_ENV" +echo "MEDIA_DOWNLOAD_PATH: $MEDIA_DOWNLOAD_PATH" +echo "MODEL_PATH: $MODEL_PATH" +echo "MINICONDA_PATH: $MINICONDA_PATH" +echo "YELLOW_BLOCK_PATH: $YELLOW_BLOCK_PATH" + +# 检查介质下载路径中是否包含以下介质:模型权重文件和conda环境tar包。 +check_media_list() { + echo -e "${CYAN}Checking media files...${RESET}" + cd "$MEDIA_DOWNLOAD_PATH" + if [ -f "$MODEL_TAR" ]; then + echo -e "${GREEN}The media file $MEDIA_DOWNLOAD_PATH/$MODEL_TAR exists.${RESET}" + else + echo -e "${RED}The media file $MEDIA_DOWNLOAD_PATH/$MODEL_TAR does not exist.${RESET}" + fi + if [ -f "$CONDA_ENV.tar.gz" ]; then + echo -e "${GREEN}The media file $MEDIA_DOWNLOAD_PATH/$CONDA_ENV.tar.gz exists.${RESET}" + else + echo -e "${RED}The media file $MEDIA_DOWNLOAD_PATH/$CONDA_ENV.tar.gz does not exist.${RESET}" + fi +} + +# 检查服务器节点中模型权重文件是否存在 +check_model_path() { + cd "$MODEL_PATH" + if [ ! -d "$MODEL_NAME" ]; then + echo -e "${RED}The model $MODEL_NAME does not exist.${RESET}" + return 1 + else + echo -e "${GREEN}The model $MODEL_NAME exists.${RESET}" + return 0 + fi +} + +# 部署模型 +deploy_model() { + echo -e "${CYAN}Start deploying model.${RESET}" + cd "$MODEL_PATH" + cp "$MEDIA_DOWNLOAD_PATH/$MODEL_TAR" ./ + tar -zxf "$MODEL_TAR" + if [ $? -eq 0 ]; then + echo -e "${GREEN}Model deployed successfully.${RESET}" + else + echo -e "${RED}Model deployment failed.${RESET}" + exit 1 + fi + ll "$MODEL_NAME" + chown -R inspur:inspur "$MODEL_NAME" + rm -rf "$MODEL_TAR" +} + +# 检查服务器节点中conda环境是否存在 +check_conda_env() { + cd "$MINICONDA_PATH" + if [ ! -d "$CONDA_ENV" ]; then + echo -e "${RED}The conda environment $CONDA_ENV does not exist.${RESET}" + return 1 + else + echo -e "${GREEN}The conda environment $CONDA_ENV exists.${RESET}" + return 0 + fi +} + +# 将介质下载路径中的conda环境压缩包解压至/data/miniconda3/envs/$CONDA_ENV目录下,修改用户和用户组。 +deploy_conda_env() { + cd "$MINICONDA_PATH" + mkdir "$CONDA_ENV" + cd "$MEDIA_DOWNLOAD_PATH" + tar -zxf "$CONDA_ENV.tar.gz" -C "$MINICONDA_PATH/$CONDA_ENV" + if [ $? -eq 0 ]; then + echo -e "${GREEN}The environment $CONDA_ENV has been created.${RESET}" + else + echo -e "${RED}Failed to create environment $CONDA_ENV.${RESET}" + exit 1 + fi + chown -R inspur:inspur "$MINICONDA_PATH/$CONDA_ENV" +} + +# 调用conda环境中的check.sh脚本,检查模型服务进程 +check_model_service_process() { + cd "$MINICONDA_PATH/$CONDA_ENV/script/$MODEL" + check_result=$(bash check.sh) + # 检查输出内容 + if [[ "$check_result" == *"SUCC"* ]]; then + return 0 + elif [[ "$check_result" == *"FAIL"* ]]; then + return 1 + else + exit 1 + fi +} + +# 调用业务代码中的stop.sh脚本,停止模型服务 +stop_model_service() { + cd "$MINICONDA_PATH/$CONDA_ENV/script/$MODEL" + bash stop.sh > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo -e "${GREEN}$MODEL_NAME service stopped successfully.${RESET}" + else + echo -e "${RED}Failed to stop $MODEL_NAME service.${RESET}" + fi +} + +# 激活conda环境,调用业务代码中的start.sh脚本,启动模型服务 +start_model_service(){ + cd "$MINICONDA_PATH/$CONDA_ENV/script/$MODEL" + bash "start.sh" ${VISIBLE_GPU_INDEX} ${CONDA_ENV} ${MODEL_NAME_PATH} ${TENSOR_PARALLEL_SIZE} ${MODEL_SERVER_IP} ${MODEL_PORT} +} + +# 轮询检测模型服务是否已启动成功 +check_model_started() { + local start_time=$(date +%s) + local timeout=300 + local interval=5 + while true; do + sleep $interval + if check_model_service_process; then + echo -e "${GREEN}$MODEL_NAME service started successfully.${RESET}" + break + fi + local current_time=$(date +%s) + local elapsed_time=$(($current_time - $start_time)) + if [ $elapsed_time -ge $timeout ]; then + echo -e "${RED}Failed to start $MODEL_NAME service.${RESET}" + echo -e "${RED}Please check the log under $MINICONDA_PATH/$CONDA_ENV/script/$MODEL${RESET}" + exit 1 + fi + done +} + +update_model_curl_sh() { + local script_file="model_curl.sh" + local model_server_ip=$1 + local model_port=$2 + + cd "$MINICONDA_PATH/$CONDA_ENV/script/$MODEL" + # 使用 sed 命令替换变量值 + sed -i "s|^export MODEL_SERVER_IP=.*|export MODEL_SERVER_IP=$model_server_ip|" "$script_file" + sed -i "s|^export MODEL_PORT=.*|export MODEL_PORT=$model_port|" "$script_file" + echo -e "${GREEN}Updated $script_file with the provided values.${RESET}" +} + +# 验证模型服务 +model_curl_verification(){ + echo -e "${GREEN}Start to execute the curl command to verify the model${RESET}" + update_model_curl_sh $MODEL_SERVER_IP $MODEL_PORT + cd "$MINICONDA_PATH/$CONDA_ENV/script/$MODEL" + cat model_curl.sh + echo -e "${GREEN}The curl command returns the following results:${RESET}" + bash model_curl.sh +} + +yellow_block_registration_curl(){ + echo -e "${CYAN}Start to execute yellow block registration and curl command${RESET}" + if [ ! -d "$YELLOW_BLOCK_PATH" ]; then + echo -e "${RED}Directory $YELLOW_BLOCK_PATH does not exist. Please deploy the start yellow block first.${RESET}" + else + echo -e "${GREEN}Directory $YELLOW_BLOCK_PATH exists.${RESET}" + cd "$YELLOW_BLOCK_PATH" + echo -e "${GREEN}Start to execute the yellow block registration${RESET}" + bash yellow_block_register.sh + echo -e "${GREEN}Start to execute the curl command to verify the yellow block${RESET}" + cat yellow_block_curl.sh + echo -e "${GREEN}The curl command returns the following results:${RESET}" + bash yellow_block_curl.sh + fi +} + +update_yellow_block_register_sh() { + local script_file="yellow_block_register.sh" + local yellow_block_conda_env=$1 + local model_server_ip=$2 + local model_port=$3 + + cd "$MINICONDA_PATH/$CONDA_ENV/script/$MODEL" + # 使用 sed 命令替换变量值 + sed -i "s|^export YELLOW_BLOCK_CONDA_ENV=.*|export YELLOW_BLOCK_CONDA_ENV=$yellow_block_conda_env|" "$script_file" + sed -i "s|^export MODEL_SERVER_IP=.*|export MODEL_SERVER_IP=$model_server_ip|" "$script_file" + sed -i "s|^export MODEL_PORT=.*|export MODEL_PORT=$model_port|" "$script_file" + echo -e "${GREEN}Updated $script_file with the provided values.${RESET}" +} + +cp_registration_curl(){ + # Check if the Model IP and Yellow Block IP are the same + if [ "$MODEL_SERVER_IP" == "$YELLOW_BLOCK_SERVER_IP" ]; then + echo -e "\n${GREEN}Model IP and Yellow Block IP are the same. Using cp to copy files.${RESET}" + update_yellow_block_register_sh $YELLOW_BLOCK_CONDA_ENV $MODEL_SERVER_IP $MODEL_PORT + # Use cp to copy files, forcing overwrite + cd "$MINICONDA_PATH/$CONDA_ENV/script/$MODEL" + cp -f yellow_block_* "$YELLOW_BLOCK_PATH" + yellow_block_registration_curl + else + echo -e "\n${GREEN}Model IP and Yellow Block IP are different. Using scp to copy files.${RESET}" + update_yellow_block_register_sh $YELLOW_BLOCK_CONDA_ENV $MODEL_SERVER_IP $MODEL_PORT + # Use scp to copy files to the Yellow Block server + cd "$MINICONDA_PATH/$CONDA_ENV/script/$MODEL" + scp -P "$YELLOW_BLOCK_SERVER_PORT" yellow_block_* "root@$YELLOW_BLOCK_SERVER_IP":"$YELLOW_BLOCK_PATH" + ssh -p "$YELLOW_BLOCK_SERVER_PORT" "root@$YELLOW_BLOCK_SERVER_IP" << 'EOF' +# Color definitions +RED='\033[0;31m' +GREEN='\033[0;32m' +CYAN='\033[36m' +RESET='\033[0m' + +YELLOW_BLOCK_PATH="/data/app/dev/ihp-model-ops/test/model_service_v2" + +echo -e "${CYAN}Start to execute yellow block registration and curl command${RESET}" +if [ ! -d "$YELLOW_BLOCK_PATH" ]; then + echo -e "${RED}Directory $YELLOW_BLOCK_PATH does not exist. Please deploy the start yellow block first.${RESET}" +else + echo -e "${GREEN}Directory $YELLOW_BLOCK_PATH exists.${RESET}" + cd "$YELLOW_BLOCK_PATH" + echo -e "${GREEN}Start to execute the yellow block registration${RESET}" + bash yellow_block_register.sh + echo -e "${GREEN}Start to execute the curl command to verify the yellow block${RESET}" + cat yellow_block_curl.sh + echo -e "${GREEN}The curl command returns the following results:${RESET}" + bash yellow_block_curl.sh +fi +EOF + fi +} + +# Main function +main(){ + echo -e "${CYAN}======================$MODEL_NAME======================${RESET}" + check_media_list + if ! check_model_path; then + deploy_model + fi + if ! check_conda_env; then + deploy_conda_env + fi + if check_model_service_process; then + echo -e "${GREEN}The model service process already exists, starting to stop the process.${RESET}" + stop_model_service + else + echo -e "${GREEN}The model service process does not exist. Start to start the model service.${RESET}" + fi + sleep 5 + start_model_service + if [ $? -eq 0 ]; then + echo -e "${CYAN}The model service is starting...${RESET}" + fi + check_model_started + model_curl_verification + cp_registration_curl + echo -e "\n${CYAN}======================$MODEL_NAME======================${RESET}" +} + +# Execute the main function +main diff --git a/deploy_model_start_2.sh b/deploy_model_start_2.sh new file mode 100644 index 0000000..ab2260e --- /dev/null +++ b/deploy_model_start_2.sh @@ -0,0 +1,364 @@ +#!/bin/bash + +# Color definitions +RED='\033[0;31m' +GREEN='\033[0;32m' +CYAN='\033[36m' +RESET='\033[0m' + +:<<'COMMENT' +检测介质下载路径中是否包含以下介质:模型权重文件和conda环境tar包,如果不存在则退出。 +检测服务器节点目标conda环境是否存在,如果不存在则创建。 +检测模型服务是否已启动,如果模型服务已启动,则终止模型服务。 +启动模型服务 +检查模型服务是否启动成功 +执行curl命令验证模型服务 +将黄块注册脚本和黄块curl命令脚本scp到黄块服务ip +ssh登录黄块服务ip进行黄块注册,执行黄块curl命令 +COMMENT + +# 定义帮助信息 +function print_help() { + echo "Usage: $0" + echo + echo "This script reads configuration from config.yaml and performs the necessary setup." + echo + echo "Example:" + echo " bash $0" +} + +# 加载 YAML 配置 +function load_config() { + local config_file="$1" + MODEL=$(yq eval '.model' "$config_file") + MODEL_VERSION=$(yq eval '.model_version' "$config_file") + CONDA_ENV=$(yq eval '.conda_env' "$config_file") + TENSOR_PARALLEL_SIZE=$(yq eval '.tensor_parallel_size' "$config_file") + VISIBLE_GPU_INDEX=$(yq eval '.visible_gpu_index' "$config_file") + MODEL_PORT=$(yq eval '.model_port' "$config_file") + GPU_MEMORY_UTILIZATION=$(yq eval '.gpu_memory_utilization' "$config_file") + DTYPE=$(yq eval '.dtype' "$config_file") + MODEL_SERVER_IP=$(yq eval '.model_server_ip' "$config_file") + YELLOW_BLOCK_SERVER_IP=$(yq eval '.yellow_block_server_ip' "$config_file") + YELLOW_BLOCK_SERVER_PORT=$(yq eval '.yellow_block_server_port' "$config_file") + YELLOW_BLOCK_CONDA_ENV=$(yq eval '.yellow_block_conda_env' "$config_file") + + MEDIA_DOWNLOAD_PATH=$(yq eval '.media_download_path' "$config_file") + MODEL_PATH=$(yq eval '.model_path' "$config_file") + MODEL_NAME=$(yq eval '.model_name' "$config_file" | sed "s|{{model}}|$MODEL|g" | sed "s|{{model_version}}|$MODEL_VERSION|g") + MODEL_NAME_PATH=$(yq eval '.model_name_path' "$config_file" | sed "s|{{model_path}}|$MODEL_PATH|g" | sed "s|{{model_name}}|$MODEL_NAME|g") + MODEL_TAR=$(yq eval '.model_tar' "$config_file" | sed "s|{{model_name}}|$MODEL_NAME|g") + MINICONDA_PATH=$(yq eval '.miniconda_path' "$config_file") + YELLOW_BLOCK_PATH=$(yq eval '.yellow_block_path' "$config_file") + AUTOMATED_DEPLOYMENT_PATH=$(yq eval '.automated_deployment_path' "$config_file") +} + +# 检查配置文件是否存在 +cd /data/jenkins_script/automated_deployment +if [ ! -f "config.yaml" ]; then + echo "Error: config.yaml not found in the current directory." + exit 1 +fi + +# 加载配置 +load_config "config.yaml" + +# 打印参数值(调试用) +echo "MODEL: $MODEL" +echo "MODEL_VERSION: $MODEL_VERSION" +echo "MODEL_NAME: $MODEL_NAME" +echo "MODEL_NAME_PATH: $MODEL_NAME_PATH" +echo "MODEL_TAR: $MODEL_TAR" +echo "CONDA_ENV: $CONDA_ENV" +echo "TENSOR_PARALLEL_SIZE: $TENSOR_PARALLEL_SIZE" +echo "VISIBLE_GPU_INDEX: $VISIBLE_GPU_INDEX" +echo "MODEL_PORT: $MODEL_PORT" +echo "GPU_MEMORY_UTILIZATION: $GPU_MEMORY_UTILIZATION" +echo "DTYPE: $DTYPE" +echo "MODEL_SERVER_IP: $MODEL_SERVER_IP" +echo "YELLOW_BLOCK_SERVER_IP: $YELLOW_BLOCK_SERVER_IP" +echo "YELLOW_BLOCK_SERVER_PORT: $YELLOW_BLOCK_SERVER_PORT" +echo "YELLOW_BLOCK_CONDA_ENV: $YELLOW_BLOCK_CONDA_ENV" +echo "MEDIA_DOWNLOAD_PATH: $MEDIA_DOWNLOAD_PATH" +echo "MODEL_PATH: $MODEL_PATH" +echo "MINICONDA_PATH: $MINICONDA_PATH" +echo "YELLOW_BLOCK_PATH: $YELLOW_BLOCK_PATH" +echo "AUTOMATED_DEPLOYMENT_PATH: $AUTOMATED_DEPLOYMENT_PATH" + +check_and_enter_script() { + local script_name="$1" + local path1="$MINICONDA_PATH/$CONDA_ENV/script/$MODEL" + local path2="$AUTOMATED_DEPLOYMENT_PATH" + + # 检查第一个路径是否存在脚本 + if [ -f "$path1/$script_name" ]; then + cd "$path1" || { echo "无法进入目录 $path1"; return 1; } + return 0 + fi + + # 检查第二个路径是否存在脚本 + if [ -f "$path2/$script_name" ]; then + cd "$path2" || { echo "无法进入目录 $path2"; return 1; } + return 0 + fi + + # 如果两个路径都不存在脚本 + return 1 +} + +# 检查介质下载路径中是否包含以下介质:模型权重文件和conda环境tar包。 +check_media_list() { + echo -e "${CYAN}Checking media files...${RESET}" + cd "$MEDIA_DOWNLOAD_PATH" + if [ -f "$MODEL_TAR" ]; then + echo -e "${GREEN}The media file $MEDIA_DOWNLOAD_PATH/$MODEL_TAR exists.${RESET}" + else + echo -e "${RED}The media file $MEDIA_DOWNLOAD_PATH/$MODEL_TAR does not exist.${RESET}" + fi + if [ -f "$CONDA_ENV.tar.gz" ]; then + echo -e "${GREEN}The media file $MEDIA_DOWNLOAD_PATH/$CONDA_ENV.tar.gz exists.${RESET}" + else + echo -e "${RED}The media file $MEDIA_DOWNLOAD_PATH/$CONDA_ENV.tar.gz does not exist.${RESET}" + fi +} + +# 检查服务器节点中模型权重文件是否存在 +check_model_path() { + cd "$MODEL_PATH" + if [ ! -d "$MODEL_NAME" ]; then + echo -e "${RED}The model $MODEL_NAME does not exist.${RESET}" + return 1 + else + echo -e "${GREEN}The model $MODEL_NAME exists.${RESET}" + return 0 + fi +} + +# 部署模型 +deploy_model() { + echo -e "${CYAN}Start deploying model.${RESET}" + cd "$MODEL_PATH" + cp "$MEDIA_DOWNLOAD_PATH/$MODEL_TAR" ./ + tar -zxf "$MODEL_TAR" + if [ $? -eq 0 ]; then + echo -e "${GREEN}Model deployed successfully.${RESET}" + else + echo -e "${RED}Model deployment failed.${RESET}" + exit 1 + fi + ll "$MODEL_NAME" + chown -R inspur:inspur "$MODEL_NAME" + rm -rf "$MODEL_TAR" +} + +# 检查服务器节点中conda环境是否存在 +check_conda_env() { + cd "$MINICONDA_PATH" + if [ ! -d "$CONDA_ENV" ]; then + echo -e "${RED}The conda environment $CONDA_ENV does not exist.${RESET}" + return 1 + else + echo -e "${GREEN}The conda environment $CONDA_ENV exists.${RESET}" + return 0 + fi +} + +# 将介质下载路径中的conda环境压缩包解压至/data/miniconda3/envs/$CONDA_ENV目录下,修改用户和用户组。 +deploy_conda_env() { + cd "$MINICONDA_PATH" + mkdir "$CONDA_ENV" + cd "$MEDIA_DOWNLOAD_PATH" + tar -zxf "$CONDA_ENV.tar.gz" -C "$MINICONDA_PATH/$CONDA_ENV" + if [ $? -eq 0 ]; then + echo -e "${GREEN}The environment $CONDA_ENV has been created.${RESET}" + else + echo -e "${RED}Failed to create environment $CONDA_ENV.${RESET}" + exit 1 + fi + chown -R inspur:inspur "$MINICONDA_PATH/$CONDA_ENV" +} + +# 调用conda环境中的check.sh脚本,检查模型服务进程 +check_model_service_process() { + check_and_enter_script "check.sh" + check_result=$(bash check.sh ${MODEL}) + # 检查输出内容 + if [[ "$check_result" == *"SUCC"* ]]; then + return 0 + elif [[ "$check_result" == *"FAIL"* ]]; then + return 1 + else + exit 1 + fi +} + +# 调用业务代码中的stop.sh脚本,停止模型服务 +stop_model_service() { + check_and_enter_script "stop.sh" + bash stop.sh ${MODEL} > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo -e "${GREEN}$MODEL_NAME service stopped successfully.${RESET}" + else + echo -e "${RED}Failed to stop $MODEL_NAME service.${RESET}" + fi +} + +# 激活conda环境,调用业务代码中的start.sh脚本,启动模型服务 +start_model_service(){ + check_and_enter_script "start.sh" + bash "start.sh" ${VISIBLE_GPU_INDEX} ${CONDA_ENV} ${MODEL} ${MODEL_NAME_PATH} ${TENSOR_PARALLEL_SIZE} ${MODEL_SERVER_IP} ${MODEL_PORT} ${GPU_MEMORY_UTILIZATION} ${DTYPE} +} + +# 轮询检测模型服务是否已启动成功 +check_model_started() { + local start_time=$(date +%s) + local timeout=300 + local interval=5 + while true; do + sleep $interval + if check_model_service_process; then + echo -e "${GREEN}$MODEL_NAME service started successfully.${RESET}" + break + fi + local current_time=$(date +%s) + local elapsed_time=$(($current_time - $start_time)) + if [ $elapsed_time -ge $timeout ]; then + echo -e "${RED}Failed to start $MODEL_NAME service.${RESET}" + echo -e "${RED}Please check the log under $MINICONDA_PATH/$CONDA_ENV/script/$MODEL${RESET}" + exit 1 + fi + done +} + +update_model_curl_sh() { + local script_file="model_curl.sh" + local model_server_ip=$1 + local model_port=$2 + local model=$3 + + check_and_enter_script $script_file + # 使用 sed 命令替换变量值 + sed -i "s|^export MODEL_SERVER_IP=.*|export MODEL_SERVER_IP=$model_server_ip|" "$script_file" + sed -i "s|^export MODEL_PORT=.*|export MODEL_PORT=$model_port|" "$script_file" + sed -i "s|^export MODEL=.*|export MODEL=$model|" "$script_file" + echo -e "${GREEN}Updated $script_file with the provided values.${RESET}" +} + +# 验证模型服务 +model_curl_verification(){ + echo -e "${GREEN}Start to execute the curl command to verify the model${RESET}" + update_model_curl_sh $MODEL_SERVER_IP $MODEL_PORT $MODEL + # check_and_enter_script "model_curl.sh" + cat model_curl.sh + echo -e "${GREEN}The curl command returns the following results:${RESET}" + bash model_curl.sh +} + +yellow_block_registration_curl(){ + echo -e "${CYAN}Start to execute yellow block registration and curl command${RESET}" + if [ ! -d "$YELLOW_BLOCK_PATH" ]; then + echo -e "${RED}Directory $YELLOW_BLOCK_PATH does not exist. Please deploy the start yellow block first.${RESET}" + else + echo -e "${GREEN}Directory $YELLOW_BLOCK_PATH exists.${RESET}" + cd "$YELLOW_BLOCK_PATH" + echo -e "${GREEN}Start to execute the yellow block registration${RESET}" + bash yellow_block_register.sh + echo -e "${GREEN}Start to execute the curl command to verify the yellow block${RESET}" + cat yellow_block_curl.sh + echo -e "${GREEN}The curl command returns the following results:${RESET}" + bash yellow_block_curl.sh + fi +} + +update_yellow_block_register_sh() { + local script_file="yellow_block_register.sh" + local yellow_block_conda_env=$1 + local model_server_ip=$2 + local model_port=$3 + local model=$4 + + check_and_enter_script $script_file + # 使用 sed 命令替换变量值 + sed -i "s|^export YELLOW_BLOCK_CONDA_ENV=.*|export YELLOW_BLOCK_CONDA_ENV=$yellow_block_conda_env|" "$script_file" + sed -i "s|^export MODEL_SERVER_IP=.*|export MODEL_SERVER_IP=$model_server_ip|" "$script_file" + sed -i "s|^export MODEL_PORT=.*|export MODEL_PORT=$model_port|" "$script_file" + sed -i "s|^export MODEL=.*|export MODEL=$model|" "$script_file" + # echo -e "${GREEN}Updated $script_file with the provided values.${RESET}" +} + +update_yellow_block_curl_sh() { + local script_file="yellow_block_curl.sh" + local model=$1 + + check_and_enter_script $script_file + # 使用 sed 命令替换变量值 + sed -i "s|^export MODEL=.*|export MODEL=$model|" "$script_file" + # echo -e "${GREEN}Updated $script_file with the provided values.${RESET}" +} + +cp_registration_curl(){ + update_yellow_block_register_sh $YELLOW_BLOCK_CONDA_ENV $MODEL_SERVER_IP $MODEL_PORT $MODEL + update_yellow_block_curl_sh $MODEL + # Check if the Model IP and Yellow Block IP are the same + if [ "$MODEL_SERVER_IP" == "$YELLOW_BLOCK_SERVER_IP" ]; then + echo -e "\n${GREEN}Model IP and Yellow Block IP are the same. Using cp to copy files.${RESET}" + cp -f yellow_block_* "$YELLOW_BLOCK_PATH" + yellow_block_registration_curl + else + echo -e "\n${GREEN}Model IP and Yellow Block IP are different. Using scp to copy files.${RESET}" + scp -P "$YELLOW_BLOCK_SERVER_PORT" yellow_block_* "root@$YELLOW_BLOCK_SERVER_IP":"$YELLOW_BLOCK_PATH" + ssh -p "$YELLOW_BLOCK_SERVER_PORT" "root@$YELLOW_BLOCK_SERVER_IP" << 'EOF' +# Color definitions +RED='\033[0;31m' +GREEN='\033[0;32m' +CYAN='\033[36m' +RESET='\033[0m' + +YELLOW_BLOCK_PATH="/data/app/dev/ihp-model-ops/test/model_service_v2" + +echo -e "${CYAN}Start to execute yellow block registration and curl command${RESET}" +if [ ! -d "$YELLOW_BLOCK_PATH" ]; then + echo -e "${RED}Directory $YELLOW_BLOCK_PATH does not exist. Please deploy the start yellow block first.${RESET}" +else + echo -e "${GREEN}Directory $YELLOW_BLOCK_PATH exists.${RESET}" + cd "$YELLOW_BLOCK_PATH" + echo -e "${GREEN}Start to execute the yellow block registration${RESET}" + bash yellow_block_register.sh + echo -e "${GREEN}Start to execute the curl command to verify the yellow block${RESET}" + cat yellow_block_curl.sh + echo -e "${GREEN}The curl command returns the following results:${RESET}" + bash yellow_block_curl.sh +fi +EOF + fi +} + +# Main function +main(){ + echo -e "${CYAN}======================$MODEL_NAME======================${RESET}" + check_media_list + if ! check_model_path; then + deploy_model + fi + if ! check_conda_env; then + deploy_conda_env + fi + if check_model_service_process; then + echo -e "${GREEN}The model service process already exists, starting to stop the process.${RESET}" + stop_model_service + else + echo -e "${GREEN}The model service process does not exist. Start to start the model service.${RESET}" + fi + sleep 5 + start_model_service + if [ $? -eq 0 ]; then + echo -e "${CYAN}The model service is starting...${RESET}" + fi + check_model_started + model_curl_verification + cp_registration_curl + echo -e "\n${CYAN}======================$MODEL_NAME======================${RESET}" +} + +# Execute the main function +main diff --git a/deploy_red_agent_service.sh b/deploy_red_agent_service.sh new file mode 100644 index 0000000..03212a0 --- /dev/null +++ b/deploy_red_agent_service.sh @@ -0,0 +1,309 @@ +#!/bin/bash + +# Color definitions +RED='\033[0;31m' +GREEN='\033[0;32m' +CYAN='\033[36m' +RESET='\033[0m' + +:<<'COMMENT' +通用红块场景自动化部署脚本 +检测介质下载路径中是否包含以下介质:代码包和conda环境tar包,如果不存在则退出。 +检测服务器节点目标conda环境是否存在,如果不存在则创建。 +检测红块场景代码是否存在。 +检测红块场景服务是否已启动,如果红块场景服务已启动,则终止红块场景服务。 +备份红块场景代码 +部署场景代码 +启动场景代码 +检查场景代码是否启动成功 +执行curl命令验证场景代码 +# Define the directory path for the red block scene +MEDIA_DOWNLOAD_PATH="/data/media" +RED_PATH="/data/redserver/red-agent-service" +RED_SCENE=$1 # 场景代码名称 外部传参 +RED_SCENE_PATH="$RED_PATH/$RED_SCENE" +RED_SCENE_VERSION=$2 # 场景代码版本 外部传参 +RED_SCENE_TAR="$RED_SCENE-$RED_SCENE_VERSION.tar.gz" +MINICONDA_PATH="/data/miniconda3/envs/" +CONDA_ENV=$3 # conda环境名称 外部传参 +COMMENT + +# 定义帮助信息 +function print_help() { + echo "Usage: $0 [options]" + echo + echo "Options:" + echo " -h, --help 显示此帮助信息" + echo " --red-scene 场景代码名称,例如 'scene1'" + echo " --red-scene-version 场景代码版本,例如 'V1.0.0.0'" + echo " --conda-env Conda环境名称,例如 'conda-scene1-V1.0.0.0'" + echo + echo "Example:" + echo " bash $0 --red-scene 'scene1' --red-scene-version 'V1.0.0.0' --conda-env 'conda-scene1-V1.0.0.0'" +} + +# 初始化变量 +MEDIA_DOWNLOAD_PATH="/data/media" +RED_PATH="/data/redserver/red-agent-service" +RED_SCENE="" +RED_SCENE_PATH="" +RED_SCENE_VERSION="" +RED_SCENE_TAR="" +MINICONDA_PATH="/data/miniconda3/envs/" +CONDA_ENV="" + +# 检查是否有参数 +if [ $# -eq 0 ]; then + echo "No arguments provided. Use -h or --help for help." + exit 1 +fi + +# 处理参数 +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + print_help + exit 0 + ;; + --red-scene) + shift + RED_SCENE=$1 + RED_SCENE_PATH="$RED_PATH/$RED_SCENE" + shift + ;; + --red-scene-version) + shift + RED_SCENE_VERSION=$1 + RED_SCENE_TAR="$RED_SCENE-$RED_SCENE_VERSION.tar.gz" + shift + ;; + --conda-env) + shift + CONDA_ENV=$1 + shift + ;; + *) # 未知选项 + echo "未知选项: $1" + print_help + exit 1 + ;; + esac +done + +# 打印参数值(调试用) +echo "MEDIA_DOWNLOAD_PATH: $MEDIA_DOWNLOAD_PATH" +echo "RED_PATH: $RED_PATH" +echo "RED_SCENE: $RED_SCENE" +echo "RED_SCENE_PATH: $RED_SCENE_PATH" +echo "RED_SCENE_VERSION: $RED_SCENE_VERSION" +echo "RED_SCENE_TAR: $RED_SCENE_TAR" +echo "MINICONDA_PATH: $MINICONDA_PATH" +echo "CONDA_ENV: $CONDA_ENV" + +# 检查介质下载路径中是否包含以下介质:代码包和conda环境tar包,如果不存在则退出。 +check_media_list() { + echo -e "${CYAN}Checking media files...${RESET}" + cd "$MEDIA_DOWNLOAD_PATH" + if [ -f "$RED_SCENE_TAR" ]; then + echo -e "${GREEN}The media file $RED_SCENE_TAR exists.${RESET}" + else + echo -e "${RED}The media file $RED_SCENE_TAR does not exist.${RESET}" + exit 1 + fi + if [ -f "$CONDA_ENV.tar.gz" ]; then + echo -e "${GREEN}The media file $CONDA_ENV.tar.gz exists.${RESET}" + else + echo -e "${RED}The media file $CONDA_ENV.tar.gz does not exist.${RESET}" + fi +} + +# 如果当前服务器节点中已存在红块场景代码,则备份红块场景代码。 +backup_red_block() { + cd "$RED_PATH" + local current_time=$(date +"%Y%m%d%H%M%S") + if [ ! -d "$RED_SCENE-bak" ]; then + mkdir "$RED_SCENE-bak" + fi + mv "$RED_SCENE" "$RED_SCENE-bak/$RED_SCENE-$current_time" + if [ $? -eq 0 ]; then + echo -e "${GREEN}Red block code backup succeeded.${RESET}" + else + echo -e "${RED}Red block code backup failed.${RESET}" + fi +} + +# 将介质下载路径中的红块场景代码压缩包拷贝至/data/redserver/red-agent-service目录下,并解压,重命名,修改用户和用户组。 +deploy_red_block() { + echo -e "${CYAN}Start deploying code.${RESET}" + cd "$RED_PATH" + cp "$MEDIA_DOWNLOAD_PATH/$RED_SCENE_TAR" ./ + tar -zxf "$RED_SCENE_TAR" + mv "$RED_SCENE-$RED_SCENE_VERSION" "$RED_SCENE" + if [ $? -eq 0 ]; then + echo -e "${GREEN}Red block code deployed successfully.${RESET}" + else + echo -e "${RED}Red block code deployment failed.${RESET}" + fi + chown -R inspur:inspur "$RED_SCENE_PATH" + rm -rf "$RED_SCENE_TAR" +} + +# 检查服务器节点中conda环境是否存在 +check_conda_env() { + cd "$MINICONDA_PATH" + if [ ! -d "$CONDA_ENV" ]; then + echo -e "${RED}The environment $CONDA_ENV does not exist.${RESET}" + return 1 + else + echo -e "${GREEN}The environment $CONDA_ENV exists.${RESET}" + return 0 + fi +} + +# 将介质下载路径中的conda环境压缩包解压至/data/miniconda3/envs/$CONDA_ENV目录下,修改用户和用户组。 +deploy_conda_env() { + cd "$MINICONDA_PATH" + mkdir "$CONDA_ENV" + cd "$MEDIA_DOWNLOAD_PATH" + tar -zxf "$CONDA_ENV.tar.gz" -C "$MINICONDA_PATH/$CONDA_ENV" + if [ $? -eq 0 ]; then + echo -e "${GREEN}The environment $CONDA_ENV has been created.${RESET}" + else + echo -e "${RED}Failed to create environment $CONDA_ENV.${RESET}" + exit 1 + fi + chown -R inspur:inspur "$MINICONDA_PATH/$CONDA_ENV" +} + +# 调用业务代码中的check.sh脚本,检查红块服务是否已启动成功。 +check_red_block_service_process() { + cd "$RED_SCENE_PATH" + check_result=$(bash check.sh) + # 检查输出内容 + if [[ "$check_result" == *"SUCC"* ]]; then + return 0 + elif [[ "$check_result" == *"FAIL"* ]]; then + return 1 + else + exit 1 + fi +} + +# 调用业务代码中的stop.sh脚本,终止红块服务。 +stop_red_block() { + cd "$RED_SCENE_PATH" + bash stop.sh > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo -e "${GREEN}Red block service stopped successfully.${RESET}" + else + echo -e "${RED}Failed to stop red block service.${RESET}" + fi +} + +# 激活conda环境,调用业务代码中的start.sh脚本,启动红块服务。 +start_red_block() { + source /etc/profile + echo -e "${CYAN}HAIRUO_ENV=$HAIRUO_ENV${RESET}" + su - inspur << EOF + source /data/miniconda3/etc/profile.d/conda.sh + conda activate $CONDA_ENV + cd "$RED_SCENE_PATH" + bash start.sh +EOF +} + +# 轮询检测红块服务是否已启动成功 +check_red_block_started() { + local start_time=$(date +%s) + local timeout=120 + local interval=5 + while true; do + sleep $interval + if check_red_block_service_process; then + echo -e "${GREEN}Red block service started successfully.${RESET}" + break + fi + local current_time=$(date +%s) + local elapsed_time=$(($current_time - $start_time)) + if [ $elapsed_time -ge $timeout ]; then + echo -e "${RED}Failed to start red block service.${RESET}" + return 1 + fi + done +} + +# 回滚红块场景代码,暂时未用到 +rollback_red_block(){ + cd "$RED_PATH" + if [ ! -d "$RED_SCENE-bak" ]; then + echo -e "${RED}The backup directory does not exist and cannot be rolled back${RESET}" + else + echo -e "${GREEN}The backup directory exists, start rollback${RESET}" + rm -rf "$RED_SCENE" + local latest_backup=$(ls -td $RED_SCENE-bak/$RED_SCENE-* | head -1) + mv "$latest_backup" "$RED_SCENE" + fi +} + +# 调用业务代码中的curl.sh脚本,验证红块场景代码 +red_block_curl_verification(){ + echo -e "${GREEN}Start to execute the curl command to verify the red block${RESET}" + cd "$RED_SCENE_PATH" + cat curl.sh + echo -e "${GREEN}The curl command returns the following results:${RESET}" + bash curl.sh +} + +# Main function +main() { + echo -e "${CYAN}======================$RED_SCENE======================${RESET}" + check_media_list + if ! check_conda_env; then + deploy_conda_env + fi + if [ -d "$RED_PATH" ]; then + if [ -d "$RED_SCENE_PATH" ]; then + echo -e "${GREEN}Directory $RED_SCENE_PATH exists.${RESET}" + if check_red_block_service_process; then + echo -e "${GREEN}Red block service process already exists, starting to stop the process.${RESET}" + stop_red_block + else + echo -e "${RED}Red block service process does not exist. Start to start the model service.${RESET}" + fi + backup_red_block + else + echo -e "${RED}Directory $RED_SCENE_PATH does not exist.${RESET}" + fi + else + echo -e "${RED}Directory $RED_PATH does not exist.${RESET}" + mkdir -p "$RED_PATH" + if [ $? -eq 0 ]; then + echo -e "${GREEN}Directory $RED_PATH created successfully.${RESET}" + else + echo -e "${RED}Failed to create directory $RED_PATH.${RESET}" + exit 1 + fi + fi + deploy_red_block + start_red_block + if [ $? -eq 0 ]; then + echo -e "${GREEN}Red block service is starting.${RESET}" + fi + check_red_block_started +:<<'COMMENT' + if [ $? -eq 1 ]; then + echo -e "${RED}Start executing rollback function.${RESET}" + rollback_red_block + start_red_block + if [ $? -eq 0 ]; then + echo -e "${GREEN}Red block service is starting.${RESET}" + fi + check_red_block_started + fi +COMMENT + red_block_curl_verification + echo -e "\n${CYAN}======================$RED_SCENE======================${RESET}" +} + +# Execute the main function +main diff --git a/deploy_s3cmd_download.sh b/deploy_s3cmd_download.sh new file mode 100644 index 0000000..3c5929c --- /dev/null +++ b/deploy_s3cmd_download.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +# ɫ +CYAN="\033[1;36m" +RED="\033[0;31m" +GREEN="\033[1;32m" +RESET="\033[0m" + +:<<'COMMENT' +OSSڲͰʴ洢·б +ԶʶĿ¼ļ +ͬ +COMMENT + +# 屾ش洢· +LOCAL_PATH="/data/media" +mkdir -p "$LOCAL_PATH" # ȷ· + +# OSSڲͰⲿͰļ +INTERNAL_S3CFG=~/.s3cfg-internal + +# ļǷ +if [ ! -f "$INTERNAL_S3CFG" ]; then + echo -e "${RED}Internal S3 configuration file not found.${RESET}" + exit 1 +fi + +# ǷOSS· +if [ $# -eq 0 ]; then + echo -e "${RED}No OSS paths provided. Please provide OSS paths as arguments.${RESET}" + exit 1 +fi + +# OSS·б +for oss_path in "$@"; do + echo -e "${CYAN}Processing OSS path: $oss_path${RESET}" + + # ·ǷЧ + if [[ ! $oss_path =~ ^s3:// ]]; then + echo -e "${RED}Skipping invalid OSS path: $oss_path${RESET}" + continue + fi + + # ȡĿ·ı· + local_target="$LOCAL_PATH/$(basename "$oss_path")" + if [[ $oss_path =~ /$ ]]; then + # Ŀ¼ȷβб + local_target="$LOCAL_PATH/$(basename "$oss_path")/" + mkdir -p "$local_target" # ȷĿ¼ + fi + + # ʹ s3cmd sync ͬļĿ¼ + echo -e "${GREEN}Syncing $oss_path to local path $local_target${RESET}" + s3cmd -c "$INTERNAL_S3CFG" sync "$oss_path" "$local_target" # | grep "Downloaded" + + if [ $? -eq 0 ]; then + echo -e "${GREEN}Successfully synced $oss_path to $local_target${RESET}" + else + echo -e "${RED}Error: Failed to sync $oss_path.${RESET}" + fi +done + +echo -e "${GREEN}Script execution completed.${RESET}" \ No newline at end of file