#!/bin/bash
#
# Automated model-service deployment.
#
# Workflow (see main):
#   1. Check whether the media download path contains the model weight
#      archive and the conda environment tarball.
#   2. Deploy the model weights if the model directory does not exist.
#   3. Create the conda environment if it does not exist.
#   4. If the model service is already running, stop it.
#   5. Start the model service and poll until it reports success.
#   6. Run the curl script to verify the model service.
#   7. Copy the yellow-block registration and curl scripts to the
#      yellow-block server (cp locally, scp remotely) and run them there.

# Color definitions (escape sequences are interpreted by `echo -e`).
RED='\033[0;31m'
GREEN='\033[0;32m'
CYAN='\033[36m'
RESET='\033[0m'

# Print usage information to stdout.
print_help() {
  echo "Usage: $0"
  echo
  echo "This script reads configuration from config.yaml and performs the necessary setup."
  echo
  echo "Example:"
  echo " bash $0"
}

# Print the value of one top-level key of a YAML file.
# Arguments: $1 - key name (without the leading dot), $2 - YAML file
# Returns:   non-zero (with a message on stderr) when yq fails.
_config_value() {
  yq eval ".$1" "$2" || {
    echo "Error: failed to read '$1' from $2." >&2
    return 1
  }
}

# Replace {{name}} placeholders in a string and print the result.
# Arguments: $1 - template text, then pairs of <name> <value>
# Uses literal (quoted) pattern substitution, so values containing
# characters such as '|', '&' or '\' are inserted verbatim.
_render_template() {
  local text="$1" placeholder value
  shift
  while [ "$#" -ge 2 ]; do
    placeholder="{{$1}}"
    value="$2"
    text=${text//"$placeholder"/"$value"}
    shift 2
  done
  printf '%s\n' "$text"
}

# Load the YAML configuration into global variables.
# Arguments: $1 - path to the configuration file
# Globals written: MODEL, MODEL_VERSION, CONDA_ENV, TENSOR_PARALLEL_SIZE,
#   VISIBLE_GPU_INDEX, MODEL_PORT, GPU_MEMORY_UTILIZATION, DTYPE,
#   MODEL_SERVER_IP, YELLOW_BLOCK_SERVER_IP, YELLOW_BLOCK_SERVER_PORT,
#   YELLOW_BLOCK_CONDA_ENV, MEDIA_DOWNLOAD_PATH, MODEL_PATH, MODEL_NAME,
#   MODEL_NAME_PATH, MODEL_TAR, MINICONDA_PATH, YELLOW_BLOCK_PATH,
#   AUTOMATED_DEPLOYMENT_PATH
# Returns: 0 on success, 1 if yq is unavailable or any key cannot be read.
load_config() {
  local config_file="$1"
  local entry raw

  if ! command -v yq > /dev/null 2>&1; then
    echo "Error: yq is required to read $config_file." >&2
    return 1
  fi

  # Plain keys: VARIABLE:yaml_key
  for entry in \
    MODEL:model \
    MODEL_VERSION:model_version \
    CONDA_ENV:conda_env \
    TENSOR_PARALLEL_SIZE:tensor_parallel_size \
    VISIBLE_GPU_INDEX:visible_gpu_index \
    MODEL_PORT:model_port \
    GPU_MEMORY_UTILIZATION:gpu_memory_utilization \
    DTYPE:dtype \
    MODEL_SERVER_IP:model_server_ip \
    YELLOW_BLOCK_SERVER_IP:yellow_block_server_ip \
    YELLOW_BLOCK_SERVER_PORT:yellow_block_server_port \
    YELLOW_BLOCK_CONDA_ENV:yellow_block_conda_env \
    MEDIA_DOWNLOAD_PATH:media_download_path \
    MODEL_PATH:model_path \
    MINICONDA_PATH:miniconda_path \
    YELLOW_BLOCK_PATH:yellow_block_path \
    AUTOMATED_DEPLOYMENT_PATH:automated_deployment_path; do
    raw=$(_config_value "${entry#*:}" "$config_file") || return 1
    printf -v "${entry%%:*}" '%s' "$raw"
  done

  # Templated keys; each one may reference values resolved before it.
  raw=$(_config_value model_name "$config_file") || return 1
  MODEL_NAME=$(_render_template "$raw" \
    model "$MODEL" model_version "$MODEL_VERSION")

  raw=$(_config_value model_name_path "$config_file") || return 1
  MODEL_NAME_PATH=$(_render_template "$raw" \
    model_path "$MODEL_PATH" model_name "$MODEL_NAME")

  raw=$(_config_value model_tar "$config_file") || return 1
  MODEL_TAR=$(_render_template "$raw" model_name "$MODEL_NAME")
}

# Check that the configuration file exists.
cd /data/jenkins_script/automated_deployment || {
  echo "Error: cannot enter /data/jenkins_script/automated_deployment." >&2
  exit 1
}
if [ ! -f "config.yaml" ]; then
  echo "Error: config.yaml not found in the current directory." >&2
  exit 1
fi

# Load the configuration; stop if it cannot be read.
load_config "config.yaml" || {
  echo "Error: failed to load config.yaml." >&2
  exit 1
}

# Print the parameter values (for debugging).
for _cfg_name in \
  MODEL MODEL_VERSION MODEL_NAME MODEL_NAME_PATH MODEL_TAR CONDA_ENV \
  TENSOR_PARALLEL_SIZE VISIBLE_GPU_INDEX MODEL_PORT GPU_MEMORY_UTILIZATION \
  DTYPE MODEL_SERVER_IP YELLOW_BLOCK_SERVER_IP YELLOW_BLOCK_SERVER_PORT \
  YELLOW_BLOCK_CONDA_ENV MEDIA_DOWNLOAD_PATH MODEL_PATH MINICONDA_PATH \
  YELLOW_BLOCK_PATH AUTOMATED_DEPLOYMENT_PATH; do
  echo "$_cfg_name: ${!_cfg_name}"
done
unset _cfg_name

# Locate a helper script and change into the directory that contains it.
# Search order: $MINICONDA_PATH/$CONDA_ENV/script/$MODEL, then
# $AUTOMATED_DEPLOYMENT_PATH.
# Arguments: $1 - script file name
# Returns:   0 after a successful cd; 1 if the script is in neither
#            directory or the directory cannot be entered.
check_and_enter_script() {
  local script_name="$1"
  local dir

  for dir in "$MINICONDA_PATH/$CONDA_ENV/script/$MODEL" "$AUTOMATED_DEPLOYMENT_PATH"; do
    if [ -f "$dir/$script_name" ]; then
      cd "$dir" || {
        echo "无法进入目录 $dir" >&2
        return 1
      }
      return 0
    fi
  done

  # The script exists in neither location.
  echo "Error: $script_name not found in $MINICONDA_PATH/$CONDA_ENV/script/$MODEL or $AUTOMATED_DEPLOYMENT_PATH." >&2
  return 1
}

# Report whether the media download path contains the model weight archive
# and the conda environment tarball.
# Paths are tested absolutely so the result does not depend on a successful
# cd, and the caller's working directory is left untouched.
# Returns: 0 when both files exist, 1 when at least one is missing.
check_media_list() {
  local media missing=0

  echo -e "${CYAN}Checking media files...${RESET}"
  for media in "$MODEL_TAR" "$CONDA_ENV.tar.gz"; do
    if [ -f "$MEDIA_DOWNLOAD_PATH/$media" ]; then
      echo -e "${GREEN}The media file $MEDIA_DOWNLOAD_PATH/$media exists.${RESET}"
    else
      echo -e "${RED}The media file $MEDIA_DOWNLOAD_PATH/$media does not exist.${RESET}"
      missing=1
    fi
  done
  return "$missing"
}
# Check whether the model weight directory exists on the server node.
# Returns: 0 if $MODEL_PATH/$MODEL_NAME is a directory, 1 otherwise.
check_model_path() {
  if [ -d "$MODEL_PATH/$MODEL_NAME" ]; then
    echo -e "${GREEN}The model $MODEL_NAME exists.${RESET}"
    return 0
  fi
  echo -e "${RED}The model $MODEL_NAME does not exist.${RESET}"
  return 1
}

# Deploy the model: extract the weight archive from the media download path
# into $MODEL_PATH and hand ownership to inspur:inspur.
# Exits the script with status 1 when extraction fails.
deploy_model() {
  echo -e "${CYAN}Start deploying model.${RESET}"
  cd "$MODEL_PATH" || {
    echo -e "${RED}Cannot enter $MODEL_PATH.${RESET}" >&2
    exit 1
  }
  # Extract straight from the media path; no temporary copy of the archive.
  if tar -zxf "$MEDIA_DOWNLOAD_PATH/$MODEL_TAR" -C "$MODEL_PATH"; then
    echo -e "${GREEN}Model deployed successfully.${RESET}"
  else
    echo -e "${RED}Model deployment failed.${RESET}"
    # Remove a partial extraction so the next run does not mistake it for a
    # deployed model (check_model_path only tests for the directory).
    rm -rf -- "${MODEL_PATH:?}/${MODEL_NAME:?}"
    exit 1
  fi
  # `ll` is an interactive alias and is not available in scripts.
  ls -l "$MODEL_NAME"
  chown -R inspur:inspur "$MODEL_NAME"
}

# Check whether the conda environment exists on the server node.
# Returns: 0 if $MINICONDA_PATH/$CONDA_ENV is a directory, 1 otherwise.
check_conda_env() {
  if [ -d "$MINICONDA_PATH/$CONDA_ENV" ]; then
    echo -e "${GREEN}The conda environment $CONDA_ENV exists.${RESET}"
    return 0
  fi
  echo -e "${RED}The conda environment $CONDA_ENV does not exist.${RESET}"
  return 1
}

# Extract the conda environment tarball from the media download path into
# $MINICONDA_PATH/$CONDA_ENV and change its owner and group.
# Exits the script with status 1 when the environment cannot be created.
deploy_conda_env() {
  local env_dir="$MINICONDA_PATH/$CONDA_ENV"

  if mkdir -p "$env_dir" \
    && tar -zxf "$MEDIA_DOWNLOAD_PATH/$CONDA_ENV.tar.gz" -C "$env_dir"; then
    echo -e "${GREEN}The environment $CONDA_ENV has been created.${RESET}"
  else
    echo -e "${RED}Failed to create environment $CONDA_ENV.${RESET}"
    # A leftover directory would make check_conda_env report success later.
    rm -rf -- "${MINICONDA_PATH:?}/${CONDA_ENV:?}"
    exit 1
  fi
  chown -R inspur:inspur "$env_dir"
}

# Run check.sh for the model and interpret its output.
# Returns: 0 if the output contains "SUCC", 1 if it contains "FAIL".
# Exits the script with status 1 when check.sh cannot be found or its
# output contains neither marker.
check_model_service_process() {
  local check_result

  check_and_enter_script "check.sh" || {
    echo -e "${RED}Cannot locate check.sh.${RESET}" >&2
    exit 1
  }
  check_result=$(bash check.sh "$MODEL")

  case "$check_result" in
    *SUCC*) return 0 ;;
    *FAIL*) return 1 ;;
    *)
      echo -e "${RED}Unexpected output from check.sh: $check_result${RESET}" >&2
      exit 1
      ;;
  esac
}

# Run stop.sh to stop the model service.
# Returns: 0 on success, 1 if stop.sh is missing or fails.
stop_model_service() {
  if check_and_enter_script "stop.sh" \
    && bash stop.sh "$MODEL" > /dev/null 2>&1; then
    echo -e "${GREEN}$MODEL_NAME service stopped successfully.${RESET}"
    return 0
  fi
  echo -e "${RED}Failed to stop $MODEL_NAME service.${RESET}"
  return 1
}

# Run start.sh to start the model service.
# Every argument is quoted so an empty or space-containing value cannot
# shift the positional parameters start.sh receives.
# Returns: the exit status of start.sh, or 1 if it cannot be found.
start_model_service() {
  check_and_enter_script "start.sh" || return 1
  bash "start.sh" "$VISIBLE_GPU_INDEX" "$CONDA_ENV" "$MODEL" \
    "$MODEL_NAME_PATH" "$TENSOR_PARALLEL_SIZE" "$MODEL_SERVER_IP" \
    "$MODEL_PORT" "$GPU_MEMORY_UTILIZATION" "$DTYPE"
}

# Poll until the model service reports that it has started.
# Checks every 5 seconds; exits the script with status 1 after 300 seconds.
check_model_started() {
  local timeout=300
  local interval=5
  local start_time current_time elapsed_time

  start_time=$(date +%s)
  while true; do
    sleep "$interval"
    if check_model_service_process; then
      echo -e "${GREEN}$MODEL_NAME service started successfully.${RESET}"
      break
    fi
    current_time=$(date +%s)
    elapsed_time=$((current_time - start_time))
    if [ "$elapsed_time" -ge "$timeout" ]; then
      echo -e "${RED}Failed to start $MODEL_NAME service.${RESET}"
      echo -e "${RED}Please check the log under $MINICONDA_PATH/$CONDA_ENV/script/$MODEL${RESET}"
      exit 1
    fi
  done
}

# Rewrite an `export NAME=...` line of a script in place.
# Arguments: $1 - variable name, $2 - new value, $3 - script file
# The value is escaped for use in a sed replacement ('\', '&' and the '|'
# delimiter), so it is written verbatim.
_set_export() {
  local var_name="$1" value="$2" file="$3"
  local escaped

  escaped=$(printf '%s' "$value" | sed -e 's/[\\&|]/\\&/g')
  sed -i "s|^export ${var_name}=.*|export ${var_name}=${escaped}|" "$file"
}

# Update the exported variables inside model_curl.sh.
# Arguments: $1 - model server IP, $2 - model port, $3 - model
# Returns:   1 if model_curl.sh cannot be found or edited.
update_model_curl_sh() {
  local script_file="model_curl.sh"
  local model_server_ip=$1
  local model_port=$2
  local model=$3

  check_and_enter_script "$script_file" || return 1
  _set_export MODEL_SERVER_IP "$model_server_ip" "$script_file" || return 1
  _set_export MODEL_PORT "$model_port" "$script_file" || return 1
  _set_export MODEL "$model" "$script_file" || return 1
  echo -e "${GREEN}Updated $script_file with the provided values.${RESET}"
}

# Verify the model service by running model_curl.sh.
model_curl_verification() {
  echo -e "${GREEN}Start to execute the curl command to verify the model${RESET}"
  update_model_curl_sh "$MODEL_SERVER_IP" "$MODEL_PORT" "$MODEL" || return 1
  cat model_curl.sh
  echo -e "${GREEN}The curl command returns the following results:${RESET}"
  bash model_curl.sh
}

# Run the yellow-block registration script and then its curl script from
# $YELLOW_BLOCK_PATH. This function is also sent verbatim to the remote
# host by cp_registration_curl, so it must only rely on the color variables
# and YELLOW_BLOCK_PATH.
# Returns: 1 if $YELLOW_BLOCK_PATH is missing or cannot be entered.
yellow_block_registration_curl() {
  echo -e "${CYAN}Start to execute yellow block registration and curl command${RESET}"
  if [ ! -d "$YELLOW_BLOCK_PATH" ]; then
    echo -e "${RED}Directory $YELLOW_BLOCK_PATH does not exist. Please deploy the start yellow block first.${RESET}"
    return 1
  fi
  echo -e "${GREEN}Directory $YELLOW_BLOCK_PATH exists.${RESET}"
  cd "$YELLOW_BLOCK_PATH" || return 1
  echo -e "${GREEN}Start to execute the yellow block registration${RESET}"
  bash yellow_block_register.sh
  echo -e "${GREEN}Start to execute the curl command to verify the yellow block${RESET}"
  cat yellow_block_curl.sh
  echo -e "${GREEN}The curl command returns the following results:${RESET}"
  bash yellow_block_curl.sh
}

# Update the exported variables inside yellow_block_register.sh.
# Arguments: $1 - yellow-block conda env, $2 - model server IP,
#            $3 - model port, $4 - model
# Returns:   1 if the script cannot be found or edited.
update_yellow_block_register_sh() {
  local script_file="yellow_block_register.sh"
  local yellow_block_conda_env=$1
  local model_server_ip=$2
  local model_port=$3
  local model=$4

  check_and_enter_script "$script_file" || return 1
  _set_export YELLOW_BLOCK_CONDA_ENV "$yellow_block_conda_env" "$script_file" || return 1
  _set_export MODEL_SERVER_IP "$model_server_ip" "$script_file" || return 1
  _set_export MODEL_PORT "$model_port" "$script_file" || return 1
  _set_export MODEL "$model" "$script_file" || return 1
}

# Update the exported MODEL variable inside yellow_block_curl.sh.
# Arguments: $1 - model
# Returns:   1 if the script cannot be found or edited.
update_yellow_block_curl_sh() {
  local script_file="yellow_block_curl.sh"
  local model=$1

  check_and_enter_script "$script_file" || return 1
  _set_export MODEL "$model" "$script_file" || return 1
}

# Update the yellow-block scripts, copy them to $YELLOW_BLOCK_PATH on the
# yellow-block server and run registration plus the curl check there.
# Uses cp when the model and yellow-block servers share an IP, otherwise
# scp followed by ssh.
# Returns: non-zero if the scripts cannot be updated or copied.
cp_registration_curl() {
  update_yellow_block_register_sh "$YELLOW_BLOCK_CONDA_ENV" \
    "$MODEL_SERVER_IP" "$MODEL_PORT" "$MODEL" || return 1
  update_yellow_block_curl_sh "$MODEL" || return 1

  # Check if the Model IP and Yellow Block IP are the same.
  if [[ "$MODEL_SERVER_IP" == "$YELLOW_BLOCK_SERVER_IP" ]]; then
    echo -e "\n${GREEN}Model IP and Yellow Block IP are the same. Using cp to copy files.${RESET}"
    # Copy only into an existing directory; a missing one is reported by
    # yellow_block_registration_curl below.
    if [ -d "$YELLOW_BLOCK_PATH" ]; then
      cp -f yellow_block_* "$YELLOW_BLOCK_PATH"/ || return 1
    fi
    yellow_block_registration_curl
  else
    echo -e "\n${GREEN}Model IP and Yellow Block IP are different. Using scp to copy files.${RESET}"
    scp -P "$YELLOW_BLOCK_SERVER_PORT" yellow_block_* \
      "root@$YELLOW_BLOCK_SERVER_IP":"$YELLOW_BLOCK_PATH" || {
      echo -e "${RED}Failed to copy yellow block scripts to $YELLOW_BLOCK_SERVER_IP.${RESET}" >&2
      return 1
    }
    # Send the configured path and the local function to the remote shell
    # so it works in the same directory the files were uploaded to, rather
    # than in a hard-coded path.
    {
      printf 'RED=%q\nGREEN=%q\nCYAN=%q\nRESET=%q\n' \
        "$RED" "$GREEN" "$CYAN" "$RESET"
      printf 'YELLOW_BLOCK_PATH=%q\n' "$YELLOW_BLOCK_PATH"
      declare -f yellow_block_registration_curl
      printf 'yellow_block_registration_curl\n'
    } | ssh -p "$YELLOW_BLOCK_SERVER_PORT" "root@$YELLOW_BLOCK_SERVER_IP" 'bash -l -s'
  fi
}

# Main function: run the deployment workflow end to end.
main() {
  echo -e "${CYAN}======================$MODEL_NAME======================${RESET}"

  # Missing media is not fatal here: it is only needed when the model or
  # the conda environment still has to be deployed, and those steps fail
  # on their own if the archive is absent.
  if ! check_media_list; then
    echo -e "${RED}Some media files are missing; deployment steps that need them will fail.${RESET}" >&2
  fi

  if ! check_model_path; then
    deploy_model
  fi

  if ! check_conda_env; then
    deploy_conda_env
  fi

  if check_model_service_process; then
    echo -e "${GREEN}The model service process already exists, starting to stop the process.${RESET}"
    stop_model_service
  else
    echo -e "${GREEN}The model service process does not exist. Start to start the model service.${RESET}"
  fi

  sleep 5
  if start_model_service; then
    echo -e "${CYAN}The model service is starting...${RESET}"
  else
    echo -e "${RED}start.sh returned a non-zero status; waiting to see whether the service comes up.${RESET}" >&2
  fi

  check_model_started
  model_curl_verification
  cp_registration_curl
  echo -e "\n${CYAN}======================$MODEL_NAME======================${RESET}"
}

# Execute the main function.
main "$@"