#!/bin/bash
################################################################################
# watch_server_v4.sh - keep-alive watchdog for the services deployed under
# [largest disk]/ICC/dahua/fire.
#
# [Operating instructions]
#  1. Start command: nohup sh watch_server_v4.sh >/dev/null 2>&1 &
#  2. watch_server_v4.sh must live in [largest disk]/ICC/dahua/fire/server_keeper
#     and be started from that directory.
#  3. Make sure that exactly one watchdog process is running.
#  4. Newly added services are guarded without any further action (for a jar
#     service the service directory must contain exactly one executable jar).
#  5. After starting the watchdog, confirm that no ERROR.log appears in the
#     current directory. If one does, fix the problem it describes.
#  6. Because services are deployed redundantly, IFCSI and similar services are
#     NOT started by default; add them to serverList.sh (see the hints in that
#     file) if they are needed.
#  7. Never start a service by hand, otherwise unpredictable errors may occur.
#
# [Changelog V4.1] 2020-7-22 23:39:52 Liu Yingguang (刘迎光)
#  1. The watchdog is fully automatic; no manual intervention is required.
#  2. Newly added services are guarded without restarting the watchdog.
#  3. Strict validation of the script location (ERROR.log is written if wrong).
#  4. Much more tolerant parsing of the kafka configuration.
#  5. Log files are cleaned periodically.
#
# Implementation notes:
#  - The script uses bash features ([[ ]], local, FUNCNAME, here-strings). It is
#    documented to be launched with `sh`, so it re-executes itself under bash
#    when the invoking shell is not bash, and it avoids process substitution,
#    which bash disables in POSIX mode.
#  - Services are launched from subshells, so the watchdog's own working
#    directory never changes and all log files stay in the start directory.
################################################################################

# Re-exec under bash when started by a non-bash `sh`.
if [ -z "${BASH_VERSION:-}" ]; then
  exec bash "$0" "$@"
fi

# Directory the watchdog was started from, and the script name with/without
# its extension ($0 is used as given on the command line).
watchPath=$(pwd)
watchFileName=${0}
watchFileName=${watchFileName%.*}
watchFilePath=${watchPath}/${watchFileName}.sh
echo "守护进程的文件名(不含后缀):[[ ${watchFileName} ]]"

# Base directory that contains all service directories (parent of the cwd).
basePath=$(cd .. && pwd)
time=$(date +'%Y-%m-%d %H:%M:%S')

# Directory that receives INFO.log / WARN.log / DEBUG.log / ERROR.log.
LOG_DIR=${watchPath}

# Expected location fragment of basePath.
BASE_PATH=/ICC/dahua/fire

# Boot-time registration: lines written to rc.local and the file itself.
FIND_S="source /etc/profile"
FIND_STR="nohup sh ${watchFileName}.sh >/dev/null 2>&1 &"
FIND_FILE="/etc/rc.d/rc.local"

JAVA_OPTS="-Xss512k -XX:MaxMetaspaceSize=1024M -XX:+DisableExplicitGC -XX:+UseConcMarkSweepGC -XX:+UseCMSCompactAtFullCollection -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -XX:CMSInitiatingOccupancyFraction=75 -XX:ParallelGCThreads=4 -XX:+HeapDumpOnOutOfMemoryError"

# Seconds to wait between two check rounds.
sleepTime=10

#######################################
# Print a log line to stdout and append it to the level log files.
# Level ranks are INFO=1, WARN=2, DEBUG=3, ERROR=4; a message is appended to
# every file whose rank is <= the message rank (so ERROR goes to all four).
# Globals:   LOG_DIR (read), FUNCNAME (read; the call stack is part of the line)
# Arguments: $1 - message
#            $2 - optional level, matched case-insensitively on its first
#                 letter (I/W/D/E); anything else, or nothing, means INFO
# Outputs:   the formatted line on stdout and in ${LOG_DIR}/<LEVEL>.log
#######################################
function log_out() {
  local logContent=$1
  local LEVEL_CODE LEVEL logTime logInfo
  local logDir=${LOG_DIR:-.}

  case "${2:-}" in
    [Ww]*) LEVEL_CODE=WARN LEVEL=2 ;;
    [Dd]*) LEVEL_CODE=DEBUG LEVEL=3 ;;
    [Ee]*) LEVEL_CODE=ERROR LEVEL=4 ;;
    *) LEVEL_CODE=INFO LEVEL=1 ;;
  esac

  logTime=$(date +'%Y-%m-%d %H:%M:%S')
  logInfo="${logTime}:[${FUNCNAME[*]}][${LEVEL_CODE}] ${logContent}"
  printf '%s\n' "${logInfo}"

  printf '%s\n' "${logInfo}" >>"${logDir}/INFO.log"
  if [[ ${LEVEL} -ge 2 ]]; then
    printf '%s\n' "${logInfo}" >>"${logDir}/WARN.log"
  fi
  if [[ ${LEVEL} -ge 3 ]]; then
    printf '%s\n' "${logInfo}" >>"${logDir}/DEBUG.log"
  fi
  if [[ ${LEVEL} -ge 4 ]]; then
    printf '%s\n' "${logInfo}" >>"${logDir}/ERROR.log"
  fi
}
# Usage example:
# log_out "text to log"

#######################################
# Tell whether a process whose command line contains both given fixed strings
# is currently listed by `ps -ef` (grep's own processes are ignored).
# Arguments: $1 - first fixed string (typically the service path)
#            $2 - second fixed string (typically a marker such as a class name)
# Returns:   0 if such a process exists, non-zero otherwise
#######################################
function is_process_running() {
  ps -ef | grep -F -- "$1" | grep -v "grep" | grep -qF -- "$2"
}

#######################################
# Register the watchdog in rc.local so it is started on boot.
# Old watchdog entries are removed first and then re-added. Only lines that
# mention /etc/profile, watch_server or server_keeper are removed; unrelated
# startup entries are left untouched.
# Globals:   FIND_FILE, FIND_S, FIND_STR, watchPath (read)
# Returns:   1 if the rc.local file does not exist, 0 otherwise
#######################################
function register_autostart() {
  if [[ ! -f "${FIND_FILE}" ]]; then
    log_out "开机启动项文件 [[ ${FIND_FILE} ]] 不存在,跳过开机自启注册" W
    return 1
  fi

  sed -i \
    -e '/\/etc\/profile/d' \
    -e '/watch_server/d' \
    -e '/server_keeper/d' \
    "${FIND_FILE}"

  # rc.local is executed by root at boot: it must be executable, but it must
  # not be writable by other users.
  chmod 755 "${FIND_FILE}"

  if ! grep -qF -- "${FIND_S}" "${FIND_FILE}"; then
    echo "${FIND_S}" >>"${FIND_FILE}"
  fi
  if ! grep -q "watch_server" "${FIND_FILE}"; then
    echo "cd ${watchPath}" >>"${FIND_FILE}"
    echo "${FIND_STR}" >>"${FIND_FILE}"
  fi
}

#######################################
# Start the ZooKeeper instance bundled with kafka unless it is already running.
# Globals:   basePath (read)
# Returns:   1 if the kafka directory cannot be entered, 0 otherwise
#######################################
function start_kafka_zookeeper() {
  local serverName=zookeeper
  local serverPath=${basePath}/kafka

  if is_process_running "${serverPath}" "zookeeper.properties"; then
    log_out "[[ ${serverName} ]] 正常运行中,无需启动 ... "
    return
  fi

  log_out "[[ ${serverPath} 下的 ${serverName} ]] 未运行,开始启动... " W
  # Absolute paths are used so that the command line shown by ps contains
  # serverPath, which the liveness check above greps for.
  if ! (
    cd "${serverPath}" || exit 1
    nohup "${serverPath}/bin/zookeeper-server-start.sh" \
      "${serverPath}/config/zookeeper.properties" >/dev/null 2>&1 &
  ); then
    log_out "无法进入 [[ ${serverPath} ]],${serverName} 启动失败" E
    return 1
  fi
  log_out "[[ ${serverPath} 下的 ${serverName} ]] 启动完成... " W
  # Pause before the caller goes on to start kafka.
  sleep 5
}

#######################################
# Start the kafka broker unless it is already running.
# Globals:   basePath (read)
# Returns:   1 if the kafka directory cannot be entered, 0 otherwise
#######################################
function start_kafka() {
  local serverName=kafka
  local serverPath=${basePath}/${serverName}

  if is_process_running "${serverPath}" "kafka.Kafka"; then
    log_out "[[ ${serverName} ]] 正常运行中,无需启动 ... "
    return
  fi

  log_out "[[ ${serverPath} 下的 ${serverName} ]] 未运行,开始启动... " W
  if ! (
    cd "${serverPath}" || exit 1
    nohup "${serverPath}/bin/kafka-server-start.sh" \
      "${serverPath}/config/server.properties" >/dev/null 2>&1 &
  ); then
    log_out "无法进入 [[ ${serverPath} ]],${serverName} 启动失败" E
    return 1
  fi
  log_out "[[ ${serverPath} 下的 ${serverName} ]] 启动完成... " W
}

#######################################
# Start ZooKeeper and then kafka, provided the kafka directory exists and its
# server.properties contains a usable `listeners` entry (the third
# ':'-separated field of that line is taken as the port).
# Globals:   basePath (read)
#######################################
function start_zookeeperAndKafka() {
  local serverName=kafka
  local serverPath=${basePath}/${serverName}
  local kafka_port

  if [[ ! -d "${serverPath}" ]]; then
    log_out "[[ ${serverPath} 下的 ${serverName} ]] 不存在,无需启动"
    return
  fi

  # Ignore comment lines and blank lines when looking for the listener.
  kafka_port=$(grep -Ev "^\s*#|^\s*$" "${serverPath}/config/server.properties" \
    | grep "listeners" \
    | grep -v "advertised" \
    | awk -F ':' '{print $3}')
  if [[ -z "${kafka_port}" ]]; then
    log_out "未找到 [[ ${serverPath} 下的 ${serverName} ]] 的端口 kafka_port !!!请检查 ${serverPath}/config/server.properties 中 是否包含 listeners = PLAINTEXT://:3998 配置" E
    return
  fi

  start_kafka_zookeeper
  start_kafka
}

#######################################
# Start redis (redis-server + redis.conf inside the redis directory) unless it
# is already running.
# Globals:   basePath (read)
# Returns:   1 if the redis directory cannot be entered, 0 otherwise
#######################################
function start_redis() {
  local serverName=redis
  local serverPath=${basePath}/${serverName}
  local redis_file=${serverPath}/redis-server
  local redis_conf=${serverPath}/redis.conf

  if [[ ! -d "${serverPath}" ]]; then
    log_out "[[ ${serverPath} 下的 ${serverName} ]] 不存在,无需启动"
    return
  fi

  if is_process_running "${serverPath}" "redis-server"; then
    log_out "[[ ${serverName} ]] 正常运行中,无需启动 ... "
    return
  fi

  log_out "[[ ${serverPath} 下的 ${serverName} ]] 未运行,开始启动... " W
  log_out "${redis_file} ${redis_conf} &"
  if ! (
    cd "${serverPath}" || exit 1
    "${redis_file}" "${redis_conf}" &
  ); then
    log_out "无法进入 [[ ${serverPath} ]],${serverName} 启动失败" E
    return 1
  fi
  log_out "[[ ${serverPath} 下的 ${serverName} ]] 启动完成... " W
}

#######################################
# Keep the mysqld systemd unit running.
# When mysqld is not active, SELinux is switched to permissive/disabled and
# then EITHER a leftover mysql.sock.lock is removed OR mysqld is restarted;
# after a lock removal the restart therefore happens on the next check round.
# Globals:   basePath (read)
#######################################
function start_mysql() {
  local serverName=mysql
  local serverPath=${basePath}/${serverName}
  local lockFile=${serverPath}/../../../../firecontroldata/mysql/mysql.sock.lock

  if [[ ! -d "${serverPath}" ]]; then
    log_out "[[ ${serverPath} 下的 ${serverName} ]] 不存在,无需启动"
    return
  fi

  if systemctl status mysqld | grep -q "Active: active"; then
    log_out "[[ ${serverName} ]] 正常运行中,无需启动 ... "
    return
  fi

  log_out "[[ ${serverPath} 下的 ${serverName} ]] 未运行,开始启动... " W
  setenforce 0
  if [[ -f /etc/selinux/config ]]; then
    sed -i 's/SELINUX=enforcing/SELINUX=disabled/g' /etc/selinux/config
  fi

  if [[ -f "${lockFile}" ]]; then
    rm -f -- "${lockFile}"
  else
    systemctl restart mysqld.service
    log_out "[[ ${serverPath} 下的 ${serverName} ]] 启动完成... " W
  fi
}

#######################################
# Keep a war (Tomcat-style, has bin/startup.sh) service running.
# Exactly one process referencing <service>/bin counts as healthy. With zero
# or several matches, all matches are killed and the service is started again.
# Globals:   basePath (read)
# Arguments: $1 - service directory name under basePath
# Returns:   1 if the service directory cannot be entered, 0 otherwise
#######################################
function startWarServer() {
  local serverName=$1
  local serverPath=${basePath}/${serverName}
  local pids numServer

  pids=$(ps aux | grep -F -- "${serverPath}/bin" | grep -v "grep" | awk '{print $2}')
  numServer=$(grep -c . <<<"${pids}")
  if [[ ${numServer} -eq 1 ]]; then
    log_out "[[ ${serverName} ]] 正常运行中,无需启动 ... "
    return
  fi

  if [[ -n "${pids}" ]]; then
    # Word splitting is intended: one PID per word.
    # shellcheck disable=SC2086
    kill -9 ${pids}
  fi

  log_out "[[ ${serverPath} 下的 ${serverName} ]] 未运行,开始启动... " W
  if ! (
    cd "${serverPath}" || exit 1
    "${serverPath}/bin/startup.sh"
    exit 0
  ); then
    log_out "无法进入 [[ ${serverPath} ]],${serverName} 启动失败" E
    return 1
  fi
  log_out "[[ ${serverPath} 下的 ${serverName} ]] 启动完成... " W
}

#######################################
# Keep a jar service running.
# Globals:   basePath, JAVA_OPTS (read)
# Arguments: $1 - service directory name under basePath
#            $2 - jar file name; when empty, the single jar in the service
#                 directory whose name does not contain "bak" is used
#            $3 - max heap including unit, e.g. 2G; defaults to 1G
# Returns:   1 when no unique jar can be determined, the jar is missing or the
#            service directory cannot be entered; 0 otherwise
#######################################
function startJarServer() {
  local serverName=$1
  local jarName=$2
  local limitXmx=$3
  # e.g. serverPath=/ICC/dahua/fire/DCPS_TCP
  local serverPath=${basePath}/${serverName}
  local jarNum jarPath limitSize numServer

  if [[ -z "${jarName}" ]]; then
    log_out "${jarName} 为 NULL,选择从 ${serverName} 中读取 jarName。"
    # shellcheck disable=SC2010
    jarName=$(ls "${serverPath}" | grep "jar$" | grep -iv "bak")
    jarNum=$(grep -c . <<<"${jarName}")
    # Zero or several candidate jars: the service layout is not usable.
    if [[ ${jarNum} -ne 1 ]]; then
      log_out "${serverPath} 下可执行 jar 包数量为 ${jarNum}(应为 1),请将无关 jar 包删除(或将其以 _bak 结尾命名)!!!" E
      return 1
    fi
  fi
  jarPath=${serverPath}/${jarName}

  # Strip the unit (G) to validate the number; default limit is 1G.
  limitSize=${limitXmx%G*}
  if [[ -z "${limitSize}" ]]; then
    limitXmx=1G
  elif [[ "${limitSize}" =~ ^[0-9]+$ ]] && ((10#${limitSize} > 5)); then
    log_out "${limitXmx} 设置过大,请确认是否正确!!!" E
  fi

  log_out "basePath=${basePath},serverName=${serverName},jarName=${jarName},limitXmx=${limitXmx},serverPath=${serverPath},jarPath=${jarPath}"

  numServer=$(ps aux | grep -F -- "${jarPath}" | grep -vc "grep")
  if [[ ${numServer} -ge 1 ]]; then
    if [[ ${numServer} -gt 1 ]]; then
      # Never add yet another copy on top of duplicates.
      log_out "[[ ${serverName} ]] 存在 ${numServer} 个进程,请检查是否重复启动" W
    fi
    log_out "[[ ${serverName} ]] 正常运行中,无需启动 ... "
    return
  fi

  if [[ ! -f "${jarPath}" ]]; then
    log_out "[[ ${jarPath} ]] 不存在,无法启动 ${serverName}" E
    return 1
  fi

  log_out "[[ ${serverPath} 下的 ${serverName} ]] 未运行,开始启动... " W
  # JAVA_OPTS is intentionally unquoted: it holds several separate options.
  # shellcheck disable=SC2086
  if ! (
    cd "${serverPath}" || exit 1
    nohup java -server -Xms512m "-Xmx${limitXmx}" ${JAVA_OPTS} \
      "-XX:HeapDumpPath=/logs/${serverName}.dump" -jar "${jarPath}" >/dev/null 2>&1 &
  ); then
    log_out "无法进入 [[ ${serverPath} ]],${serverName} 启动失败" E
    return 1
  fi
  log_out "[[ ${serverPath} 下的 ${serverName} ]] 启动完成... " W
}

#######################################
# Start one service, choosing the launcher by layout: a service directory
# that contains bin/ is treated as a war service, anything else as a jar one.
# Globals:   basePath (read)
# Arguments: $1 - service directory name (a trailing '/' is ignored)
#            $2 - jar name (jar services only, optional)
#            $3 - memory limit (jar services only, optional)
#######################################
function startServer() {
  local serverName=${1%/}
  local jarName=${2:-}
  local limitSize=${3:-}
  local serverPath=${basePath}/${serverName}
  local binDir=${serverPath}/bin
  local info

  if [[ -z "${serverName}" ]]; then
    return
  fi
  if [[ ! -d "${serverPath}" ]]; then
    log_out "[[ ${serverPath} 下的 ${serverName} ]] 不存在,无需启动"
    return
  fi

  info="[[ ${serverPath} 下的 ${serverName} ]]"
  if [[ -d "${binDir}" ]]; then
    info=${info}",war 包启动"
    log_out "${info}"
    startWarServer "${serverName}"
  else
    info=${info}",jar 包名称:[[ ${jarName} ]] ,限制内存大小:[[ ${limitSize} ]] "
    log_out "${info}"
    startJarServer "${serverName}" "${jarName}" "${limitSize}"
  fi
}

#######################################
# Create the service list file with its default content unless it exists.
# (The name is historical: it creates a file, not a directory.)
# Globals:   watchPath, watchFileName (read)
# Arguments: $1 - file to create, e.g. serverList.sh
#######################################
function mkDir() {
  local file=$1

  if [[ -f "${file}" ]]; then
    log_out "[[ ${watchPath} 下的 ${file} ]] 已存在,无需创建"
    return
  fi
  log_out "[[ ${watchPath} 下的 ${file} ]] 文件不存在,开始创建"

  # Initial content: usage notes plus the default service list.
  cat >"${file}" <<EOF
#! /bin/sh
# 启动守护进程指令:nohup sh ${watchFileName}.sh >/dev/null 2>&1 &
# [注意事项]
# 0.默认服务(IFCS、FireAppServer、MessageServer、DCPS_TCP、DCPS_UDP、DCPS_TCP、ftpServer等)已自动守护,无需重复添加
# 1.禁止手动启动任何服务,否则将会导致服务启动异常。
# 2.启动守护进程后,务必观察当前文件夹下是否有 ERROR.log 文件产生。如有,务必根据提示修复存在问题。
# 3.新增服务后即被守护,无需重启守护进程。
# 4.MySQL、Redis、kafka 已被守护进程 ${watchFileName}.sh 守护,无需重复添加。
################ 示例 (始)################
# 添加 war 包服务格式:服务名称
IFCS
# 添加 jar 包服务格式:服务名称 jar包 限制内存(选填,建议不填)
DCPS_TCP DCPS.jar 1G
DCPS_UDP DCPS.jar
################ 示例 (末)################
# IFCSI IFCSI.war
# IFCSI2.0 IFCSI.jar 2G
# DCPS-HTTP_YD1.0 DCPS-HTTP.jar 1G
# ONENET1.0 DCPS-HTTP.jar 1G
# DCPS-HTTP_YD1.1 DCPS-HTTP.jar 1G
# ONENET1.1 DCPS-HTTP.jar 1G
# DCPS-HTTP_DX DCPS-HTTP.jar 1G
# DXIOT DCPS-HTTP.jar 1G
EOF
  log_out "创建 [[ ${file} ]] 文件结束"
}

#######################################
# Empty a log file once it has reached 10240000 lines.
# Arguments: $1 - file to check; a missing file is silently skipped
#######################################
function cleanFile() {
  local fileName=$1
  local -r MAX_LINE_NUM=10240000
  local lineNum

  if [[ ! -f "${fileName}" ]]; then
    return 0
  fi

  lineNum=$(wc -l <"${fileName}")
  if [[ ${lineNum} -ge ${MAX_LINE_NUM} ]]; then
    : >"${fileName}"
    log_out "[[ ${fileName} ]] 行数:${lineNum},超过 ${MAX_LINE_NUM},将其置空"
  fi
}

#######################################
# Start every service listed in the service list file.
# Each non-comment, non-blank line has the form
#   <serviceName> [jarName] [memoryLimit]
# Carriage returns are dropped so a file edited on Windows still works.
# Arguments: $1 - service list file, e.g. serverList.sh
# Returns:   1 if the file is not readable
#######################################
function readFileAndStartServer() {
  local serverListFile=$1
  local entryName entryJar entryLimit _rest

  log_out "服务列表文件:[[ ${serverListFile} ]]"
  if [[ ! -r "${serverListFile}" ]]; then
    log_out "服务列表文件 [[ ${serverListFile} ]] 不存在或不可读" W
    return 1
  fi

  # A pipe (not process substitution) keeps this usable in bash POSIX mode.
  tr -d '\r' <"${serverListFile}" \
    | grep -Ev "^\s*#|^\s*$" \
    | while read -r entryName entryJar entryLimit _rest; do
      startServer "${entryName}" "${entryJar}" "${entryLimit}"
    done
}

#######################################
# Start every service directory found directly under basePath.
# Symlinks and hidden directories are skipped, as are directories whose name
# matches the exclusion pattern (case-insensitive): infrastructure such as
# mysql/redis/kafka, the watchdog's own directory, backups, and services that
# are not started by default (IFCSI, ONENET, DXIOT, ...).
# Globals:   basePath (read)
#######################################
function readDirAndStartServer() {
  local -r excludePattern="IFCSI|IFCSI2.0|DCPS-HTTP_YD|TOPSAIL|DPSDK|ONENET|DX|IoT|mysql|redis|kafka|sh|License|server_keeper|ba"
  local dirPath dirName

  for dirPath in "${basePath}"/*/; do
    dirPath=${dirPath%/}
    # Also covers the "no match" case, where the glob is left unexpanded.
    if [[ ! -d "${dirPath}" || -L "${dirPath}" ]]; then
      continue
    fi
    dirName=${dirPath##*/}
    if grep -Eiq -- "${excludePattern}" <<<"${dirName}"; then
      continue
    fi
    echo "dirName:${dirName}"
    startServer "${dirName}"
  done
}

#######################################
# Watchdog loop: check and (re)start everything, rotate the logs, sleep,
# repeat forever.
# Globals:   sleepTime (read)
#######################################
function main() {
  while true; do
    # zookeeper and kafka
    start_zookeeperAndKafka
    # redis
    start_redis
    # mysql
    start_mysql
    # services listed in the list file
    readFileAndStartServer serverList.sh
    # services discovered from the directory layout
    readDirAndStartServer
    # log rotation
    cleanFile INFO.log
    cleanFile DEBUG.log
    cleanFile WARN.log
    cleanFile ERROR.log
    log_out "~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ${sleepTime} 秒后开始下一轮检查 ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ " D
    sleep "${sleepTime}"
  done
}

################################################################################
# Start-up sequence
################################################################################

# Validate the script location; a wrong location is only reported (ERROR.log),
# the watchdog keeps running.
if [[ ! ${basePath} =~ ${BASE_PATH} ]]; then
  log_out "守护进程存放位置 [[ ${basePath} ]] 可能有误,请检查脚本路径!!!" E
fi

# Make the watchdog start on boot.
register_autostart

# Create the service list file if necessary.
mkDir serverList.sh

# Guard all services (never returns).
main
exit 0