本文转自 (https://cloud.tencent.com/developer/article/1027097)
alert foo@bar with reminder on 10 cycles 此句表示,如果故障一直没有恢复,则每隔10个周期重复发送一次提醒邮件。
if failed host port 8599 for 3 times within 4 cycles then alert 这样就是:若在四个周期内,三次 8599(我的电驴口)端口都无法通,则邮件通知。很方便!



  1. [root@bastion-IDC src]# wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-6.noarch.rpm
  2. [root@bastion-IDC src]# rpm -ivh epel-release-latest-6.noarch.rpm


  1. [root@bastion-IDC src]# yum install -y monit


  1. #wget https://mmonit.com/monit/dist/binary/5.20.0/monit-5.20.0-linux-x86.tar.gz
  2. #tar -zvxf monit-5.20.0-linux-x86.tar.gz
  3. #mv monit-5.20.0 /usr/local/monit
  4. #cp /usr/local/monit/conf/monitrc /etc/




  1. [root@bastion-IDC src]# cp /etc/monit.conf /etc/monit.conf.bak
  2. [root@bastion-IDC src]# cat /etc/monit.conf //自定义配置如下
  3. set daemon 120 #Poll at 2-minute intervals //每2分钟检查一次,单位为秒;monit做不到实时监控。
  4. set logfile /home/monit/log/monit.log //monit的日志文件
  5. set alert zhouwei@chinabank.com.cn with reminder on 1 cycle //出现1次错误就发报警邮件到指定邮箱。多个邮箱地址就配置多行;with后的配置可以不加。
  6. #set mailserver mail.tildeslash.com, mail.foo.bar port 10025, localhost with timeout 15 seconds
  7. set mailserver //设置邮件服务器
  8. set httpd port 2812 and use address //设置http监控页面的端口和ip
  9. allow localhost #Allow localhost to connect //允许本机访问
  10. allow //允许此IP段访问
  11. allow admin:nishiwode #Allow Basic Auth //认证的用户名和密码
  12. # all system //平均负载.内存使用率,cpu使用率
  13. check system
  14. if loadavg (1min) > 4 then alert
  15. if loadavg (5min) > 2 then alert
  16. if memory usage > 75% then alert
  17. if cpu usage (user) > 70% then alert
  18. if cpu usage (system) > 30% then alert
  19. if cpu usage (wait) > 20% then alert
  20. # all disk //磁盘空间使用率
  21. check device data with path /dev/sda2
  22. if space usage > 90% then alert
  23. if inode usage > 85% then alert
  24. check device home with path /dev/sda3
  25. if space usage > 85% for 5 cycles then alert //如果连续5个监控周期space使用率都超过85%,就发报警邮件
  26. if inode usage > 85% for 5 cycles then alert
  27. # all rsync
  28. #
  29. check process sshd with pidfile /var/run/sshd.pid //监控ssh服务
  30. start program "/etc/init.d/sshd start"
  31. stop program "/etc/init.d/sshd stop"
  32. if failed host port 22 protocol ssh then restart
  33. if 3 restarts within 5 cycles then timeout //设置在5个监控周期内重启3次则超时,那么就不再监控这个服务程序
  34. check process httpd with pidfile /var/run/httpd.pid //监控http服务
  35. start program = "/etc/init.d/httpd start"
  36. stop program = "/etc/init.d/httpd stop"
  37. if failed host port 80 protocol http then restart
  38. if 5 restarts within 5 cycles then timeout
  39. check process web_lb with pidfile /data/v20/server/web_lb/httpd.pid //监控自定义服务
  40. start program = "/data/v20/bin/lb.sh" //启动脚本
  41. stop program = "/data/v20/bin/lb_stop.sh" //停止脚本
  42. if failed host port 16101 proto http then restart
  43. if failed host port 16101 proto http for 5 times within 5 cycles then exec "/data/v20/bin/lb_pay.sh"
  44. if failed host port 16102 type TCPSSL proto http then restart
  45. if failed host port 16102 type TCPSSL proto http for 5 times within 5 cycles then exec "/data/v20/bin/lb_pay.sh"


  1. [root@bastion-IDC src]# /etc/init.d/monit start/stop/reload/status/restart
  2. [root@bastion-IDC ~]# monit -t //检测monit配置是否正确
  3. [root@bastion-IDC ~]# monit reload //重载monit配置
  4. [root@bastion-IDC ~]# monit status //查看monit进程监控情况


  1. Cannot translate 'huanqiu_web2' to FQDN name -- Name or service not known
  2. Generated unique Monit id af76cbce671f323782e09e0d114857fd and stored to '/root/.monit.id'
  3. Reinitializing monit daemon
  4. No daemon process found

在本机的/etc/hosts里面做下主机映射,即 huanqiu_web2

  1. [root@huanqiu_web1 ~]# cat /etc/hosts
  2. localhost localhost.localdomain localhost4 localhost4.localdomain4
  3. ::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
  4. huanqiu_web1
  5. [root@huanqiu_web1 ~]# cat /etc/monit.conf
  6. set daemon 30
  7. set logfile syslog facility log_daemon
  8. set pidfile /var/run/monit.pid
  9. set httpd port 30000
  10. use address
  11. allow
  12. check process nginx with pidfile /Data/app/nginx/logs/nginx.pid
  13. start program = "/Data/app/nginx/sbin/nginx"
  14. stop program = "/Data/app/nginx/sbin/nginx -s stop"
  15. check process php-fpm with pidfile /Data/app/php5.6.26/var/run/php-fpm.pid
  16. start program = "/Data/app/php5.6.26/sbin/php-fpm"
  17. stop program = "/bin/bash -c 'kill -s SIGTERM `ps -ef|grep /Data/app/php5.6.26/etc/php-fpm.conf|grep -v grep|awk -F" " '{print $2}'`'"
  18. check process mysql with pidfile /Data/app/mysql5.1.57/var/dev-new-test.pid
  19. start program = "/Data/app/mysql5.1.57/bin/mysqld_safe --defaults-file=/Data/app/mysql5.1.57/my.cnf &"
  20. stop program = "/bin/bash -c 'kill -s SIGTERM `ps -ef|grep mysqld_safe|grep -v grep|awk -F" " '{print $2}'`'"
  21. check process tomcat-7-admin-wls matching "/Data/app/tomcat-7-wls/conf"
  22. start program = "/Data/app/tomcat-7-wls/bin/startup.sh"
  23. stop program = "/bin/bash -c 'kill -s SIGTERM `ps -ef|grep /Data/app/tomcat-7-wls/conf|grep -v grep|awk -F" " '{print $2}'`'"
  24. check process tomcat-7-wls matching "/Data/app/tomcat-7-wls/conf"
  25. start program = "/Data/app/tomcat-7-wls/bin/startup.sh"
  26. stop program = "/bin/bash -c 'kill -s SIGTERM `ps -ef|grep /Data/app/tomcat-7-wls/conf|grep -v grep|awk -F" " '{print $2}'`'"
  27. check process tomcat-7 matching "/Data/app/tomcat-7/conf"
  28. start program = "/Data/app/tomcat-7/bin/startup.sh"
  29. stop program = "/bin/bash -c 'kill -s SIGTERM `ps -ef|grep /Data/app/tomcat-7/conf|grep -v grep|awk -F" " '{print $2}'`'"
  30. check process tomcat-7-banshanbandao matching "/Data/app/tomcat-7-banshanbandao/conf"
  31. start program = "/Data/app/tomcat-7-banshanbandao/bin/startup.sh"
  32. stop program = "/bin/bash -c 'kill -s SIGTERM `ps -ef|grep /Data/app/tomcat-7-banshanbandao/conf|grep -v grep|awk -F" " '{print $2}'`'"
  33. check process vpn matching "/etc/vpnc/vpnc-script"
  34. start program = "/bin/sh /bin/vpn_start"
  35. stop program = "/bin/bash -c 'kill -s SIGTERM `ps -ef|grep vpnc-script|grep -v grep|awk -F" " '{print $2}'`'"
  36. [root@huanqiu_web1 ~]# monit -t
  37. Control file syntax OK
  38. [root@huanqiu_web1 ~]# /etc/init.d/monit start
  39. Starting monit: [ OK ]
  40. [root@huanqiu_web1 ~]# lsof -i:30000
  42. monit 6109 root 5u IPv4 2438183462 0t0 TCP localhost:30000 (LISTEN)
  43. [root@huanqiu_web1 ~]# monit reload
  44. Reinitializing monit daemon
  45. [root@huanqiu_web1 ~]# monit status
  46. The Monit daemon 5.14 uptime: 8m
  47. Process 'nginx'
  48. status Running
  49. monitoring status Monitored
  50. pid 499
  51. parent pid 1
  52. uid 0
  53. effective uid 0
  54. gid 0
  55. uptime 28d 20h 17m
  56. children 8
  57. memory 19.6 MB
  58. memory total 381.6 MB
  59. memory percent 0.0%
  60. memory percent total 0.5%
  61. cpu percent 0.0%
  62. cpu percent total 0.0%
  63. data collected Wed, 22 Mar 2017 11:32:42
  64. Process 'php-fpm'
  65. status Running
  66. monitoring status Monitored
  67. pid 3153
  68. parent pid 1
  69. uid 0
  70. effective uid 0
  71. gid 0
  72. uptime 43d 19h 26m
  73. children 16
  74. memory 8.7 MB
  75. memory total 352.3 MB
  76. memory percent 0.0%
  77. memory percent total 0.5%
  78. cpu percent 0.0%
  79. cpu percent total 0.1%
  80. data collected Wed, 22 Mar 2017 11:32:42
  81. Process 'mysql'
  82. status Running
  83. monitoring status Monitored
  84. pid 46403
  85. parent pid 46254
  86. uid 500
  87. effective uid 500
  88. gid 500
  89. uptime 93d 0h 34m
  90. children 0
  91. memory 317.8 MB
  92. memory total 317.8 MB
  93. memory percent 0.4%
  94. memory percent total 0.4%
  95. cpu percent 0.0%
  96. cpu percent total 0.0%
  97. data collected Wed, 22 Mar 2017 11:32:42
  98. Process 'tomcat-7-admin-wls'
  99. status Running
  100. monitoring status Monitored
  101. pid 34188
  102. parent pid 1
  103. uid 0
  104. effective uid 0
  105. gid 0
  106. uptime 4d 19h 15m
  107. children 0
  108. memory 803.6 MB
  109. memory total 803.6 MB
  110. memory percent 1.2%
  111. memory percent total 1.2%
  112. cpu percent 0.0%
  113. cpu percent total 0.0%
  114. data collected Wed, 22 Mar 2017 11:32:42
  115. Process 'tomcat-7-wls'
  116. status Running
  117. monitoring status Monitored
  118. pid 34188
  119. parent pid 1
  120. uid 0
  121. effective uid 0
  122. gid 0
  123. uptime 4d 19h 15m
  124. children 0
  125. memory 803.6 MB
  126. memory total 803.6 MB
  127. memory percent 1.2%
  128. memory percent total 1.2%
  129. cpu percent 0.0%
  130. cpu percent total 0.0%
  131. data collected Wed, 22 Mar 2017 11:32:42
  132. Process 'tomcat-7'
  133. status Running
  134. monitoring status Monitored
  135. pid 14524
  136. parent pid 1
  137. uid 0
  138. effective uid 0
  139. gid 0
  140. uptime 5d 21h 43m
  141. children 0
  142. memory 581.2 MB
  143. memory total 581.2 MB
  144. memory percent 0.9%
  145. memory percent total 0.9%
  146. cpu percent 0.0%
  147. cpu percent total 0.0%
  148. data collected Wed, 22 Mar 2017 11:32:42
  149. Process 'tomcat-7-banshanbandao'
  150. status Running
  151. monitoring status Monitored
  152. pid 29217
  153. parent pid 1
  154. uid 0
  155. effective uid 0
  156. gid 0
  157. uptime 117d 0h 35m
  158. children 0
  159. memory 1.4 GB
  160. memory total 1.4 GB
  161. memory percent 2.1%
  162. memory percent total 2.1%
  163. cpu percent 0.0%
  164. cpu percent total 0.0%
  165. data collected Wed, 22 Mar 2017 11:32:42
  166. Process 'vpn'
  167. status Running
  168. monitoring status Monitored
  169. pid 13774
  170. parent pid 1
  171. uid 0
  172. effective uid 0
  173. gid 0
  174. uptime 1h 36m
  175. children 0
  176. memory 2.4 MB
  177. memory total 2.4 MB
  178. memory percent 0.0%
  179. memory percent total 0.0%
  180. cpu percent 0.0%
  181. cpu percent total 0.0%
  182. data collected Wed, 22 Mar 2017 11:32:42
  183. System 'huanqiu_web1'
  184. status Running
  185. monitoring status Monitored
  186. load average [0.00] [0.04] [0.09]
  187. cpu 0.6%us 0.1%sy 0.0%wa
  188. memory usage 5.1 GB [8.0%]
  189. swap usage 0 B [0.0%]
  190. data collected Wed, 22 Mar 2017 11:32:42


a)利用进程的pid文件进行监控:with pidfile
b)利用进程的关键字匹配方式进行监控: matching;可以使用“monit procmatch 进程名 CLI”来查找要匹配的唯一关键字

  1. [root@bastion-IDC ~]# cat /etc/monit.conf
  2. set daemon 30
  3. set logfile syslog facility log_daemon
  4. set pidfile /var/run/monit.pid
  5. set httpd port 30000
  6. use address
  7. allow
  8. ............
  9. check process nginx with pidfile /usr/local/nginx/logs/nginx.pid
  10. start program = "/usr/local/nginx/sbin/nginx"
  11. stop program = "/usr/local/nginx/sbin/nginx -s stop"
  12. check process nginx with pidfile /webserver/nginx/run/nginx.pid
  13. start program = "/webserver/init.d/nginx start" with timeout 10 seconds
  14. stop program = "/webserver/init.d/nginx stop"
  15. if failed host heylinux.com port 80 protocol http with timeout 10 seconds then restart
  16. if 3 restarts within 5 cycles then timeout group webserver
  17. check process php-fpm with pidfile /var/run/php-fpm/php-fpm.pid
  18. start program = "/etc/init.d/php-fpm start"
  19. stop program = "/etc/init.d/php-fpm stop"
  20. check process mysqld with pidfile "/letv/mysql2/data/cdn.oss.letv.com.pid"
  21. start program = "/etc/init.d/mysqld start"
  22. stop program = "/etc/init.d/mysqld stop"
  23. if failed host port 3306 then restart
  24. check process mysql with pidfile /webserver/mysql/run/mysqld.pid
  25. start program = "/webserver/init.d/mysqld start" with timeout 10 seconds
  26. stop program = "/webserver/init.d/mysqld stop"
  27. if failed port 3307 protocol mysql with timeout 10 seconds then restart
  28. if 3 restarts within 5 cycles then timeout group webserver
  29. check process memcached with pidfile "/var/run/memcached/memcached.pid"
  30. start program = "/etc/init.d/memcached start"
  31. stop program = "/etc/init.d/memcached stop"
  32. if failed host port 11211 protocol memcache then restart
  33. check process zabbix with pidfile "/usr/local/zabbix/zabbix_agentd.pid"
  34. start program = "/usr/local/zabbix/sbin/zabbix_agentd -c /usr/local/zabbix/conf/zabbix_agentd.conf"
  35. stop program = "/bin/bash -c 'kill -s SIGTERM `cat /usr/local/zabbix/zabbix_agentd.pid`'"
  36. if failed host port 10050 type tcp 2 times within 2 cycles then restart
  37. check process httpd
  38. with pidfile "/usr/local/apache/logs/httpd.pid"
  39. start program = "/usr/local/apache/bin/httpd -k start"
  40. stop program = "/bin/bash -c 'kill -s SIGTERM `cat /usr/local/apache/logs/httpd.pid`'"
  41. check process redis
  42. with pidfile "/var/run/redis.pid"
  43. start program = "/usr/local/bin/redis-server /letv/uss/redis/redis.conf"
  44. stop program = "/bin/bash -c 'kill -s SIGTERM `cat /var/run/redis.pid`'"
  45. check process rsync with pidfile "/var/run/rsyncd.pid"
  46. start program = "/usr/bin/rsync --daemon"
  47. stop program = "/bin/bash -c 'kill -s SIGTERM `cat /var/run/rsyncd.pid`'"
  48. check process pytask.py matching "/letv/p2sp/offline/pytask.py"
  49. start program = "/usr/bin/python /letv/p2sp/offline/pytask.py"
  50. stop program = "/bin/bash -c 'kill -s SIGTERM `ps -ef|grep offline/pytask.py|grep -v grep|awk -F" " '{print $2}'`'"
  51. check process pytimed.py matching "/letv/p2sp/offline/pytimed.py"
  52. start program = "/usr/bin/python /letv/p2sp/offline/pytimed.py"
  53. stop program = "/bin/bash -c 'kill -s SIGTERM `ps -ef|grep offline/pytimed.py|grep -v grep|awk -F" " '{print $2}'`'"
  54. check process hadoop with pidfile "/usr/local/hadoop/pids/hadoop-hadoop-datanode.pid"
  55. start program = "/usr/bin/sudo -u hadoop -i hadoop-daemon.sh start datanode"
  56. stop program = "/usr/bin/sudo -u hadoop -i hadoop-daemon.sh stop datanode"
  57. check process ETMDaemon matching "/letv/p2sp/xware/lib/ETMDaemon"
  58. start program = "/letv/p2sp/xware/portal"
  59. stop program = "/bin/bash -c 'kill -s SIGTERM `ps -ef|grep ETMDaemon|grep -v grep |awk '{print $2}'`'"


  1. [root@bastion-IDC ~]# vim /etc/monit.conf
  2. .......
  3. include /etc/services.cfg


  1. [root@bastion-IDC ~]# vim /etc/services.cfg
  2. check process nginx with pidfile /usr/local/nginx/logs/nginx.pid
  3. start program = "/usr/local/nginx/sbin/nginx"
  4. stop program = "/usr/local/nginx/sbin/nginx -s stop"
  5. check process php-fpm with pidfile /var/run/php-fpm/php-fpm.pid
  6. start program = "/etc/init.d/php-fpm start"
  7. stop program = "/etc/init.d/php-fpm stop"
  8. check process mysqld with pidfile "/letv/mysql2/data/cdn.oss.letv.com.pid"
  9. start program = "/etc/init.d/mysqld start"
  10. stop program = "/etc/init.d/mysqld stop"
  11. .........


  1. # mail-server
  2. set mailserver smtp.huanqiu.cn port 587
  3. # email-format
  4. set mail-format {
  5. from: monit@huanqiu.cn
  6. subject: $SERVICE $EVENT at $DATE on $HOST
  7. message: Monit $ACTION $SERVICE $EVENT at $DATE on $HOST : $DESCRIPTION.
  8. Yours sincerely,
  9. Monit
  10. }
  11. set alert wangshibo@huanqiu.cn


  1. check system
  2. if loadavg (5min) > 4 for 4 times within 5 cycles then exec "/etc/monit/script/sendsms sysload 5min >4"
  3. if memory usage > 90% then exec "/etc/monit/script/sendsms memory useage>90%"
  4. if cpu usage (user) > 70% for 4 times within 5 cycles then exec "/etc/monit/script/sendsms cpu(user) >70%"
  5. if cpu usage (system) > 30% for 4 times within 5 cycles then exec "/etc/monit/script/sendsms cpu(system) >30% "
  6. if cpu usage (wait) > 20% for 4 times within 5 cycles then exec "/etc/monit/script/sendsms system busy! cpu(wait) >20%"


  1. check host Unicom_mobi with address
  2. if failed icmp type echo count 10 with timeout 20 seconds then exec "/etc/monit/script/sendsms Unicom_mobi ping failed!"
  3. if failed port 22 type tcp with timeout 10 seconds for 2 times within 3 cycles then exec "/etc/monit/script/sendsms unicom connect failed!"
  4. if failed port 9528 type tcp with timeout 10 seconds for 2 times within 3 cycles then exec "/etc/monit/script/sendsms unicom connect failed!"
  5. if failed port 9529 type tcp with timeout 10 seconds for 2 times within 3 cycles then exec "/etc/monit/script/sendsms unicom connect failed!"
  6. if failed port 9530 type tcp with timeout 10 seconds for 2 times within 3 cycles then exec "/etc/monit/script/sendsms unicom connect failed!"


  1. check filesystem root with path /dev/mapper/VolGroup00-LogVol00
  2. if space usage > 80% for 5 times within 15 cycles then exec "/etc/monit/script/clear_core.sh"
  3. else if succeed for 1 times within 2 cycles then exec "/etc/monit/script/sendsms '/dev/sda1 usage > 90% clear core file succeed!'>/dev/null 2>&1"



check system localhost
if loadavg (1min) > 10 then alert
if loadavg (5min) > 6 then alert
if memory usage > 75% then alert
if cpu usage (user) > 70% then alert
if cpu usage (system) > 60% then alert
if cpu usage (wait) > 75% then alert
if loadavg (1min) > 10 for 2 cycles then alert


check host MAIL with address
if failed icmp type echo within 10 cycles then alert
if failed port 25 protocol smtp then alert
else if recovered then exec "/scripts/mail-script"
if failed port 22 protocol ssh then alert
if failed port 143 protocol imap then alert


check file with path /home/laicb/test.txt
if does not exist for 5 cycles then alert


有些资源检测可以用在check system入口,有些可以用在check entry入口,有些两者都可以用。
if cpu is greater than 50% for 5 cycles then restart


check file httpd.conf with path /usr/local/apache/conf/httpd.conf
if changed timestamp
then exec "/usr/local/apache/bin/apachectl graceful"
check file stored.ckp with path /msg-foo/config/stored.ckp
if timestamp > 1 minute then alert


这个只能用在check file入口
check file with path /home/laicb/test.txt
if does not exist for 5 cycles then alert
if changed size for 1 cycles then alert //如果没有指定周期,查看该服务对应的配置会发现是for 5 times within 5 cycles
如果更改文件大小,那么文件大小变化之后就在状态栏里显示size changed


check file monit.bin with path "/usr/local/bin/monit"
if failed permission 0555 then unmonitor //如果/usr/local/bin/monit文件的权限不是0555,就停止对该文件的监控
check file passwd with path /etc/passwd
if failed uid root then unmonitor //如果/etc/passwd文件的属主不是root,就停止对该文件的监控


check process sshd with pidfile /var/run/sshd.pid
if changed pid then exec "/my/script"


check process myapp with pidfile /var/run/myapp.pid
start program = "/etc/init.d/myapp start"
stop program = "/etc/init.d/myapp stop"
if uptime > 3 days then restart


check host www.huanqiu.com with address www.huanqiu.com
if failed icmp type echo count 5 with timeout 15 seconds
then alert


check process apache with pidfile /var/run/httpd.pid
start program = "/etc/init.d/httpd start"
stop program = "/etc/init.d/httpd stop"
if cpu > 40% for 2 cycles then alert
if totalcpu > 60% for 2 cycles then alert
if totalcpu > 80% for 5 cycles then restart
if mem > 100 MB for 5 cycles then stop
if loadavg(5min) greater than 10.0 for 8 cycles then stop



set httpd port 30000
[rootcdn ~]# cat /etc/monitrc
set daemon 30
set logfile syslog facility log_daemon
set pidfile /var/run/monit.pid
set httpd port 30000

allow admin:TVA3z3i



在monit -t和monit reload都没有报错的情况下,monit status报错如下:
[rootcdn ~]# monit status
monit: cannot read status from the monit daemon
[rootcdn ~]# tail -f /var/log/messages
Aug 18 19:27:21 cdn monit[14491]: monit: Denied connection from non-authorized client []
Aug 18 19:27:21 cdn monit[16899]: monit: cannot read status from the monit daemon
将monit配置文件中的“allow”修改为“allow localhost”即可!!
[rootcdn ~]# vim /etc/monitrc
set daemon 30
set logfile syslog facility log_daemon
set pidfile /var/run/monit.pid
set httpd port 30000
allow localhost
[rootcdn ~]# monit status
The Monit daemon 5.3.2 uptime: 5m
Process ‘nginx_down’
status Running
monitoring status Monitored
pid 18671
parent pid 1
uptime 145d 5h 17m
children 8
memory kilobytes 484
memory kilobytes total 4572
memory percent 0.0%
memory percent total 0.0%
cpu percent 0.0%
cpu percent total 0.0%
data collected Mon, 18 Aug 2014 19:34:25

3)下面的错误在使用上面两种方法后,仍不能解决问题!(需要添加use address)

[root182 conf]# monit -t
Control file syntax OK
[root182 conf]# monit reload
Reinitializing monit daemon
[root182 conf]# monit status
monit: error connecting to the monit daemon
[root182 conf]# cat /etc/monitrc.bak
set daemon 30
set httpd port 30000
set logfile syslog facility log_daemon
set pidfile /var/run/monit.pid
需要在monit配置文件中添加“use address”内容!
[root182 conf]# cat /etc/monitrc
set daemon 30
set httpd port 30000
use address
set logfile syslog facility log_daemon
set pidfile /var/run/monit.pid
[root182 conf]# monit status
The Monit daemon 5.3.2 uptime: 7m
Process ‘rsync’
status Running
monitoring status Monitored
pid 13519
parent pid 1
uptime 393d 9h 8m
children 0
memory kilobytes 540
memory kilobytes total 540
memory percent 0.0%
memory percent total 0.0%
cpu percent 0.0%
cpu percent total 0.0%
data collected Thu, 21 Aug 2014 19:24:58
上面的第3种方式是最全面的,如果添加了use address后,使用monit status仍然出现下面的情况:
[rootly-u-gfs1 ~]# monit status
monit: error connecting to the monit daemon
[rootly-u-gfs1 ~]# monit status
The Monit daemon 5.3.2 uptime: 6m
Process ‘net-snmp’
status Running
monitoring status Monitored


[rootlinux-node2 ~]# yum update
[rootlinux-node2 ~]# yum install -y monit
[rootlinux-node2 ~]# rpm -ql monit
[rootlinux-node2 ~]# monit -V
This is Monit version 5.14
Copyright (C) 2001-2016 Tildeslash Ltd. All Rights Reserved.
[rootlinux-node2 ~]# monit -v
Adding host allow ‘localhost’
Skipping redundant host ‘localhost’
Adding credentials for user ‘admin’
Runtime constants:
Control file = /etc/monitrc
Log file = /var/log/monit.log
Pid file = /run/monit.pid
Id file = /root/.monit.id
State file = /root/.monit.state
Debug = True
Log = True
Use syslog = False
Is Daemon = True
Use process engine = True
Poll time = 30 seconds with start delay 0 seconds
Expect buffer = 256 bytes
Mail from = (not defined)
Mail subject = (not defined)
Mail message = (not defined)
Start monit httpd = True
httpd bind address = localhost
httpd portnumber = 2812
httpd ssl = Disabled
httpd signature = Enabled
httpd auth. style = Basic Authentication and Host/Net allow list
The service list contains the following entries:
System Name = linux-node2.openstack
Monitoring mode = active
[rootlinux-node2 ~]# grep -v '^#' /etc/monitrc
set daemon 30 # check services at 30 seconds intervals
set logfile syslog
set httpd port 2812 and
use address localhost # only accept connection from localhost
allow localhost # allow localhost to connect to the server and
allow admin:monit # require user ‘admin’ with password ‘monit’
allow monit # allow users of group ‘monit’ to connect (rw)
allow users readonly # allow users of group ‘users’ to connect readonly
include /etc/monit.d/*
[rootlinux-node2 ~]# cat /etc/logrotate.d/monit
/var/log/monit.log {
size 100k
create 0644 root root
/bin/systemctl reload monit.service > /dev/null 2>&1 || :
[rootlinux-node2 ~]# cat /etc/monit.d/logging

log to monit.log

set logfile /var/log/monit.log //监视周期为60秒,日志输出及日志滚动已配置好了
[rootlinux-node2 ~]# vim /etc/monitrc
set daemon 5
set logfile syslog
set httpd port 2812 and
use address localhost
allow localhost
allow admin:monit
allow monit
allow users readonly
include /etc/monit.d/*
check process sshd with pidfile /var/run/sshd.pid
start program "/usr/bin/systemctl start sshd.service"
stop program "/usr/bin/systemctl stop sshd.service"
if failed port 22 protocol ssh then restart
if 5 restart within 5 cycles then timeout
check process apache with pidfile /etc/httpd/run/httpd.pid
start program = "/usr/bin/systemctl start httpd" with timeout 60 seconds
stop program = "/usr/bin/systemctl stop httpd"
if failed host linux-node2.openstack port 80 protocol http
and request "/readme.html"
then restart
if 3 restarts within 5 cycles then timeout
group apache
check process mariadb with pidfile "/var/lib/mysql/linux-node2.pid"
start = "/usr/bin/systemctl start mariadb.service"
stop = "/usr/bin/systemctl stop mariadb.service"
if failed host port 3306 protocol mysql then restart
if 5 restarts within 5 cycles then timeout

MariaDB [(none)]> show variables like "%pid%";
| Variable_name | Value |
| pid_file | /var/lib/mysql/linux-node2.pid |
1 row in set (0.00 sec)
[rootlinux-node2 ~]# systemctl enable monit.service
Created symlink from /etc/systemd/system/multi-user.target.wants/monit.service to /usr/lib/systemd/system/monit.service.
[rootlinux-node2 ~]# systemctl start monit.service
[rootlinux-node2 ~]# lsof -i:2812
monit 89106 root 5u IPv4 172788270 0t0 TCP localhost:atmtcp (LISTEN)
[rootlinux-node2 ~]# systemctl status monit.service
● monit.service - Pro-active monitoring utility for unix systems
Loaded: loaded (/usr/lib/systemd/system/monit.service; enabled; vendor preset: disabled)
Active: active (running) since Fri 2017-02-03 10:47:22 CST; 50s ago
Main PID: 89106 (monit)
CGroup: /system.slice/monit.service
└─89106 /usr/bin/monit -I
Feb 03 10:47:22 linux-node2.openstack systemd[1]: Started Pro-active monitoring utility for unix systems.
Feb 03 10:47:23 linux-node2.openstack systemd[1]: Starting Pro-active monitoring utility for unix systems…
Feb 03 10:47:23 linux-node2.openstack monit[89106]: /etc/monitrc:20: Program does not exist: ‘systemctl’
Feb 03 10:47:23 linux-node2.openstack monit[89106]: /etc/monitrc:21: Program does not exist: ‘systemctl’
Feb 03 10:47:23 linux-node2.openstack monit[89106]: Starting Monit 5.14 daemon with http interface at [localhost]:2812
[rootlinux-node2 ~]# monit status
The Monit daemon 5.14 uptime: 9m
Process ‘sshd’
status Running
monitoring status Monitored
pid 1755
parent pid 1
uid 0
effective uid 0
gid 0
uptime 86d 19h 39m
children 6
memory 3.5 MB
memory total 25.1 MB
memory percent 0.0%
memory percent total 0.0%
cpu percent 0.0%
cpu percent total 0.0%
port response time 0.021s to [localhost]:22 type TCP/IP protocol SSH
data collected Fri, 03 Feb 2017 10:57:20
Process ‘apache’
status Not monitored
monitoring status Not monitored
data collected Fri, 03 Feb 2017 10:50:21
Process ‘mariadb’
status Running
monitoring status Monitored
pid 46235
parent pid 1
uid 27
effective uid 27
gid 27
uptime 29d 16h 1m
children 0
memory 296.1 MB
memory total 296.1 MB
memory percent 0.4%
memory percent total 0.4%
cpu percent 0.0%
cpu percent total 0.0%
port response time 0.001s to []:3306 type TCP/IP protocol MYSQL
data collected Fri, 03 Feb 2017 10:57:20
System ‘linux-node2.openstack’
status Running
monitoring status Monitored
load average [2.01] [1.86] [1.94]
cpu 5.2%us 2.0%sy 0.0%wa
memory usage 44.0 GB [70.1%]
swap usage 2.6 MB [0.1%]
data collected Fri, 03 Feb 2017 10:57:20
[rootlinux-node2 ~]# monit reload
Reinitializing monit daemon
[rootlinux-node2 ~]# systemctl stop nginx.service
[rootlinux-node2 ~]# tailf /var/log/monit.log
[CST Apr 5 21:35:18] error : ‘nginx’ process is not running
[CST Apr 5 21:35:18] info : ‘nginx’ trying to restart
[CST Apr 5 21:35:18] info : ‘nginx’ start: /usr/bin/systemctl
[rootlinux-node2 ~]# systemctl list-unit-files | grep monit.service
monit.service disabled
[rootlinux-node2 ~]# systemctl enable monit.service
ln -s ‘/usr/lib/systemd/system/monit.service’ ‘/etc/systemd/system/multi-user.target.wants/monit.service’
[rootlinux-node2 ~]# systemctl list-unit-files | grep monit.service
monit.service enabled
本文转自 (https://cloud.tencent.com/developer/article/1027097)


  1. root@iZwz93z41dx386tb44eba4Z:~# cat /home/dc/dc_code/frp_0.27.0_linux_amd64/frps-control.sh
  2. if [[ $1 == "start" ]];then
  3. systemctl start daiyi-frps.service
  4. elif [[ $1 == "stop" ]];then
  5. systemctl stop daiyi-frps.service
  6. fi;
  7. root@iZwz93z41dx386tb44eba4Z:~#


  1. check process frps with matching "frps"
  2. start program = "/home/dc/dc_code/frp_0.27.0_linux_amd64/frps-control.sh start" with timeout 60 seconds
  3. stop program = "/home/dc/dc_code/frp_0.27.0_linux_amd64/frps-control.sh stop"


  1. systemctl restart monit.service

多次 kill 掉之后, 它还是会立刻恢复, 效果很好:

  1. root@iZwz93z41dx386tb44eba4Z:~# ps -ef|grep frps
  2. root 10227 1 0 09:20 ? 00:00:00 /bin/bash /home/dc/dc_code/frp_0.27.0_linux_amd64/start_frps.sh
  3. root 10235 10227 0 09:20 ? 00:00:00 sudo /home/dc/dc_code/frp_0.27.0_linux_amd64/frps -c /home/dc/dc_code/frp_0.27.0_linux_amd64/frps.ini
  4. root 10237 10235 0 09:20 ? 00:00:00 /home/dc/dc_code/frp_0.27.0_linux_amd64/frps -c /home/dc/dc_code/frp_0.27.0_linux_amd64/frps.ini
  5. root 11644 11230 0 09:52 pts/1 00:00:00 grep --color=auto frps
  6. root@iZwz93z41dx386tb44eba4Z:~# kill -9 10227
  7. root@iZwz93z41dx386tb44eba4Z:/home/dc/dc_code/monit-5.25.3/conf# ps -ef|grep frps
  8. root 10068 1 0 09:18 ? 00:00:00 /bin/bash /home/dc/dc_code/frp_0.27.0_linux_amd64/start_frps.sh
  9. root 10072 10068 0 09:18 ? 00:00:00 sudo /home/dc/dc_code/frp_0.27.0_linux_amd64/frps -c /home/dc/dc_code/frp_0.27.0_linux_amd64/frps.ini
  10. root 10080 10072 0 09:18 ? 00:00:00 /home/dc/dc_code/frp_0.27.0_linux_amd64/frps -c /home/dc/dc_code/frp_0.27.0_linux_amd64/frps.ini
  11. root 10129 9060 0 09:19 pts/0 00:00:00 grep --color=auto frps
  12. [1]+ Killed systemctl status daiyi-frps.service
  13. root@iZwz93z41dx386tb44eba4Z:/home/dc/dc_code/monit-5.25.3/conf# kill -9 10080
  14. root@iZwz93z41dx386tb44eba4Z:/home/dc/dc_code/monit-5.25.3/conf# ps -ef|grep frps
  15. root 10146 1 0 09:19 ? 00:00:00 /bin/bash /home/dc/dc_code/frp_0.27.0_linux_amd64/start_frps.sh
  16. root 10153 10146 0 09:19 ? 00:00:00 sudo /home/dc/dc_code/frp_0.27.0_linux_amd64/frps -c /home/dc/dc_code/frp_0.27.0_linux_amd64/frps.ini
  17. root 10155 10153 4 09:19 ? 00:00:00 /home/dc/dc_code/frp_0.27.0_linux_amd64/frps -c /home/dc/dc_code/frp_0.27.0_linux_amd64/frps.ini
  18. root 10169 9060 0 09:19 pts/0 00:00:00 grep --color=auto frps
  19. root@iZwz93z41dx386tb44eba4Z:/home/dc/dc_code/monit-5.25.3/conf# kill -9 10153
  20. root@iZwz93z41dx386tb44eba4Z:/home/dc/dc_code/monit-5.25.3/conf# ps -ef|grep frps
  21. root 10189 1 0 09:19 ? 00:00:00 /bin/bash /home/dc/dc_code/frp_0.27.0_linux_amd64/start_frps.sh
  22. root 10190 10189 0 09:19 ? 00:00:00 sudo /home/dc/dc_code/frp_0.27.0_linux_amd64/frps -c /home/dc/dc_code/frp_0.27.0_linux_amd64/frps.ini
  23. root 10203 10190 2 09:19 ? 00:00:00 /home/dc/dc_code/frp_0.27.0_linux_amd64/frps -c /home/dc/dc_code/frp_0.27.0_linux_amd64/frps.ini
  24. root 10213 9060 0 09:19 pts/0 00:00:00 grep --color=auto frps