需求:需要监控机器的端口
将需要监控的服务器端口写入 port_list.txt 文件,每行一个端口:
[root@bogon scripts]# pwd
/data/scripts
[root@bogon scripts]# cat port_list.txt
80
8080
9090
9011
6379
修改 agent.sh,新增 get_port_listen 函数,并在 getdata 中调用它:
#!/bin/bash
#########################################################################
# File Name: agent.sh
# Created on: 2021-03-14 11:36:19
# Author: Wu Kang
# Last Modified: 2021-03-15 11:29:17
# Description: 自定义监控
#########################################################################
# Identity labels attached to every metric sample emitted by this script.
# HOST: whatever `hostname` prints on this machine.
HOST=$(hostname)
# IP: address taken from the first `inet` line of `ip a` that survives the
# filter on 127.0.0.1, fe80 and "::" (loopback and IPv6 entries).
IP=$(
  ip a \
    | grep inet \
    | grep -Ev "127.0.0.1|fe80|::" \
    | awk -F'/' '{print $1}' \
    | awk '{print $2}' \
    | head -n 1
)
#######################################
# Count the running processes that match each pattern in process_list.txt.
# For every whitespace-separated pattern in the file, one sample of the form
#   process_count{host="..",process_name="..",ip=".."} <count>
# is appended to tmpdata.txt and echoed to stdout. <count> is the number of
# `ps -ef` lines matching the pattern (as a grep regex), excluding lines that
# contain "grep" so the probe does not count itself.
# Globals:   HOST, IP (read)
# Arguments: none
# Outputs:   one metric line per pattern on stdout and in tmpdata.txt
#######################################
get_process_count() {
  local -a names=()
  local process_name count line

  # -d '' makes read consume the whole file; -a splits it on whitespace into
  # words. Unlike `for x in $(cat file)`, no pathname expansion happens, so a
  # pattern such as "[n]ginx" is kept literally even when a matching file
  # exists in the working directory. read returns non-zero at EOF; the array
  # is still filled, so the status is deliberately ignored.
  read -r -d '' -a names <process_list.txt

  for process_name in "${names[@]}"; do
    # grep -c prints 0 (and exits 1) when nothing matches, which is the value
    # we want to export.
    count=$(ps -ef | grep -- "$process_name" | grep -vc grep)
    line="process_count{host=\"${HOST}\",process_name=\"${process_name}\",ip=\"${IP}\"} ${count}"
    printf '%s\n' "$line" >>tmpdata.txt
    printf '%s\n' "$line"
  done
}
#######################################
# Report how many seconds have passed since a log file was last modified.
# On success one sample of the form
#   log_delay{host="..",logfile="..",ip=".."} <seconds>
# is appended to tmpdata.txt and echoed to stdout. When the file's mtime
# cannot be read (e.g. the file does not exist) nothing is emitted, so a
# sample with a missing value never ends up in the push payload.
# Globals:   HOST, IP (read)
# Arguments: $1 - log file to inspect (optional, default /tmp/catalina.out)
# Outputs:   metric line on stdout and in tmpdata.txt; warning on stderr
# Returns:   0 on success, 1 if the log file could not be stat'ed
#######################################
get_log_delay_second() {
  local logfile=${1:-/tmp/catalina.out}
  local now mtime delay_second line

  # stat -c %Y prints the modification time as seconds since the epoch.
  if ! mtime=$(stat -c %Y -- "$logfile" 2>/dev/null); then
    printf 'get_log_delay_second: cannot stat %s, skipping log_delay\n' \
      "$logfile" >&2
    return 1
  fi

  now=$(date +%s)
  delay_second=$((now - mtime))
  line="log_delay{host=\"${HOST}\",logfile=\"${logfile}\",ip=\"${IP}\"} ${delay_second}"
  printf '%s\n' "$line" >>tmpdata.txt
  printf '%s\n' "$line"
}
#######################################
# Report, for each port in port_list.txt, how many TCP sockets listen on it.
# For every whitespace-separated port in the file, one sample of the form
#   port_listen{host="..",port="..",ip=".."} <count>
# is appended to tmpdata.txt and echoed to stdout. <count> is the number of
# `ss -tnl` rows whose local address ends in exactly that port, so 0 means
# nothing is listening.
# Globals:   HOST, IP (read)
# Arguments: none
# Outputs:   one metric line per port on stdout and in tmpdata.txt
#######################################
get_port_listen() {
  local -a ports=()
  local sockets port count line

  # Read the whole file as whitespace-separated words without pathname
  # expansion. read returns non-zero at EOF; the array is still filled.
  read -r -d '' -a ports <port_list.txt

  # One snapshot for all ports instead of re-running ss per port.
  sockets=$(ss -tnl)

  for port in "${ports[@]}"; do
    # Column 4 is the local "address:port". Compare only the text after the
    # last ':' so that 80 does not match 8080 or an address like 192.168.80.1.
    count=$(awk -v want="$port" '
      { n = split($4, parts, ":"); if (n > 0 && parts[n] == want) hits++ }
      END { print hits + 0 }
    ' <<<"$sockets")
    line="port_listen{host=\"${HOST}\",port=\"${port}\",ip=\"${IP}\"} ${count}"
    printf '%s\n' "$line" >>tmpdata.txt
    printf '%s\n' "$line"
  done
}
# Gather all samples for one cycle by running each collector in order:
# process counts, log staleness, then listening ports. The exit status is
# that of the last collector.
getdata() {
  local collector
  for collector in get_process_count get_log_delay_second get_port_listen; do
    "$collector"
  done
}
# POST the raw contents of tmpdata.txt to the metrics endpoint for job
# "process" (presumably a Prometheus Pushgateway - confirm against the
# deployment). The exit status is curl's.
pushdata() {
  local endpoint='http://192.168.0.15:9091/metrics/job/process'
  curl --request POST --data-binary @tmpdata.txt "$endpoint"
}
# Endless collection loop: each pass truncates tmpdata.txt, collects fresh
# samples, pushes them, then pauses for one second. Never returns.
run() {
  while :; do
    : >tmpdata.txt
    getdata
    pushdata
    sleep 1
  done
}
# Script entry point: hand control to the collect-and-push loop.
function main {
  run
}

main
编辑端口监控告警规则
[root@bogon scripts]# vim /opt/prometheus/rules/port.yml
# Alerting rules for the port_listen metric.
groups:
- name: port
  rules:
  # Fires when a monitored port has had no listening socket
  # (port_listen == 0) for 3 seconds.
  - alert: 端口未监听
    expr: port_listen == 0
    for: 3s
    labels:
      severity: warning
    annotations:
      summary: "{{ $labels.port }} 未监听"
      description: "host {{ $labels.host }} {{ $labels.ip }} 端口{{ $labels.port }}未监听 ,value is {{ $value }}"
systemctl reload prometheus
过一会儿,钉钉就收到了告警。