下载 prometheus-webhook-dingtalk 插件

    1. cd /opt
    2. wget https://github.com/timonwong/prometheus-webhook-dingtalk/releases/download/v1.4.0/prometheus-webhook-dingtalk-1.4.0.linux-amd64.tar.gz
    3. tar xvf prometheus-webhook-dingtalk-1.4.0.linux-amd64.tar.gz
    4. ln -s prometheus-webhook-dingtalk-1.4.0.linux-amd64 prometheus-webhook-dingtalk
    5. cd prometheus-webhook-dingtalk

    修改配置文件:将 url 和 secret 分别设置为钉钉机器人对应的 Webhook 地址和加签密钥。

    cp config.example.yml config.yml
    vim config.yml
    ## Request timeout
    # timeout: 5s
    
    ## Uncomment following line in order to write template from scratch (be careful!)
    #no_builtin_template: true
    
    ## Customizable templates path
    #templates:
    #  - contrib/templates/legacy/template.tmpl
    templates:
       - /opt/prometheus-webhook-dingtalk/template.tmpl
    ## You can also override default template using `default_message`
    ## The following example to use the 'legacy' template from v0.3.0
    #default_message:
    #  title: '{{ template "legacy.title" . }}'
    #  text: '{{ template "legacy.content" . }}'
    
    ## Targets, previously was known as "profiles"
    targets:
      webhook:
        url: https://oapi.dingtalk.com/robot/send?access_token=XXXXXXXXXXXXXXXXXXX
        # secret for signature
        secret: XXXXXXXXXXXXX
        message:
          title: '{{ template "ding.link.title" . }}'
          text: '{{ template "ding.link.content" . }}'
    

    编辑告警模板

    vim /opt/prometheus-webhook-dingtalk/template.tmpl
    {{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }}
    {{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }}
    
    {{ define "__text_alert_list" }}{{ range . }}
    **Labels**
    {{ range .Labels.SortedPairs }} - {{ .Name }}: {{ .Value | markdown | html }}
    {{ end }}
    **Annotations**
    {{ range .Annotations.SortedPairs }} - {{ .Name }}: {{ .Value | markdown | html }}
    {{ end }}
    **Source:** [{{ .GeneratorURL }}]({{ .GeneratorURL }})
    {{ end }}{{ end }}
    
    {{ define "default.__text_alert_list" }}{{ range . }}
    ---
    **告警级别:** {{ .Labels.severity | upper }}
    
    **运营团队:** {{ .Labels.team | upper }}
    
    **触发时间:** {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
    
    **事件信息:** 
    {{ range .Annotations.SortedPairs }} - {{ .Name }}: {{ .Value | markdown | html }}
    
    
    {{ end }}
    
    **事件标签:**
    {{ range .Labels.SortedPairs }}{{ if and (ne (.Name) "severity") (ne (.Name) "summary") (ne (.Name) "team") }} - {{ .Name }}: {{ .Value | markdown | html }}
    {{ end }}{{ end }}
    {{ end }}
    {{ end }}
    {{ define "default.__text_alertresovle_list" }}{{ range . }}
    ---
    **告警级别:** {{ .Labels.severity | upper }}
    
    **运营团队:** {{ .Labels.team | upper }}
    
    **触发时间:** {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
    
    **结束时间:** {{ dateInZone "2006.01.02 15:04:05" (.EndsAt) "Asia/Shanghai" }}
    
    **事件信息:**
    {{ range .Annotations.SortedPairs }} - {{ .Name }}: {{ .Value | markdown | html }}
    
    
    {{ end }}
    
    **事件标签:**
    {{ range .Labels.SortedPairs }}{{ if and (ne (.Name) "severity") (ne (.Name) "summary") (ne (.Name) "team") }} - {{ .Name }}: {{ .Value | markdown | html }}
    {{ end }}{{ end }}
    {{ end }}
    {{ end }}
    
    {{/* Default */}}
    {{ define "default.title" }}{{ template "__subject" . }}{{ end }}
    {{ define "default.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})**
    {{ if gt (len .Alerts.Firing) 0 -}}
    
    {{ template "default.__text_alert_list" .Alerts.Firing }}
    
    
    {{- end }}
    
    {{ if gt (len .Alerts.Resolved) 0 -}}
    {{ template "default.__text_alertresovle_list" .Alerts.Resolved }}
    
    
    {{- end }}
    {{- end }}
    
    {{/* Legacy */}}
    {{ define "legacy.title" }}{{ template "__subject" . }}{{ end }}
    {{ define "legacy.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})**
    {{ template "__text_alert_list" .Alerts.Firing }}
    {{- end }}
    
    {{/* Following names for compatibility */}}
    {{ define "ding.link.title" }}{{ template "default.title" . }}{{ end }}
    {{ define "ding.link.content" }}{{ template "default.content" . }}{{ end }}
    

    编写服务脚本

    cat > /usr/lib/systemd/system/ding.service <<EOF
    [Unit]
    Description=prometheus-webhook-dingtalk
    Documentation=
    After=network.target
    
    [Service]
    Type=simple
    WorkingDirectory=/opt/prometheus-webhook-dingtalk
    ExecStart=/opt/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk --config.file=/opt/prometheus-webhook-dingtalk/config.yml
    ExecStop=/bin/kill -TERM \$MAINPID
    ExecReload=/bin/kill -HUP \$MAINPID
    KillMode=control-group
    Restart=on-failure
    RestartSec=3s
    
    [Install]
    WantedBy=multi-user.target
    EOF
    

    启动webhook插件

    systemctl enable ding
    systemctl start ding
    systemctl status ding
    

    修改 prometheus.yml,将 alertmanagers 的地址修改为之前启动的 Alertmanager 地址

    alerting:
      alertmanagers:
      - static_configs:
        - targets:
           - localhost:9093
    

    image.png

    修改rules/linux.yml
    测试时,将规则设置为内存/CPU使用率大于5%就告警
    image.png

    systemctl reload prometheus
    

    简单压测CPU

    cat /dev/urandom | md5sum
    

    image.png

    钉钉就收到告警了。
    image.png