基于 Docker 方式部署 Loki 日志系统
    组件介绍:

    • loki是主服务器,负责存储日志和处理查询。
    • promtail是代理,负责收集日志并将其发送给 loki 。
    • Grafana用于 UI 展示。

    部署
    loki

    1. mkdir /etc/loki && cd /etc/loki
    2. wget https://raw.githubusercontent.com/grafana/loki/v2.4.1/cmd/loki/loki-local-config.yaml -O loki-config.yaml
    3. mv loki-config.yaml local-config.yaml
    4. [root@k8s-node02 loki]# vim local-config.yaml
       (注:下面第 5、6 行在上游示例配置 loki-local-config.yaml 中位于 server: 段之下;此处粘贴时省略了文件开头的 auth_enabled: false 与 server: 两行,编辑时请保留这两行。以下按 YAML 层级恢复了缩进。)
    5.   http_listen_port: 3100
    6.   grpc_listen_port: 9096
    7. common:
    8.   path_prefix: /loki
    9.   storage:
    10.     filesystem:
    11.       chunks_directory: /loki/chunks
    12.       rules_directory: /loki/rules
    13.   replication_factor: 1
    14.   ring:
    15.     instance_addr: 127.0.0.1
    16.     kvstore:
    17.       store: inmemory
    18. schema_config:
    19.   configs:
    20.     - from: 2020-10-24
    21.       store: boltdb-shipper
    22.       object_store: filesystem
    23.       schema: v11
    24.       index:
    25.         prefix: index_
    26.         period: 24h
    27. ruler:
    28.   alertmanager_url: http://192.168.100.14:9093
    29.   enable_api: true
    30.   ring:
    31.     kvstore:
    32.       store: inmemory
    33.   #storage:
    34.   #  type: local
    35.   #  local:
    36.   #    directory: /loki/rules
    37.   #flush_period: 1m
    38. 创建告警规则文件
    39. mkdir -p /etc/loki/rules/fake
    40. [root@k8s-node02 fake]# cat rules.yaml
    41. groups:
    42.   - name: testlog
    43.     rules:
    44.       - alert: test_log_line number
    45.         #expr: count_over_time({filename="/data/logs/nginx/a.log"}|="ERROR"[5s]) > 0
    46.         #expr: sum by (filename) (count_over_time({job="nginxlog"}|="ERROR"[5s])) > 0
    47.         expr: sum by (filename,message,host) (count_over_time({job="test-log"}|~"ERROR"|regexp "(?P<message>.*)$"[5s])) > 0
    48.         for: 0
    49.         labels:
    50.           severity: "critical"
    51.         annotations:
    52.           summary: "{{ $labels.host }}"
    53.           description: "{{ $labels.message }}"
    54.           filename: "{{ $labels.filename }}"
    55. docker 运行
    56. docker run --name loki -v /etc/loki/local-config.yaml:/etc/loki/local-config.yaml -v /etc/loki/rules/:/loki/rules/ -d -p 3100:3100 grafana/loki:2.4.1

    promtail

    1. mkdir /etc/promtail && cd /etc/promtail
    2. wget https://raw.githubusercontent.com/grafana/loki/v2.4.1/clients/cmd/promtail/promtail-docker-config.yaml -O promtail-config.yaml
    3. mv promtail-config.yaml config.yml
    4. 修改配置文件
    5. [root@k8s-node02 promtail]# cat config.yml
    6. server:
    7.   http_listen_port: 9080
    8.   grpc_listen_port: 0
    9. positions:
    10.   filename: /tmp/positions.yaml
    11. clients:
    12.   - url: http://192.168.100.14:3100/loki/api/v1/push
    13. scrape_configs:
    14.   - job_name: linux
    15.     static_configs:
    16.       - targets:
    17.           - localhost
    18.         labels:
    19.           job: messages
    20.           host: localhost
    21.           __path__: /var/log/messages*
    22.   - job_name: nginx
    23.     static_configs:
    24.       - targets:
    25.           - localhost
    26.         labels:
    27.           job: test-log
    28.           host: k8s-node02
    29.           __path__: /data/logs/nginx/*.log
    30. docker 运行
    31. docker run --name promtail -v /etc/promtail/config.yml:/etc/promtail/config.yml -d -v /data/logs:/data/logs grafana/promtail:2.4.1

    grafana

    1. docker run -d -p 3000:3000 --name=grafana grafana/grafana:7.3.7

    仪表盘配置
    image.png
    仪表盘上查看数据
    image.png
    alertmanager

    1. mkdir -p /data/monitor/alertmanager && cd /data/monitor/alertmanager
    2. [root@k8s-node02 promtail]# cat /data/monitor/alertmanager/alertmanager.yml
    3. global:
    4.   smtp_smarthost: 'smtp.exmail.qq.com:465'
    5.   smtp_from: 'noreply@marketin.cn'
    6.   smtp_auth_username: 'noreply@marketin.cn'
    7.   smtp_auth_password: 'mI2bjgi5ni'
    8.   resolve_timeout: 5m
    9. route:
    10.   group_by: ['alertname', 'instance']
    11.   group_wait: 5s
    12.   group_interval: 10s
    13.   repeat_interval: 5m
    14.   receiver: "dingtalk"
    15. inhibit_rules:
    16.   - source_match:
    17.       severity: 'critical'
    18.     target_match_re:
    19.       severity: ^(warning)$
    20.     equal: ['alertname', 'instance']
    21. receivers:
    22.   - name: dingtalk
    23.     webhook_configs:
    24.       - url: 'http://192.168.100.14:8060/dingtalk/webhook/send'
    25.         send_resolved: true
    26. 运行docker
    27. docker run -d -p 9093:9093 --name=alertmanager \
    28. -v /data/monitor/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml \
    29. prom/alertmanager:latest

    配置钉钉告警

    1. [root@k8s-node02 prometheus-webhook-dingtalk-1.4.0.linux-amd64]# cat /root/prometheus-webhook-dingtalk-1.4.0.linux-amd64/config.yml
    2. ## Request timeout
    3. # timeout: 5s
    4. ## Customizable templates path
    5. templates:
    6.   - contrib/templates/legacy/template.tmpl
    7. ## You can also override default template using `default_message`
    8. ## The following example to use the 'legacy' template from v0.3.0
    9. # default_message:
    10. #   title: '{{ template "legacy.title" . }}'
    11. #   text: '{{ template "legacy.content" . }}'
    12. ## Targets, previously was known as "profiles"
    13. targets:
    14.   webhook:
    15.     url: https://oapi.dingtalk.com/robot/send?access_token=67a51455f433f3dd9bfa9ee9655ca26021cd92a02258cfc82e0ff7ddca1b2bd8
    16.     #message:
    17.     #  text: '{{ template "ding.link.content" . }}'
    18. 配置发送至钉钉内容:
    19. [root@k8s-node02 prometheus-webhook-dingtalk-1.4.0.linux-amd64]# cat /root/prometheus-webhook-dingtalk-1.4.0.linux-amd64/contrib/templates/legacy/template.tmpl
    20. {{ define "ding.link.title" }}[日志报警]{{ end }}
    21. {{ define "ding.link.content" -}}
    22. {{- if gt (len .Alerts.Firing) 0 -}}
    23. {{ range $i, $alert := .Alerts.Firing }}
    24. [当前级别]:{{ index $alert.Labels "severity" }}
    25. [告警主机]:{{ index $alert.Annotations "summary"}}
    26. [日志文件]:{{ index $alert.Annotations "filename"}}
    27. [告警时间]:{{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}
    28. [告警内容]:{{ index $alert.Annotations "description" }}
    29. {{ end }}{{- end }}
    30. {{- end }}