一.部署集群内prometheus监控节点
##安装node-exporter
kubectl apply -f node-exporter.yaml

  1. apiVersion: apps/v1
  2. kind: DaemonSet
  3. metadata:
  4. name: node-exporter
  5. namespace: kube-system
  6. labels:
  7. k8s-app: node-exporter
  8. spec:
  9. selector:
  10. matchLabels:
  11. k8s-app: node-exporter
  12. template:
  13. metadata:
  14. labels:
  15. k8s-app: node-exporter
  16. spec:
  17. containers:
  18. - image: prom/node-exporter
  19. name: node-exporter
  20. ports:
  21. - containerPort: 9100
  22. protocol: TCP
  23. name: http
  24. tolerations:
  25. hostNetwork: true
  26. hostPID: true
  27. hostIPC: true
  28. restartPolicy: Always

安装prometheus

kubectl apply -f configmap.yaml ##准备配置文件

  1. apiVersion: v1
  2. kind: ConfigMap
  3. metadata:
  4. name: prometheus-config
  5. namespace: kube-system
  6. data:
  7. prometheus.yml: |
  8. global:
  9. scrape_interval: 15s
  10. evaluation_interval: 15s
  11. scrape_configs:
  12. - job_name: 'kubernetes-apiservers'
  13. kubernetes_sd_configs:
  14. - role: endpoints
  15. scheme: https
  16. tls_config:
  17. ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  18. bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  19. relabel_configs:
  20. - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
  21. action: keep
  22. regex: default;kubernetes;https
  23. - job_name: 'kubernetes-nodes'
  24. kubernetes_sd_configs:
  25. - role: node
  26. scheme: https
  27. tls_config:
  28. ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  29. bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  30. relabel_configs:
  31. - action: labelmap
  32. regex: __meta_kubernetes_node_label_(.+)
  33. - target_label: __address__
  34. replacement: kubernetes.default.svc:443
  35. - source_labels: [__meta_kubernetes_node_name]
  36. regex: (.+)
  37. target_label: __metrics_path__
  38. replacement: /api/v1/nodes/${1}/proxy/metrics
  39. - job_name: 'kubernetes-cadvisor'
  40. kubernetes_sd_configs:
  41. - role: node
  42. scheme: https
  43. tls_config:
  44. ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  45. bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  46. relabel_configs:
  47. - action: labelmap
  48. regex: __meta_kubernetes_node_label_(.+)
  49. - target_label: __address__
  50. replacement: kubernetes.default.svc:443
  51. - source_labels: [__meta_kubernetes_node_name]
  52. regex: (.+)
  53. target_label: __metrics_path__
  54. replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
  55. - job_name: 'kubernetes-service-endpoints'
  56. kubernetes_sd_configs:
  57. - role: endpoints
  58. relabel_configs:
  59. - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
  60. action: keep
  61. regex: true
  62. - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
  63. action: replace
  64. target_label: __scheme__
  65. regex: (https?)
  66. - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
  67. action: replace
  68. target_label: __metrics_path__
  69. regex: (.+)
  70. - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
  71. action: replace
  72. target_label: __address__
  73. regex: ([^:]+)(?::\d+)?;(\d+)
  74. replacement: $1:$2
  75. - action: labelmap
  76. regex: __meta_kubernetes_service_label_(.+)
  77. - source_labels: [__meta_kubernetes_namespace]
  78. action: replace
  79. target_label: kubernetes_namespace
  80. - source_labels: [__meta_kubernetes_service_name]
  81. action: replace
  82. target_label: kubernetes_name
  83. - job_name: 'kubernetes-services'
  84. kubernetes_sd_configs:
  85. - role: service
  86. metrics_path: /probe
  87. params:
  88. module: [http_2xx]
  89. relabel_configs:
  90. - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
  91. action: keep
  92. regex: true
  93. - source_labels: [__address__]
  94. target_label: __param_target
  95. - target_label: __address__
  96. replacement: blackbox-exporter.example.com:9115
  97. - source_labels: [__param_target]
  98. target_label: instance
  99. - action: labelmap
  100. regex: __meta_kubernetes_service_label_(.+)
  101. - source_labels: [__meta_kubernetes_namespace]
  102. target_label: kubernetes_namespace
  103. - source_labels: [__meta_kubernetes_service_name]
  104. target_label: kubernetes_name
  105. - job_name: 'kubernetes-ingresses'
  106. kubernetes_sd_configs:
  107. - role: ingress
  108. relabel_configs:
  109. - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
  110. action: keep
  111. regex: true
  112. - source_labels: [__meta_kubernetes_ingress_scheme,__address__,__meta_kubernetes_ingress_path]
  113. regex: (.+);(.+);(.+)
  114. replacement: ${1}://${2}${3}
  115. target_label: __param_target
  116. - target_label: __address__
  117. replacement: blackbox-exporter.example.com:9115
  118. - source_labels: [__param_target]
  119. target_label: instance
  120. - action: labelmap
  121. regex: __meta_kubernetes_ingress_label_(.+)
  122. - source_labels: [__meta_kubernetes_namespace]
  123. target_label: kubernetes_namespace
  124. - source_labels: [__meta_kubernetes_ingress_name]
  125. target_label: kubernetes_name
  126. - job_name: 'kubernetes-pods'
  127. kubernetes_sd_configs:
  128. - role: pod
  129. relabel_configs:
  130. - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
  131. action: keep
  132. regex: true
  133. - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
  134. action: replace
  135. target_label: __metrics_path__
  136. regex: (.+)
  137. - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
  138. action: replace
  139. regex: ([^:]+)(?::\d+)?;(\d+)
  140. replacement: $1:$2
  141. target_label: __address__
  142. - action: labelmap
  143. regex: __meta_kubernetes_pod_label_(.+)
  144. - source_labels: [__meta_kubernetes_namespace]
  145. action: replace
  146. target_label: kubernetes_namespace
  147. - source_labels: [__meta_kubernetes_pod_name]
  148. action: replace
  149. target_label: kubernetes_pod_name
  150. - job_name: 'kubernetes-schedule'
  151. scrape_interval: 5s
  152. static_configs:
  153. - targets: ['10.107.141.50:10251']
  154. - job_name: 'kubernetes-control-manager'
  155. scrape_interval: 5s
  156. static_configs:
  157. - targets: ['10.107.141.50:10252']
  158. - job_name: 'kubernetes-kubelet'
  159. scrape_interval: 5s
  160. static_configs:
  161. - targets: ['10.107.141.51:10255','10.107.141.52:10255']
  162. - job_name: 'kubernetes-kube-proxy'
  163. scrape_interval: 5s
  164. static_configs:
  165. - targets: ['10.107.141.51:10249','10.107.141.52:10249']

kubectl apply -f rbac.yaml ##准备deployment使用的rbac认证文件,给prometheus权限监控集群

  1. apiVersion: rbac.authorization.k8s.io/v1
  2. kind: ClusterRole
  3. metadata:
  4. name: prometheus
  5. rules:
  6. - apiGroups: [""]
  7. resources:
  8. - nodes
  9. - nodes/proxy
  10. - services
  11. - endpoints
  12. - pods
  13. verbs: ["get", "list", "watch"]
  14. - apiGroups:
  15. - extensions
  16. resources:
  17. - ingresses
  18. verbs: ["get", "list", "watch"]
  19. - nonResourceURLs: ["/metrics"]
  20. verbs: ["get"]
  21. ---
  22. apiVersion: v1
  23. kind: ServiceAccount
  24. metadata:
  25. name: prometheus
  26. namespace: kube-system
  27. ---
  28. apiVersion: rbac.authorization.k8s.io/v1
  29. kind: ClusterRoleBinding
  30. metadata:
  31. name: prometheus
  32. roleRef:
  33. apiGroup: rbac.authorization.k8s.io
  34. kind: ClusterRole
  35. name: prometheus
  36. subjects:
  37. - kind: ServiceAccount
  38. name: prometheus
  39. namespace: kube-system

kubectl apply -f prometheus_deploy.yml

  1. apiVersion: apps/v1
  2. kind: Deployment
  3. metadata:
  4. labels:
  5. name: prometheus-deployment
  6. name: prometheus
  7. namespace: kube-system
  8. spec:
  9. replicas: 1
  10. selector:
  11. matchLabels:
  12. app: prometheus
  13. template:
  14. metadata:
  15. labels:
  16. app: prometheus
  17. spec:
  18. containers:
  19. - image: prom/prometheus
  20. name: prometheus
  21. imagePullPolicy: IfNotPresent
  22. command:
  23. - "/bin/prometheus"
  24. args:
  25. - "--config.file=/etc/prometheus/prometheus.yml"
  26. - "--storage.tsdb.path=/prometheus"
  27. - "--storage.tsdb.retention=24h"
  28. ports:
  29. - containerPort: 9090
  30. protocol: TCP
  31. volumeMounts:
  32. - mountPath: "/prometheus"
  33. name: data
  34. - mountPath: "/etc/prometheus"
  35. name: config-volume
  36. resources:
  37. requests:
  38. cpu: 100m
  39. memory: 100Mi
  40. limits:
  41. cpu: 500m
  42. memory: 2500Mi
  43. serviceAccountName: prometheus
  44. volumes:
  45. - name: data
  46. emptyDir: {}
  47. - name: config-volume
  48. configMap:
  49. name: prometheus-config

kubectl apply -f prometheus_svc.yml

  1. apiVersion: v1
  2. kind: Service
  3. metadata:
  4. labels:
  5. app: prometheus
  6. name: prometheus
  7. namespace: kube-system
  8. spec:
  9. type: NodePort
  10. ports:
  11. - port: 9090
  12. targetPort: 9090
  13. nodePort: 30003
  14. selector:
  15. app: prometheus

kubectl get deployment -o wide -n kube-system
2B9`[U]BI1C8JHLTX@`8H4V.png
kubectl get svc -o wide -n kube-system
`Y(BE{LH[J}R]A4~9~7%RMG.png
##安装grafana
kubectl apply -f grafana_deploy.yaml

  1. apiVersion: apps/v1
  2. kind: Deployment
  3. metadata:
  4. name: grafana-core
  5. namespace: kube-system
  6. labels:
  7. app: grafana
  8. component: core
  9. spec:
  10. selector:
  11. matchLabels:
  12. app: grafana
  13. component: core
  14. replicas: 1
  15. template:
  16. metadata:
  17. labels:
  18. app: grafana
  19. component: core
  20. spec:
  21. containers:
  22. - image: grafana/grafana:4.2.0
  23. name: grafana-core
  24. imagePullPolicy: IfNotPresent
  25. # env:
  26. resources:
  27. # keep request = limit to keep this container in guaranteed class
  28. limits:
  29. cpu: 100m
  30. memory: 100Mi
  31. requests:
  32. cpu: 100m
  33. memory: 100Mi
  34. env:
  35. # The following env variables set up basic auth twith the default admin user and admin password.
  36. - name: GF_AUTH_BASIC_ENABLED
  37. value: "true"
  38. - name: GF_AUTH_ANONYMOUS_ENABLED
  39. value: "false"
  40. # - name: GF_AUTH_ANONYMOUS_ORG_ROLE
  41. # value: Admin
  42. # does not really work, because of template variables in exported dashboards:
  43. # - name: GF_DASHBOARDS_JSON_ENABLED
  44. # value: "true"
  45. readinessProbe:
  46. httpGet:
  47. path: /login
  48. port: 3000
  49. # initialDelaySeconds: 30
  50. # timeoutSeconds: 1
  51. volumeMounts:
  52. - name: grafana-persistent-storage
  53. mountPath: /var
  54. volumes:
  55. - name: grafana-persistent-storage
  56. emptyDir: {}

kubectl apply -f grafana_svc.yaml

  1. apiVersion: v1
  2. kind: Service
  3. metadata:
  4. name: grafana
  5. namespace: kube-system
  6. labels:
  7. app: grafana
  8. component: core
  9. spec:
  10. type: NodePort
  11. ports:
  12. - port: 3000
  13. selector:
  14. app: grafana
  15. component: core

测试访问:
~4H4VC@`~0{Y$]6DMP0)QVB.png
{VH$J`L(`6E~19@YR2G3Y3X.png
:如果prometheus采集不到kube-proxy节点,则有可能是kube-proxy的metrics数据走的
是127.0.0.1:10249端口,无法被外部采集到。可使用netstat -lntp | grep 10249查看,正确
结果如图所示
![AMR0~C9VI29T4`~ZKX]}C4.png
如采用二进制systemd安装方法可以修改kube-proxy.conf文件,添加--metrics-bind-address=本机ip
N[0P0H9@[VSSV]GFCN~3QIJ.png
二.部署集群外prometheus的global节点

集群外直接用docker部署global节点: (此处监控的是集群内kube-proxy和kubelet组件,根据job_name指定)

  1. global:
  2. scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  3. evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  4. # scrape_timeout is set to the global default (10s).
  5. alerting:
  6. alertmanagers:
  7. - static_configs:
  8. - targets:
  9. # - alertmanager:9093
  10. rule_files:
  11. scrape_configs:
  12. - job_name: 'federate'
  13. scrape_interval: 15s
  14. honor_labels: true
  15. metrics_path: '/federate'
  16. params:
  17. 'match[]':
  18. - '{job="kubernetes-kube-proxy"}'
  19. - '{job="kubernetes-kubelet"}'
  20. static_configs:
  21. - targets:
  22. - '10.107.141.51:30003'

运行集群外本地prometheus
docker run -d --name=prometheus -p 9090:9090 -v /tmp/prometheus.yml:/etc/prometheus/prometheus.yml prom/prometheus

进入集群外prometheus查看
(0HSEZ{L]G4JIX]5V{(]W`W.png