https://skywalking.apache.org

SkyWalking Kubernetes

https://github.com/apache/skywalking-kubernetes

SkyWalking Cloud on Kubernetes

https://github.com/apache/skywalking-swck

Helm方式部署

  # Release name and target namespace used by the install command below.
  export SKYWALKING_RELEASE_NAME=skywalking  # change the release name according to your scenario
  export SKYWALKING_RELEASE_NAMESPACE=default  # change the namespace to where you want to install SkyWalking

  # Register the SkyWalking Helm repository under a local alias.
  export REPO=skywalking
  helm repo add "${REPO}" https://apache.jfrog.io/artifactory/skywalking-helm

  # Install the chart with OAP and UI images at 9.1.0, Elasticsearch as the
  # storage type, and the Elasticsearch image at 6.8.6.
  helm install "${SKYWALKING_RELEASE_NAME}" "${REPO}/skywalking" -n "${SKYWALKING_RELEASE_NAMESPACE}" \
    --set oap.image.tag=9.1.0 \
    --set oap.storageType=elasticsearch \
    --set ui.image.tag=9.1.0 \
    --set elasticsearch.imageTag=6.8.6

image.png

  1. [root@UR-20210425NAMA ~]# kubectl get pod
  2. NAME READY STATUS RESTARTS AGE
  3. elasticsearch-master-0 1/1 Running 0 7d19h
  4. elasticsearch-master-1 1/1 Running 0 7d19h
  5. elasticsearch-master-2 1/1 Running 0 7d19h
  6. skywalking-es-init-ftvxn 0/1 Completed 0 7d19h
  7. skywalking-oap-84464fc4cc-8hrb6 1/1 Running 6 7d19h
  8. skywalking-oap-84464fc4cc-rxm4h 1/1 Running 6 7d19h
  9. skywalking-ui-549dc5989f-4d6bd 1/1 Running 0 7d19h
  10. [root@UR-20210425NAMA ~]# kubectl get svc
  11. NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
  12. elasticsearch-master ClusterIP 10.43.6.199 <none> 9200/TCP,9300/TCP 7d19h
  13. elasticsearch-master-headless ClusterIP None <none> 9200/TCP,9300/TCP 7d19h
  14. kubernetes ClusterIP 10.43.0.1 <none> 443/TCP 63d
  15. skywalking-oap NodePort 10.43.218.36 <none> 11800:30196/TCP,12800:31981/TCP 7d19h
  16. skywalking-ui NodePort 10.43.236.32 <none> 80:31286/TCP 7d19h
  17. [root@UR-20210425NAMA ~]#

yaml文件

  # Service "skywalking-ui": NodePort service that forwards port 80
  # (node port 31286) to port 8080 of the pods matched by the selector.
  kind: Service
  apiVersion: v1
  metadata:
    name: skywalking-ui
    namespace: default
    labels:
      app: skywalking
      app.kubernetes.io/managed-by: Helm
      chart: skywalking-4.2.0
      component: ui
      heritage: Helm
      release: skywalking
    annotations:
      meta.helm.sh/release-name: skywalking
      meta.helm.sh/release-namespace: default
  spec:
    ports:
      - protocol: TCP
        port: 80
        targetPort: 8080
        nodePort: 31286
    selector:
      app: skywalking
      component: ui
      release: skywalking
    clusterIP: 10.43.236.32
    type: NodePort
    sessionAffinity: None
    externalTrafficPolicy: Cluster
  # Deployment "skywalking-ui": one replica of the SkyWalking UI container
  # (image tag 8.8.1) listening on container port 8080.
  kind: Deployment
  apiVersion: apps/v1
  metadata:
    name: skywalking-ui
    namespace: infrastructure-prod
    labels:
      app: skywalking
      app.kubernetes.io/managed-by: Helm
      chart: skywalking-4.2.0
      component: ui
      heritage: Helm
      release: skywalking
    annotations:
      deployment.kubernetes.io/revision: '4'
      meta.helm.sh/release-name: skywalking
      meta.helm.sh/release-namespace: infrastructure-prod
  spec:
    replicas: 1
    selector:
      matchLabels:
        app: skywalking
        component: ui
        release: skywalking
    template:
      metadata:
        creationTimestamp: null
        labels:
          app: skywalking
          component: ui
          release: skywalking
        annotations:
          kubesphere.io/restartedAt: '2022-07-01T12:55:58.718Z'
      spec:
        containers:
          - name: ui
            image: 'hw-harbor.ur.com.cn/paas/skywalking-ui:8.8.1'
            ports:
              - name: page
                containerPort: 8080
                protocol: TCP
            env:
              # Points the UI at port 12800 of the skywalking-oap Service.
              - name: SW_OAP_ADDRESS
                value: 'http://skywalking-oap:12800'
            # Requests equal limits for both CPU and memory.
            resources:
              limits:
                cpu: '1'
                memory: 512Mi
              requests:
                cpu: '1'
                memory: 512Mi
            terminationMessagePath: /dev/termination-log
            terminationMessagePolicy: File
            imagePullPolicy: Always
        restartPolicy: Always
        terminationGracePeriodSeconds: 30
        dnsPolicy: ClusterFirst
        securityContext: {}
        schedulerName: default-scheduler
    strategy:
      type: RollingUpdate
      rollingUpdate:
        maxUnavailable: 25%
        maxSurge: 25%
    revisionHistoryLimit: 10
    progressDeadlineSeconds: 600
  # Service "skywalking-oap": ClusterIP service exposing the OAP pods on
  # port 11800 (named "grpc") and port 12800 (named "rest").
  kind: Service
  apiVersion: v1
  metadata:
    name: skywalking-oap
    namespace: infrastructure-prod
    labels:
      app: skywalking
      app.kubernetes.io/managed-by: Helm
      chart: skywalking-4.2.0
      component: oap
      heritage: Helm
      release: skywalking
    annotations:
      # Written by `kubectl apply`; kept verbatim from the exported object.
      kubectl.kubernetes.io/last-applied-configuration: >
        {"apiVersion":"v1","kind":"Service","metadata":{"annotations":{"meta.helm.sh/release-name":"skywalking","meta.helm.sh/release-namespace":"infrastructure-prod"},"labels":{"app":"skywalking","app.kubernetes.io/managed-by":"Helm","chart":"skywalking-4.2.0","component":"oap","heritage":"Helm","release":"skywalking"},"name":"skywalking-oap","namespace":"infrastructure-prod"},"spec":{"clusterIP":"172.30.192.136","ports":[{"name":"grpc","port":11800,"protocol":"TCP","targetPort":11800},{"name":"rest","port":12800,"protocol":"TCP","targetPort":12800}],"selector":{"app":"skywalking","component":"oap","release":"skywalking"},"sessionAffinity":"None","type":"ClusterIP"}}
      meta.helm.sh/release-name: skywalking
      meta.helm.sh/release-namespace: infrastructure-prod
  spec:
    ports:
      - name: grpc
        protocol: TCP
        port: 11800
        targetPort: 11800
      - name: rest
        protocol: TCP
        port: 12800
        targetPort: 12800
    selector:
      app: skywalking
      component: oap
      release: skywalking
    clusterIP: 172.30.192.136
    type: ClusterIP
    sessionAffinity: None
  # Deployment "skywalking-oap": three replicas of the SkyWalking OAP server
  # (image tag 8.8.1) using Elasticsearch storage, with the alarm rules
  # mounted from the "alarm-settings" ConfigMap.
  kind: Deployment
  apiVersion: apps/v1
  metadata:
    name: skywalking-oap
    namespace: infrastructure-prod
    labels:
      app: skywalking
      app.kubernetes.io/managed-by: Helm
      chart: skywalking-4.2.0
      component: oap
      heritage: Helm
      release: skywalking
    annotations:
      deployment.kubernetes.io/revision: '23'
      meta.helm.sh/release-name: skywalking
      meta.helm.sh/release-namespace: infrastructure-prod
  spec:
    replicas: 3
    selector:
      matchLabels:
        app: skywalking
        component: oap
        release: skywalking
    template:
      metadata:
        creationTimestamp: null
        labels:
          app: skywalking
          component: oap
          release: skywalking
        annotations:
          kubesphere.io/restartedAt: '2022-08-02T01:09:33.250Z'
      spec:
        volumes:
          # Exposes only the alarm-settings.yml key of the ConfigMap.
          - name: alarm-settings
            configMap:
              name: alarm-settings
              items:
                - key: alarm-settings.yml
                  path: alarm-settings.yml
              defaultMode: 420
        initContainers:
          # Blocks pod start until 172.25.2.8:9200 accepts a TCP connection;
          # gives up (exit 1) after 60 attempts, pausing 5s between attempts.
          - name: wait-for-elasticsearch
            image: 'busybox:1.30'
            command:
              - sh
              - '-c'
              - >-
                for i in $(seq 1 60); do nc -z -w3 172.25.2.8 9200 && exit 0 ||
                sleep 5; done; exit 1
            resources: {}
            terminationMessagePath: /dev/termination-log
            terminationMessagePolicy: File
            imagePullPolicy: IfNotPresent
        containers:
          - name: oap
            image: 'hw-harbor.ur.com.cn/paas/skywalking-oap-server:8.8.1'
            ports:
              - name: grpc
                containerPort: 11800
                protocol: TCP
              - name: rest
                containerPort: 12800
                protocol: TCP
            env:
              - name: TZ
                value: Asia/Shanghai
              - name: JAVA_OPTS
                value: '-Dmode=no-init -Xmx2g -Xms2g'
              - name: SW_CLUSTER
                value: kubernetes
              - name: SW_CLUSTER_K8S_NAMESPACE
                value: infrastructure-prod
              - name: SW_NAMESPACE
                value: sw_prod
              # Same label set as this Deployment's pod template labels.
              - name: SW_CLUSTER_K8S_LABEL
                value: 'app=skywalking,release=skywalking,component=oap'
              - name: SKYWALKING_COLLECTOR_UID
                valueFrom:
                  fieldRef:
                    apiVersion: v1
                    fieldPath: metadata.uid
              - name: SW_STORAGE
                value: elasticsearch
              # NOTE(review): the list ends with a trailing comma; kept as
              # exported - confirm it is intentional.
              - name: SW_STORAGE_ES_CLUSTER_NODES
                value: '172.25.2.8:9200,172.25.2.6:9200,172.25.2.7:9200,'
              # Credentials are redacted in these notes.
              - name: SW_ES_USER
                value: xxx
              - name: SW_ES_PASSWORD
                value: xxx
              - name: SW_STORAGE_ES_RECORD_DATA_TTL
                value: '7'
              - name: SW_STORAGE_ES_BULK_ACTIONS
                value: '5000'
              - name: SW_SUPERDATASET_STORAGE_DAY_STEP
                value: '-1'
              - name: SW_STORAGE_ES_FLUSH_INTERVAL
                value: '60'
              - name: SW_STORAGE_ES_INDEX_SHARDS_NUMBER
                value: '3'
              - name: SW_STORAGE_ES_QUERY_MAX_SIZE
                value: '7000'
              # The folded scalar passes the outer double quotes and the
              # backslashes through literally; keep this value byte-for-byte.
              - name: SW_STORAGE_ES_ADVANCED
                value: >-
                  "{\"index.refresh_interval\":\"30s\",\"index.translog.flush_threshold_size\":\"500mb\",\"index.translog.sync_interval\":\"60s\",\"index.translog.durability\":\"async\"}"
            # Requests equal limits for both CPU and memory.
            resources:
              limits:
                cpu: '2'
                memory: 4Gi
              requests:
                cpu: '2'
                memory: 4Gi
            volumeMounts:
              # subPath mounts the single file instead of shadowing the
              # whole /skywalking/config directory.
              - name: alarm-settings
                readOnly: true
                mountPath: /skywalking/config/alarm-settings.yml
                subPath: alarm-settings.yml
            # Both probes are plain TCP checks against port 12800.
            livenessProbe:
              tcpSocket:
                port: 12800
              initialDelaySeconds: 15
              timeoutSeconds: 1
              periodSeconds: 20
              successThreshold: 1
              failureThreshold: 3
            readinessProbe:
              tcpSocket:
                port: 12800
              initialDelaySeconds: 15
              timeoutSeconds: 1
              periodSeconds: 20
              successThreshold: 1
              failureThreshold: 3
            terminationMessagePath: /dev/termination-log
            terminationMessagePolicy: File
            imagePullPolicy: Always
        restartPolicy: Always
        terminationGracePeriodSeconds: 30
        dnsPolicy: ClusterFirst
        serviceAccountName: skywalking-oap
        serviceAccount: skywalking-oap
        securityContext: {}
        affinity:
          # Soft preference: spread OAP pods across different hostnames.
          podAntiAffinity:
            preferredDuringSchedulingIgnoredDuringExecution:
              - weight: 1
                podAffinityTerm:
                  labelSelector:
                    matchLabels:
                      app: skywalking
                      component: oap
                      release: skywalking
                  topologyKey: kubernetes.io/hostname
        schedulerName: default-scheduler
    strategy:
      type: RollingUpdate
      rollingUpdate:
        maxUnavailable: 25%
        maxSurge: 25%
    revisionHistoryLimit: 10
    progressDeadlineSeconds: 600
  # Ingress "skywalking-ingress": routes host skywalking.ur.com.cn, path "/",
  # to port 80 of the skywalking-ui Service.
  # NOTE(review): the extensions/v1beta1 Ingress API is no longer served from
  # Kubernetes 1.22 onwards. On such clusters this must be migrated to
  # networking.k8s.io/v1 (backend.service.name / backend.service.port.number).
  # Left unchanged here because the target cluster version is not shown.
  kind: Ingress
  apiVersion: extensions/v1beta1
  metadata:
    name: skywalking-ingress
    namespace: infrastructure-prod
    labels:
      isExternal: 'true'
      zone: data
    annotations:
      kubernetes.io/elb.id: d4b32321-1e87-478b-a821-b9e4c4a542ce
      kubernetes.io/elb.ip: 172.25.98.42
      kubernetes.io/elb.port: '80'
      kubernetes.io/ingress.class: cce
  spec:
    rules:
      - host: skywalking.ur.com.cn
        http:
          paths:
            - path: /
              pathType: ImplementationSpecific
              backend:
                serviceName: skywalking-ui
                servicePort: 80
              # NOTE(review): `property` is not a field of the upstream
              # Ingress API; presumably an extension of the "cce" ingress
              # class - confirm against the provider documentation.
              property:
                ingress.beta.kubernetes.io/url-match-mode: STARTS_WITH

webhooks告警配置

  # ConfigMap "alarm-settings": carries alarm-settings.yml, which the
  # skywalking-oap Deployment mounts at /skywalking/config/alarm-settings.yml.
  # NOTE(review): the WeChat webhook URL below embeds its `key` query
  # parameter, which looks like a credential - consider rotating it and
  # keeping it out of shared notes.
  kind: ConfigMap
  apiVersion: v1
  metadata:
    name: alarm-settings
    namespace: infrastructure-prod
    annotations:
      kubesphere.io/creator: linmingwang
  data:
    alarm-settings.yml: |-
      rules:
        # Rule unique name, must be ended with `_rule`.
        service_resp_time_rule:
          metrics-name: service_resp_time
          op: ">"
          threshold: 1000
          period: 15
          count: 5
          silence-period: 15
          message: Response time of service {name} is more than 1000ms in 5 minutes of last 15 minutes.
        service_sla_rule:
          # Metrics value need to be long, double or int
          metrics-name: service_sla
          op: "<"
          threshold: 8000
          # The length of time to evaluate the metrics
          period: 10
          # How many times after the metrics match the condition, will trigger alarm
          count: 2
          # How many times of checks, the alarm keeps silence after alarm triggered, default as same as period.
          silence-period: 3
          message: Successful rate of service {name} is lower than 80% in 2 minutes of last 10 minutes
        service_resp_time_percentile_rule:
          # Metrics value need to be long, double or int
          metrics-name: service_percentile
          op: ">"
          threshold: 1000,1000,1000,1000,1000
          period: 10
          count: 3
          silence-period: 5
          message: Percentile response time of service {name} alarm in 3 minutes of last 10 minutes, due to more than one condition of p50 > 1000, p75 > 1000, p90 > 1000, p95 > 1000, p99 > 1000
        service_instance_resp_time_rule:
          metrics-name: service_instance_resp_time
          op: ">"
          threshold: 1000
          period: 10
          count: 2
          silence-period: 5
          message: Response time of service instance {name} is more than 1000ms in 2 minutes of last 10 minutes
        database_access_resp_time_rule:
          metrics-name: database_access_resp_time
          threshold: 1000
          op: ">"
          period: 10
          count: 2
          message: Response time of database access {name} is more than 1000ms in 2 minutes of last 10 minutes
        endpoint_relation_resp_time_rule:
          metrics-name: endpoint_relation_resp_time
          threshold: 1000
          op: ">"
          period: 10
          count: 2
          message: Response time of endpoint relation {name} is more than 1000ms in 2 minutes of last 10 minutes
      wechatHooks:
        textTemplate: |-
          {
            "msgtype": "text",
            "text": {
              "content": "Skywalking(生产环境): \n %s."
            }
          }
        webhooks:
          - https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=07ab9f9d-c9b0-44bf-9110-8aeaa2166f58
      #webHooks:
      #  - http://172.0.0.1

1660797775153.jpg
image.png
image.png

配置 agent
通过 initContainer 将其中的 agent 拷贝到目标容器里
使用 JAVA_OPTS 方式运行 skywalking-agent.jar,SkyWalking 配置通过 pipeline 流水线变量传递

https://skywalking.apache.org/downloads/
image.png
image.png
image.png
image.png

  # Application Deployment template. The $VAR / ${VAR} placeholders are left
  # for the pipeline to substitute before the manifest is applied.
  # An init container copies /agent into the shared emptyDir volume, and the
  # application container starts through /home/run.sh from the ConfigMap.
  apiVersion: apps/v1
  kind: Deployment
  metadata:
    labels:
      app: kubesphere
      component: $APP_NAME
      tier: backend
    # Service name
    name: $APP_NAME
    # Project name
    namespace: $NAMESPACE
  spec:
    progressDeadlineSeconds: 600
    replicas: 2
    selector:
      matchLabels:
        app: kubesphere
        component: $APP_NAME
        tier: backend
    template:
      metadata:
        labels:
          app: kubesphere
          component: $APP_NAME
          tier: backend
      spec:
        affinity:
          # Hard rule: two pods with the same `component` label never share
          # a hostname.
          podAntiAffinity:
            requiredDuringSchedulingIgnoredDuringExecution:
              - labelSelector:
                  matchExpressions:
                    - key: component
                      operator: In
                      values:
                        - $APP_NAME
                topologyKey: "kubernetes.io/hostname"
        initContainers:
          # Init container: copies /agent/ into the shared volume, so the
          # application container finds it under /share/agent.
          - name: ur-paas-init
            image: $HARBOR_HOST/paas/init-container:1.0.2
            imagePullPolicy: IfNotPresent
            command: ["cp", "-r", "/agent/", "/share/"]
            volumeMounts:
              - name: ur-share
                mountPath: /share
            resources:
              requests:
                cpu: 200m
                memory: 200Mi
              limits:
                cpu: 200m
                memory: 200Mi
        containers:
          - name: $APP_NAME
            image: $HARBOR_HOST/$HARBOR_NAMESPACE/$APP_NAME:$BRANCH_NAME-$BUILD_NUMBER
            command: ["sh","/home/run.sh"]
            imagePullPolicy: Always
            lifecycle:
              preStop:
                exec:
                  command: ["sh","/home/preStop.sh"]
            env:
              - name: JAVA_OPTS
                value: $JAVA_OPTS
              - name: CACHE_IGNORE
                value: js|html
              - name: CACHE_PUBLIC_EXPIRATION
                value: 3d
              # Time zone
              - name: TZ
                value: Asia/Shanghai
              # SkyWalking agent settings
              - name: SW_AGENT_NAME
                value: ${APP_NAME}
              - name: SW_JDBC_TRACE_SQL_PARAMETERS
                value: 'true'
              - name: SW_PLUGIN_JDBC_SQL_PARAMETERS_MAX_LENGTH
                value: '512'
              # SkyWalking OAP backend address
              - name: SW_AGENT_COLLECTOR_BACKEND_SERVICES
                value: ${SW_BACKEND}
              # Nacos registry / configuration center
              - name: NACOS_HOST
                value: ${NACOS_HOST}
              - name: NACOS_NAMESPACE
                value: ${NACOS_NAMESPACE}
              - name: NACOS_GROUP
                value: ${NACOS_GROUP}
              # Pod name and IP from the downward API; both are referenced
              # by the scripts in the ConfigMap.
              - name: POD_ID
                valueFrom:
                  fieldRef:
                    fieldPath: metadata.name
              - name: POD_IP
                valueFrom:
                  fieldRef:
                    fieldPath: status.podIP
            livenessProbe:
              httpGet:
                path: $HEALTH_PATH
                port: $CONTAINER_PORT
              initialDelaySeconds: 60
              successThreshold: 1
              timeoutSeconds: 10
              failureThreshold: 10
              periodSeconds: 10
            readinessProbe:
              httpGet:
                path: $HEALTH_PATH
                port: $CONTAINER_PORT
              initialDelaySeconds: 30
              timeoutSeconds: 10
              failureThreshold: 30
              periodSeconds: 5
            ports:
              - containerPort: $CONTAINER_PORT
                protocol: TCP
            volumeMounts:
              - name: ur-share
                mountPath: /share
              - name: gc
                mountPath: /home/gc/
              - name: dump
                mountPath: /home/dump/
              - name: scripts
                mountPath: /home/run.sh
                subPath: run.sh
              - name: scripts
                mountPath: /home/preStop.sh
                subPath: preStop.sh
              - name: vol-log
                mountPath: /var/log
                # NOTE(review): `policy` is not a field of the upstream
                # VolumeMount API; presumably a provider-specific log
                # collection extension - confirm before reusing elsewhere.
                policy:
                  logs:
                    rotate: Hourly
                    annotations:
                      format: '{"multi":{"mode":"regular","value":"([0-9]{3}[1-9]|[0-9]{2}[1-9][0-9]{1}|[0-9]{1}[1-9][0-9]{2}|[1-9][0-9]{3})-(((0[13578]|1[02])-(0[1-9]|[12][0-9]|3[01]))|((0[469]|11)-(0[1-9]|[12][0-9]|30))|(02-(0[1-9]|[1][0-9]|2[0-8])))"}}'
                      pathPattern: /info.log
            resources:
              limits:
                cpu: 2
                memory: 4Gi
              requests:
                cpu: 2
                memory: 4Gi
            terminationMessagePath: /dev/termination-log
            terminationMessagePolicy: File
        volumes:
          # Shared folder between the init container and the application
          - name: ur-share
            emptyDir: {}
          - name: vol-log
            emptyDir: {}
          - name: gc
            persistentVolumeClaim:
              claimName: $APP_NAME
          - name: dump
            persistentVolumeClaim:
              claimName: scp-dump
          - name: scripts
            configMap:
              # Octal notation; YAML 1.1 parsers read this as 511.
              defaultMode: 0777
              name: $APP_NAME
        dnsPolicy: ClusterFirst
        imagePullSecrets:
          - name: $HARBOR_CREDENTIAL_ID
        restartPolicy: Always
        # The grace period also covers the preStop hook, and preStop.sh
        # sleeps 40s. With the previous value of 40 the hook used up the
        # whole period, leaving the application no time to react to SIGTERM.
        # 60 leaves roughly 20s after the hook finishes.
        terminationGracePeriodSeconds: 60
---
  # ConfigMap holding the two scripts mounted into the application container:
  #   run.sh     - writes the JVM flags (SkyWalking javaagent, GC logging,
  #                heap dump path) to /home/gc/java_tool_opts.sh, exports
  #                them as JAVA_TOOL_OPTIONS, then starts the jar.
  #   preStop.sh - sends a PUT with enable=false for this pod's IP and port
  #                to the Nacos instance API, then sleeps 40s.
  # `$${p}NAME` and `${${p}NAME}` are written so that `${p}` can be replaced
  # before deployment; presumably it expands to an empty string, leaving
  # `$NAME` / `${NAME}` for the shell at runtime - confirm against the pipeline.
  # The pod's terminationGracePeriodSeconds should exceed the 40s sleep in
  # preStop.sh, since the grace period also covers the preStop hook.
  apiVersion: v1
  kind: ConfigMap
  metadata:
    name: $APP_NAME
    namespace: $NAMESPACE
  data:
    run.sh: |
      #!/bin/bash
      echo -javaagent:/share/agent/skywalking/skywalking-agent.jar \
        -XX:+UseContainerSupport \
        -XX:MaxRAMPercentage=75.0 \
        -XX:MinRAMPercentage=75.0 \
        -XX:InitialRAMPercentage=75.0 \
        -XX:MetaspaceSize=256m \
        -XX:MaxMetaspaceSize=256m \
        -XX:+UseConcMarkSweepGC \
        -XX:+UseCMSCompactAtFullCollection \
        -XX:+CMSClassUnloadingEnabled \
        -XX:CMSInitiatingOccupancyFraction=80 \
        -XX:+UseCMSInitiatingOccupancyOnly \
        -XX:+ExplicitGCInvokesConcurrentAndUnloadsClasses \
        -Dsun.rmi.dgc.server.gcInterval=2592000000 \
        -Dsun.rmi.dgc.client.gcInterval=2592000000 \
        -XX:+UseParNewGC \
        -XX:ParallelGCThreads=4 \
        -XX:SurvivorRatio=8 \
        -XX:+PrintGCDetails \
        -XX:+PrintGCDateStamps \
        -XX:+PrintTenuringDistribution \
        -XX:+PrintHeapAtGC \
        -XX:+PrintReferenceGC \
        -XX:+PrintGCApplicationStoppedTime \
        -XX:+UseGCLogFileRotation \
        -XX:NumberOfGCLogFiles=10 \
        -XX:GCLogFileSize=1000m \
        -Xloggc:/home/gc/gc-$${p}POD_ID-$(date +"%Y-%m-%d-%H-%M-%S").log \
        -XX:+HeapDumpOnOutOfMemoryError \
        -XX:HeapDumpPath=/home/dump/dump-$${p}POD_ID-$(date +"%Y-%m-%d-%H-%M-%S").hprof > /home/gc/java_tool_opts.sh
      export JAVA_TOOL_OPTIONS=$(cat /home/gc/java_tool_opts.sh)
      java -jar ${${p}JAVA_OPTS} -Djava.awt.headless=true -Dsun.net.client.defaultConnectTimeout=10000 -Dsun.net.client.defaultReadTimeout=30000 -Dfile.encoding=utf-8 -Djava.security.egd=file:/dev/./urandom *.jar
    preStop.sh: |
      #!/bin/bash
      curl -XPUT http://${NACOS_HOST}/nacos/v1/ns/instance?serviceName=${APP_NAME}\&groupName=${NACOS_GROUP}\&namespaceId=${NACOS_NAMESPACE}\&ip=$${p}POD_IP\&port=${CONTAINER_PORT}\&enable=false
      sleep 40s
---
  # PersistentVolumeClaim named after the application: 2Gi, ReadWriteMany,
  # storage class sfsturbo-ur-scp. The Deployment references it as the
  # `gc` volume mounted at /home/gc/.
  kind: PersistentVolumeClaim
  apiVersion: v1
  metadata:
    name: $APP_NAME
    namespace: $NAMESPACE
  spec:
    accessModes:
      - ReadWriteMany
    volumeMode: Filesystem
    resources:
      requests:
        storage: 2Gi
    storageClassName: sfsturbo-ur-scp
---
  # HorizontalPodAutoscaler for the application Deployment: 2 to 6 replicas,
  # targeting 60% CPU utilization.
  # NOTE(review): the extendedhpa.* annotations are not part of the upstream
  # autoscaling/v1 API; presumably consumed by a provider-specific
  # controller - confirm before relying on them.
  apiVersion: autoscaling/v1
  kind: HorizontalPodAutoscaler
  metadata:
    annotations:
      extendedhpa.metrics: '[{"type":"Resource","name":"cpu","targetType":"Utilization","targetRange":{"low":"55","high":"65"}}]'
      extendedhpa.option: '{"downscaleWindow":"60m","upscaleWindow":"0m"}'
    name: $APP_NAME
    namespace: $NAMESPACE
  spec:
    minReplicas: 2
    maxReplicas: 6
    scaleTargetRef:
      apiVersion: apps/v1
      kind: Deployment
      name: $APP_NAME
    targetCPUUtilizationPercentage: 60

如何使用java探针注入器

https://skywalking.apache.org/zh/2022-04-19-how-to-use-the-java-agent-injector

https://github.com/apache/skywalking-swck

SWCK 是部署在 Kubernetes 环境中,为 SkyWalking 用户提供服务的平台,用户可以基于该平台使用、升级和维护 SkyWalking 相关组件。
实际上,SWCK 是基于 kubebuilder 开发的Operator,为用户提供自定义资源( CR )以及管理资源的控制器( Controller ),所有的自定义资源定义(CRD)如下所示:

  • JavaAgent
  • OAP
  • UI
  • Storage
  • Satellite
  • Fetcher
java 探针注入器具有以下特点:
  • 透明性。用户应用一般运行在普通容器中而 java 探针则运行在初始化容器中,且两者都属于同一个 pod 。该 pod 中的每个容器都会挂载一个共享内存卷,为 java 探针提供存储路径。在 pod 启动时,初始化容器中的 java 探针会先于应用容器运行,由注入器将其中的探针文件存放在共享内存卷中。在应用容器启动时,注入器通过设置 JVM 参数将探针文件注入到应用程序中。用户可以通过这种方式实现 java 探针的注入,而无需重新构建包含 java 探针的容器镜像。
  • 可配置性。注入器提供两种方式配置 java 探针:全局配置和自定义配置。默认的全局配置存放在 configmap 中,用户可以根据需求修改全局配置,比如修改 backend_service 的地址。此外,用户也能通过 annotation 为特定应用设置自定义的一些配置,比如不同服务的 service_name 名称。详情可见 java探针说明书
  • 可观察性。每个 java 探针在被注入时,用户可以查看名为 JavaAgent 的 CRD 资源,用于观测注入后的 java 探针配置。详情可见 JavaAgent说明