Initialize the servers

# Open the ports required by Kubernetes
firewall-cmd --permanent --add-port=6443/tcp
firewall-cmd --permanent --add-port=2379/tcp
firewall-cmd --permanent --add-port=2380/tcp
firewall-cmd --permanent --add-port=10250/tcp
firewall-cmd --permanent --add-port=10251/tcp
firewall-cmd --permanent --add-port=10252/tcp
firewall-cmd --permanent --add-port=30000-32767/tcp
firewall-cmd --permanent --add-port=8472/udp
firewall-cmd --permanent --add-port=443/tcp
firewall-cmd --permanent --add-port=9099/tcp
firewall-cmd --permanent --add-port=22/tcp
firewall-cmd --permanent --add-port=179/tcp
firewall-cmd --permanent --add-port=6666-6667/tcp
firewall-cmd --permanent --add-port=68/tcp
firewall-cmd --permanent --add-port=53/tcp
firewall-cmd --permanent --add-port=67/tcp
# Enable firewalld IP masquerading: without it, IP forwarding does not work and the DNS add-on fails
firewall-cmd --add-masquerade --permanent
firewall-cmd --reload
# Disable SELinux
sed -i 's/enforcing/disabled/' /etc/selinux/config # permanent
# Time synchronization
systemctl start chronyd
systemctl enable chronyd
yum install ntpdate -y
ntpdate time.windows.com
# Disable swap
swapoff -a
sed -ri 's/.*swap.*/#&/' /etc/fstab
# Let iptables see bridged traffic
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
br_netfilter
EOF
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sudo sysctl --system
# Reboot
# reboot
# Install the required tools on every node
yum install -y conntrack ntpdate ntp ipvsadm ipset jq iptables curl sysstat libseccomp wget vim net-tools git

Network issues & kernel upgrade

kubectl edit statefulset.apps/seata -n mall-xian

Configure DNS mapping inside the container

hostAliases:
- hostnames:
  - lb.kubesphere.local
  ip: 192.168.2.20
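For context, a minimal sketch of where that hostAliases block sits in the StatefulSet edited above (the seata name and mall-xian namespace come from the kubectl edit command; the container name and image are placeholders, and other required StatefulSet fields are omitted):

# Fragment only; hostAliases belongs under spec.template.spec of the workload
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: seata
  namespace: mall-xian
spec:
  template:
    spec:
      hostAliases:                    # adds the entry to /etc/hosts in every container of the pod
      - ip: 192.168.2.20
        hostnames:
        - lb.kubesphere.local
      containers:
      - name: seata                   # placeholder container name
        image: seataio/seata-server   # placeholder image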

The service registry advertises internal IPs, and the local machine cannot reach the internal IPs of the Kubernetes nodes.
Switch kube-proxy to ipvs (see the sketch after these links): https://www.jianshu.com/p/31b161b99dc6
Why the kernel needs to be upgraded: https://blog.csdn.net/cljdsc/article/details/115701562
How to upgrade it: https://www.cnblogs.com/varden/p/15178853.html
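A minimal sketch of switching kube-proxy to ipvs on a kubeadm/KubeKey-style cluster, assuming kube-proxy runs as a DaemonSet in kube-system with its settings in the kube-proxy ConfigMap (verify the names in your cluster first):

# change mode: "" (or "iptables") to mode: "ipvs"
kubectl -n kube-system edit configmap kube-proxy
# recreate the kube-proxy pods so they pick up the new mode
kubectl -n kube-system delete pod -l k8s-app=kube-proxy
# confirm that ipvs rules are being programmed
ipvsadm -Ln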

Upgrade the kernel

lsmod |grep ip_vs
uname -r
rpm --import https://www.elrepo.org/RPM-GPG-KEY-elrepo.org
systemctl stop kubelet
systemctl stop etcd
systemctl stop docker
yum install -y https://www.elrepo.org/elrepo-release-7.el7.elrepo.noarch.rpm
yum list available --disablerepo=* --enablerepo=elrepo-kernel
yum install -y kernel-lt-5.4.189-1.el7.elrepo --enablerepo=elrepo-kernel
cat /boot/grub2/grub.cfg | grep menuentry
grub2-editenv list
grub2-set-default "CentOS Linux (5.4.189-1.el7.elrepo.x86_64) 7 (Core)"
grub2-editenv list
reboot
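The ipvs kernel modules also have to be loaded on every node before kube-proxy can use ipvs mode. A common sketch (module names are the usual set; on kernels >= 4.19, such as the 5.4 kernel installed above, nf_conntrack replaces nf_conntrack_ipv4):

cat > /etc/sysconfig/modules/ipvs.modules << 'EOF'
#!/bin/bash
modprobe -- ip_vs
modprobe -- ip_vs_rr
modprobe -- ip_vs_wrr
modprobe -- ip_vs_sh
modprobe -- nf_conntrack
EOF
chmod 755 /etc/sysconfig/modules/ipvs.modules
bash /etc/sysconfig/modules/ipvs.modules
lsmod | grep -e ip_vs -e nf_conntrack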

Download kk

export KKZONE=cn
curl -sfL https://get-kk.kubesphere.io | VERSION=v2.0.0 sh -
chmod +x kk

Install the cluster

./kk create config --with-kubesphere v3.2.1 --with-kubernetes v1.21.5
./kk create cluster -f config-sample.yaml
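Once kk starts deploying KubeSphere, installation progress can be followed with the standard log command from the KubeSphere docs:

kubectl logs -n kubesphere-system $(kubectl get pod -n kubesphere-system -l app=ks-install -o jsonpath='{.items[0].metadata.name}') -f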

Configuration file for installing Kubernetes and KubeSphere

conf.yaml

apiVersion: kubekey.kubesphere.io/v1alpha1
kind: Cluster
metadata:
  name: sample
spec:
  hosts:
  - {name: master, address: 192.168.2.20, internalAddress: 192.168.2.20, user: root, password: root}
  - {name: node1, address: 192.168.2.21, internalAddress: 192.168.2.21, user: root, password: root}
  - {name: node2, address: 192.168.2.22, internalAddress: 192.168.2.22, user: root, password: root}
  roleGroups:
    etcd:
    - master
    master:
    - master
    worker:
    - master
    - node1
    - node2
  controlPlaneEndpoint:
    ## Internal loadbalancer for apiservers
    internalLoadbalancer: haproxy
    domain: lb.kubesphere.local
    address: ""
    port: 6443
  kubernetes:
    version: v1.20.4
    imageRepo: kubesphere
    clusterName: cluster.local
  network:
    plugin: calico
    kubePodsCIDR: 10.233.64.0/18
    kubeServiceCIDR: 10.233.0.0/18
  registry:
    registryMirrors: []
    insecureRegistries: []
  addons: []
---
apiVersion: installer.kubesphere.io/v1alpha1
kind: ClusterConfiguration
metadata:
  name: ks-installer
  namespace: kubesphere-system
  labels:
    version: v3.1.1
spec:
  persistence:
    storageClass: ""
  authentication:
    jwtSecret: ""
  zone: ""
  local_registry: ""
  etcd:
    monitoring: true
    endpointIps: localhost
    port: 2379
    tlsEnable: true
  common:
    redis:
      enabled: false
    redisVolumSize: 2Gi
    openldap:
      enabled: false
    openldapVolumeSize: 2Gi
    minioVolumeSize: 20Gi
    monitoring:
      endpoint: http://prometheus-operated.kubesphere-monitoring-system.svc:9090
    es:
      elasticsearchMasterVolumeSize: 4Gi
      elasticsearchDataVolumeSize: 20Gi
      logMaxAge: 7
      elkPrefix: logstash
      basicAuth:
        enabled: false
        username: ""
        password: ""
      externalElasticsearchUrl: ""
      externalElasticsearchPort: ""
  console:
    enableMultiLogin: true
    port: 30880
  # Alerting is an important part of observability, closely related to monitoring and logging. Combined with
  # KubeSphere's proactive failure notification system, it lets users learn about activities of interest based on alerting policies.
  alerting:
    enabled: true
    # thanosruler:
    #   replicas: 1
    #   resources: {}
  # The auditing system provides a chronological, security-relevant set of records documenting the activities of
  # individual users, administrators, and other system components.
  auditing:
    enabled: true
  devops:
    enabled: true
    jenkinsMemoryLim: 2Gi
    jenkinsMemoryReq: 1500Mi
    jenkinsVolumeSize: 8Gi
    jenkinsJavaOpts_Xms: 512m
    jenkinsJavaOpts_Xmx: 512m
    jenkinsJavaOpts_MaxRAM: 2g
  # The events system lets users track what happens inside the cluster, such as node scheduling status and image pull
  # results. Events are recorded and shown in the web console with the specific reason, status, and message.
  events:
    enabled: true # By default, KubeKey installs the built-in Elasticsearch when the events system is enabled
    ruler:
      enabled: true
      replicas: 2
  logging:
    enabled: true # By default, KubeKey installs the built-in Elasticsearch when the logging system is enabled
    logsidecar:
      enabled: true
      replicas: 2
  # Provides the Horizontal Pod Autoscaler (HPA) for deployments. In KubeSphere, Metrics Server controls whether HPA is enabled.
  metrics_server:
    enabled: true
  monitoring:
    storageClass: ""
    prometheusMemoryRequest: 400Mi
    prometheusVolumeSize: 20Gi
  multicluster:
    clusterRole: none
  network:
    networkpolicy: # Network policies let users isolate traffic within the same cluster
      enabled: true
    ippool:
      type: calico # change "none" to "calico"
    # Enable the service topology to integrate Weave Scope (a visualization and monitoring tool for Docker and Kubernetes).
    # Weave Scope uses established APIs to collect information and build a topology graph of apps and containers; the
    # service topology is shown in your project and visualizes the connections between services.
    topology:
      type: weave-scope # change "none" to "weave-scope"
  openpitrix: # Provides a Helm-based app store for application lifecycle management
    store:
      enabled: true
  # The KubeSphere service mesh is based on Istio and visualizes microservice governance and traffic management. It ships a
  # powerful toolkit including circuit breaking, blue-green deployment, canary release, traffic mirroring, tracing, observability, and traffic control.
  servicemesh:
    enabled: true
  kubeedge: # KubeEdge is an open-source system that extends containerized application orchestration to hosts at the edge
    enabled: false
    cloudCore:
      nodeSelector: {"node-role.kubernetes.io/worker": ""}
      tolerations: []
      cloudhubPort: "10000"
      cloudhubQuicPort: "10001"
      cloudhubHttpsPort: "10002"
      cloudstreamPort: "10003"
      tunnelPort: "10004"
      cloudHub:
        advertiseAddress:
        - ""
        nodeLimit: "100"
      service:
        cloudhubNodePort: "30000"
        cloudhubQuicNodePort: "30001"
        cloudhubHttpsNodePort: "30002"
        cloudstreamNodePort: "30003"
        tunnelNodePort: "30004"
    edgeWatcher:
      nodeSelector: {"node-role.kubernetes.io/worker": ""}
      tolerations: []
      edgeWatcherAgent:
        nodeSelector: {"node-role.kubernetes.io/worker": ""}
        tolerations: []
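The file is then passed to kk the same way as the create cluster command above (assuming it was saved as conf.yaml, per the heading):

./kk create cluster -f conf.yaml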

Uninstall pluggable components after installation

kubectl -n kubesphere-system edit clusterconfiguration ks-installer
Or do it from the web console.

apiVersion: installer.kubesphere.io/v1alpha1
kind: ClusterConfiguration
metadata:
  annotations:
    kubectl.kubernetes.io/last-applied-configuration: >
      {"apiVersion":"installer.kubesphere.io/v1alpha1","kind":"ClusterConfiguration","metadata":{"annotations":{},"labels":{"version":"v3.1.1"},"name":"ks-installer","namespace":"kubesphere-system"},"spec":{"alerting":{"enabled":false},"auditing":{"enabled":false},"authentication":{"jwtSecret":""},"common":{"es":{"basicAuth":{"enabled":false,"password":"","username":""},"elasticsearchDataVolumeSize":"20Gi","elasticsearchMasterVolumeSize":"4Gi","elkPrefix":"logstash","externalElasticsearchPort":"","externalElasticsearchUrl":"","logMaxAge":7},"minioVolumeSize":"20Gi","monitoring":{"endpoint":"http://prometheus-operated.kubesphere-monitoring-system.svc:9090"},"openldap":{"enabled":false},"openldapVolumeSize":"2Gi","redis":{"enabled":false},"redisVolumSize":"2Gi"},"console":{"enableMultiLogin":true,"port":30880},"devops":{"enabled":false,"jenkinsJavaOpts_MaxRAM":"2g","jenkinsJavaOpts_Xms":"512m","jenkinsJavaOpts_Xmx":"512m","jenkinsMemoryLim":"2Gi","jenkinsMemoryReq":"1500Mi","jenkinsVolumeSize":"8Gi"},"etcd":{"endpointIps":"192.168.2.20","monitoring":false,"port":2379,"tlsEnable":true},"events":{"enabled":false,"ruler":{"enabled":true,"replicas":2}},"kubeedge":{"cloudCore":{"cloudHub":{"advertiseAddress":[""],"nodeLimit":"100"},"cloudhubHttpsPort":"10002","cloudhubPort":"10000","cloudhubQuicPort":"10001","cloudstreamPort":"10003","nodeSelector":{"node-role.kubernetes.io/worker":""},"service":{"cloudhubHttpsNodePort":"30002","cloudhubNodePort":"30000","cloudhubQuicNodePort":"30001","cloudstreamNodePort":"30003","tunnelNodePort":"30004"},"tolerations":[],"tunnelPort":"10004"},"edgeWatcher":{"edgeWatcherAgent":{"nodeSelector":{"node-role.kubernetes.io/worker":""},"tolerations":[]},"nodeSelector":{"node-role.kubernetes.io/worker":""},"tolerations":[]},"enabled":false},"logging":{"enabled":false,"logsidecar":{"enabled":true,"replicas":2}},"metrics_server":{"enabled":false},"monitoring":{"prometheusMemoryRequest":"400Mi","prometheusVolumeSize":"20Gi","storageClass":""},"multicluster":{"clusterRole":"none"},"network":{"ippool":{"type":"none"},"networkpolicy":{"enabled":false},"topology":{"type":"none"}},"openpitrix":{"store":{"enabled":false}},"persistence":{"storageClass":""},"servicemesh":{"enabled":false},"zone":"cn"}}
  labels:
    version: v3.1.1
  name: ks-installer
  namespace: kubesphere-system
spec:
  alerting:
    enabled: false
  auditing:
    enabled: false
  authentication:
    jwtSecret: ''
  common:
    es:
      basicAuth:
        enabled: false
        password: ''
        username: ''
      elasticsearchDataVolumeSize: 20Gi
      elasticsearchMasterVolumeSize: 4Gi
      elkPrefix: logstash
      externalElasticsearchPort: ''
      externalElasticsearchUrl: ''
      logMaxAge: 7
    minioVolumeSize: 20Gi
    monitoring:
      endpoint: 'http://prometheus-operated.kubesphere-monitoring-system.svc:9090'
    openldap:
      enabled: false
    openldapVolumeSize: 2Gi
    redis:
      enabled: false
    redisVolumSize: 2Gi
  console:
    enableMultiLogin: true
    port: 30880
  devops:
    enabled: true
    jenkinsJavaOpts_MaxRAM: 2g
    jenkinsJavaOpts_Xms: 512m
    jenkinsJavaOpts_Xmx: 512m
    jenkinsMemoryLim: 2Gi
    jenkinsMemoryReq: 1500Mi
    jenkinsVolumeSize: 8Gi
  etcd:
    endpointIps: 192.168.2.20
    monitoring: true
    port: 2379
    tlsEnable: true
  events:
    enabled: false
    ruler:
      enabled: false
      replicas: 2
  kubeedge:
    cloudCore:
      cloudHub:
        advertiseAddress:
          - ''
        nodeLimit: '100'
      cloudhubHttpsPort: '10002'
      cloudhubPort: '10000'
      cloudhubQuicPort: '10001'
      cloudstreamPort: '10003'
      nodeSelector:
        node-role.kubernetes.io/worker: ''
      service:
        cloudhubHttpsNodePort: '30002'
        cloudhubNodePort: '30000'
        cloudhubQuicNodePort: '30001'
        cloudstreamNodePort: '30003'
        tunnelNodePort: '30004'
      tolerations: []
      tunnelPort: '10004'
    edgeWatcher:
      edgeWatcherAgent:
        nodeSelector:
          node-role.kubernetes.io/worker: ''
        tolerations: []
      nodeSelector:
        node-role.kubernetes.io/worker: ''
      tolerations: []
    enabled: false
  logging:
    enabled: false
    logsidecar:
      enabled: false
      replicas: 3
  metrics_server:
    enabled: true
  monitoring:
    prometheusMemoryRequest: 400Mi
    prometheusVolumeSize: 20Gi
    storageClass: ''
  multicluster:
    clusterRole: none
  network:
    ippool:
      type: calico
    networkpolicy:
      enabled: true
    topology:
      type: weave-scope
  openpitrix:
    store:
      enabled: false
  persistence:
    storageClass: ''
  servicemesh:
    enabled: false
  zone: cn
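After saving the edit, ks-installer reconciles the change automatically; one way to confirm is to tail its log again (selector-based variant, assuming the same app=ks-install pod label):

kubectl logs -n kubesphere-system -l app=ks-install -f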

Delete a namespace

# 1. Normally, delete the namespace with
kubectl delete namespaces <YOUR-NAMESPACE-NAME>
# It may then hang in Terminating
#**********************************
# Note: the grep strings in steps 2-5 must be keywords known to belong to the namespace being deleted.
# Steps 2-5 are optional; you can jump straight from step 1 to step 6.
# 2. Delete PVs in the Released state (PVs are not namespaced)
kubectl get pv | grep "Released" | awk '{print $1}' | xargs kubectl delete pv
# 3. Delete CRDs (deletes all matching ones, be careful!)
#kubectl get crd | grep kubesphere | awk '{print $1}' | xargs kubectl delete crd
# 4. Delete clusterroles (deletes all matching ones, be careful!)
#kubectl get clusterrole | grep kubesphere | cut -d ' ' -f 1 | xargs kubectl delete clusterrole
# 5. Delete clusterrolebindings (deletes all matching ones, be careful!)
#kubectl get clusterrolebindings.rbac.authorization.k8s.io | grep kubesphere | cut -d ' ' -f 1 | xargs kubectl delete clusterrolebindings.rbac.authorization.k8s.io
#**********************************
# 6. Check what is left in the namespace
kubectl get all -n <YOUR-NAMESPACE-NAME>
# "No resources found" is what you want to see
# Then check the namespace status
kubectl get ns
# If it still shows Terminating
# 7. Force delete
kubectl delete ns <YOUR-NAMESPACE-NAME> --grace-period=0 --force
kubectl get ns
# If it still shows Terminating, see the finalizer sketch below
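If the namespace is still stuck in Terminating, a widely used workaround (not a KubeSphere-specific step) is to clear its finalizers through the finalize subresource. This sketch assumes jq is installed and uses a placeholder namespace name:

NS=<YOUR-NAMESPACE-NAME>
kubectl get namespace "$NS" -o json \
  | jq '.spec.finalizers = []' \
  | kubectl replace --raw "/api/v1/namespaces/$NS/finalize" -f -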

One-stop deployment with KubeSphere

Configure credentials

Configure the Maven repository

  • Log in to KubeSphere as admin
  • Go to cluster management
  • Go to the configuration center
  • Find the ConfigMap
    • ks-devops-agent
    • Edit this ConfigMap and add the Aliyun Maven mirror address

Remote private Maven repository: https://packages.aliyun.com/repos/2212269-snapshot-Sxd58p/packages

<?xml version="1.0" encoding="UTF-8"?>
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
          xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">
  <mirrors>
    <mirror>
      <id>mirror</id>
      <mirrorOf>central,jcenter,!rdc-releases,!rdc-snapshots</mirrorOf>
      <name>mirror</name>
      <url>https://maven.aliyun.com/nexus/content/groups/public</url>
    </mirror>
  </mirrors>
  <servers>
    <server>
      <id>rdc-releases</id>
      <username>624c4b6ad12a1c6958d94bcb</username>
      <password>zhZm4kFQFp_q</password>
    </server>
    <server>
      <id>rdc-snapshots</id>
      <username>624c4b6ad12a1c6958d94bcb</username>
      <password>zhZm4kFQFp_q</password>
    </server>
  </servers>
  <profiles>
    <profile>
      <id>rdc</id>
      <properties>
        <altReleaseDeploymentRepository>
          rdc-releases::default::https://packages.aliyun.com/maven/repository/2212269-release-QNwAAi/
        </altReleaseDeploymentRepository>
        <altSnapshotDeploymentRepository>
          rdc-snapshots::default::https://packages.aliyun.com/maven/repository/2212269-snapshot-Sxd58p/
        </altSnapshotDeploymentRepository>
      </properties>
      <repositories>
        <repository>
          <id>central</id>
          <url>https://maven.aliyun.com/nexus/content/groups/public</url>
          <releases>
            <enabled>true</enabled>
          </releases>
          <snapshots>
            <enabled>false</enabled>
          </snapshots>
        </repository>
        <repository>
          <id>snapshots</id>
          <url>https://maven.aliyun.com/nexus/content/groups/public</url>
          <releases>
            <enabled>false</enabled>
          </releases>
          <snapshots>
            <enabled>true</enabled>
          </snapshots>
        </repository>
        <repository>
          <id>rdc-releases</id>
          <url>https://packages.aliyun.com/maven/repository/2212269-release-QNwAAi/</url>
          <releases>
            <enabled>true</enabled>
          </releases>
          <snapshots>
            <enabled>false</enabled>
          </snapshots>
        </repository>
        <repository>
          <id>rdc-snapshots</id>
          <url>https://packages.aliyun.com/maven/repository/2212269-snapshot-Sxd58p/</url>
          <releases>
            <enabled>false</enabled>
          </releases>
          <snapshots>
            <enabled>true</enabled>
          </snapshots>
        </repository>
      </repositories>
      <pluginRepositories>
        <pluginRepository>
          <id>central</id>
          <url>https://maven.aliyun.com/nexus/content/groups/public</url>
          <releases>
            <enabled>true</enabled>
          </releases>
          <snapshots>
            <enabled>false</enabled>
          </snapshots>
        </pluginRepository>
        <pluginRepository>
          <id>snapshots</id>
          <url>https://maven.aliyun.com/nexus/content/groups/public</url>
          <releases>
            <enabled>false</enabled>
          </releases>
          <snapshots>
            <enabled>true</enabled>
          </snapshots>
        </pluginRepository>
        <pluginRepository>
          <id>rdc-releases</id>
          <url>https://packages.aliyun.com/maven/repository/2212269-release-QNwAAi/</url>
          <releases>
            <enabled>true</enabled>
          </releases>
          <snapshots>
            <enabled>false</enabled>
          </snapshots>
        </pluginRepository>
        <pluginRepository>
          <id>rdc-snapshots</id>
          <url>https://packages.aliyun.com/maven/repository/2212269-snapshot-Sxd58p/</url>
          <releases>
            <enabled>false</enabled>
          </releases>
          <snapshots>
            <enabled>true</enabled>
          </snapshots>
        </pluginRepository>
      </pluginRepositories>
    </profile>
  </profiles>
  <activeProfiles>
    <activeProfile>rdc</activeProfile>
  </activeProfiles>
</settings>
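With the rdc profile listed in activeProfiles, the altReleaseDeploymentRepository/altSnapshotDeploymentRepository properties are picked up by the maven-deploy-plugin, so publishing to the Aliyun private repositories is a plain deploy; a usage sketch:

mvn clean deploy -Dmaven.test.skip=true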

Configure the Gitee account credential

gitee-token

Configure the Docker account credential

docker-hub-token
DOCKER_USERNAME
DOCKER_PASSWORD

Configure the Kubernetes credential

kubernetes-token

Package with Maven

sh 'ls'
sh 'mvn clean package -Dmaven.test.skip=true'
sh 'ls hospital-manage/target'
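For context, a sketch of the KubeSphere DevOps pipeline stages these steps usually live in (the maven agent label is the KubeSphere built-in one, the gitee-token credential is the one configured above, and the repository URL is a placeholder):

pipeline {
  agent {
    node { label 'maven' }   // KubeSphere built-in Maven agent
  }
  stages {
    stage('clone code') {
      steps {
        git url: 'https://gitee.com/<your-repo>.git', credentialsId: 'gitee-token', branch: 'master'
      }
    }
    stage('build & package') {
      steps {
        sh 'ls'
        sh 'mvn clean package -Dmaven.test.skip=true'
        sh 'ls hospital-manage/target'
      }
    }
  }
}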

Docker login, build, and push

sh 'docker build -f Dockerfile -t $DOCKERHUB_NAMESPACE/$APP_NAME:SNAPSHOT-$BRANCH_NAME-$BUILD_NUMBER .'
withCredentials([usernamePassword(credentialsId : 'docker-hub-token' ,passwordVariable : 'DOCKER_PASSWORD' ,usernameVariable : 'DOCKER_USERNAME' ,)]) {
  sh 'echo "$DOCKER_PASSWORD" | docker login $REGISTRY -u "$DOCKER_USERNAME" --password-stdin'
  sh 'docker push $DOCKERHUB_NAMESPACE/$APP_NAME:SNAPSHOT-$BRANCH_NAME-$BUILD_NUMBER'
}
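These steps rely on pipeline-level variables; a sketch of the environment block they assume (only the docker-hub-token credential id comes from the sections above, the values here are examples):

environment {
  REGISTRY = 'docker.io'                  // example registry address
  DOCKERHUB_NAMESPACE = 'your-namespace'  // example Docker Hub namespace
  APP_NAME = 'hospital-manage'            // example module name
  // BRANCH_NAME and BUILD_NUMBER are provided by Jenkins multibranch builds
}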

Back up and restore etcd

https://www.cnblogs.com/yuhaohao/p/13214515.html

# Stop on every node
systemctl stop kubelet && systemctl stop etcd
cd /var/backups/kube_etcd
rm -rf /var/lib/etcd
etcdctl snapshot restore etcd-2022-04-06-21-30-01/snapshot.db --data-dir /var/lib/etcd
# Start on every node
systemctl start kubelet && systemctl start etcd
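The restore above consumes a snapshot taken earlier; a minimal sketch of creating one with etcdctl v3 (the certificate paths are placeholders; check /etc/ssl/etcd/ssl on a KubeKey install or /etc/kubernetes/pki/etcd on a kubeadm install):

export ETCDCTL_API=3
BACKUP_DIR=/var/backups/kube_etcd/etcd-$(date +%Y-%m-%d-%H-%M-%S)
mkdir -p "$BACKUP_DIR"
etcdctl snapshot save "$BACKUP_DIR/snapshot.db" \
  --endpoints=https://127.0.0.1:2379 \
  --cacert=<path-to-etcd-ca.pem> \
  --cert=<path-to-etcd-client.pem> \
  --key=<path-to-etcd-client-key.pem>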

Set up MinIO object storage

# Install docker
yum install docker-ce -y && systemctl enable docker && service docker start
# Create the mount directory
mkdir -p /private/mnt/data
# Run minio; the password must be at least 8 characters
docker pull minio/minio
firewall-cmd --permanent --add-port=9000-9001/tcp
firewall-cmd --reload
docker run -di -p 9000:9000 \
  -p 9001:9001 \
  --name minio2 --restart=always \
  -v /private/mnt/data:/data \
  -e "MINIO_ACCESS_KEY=minio" \
  -e "MINIO_SECRET_KEY=minio123" \
  minio/minio server /data \
  --console-address ":9001"

Create a bucket:

backup
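The backup bucket can be created in the MinIO console, or from the command line with the mc client; a sketch using the credentials from the docker run above and the MinIO address used by Velero below:

docker run --rm --entrypoint=/bin/sh minio/mc -c \
  "mc alias set local http://192.168.2.10:9000 minio minio123 && mc mb local/backup"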

Install Velero on every master node

Used to keep the etcd backups of each machine in sync.

# Download velero
wget https://github.com/vmware-tanzu/velero/releases/download/v1.7.2/velero-v1.7.2-linux-amd64.tar.gz
tar -zxvf velero-v1.7.2-linux-amd64.tar.gz
# Clean up any previous installation first
kubectl delete namespace/velero clusterrolebinding/velero
kubectl delete crds -l component=velero
cd /root/velero-v1.7.2-linux-amd64
\cp -rf velero /usr/local/bin/
# Create the minio credentials file
cat > credentials-velero << EOF
[default]
aws_access_key_id = minio
aws_secret_access_key = minio123
EOF
velero install \
  --provider aws \
  --plugins velero/velero-plugin-for-aws:v1.3.1 \
  --bucket backup \
  --use-restic \
  --secret-file ./credentials-velero \
  --use-volume-snapshots=false \
  --backup-location-config region=minio,s3ForcePathStyle="true",s3Url=http://192.168.2.10:9000
kubectl get all -n velero

References: https://blog.csdn.net/weixin_42143049/article/details/115757747
https://zhuanlan.zhihu.com/p/441954396

Velero commands

kubectl get backupstoragelocations.velero.io -n velero
kubectl describe backupstoragelocations.velero.io -n velero
velero get backup-locations
velero backup get
velero version
velero backup -h
velero backup logs
velero backup logs back-test
kubectl logs -f deploy/velero -n velero
# Create a scheduled backup: every 6 hours
velero schedule create <SCHEDULE_NAME> \
  --include-namespaces <NAMESPACE> \
  --include-resources='deployments.apps,replicasets.apps,deployments.extensions,replicasets.extensions,pods,Service,StatefulSet,Ingress,ConfigMap,Secret' \
  --schedule="@every 6h"
# Create a daily backup
velero schedule create <SCHEDULE NAME> --schedule "0 7 * * *"
# Create a backup from a schedule
velero backup create --from-schedule example-schedule
# Create a backup
velero backup create backup-demo1 --include-namespaces test --default-volumes-to-restic --include-cluster-resources
# *** Before restoring, stop kubelet, etcd, and docker on every machine in the cluster ***
systemctl stop kubelet
systemctl stop etcd
systemctl stop docker
# Restore
velero restore create --from-backup <BACKUP_NAME> --namespace-mappings <NAMESPACE>:bak-<NAMESPACE>
# *** After restoring, start docker, etcd, and kubelet on every machine in the cluster ***
systemctl start docker
systemctl start etcd
systemctl start kubelet

Batch start/stop all cluster nodes

# Stop the cluster
nodes='master node1 node2'
for node in ${nodes[@]}
do
  echo "==== Stop docker on $node ===="
  ssh root@$node systemctl stop docker
done

# Start the cluster
nodes='master node1 node2'
for node in ${nodes[@]}
do
  echo "==== Start docker on $node ===="
  ssh root@$node systemctl start docker
done

Troubleshooting notes

check file permission: directory "/var/lib/etcd" exist, but the permission is "drwxr-xr-x". The recommended permission is "-rwx------" to prevent possible unprivileged access to the data.

chmod -R 700 /var/lib/etcd

Error listing backups in backup store" backupLocation=default controller=backup-sync error="rpc error: code = Unknown desc = RequestTimeTooSkewed: The difference between the request time and the server's time is too large

The node clocks are out of sync.
# Time synchronization
systemctl start chronyd
systemctl enable chronyd
yum install ntpdate -y
ntpdate time.windows.com

References

Install KubeSphere on-premises (air-gapped): https://kubesphere.com.cn/docs/installing-on-kubernetes/on-prem-kubernetes/install-ks-on-linux-airgapped/
Enable the logging system: https://kubesphere.com.cn/docs/pluggable-components/logging/
KubeSphere (KubeKey) docs: https://github.com/kubesphere/kubekey/blob/master/README_zh-CN.md
Kubernetes network troubleshooting: https://kubernetes.feisky.xyz/troubleshooting/network