1. 集群升级
# 查看RKE支持的Kubernetes版本
rke config --list-version --all

修改cluster.yml中定义的Kubernetes版本（取值必须是上一步命令列出的版本之一）
kubernetes_version: v1.17.4-rancher1-1

执行 rke up 操作完成升级
rke up --config ./cluster.yml

  1. [liwm@rmaster01 ~]$ kubectl get nodes
  2. NAME STATUS ROLES AGE VERSION
  3. node01 Ready worker 21d v1.17.2
  4. node02 Ready worker 21d v1.17.2
  5. rmaster01 Ready controlplane,etcd 21d v1.17.2
  6. rmaster02 Ready controlplane,etcd 21d v1.17.2
  7. rmaster03 Ready controlplane,etcd 21d v1.17.2
  8. [liwm@rmaster01 ~]$
  9. [rancher@rmaster01 ~]$ rke config --list-version --all
  10. v1.16.8-rancher1-1
  11. v1.17.4-rancher1-1
  12. v1.15.11-rancher1-1
  13. [rancher@rmaster01 ~]$ vim cluster.yml
  14. [rancher@rmaster01 ~]$
  15. [rancher@rmaster01 ~]$ rke up --config cluster.yml
  16. INFO[0000] Running RKE version: v1.0.5
  17. INFO[0000] Initiating Kubernetes cluster
  18. INFO[0000] [certificates] Generating admin certificates and kubeconfig
  19. INFO[0000] Successfully Deployed state file at [./cluster.rkestate]
  20. INFO[0000] Building Kubernetes cluster
  21. INFO[0000] [dialer] Setup tunnel for host [192.168.31.133]
  22. INFO[0000] [dialer] Setup tunnel for host [192.168.31.132]
  23. INFO[0000] [dialer] Setup tunnel for host [192.168.31.131]
  24. INFO[0000] [dialer] Setup tunnel for host [192.168.31.134]
  25. INFO[0000] [dialer] Setup tunnel for host [192.168.31.130]
  26. INFO[0001] [network] No hosts added existing cluster, skipping port check
  27. INFO[0001] [certificates] Deploying kubernetes certificates to Cluster nodes
  28. INFO[0001] Checking if container [cert-deployer] is running on host [192.168.31.133], try #1
  29. INFO[0001] Checking if container [cert-deployer] is running on host [192.168.31.134], try #1
  30. INFO[0001] Checking if container [cert-deployer] is running on host [192.168.31.131], try #1
  31. INFO[0001] Checking if container [cert-deployer] is running on host [192.168.31.130], try #1
  32. INFO[0001] Checking if container [cert-deployer] is running on host [192.168.31.132], try #1
  33. INFO[0001] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.133]
  34. INFO[0001] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.132]
  35. INFO[0001] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.130]
  36. INFO[0001] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.134]
  37. INFO[0001] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.131]
  38. INFO[0001] Starting container [cert-deployer] on host [192.168.31.132], try #1
  39. INFO[0001] Starting container [cert-deployer] on host [192.168.31.134], try #1
  40. INFO[0002] Starting container [cert-deployer] on host [192.168.31.130], try #1
  41. INFO[0002] Starting container [cert-deployer] on host [192.168.31.133], try #1
  42. INFO[0003] Checking if container [cert-deployer] is running on host [192.168.31.132], try #1
  43. INFO[0004] Checking if container [cert-deployer] is running on host [192.168.31.133], try #1
  44. INFO[0004] Starting container [cert-deployer] on host [192.168.31.131], try #1
  45. INFO[0004] Checking if container [cert-deployer] is running on host [192.168.31.130], try #1
  46. INFO[0004] Checking if container [cert-deployer] is running on host [192.168.31.134], try #1
  47. INFO[0006] Checking if container [cert-deployer] is running on host [192.168.31.131], try #1
  48. INFO[0008] Checking if container [cert-deployer] is running on host [192.168.31.132], try #1
  49. INFO[0008] Removing container [cert-deployer] on host [192.168.31.132], try #1
  50. INFO[0009] Checking if container [cert-deployer] is running on host [192.168.31.133], try #1
  51. INFO[0009] Removing container [cert-deployer] on host [192.168.31.133], try #1
  52. INFO[0009] Checking if container [cert-deployer] is running on host [192.168.31.130], try #1
  53. INFO[0009] Removing container [cert-deployer] on host [192.168.31.130], try #1
  54. INFO[0009] Checking if container [cert-deployer] is running on host [192.168.31.134], try #1
  55. INFO[0009] Removing container [cert-deployer] on host [192.168.31.134], try #1
  56. INFO[0011] Checking if container [cert-deployer] is running on host [192.168.31.131], try #1
  57. INFO[0011] Removing container [cert-deployer] on host [192.168.31.131], try #1
  58. INFO[0011] [reconcile] Rebuilding and updating local kube config
  59. INFO[0011] Successfully Deployed local admin kubeconfig at [./kube_config_cluster.yml]
  60. INFO[0011] [reconcile] host [192.168.31.130] is active master on the cluster
  61. INFO[0011] [certificates] Successfully deployed kubernetes certificates to Cluster nodes
  62. INFO[0011] [reconcile] Reconciling cluster state
  63. INFO[0011] [reconcile] Check etcd hosts to be deleted
  64. INFO[0011] [reconcile] Check etcd hosts to be added
  65. INFO[0011] [reconcile] Rebuilding and updating local kube config
  66. INFO[0011] Successfully Deployed local admin kubeconfig at [./kube_config_cluster.yml]
  67. INFO[0011] [reconcile] host [192.168.31.130] is active master on the cluster
  68. INFO[0011] [reconcile] Reconciled cluster state successfully
  69. INFO[0011] Pre-pulling kubernetes images
  70. INFO[0011] Pulling image [rancher/hyperkube:v1.17.4-rancher1] on host [192.168.31.133], try #1
  71. INFO[0011] Pulling image [rancher/hyperkube:v1.17.4-rancher1] on host [192.168.31.130], try #1
  72. INFO[0011] Pulling image [rancher/hyperkube:v1.17.4-rancher1] on host [192.168.31.131], try #1
  73. INFO[0011] Pulling image [rancher/hyperkube:v1.17.4-rancher1] on host [192.168.31.132], try #1
  74. INFO[0011] Pulling image [rancher/hyperkube:v1.17.4-rancher1] on host [192.168.31.134], try #1
  75. INFO[0459] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.130]
  76. INFO[0640] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.132]
  77. INFO[0840] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.134]
  78. INFO[0903] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.133]
  79. INFO[1021] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.131]
  80. INFO[1021] Kubernetes images pulled successfully
  81. INFO[1021] [etcd] Building up etcd plane..
  82. INFO[1021] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.130]
  83. INFO[1023] Starting container [etcd-fix-perm] on host [192.168.31.130], try #1
  84. INFO[1025] Successfully started [etcd-fix-perm] container on host [192.168.31.130]
  85. INFO[1025] Waiting for [etcd-fix-perm] container to exit on host [192.168.31.130]
  86. INFO[1025] Waiting for [etcd-fix-perm] container to exit on host [192.168.31.130]
  87. INFO[1025] Container [etcd-fix-perm] is still running on host [192.168.31.130]
  88. INFO[1026] Waiting for [etcd-fix-perm] container to exit on host [192.168.31.130]
  89. INFO[1026] Removing container [etcd-fix-perm] on host [192.168.31.130], try #1
  90. INFO[1028] [remove/etcd-fix-perm] Successfully removed container on host [192.168.31.130]
  91. INFO[1028] [etcd] Running rolling snapshot container [etcd-snapshot-once] on host [192.168.31.130]
  92. INFO[1028] Removing container [etcd-rolling-snapshots] on host [192.168.31.130], try #1
  93. INFO[1029] [remove/etcd-rolling-snapshots] Successfully removed container on host [192.168.31.130]
  94. INFO[1029] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.130]
  95. INFO[1029] Starting container [etcd-rolling-snapshots] on host [192.168.31.130], try #1
  96. INFO[1030] [etcd] Successfully started [etcd-rolling-snapshots] container on host [192.168.31.130]
  97. INFO[1035] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.130]
  98. INFO[1035] Starting container [rke-bundle-cert] on host [192.168.31.130], try #1
  99. INFO[1036] [certificates] Successfully started [rke-bundle-cert] container on host [192.168.31.130]
  100. INFO[1036] Waiting for [rke-bundle-cert] container to exit on host [192.168.31.130]
  101. INFO[1036] Container [rke-bundle-cert] is still running on host [192.168.31.130]
  102. INFO[1037] Waiting for [rke-bundle-cert] container to exit on host [192.168.31.130]
  103. INFO[1038] [certificates] successfully saved certificate bundle [/opt/rke/etcd-snapshots//pki.bundle.tar.gz] on host [192.168.31.130]
  104. INFO[1038] Removing container [rke-bundle-cert] on host [192.168.31.130], try #1
  105. INFO[1038] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.130]
  106. INFO[1038] Starting container [rke-log-linker] on host [192.168.31.130], try #1
  107. INFO[1039] [etcd] Successfully started [rke-log-linker] container on host [192.168.31.130]
  108. INFO[1039] Removing container [rke-log-linker] on host [192.168.31.130], try #1
  109. INFO[1039] [remove/rke-log-linker] Successfully removed container on host [192.168.31.130]
  110. INFO[1040] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.131]
  111. INFO[1086] Starting container [etcd-fix-perm] on host [192.168.31.131], try #1
  112. INFO[1135] Successfully started [etcd-fix-perm] container on host [192.168.31.131]
  113. INFO[1135] Waiting for [etcd-fix-perm] container to exit on host [192.168.31.131]
  114. INFO[1135] Waiting for [etcd-fix-perm] container to exit on host [192.168.31.131]
  115. INFO[1147] Removing container [etcd-fix-perm] on host [192.168.31.131], try #1
  116. INFO[1160] [remove/etcd-fix-perm] Successfully removed container on host [192.168.31.131]
  117. INFO[1160] [etcd] Running rolling snapshot container [etcd-snapshot-once] on host [192.168.31.131]
  118. INFO[1160] Removing container [etcd-rolling-snapshots] on host [192.168.31.131], try #1
  119. INFO[1170] [remove/etcd-rolling-snapshots] Successfully removed container on host [192.168.31.131]
  120. INFO[1170] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.131]
  121. INFO[1173] Starting container [etcd-rolling-snapshots] on host [192.168.31.131], try #1
  122. INFO[1176] [etcd] Successfully started [etcd-rolling-snapshots] container on host [192.168.31.131]
  123. INFO[1184] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.131]
  124. INFO[1189] Starting container [rke-bundle-cert] on host [192.168.31.131], try #1
  125. INFO[1194] [certificates] Successfully started [rke-bundle-cert] container on host [192.168.31.131]
  126. INFO[1194] Waiting for [rke-bundle-cert] container to exit on host [192.168.31.131]
  127. INFO[1194] Container [rke-bundle-cert] is still running on host [192.168.31.131]
  128. INFO[1195] Waiting for [rke-bundle-cert] container to exit on host [192.168.31.131]
  129. INFO[1195] [certificates] successfully saved certificate bundle [/opt/rke/etcd-snapshots//pki.bundle.tar.gz] on host [192.168.31.131]
  130. INFO[1195] Removing container [rke-bundle-cert] on host [192.168.31.131], try #1
  131. INFO[1195] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.131]
  132. INFO[1195] Starting container [rke-log-linker] on host [192.168.31.131], try #1
  133. INFO[1196] [etcd] Successfully started [rke-log-linker] container on host [192.168.31.131]
  134. INFO[1196] Removing container [rke-log-linker] on host [192.168.31.131], try #1
  135. INFO[1197] [remove/rke-log-linker] Successfully removed container on host [192.168.31.131]
  136. INFO[1197] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.132]
  137. INFO[1197] Starting container [etcd-fix-perm] on host [192.168.31.132], try #1
  138. INFO[1198] Successfully started [etcd-fix-perm] container on host [192.168.31.132]
  139. INFO[1198] Waiting for [etcd-fix-perm] container to exit on host [192.168.31.132]
  140. INFO[1198] Waiting for [etcd-fix-perm] container to exit on host [192.168.31.132]
  141. INFO[1198] Container [etcd-fix-perm] is still running on host [192.168.31.132]
  142. INFO[1199] Waiting for [etcd-fix-perm] container to exit on host [192.168.31.132]
  143. INFO[1199] Removing container [etcd-fix-perm] on host [192.168.31.132], try #1
  144. INFO[1199] [remove/etcd-fix-perm] Successfully removed container on host [192.168.31.132]
  145. INFO[1200] [etcd] Running rolling snapshot container [etcd-snapshot-once] on host [192.168.31.132]
  146. INFO[1200] Removing container [etcd-rolling-snapshots] on host [192.168.31.132], try #1
  147. INFO[1200] [remove/etcd-rolling-snapshots] Successfully removed container on host [192.168.31.132]
  148. INFO[1200] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.132]
  149. INFO[1200] Starting container [etcd-rolling-snapshots] on host [192.168.31.132], try #1
  150. INFO[1201] [etcd] Successfully started [etcd-rolling-snapshots] container on host [192.168.31.132]
  151. INFO[1206] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.132]
  152. INFO[1206] Starting container [rke-bundle-cert] on host [192.168.31.132], try #1
  153. INFO[1206] [certificates] Successfully started [rke-bundle-cert] container on host [192.168.31.132]
  154. INFO[1206] Waiting for [rke-bundle-cert] container to exit on host [192.168.31.132]
  155. INFO[1206] Container [rke-bundle-cert] is still running on host [192.168.31.132]
  156. INFO[1207] Waiting for [rke-bundle-cert] container to exit on host [192.168.31.132]
  157. INFO[1207] [certificates] successfully saved certificate bundle [/opt/rke/etcd-snapshots//pki.bundle.tar.gz] on host [192.168.31.132]
  158. INFO[1207] Removing container [rke-bundle-cert] on host [192.168.31.132], try #1
  159. INFO[1207] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.132]
  160. INFO[1208] Starting container [rke-log-linker] on host [192.168.31.132], try #1
  161. INFO[1208] [etcd] Successfully started [rke-log-linker] container on host [192.168.31.132]
  162. INFO[1208] Removing container [rke-log-linker] on host [192.168.31.132], try #1
  163. INFO[1209] [remove/rke-log-linker] Successfully removed container on host [192.168.31.132]
  164. INFO[1209] [etcd] Successfully started etcd plane.. Checking etcd cluster health
  165. INFO[1209] [controlplane] Building up Controller Plane..
  166. INFO[1209] Checking if container [service-sidekick] is running on host [192.168.31.130], try #1
  167. INFO[1209] Checking if container [service-sidekick] is running on host [192.168.31.131], try #1
  168. INFO[1209] Checking if container [service-sidekick] is running on host [192.168.31.132], try #1
  169. INFO[1209] [sidekick] Sidekick container already created on host [192.168.31.132]
  170. INFO[1209] [sidekick] Sidekick container already created on host [192.168.31.131]
  171. INFO[1209] Checking if container [kube-apiserver] is running on host [192.168.31.132], try #1
  172. INFO[1209] Checking if container [kube-apiserver] is running on host [192.168.31.131], try #1
  173. INFO[1209] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.132]
  174. INFO[1209] Checking if container [old-kube-apiserver] is running on host [192.168.31.132], try #1
  175. INFO[1209] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.131]
  176. INFO[1209] Checking if container [old-kube-apiserver] is running on host [192.168.31.131], try #1
  177. INFO[1209] Stopping container [kube-apiserver] on host [192.168.31.132] with stopTimeoutDuration [5s], try #1
  178. INFO[1209] Stopping container [kube-apiserver] on host [192.168.31.131] with stopTimeoutDuration [5s], try #1
  179. INFO[1209] [sidekick] Sidekick container already created on host [192.168.31.130]
  180. INFO[1210] Checking if container [kube-apiserver] is running on host [192.168.31.130], try #1
  181. INFO[1210] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.130]
  182. INFO[1210] Checking if container [old-kube-apiserver] is running on host [192.168.31.130], try #1
  183. INFO[1210] Stopping container [kube-apiserver] on host [192.168.31.130] with stopTimeoutDuration [5s], try #1
  184. INFO[1211] Waiting for [kube-apiserver] container to exit on host [192.168.31.132]
  185. INFO[1211] Renaming container [kube-apiserver] to [old-kube-apiserver] on host [192.168.31.132], try #1
  186. INFO[1211] Starting container [kube-apiserver] on host [192.168.31.132], try #1
  187. INFO[1212] [controlplane] Successfully updated [kube-apiserver] container on host [192.168.31.132]
  188. INFO[1212] Removing container [old-kube-apiserver] on host [192.168.31.132], try #1
  189. INFO[1212] [healthcheck] Start Healthcheck on service [kube-apiserver] on host [192.168.31.132]
  190. INFO[1216] Waiting for [kube-apiserver] container to exit on host [192.168.31.130]
  191. INFO[1216] Renaming container [kube-apiserver] to [old-kube-apiserver] on host [192.168.31.130], try #1
  192. INFO[1216] Starting container [kube-apiserver] on host [192.168.31.130], try #1
  193. INFO[1217] Waiting for [kube-apiserver] container to exit on host [192.168.31.131]
  194. INFO[1217] Renaming container [kube-apiserver] to [old-kube-apiserver] on host [192.168.31.131], try #1
  195. INFO[1217] Starting container [kube-apiserver] on host [192.168.31.131], try #1
  196. INFO[1217] [controlplane] Successfully updated [kube-apiserver] container on host [192.168.31.130]
  197. INFO[1217] Removing container [old-kube-apiserver] on host [192.168.31.130], try #1
  198. INFO[1218] [healthcheck] Start Healthcheck on service [kube-apiserver] on host [192.168.31.130]
  199. INFO[1218] [controlplane] Successfully updated [kube-apiserver] container on host [192.168.31.131]
  200. INFO[1218] Removing container [old-kube-apiserver] on host [192.168.31.131], try #1
  201. INFO[1219] [healthcheck] Start Healthcheck on service [kube-apiserver] on host [192.168.31.131]
  202. INFO[1267] [healthcheck] service [kube-apiserver] on host [192.168.31.132] is healthy
  203. INFO[1267] [healthcheck] service [kube-apiserver] on host [192.168.31.130] is healthy
  204. INFO[1267] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.130]
  205. INFO[1267] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.132]
  206. INFO[1267] Starting container [rke-log-linker] on host [192.168.31.132], try #1
  207. INFO[1268] Starting container [rke-log-linker] on host [192.168.31.130], try #1
  208. INFO[1268] [controlplane] Successfully started [rke-log-linker] container on host [192.168.31.132]
  209. INFO[1268] Removing container [rke-log-linker] on host [192.168.31.132], try #1
  210. INFO[1269] [remove/rke-log-linker] Successfully removed container on host [192.168.31.132]
  211. INFO[1269] Checking if container [kube-controller-manager] is running on host [192.168.31.132], try #1
  212. INFO[1269] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.132]
  213. INFO[1269] Checking if container [old-kube-controller-manager] is running on host [192.168.31.132], try #1
  214. INFO[1269] Stopping container [kube-controller-manager] on host [192.168.31.132] with stopTimeoutDuration [5s], try #1
  215. INFO[1269] [controlplane] Successfully started [rke-log-linker] container on host [192.168.31.130]
  216. INFO[1269] Removing container [rke-log-linker] on host [192.168.31.130], try #1
  217. INFO[1269] Waiting for [kube-controller-manager] container to exit on host [192.168.31.132]
  218. INFO[1269] Renaming container [kube-controller-manager] to [old-kube-controller-manager] on host [192.168.31.132], try #1
  219. INFO[1269] Starting container [kube-controller-manager] on host [192.168.31.132], try #1
  220. INFO[1269] [remove/rke-log-linker] Successfully removed container on host [192.168.31.130]
  221. INFO[1269] Checking if container [kube-controller-manager] is running on host [192.168.31.130], try #1
  222. INFO[1269] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.130]
  223. INFO[1269] Checking if container [old-kube-controller-manager] is running on host [192.168.31.130], try #1
  224. INFO[1270] Stopping container [kube-controller-manager] on host [192.168.31.130] with stopTimeoutDuration [5s], try #1
  225. INFO[1270] [controlplane] Successfully updated [kube-controller-manager] container on host [192.168.31.132]
  226. INFO[1270] Removing container [old-kube-controller-manager] on host [192.168.31.132], try #1
  227. INFO[1270] [healthcheck] Start Healthcheck on service [kube-controller-manager] on host [192.168.31.132]
  228. INFO[1270] Waiting for [kube-controller-manager] container to exit on host [192.168.31.130]
  229. INFO[1270] Renaming container [kube-controller-manager] to [old-kube-controller-manager] on host [192.168.31.130], try #1
  230. INFO[1270] Starting container [kube-controller-manager] on host [192.168.31.130], try #1
  231. INFO[1270] [healthcheck] service [kube-apiserver] on host [192.168.31.131] is healthy
  232. INFO[1270] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.131]
  233. INFO[1270] [controlplane] Successfully updated [kube-controller-manager] container on host [192.168.31.130]
  234. INFO[1270] Removing container [old-kube-controller-manager] on host [192.168.31.130], try #1
  235. INFO[1271] [healthcheck] Start Healthcheck on service [kube-controller-manager] on host [192.168.31.130]
  236. INFO[1271] Starting container [rke-log-linker] on host [192.168.31.131], try #1
  237. INFO[1274] [controlplane] Successfully started [rke-log-linker] container on host [192.168.31.131]
  238. INFO[1274] Removing container [rke-log-linker] on host [192.168.31.131], try #1
  239. INFO[1274] [remove/rke-log-linker] Successfully removed container on host [192.168.31.131]
  240. INFO[1274] Checking if container [kube-controller-manager] is running on host [192.168.31.131], try #1
  241. INFO[1274] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.131]
  242. INFO[1274] Checking if container [old-kube-controller-manager] is running on host [192.168.31.131], try #1
  243. INFO[1274] Stopping container [kube-controller-manager] on host [192.168.31.131] with stopTimeoutDuration [5s], try #1
  244. INFO[1275] Waiting for [kube-controller-manager] container to exit on host [192.168.31.131]
  245. INFO[1275] Renaming container [kube-controller-manager] to [old-kube-controller-manager] on host [192.168.31.131], try #1
  246. INFO[1275] Starting container [kube-controller-manager] on host [192.168.31.131], try #1
  247. INFO[1275] [healthcheck] service [kube-controller-manager] on host [192.168.31.132] is healthy
  248. INFO[1275] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.132]
  249. INFO[1275] Starting container [rke-log-linker] on host [192.168.31.132], try #1
  250. INFO[1275] [controlplane] Successfully updated [kube-controller-manager] container on host [192.168.31.131]
  251. INFO[1275] Removing container [old-kube-controller-manager] on host [192.168.31.131], try #1
  252. INFO[1276] [healthcheck] Start Healthcheck on service [kube-controller-manager] on host [192.168.31.131]
  253. INFO[1276] [controlplane] Successfully started [rke-log-linker] container on host [192.168.31.132]
  254. INFO[1276] Removing container [rke-log-linker] on host [192.168.31.132], try #1
  255. INFO[1276] [remove/rke-log-linker] Successfully removed container on host [192.168.31.132]
  256. INFO[1276] Checking if container [kube-scheduler] is running on host [192.168.31.132], try #1
  257. INFO[1277] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.132]
  258. INFO[1277] Checking if container [old-kube-scheduler] is running on host [192.168.31.132], try #1
  259. INFO[1277] Stopping container [kube-scheduler] on host [192.168.31.132] with stopTimeoutDuration [5s], try #1
  260. INFO[1277] Waiting for [kube-scheduler] container to exit on host [192.168.31.132]
  261. INFO[1277] Renaming container [kube-scheduler] to [old-kube-scheduler] on host [192.168.31.132], try #1
  262. INFO[1277] Starting container [kube-scheduler] on host [192.168.31.132], try #1
  263. INFO[1278] [controlplane] Successfully updated [kube-scheduler] container on host [192.168.31.132]
  264. INFO[1278] Removing container [old-kube-scheduler] on host [192.168.31.132], try #1
  265. INFO[1278] [healthcheck] Start Healthcheck on service [kube-scheduler] on host [192.168.31.132]
  266. INFO[1281] [healthcheck] service [kube-controller-manager] on host [192.168.31.130] is healthy
  267. INFO[1281] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.130]
  268. INFO[1281] Starting container [rke-log-linker] on host [192.168.31.130], try #1
  269. INFO[1284] [healthcheck] service [kube-scheduler] on host [192.168.31.132] is healthy
  270. INFO[1284] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.132]
  271. INFO[1284] [controlplane] Successfully started [rke-log-linker] container on host [192.168.31.130]
  272. INFO[1284] Removing container [rke-log-linker] on host [192.168.31.130], try #1
  273. INFO[1284] Starting container [rke-log-linker] on host [192.168.31.132], try #1
  274. INFO[1285] [healthcheck] service [kube-controller-manager] on host [192.168.31.131] is healthy
  275. INFO[1285] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.131]
  276. INFO[1285] [remove/rke-log-linker] Successfully removed container on host [192.168.31.130]
  277. INFO[1285] Checking if container [kube-scheduler] is running on host [192.168.31.130], try #1
  278. INFO[1285] [controlplane] Successfully started [rke-log-linker] container on host [192.168.31.132]
  279. INFO[1285] Starting container [rke-log-linker] on host [192.168.31.131], try #1
  280. INFO[1285] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.130]
  281. INFO[1285] Checking if container [old-kube-scheduler] is running on host [192.168.31.130], try #1
  282. INFO[1286] Removing container [rke-log-linker] on host [192.168.31.132], try #1
  283. INFO[1286] Stopping container [kube-scheduler] on host [192.168.31.130] with stopTimeoutDuration [5s], try #1
  284. INFO[1286] [remove/rke-log-linker] Successfully removed container on host [192.168.31.132]
  285. INFO[1287] [controlplane] Successfully started [rke-log-linker] container on host [192.168.31.131]
  286. INFO[1287] Removing container [rke-log-linker] on host [192.168.31.131], try #1
  287. INFO[1287] Waiting for [kube-scheduler] container to exit on host [192.168.31.130]
  288. INFO[1287] Renaming container [kube-scheduler] to [old-kube-scheduler] on host [192.168.31.130], try #1
  289. INFO[1287] Starting container [kube-scheduler] on host [192.168.31.130], try #1
  290. INFO[1287] [controlplane] Successfully updated [kube-scheduler] container on host [192.168.31.130]
  291. INFO[1287] Removing container [old-kube-scheduler] on host [192.168.31.130], try #1
  292. INFO[1287] [healthcheck] Start Healthcheck on service [kube-scheduler] on host [192.168.31.130]
  293. INFO[1288] [remove/rke-log-linker] Successfully removed container on host [192.168.31.131]
  294. INFO[1288] Checking if container [kube-scheduler] is running on host [192.168.31.131], try #1
  295. INFO[1289] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.131]
  296. INFO[1289] Checking if container [old-kube-scheduler] is running on host [192.168.31.131], try #1
  297. INFO[1289] Stopping container [kube-scheduler] on host [192.168.31.131] with stopTimeoutDuration [5s], try #1
  298. INFO[1290] Waiting for [kube-scheduler] container to exit on host [192.168.31.131]
  299. INFO[1290] Renaming container [kube-scheduler] to [old-kube-scheduler] on host [192.168.31.131], try #1
  300. INFO[1290] Starting container [kube-scheduler] on host [192.168.31.131], try #1
  301. INFO[1291] [controlplane] Successfully updated [kube-scheduler] container on host [192.168.31.131]
  302. INFO[1291] Removing container [old-kube-scheduler] on host [192.168.31.131], try #1
  303. INFO[1291] [healthcheck] Start Healthcheck on service [kube-scheduler] on host [192.168.31.131]
  304. INFO[1295] [healthcheck] service [kube-scheduler] on host [192.168.31.130] is healthy
  305. INFO[1295] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.130]
  306. INFO[1295] Starting container [rke-log-linker] on host [192.168.31.130], try #1
  307. INFO[1296] [controlplane] Successfully started [rke-log-linker] container on host [192.168.31.130]
  308. INFO[1296] Removing container [rke-log-linker] on host [192.168.31.130], try #1
  309. INFO[1296] [remove/rke-log-linker] Successfully removed container on host [192.168.31.130]
  310. INFO[1298] [healthcheck] service [kube-scheduler] on host [192.168.31.131] is healthy
  311. INFO[1298] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.131]
  312. INFO[1298] Starting container [rke-log-linker] on host [192.168.31.131], try #1
  313. INFO[1299] [controlplane] Successfully started [rke-log-linker] container on host [192.168.31.131]
  314. INFO[1300] Removing container [rke-log-linker] on host [192.168.31.131], try #1
  315. INFO[1300] [remove/rke-log-linker] Successfully removed container on host [192.168.31.131]
  316. INFO[1300] [controlplane] Successfully started Controller Plane..
  317. INFO[1300] [authz] Creating rke-job-deployer ServiceAccount
  318. INFO[1300] [authz] rke-job-deployer ServiceAccount created successfully
  319. INFO[1300] [authz] Creating system:node ClusterRoleBinding
  320. INFO[1301] [authz] system:node ClusterRoleBinding created successfully
  321. INFO[1301] [authz] Creating kube-apiserver proxy ClusterRole and ClusterRoleBinding
  322. INFO[1301] [authz] kube-apiserver proxy ClusterRole and ClusterRoleBinding created successfully
  323. INFO[1302] Successfully Deployed state file at [./cluster.rkestate]
  324. INFO[1302] [state] Saving full cluster state to Kubernetes
  325. INFO[1302] [state] Successfully Saved full cluster state to Kubernetes ConfigMap: cluster-state
  326. INFO[1302] [worker] Building up Worker Plane..
  327. INFO[1302] Checking if container [service-sidekick] is running on host [192.168.31.130], try #1
  328. INFO[1302] Checking if container [service-sidekick] is running on host [192.168.31.131], try #1
  329. INFO[1302] Checking if container [service-sidekick] is running on host [192.168.31.132], try #1
  330. INFO[1302] [sidekick] Sidekick container already created on host [192.168.31.130]
  331. INFO[1302] [sidekick] Sidekick container already created on host [192.168.31.132]
  332. INFO[1302] Checking if container [kubelet] is running on host [192.168.31.132], try #1
  333. INFO[1302] Checking if container [kubelet] is running on host [192.168.31.130], try #1
  334. INFO[1302] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.132]
  335. INFO[1302] Checking if container [old-kubelet] is running on host [192.168.31.132], try #1
  336. INFO[1302] [sidekick] Sidekick container already created on host [192.168.31.131]
  337. INFO[1302] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.130]
  338. INFO[1302] Checking if container [old-kubelet] is running on host [192.168.31.130], try #1
  339. INFO[1302] Stopping container [kubelet] on host [192.168.31.132] with stopTimeoutDuration [5s], try #1
  340. INFO[1302] Stopping container [kubelet] on host [192.168.31.130] with stopTimeoutDuration [5s], try #1
  341. INFO[1302] Checking if container [kubelet] is running on host [192.168.31.131], try #1
  342. INFO[1302] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.131]
  343. INFO[1302] Checking if container [old-kubelet] is running on host [192.168.31.131], try #1
  344. INFO[1302] Stopping container [kubelet] on host [192.168.31.131] with stopTimeoutDuration [5s], try #1
  345. INFO[1302] Waiting for [kubelet] container to exit on host [192.168.31.132]
  346. INFO[1302] Renaming container [kubelet] to [old-kubelet] on host [192.168.31.132], try #1
  347. INFO[1302] Waiting for [kubelet] container to exit on host [192.168.31.130]
  348. INFO[1302] Renaming container [kubelet] to [old-kubelet] on host [192.168.31.130], try #1
  349. INFO[1302] Starting container [kubelet] on host [192.168.31.130], try #1
  350. INFO[1302] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.133]
  351. INFO[1303] Starting container [kubelet] on host [192.168.31.132], try #1
  352. INFO[1303] [worker] Successfully updated [kubelet] container on host [192.168.31.130]
  353. INFO[1303] Removing container [old-kubelet] on host [192.168.31.130], try #1
  354. INFO[1303] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.134]
  355. INFO[1304] Starting container [rke-log-linker] on host [192.168.31.134], try #1
  356. INFO[1304] [worker] Successfully updated [kubelet] container on host [192.168.31.132]
  357. INFO[1304] Removing container [old-kubelet] on host [192.168.31.132], try #1
  358. INFO[1304] [healthcheck] Start Healthcheck on service [kubelet] on host [192.168.31.130]
  359. INFO[1304] Starting container [rke-log-linker] on host [192.168.31.133], try #1
  360. INFO[1304] [healthcheck] Start Healthcheck on service [kubelet] on host [192.168.31.132]
  361. INFO[1304] Waiting for [kubelet] container to exit on host [192.168.31.131]
  362. INFO[1304] Renaming container [kubelet] to [old-kubelet] on host [192.168.31.131], try #1
  363. INFO[1305] Starting container [kubelet] on host [192.168.31.131], try #1
  364. INFO[1306] [worker] Successfully updated [kubelet] container on host [192.168.31.131]
  365. INFO[1306] Removing container [old-kubelet] on host [192.168.31.131], try #1
  366. INFO[1306] [worker] Successfully started [rke-log-linker] container on host [192.168.31.133]
  367. INFO[1307] Removing container [rke-log-linker] on host [192.168.31.133], try #1
  368. INFO[1307] [remove/rke-log-linker] Successfully removed container on host [192.168.31.133]
  369. INFO[1307] Checking if container [service-sidekick] is running on host [192.168.31.133], try #1
  370. INFO[1307] [healthcheck] Start Healthcheck on service [kubelet] on host [192.168.31.131]
  371. INFO[1307] [sidekick] Sidekick container already created on host [192.168.31.133]
  372. INFO[1307] Checking if container [kubelet] is running on host [192.168.31.133], try #1
  373. INFO[1307] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.133]
  374. INFO[1307] Checking if container [old-kubelet] is running on host [192.168.31.133], try #1
  375. INFO[1307] Stopping container [kubelet] on host [192.168.31.133] with stopTimeoutDuration [5s], try #1
  376. INFO[1308] [worker] Successfully started [rke-log-linker] container on host [192.168.31.134]
  377. INFO[1308] Removing container [rke-log-linker] on host [192.168.31.134], try #1
  378. INFO[1308] Waiting for [kubelet] container to exit on host [192.168.31.133]
  379. INFO[1308] Renaming container [kubelet] to [old-kubelet] on host [192.168.31.133], try #1
  380. INFO[1308] Starting container [kubelet] on host [192.168.31.133], try #1
  381. INFO[1309] [worker] Successfully updated [kubelet] container on host [192.168.31.133]
  382. INFO[1309] Removing container [old-kubelet] on host [192.168.31.133], try #1
  383. INFO[1309] [remove/rke-log-linker] Successfully removed container on host [192.168.31.134]
  384. INFO[1309] Checking if container [service-sidekick] is running on host [192.168.31.134], try #1
  385. INFO[1309] [sidekick] Sidekick container already created on host [192.168.31.134]
  386. INFO[1309] Checking if container [kubelet] is running on host [192.168.31.134], try #1
  387. INFO[1309] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.134]
  388. INFO[1309] Checking if container [old-kubelet] is running on host [192.168.31.134], try #1
  389. INFO[1309] Stopping container [kubelet] on host [192.168.31.134] with stopTimeoutDuration [5s], try #1
  390. INFO[1309] [healthcheck] Start Healthcheck on service [kubelet] on host [192.168.31.133]
  391. INFO[1309] Waiting for [kubelet] container to exit on host [192.168.31.134]
  392. INFO[1309] Renaming container [kubelet] to [old-kubelet] on host [192.168.31.134], try #1
  393. INFO[1309] Starting container [kubelet] on host [192.168.31.134], try #1
  394. INFO[1310] [worker] Successfully updated [kubelet] container on host [192.168.31.134]
  395. INFO[1310] Removing container [old-kubelet] on host [192.168.31.134], try #1
  396. INFO[1310] [healthcheck] Start Healthcheck on service [kubelet] on host [192.168.31.134]
  397. INFO[1331] [healthcheck] service [kubelet] on host [192.168.31.130] is healthy
  398. INFO[1331] [healthcheck] service [kubelet] on host [192.168.31.132] is healthy
  399. INFO[1331] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.132]
  400. INFO[1331] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.130]
  401. INFO[1331] Starting container [rke-log-linker] on host [192.168.31.132], try #1
  402. INFO[1331] Starting container [rke-log-linker] on host [192.168.31.130], try #1
  403. INFO[1332] [worker] Successfully started [rke-log-linker] container on host [192.168.31.132]
  404. INFO[1332] Removing container [rke-log-linker] on host [192.168.31.132], try #1
  405. INFO[1332] [worker] Successfully started [rke-log-linker] container on host [192.168.31.130]
  406. INFO[1332] Removing container [rke-log-linker] on host [192.168.31.130], try #1
  407. INFO[1332] [remove/rke-log-linker] Successfully removed container on host [192.168.31.132]
  408. INFO[1332] Checking if container [kube-proxy] is running on host [192.168.31.132], try #1
  409. INFO[1332] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.132]
  410. INFO[1332] Checking if container [old-kube-proxy] is running on host [192.168.31.132], try #1
  411. INFO[1332] Stopping container [kube-proxy] on host [192.168.31.132] with stopTimeoutDuration [5s], try #1
  412. INFO[1333] [remove/rke-log-linker] Successfully removed container on host [192.168.31.130]
  413. INFO[1333] Checking if container [kube-proxy] is running on host [192.168.31.130], try #1
  414. INFO[1333] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.130]
  415. INFO[1333] Checking if container [old-kube-proxy] is running on host [192.168.31.130], try #1
  416. INFO[1333] Waiting for [kube-proxy] container to exit on host [192.168.31.132]
  417. INFO[1333] Renaming container [kube-proxy] to [old-kube-proxy] on host [192.168.31.132], try #1
  418. INFO[1333] Stopping container [kube-proxy] on host [192.168.31.130] with stopTimeoutDuration [5s], try #1
  419. INFO[1333] Starting container [kube-proxy] on host [192.168.31.132], try #1
  420. INFO[1333] [worker] Successfully updated [kube-proxy] container on host [192.168.31.132]
  421. INFO[1333] Removing container [old-kube-proxy] on host [192.168.31.132], try #1
  422. INFO[1333] [healthcheck] Start Healthcheck on service [kube-proxy] on host [192.168.31.132]
  423. INFO[1334] [healthcheck] service [kubelet] on host [192.168.31.131] is healthy
  424. INFO[1334] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.131]
  425. INFO[1334] Starting container [rke-log-linker] on host [192.168.31.131], try #1
  426. INFO[1335] Waiting for [kube-proxy] container to exit on host [192.168.31.130]
  427. INFO[1335] Renaming container [kube-proxy] to [old-kube-proxy] on host [192.168.31.130], try #1
  428. INFO[1335] Starting container [kube-proxy] on host [192.168.31.130], try #1
  429. INFO[1335] [healthcheck] service [kubelet] on host [192.168.31.133] is healthy
  430. INFO[1335] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.133]
  431. INFO[1335] Starting container [rke-log-linker] on host [192.168.31.133], try #1
  432. INFO[1335] [worker] Successfully updated [kube-proxy] container on host [192.168.31.130]
  433. INFO[1335] Removing container [old-kube-proxy] on host [192.168.31.130], try #1
  434. INFO[1336] [healthcheck] Start Healthcheck on service [kube-proxy] on host [192.168.31.130]
  435. INFO[1336] [worker] Successfully started [rke-log-linker] container on host [192.168.31.131]
  436. INFO[1336] Removing container [rke-log-linker] on host [192.168.31.131], try #1
  437. INFO[1336] [remove/rke-log-linker] Successfully removed container on host [192.168.31.131]
  438. INFO[1336] Checking if container [kube-proxy] is running on host [192.168.31.131], try #1
  439. INFO[1336] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.131]
  440. INFO[1336] Checking if container [old-kube-proxy] is running on host [192.168.31.131], try #1
  441. INFO[1336] Stopping container [kube-proxy] on host [192.168.31.131] with stopTimeoutDuration [5s], try #1
  442. INFO[1337] Waiting for [kube-proxy] container to exit on host [192.168.31.131]
  443. INFO[1337] Renaming container [kube-proxy] to [old-kube-proxy] on host [192.168.31.131], try #1
  444. INFO[1337] Starting container [kube-proxy] on host [192.168.31.131], try #1
  445. INFO[1337] [worker] Successfully updated [kube-proxy] container on host [192.168.31.131]
  446. INFO[1337] Removing container [old-kube-proxy] on host [192.168.31.131], try #1
  447. INFO[1337] [healthcheck] Start Healthcheck on service [kube-proxy] on host [192.168.31.131]
  448. INFO[1337] [worker] Successfully started [rke-log-linker] container on host [192.168.31.133]
  449. INFO[1337] Removing container [rke-log-linker] on host [192.168.31.133], try #1
  450. INFO[1338] [remove/rke-log-linker] Successfully removed container on host [192.168.31.133]
  451. INFO[1338] [healthcheck] service [kubelet] on host [192.168.31.134] is healthy
  452. INFO[1338] Checking if container [kube-proxy] is running on host [192.168.31.133], try #1
  453. INFO[1338] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.133]
  454. INFO[1338] Checking if container [old-kube-proxy] is running on host [192.168.31.133], try #1
  455. INFO[1338] Stopping container [kube-proxy] on host [192.168.31.133] with stopTimeoutDuration [5s], try #1
  456. INFO[1338] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.134]
  457. INFO[1338] Waiting for [kube-proxy] container to exit on host [192.168.31.133]
  458. INFO[1338] Renaming container [kube-proxy] to [old-kube-proxy] on host [192.168.31.133], try #1
  459. INFO[1338] Starting container [rke-log-linker] on host [192.168.31.134], try #1
  460. INFO[1338] Starting container [kube-proxy] on host [192.168.31.133], try #1
  461. INFO[1339] [worker] Successfully updated [kube-proxy] container on host [192.168.31.133]
  462. INFO[1339] Removing container [old-kube-proxy] on host [192.168.31.133], try #1
  463. INFO[1339] [healthcheck] Start Healthcheck on service [kube-proxy] on host [192.168.31.133]
  464. INFO[1339] [healthcheck] service [kube-proxy] on host [192.168.31.132] is healthy
  465. INFO[1339] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.132]
  466. INFO[1339] Starting container [rke-log-linker] on host [192.168.31.132], try #1
  467. INFO[1340] [worker] Successfully started [rke-log-linker] container on host [192.168.31.134]
  468. INFO[1340] [worker] Successfully started [rke-log-linker] container on host [192.168.31.132]
  469. INFO[1341] Removing container [rke-log-linker] on host [192.168.31.132], try #1
  470. INFO[1341] [remove/rke-log-linker] Successfully removed container on host [192.168.31.132]
  471. INFO[1341] Removing container [rke-log-linker] on host [192.168.31.134], try #1
  472. INFO[1341] [remove/rke-log-linker] Successfully removed container on host [192.168.31.134]
  473. INFO[1341] Checking if container [kube-proxy] is running on host [192.168.31.134], try #1
  474. INFO[1341] Image [rancher/hyperkube:v1.17.4-rancher1] exists on host [192.168.31.134]
  475. INFO[1341] Checking if container [old-kube-proxy] is running on host [192.168.31.134], try #1
  476. INFO[1341] Stopping container [kube-proxy] on host [192.168.31.134] with stopTimeoutDuration [5s], try #1
  477. INFO[1341] [healthcheck] service [kube-proxy] on host [192.168.31.130] is healthy
  478. INFO[1341] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.130]
  479. INFO[1341] Starting container [rke-log-linker] on host [192.168.31.130], try #1
  480. INFO[1342] Waiting for [kube-proxy] container to exit on host [192.168.31.134]
  481. INFO[1342] Renaming container [kube-proxy] to [old-kube-proxy] on host [192.168.31.134], try #1
  482. INFO[1342] Starting container [kube-proxy] on host [192.168.31.134], try #1
  483. INFO[1342] [worker] Successfully updated [kube-proxy] container on host [192.168.31.134]
  484. INFO[1342] Removing container [old-kube-proxy] on host [192.168.31.134], try #1
  485. INFO[1342] [healthcheck] service [kube-proxy] on host [192.168.31.131] is healthy
  486. INFO[1343] [healthcheck] Start Healthcheck on service [kube-proxy] on host [192.168.31.134]
  487. INFO[1343] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.131]
  488. INFO[1343] Starting container [rke-log-linker] on host [192.168.31.131], try #1
  489. INFO[1343] [healthcheck] service [kube-proxy] on host [192.168.31.134] is healthy
  490. INFO[1343] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.134]
  491. INFO[1343] [worker] Successfully started [rke-log-linker] container on host [192.168.31.130]
  492. INFO[1343] Removing container [rke-log-linker] on host [192.168.31.130], try #1
  493. INFO[1343] Starting container [rke-log-linker] on host [192.168.31.134], try #1
  494. INFO[1344] [worker] Successfully started [rke-log-linker] container on host [192.168.31.134]
  495. INFO[1344] Removing container [rke-log-linker] on host [192.168.31.134], try #1
  496. INFO[1344] [worker] Successfully started [rke-log-linker] container on host [192.168.31.131]
  497. INFO[1344] Removing container [rke-log-linker] on host [192.168.31.131], try #1
  498. INFO[1344] [remove/rke-log-linker] Successfully removed container on host [192.168.31.134]
  499. INFO[1344] [healthcheck] service [kube-proxy] on host [192.168.31.133] is healthy
  500. INFO[1344] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.133]
  501. INFO[1344] Starting container [rke-log-linker] on host [192.168.31.133], try #1
  502. INFO[1344] [remove/rke-log-linker] Successfully removed container on host [192.168.31.131]
  503. INFO[1345] [remove/rke-log-linker] Successfully removed container on host [192.168.31.130]
  504. INFO[1345] [worker] Successfully started [rke-log-linker] container on host [192.168.31.133]
  505. INFO[1345] Removing container [rke-log-linker] on host [192.168.31.133], try #1
  506. INFO[1345] [remove/rke-log-linker] Successfully removed container on host [192.168.31.133]
  507. INFO[1345] [worker] Successfully started Worker Plane..
  508. INFO[1345] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.133]
  509. INFO[1345] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.131]
  510. INFO[1345] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.130]
  511. INFO[1346] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.132]
  512. INFO[1346] Image [rancher/rke-tools:v0.1.52] exists on host [192.168.31.134]
  513. INFO[1346] Starting container [rke-log-cleaner] on host [192.168.31.133], try #1
  514. INFO[1346] Starting container [rke-log-cleaner] on host [192.168.31.132], try #1
  515. INFO[1346] Starting container [rke-log-cleaner] on host [192.168.31.130], try #1
  516. INFO[1346] Starting container [rke-log-cleaner] on host [192.168.31.131], try #1
  517. INFO[1346] Starting container [rke-log-cleaner] on host [192.168.31.134], try #1
  518. INFO[1346] [cleanup] Successfully started [rke-log-cleaner] container on host [192.168.31.133]
  519. INFO[1346] Removing container [rke-log-cleaner] on host [192.168.31.133], try #1
  520. INFO[1347] [remove/rke-log-cleaner] Successfully removed container on host [192.168.31.133]
  521. INFO[1347] [cleanup] Successfully started [rke-log-cleaner] container on host [192.168.31.131]
  522. INFO[1347] Removing container [rke-log-cleaner] on host [192.168.31.131], try #1
  523. INFO[1347] [cleanup] Successfully started [rke-log-cleaner] container on host [192.168.31.132]
  524. INFO[1347] Removing container [rke-log-cleaner] on host [192.168.31.132], try #1
  525. INFO[1348] [cleanup] Successfully started [rke-log-cleaner] container on host [192.168.31.130]
  526. INFO[1348] Removing container [rke-log-cleaner] on host [192.168.31.130], try #1
  527. INFO[1348] [remove/rke-log-cleaner] Successfully removed container on host [192.168.31.130]
  528. INFO[1349] [remove/rke-log-cleaner] Successfully removed container on host [192.168.31.131]
  529. INFO[1349] [remove/rke-log-cleaner] Successfully removed container on host [192.168.31.132]
  530. INFO[1350] [cleanup] Successfully started [rke-log-cleaner] container on host [192.168.31.134]
  531. INFO[1350] Removing container [rke-log-cleaner] on host [192.168.31.134], try #1
  532. INFO[1353] [remove/rke-log-cleaner] Successfully removed container on host [192.168.31.134]
  533. INFO[1353] [sync] Syncing nodes Labels and Taints
  534. INFO[1354] [sync] Successfully synced nodes Labels and Taints
  535. INFO[1354] [network] Setting up network plugin: canal
  536. INFO[1354] [addons] Saving ConfigMap for addon rke-network-plugin to Kubernetes
  537. INFO[1354] [addons] Successfully saved ConfigMap for addon rke-network-plugin to Kubernetes
  538. INFO[1354] [addons] Executing deploy job rke-network-plugin
  539. INFO[1383] [addons] Setting up coredns
  540. INFO[1383] [addons] Saving ConfigMap for addon rke-coredns-addon to Kubernetes
  541. INFO[1383] [addons] Successfully saved ConfigMap for addon rke-coredns-addon to Kubernetes
  542. INFO[1383] [addons] Executing deploy job rke-coredns-addon
  543. INFO[1383] [addons] CoreDNS deployed successfully..
  544. INFO[1383] [dns] DNS provider coredns deployed successfully
  545. INFO[1383] [addons] Setting up Metrics Server
  546. INFO[1383] [addons] Saving ConfigMap for addon rke-metrics-addon to Kubernetes
  547. INFO[1383] [addons] Successfully saved ConfigMap for addon rke-metrics-addon to Kubernetes
  548. INFO[1383] [addons] Executing deploy job rke-metrics-addon
  549. INFO[1383] [addons] Metrics Server deployed successfully
  550. INFO[1383] [ingress] Setting up nginx ingress controller
  551. INFO[1383] [addons] Saving ConfigMap for addon rke-ingress-controller to Kubernetes
  552. INFO[1383] [addons] Successfully saved ConfigMap for addon rke-ingress-controller to Kubernetes
  553. INFO[1383] [addons] Executing deploy job rke-ingress-controller
  554. INFO[1383] [ingress] ingress controller nginx deployed successfully
  555. INFO[1383] [addons] Setting up user addons
  556. INFO[1383] [addons] no user addons defined
  557. INFO[1383] Finished building Kubernetes cluster successfully
  558. [rancher@rmaster01 ~]$
  559. [rancher@rmaster01 ~]$ kubectl get nodes
  560. NAME STATUS ROLES AGE VERSION
  561. node01 Ready worker 21d v1.17.4
  562. node02 Ready worker 21d v1.17.4
  563. rmaster01 Ready controlplane,etcd 21d v1.17.4
  564. rmaster02 Ready controlplane,etcd 21d v1.17.4
  565. rmaster03 Ready controlplane,etcd 21d v1.17.4
  566. [rancher@rmaster01 ~]$

2. 集群备份与恢复
# etcd数据备份,备份文件保存在/opt/rke/etcd-snapshots
rke etcd snapshot-save --config cluster.yml --name snapshot-test

etcd数据恢复
rke etcd snapshot-restore --config cluster.yml --name snapshot-test

  1. 定时备份
    当集群启用了etcd-snapshot服务时,可以查看etcd-rolling-snapshots容器日志,以确认是否自动创建备份。
    docker logs etcd-rolling-snapshots

3. 证书管理
默认情况下,Kubernetes集群需要证书,RKE将自动为集群生成证书。在证书过期之前以及证书受到破坏时,轮换这些证书非常重要。
证书轮换之后,Kubernetes组件将自动重新启动。证书轮换可用于下列服务:

  • etcd
  • kubelet
  • kube-apiserver
  • kube-proxy
  • kube-scheduler
  • kube-controller-manager

RKE可以通过一些简单的命令轮换自动生成的证书:

  • 使用相同的CA轮换所有服务证书
  • 使用相同的CA为单个服务轮换证书
  • 轮换CA和所有服务证书

当您准备轮换证书时, RKE 配置文件 cluster.yml是必须的。运行rke cert rotate命令时,可通过--config指定配置路径。

使用相同CA轮换所有服务证书
rke cert rotate --config cluster.yml

使用相同CA轮换单个服务证书
rke cert rotate --service kubelet --config cluster.yml

轮换CA和所有服务证书
rke cert rotate --rotate-ca --config cluster.yml

RKE部署的集群,证书默认有效期为10年,可通过以下命令查看证书的起止时间:

  1. [root@uat-rancher-node01 ~]# openssl x509 -in /etc/kubernetes/ssl/kube-apiserver.pem -noout -dates
  2. notBefore=Aug 29 09:26:10 2020 GMT
  3. notAfter=Aug 27 09:26:11 2030 GMT
  4. [root@uat-rancher-node01 ~]#

image.png

  1. 集群节点管理
    # 添加删除节点需修改cluster.yml文件重新执行rke up

如果仅增加或者删除Worker node节点,可以添加 --update-only参数,表明只更新Worker node的资源
rke up --update-only --config cluster.yml

4. 删除Kubernetes集群
4.1 删除Kubernetes集群
rke remove --config cluster.yml

4.2 执行脚本,清理残留信息.注意:使用root用户执行

  #!/bin/bash
  # Cleans a node of leftover Kubernetes/RKE state after `rke remove`:
  # services, containers, volumes, mounts, data directories, network
  # interfaces, processes holding cluster ports, and iptables rules.
  #
  # Must be run as root. The script is best-effort: individual steps are
  # expected to fail on nodes where the corresponding resource does not exist,
  # so `set -e` is deliberately not used.

  # Every step below needs root; fail early instead of half-cleaning the node.
  if (( EUID != 0 )); then
    echo '请使用root用户执行此脚本' >&2
    exit 1
  fi

  # Disable, then stop, the Kubernetes services.
  k8s_services='kubelet kube-scheduler kube-proxy kube-controller-manager kube-apiserver'
  for svc in $k8s_services; do
    systemctl disable "${svc}.service"
  done
  for svc in $k8s_services; do
    systemctl stop "${svc}.service"
  done

  # Remove all containers (skipped when there are none, so docker is not
  # invoked with an empty argument list).
  mapfile -t container_ids < <(docker ps -qa)
  if (( ${#container_ids[@]} > 0 )); then
    docker rm -f "${container_ids[@]}"
  fi

  # Remove all container volumes.
  mapfile -t volume_names < <(docker volume ls -q)
  if (( ${#volume_names[@]} > 0 )); then
    docker volume rm "${volume_names[@]}"
  fi

  # Unmount kubelet tmpfs mounts, then the kubelet and rancher directories.
  mapfile -t kubelet_mounts < <(mount | grep tmpfs | grep '/var/lib/kubelet' | awk '{ print $3 }')
  for mount_point in "${kubelet_mounts[@]}" /var/lib/kubelet /var/lib/rancher; do
    umount "$mount_point"
  done

  # Back up data directories. The timestamp is computed once so all backups
  # made by one run share the same suffix.
  backup_stamp=$(date +"%Y%m%d%H%M")
  for data_dir in /etc/kubernetes /var/lib/etcd /var/lib/rancher /opt/rke; do
    if [[ -e "$data_dir" ]]; then
      mv "$data_dir" "${data_dir}-bak-${backup_stamp}"
    fi
  done

  # Delete leftover paths.
  rm -rf /etc/ceph \
    /etc/cni \
    /opt/cni \
    /run/secrets/kubernetes.io \
    /run/calico \
    /run/flannel \
    /var/lib/calico \
    /var/lib/cni \
    /var/lib/kubelet \
    /var/log/containers \
    /var/log/pods \
    /var/run/calico

  # Clean up network interfaces. Keep only lo, docker0 and names starting
  # with "eth" or "en". The pattern is anchored: an unanchored 'eth*|ens*'
  # matches any name merely containing "et" or "en" (e.g. veth*), so those
  # leftovers would never be removed.
  for iface_path in /sys/class/net/*; do
    [[ -e "$iface_path" ]] || continue
    iface=${iface_path##*/}
    if ! grep -qiE '^(lo|docker0|eth.*|en.*)$' <<<"$iface"; then
      ip link delete "$iface"
    fi
  done

  # Kill processes still bound to cluster ports. Only the local-address column
  # is compared, and the port must match exactly, so e.g. 8080 or a PID that
  # contains "80" no longer matches port 80. The PID/Program column is located
  # by shape because UDP rows have no State column.
  port_list='80 443 6443 2376 2379 2380 8472 9099 10250 10254'
  for port in $port_list; do
    mapfile -t port_pids < <(
      netstat -atlnup \
        | awk -v port="$port" '
            $4 ~ (":" port "$") {
              for (i = 6; i <= NF; i++) {
                if ($i ~ /^[0-9]+\//) {
                  split($i, owner, "/")
                  print owner[1]
                  break
                }
              }
            }' \
        | sort -un
    )
    if (( ${#port_pids[@]} > 0 )); then
      kill -9 "${port_pids[@]}"
    fi
  done

  # Kill remaining processes whose command line mentions "kube", excluding
  # this script and its parent so the cleanup cannot kill itself.
  mapfile -t kube_pids < <(pgrep -f kube | grep -vxE "$$|$PPID")
  if (( ${#kube_pids[@]} > 0 )); then
    kill -9 "${kube_pids[@]}"
  fi

  # Flush iptables tables.
  # WARNING: if the node has custom iptables rules, run these with care.
  iptables --flush
  iptables --flush --table nat
  iptables --flush --table filter
  iptables --table nat --delete-chain
  iptables --table filter --delete-chain

  systemctl restart docker
  # NOTE: reboot the machine once the cleanup has finished.

5. 恢复Kubectl配置文件
# 编写脚本

  #!/bin/bash
  # Restores the admin kubeconfig of an RKE cluster and writes it to
  # ./kubeconfig_admin.yaml.
  #
  # The kubeconfig is read through a Rancher agent image already present on
  # this node, using the node certificates under /etc/kubernetes/ssl (or
  # /opt/rke/etc/kubernetes/ssl when that directory exists). It is taken from
  # the full-cluster-state ConfigMap when available, otherwise from the
  # kube-admin Secret, and its `server:` entry is rewritten to point at the
  # given master node.
  #
  # Usage: bash restore-kube-config.sh --master-ip=<master node IP>

  # Print usage information.
  help ()
  {
    echo ' ================================================================ '
    echo ' --master-ip: 指定Master节点IP,任意一个K8S Master节点IP即可。'
    echo ' 使用示例:bash restore-kube-config.sh --master-ip=1.1.1.1 '
    echo ' ================================================================'
  }

  # Run the shell snippet $1 inside the Rancher agent image with the node
  # certificates mounted read-only and host networking.
  run_in_agent() {
    docker run --rm --net=host \
      --entrypoint bash \
      -e K8S_MASTER_NODE_IP="$K8S_MASTER_NODE_IP" \
      -v "$K8S_SSLDIR":/etc/kubernetes/ssl:ro \
      "$RANCHER_IMAGE" \
      -c "$1"
  }

  case "$1" in
    -h|--help) help; exit ;;
  esac

  if [[ -z "$1" ]]; then
    help
    exit 1
  fi

  # Iterate over "$@" so arguments are not re-split, and take everything after
  # the first "=" as the value.
  for arg in "$@"; do
    case "$arg" in
      --master-ip=*) K8S_MASTER_NODE_IP=${arg#*=} ;;
    esac
  done

  # Without an IP the generated kubeconfig would point at "https://:6443".
  if [[ -z "$K8S_MASTER_NODE_IP" ]]; then
    help
    exit 1
  fi

  # Pick a released (non rc/alpha) v2.x Rancher agent image present locally.
  RANCHER_IMAGE=$(docker images --filter=label=io.cattle.agent=true \
    | grep 'v2.' \
    | grep -v -E 'rc|alpha|<none>' \
    | head -n 1 \
    | awk '{print $3}')
  if [[ -z "$RANCHER_IMAGE" ]]; then
    echo '未找到Rancher Agent镜像,无法恢复kubeconfig。' >&2
    exit 1
  fi

  if [[ -d /opt/rke/etc/kubernetes/ssl ]]; then
    K8S_SSLDIR=/opt/rke/etc/kubernetes/ssl
  else
    K8S_SSLDIR=/etc/kubernetes/ssl
  fi

  # The snippets below are single-quoted on purpose: they are evaluated inside
  # the container, where K8S_MASTER_NODE_IP is provided through `-e`.
  rewrite_server='sed -e "/^[[:space:]]*server:/ s_:.*_: \"https://${K8S_MASTER_NODE_IP}:6443\"_"'
  from_secret='kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml -n kube-system get secret kube-admin -o jsonpath={.data.Config} | base64 --decode'
  from_configmap='kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml -n kube-system get configmap full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | jq -r .currentState.certificatesBundle.\"kube-admin\".config'

  # Does the cluster carry the full-cluster-state ConfigMap?
  CHECK_CLUSTER_STATE_CONFIGMAP=$(run_in_agent '
    if kubectl --kubeconfig /etc/kubernetes/ssl/kubecfg-kube-node.yaml \
      -n kube-system get configmap full-cluster-state | grep full-cluster-state > /dev/null; then
      echo yes; else echo no; fi')

  if [[ "$CHECK_CLUSTER_STATE_CONFIGMAP" != 'yes' ]]; then
    run_in_agent "${from_secret} | ${rewrite_server}" > kubeconfig_admin.yaml
  else
    run_in_agent "${from_configmap} | ${rewrite_server}" > kubeconfig_admin.yaml
  fi

  # A non-empty output file is the success criterion.
  if [[ -s kubeconfig_admin.yaml ]]; then
    echo '恢复成功,执行以下命令测试:'
    echo ''
    echo "kubectl --kubeconfig kubeconfig_admin.yaml get nodes"
  else
    echo "kubeconfig恢复失败。"
    exit 1
  fi

添加执行权限
chmod +x restore-kube-config.sh

恢复文件
./restore-kube-config.sh --master-ip=<任意一台master节点IP>

测试
kubectl --kubeconfig ./kubeconfig_admin.yaml get nodes

6. 恢复rkestate状态文件
RKE在集群配置文件cluster.yml的同一目录中创建cluster.rkestate文件。该.rkestate文件包含集群的当前状态,包括RKE配置和证书。需要保留此文件以更新集群或通过RKE对集群执行任何操作。

安装jq工具
yum -y install jq

恢复文件
kubectl --kubeconfig kube_config_cluster.yml get configmap -n kube-system \
full-cluster-state -o json | jq -r .data.\"full-cluster-state\" | jq -r . > cluster.rkestate

7. 节点维护
# 设置维护模式
kubectl cordon <节点名称>

取消维护模式
kubectl uncordon <节点名称>

8. 节点应用迁移
kubectl drain <节点名称> --ignore-daemonsets