# 声明使用的存储类,用于动态分配
volumeMode <string> # 使用卷的文件系统还是当成块设备来用,一般不配置
selector <Object> # 标签选择器,选择PV的标签,默认在所有PV中寻找
volumeName <string> # 指定PV名称,直接绑定PV
revisionHistoryLimit <integer> # 保留历史版本数量,默认10
podManagementPolicy <string> # 控制Pod启停顺序,默认OrderedReady,启动顺序,停止倒序。Parallel表示并行
updateStrategy <Object> # 更新策略
rollingUpdate <Object> # 滚动更新策略
partition <integer> # 指定更新到哪个分区,N表示更新序号大于等于N的Pod
type <string> # 更新策略,默认滚动更新RollingUpdate.
6.3. 案例
6.3.1. 创建Statefulset资源
---
# Headless Service (clusterIP: None) — required by the StatefulSet below for
# stable per-Pod DNS names (nginx-web-0.my-svc.apps.svc.cluster.local, ...).
apiVersion: v1
kind: Service
metadata:
  name: my-svc
  namespace: apps
spec:
  selector:
    app: nginx-web
    role: web
    state: "true"
  clusterIP: None
  ports:
    - name: http
      port: 80
      targetPort: 80
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: nginx-web
  namespace: apps
  labels:
    app: nginx
    role: web
spec:
  replicas: 2
  serviceName: my-svc          # must reference the headless Service above
  selector:
    matchLabels:
      app: nginx-web
      role: web
      state: "true"
  template:
    metadata:
      labels:
        app: nginx-web
        role: web
        state: "true"
    spec:
      containers:
        - name: nginx-web
          image: linuxduduniao/nginx:v1.0.0
          ports:
            - name: http
              containerPort: 80
          # readiness probe enforces strict ordered startup (OrderedReady)
          readinessProbe:
            httpGet:
              port: 80
              path: /health
          volumeMounts:
            - name: nginx-web
              mountPath: /usr/share/nginx/html
  # one PVC is created per Pod, named <template-name>-<pod-name>
  volumeClaimTemplates:
    - metadata:
        name: nginx-web
        namespace: apps
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 2Gi
        storageClassName: managed-nfs-storage # nfs storageclass,参考https://www.yuque.com/duduniao/k8s/vgms23#3W9oz
[root@duduniao local-k8s-yaml]# kubectl -n apps get sts -o wide # 当前sts 状态
NAME READY AGE CONTAINERS IMAGES
nginx-web 2/2 19s nginx-web linuxduduniao/nginx:v1.0.0
[root@duduniao local-k8s-yaml]# kubectl -n apps get pod -o wide # 注意Pod名称
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-web-0 1/1 Running 0 86s 172.16.4.158 centos-7-55
nginx-web-1 1/1 Running 0 77s 172.16.3.98 centos-7-54
[root@duduniao local-k8s-yaml]# kubectl -n apps get pvc -o wide # 注意pvc名称
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE VOLUMEMODE
nginx-web-nginx-web-0 Bound pvc-b36b90f1-a268-4815-b55b-6f7fda587193 2Gi RWX managed-nfs-storage 93s Filesystem
nginx-web-nginx-web-1 Bound pvc-1461c537-e784-41fd-9e78-b1e6b212912c 2Gi RWX managed-nfs-storage 84s Filesystem
[root@duduniao local-k8s-yaml]# kubectl -n apps get pv -o wide # 注意pv名称
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE VOLUMEMODE
pvc-1461c537-e784-41fd-9e78-b1e6b212912c 2Gi RWX Delete Bound apps/nginx-web-nginx-web-1 managed-nfs-storage 88s Filesystem
pvc-b36b90f1-a268-4815-b55b-6f7fda587193 2Gi RWX Delete Bound apps/nginx-web-nginx-web-0 managed-nfs-storage 97s Filesystem
[root@duduniao local-k8s-yaml]# kubectl -n apps describe svc my-svc
Name: my-svc
Namespace: apps
Labels:
Annotations: Selector: app=nginx-web,role=web,state=true
Type: ClusterIP
IP: None
Port: http 80/TCP
TargetPort: 80/TCP
Endpoints: 172.16.3.98:80,172.16.4.158:80
Session Affinity: None
Events:
[root@centos-7-51 ~]# dig -t A my-svc.apps.svc.cluster.local @10.96.0.10 +short # 无头服务特性,用于服务发现
172.16.4.216
172.16.3.105
[root@centos-7-51 ~]# dig -t A nginx-web-0.my-svc.apps.svc.cluster.local @10.96.0.10 +short # statefulset 特性,可以用pod名称解析到Pod的地址
172.16.4.216
[root@centos-7-51 ~]# dig -t A nginx-web-1.my-svc.apps.svc.cluster.local @10.96.0.10 +short
172.16.3.105
6.3.2. 扩缩容
在默认的策略下,statefulset启停Pod都是按照顺序执行的,新建Pod按照序号从小到大进行,删除Pod从大到小进行,通常需要在Pod配置就绪性探针,严格保证次序。
[root@duduniao local-k8s-yaml]# kubectl -n apps patch sts nginx-web -p '{"spec":{"replicas":5}}' # 增加副本数
[root@duduniao ~]# kubectl get pod -n apps -w # 观察启动顺序,前一个就绪后,再启动下一个
NAME READY STATUS RESTARTS AGE
nginx-web-0 1/1 Running 2 10h
nginx-web-1 1/1 Running 2 10h
nginx-web-2 0/1 Pending 0 0s
nginx-web-2 0/1 Pending 0 0s
nginx-web-2 0/1 Pending 0 1s
nginx-web-2 0/1 ContainerCreating 0 2s
nginx-web-2 0/1 Running 0 3s
nginx-web-2 1/1 Running 0 5s
nginx-web-3 0/1 Pending 0 0s
nginx-web-3 0/1 Pending 0 0s
nginx-web-3 0/1 Pending 0 1s
nginx-web-3 0/1 ContainerCreating 0 2s
nginx-web-3 0/1 Running 0 3s
nginx-web-3 1/1 Running 0 9s
nginx-web-4 0/1 Pending 0 0s
nginx-web-4 0/1 Pending 0 0s
nginx-web-4 0/1 Pending 0 1s
nginx-web-4 0/1 ContainerCreating 0 2s
nginx-web-4 0/1 Running 0 3s
nginx-web-4 1/1 Running 0 7s
[root@duduniao local-k8s-yaml]# kubectl -n apps patch sts nginx-web -p '{"spec":{"replicas":2}}' # 减少副本数
[root@duduniao ~]# kubectl get pod -n apps -w # 观察停止顺序,按序号倒序停止
NAME READY STATUS RESTARTS AGE
nginx-web-0 1/1 Running 2 10h
nginx-web-1 1/1 Running 2 10h
nginx-web-2 1/1 Running 0 117s
nginx-web-3 1/1 Running 0 112s
nginx-web-4 1/1 Running 0 103s
nginx-web-4 1/1 Terminating 0 109s
nginx-web-4 0/1 Terminating 0 110s
nginx-web-4 0/1 Terminating 0 111s
nginx-web-4 0/1 Terminating 0 111s
nginx-web-4 0/1 Terminating 0 111s
nginx-web-3 1/1 Terminating 0 2m
nginx-web-3 0/1 Terminating 0 2m
nginx-web-3 0/1 Terminating 0 2m1s
nginx-web-3 0/1 Terminating 0 2m1s
nginx-web-2 1/1 Terminating 0 2m7s
nginx-web-2 0/1 Terminating 0 2m7s
nginx-web-2 0/1 Terminating 0 2m13s
nginx-web-2 0/1 Terminating 0 2m13s
6.3.3. 滚动更新
滚动更新是按照倒序方式进行,可以指定分区号,如指定分区号N,则表示更新序号大于等于N的Pod,默认分区为0,表示更新所有Pod
[root@duduniao local-k8s-yaml]# kubectl -n apps set image sts nginx-web nginx-web=linuxduduniao/nginx:v1.0.1
[root@duduniao ~]# kubectl get pod -n apps -w # 注意是倒序更新所有Pod
NAME READY STATUS RESTARTS AGE
nginx-web-0 1/1 Running 2 10h
nginx-web-1 1/1 Running 2 10h
nginx-web-1 1/1 Terminating 2 10h
nginx-web-1 0/1 Terminating 2 10h
nginx-web-1 0/1 Terminating 2 10h
nginx-web-1 0/1 Terminating 2 10h
nginx-web-1 0/1 Pending 0 0s
nginx-web-1 0/1 Pending 0 0s
nginx-web-1 0/1 ContainerCreating 0 0s
nginx-web-1 0/1 Running 0 22s
nginx-web-1 1/1 Running 0 27s
nginx-web-0 1/1 Terminating 2 10h
nginx-web-0 0/1 Terminating 2 10h
nginx-web-0 0/1 Terminating 2 10h
nginx-web-0 0/1 Terminating 2 10h
nginx-web-0 0/1 Pending 0 0s
nginx-web-0 0/1 Pending 0 0s
nginx-web-0 0/1 ContainerCreating 0 0s
nginx-web-0 0/1 ErrImagePull 0 29s
nginx-web-0 0/1 ImagePullBackOff 0 44s
nginx-web-0 0/1 Running 0 62s
nginx-web-0 1/1 Running 0 70s
[root@duduniao local-k8s-yaml]# kubectl -n apps patch sts nginx-web -p '{"spec":{"replicas":5}}' # 扩容到5个副本,方便演示
[root@duduniao local-k8s-yaml]# kubectl -n apps patch sts nginx-web -p '{"spec":{"updateStrategy":{"type":"RollingUpdate","rollingUpdate":{"partition":3}}}}' # 指定更新分区为3
[root@duduniao local-k8s-yaml]# kubectl -n apps set image sts nginx-web nginx-web=linuxduduniao/nginx:v1.0.2 # 开始更新
[root@duduniao ~]# kubectl get pod -n apps -w # 只更新序号4和3的Pod
NAME READY STATUS RESTARTS AGE
nginx-web-0 1/1 Running 0 6m31s
nginx-web-1 1/1 Running 0 7m12s
nginx-web-2 1/1 Running 0 3m38s
nginx-web-3 1/1 Running 0 3m33s
nginx-web-4 1/1 Running 0 3m23s
nginx-web-4 1/1 Terminating 0 3m25s
nginx-web-4 0/1 Terminating 0 3m26s
nginx-web-4 0/1 Terminating 0 3m32s
nginx-web-4 0/1 Terminating 0 3m32s
nginx-web-4 0/1 Pending 0 0s
nginx-web-4 0/1 Pending 0 0s
nginx-web-4 0/1 ContainerCreating 0 0s
nginx-web-4 0/1 Running 0 10s
nginx-web-4 1/1 Running 0 17s
nginx-web-3 1/1 Terminating 0 3m59s
nginx-web-3 0/1 Terminating 0 4m
nginx-web-3 0/1 Terminating 0 4m1s
nginx-web-3 0/1 Terminating 0 4m1s
nginx-web-3 0/1 Pending 0 0s
nginx-web-3 0/1 Pending 0 0s
nginx-web-3 0/1 ContainerCreating 0 0s
nginx-web-3 0/1 Running 0 22s
nginx-web-3 1/1 Running 0 25s
[root@duduniao local-k8s-yaml]# kubectl -n apps patch sts nginx-web -p '{"spec":{"updateStrategy":{"type":"RollingUpdate","rollingUpdate":{"partition":0}}}}' # 修改分区为0
[root@duduniao ~]# kubectl get pod -n apps -w # 会更新剩下来的三个Pod
NAME READY STATUS RESTARTS AGE
nginx-web-0 1/1 Running 0 9m10s
nginx-web-1 1/1 Running 0 9m51s
nginx-web-2 1/1 Running 0 6m17s
nginx-web-3 1/1 Running 0 2m11s
nginx-web-4 1/1 Running 0 2m30s
nginx-web-2 1/1 Terminating 0 6m24s
nginx-web-2 0/1 Terminating 0 6m24s
nginx-web-2 0/1 Terminating 0 6m25s
nginx-web-2 0/1 Terminating 0 6m25s
nginx-web-2 0/1 Pending 0 0s
nginx-web-2 0/1 Pending 0 0s
nginx-web-2 0/1 ContainerCreating 0 0s
nginx-web-2 0/1 Running 0 1s
nginx-web-2 1/1 Running 0 9s
nginx-web-1 1/1 Terminating 0 10m
nginx-web-1 0/1 Terminating 0 10m
nginx-web-1 0/1 Terminating 0 10m
nginx-web-1 0/1 Terminating 0 10m
nginx-web-1 0/1 Pending 0 0s
nginx-web-1 0/1 Pending 0 0s
nginx-web-1 0/1 ContainerCreating 0 0s
nginx-web-1 0/1 Running 0 1s
nginx-web-1 1/1 Running 0 8s
nginx-web-0 1/1 Terminating 0 9m47s
nginx-web-0 0/1 Terminating 0 9m48s
nginx-web-0 0/1 Terminating 0 10m
nginx-web-0 0/1 Terminating 0 10m
nginx-web-0 0/1 Pending 0 0s
nginx-web-0 0/1 Pending 0 0s
nginx-web-0 0/1 ContainerCreating 0 0s
nginx-web-0 0/1 Running 0 1s
nginx-web-0 1/1 Running 0 9s
6.3.4. 删除Statefulset
与statefulset相关资源有三部分:statefulset、pv/pvc、svc,svc和pv/pvc需要手动删除。
通常删除statefulset时,推荐将其副本数置为0,实现有序停止,然后再删除statefulset,最后考虑是否需要删除svc和存储卷!
statefulset还提供了 --cascade=false 选项,表示不删除对应Pod,不过此项使用较少。
[root@duduniao local-k8s-yaml]# kubectl -n apps patch sts nginx-web -p '{"spec":{"replicas":0}}' # 有序停止
[root@duduniao local-k8s-yaml]# kubectl delete -n apps sts nginx-web # 删除statefulset
[root@duduniao local-k8s-yaml]# kubectl delete svc -n apps my-svc # 删除svc
[root@duduniao local-k8s-yaml]# kubectl get pv -n apps
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
pvc-1461c537-e784-41fd-9e78-b1e6b212912c 2Gi RWX Delete Bound apps/nginx-web-nginx-web-1 managed-nfs-storage 11h
pvc-277cff8f-c5e6-43fc-9498-ddc9ba788cb9 2Gi RWO Delete Bound apps/nginx-web-nginx-web-4 managed-nfs-storage 30m
pvc-7d1a1169-6e34-4331-9549-f2f4d2bf6f94 2Gi RWO Delete Bound apps/nginx-web-nginx-web-3 managed-nfs-storage 30m
pvc-b36b90f1-a268-4815-b55b-6f7fda587193 2Gi RWX Delete Bound apps/nginx-web-nginx-web-0 managed-nfs-storage 11h
pvc-d804a299-95e1-486c-a9cd-73c925ed21d6 2Gi RWO Delete Bound apps/nginx-web-nginx-web-2 managed-nfs-storage 31m
[root@duduniao local-k8s-yaml]# kubectl get pvc -n apps # pv和pvc即使删除Pod还是处于绑定状态,需要手动释放
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
nginx-web-nginx-web-0 Bound pvc-b36b90f1-a268-4815-b55b-6f7fda587193 2Gi RWX managed-nfs-storage 11h
nginx-web-nginx-web-1 Bound pvc-1461c537-e784-41fd-9e78-b1e6b212912c 2Gi RWX managed-nfs-storage 11h
nginx-web-nginx-web-2 Bound pvc-d804a299-95e1-486c-a9cd-73c925ed21d6 2Gi RWO managed-nfs-storage 31m
nginx-web-nginx-web-3 Bound pvc-7d1a1169-6e34-4331-9549-f2f4d2bf6f94 2Gi RWO managed-nfs-storage 31m
nginx-web-nginx-web-4 Bound pvc-277cff8f-c5e6-43fc-9498-ddc9ba788cb9 2Gi RWO managed-nfs-storage 30m
[root@duduniao local-k8s-yaml]# kubectl get pvc -n apps | awk '/nginx-web-nginx-web/{print "kubectl -n apps delete pvc", $1}'|bash
6.3.5. 部署有状态应用一般思路
有状态应用通常会组成集群,如MySQL。而且在启动之初还需要作初始化工作,如数据拷贝、数据导入等,通常需要initcontainer来实现。当出现故障时,如何顺利的进行故障转移也是需要考虑的,在实际操作中往往伴随着比较大的风险(相对于虚拟机部署而言)。一般性的建议是在对应官网找到kubernetes中部署方式,通常是helm脚本,通过配置合适的 values.yaml 来实现部署,但是即使如此,还是谨慎一些!
官方提供了两个示例:cassandra集群部署 、 MySQL集群部署。这里再推荐一个helm chart项目,以供学习使用:charts
7. 原理部分
7.1. Deployment创建过程
以 kubectl apply -f deployment.yaml 为例,了解 Deployment 创建的过程。
- Kubectl 生成 API 请求
- Kubectl 校验 deployment.yaml 中的字段,确保资源清单的合法性
- Kubectl 检查 deployment.yaml 中的API字段,填充kubeconfig的认证信息,填充 deployment 请求体信息。
- 将封装好的请求发送到 ApiServer
- ApiServer 身份认证
- apiServer 通过认证信息确认用户合法身份
- 通过RBAC鉴权,确认用户具备资源操作权限
- 通过准入控制,确保满足Kubernetes部分高级功能,参考文档
- 将HTTP请求反序列化,存入 etcd 中
- 执行资源初始化逻辑,如边车容器注入、证书注入等操作
- ControllerManager创建资源对象
- Deployment Controller 通过ApiServer发现有Deployment需要创建
- Deployment Controller 通过ApiServer查询对应ReplicaSet是否满足,不满足则创建新的ReplicaSet,并设置版本号
- ReplicaSet Controller 通过ApiServer发现有新的 ReplicaSet资源,通过查询是否有满足条件的Pod,如果Pod不满足条件,则创建Pod对象
- Scheduler 调度资源
- Scheduler 通过ApiServer监听创建新Pod的请求,一旦需要创建新Pod,则通过调度算法选择Node
- 先通过一组预选策略进行评估,筛选可以调度的Node
- 再通过优选策略,在符合条件的Node中选择得分最高的Node作为运行该Pod的节点,最高分有多个节点时,随机选择一个
- Scheduler 创建一个Binding对象并且请求ApiServer,该对象包含了Pod的唯一信息和选择的Nodename
- Kubelet 初始化Pod
之前所有的操作,都是在操作 etcd 数据库,只有 Kubelet 这一步才开始正式创建Pod
- Kubelet 通过ApiServer查询到当前自身节点上Pod清单,与自己缓存中记录进行比对,如果是新的Pod则进入创建流程
- Kubelet 生成Podstatus对象,填充Pod状态(Pending,Running,Succeeded,Failed,Unknown)
- Kubelet 检查Pod是否具备对应的准入权限,如果不具备则处于Pending状态
- 如果Pod存在资源限制,则通过Cgroups 对Pod进行资源限制
- 创建容器目录,并挂载存储卷的目录
- 检索ImagePullSecret,用于拉取镜像
- 通过CRI 接口调用容器runtime创建容器
- 创建Pause容器,用于为其他容器提供共享的网络名称空间和启动pid名称空间,从而实现网络共享和僵尸进程回收
- docker runtime 调用CNI插件,从IP资源池中分配一个IP地址给当前Pause容器
- 拉取业务镜像,如果填充了 imagepullsecret 会通过指定的密钥去拉取镜像
- 挂载configmap到容器
- 填充Pod元数据信息
- 启动容器,监听容器的event,执行对应钩子
8. 调度
8.1. 调度器
Kubernetes 中调度是指为Pod分配合适的工作节点,该过程是由Scheduler组件完成,调度结果写入 etcd 数据库中,交由kubelet组件去完成Pod启动。scheduler 调度过程分两个阶段:
- 预选策略:过滤出可调度节点。根据Pod清单筛选出集群中所有满足Pod运行条件的节点,这些节点称为可调度节点。
- 优选策略:打分并选择最高分。对可调度节点进行打分,选择最高分的节点,如果存在多个最高分则随机选一个。
Scheduler 的策略可以通过kube-scheduler进行修改,但是一般很少操作。
scheduler 有一个配置 percentageOfNodesToScore ,值是0-100之间,0表示使用默认值,100以上等价于100。当该值为N,节点数为 M时,表示当可调度节点数量达到 N*M*0.01 时,停止执行预选函数,直接进入优选打分环节,这样可以避免轮询过多的节点影响性能。scheduler 默认计算方式:在 100-节点集群 下取 50%,在 5000-节点的集群下取 10%,这个参数默认的最低值是 5%,另外如果最小可调度节点数是50,这是程序中写死的,无法调整。从官网的描述来看,当节点数在几百个甚至更少的情况,调整percentageOfNodesToScore没有明显效果。
8.2. 节点选择器
为了方便演示,增加到三个节点,效果更加明显
[root@centos-7-51 ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
centos-7-51 Ready master 10d v1.18.12
centos-7-52 Ready master 10d v1.18.12
centos-7-53 Ready master 10d v1.18.12
centos-7-54 Ready worker 10d v1.18.12
centos-7-55 Ready worker 10d v1.18.12
centos-7-56 Ready worker 7m58s v1.18.12
[root@centos-7-51 ~]# kubectl label node centos-7-54 ssd=true # 打上标签方便区分
[root@centos-7-51 ~]# kubectl label node centos-7-55 ssd=true
[root@centos-7-51 ~]# kubectl label node centos-7-54 cpu=high
[root@centos-7-51 ~]# kubectl label node centos-7-56 cpu=high
节点选择器有两种,一种是直接指定nodeName,另一种是通过 nodeSelector 来根据标签选择:
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deploy
spec:
  replicas: 5
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx-demo
          image: linuxduduniao/nginx:v1.0.0
      # nodeName bypasses the scheduler entirely: all Pods land on this node
      nodeName: centos-7-56
[root@centos-7-51 ~]# kubectl get pod -o wide # 全部调度到 centos-7-56 节点
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deploy-5648cd896-64ktx 1/1 Running 0 5m54s 172.16.5.11 centos-7-56
nginx-deploy-5648cd896-fgx75 1/1 Running 0 5m54s 172.16.5.13 centos-7-56
nginx-deploy-5648cd896-fvrlq 1/1 Running 0 5m54s 172.16.5.12 centos-7-56
nginx-deploy-5648cd896-hzljl 1/1 Running 0 5m54s 172.16.5.15 centos-7-56
nginx-deploy-5648cd896-qwrb5 1/1 Running 0 5m54s 172.16.5.14 centos-7-56
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deploy
spec:
  replicas: 5
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx-demo
          image: linuxduduniao/nginx:v1.0.0
      # multiple nodeSelector entries are AND-ed: node needs ssd=true AND cpu=high
      nodeSelector:
        ssd: "true"
        cpu: high
[root@centos-7-51 ~]# kubectl get pod -o wide # nodeSelector多个选项之间是 and 关系
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deploy-6d5b594bf5-b7s68 1/1 Running 0 14s 172.16.3.134 centos-7-54
nginx-deploy-6d5b594bf5-kv5kn 1/1 Running 0 14s 172.16.3.132 centos-7-54
nginx-deploy-6d5b594bf5-sxsgv 1/1 Running 0 11s 172.16.3.135 centos-7-54
nginx-deploy-6d5b594bf5-t2p8n 1/1 Running 0 11s 172.16.3.136 centos-7-54
nginx-deploy-6d5b594bf5-xrrhp 1/1 Running 0 14s 172.16.3.133 centos-7-54
8.3. 节点亲和性
节点亲和性分为强制选择(硬亲和)和优先选择(软亲和):
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deploy
spec:
  replicas: 5
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx-demo
          image: linuxduduniao/nginx:v1.0.0
      affinity:
        nodeAffinity:
          # hard affinity: schedule only on nodes without an "ssd" label and with cpu=high
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: ssd
                    operator: DoesNotExist
                  - key: cpu
                    operator: In
                    values: ["high"]
[root@duduniao ~]# kubectl get pod -o wide # 不存在ssd标签,并且cpu值为 high
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deploy-6f8b6d748c-4pt2x 1/1 Running 0 8s 172.16.5.20 centos-7-56
nginx-deploy-6f8b6d748c-m9kb4 1/1 Running 0 8s 172.16.5.19 centos-7-56
nginx-deploy-6f8b6d748c-st8mw 1/1 Running 0 5s 172.16.5.22 centos-7-56
nginx-deploy-6f8b6d748c-w4mc9 1/1 Running 0 5s 172.16.5.21 centos-7-56
nginx-deploy-6f8b6d748c-wjvxx 1/1 Running 0 8s 172.16.5.18 centos-7-56
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deploy
spec:
  replicas: 5
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx-demo
          image: linuxduduniao/nginx:v1.0.0
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: ssd
                    operator: DoesNotExist
                  # changed here: cpu must NOT equal high — no worker node
                  # satisfies this, so the Pods stay Pending
                  - key: cpu
                    operator: NotIn
                    values: ["high"]
[root@duduniao ~]# kubectl describe pod nginx-deploy-746f88c86-96dbp # 注意message
Name: nginx-deploy-746f88c86-96dbp
Namespace: default
Priority: 0
Node:
……
Status: Pending
……
Events:
Type Reason Age From Message
—— ——— —— —— ———-
Warning FailedScheduling 30s (x3 over 32s) default-scheduler 0/6 nodes are available: 3 node(s) didn’t match node selector, 3 node(s) had taint {node-role.kubernetes.io/master: }, that the pod didn’t tolerate.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deploy
spec:
  replicas: 5
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx-demo
          image: linuxduduniao/nginx:v1.0.0
      affinity:
        nodeAffinity:
          # soft affinity: no node matches this preference, so the scheduler
          # falls back to spreading the Pods across all schedulable nodes
          preferredDuringSchedulingIgnoredDuringExecution:
            - preference:
                matchExpressions:
                  - key: ssd
                    operator: DoesNotExist
                  - key: cpu
                    operator: NotIn
                    values: ["high"]
              weight: 5
[root@duduniao ~]# kubectl get pod -o wide # 没有满足的节点,所以随机分布了
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deploy-5b9b5b66bc-5zrps 1/1 Running 0 31s 172.16.3.153 centos-7-54
nginx-deploy-5b9b5b66bc-brwgb 1/1 Running 0 31s 172.16.4.249 centos-7-55
nginx-deploy-5b9b5b66bc-f49j9 1/1 Running 0 31s 172.16.5.23 centos-7-56
nginx-deploy-5b9b5b66bc-jnh45 1/1 Running 0 30s 172.16.4.250 centos-7-55
nginx-deploy-5b9b5b66bc-njft8 1/1 Running 0 29s 172.16.3.154 centos-7-54
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deploy
spec:
  replicas: 5
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx-demo
          image: linuxduduniao/nginx:v1.0.0
      affinity:
        nodeAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            # centos-7-56 matches this preference, so it scores higher and gets
            # most (but not all) Pods; weight raises its priority
            - preference:
                matchExpressions:
                  - key: ssd
                    operator: DoesNotExist
                  - key: cpu
                    operator: In
                    values:
                      - high
              weight: 10
[root@duduniao local-k8s-yaml]# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deploy-5996df99f8-4cmfv 1/1 Running 0 7s 172.16.4.4 centos-7-55
nginx-deploy-5996df99f8-77k5l 1/1 Running 0 7s 172.16.5.38 centos-7-56
nginx-deploy-5996df99f8-8kxvc 1/1 Running 0 7s 172.16.5.37 centos-7-56
nginx-deploy-5996df99f8-t55hj 1/1 Running 0 7s 172.16.3.161 centos-7-54
nginx-deploy-5996df99f8-zbpf2 1/1 Running 0 7s 172.16.5.39 centos-7-56
8.4. Pod亲和性
pod亲和性和节点亲和性类似,也分为硬亲和与软亲和
●案例一(硬亲和)
[root@duduniao local-k8s-yaml]# kubectl label node centos-7-55 cpu=slow # 此时三个节点都存在cpu标签,仅centos-7-55为cpu=slow其它为high
[root@duduniao local-k8s-yaml]# kubectl get pod -l app=nginx -o wide # 为了效果明显,将nginx-deploy减少为1个副本,当前在centos-7-56上
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deploy-cfbbb7cbd-jb4xz 1/1 Running 0 13m 172.16.5.40 centos-7-56
apiVersion: apps/v1
kind: Deployment
metadata:
  name: slb-deploy
spec:
  replicas: 5
  selector:
    matchLabels:
      app: slb
  template:
    metadata:
      labels:
        app: slb
    spec:
      containers:
        - name: slb-demo
          image: linuxduduniao/nginx:v1.0.1
      affinity:
        podAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            # topologyKey=cpu: co-locate with app=nginx Pods within the same
            # "cpu" label domain — centos-7-54 and centos-7-56 both qualify
            - labelSelector:
                matchLabels:
                  app: nginx
              namespaces: [default]
              topologyKey: cpu
[root@duduniao local-k8s-yaml]# kubectl get pod -l app=slb -o wide # centos-7-55不满足 cpu=high,所以不会调度
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
slb-deploy-7d9b6c47d-drjls 1/1 Running 0 4m19s 172.16.3.165 centos-7-54
slb-deploy-7d9b6c47d-dsw7t 1/1 Running 0 4m19s 172.16.5.43 centos-7-56
slb-deploy-7d9b6c47d-fn6k2 1/1 Running 0 4m19s 172.16.5.44 centos-7-56
slb-deploy-7d9b6c47d-kw9vh 1/1 Running 0 4m19s 172.16.3.166 centos-7-54
slb-deploy-7d9b6c47d-pl6lz 1/1 Running 0 4m19s 172.16.5.45 centos-7-56
[root@duduniao local-k8s-yaml]# kubectl label node centos-7-54 cpu=slow --overwrite # 使得仅centos-7-56满足cpu=high
apiVersion: apps/v1
kind: Deployment
metadata:
  name: slb-deploy
spec:
  replicas: 5
  selector:
    matchLabels:
      app: slb
  template:
    metadata:
      labels:
        app: slb
    spec:
      containers:
        - name: slb-demo
          image: linuxduduniao/nginx:v1.0.1
      affinity:
        podAffinity:
          # soft pod affinity: prefer (not require) the topology domain that
          # already hosts app=nginx Pods
          preferredDuringSchedulingIgnoredDuringExecution:
            - podAffinityTerm:
                labelSelector:
                  matchLabels:
                    app: nginx
                namespaces: [default]
                topologyKey: cpu
              weight: 10
[root@duduniao local-k8s-yaml]# kubectl get pod -l app=slb -o wide # 大部分选择了centos-7-56,而不是全部梭哈
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
slb-deploy-6b5fc7bd96-78cj4 1/1 Running 0 16s 172.16.5.48 centos-7-56
slb-deploy-6b5fc7bd96-clchc 1/1 Running 0 18s 172.16.3.167 centos-7-54
slb-deploy-6b5fc7bd96-fxwjf 1/1 Running 0 16s 172.16.5.47 centos-7-56
slb-deploy-6b5fc7bd96-ls8zr 1/1 Running 0 18s 172.16.5.46 centos-7-56
slb-deploy-6b5fc7bd96-zn9t8 1/1 Running 0 18s 172.16.4.5 centos-7-55
8.5. 污点和污点容忍度
用法:
增加污点: kubectl taint node key=value:effect
取消污点: kubectl taint node key=value:effect-
查看污点: kubectl describe node
effect:
PreferNoSchedule: 优先不调度,但是其它节点不满足时可以调度
NoSchedule: 禁止新的Pod调度,已经调度的Pod不会被驱逐
NoExecute: 禁止新的Pod调度,并且已经运行在该节点时的,其不能容忍污点的Pod将被驱逐
# 查看master的污点
[root@duduniao local-k8s-yaml]# kubectl describe node centos-7-51
Name: centos-7-51
Roles: master
Labels: beta.kubernetes.io/arch=amd64
beta.kubernetes.io/os=linux
kubernetes.io/arch=amd64
kubernetes.io/hostname=centos-7-51
kubernetes.io/os=linux
node-role.kubernetes.io/master=
Annotations: flannel.alpha.coreos.com/backend-data: {"VNI":1,"VtepMAC":"52:8a:0e:48:b4:92"}
flannel.alpha.coreos.com/backend-type: vxlan
flannel.alpha.coreos.com/kube-subnet-manager: true
flannel.alpha.coreos.com/public-ip: 10.4.7.51
kubeadm.alpha.kubernetes.io/cri-socket: /var/run/dockershim.sock
node.alpha.kubernetes.io/ttl: 0
volumes.kubernetes.io/controller-managed-attach-detach: true
CreationTimestamp: Fri, 04 Dec 2020 21:49:43 +0800
Taints: node-role.kubernetes.io/master:NoSchedule # 不可调度
……
# 以此作为示例Pod
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deploy
spec:
  replicas: 6
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
        - name: nginx-demo
          image: linuxduduniao/nginx:v1.0.0
# kubectl apply 部署上述的deployment后,Pod分散在三个不同的node上
[root@duduniao local-k8s-yaml]# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deploy-8697d45cb8-4x564 1/1 Running 0 4m9s 172.16.4.9 centos-7-55
nginx-deploy-8697d45cb8-bxms4 1/1 Running 0 4m9s 172.16.5.55 centos-7-56
nginx-deploy-8697d45cb8-c4rbf 1/1 Running 0 4m9s 172.16.3.171 centos-7-54
nginx-deploy-8697d45cb8-hvs92 1/1 Running 0 4m9s 172.16.3.172 centos-7-54
nginx-deploy-8697d45cb8-sbfvj 1/1 Running 0 4m9s 172.16.5.54 centos-7-56
nginx-deploy-8697d45cb8-sw5m4 1/1 Running 0 4m9s 172.16.4.10 centos-7-55
# 使用NoSchedule污点,发现并不会使得现有的Pod发生重调度
[root@duduniao local-k8s-yaml]# kubectl taint node centos-7-54 monitor=true:NoSchedule
node/centos-7-54 tainted
[root@duduniao local-k8s-yaml]# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deploy-8697d45cb8-4x564 1/1 Running 0 5m18s 172.16.4.9 centos-7-55
nginx-deploy-8697d45cb8-bxms4 1/1 Running 0 5m18s 172.16.5.55 centos-7-56
nginx-deploy-8697d45cb8-c4rbf 1/1 Running 0 5m18s 172.16.3.171 centos-7-54
nginx-deploy-8697d45cb8-hvs92 1/1 Running 0 5m18s 172.16.3.172 centos-7-54
nginx-deploy-8697d45cb8-sbfvj 1/1 Running 0 5m18s 172.16.5.54 centos-7-56
nginx-deploy-8697d45cb8-sw5m4 1/1 Running 0 5m18s 172.16.4.10 centos-7-55
更新deployment后,新的Pod不再调到 NoSchedule 节点
[root@duduniao local-k8s-yaml]# kubectl set image deployment nginx-deploy nginx-demo=linuxduduniao/nginx:v1.0.1
deployment.apps/nginx-deploy image updated
[root@duduniao local-k8s-yaml]# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deploy-8494c5b6c5-5ddd9 1/1 Running 0 8s 172.16.4.13 centos-7-55
nginx-deploy-8494c5b6c5-5zdjg 1/1 Running 0 10s 172.16.5.56 centos-7-56
nginx-deploy-8494c5b6c5-bqpgn 1/1 Running 0 10s 172.16.5.57 centos-7-56
nginx-deploy-8494c5b6c5-dq44w 1/1 Running 0 9s 172.16.4.12 centos-7-55
nginx-deploy-8494c5b6c5-pnvw6 1/1 Running 0 8s 172.16.5.58 centos-7-56
nginx-deploy-8494c5b6c5-xnf77 1/1 Running 0 10s 172.16.4.11 centos-7-55
# 设置NoExecute后,现有Pod会被驱逐
[root@duduniao local-k8s-yaml]# kubectl taint node centos-7-55 monitor=true:NoExecute
node/centos-7-55 tainted
[root@duduniao local-k8s-yaml]# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deploy-8494c5b6c5-5zdjg 1/1 Running 0 3m8s 172.16.5.56 centos-7-56
nginx-deploy-8494c5b6c5-bqpgn 1/1 Running 0 3m8s 172.16.5.57 centos-7-56
nginx-deploy-8494c5b6c5-bzc2c 1/1 Running 0 14s 172.16.5.60 centos-7-56
nginx-deploy-8494c5b6c5-f7k2b 1/1 Running 0 14s 172.16.5.62 centos-7-56
nginx-deploy-8494c5b6c5-pnvw6 1/1 Running 0 3m6s 172.16.5.58 centos-7-56
nginx-deploy-8494c5b6c5-s57tv 1/1 Running 0 14s 172.16.5.61 centos-7-56
api-server 能容忍所有NoExecute的污点,因此能在Master上运行
[root@duduniao local-k8s-yaml]# kubectl describe pod -n kube-system kube-apiserver-centos-7-51
……
Tolerations: :NoExecute
# 清除所有节点的污点后,执行以下操作。模拟Prometheus(Prometheus占用内存巨大,推荐单独部署到一个固有节点)的部署
[root@duduniao local-k8s-yaml]# kubectl label node centos-7-56 prometheus=true
node/centos-7-56 labeled
[root@duduniao local-k8s-yaml]# kubectl taint node centos-7-56 monitor=true:NoSchedule
node/centos-7-56 tainted
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      containers:
        - name: prometheus-demo
          image: linuxduduniao/nginx:v1.0.1
      # nodeSelector pins the Pod to the labeled node; the toleration lets it
      # pass the monitor=true:NoSchedule taint — together they make the Pod
      # the sole tenant of centos-7-56
      nodeSelector:
        prometheus: "true"
      tolerations:
        - key: monitor
          operator: Exists
          effect: NoSchedule
# 通过节点选择器和污点容忍度,实现独占一个节点
[root@duduniao local-k8s-yaml]# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
prometheus-76f64854b7-sxdq9 1/1 Running 0 2m21s 172.16.5.71 centos-7-56
8.6. Pod驱逐
在升级节点、移除节点之前,需要将该节上的Pod进行驱逐,并且保证不会有新的Pod调度进来,通常使用 kubectl drain options
节点移除:https://www.yuque.com/duduniao/k8s/togtwi#HpAMj
节点维护:https://www.yuque.com/duduniao/k8s/togtwi#Q5Ao6