1. Overview

Kubeadm is a commonly used way to install a Kubernetes cluster and can be used to deploy highly available clusters in production. Note that the certificates in a cluster installed this way are only valid for one year and must be rotated periodically. When deploying an HA cluster with kubeadm, the etcd cluster can either be stacked on the master nodes or run as an external etcd cluster. The former saves resources and is easier to manage; the latter provides higher availability:

01-2 Installing with kubeadm - Figure 1 (stacked etcd topology)

01-2 Installing with kubeadm - Figure 2 (external etcd topology)
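Because the certificates generated by kubeadm are only valid for one year, it helps to know how to check and renew them. A minimal sketch, assuming kubeadm 1.17, where the certificate subcommands still live under the alpha command group (newer releases use kubeadm certs instead):

# Check when each certificate expires (run on a control-plane node)
[root@centos-7-51 ~]# kubeadm alpha certs check-expiration
# Renew all certificates in place, then restart the static control-plane pods so they pick up the new certs
[root@centos-7-51 ~]# kubeadm alpha certs renew all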

1.1. Node configuration

01-2 Installing with kubeadm - Figure 3 (node configuration)

1.2. Pre-installation checks

The following checks must be performed before installing the cluster, otherwise the installation may fail.

1.2.1. Disable the swap partition

[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "swapoff -a"
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "sed -i '/swap/s/^/#/' /etc/fstab"
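To confirm swap is really off on every host, a quick check reusing the author's scan_host.sh helper (the Swap line should show 0 everywhere):

[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "free -m | grep -i swap"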

1.2.2. Ensure MAC addresses are unique

[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "ifconfig ens32 | grep ether | awk '{print \$2}'"|xargs -n 2
10.4.7.53 00:0c:29:54:d0:72
10.4.7.51 00:0c:29:6b:45:04
10.4.7.52 00:0c:29:cb:13:af
10.4.7.55 00:0c:29:9e:79:38
10.4.7.54 00:0c:29:de:32:40

1.2.3. Ensure product_uuid is unique

[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "cat /sys/class/dmi/id/product_uuid"|xargs -n 2
10.4.7.52 F7714D56-DB80-A3B5-90A3-430EC8CB13AF
10.4.7.51 4E4F4D56-B05B-E126-ADF4-957BCF6B4504
10.4.7.55 60714D56-029B-C734-16B1-15939A9E7938
10.4.7.54 A0554D56-4083-5FB4-96F0-009C94DE3240
10.4.7.53 84CC4D56-9BED-62CD-D826-04336B54D072

1.2.4. Disable SELinux

[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "setenforce 0; sed -ri '/^SELINUX=/s/SELINUX=.+/SELINUX=disabled/' /etc/selinux/config"
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "grep '^SELINUX='  /etc/selinux/config" | xargs -n 2
10.4.7.54 SELINUX=disabled
10.4.7.52 SELINUX=disabled
10.4.7.51 SELINUX=disabled
10.4.7.53 SELINUX=disabled
10.4.7.55 SELINUX=disabled

1.2.5. Enable the br_netfilter module

[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "modprobe br_netfilter"
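modprobe only loads the module for the current boot. To have br_netfilter loaded automatically after a reboot, one option (a sketch using the standard systemd modules-load.d mechanism) is:

[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "echo br_netfilter > /etc/modules-load.d/br_netfilter.conf"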

1.2.6. Configure kernel parameters

Some CentOS 7 users run into traffic bypassing iptables and failing to be routed to the correct IP address; the following kernel parameters need to be set:

[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "echo -e 'net.bridge.bridge-nf-call-ip6tables = 1\nnet.bridge.bridge-nf-call-iptables = 1' > /etc/sysctl.d/k8s.conf "
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "sysctl --system"

1.2.7. Install a time synchronization service

[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "yum install -y chrony"
[root@duduniao ~]# scan_host.sh push -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 chrony.conf /etc/
10.4.7.54         chrony.conf --> /etc/ Y
10.4.7.52         chrony.conf --> /etc/ Y
10.4.7.53         chrony.conf --> /etc/ Y
10.4.7.55         chrony.conf --> /etc/ Y
10.4.7.51         chrony.conf --> /etc/ Y
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "systemctl start chronyd ; systemctl enable chronyd"
[root@centos-7-51 ~]# vim /etc/chrony.conf # just change the NTP server addresses as below (see https://help.aliyun.com/document_detail/92704.html)
# Use public servers from the pool.ntp.org project.
# Please consider joining the pool (http://www.pool.ntp.org/join.html).
server ntp1.aliyun.com iburst
server ntp2.aliyun.com iburst
server ntp3.aliyun.com iburst
server ntp4.aliyun.com iburst
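Once chronyd is running against the Aliyun servers, synchronization can be verified with chronyc (the ^* marker indicates the currently selected source):

[root@centos-7-51 ~]# chronyc sources
[root@centos-7-51 ~]# chronyc tracking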

1.2.8. Enable the IPVS modules

Reference: https://github.com/kubernetes/kubernetes/blob/master/pkg/proxy/ipvs/README.md

Check the kernel version first: on kernels 4.19 and later, load nf_conntrack instead of nf_conntrack_ipv4.

# Check whether the IPVS modules are compiled into the kernel; if not, they have to be loaded manually
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "grep -e ipvs -e nf_conntrack_ipv4 /lib/modules/\$(uname -r)/modules.builtin"
# Load the IPVS modules
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "modprobe -- ip_vs;modprobe -- ip_vs_rr;modprobe -- ip_vs_wrr;modprobe -- ip_vs_sh;modprobe -- nf_conntrack_ipv4"
# Verify the IPVS modules are loaded
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 -f 1 "cut -f1 -d ' '  /proc/modules | grep -e ip_vs -e nf_conntrack_ipv4"
# Install ipset and ipvsadm
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 -f 1 "yum install -y ipset ipvsadm"
# Make sure the IPVS modules are loaded again after a reboot
[root@duduniao ~]# cat containerd.service  # modify the containerd unit so it loads the IPVS modules before starting; after repeated testing this turned out to be the most reliable approach
#   Copyright 2018-2020 Docker Inc.

#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at

#       http://www.apache.org/licenses/LICENSE-2.0

#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.

[Unit]
Description=containerd container runtime
Documentation=https://containerd.io
After=network.target

[Service]
ExecStartPre=-/sbin/modprobe overlay
ExecStartPre=-/sbin/modprobe ip_vs
ExecStartPre=-/sbin/modprobe ip_vs_rr
ExecStartPre=-/sbin/modprobe ip_vs_wrr
ExecStartPre=-/sbin/modprobe ip_vs_sh
ExecStartPre=-/sbin/modprobe nf_conntrack_ipv4
ExecStart=/usr/bin/containerd
KillMode=process
Delegate=yes
LimitNOFILE=1048576
# Having non-zero Limit*s causes performance problems due to accounting overhead
# in the kernel. We recommend using cgroups to do container-local accounting.
LimitNPROC=infinity
LimitCORE=infinity
TasksMax=infinity

[Install]
WantedBy=multi-user.target
[root@duduniao ~]# scan_host.sh push -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 containerd.service /usr/lib/systemd/system/containerd.service
10.4.7.52     containerd.service --> /usr/lib/systemd/system/containerd.service Y
10.4.7.51     containerd.service --> /usr/lib/systemd/system/containerd.service Y
10.4.7.54     containerd.service --> /usr/lib/systemd/system/containerd.service Y
10.4.7.55     containerd.service --> /usr/lib/systemd/system/containerd.service Y
10.4.7.53     containerd.service --> /usr/lib/systemd/system/containerd.service Y
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "systemctl daemon-reload"

2. Installing the Cluster

2.1. Install the load balancer

[root@centos-7-59 ~]# wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
[root@centos-7-59 ~]# yum install -y nginx
[root@centos-7-59 ~]# vim /etc/nginx/nginx.conf # append at the end
...
stream {
    log_format proxy '$time_local|$remote_addr|$upstream_addr|$protocol|$status|'
                     '$session_time|$upstream_connect_time|$bytes_sent|$bytes_received|'
                     '$upstream_bytes_sent|$upstream_bytes_received' ;
    upstream kube-apiserver {
        server 10.4.7.51:6443 ;
        server 10.4.7.52:6443 ;
        server 10.4.7.53:6443 ;
    }

    server {
        listen 10.4.7.59:6443 backlog=65535 so_keepalive=on;
        allow 10.4.7.0/24;
        allow 192.168.0.0/16;
        allow 172.19.0.0/16;
        allow 172.24.0.0/16;
        deny all;

        proxy_connect_timeout 3s;
        proxy_next_upstream on;
        proxy_next_upstream_timeout 5;
        proxy_next_upstream_tries 1;

        proxy_pass kube-apiserver;
        access_log /var/log/nginx/kube-apiserver.log proxy;
    }
}
[root@centos-7-59 ~]# systemctl start nginx.service ; systemctl enable nginx.service
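At this point no apiserver is running yet, so the only useful check on the LB is that the configuration parses and nginx is listening on 10.4.7.59:6443:

[root@centos-7-59 ~]# nginx -t
[root@centos-7-59 ~]# ss -lntp | grep 6443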

2.2. Install docker-ce

[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "wget -O /etc/yum.repos.d/docker-ce.repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo"
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "yum install -y containerd.io-1.2.13 docker-ce-19.03.11 docker-ce-cli-19.03.11"
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "mkdir /etc/docker"
[root@duduniao ~]# cat daemon.json 
{
  "graph": "/data/docker",
  "storage-driver": "overlay2",
  "insecure-registries": ["registry.access.redhat.com","quay.io"],
  "registry-mirrors": ["https://q2gr04ke.mirror.aliyuncs.com"],
  "exec-opts": ["native.cgroupdriver=systemd"],
  "log-opts": {"max-size":"32M", "max-file":"2"}
}
[root@duduniao ~]# scan_host.sh push -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 daemon.json /etc/docker/
10.4.7.55         daemon.json --> /etc/docker/ Y
10.4.7.51         daemon.json --> /etc/docker/ Y
10.4.7.52         daemon.json --> /etc/docker/ Y
10.4.7.54         daemon.json --> /etc/docker/ Y
10.4.7.53         daemon.json --> /etc/docker/ Y
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "systemctl restart docker"
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "systemctl enable docker"
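Since the kubelet and the container runtime must use the same cgroup driver, it is worth confirming that daemon.json took effect; the expected output is "Cgroup Driver: systemd" on every host:

[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "docker info 2>/dev/null | grep -i 'cgroup driver'"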

2.3. Install the Kubernetes cluster

2.3.1. Install kubeadm

[root@duduniao ~]# cat kubeadm.repo # configure the Aliyun yum repository
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
[root@duduniao ~]# scan_host.sh push -h  10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 kubeadm.repo /etc/yum.repos.d/
10.4.7.53         kubeadm.repo --> /etc/yum.repos.d/ Y
10.4.7.52         kubeadm.repo --> /etc/yum.repos.d/ Y
10.4.7.54         kubeadm.repo --> /etc/yum.repos.d/ Y
10.4.7.51         kubeadm.repo --> /etc/yum.repos.d/ Y
10.4.7.55         kubeadm.repo --> /etc/yum.repos.d/ Y
[root@centos-7-55 ~]# yum list kubelet --showduplicates | grep 1.17 # 1.17 supports Docker 19.03, as noted on GitHub
kubelet.x86_64                       1.17.0-0                         kubernetes
kubelet.x86_64                       1.17.1-0                         kubernetes
kubelet.x86_64                       1.17.2-0                         kubernetes
kubelet.x86_64                       1.17.3-0                         kubernetes
kubelet.x86_64                       1.17.4-0                         kubernetes
kubelet.x86_64                       1.17.5-0                         kubernetes
kubelet.x86_64                       1.17.6-0                         kubernetes
kubelet.x86_64                       1.17.7-0                         kubernetes
kubelet.x86_64                       1.17.7-1                         kubernetes
kubelet.x86_64                       1.17.8-0                         kubernetes
kubelet.x86_64                       1.17.9-0                         kubernetes
kubelet.x86_64                       1.17.11-0                        kubernetes
kubelet.x86_64                       1.17.12-0                        kubernetes
kubelet.x86_64                       1.17.13-0                        kubernetes
kubelet.x86_64                       1.17.14-0                        kubernetes
# worker nodes do not actually need the kubectl CLI
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "yum install -y kubeadm-1.17.14 kubelet-1.17.14 kubectl-1.17.14"
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "systemctl start kubelet.service; systemctl enable kubelet.service"
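The kubelet will keep restarting until kubeadm init/join writes its configuration; that is expected at this stage. A quick sanity check that the intended versions landed on every host:

[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.51 10.4.7.52 10.4.7.53 10.4.7.54 10.4.7.55 "kubeadm version -o short; kubelet --version"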

2.3.2. Initialize the control plane

Pick one master node and run the initialization there:

# Read the output carefully, especially the instructions for joining additional nodes and setting up kubectl
[root@centos-7-51 ~]# kubeadm init --control-plane-endpoint "10.4.7.59:6443" --pod-network-cidr 172.16.0.0/16 --service-cidr 10.96.0.0/16  --image-repository registry.aliyuncs.com/google_containers --upload-certs
I1204 21:49:05.949552   15589 version.go:251] remote version is much newer: v1.19.4; falling back to: stable-1.17

W1204 21:49:08.043867   15589 validation.go:28] Cannot validate kube-proxy config - no validator is available
W1204 21:49:08.043888   15589 validation.go:28] Cannot validate kubelet config - no validator is available
[init] Using Kubernetes version: v1.17.14
[preflight] Running pre-flight checks
    [WARNING Hostname]: hostname "centos-7-51" could not be reached
    [WARNING Hostname]: hostname "centos-7-51": lookup centos-7-51 on 10.4.7.254:53: no such host
[preflight] Pulling images required for setting up a Kubernetes cluster
[preflight] This might take a minute or two, depending on the speed of your internet connection
[preflight] You can also perform this action in beforehand using 'kubeadm config images pull'
[kubelet-start] Writing kubelet environment file with flags to file "/var/lib/kubelet/kubeadm-flags.env"
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[kubelet-start] Starting the kubelet
[certs] Using certificateDir folder "/etc/kubernetes/pki"
[certs] Generating "ca" certificate and key
[certs] Generating "apiserver" certificate and key
[certs] apiserver serving cert is signed for DNS names [centos-7-51 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 10.4.7.51 10.4.7.59]
[certs] Generating "apiserver-kubelet-client" certificate and key
[certs] Generating "front-proxy-ca" certificate and key
[certs] Generating "front-proxy-client" certificate and key
[certs] Generating "etcd/ca" certificate and key
[certs] Generating "etcd/server" certificate and key
[certs] etcd/server serving cert is signed for DNS names [centos-7-51 localhost] and IPs [10.4.7.51 127.0.0.1 ::1]
[certs] Generating "etcd/peer" certificate and key
[certs] etcd/peer serving cert is signed for DNS names [centos-7-51 localhost] and IPs [10.4.7.51 127.0.0.1 ::1]
[certs] Generating "etcd/healthcheck-client" certificate and key
[certs] Generating "apiserver-etcd-client" certificate and key
[certs] Generating "sa" key and public key
[kubeconfig] Using kubeconfig folder "/etc/kubernetes"
[kubeconfig] Writing "admin.conf" kubeconfig file
[kubeconfig] Writing "kubelet.conf" kubeconfig file
[kubeconfig] Writing "controller-manager.conf" kubeconfig file
[kubeconfig] Writing "scheduler.conf" kubeconfig file
[control-plane] Using manifest folder "/etc/kubernetes/manifests"
[control-plane] Creating static Pod manifest for "kube-apiserver"
[control-plane] Creating static Pod manifest for "kube-controller-manager"
W1204 21:49:11.018264   15589 manifests.go:214] the default kube-apiserver authorization-mode is "Node,RBAC"; using "Node,RBAC"
[control-plane] Creating static Pod manifest for "kube-scheduler"
W1204 21:49:11.019029   15589 manifests.go:214] the default kube-apiserver authorization-mode is "Node,RBAC"; using "Node,RBAC"
[etcd] Creating static Pod manifest for local etcd in "/etc/kubernetes/manifests"
[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[apiclient] All control plane components are healthy after 31.035114 seconds
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config-1.17" in namespace kube-system with the configuration for the kubelets in the cluster
[upload-certs] Storing the certificates in Secret "kubeadm-certs" in the "kube-system" Namespace
[upload-certs] Using certificate key:
aeca2ae59ec761196f824a869b5ac0c4dd1cc24bfd93a2d81671f9d570a0357e
[mark-control-plane] Marking the node centos-7-51 as control-plane by adding the label "node-role.kubernetes.io/master=''"
[mark-control-plane] Marking the node centos-7-51 as control-plane by adding the taints [node-role.kubernetes.io/master:NoSchedule]
[bootstrap-token] Using token: j8eck0.u5vu8pxb63eq9bxb
[bootstrap-token] Configuring bootstrap tokens, cluster-info ConfigMap, RBAC Roles
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[bootstrap-token] Creating the "cluster-info" ConfigMap in the "kube-public" namespace
[kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

You can now join any number of the control-plane node running the following command on each as root:

  kubeadm join 10.4.7.59:6443 --token j8eck0.u5vu8pxb63eq9bxb \
    --discovery-token-ca-cert-hash sha256:6cdf0e0c66e095beca8b7eadb74da19aa904b45e2378bf53172c7f633c0dc9e8 \
    --control-plane --certificate-key aeca2ae59ec761196f824a869b5ac0c4dd1cc24bfd93a2d81671f9d570a0357e

Please note that the certificate-key gives access to cluster sensitive data, keep it secret!
As a safeguard, uploaded-certs will be deleted in two hours; If necessary, you can use
"kubeadm init phase upload-certs --upload-certs" to reload certs afterward.

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 10.4.7.59:6443 --token j8eck0.u5vu8pxb63eq9bxb \
    --discovery-token-ca-cert-hash sha256:6cdf0e0c66e095beca8b7eadb74da19aa904b45e2378bf53172c7f633c0dc9e8

2.3.3. Join the other control-plane nodes

Add the remaining master nodes to the cluster:

[root@centos-7-52 ~]# kubeadm join 10.4.7.59:6443 --token j8eck0.u5vu8pxb63eq9bxb --discovery-token-ca-cert-hash sha256:6cdf0e0c66e095beca8b7eadb74da19aa904b45e2378bf53172c7f633c0dc9e8 --control-plane --certificate-key aeca2ae59ec761196f824a869b5ac0c4dd1cc24bfd93a2d81671f9d570a0357e
[root@centos-7-53 ~]# kubeadm join 10.4.7.59:6443 --token j8eck0.u5vu8pxb63eq9bxb --discovery-token-ca-cert-hash sha256:6cdf0e0c66e095beca8b7eadb74da19aa904b45e2378bf53172c7f633c0dc9e8 --control-plane --certificate-key aeca2ae59ec761196f824a869b5ac0c4dd1cc24bfd93a2d81671f9d570a0357e

2.3.4. Join the worker nodes

[root@centos-7-54 ~]# kubeadm join 10.4.7.59:6443 --token j8eck0.u5vu8pxb63eq9bxb --discovery-token-ca-cert-hash sha256:6cdf0e0c66e095beca8b7eadb74da19aa904b45e2378bf53172c7f633c0dc9e8
[root@centos-7-55 ~]# kubeadm join 10.4.7.59:6443 --token j8eck0.u5vu8pxb63eq9bxb --discovery-token-ca-cert-hash sha256:6cdf0e0c66e095beca8b7eadb74da19aa904b45e2378bf53172c7f633c0dc9e8

2.3.5. Configure the kubeconfig file

On the master node, follow the instructions printed by kubeadm init:

[root@centos-7-51 ~]# mkdir -p $HOME/.kube
[root@centos-7-51 ~]# cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
[root@centos-7-51 ~]# chown $(id -u):$(id -g) $HOME/.kube/config

2.3.6. Fix unhealthy component status

[root@centos-7-51 ~]# kubectl get cs # controller-manager and scheduler report Unhealthy; fixed below
NAME                 STATUS      MESSAGE                                                                                     ERROR
controller-manager   Unhealthy   Get http://127.0.0.1:10252/healthz: dial tcp 127.0.0.1:10252: connect: connection refused   
scheduler            Unhealthy   Get http://127.0.0.1:10251/healthz: dial tcp 127.0.0.1:10251: connect: connection refused   
etcd-0               Healthy     {"health":"true"}

The following workaround was found through a web search and has not been investigated in depth; there should be a more elegant solution. Apart from the kubelet, the control-plane components run as static pods whose manifests live in /etc/kubernetes/manifests, and edits to those manifests take effect automatically. Remove the --port=0 argument from the container command in kube-scheduler.yaml and kube-controller-manager.yaml. --port=0 disables the insecure HTTP endpoint, and with it the insecure --bind-address setting has no effect.

[root@centos-7-51 ~]# vim /etc/kubernetes/manifests/kube-scheduler.yaml 
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-scheduler
    - --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
    - --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
    - --bind-address=127.0.0.1
    - --kubeconfig=/etc/kubernetes/scheduler.conf
    - --leader-elect=true
    # - --port=0
    image: registry.aliyuncs.com/google_containers/kube-scheduler:v1.17.14
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
...
[root@centos-7-51 ~]# kubectl get cs # after editing kube-scheduler.yaml and kube-controller-manager.yaml
NAME                 STATUS    MESSAGE             ERROR
controller-manager   Healthy   ok                  
scheduler            Healthy   ok                  
etcd-0               Healthy   {"health":"true"}

2.3.7. Install a CNI plugin

For convenience, Flannel is used as the CNI plugin here. GitHub: https://github.com/coreos/flannel

Installation guide: https://github.com/coreos/flannel/blob/master/Documentation/kubernetes.md

01-2 Installing with kubeadm - Figure 4

# The nodes below are NotReady and the coredns pods are Pending because no CNI plugin has been installed yet
[root@centos-7-51 ~]# kubectl get node
NAME          STATUS     ROLES    AGE     VERSION
centos-7-51   NotReady   master   7m22s   v1.17.14
centos-7-52   NotReady   master   5m16s   v1.17.14
centos-7-53   NotReady   master   4m26s   v1.17.14
centos-7-54   NotReady   <none>   3m      v1.17.14
centos-7-55   NotReady   <none>   2m57s   v1.17.14
[root@centos-7-51 ~]# kubectl get pod -A | grep Pending
kube-system   coredns-9d85f5447-6wh25               0/1     Pending   0          7m49s
kube-system   coredns-9d85f5447-klsxf               0/1     Pending   0          7m49s
# The flannel yaml needs the following changes:
#   1. change the image registry address
#   2. change the pod CIDR
---
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
  name: psp.flannel.unprivileged
  annotations:
    seccomp.security.alpha.kubernetes.io/allowedProfileNames: docker/default
    seccomp.security.alpha.kubernetes.io/defaultProfileName: docker/default
    apparmor.security.beta.kubernetes.io/allowedProfileNames: runtime/default
    apparmor.security.beta.kubernetes.io/defaultProfileName: runtime/default
spec:
  privileged: false
  volumes:
  - configMap
  - secret
  - emptyDir
  - hostPath
  allowedHostPaths:
  - pathPrefix: "/etc/cni/net.d"
  - pathPrefix: "/etc/kube-flannel"
  - pathPrefix: "/run/flannel"
  readOnlyRootFilesystem: false
  # Users and groups
  runAsUser:
    rule: RunAsAny
  supplementalGroups:
    rule: RunAsAny
  fsGroup:
    rule: RunAsAny
  # Privilege Escalation
  allowPrivilegeEscalation: false
  defaultAllowPrivilegeEscalation: false
  # Capabilities
  allowedCapabilities: ['NET_ADMIN', 'NET_RAW']
  defaultAddCapabilities: []
  requiredDropCapabilities: []
  # Host namespaces
  hostPID: false
  hostIPC: false
  hostNetwork: true
  hostPorts:
  - min: 0
    max: 65535
  # SELinux
  seLinux:
    # SELinux is unused in CaaSP
    rule: 'RunAsAny'
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
rules:
- apiGroups: ['extensions']
  resources: ['podsecuritypolicies']
  verbs: ['use']
  resourceNames: ['psp.flannel.unprivileged']
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-system
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: flannel
  namespace: kube-system
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: kube-flannel-cfg
  namespace: kube-system
  labels:
    tier: node
    app: flannel
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "172.16.0.0/16",
      "Backend": {
        "Type": "vxlan"
      }
    }
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: kube-flannel-ds
  namespace: kube-system
  labels:
    tier: node
    app: flannel
spec:
  selector:
    matchLabels:
      app: flannel
  template:
    metadata:
      labels:
        tier: node
        app: flannel
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      hostNetwork: true
      priorityClassName: system-node-critical
      tolerations:
      - operator: Exists
        effect: NoSchedule
      serviceAccountName: flannel
      initContainers:
      - name: install-cni
        # image: quay.io/coreos/flannel:v0.13.1-rc1
        image: quay.mirrors.ustc.edu.cn/coreos/flannel:v0.13.1-rc1
        command:
        - cp
        args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        volumeMounts:
        - name: cni
          mountPath: /etc/cni/net.d
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      containers:
      - name: kube-flannel
        # image: quay.io/coreos/flannel:v0.13.1-rc1
        image: quay.mirrors.ustc.edu.cn/coreos/flannel:v0.13.1-rc1
        command:
        - /opt/bin/flanneld
        args:
        - --ip-masq
        - --kube-subnet-mgr
        resources:
          requests:
            cpu: "100m"
            memory: "50Mi"
          limits:
            cpu: "100m"
            memory: "50Mi"
        securityContext:
          privileged: false
          capabilities:
            add: ["NET_ADMIN", "NET_RAW"]
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        volumeMounts:
        - name: run
          mountPath: /run/flannel
        - name: flannel-cfg
          mountPath: /etc/kube-flannel/
      volumes:
      - name: run
        hostPath:
          path: /run/flannel
      - name: cni
        hostPath:
          path: /etc/cni/net.d
      - name: flannel-cfg
        configMap:
          name: kube-flannel-cfg
[root@centos-7-51 ~]# kubectl apply -f /tmp/flannel.yaml # flannel.yaml contains the manifest above
[root@centos-7-51 ~]# kubectl get node 
NAME          STATUS   ROLES    AGE   VERSION
centos-7-51   Ready    master   86m   v1.17.14
centos-7-52   Ready    master   84m   v1.17.14
centos-7-53   Ready    master   83m   v1.17.14
centos-7-54   Ready    <none>   82m   v1.17.14
centos-7-55   Ready    <none>   82m   v1.17.14
[root@centos-7-51 ~]# kubectl get cs
NAME                 STATUS    MESSAGE             ERROR
scheduler            Healthy   ok                  
controller-manager   Healthy   ok                  
etcd-0               Healthy   {"health":"true"}   
[root@centos-7-51 ~]# kubectl get pod -A 
NAMESPACE     NAME                                  READY   STATUS    RESTARTS   AGE
kube-system   coredns-9d85f5447-6wh25               1/1     Running   1          86m
kube-system   coredns-9d85f5447-klsxf               1/1     Running   0          86m
kube-system   etcd-centos-7-51                      1/1     Running   0          86m
kube-system   etcd-centos-7-52                      1/1     Running   0          84m
kube-system   etcd-centos-7-53                      1/1     Running   0          83m
kube-system   kube-apiserver-centos-7-51            1/1     Running   0          86m
kube-system   kube-apiserver-centos-7-52            1/1     Running   0          84m
kube-system   kube-apiserver-centos-7-53            1/1     Running   1          82m
kube-system   kube-controller-manager-centos-7-51   1/1     Running   0          81m
kube-system   kube-controller-manager-centos-7-52   1/1     Running   0          81m
kube-system   kube-controller-manager-centos-7-53   1/1     Running   0          80m
kube-system   kube-flannel-ds-c2bs6                 1/1     Running   0          10m
kube-system   kube-flannel-ds-t6j77                 1/1     Running   0          10m
kube-system   kube-flannel-ds-w526w                 1/1     Running   0          10m
kube-system   kube-flannel-ds-wp546                 1/1     Running   0          10m
kube-system   kube-flannel-ds-xc5cb                 1/1     Running   0          10m
kube-system   kube-proxy-56ll2                      1/1     Running   1          82m
kube-system   kube-proxy-5rl4c                      1/1     Running   0          84m
kube-system   kube-proxy-62rb9                      1/1     Running   0          86m
kube-system   kube-proxy-bjzlv                      1/1     Running   0          83m
kube-system   kube-proxy-llrjg                      1/1     Running   0          82m
kube-system   kube-scheduler-centos-7-51            1/1     Running   0          81m
kube-system   kube-scheduler-centos-7-52            1/1     Running   0          81m
kube-system   kube-scheduler-centos-7-53            1/1     Running   0          80m

2.3.8. Switch kube-proxy to IPVS

kube-proxy runs in iptables mode by default. To switch to IPVS, change the mode in its ConfigMap and delete the existing kube-proxy pods so they restart with the new configuration (see the sketch after the ConfigMap excerpt below).

[root@centos-7-51 ~]# kubectl get cm -n kube-system kube-proxy  -o yaml
apiVersion: v1
data:
  config.conf: |-
    apiVersion: kubeproxy.config.k8s.io/v1alpha1
        ......
    mode: "ipvs" # 修改为 ipvs即可

To confirm which mode is actually in use, either run ipvsadm -Ln and look for IPVS forwarding rules, or check the kube-proxy logs:

[root@centos-7-51 ~]# kubectl logs -f -n  kube-system kube-proxy-49s6m
I1205 12:14:08.430165       1 node.go:135] Successfully retrieved node IP: 10.4.7.55
I1205 12:14:08.430391       1 server_others.go:172] Using ipvs Proxier.
W1205 12:14:08.431429       1 proxier.go:420] IPVS scheduler not specified, use rr by default
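The same can be confirmed on any node with ipvsadm: once IPVS mode is active, the service cluster IPs show up as virtual servers with rr scheduling:

[root@centos-7-51 ~]# ipvsadm -Ln | head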

2.4. Install add-ons

The steps above produce a working cluster, but it is still missing many add-ons and is not yet usable in production. The following sections install them one by one.

2.4.1. Install traefik

This walkthrough uses traefik 2.2 as the ingress controller. Since traefik 2.x supports defining routes through CRDs, the CRD resources need to be created first:

---
## IngressRoute
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: ingressroutes.traefik.containo.us
spec:
  scope: Namespaced
  group: traefik.containo.us
  version: v1alpha1
  names:
    kind: IngressRoute
    plural: ingressroutes
    singular: ingressroute
---
## IngressRouteTCP
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: ingressroutetcps.traefik.containo.us
spec:
  scope: Namespaced
  group: traefik.containo.us
  version: v1alpha1
  names:
    kind: IngressRouteTCP
    plural: ingressroutetcps
    singular: ingressroutetcp
---
## Middleware
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: middlewares.traefik.containo.us
spec:
  scope: Namespaced
  group: traefik.containo.us
  version: v1alpha1
  names:
    kind: Middleware
    plural: middlewares
    singular: middleware
---
## TLSOption
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: tlsoptions.traefik.containo.us
spec:
  scope: Namespaced
  group: traefik.containo.us
  version: v1alpha1
  names:
    kind: TLSOption
    plural: tlsoptions
    singular: tlsoption
---
## TraefikService
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: traefikservices.traefik.containo.us
spec:
  scope: Namespaced
  group: traefik.containo.us
  version: v1alpha1
  names:
    kind: TraefikService
    plural: traefikservices
    singular: traefikservice
---
## TLSStore
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: tlsstores.traefik.containo.us
spec:
  group: traefik.containo.us
  version: v1alpha1
  names:
    kind: TLSStore
    plural: tlsstores
    singular: tlsstore
  scope: Namespaced
---
## IngressRouteUDP
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
  name: ingressrouteudps.traefik.containo.us
spec:
  group: traefik.containo.us
  version: v1alpha1
  names:
    kind: IngressRouteUDP
    plural: ingressrouteudps
    singular: ingressrouteudp
  scope: Namespaced
---
## ServiceAccount
apiVersion: v1
kind: ServiceAccount
metadata:
  namespace: kube-system
  name: traefik-ingress-controller
---
## ClusterRole
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: traefik-ingress-controller
rules:
  - apiGroups: [""]
    resources: ["services","endpoints","secrets"]
    verbs: ["get","list","watch"]
  - apiGroups: ["extensions"]
    resources: ["ingresses"]
    verbs: ["get","list","watch"]
  - apiGroups: ["extensions"]
    resources: ["ingresses/status"]
    verbs: ["update"]
  - apiGroups: ["traefik.containo.us"]
    resources: ["middlewares","ingressroutes","ingressroutetcps","tlsoptions","ingressrouteudps","traefikservices","tlsstores"]
    verbs: ["get","list","watch"]
---
## ClusterRoleBinding
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1beta1
metadata:
  name: traefik-ingress-controller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: traefik-ingress-controller
subjects:
  - kind: ServiceAccount
    name: traefik-ingress-controller
    namespace: kube-system
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: traefik-config
  namespace: kube-system
data:
  traefik.yaml: |-
    ping: ""                    ## 启用 Ping
    serversTransport:
      insecureSkipVerify: true  ## Traefik 忽略验证代理服务的 TLS 证书
    api:
      insecure: true            ## 允许 HTTP 方式访问 API
      dashboard: true           ## 启用 Dashboard
      debug: false              ## 启用 Debug 调试模式
    metrics:
      prometheus: ""            ## 配置 Prometheus 监控指标数据,并使用默认配置
    entryPoints:
      web:
        address: ":80"          ## 配置 80 端口,并设置入口名称为 web
      websecure:
        address: ":443"         ## 配置 443 端口,并设置入口名称为 websecure
    providers:
      kubernetesCRD: ""         ## 启用 Kubernetes CRD 方式来配置路由规则
      kubernetesIngress: ""     ## 启动 Kubernetes Ingress 方式来配置路由规则
    log:
      filePath: ""              ## 设置调试日志文件存储路径,如果为空则输出到控制台
      level: error              ## 设置调试日志级别
      format: json              ## 设置调试日志格式
    accessLog:
      filePath: ""              ## 设置访问日志文件存储路径,如果为空则输出到控制台
      format: json              ## 设置访问调试日志格式
      bufferingSize: 0          ## 设置访问日志缓存行数
      filters:
        #statusCodes: ["200"]   ## 设置只保留指定状态码范围内的访问日志
        retryAttempts: true     ## 设置代理访问重试失败时,保留访问日志
        minDuration: 20         ## 设置保留请求时间超过指定持续时间的访问日志
      fields:                   ## 设置访问日志中的字段是否保留(keep 保留、drop 不保留)
        defaultMode: keep       ## 设置默认保留访问日志字段
        names:                  ## 针对访问日志特别字段特别配置保留模式
          ClientUsername: drop  
        headers:                ## 设置 Header 中字段是否保留
          defaultMode: keep     ## 设置默认保留 Header 中字段
          names:                ## 针对 Header 中特别字段特别配置保留模式
            User-Agent: redact
            Authorization: drop
            Content-Type: keep
    #tracing:                     ## 链路追踪配置,支持 zipkin、datadog、jaeger、instana、haystack 等 
    #  serviceName:               ## 设置服务名称(在链路追踪端收集后显示的服务名)
    #  zipkin:                    ## zipkin配置
    #    sameSpan: true           ## 是否启用 Zipkin SameSpan RPC 类型追踪方式
    #    id128Bit: true           ## 是否启用 Zipkin 128bit 的跟踪 ID
    #    sampleRate: 0.1          ## 设置链路日志采样率(可以配置0.0到1.0之间的值)
    #    httpEndpoint: http://localhost:9411/api/v2/spans     ## 配置 Zipkin Server 端点
---
apiVersion: v1
kind: Service
metadata:
  name: traefik
  namespace: kube-system
spec:
  ports:
    - name: web
      port: 80
    - name: websecure
      port: 443
    - name: admin
      port: 8080
  selector:
    app: traefik
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: traefik-ingress-controller
  namespace: kube-system
  labels:
    app: traefik
spec:
  selector:
    matchLabels:
      app: traefik
  template:
    metadata:
      name: traefik
      labels:
        app: traefik
    spec:
      serviceAccountName: traefik-ingress-controller
      terminationGracePeriodSeconds: 1
      containers:
        - image: traefik:v2.2.8
          name: traefik-ingress-lb
          ports:
            - name: web
              containerPort: 80
              hostPort: 80         ## bind the container port to port 80 on the host
            - name: websecure
              containerPort: 443
              hostPort: 443        ## bind the container port to port 443 on the host
            - name: admin
              containerPort: 8080  ## Traefik dashboard port
          resources:
            limits:
              cpu: 2000m
              memory: 1024Mi
            requests:
              cpu: 1000m
              memory: 1024Mi
          securityContext:
            capabilities:
              drop:
                - ALL
              add:
                - NET_BIND_SERVICE
          args:
            - --configfile=/config/traefik.yaml
          volumeMounts:
            - mountPath: "/config"
              name: "config"
          readinessProbe:
            httpGet:
              path: /ping
              port: 8080
            failureThreshold: 3
            initialDelaySeconds: 10
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          livenessProbe:
            httpGet:
              path: /ping
              port: 8080
            failureThreshold: 3
            initialDelaySeconds: 10
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5    
      volumes:
        - name: config
          configMap:
            name: traefik-config 
      # tolerations:              ## tolerate all taints, so tainted nodes are not excluded
      #   - operator: "Exists"
      # nodeSelector:             ## node selector, to run only on nodes with a specific label
      #   IngressProxy: "true"
---
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: traefik-dashboard-ingress
  namespace: kube-system
  annotations:
    kubernetes.io/ingress.class: traefik            
    traefik.ingress.kubernetes.io/router.entrypoints: web
spec:
  rules:
  - host: traefik.k8s-ddn.com
    http:
      paths:
      - path: /              
        backend:
          serviceName: traefik
          servicePort: 8080
[root@centos-7-51 ~]# kubectl apply -f /tmp/traefik.yaml # apply the traefik manifests
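# A quick check that the traefik DaemonSet pods are running on the worker nodes before wiring up the front-end nginx below
[root@centos-7-51 ~]# kubectl -n kube-system get pod -l app=traefik -o wide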
# In general it is not recommended to terminate HTTPS inside the k8s cluster: TLS offloading then falls on the cluster and may require cert-manager to manage certificates, adding extra burden
# The usual approach is to terminate TLS on the SLB in front of the cluster and talk plain HTTP from the SLB to the ingress
# A wildcard certificate is configured here; see https://www.yuque.com/duduniao/nginx/smgh7e#T7Ys0 for how to create one
[root@centos-7-59 ~]# cat /etc/nginx/conf.d/k8s-app.conf 
server {
    server_name *.k8s-ddn.com;
    listen 80;
    rewrite ^(.*)$ https://${host}$1 permanent;
}

server {
    listen       443 ssl;
    server_name  *.k8s-ddn.com;

    ssl_certificate "certs/k8s-app.crt";
    ssl_certificate_key "certs/k8s-app.key";
    ssl_session_cache shared:SSL:1m;
    ssl_session_timeout  10m;
    ssl_ciphers HIGH:!aNULL:!MD5;
    ssl_prefer_server_ciphers on;

    location / {
        proxy_pass http://k8s_http_pool;
        proxy_set_header Host       $http_host;
        proxy_set_header x-forwarded-for $proxy_add_x_forwarded_for;
        access_log /var/log/nginx/k8s.log proxy;
    }
}

upstream k8s_http_pool {
    server 10.4.7.55:80    max_fails=3 fail_timeout=10s;
    server 10.4.7.54:80    max_fails=3 fail_timeout=10s;
}
# HTTPS is not used for now
upstream k8s_https_pool {
    server 10.4.7.55:443    max_fails=3 fail_timeout=10s;
    server 10.4.7.54:443    max_fails=3 fail_timeout=10s;
}

01-2 Installing with kubeadm - Figure 5

2.4.2. Install nfs-client-provisioner

Kubernetes supports dynamic provisioning of PVs and PVCs, but NFS storage does not support this natively, even though NFS is a very common shared-storage backend. nfs-client-provisioner gives an NFS server the ability to provision PVs dynamically. Project home: https://github.com/kubernetes-sigs/nfs-subdir-external-provisioner. Provisioned directories are named ${namespace}-${pvcName}-${pvName}.

# Create the NFS server: install it on centos-7-59 and export the /data/nfs directory
[root@centos-7-59 ~]# yum install -y nfs-utils
[root@centos-7-59 ~]# echo '/data/nfs *(rw,sync,no_wdelay,all_squash)' > /etc/exports
[root@centos-7-59 ~]# systemctl start nfs && systemctl enable nfs
[root@centos-7-59 ~]# showmount  -e 
Export list for centos-7-59:
/data/nfs *
---
apiVersion: v1
kind: Namespace
metadata:
  name: storage
spec:
  finalizers:
  - kubernetes
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: storage
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: nfs-client-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "update", "patch"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: run-nfs-client-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    # replace with namespace where provisioner is deployed
    namespace: storage
roleRef:
  kind: ClusterRole
  name: nfs-client-provisioner-runner
  apiGroup: rbac.authorization.k8s.io
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: storage
rules:
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: storage
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    # replace with namespace where provisioner is deployed
    namespace: storage
roleRef:
  kind: Role
  name: leader-locking-nfs-client-provisioner
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  annotations:
    # mark this as the default StorageClass
    storageclass.kubernetes.io/is-default-class: "true"
  name: managed-nfs-storage
provisioner: fuseim.pri/ifs # or choose another name, must match deployment's env PROVISIONER_NAME'
parameters:
  archiveOnDelete: "false"
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nfs-client-provisioner
  labels:
    app: nfs-client-provisioner
  # replace with namespace where provisioner is deployed
  namespace: storage
spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: nfs-client-provisioner
  template:
    metadata:
      labels:
        app: nfs-client-provisioner
    spec:
      serviceAccountName: nfs-client-provisioner
      containers:
        - name: nfs-client-provisioner
          image: quay.mirrors.ustc.edu.cn/external_storage/nfs-client-provisioner:latest
          volumeMounts:
            - name: nfs-client-root
              mountPath: /persistentvolumes
          env:
            - name: PROVISIONER_NAME
              value: fuseim.pri/ifs
            # NFS server details
            - name: NFS_SERVER
              value: 10.4.7.59
            - name: NFS_PATH
              value: /data/nfs
      volumes:
        - name: nfs-client-root
          nfs:
            # NFS server details
            server: 10.4.7.59
            path: /data/nfs
# nfs-utils must be installed before deploying nfs-client-provisioner; since only the worker nodes run workloads in this cluster, install it on the two workers only
[root@duduniao ~]# scan_host.sh cmd -h 10.4.7.54 10.4.7.55 "yum install -y nfs-utils"
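# Optional sanity check: verify from a worker node that the export is reachable before deploying the provisioner
[root@centos-7-55 ~]# showmount -e 10.4.7.59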
[root@centos-7-51 ~]# kubectl apply -f /tmp/nfs-client.yaml
[root@centos-7-51 ~]# kubectl get pod -n storage
NAME                                      READY   STATUS    RESTARTS   AGE
nfs-client-provisioner-5bdc64dd59-zf22c   1/1     Running   0          5m31s
# Create a PVC and verify it works
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: test-claim
  annotations:
    volume.beta.kubernetes.io/storage-class: "managed-nfs-storage"
spec:
  storageClassName: managed-nfs-storage
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 1Mi
---
kind: Pod
apiVersion: v1
metadata:
  name: test-pod
spec:
  containers:
  - name: test-pod
    image: busybox:latest
    command:
      - "/bin/sh"
    args:
      - "-c"
      - "touch /mnt/SUCCESS && exit 0 || exit 1"
    volumeMounts:
      - name: nfs-pvc
        mountPath: "/mnt"
  restartPolicy: "Never"
  volumes:
    - name: nfs-pvc
      persistentVolumeClaim:
        claimName: test-claim
[root@centos-7-51 ~]# kubectl apply -f /tmp/test-pvc.yaml
[root@centos-7-51 ~]# kubectl get pvc
NAME         STATUS   VOLUME                                     CAPACITY   ACCESS MODES   STORAGECLASS          AGE
test-claim   Bound    pvc-4ca7e0e6-c477-4420-84b0-c63b296cdbb9   1Mi        RWX            managed-nfs-storage   102s
# note the directory name on the NFS server: ${namespace}-${pvcName}-${pvName}
[root@centos-7-59 ~]# ls /data/nfs/default-test-claim-pvc-4ca7e0e6-c477-4420-84b0-c63b296cdbb9/SUCCESS
/data/nfs/default-test-claim-pvc-4ca7e0e6-c477-4420-84b0-c63b296cdbb9/SUCCESS

2.4.3. metrics-server

In Kubernetes, the metrics that HPA uses for autoscaling decisions and the resource usage shown by kubectl top both come from metrics-server. Note that upstream explicitly states these metrics should not be used as a monitoring data source. Project home: https://github.com/kubernetes-sigs/metrics-server. In most cases a single-replica Deployment is enough; it supports up to 5000 nodes and consumes about 3m CPU and 3M of memory per node.

apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-view: "true"
  name: system:aggregated-metrics-reader
rules:
- apiGroups:
  - metrics.k8s.io
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
rules:
- apiGroups:
  - ""
  resources:
  - pods
  - nodes
  - nodes/stats
  - namespaces
  - configmaps
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:metrics-server
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  ports:
  - name: https
    port: 443
    protocol: TCP
    targetPort: https
  selector:
    k8s-app: metrics-server
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: metrics-server
  strategy:
    rollingUpdate:
      maxUnavailable: 0
  template:
    metadata:
      labels:
        k8s-app: metrics-server
    spec:
      containers:
      - args:
        - --cert-dir=/tmp
        - --secure-port=4443
        - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
        - --kubelet-use-node-status-port
        # Without --kubelet-insecure-tls, scraping fails with: unable to fully scrape metrics from node centos-7-54: unable to fetch metrics from node centos-7-54: Get "https://10.4.7.54:10250/stats/summary?only_cpu_and_memory=true" xxx
        - --kubelet-insecure-tls
        # The image referenced on GitHub is not reachable from mainland China without a proxy and this version is not on the Aliyun mirror, so the Docker Hub image is used instead
        image: bitnami/metrics-server:0.4.1
        imagePullPolicy: IfNotPresent
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /livez
            port: https
            scheme: HTTPS
          periodSeconds: 10
        name: metrics-server
        ports:
        - containerPort: 4443
          name: https
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /readyz
            port: https
            scheme: HTTPS
          periodSeconds: 10
        securityContext:
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 1000
        volumeMounts:
        - mountPath: /tmp
          name: tmp-dir
      nodeSelector:
        kubernetes.io/os: linux
      priorityClassName: system-cluster-critical
      serviceAccountName: metrics-server
      volumes:
      - emptyDir: {}
        name: tmp-dir
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  labels:
    k8s-app: metrics-server
  name: v1beta1.metrics.k8s.io
spec:
  group: metrics.k8s.io
  groupPriorityMinimum: 100
  insecureSkipTLSVerify: true
  service:
    name: metrics-server
    namespace: kube-system
  version: v1beta1
  versionPriority: 100
[root@centos-7-51 ~]# kubectl apply -f /tmp/kube-metrics-server.yaml
[root@centos-7-51 ~]# kubectl top node
NAME          CPU(cores)   CPU%   MEMORY(bytes)   MEMORY%   
centos-7-51   260m         13%    879Mi           23%       
centos-7-52   136m         6%     697Mi           18%       
centos-7-53   125m         6%     702Mi           19%       
centos-7-54   58m          2%     532Mi           14%       
centos-7-55   42m          2%     475Mi           12%
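With metrics-server serving resource metrics, the HPA use case mentioned above can be tried out. A minimal sketch, assuming a Deployment named nginx-test with CPU requests already exists in the default namespace (the name is only for illustration):

[root@centos-7-51 ~]# kubectl autoscale deployment nginx-test --cpu-percent=80 --min=1 --max=3
[root@centos-7-51 ~]# kubectl get hpa nginx-test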

2.4.4. Install the dashboard

The default dashboard yaml serves HTTPS. In this lab the front-end SLB talks to the in-cluster ingress over plain HTTP, so the yaml has to be adapted; otherwise you get errors about a client sending HTTP requests to an HTTPS endpoint. Dashboard GitHub: https://github.com/kubernetes/dashboard

# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: v1
kind: Namespace
metadata:
  name: kubernetes-dashboard

---

apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard

---

kind: Service
apiVersion: v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
spec:
  ports:
    - port: 443
      targetPort: 8443
  selector:
    k8s-app: kubernetes-dashboard

---

apiVersion: v1
kind: Secret
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard-certs
  namespace: kubernetes-dashboard
type: Opaque

---

apiVersion: v1
kind: Secret
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard-csrf
  namespace: kubernetes-dashboard
type: Opaque
data:
  csrf: ""

---

apiVersion: v1
kind: Secret
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard-key-holder
  namespace: kubernetes-dashboard
type: Opaque

---

kind: ConfigMap
apiVersion: v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard-settings
  namespace: kubernetes-dashboard

---

kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
rules:
  # Allow Dashboard to get, update and delete Dashboard exclusive secrets.
  - apiGroups: [""]
    resources: ["secrets"]
    resourceNames: ["kubernetes-dashboard-key-holder", "kubernetes-dashboard-certs", "kubernetes-dashboard-csrf"]
    verbs: ["get", "update", "delete"]
    # Allow Dashboard to get and update 'kubernetes-dashboard-settings' config map.
  - apiGroups: [""]
    resources: ["configmaps"]
    resourceNames: ["kubernetes-dashboard-settings"]
    verbs: ["get", "update"]
    # Allow Dashboard to get metrics.
  - apiGroups: [""]
    resources: ["services"]
    resourceNames: ["heapster", "dashboard-metrics-scraper"]
    verbs: ["proxy"]
  - apiGroups: [""]
    resources: ["services/proxy"]
    resourceNames: ["heapster", "http:heapster:", "https:heapster:", "dashboard-metrics-scraper", "http:dashboard-metrics-scraper"]
    verbs: ["get"]

---

kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
rules:
  # Allow Metrics Scraper to get metrics from the Metrics server
  - apiGroups: ["metrics.k8s.io"]
    resources: ["pods", "nodes"]
    verbs: ["get", "list", "watch"]

---

apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: kubernetes-dashboard
subjects:
  - kind: ServiceAccount
    name: kubernetes-dashboard
    namespace: kubernetes-dashboard

---

apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kubernetes-dashboard
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kubernetes-dashboard
subjects:
  - kind: ServiceAccount
    name: kubernetes-dashboard
    namespace: kubernetes-dashboard

---

kind: Deployment
apiVersion: apps/v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
spec:
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      k8s-app: kubernetes-dashboard
  template:
    metadata:
      labels:
        k8s-app: kubernetes-dashboard
    spec:
      containers:
        - name: kubernetes-dashboard
          image: kubernetesui/dashboard:v2.0.5
          imagePullPolicy: Always
          ports:
            # - containerPort: 8443
            #   protocol: TCP
            - containerPort: 8080
              protocol: TCP
          args:
            - --insecure-port=8080
            - --enable-insecure-login=true
            # - --auto-generate-certificates
            - --namespace=kubernetes-dashboard
            # Uncomment the following line to manually specify Kubernetes API server Host
            # If not specified, Dashboard will attempt to auto discover the API server and connect
            # to it. Uncomment only if the default does not work.
            # - --apiserver-host=http://my-address:port
          volumeMounts:
            # - name: kubernetes-dashboard-certs
            #   mountPath: /certs
              # Create on-disk volume to store exec logs
            - mountPath: /tmp
              name: tmp-volume
          livenessProbe:
            httpGet:
              scheme: HTTP
              path: /
              port: 8080
              # scheme: HTTPS
              # path: /
              # port: 8443
            initialDelaySeconds: 30
            timeoutSeconds: 30
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsUser: 1001
            runAsGroup: 2001
      volumes:
        # - name: kubernetes-dashboard-certs
        #   secret:
        #     secretName: kubernetes-dashboard-certs
        - name: tmp-volume
          emptyDir: {}
      serviceAccountName: kubernetes-dashboard
      nodeSelector:
        "kubernetes.io/os": linux
      # Comment the following tolerations if Dashboard must not be deployed on master
      tolerations:
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
---

kind: Service
apiVersion: v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
spec:
  ports:
    - port: 80
      targetPort: 8080
  selector:
    k8s-app: kubernetes-dashboard

---

apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
spec:
  rules:
  - host: dashboard.k8s-ddn.com
    http:
      paths:
      - backend:
          serviceName: kubernetes-dashboard
          servicePort: 80

---

kind: Service
apiVersion: v1
metadata:
  labels:
    k8s-app: dashboard-metrics-scraper
  name: dashboard-metrics-scraper
  namespace: kubernetes-dashboard
spec:
  ports:
    - port: 8000
      targetPort: 8000
  selector:
    k8s-app: dashboard-metrics-scraper

---

kind: Deployment
apiVersion: apps/v1
metadata:
  labels:
    k8s-app: dashboard-metrics-scraper
  name: dashboard-metrics-scraper
  namespace: kubernetes-dashboard
spec:
  replicas: 1
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      k8s-app: dashboard-metrics-scraper
  template:
    metadata:
      labels:
        k8s-app: dashboard-metrics-scraper
      annotations:
        seccomp.security.alpha.kubernetes.io/pod: 'runtime/default'
    spec:
      containers:
        - name: dashboard-metrics-scraper
          image: kubernetesui/metrics-scraper:v1.0.6
          ports:
            - containerPort: 8000
              protocol: TCP
          livenessProbe:
            httpGet:
              scheme: HTTP
              path: /
              port: 8000
            initialDelaySeconds: 30
            timeoutSeconds: 30
          volumeMounts:
          - mountPath: /tmp
            name: tmp-volume
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsUser: 1001
            runAsGroup: 2001
      serviceAccountName: kubernetes-dashboard
      nodeSelector:
        "kubernetes.io/os": linux
      # Comment the following tolerations if Dashboard must not be deployed on master
      tolerations:
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
      volumes:
        - name: tmp-volume
          emptyDir: {}
[root@centos-7-51 ~]# kubectl apply -f /tmp/dashborad.yaml
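A quick sanity check after the apply (a hedged verification step, not part of the original transcript; pod names will differ). Note that the Ingress above uses extensions/v1beta1, which is fine on this 1.17 cluster but is removed in newer releases (1.22+), where it must be rewritten for networking.k8s.io/v1.

kubectl get pod,svc -n kubernetes-dashboard -o wide
kubectl get ingress -n kubernetes-dashboard

Once dashboard.k8s-ddn.com resolves to the ingress entry point, the UI is served over plain HTTP, because the container runs with --insecure-port=8080.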

01-2-Kubeadm方式安装 - 图6

# admin.yaml: ServiceAccount with administrator privileges
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: kubernetes-dashboard
    addonmanager.kubernetes.io/mode: Reconcile
  name: kubernetes-dashboard-admin
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kubernetes-dashboard-admin
  namespace: kube-system
  labels:
    k8s-app: kubernetes-dashboard
    addonmanager.kubernetes.io/mode: Reconcile
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: kubernetes-dashboard-admin
  namespace: kube-system
# Test with the administrator token
[root@centos-7-51 ~]# kubectl apply -f /tmp/admin.yaml
[root@centos-7-51 ~]# kubectl get secret -n kube-system| grep kubernetes-dashboard-admin
kubernetes-dashboard-admin-token-fnp6v           kubernetes.io/service-account-token   3      69s
[root@centos-7-51 ~]# kubectl describe secret -n kube-system kubernetes-dashboard-admin-token-fnp6v | grep token
Name:         kubernetes-dashboard-admin-token-fnp6v
Type:  kubernetes.io/service-account-token
token:      eyJhbGciOiJSUzI1NiIsImtpZCI6InlsYXNzWkZ4OU5kMXhqcGgxZkIzdkJqbjBid05oVHp1bjF4TWRKRURkM2cifQ.eyJpc3MiOiJrdWJlcm5ldGVzL3NlcnZpY2VhY2NvdW50Iiwia3ViZXJuZXRlcy5pby9zZXJ2aWNlYWNjb3VudC9uYW1lc3BhY2UiOiJrdWJlLXN5c3RlbSIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VjcmV0Lm5hbWUiOiJrdWJlcm5ldGVzLWRhc2hib2FyZC1hZG1pbi10b2tlbi1mbnA2diIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VydmljZS1hY2NvdW50Lm5hbWUiOiJrdWJlcm5ldGVzLWRhc2hib2FyZC1hZG1pbiIsImt1YmVybmV0ZXMuaW8vc2VydmljZWFjY291bnQvc2VydmljZS1hY2NvdW50LnVpZCI6ImU3YmExY2I1LTM5MWQtNDhkMy1iNzBmLTg1MTIwODEzM2YxYyIsInN1YiI6InN5c3RlbTpzZXJ2aWNlYWNjb3VudDprdWJlLXN5c3RlbTprdWJlcm5ldGVzLWRhc2hib2FyZC1hZG1pbiJ9.SW7ztjjepblrPla2q6pGa0WOytF6BVKHMigakUZLKinWlntX0uOD12Q76CBMjV6_7TC2ho2FJSV3GyfDkyHZujrGUApNHsxxcjUxqFOcrfuq2yoOyrF-x_REeAxKoNlDiW0s-zQYL7w97OF2BV83ecmd-fZ1kcaAgT3n-mmgDwYTBJtnPKLbo59MoFL1DcXDsUilRPKb4-FnlCju72qznbsudKKo4NOKqIrnl6uTNEdyuDvX2c_pIXZ0AsGnVOeu6Qv9I-e3INHPulx1zZ0oz4a_zk1wRFLCM0ycKzYK_PTkjUXc9OsDYWL4iFEavhYNUWZbXnUihg3tRtfrTGvBIA
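The token can also be extracted in a single command instead of reading it out of kubectl describe; a generic sketch using the secret name created above:

kubectl -n kube-system get secret kubernetes-dashboard-admin-token-fnp6v -o jsonpath='{.data.token}' | base64 -d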

01-2-Kubeadm方式安装 - 图7

2.4.5. stakater Reloader

2.4.6. node local dns cache

3. Cluster Management

3.1. Scaling Out Nodes

The cluster already has three masters, which is normally enough to support several hundred worker nodes, so only worker-node scale-out is demonstrated here.

3.1.1. Pre-Scale-Out Preparation

  • Complete the eight checks from section 1.2
  • Install docker-ce
  • Install kubelet and kubeadm
  • Install nfs-utils (a command sketch for these steps follows below)
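A rough sketch of those preparation commands on the new node; the kubelet/kubeadm versions are an assumption chosen to match the cluster's v1.17.14, and the docker-ce version should match whatever the existing nodes run:

yum install -y docker-ce
systemctl start docker ; systemctl enable docker
yum install -y kubelet-1.17.14-0 kubeadm-1.17.14-0   # versions are assumptions matching the cluster's v1.17.14
systemctl enable kubelet
yum install -y nfs-utils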

3.1.2. Joining the New Node

# kubeadm tokens have a limited lifetime; manage them with kubeadm token create|delete|list
[root@centos-7-51 ~]# kubeadm token list
TOKEN                     TTL         EXPIRES                     USAGES                   DESCRIPTION                                                EXTRA GROUPS
j8eck0.u5vu8pxb63eq9bxb   1h          2020-12-05T21:49:44+08:00   authentication,signing   The default bootstrap token generated by 'kubeadm init'.   system:bootstrappers:kubeadm:default-node-token
# Get the hash of the CA certificate
[root@centos-7-51 ~]# openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | awk '{print $2}'
6cdf0e0c66e095beca8b7eadb74da19aa904b45e2378bf53172c7f633c0dc9e8
[root@centos-7-56 ~]# kubeadm join 10.4.7.59:6443 --token j8eck0.u5vu8pxb63eq9bxb --discovery-token-ca-cert-hash sha256:6cdf0e0c66e095beca8b7eadb74da19aa904b45e2378bf53172c7f633c0dc9e8
[root@centos-7-51 ~]# kubectl get node 
NAME          STATUS   ROLES    AGE     VERSION
centos-7-51   Ready    master   22h     v1.17.14
centos-7-52   Ready    master   22h     v1.17.14
centos-7-53   Ready    master   22h     v1.17.14
centos-7-54   Ready    <none>   22h     v1.17.14
centos-7-55   Ready    <none>   22h     v1.17.14
centos-7-56   Ready    <none>   4m38s   v1.17.14
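If the bootstrap token has already expired, or to avoid assembling the token and CA hash by hand, kubeadm can mint a fresh token and print the complete join command (an alternative to the steps above, not something that was run here):

kubeadm token create --print-join-command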

3.2. Decommissioning a Node

3.2.1. Simulating the Environment

To simulate a production workload, create three Deployments with 10 replicas each.

apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deploy
  namespace: default
spec:
  replicas: 10
  selector:
    matchLabels:
      app: nginx
  template:
    metadata:
      labels:
        app: nginx
    spec:
      containers:
      - name: nginx-pod
        image: nginx:latest
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: busybox
  namespace: default
spec:
  replicas: 10
  selector:
    matchLabels:
      app: busybox
  template:
    metadata:
      labels:
        app: busybox
    spec:
      containers:
      - name: busybox-pod
        image: busybox:latest
        command:
        - sleep
        - "3600"
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: os
  namespace: default
spec:
  replicas: 10
  selector:
    matchLabels:
      app: centos
  template:
    metadata:
      labels:
        app: centos
    spec:
      containers:
      - name: centos-pod
        image: centos:7
        command:
        - sleep
        - "3600"
---
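Save the manifest above to a file and apply it (the file name is illustrative):

kubectl apply -f /tmp/sim-deploy.yaml   # /tmp/sim-deploy.yaml is an assumed path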
[root@centos-7-51 ~]# kubectl get pod -o wide
NAME                           READY   STATUS    RESTARTS   AGE    IP            NODE          NOMINATED NODE   READINESS GATES
busybox-64dbb4f97f-45v8x       1/1     Running   0          107s   172.16.3.14   centos-7-54   <none>           <none>
busybox-64dbb4f97f-4d2xx       1/1     Running   0          107s   172.16.4.15   centos-7-55   <none>           <none>
busybox-64dbb4f97f-5pq8g       1/1     Running   0          107s   172.16.5.4    centos-7-56   <none>           <none>
busybox-64dbb4f97f-9tkwg       1/1     Running   0          107s   172.16.5.9    centos-7-56   <none>           <none>
busybox-64dbb4f97f-fffqp       1/1     Running   0          107s   172.16.5.7    centos-7-56   <none>           <none>
busybox-64dbb4f97f-hr25g       1/1     Running   0          107s   172.16.4.21   centos-7-55   <none>           <none>
busybox-64dbb4f97f-jj2gt       1/1     Running   0          107s   172.16.3.17   centos-7-54   <none>           <none>
busybox-64dbb4f97f-km2gj       1/1     Running   0          107s   172.16.4.19   centos-7-55   <none>           <none>
busybox-64dbb4f97f-ndqsm       1/1     Running   0          107s   172.16.3.20   centos-7-54   <none>           <none>
busybox-64dbb4f97f-rh97p       1/1     Running   0          107s   172.16.4.18   centos-7-55   <none>           <none>
nginx-deploy-646dfb87b-4lmgj   1/1     Running   0          107s   172.16.3.15   centos-7-54   <none>           <none>
nginx-deploy-646dfb87b-52cqf   1/1     Running   0          107s   172.16.5.6    centos-7-56   <none>           <none>
nginx-deploy-646dfb87b-7xv98   1/1     Running   0          107s   172.16.3.21   centos-7-54   <none>           <none>
nginx-deploy-646dfb87b-ghr7d   1/1     Running   0          107s   172.16.4.14   centos-7-55   <none>           <none>
nginx-deploy-646dfb87b-h2mxc   1/1     Running   0          107s   172.16.5.12   centos-7-56   <none>           <none>
nginx-deploy-646dfb87b-h98ns   1/1     Running   0          107s   172.16.5.10   centos-7-56   <none>           <none>
nginx-deploy-646dfb87b-rsg4k   1/1     Running   0          107s   172.16.4.12   centos-7-55   <none>           <none>
nginx-deploy-646dfb87b-szqpc   1/1     Running   0          107s   172.16.3.22   centos-7-54   <none>           <none>
nginx-deploy-646dfb87b-wltv4   1/1     Running   0          107s   172.16.5.3    centos-7-56   <none>           <none>
nginx-deploy-646dfb87b-xzg4q   1/1     Running   0          107s   172.16.4.17   centos-7-55   <none>           <none>
os-7df59bfb4f-5xn7h            1/1     Running   0          107s   172.16.5.11   centos-7-56   <none>           <none>
os-7df59bfb4f-7k2xq            1/1     Running   0          106s   172.16.4.11   centos-7-55   <none>           <none>
os-7df59bfb4f-7pv59            1/1     Running   0          107s   172.16.4.20   centos-7-55   <none>           <none>
os-7df59bfb4f-bkmcv            1/1     Running   0          107s   172.16.3.16   centos-7-54   <none>           <none>
os-7df59bfb4f-bt7gg            1/1     Running   0          107s   172.16.3.19   centos-7-54   <none>           <none>
os-7df59bfb4f-cv2x2            1/1     Running   0          107s   172.16.5.5    centos-7-56   <none>           <none>
os-7df59bfb4f-jwrgm            1/1     Running   0          107s   172.16.5.8    centos-7-56   <none>           <none>
os-7df59bfb4f-mfmzq            1/1     Running   0          106s   172.16.4.13   centos-7-55   <none>           <none>
os-7df59bfb4f-t6lh4            1/1     Running   0          106s   172.16.3.18   centos-7-54   <none>           <none>
os-7df59bfb4f-whqdn            1/1     Running   0          107s   172.16.4.16   centos-7-55   <none>           <none>

3.2.2. Removing the Node

[root@centos-7-51 ~]# kubectl drain centos-7-56 --delete-local-data --force --ignore-daemonsets # evict the pods
node/centos-7-56 cordoned
evicting pod "os-7df59bfb4f-jwrgm"
evicting pod "busybox-64dbb4f97f-5pq8g"
evicting pod "busybox-64dbb4f97f-9tkwg"
evicting pod "busybox-64dbb4f97f-fffqp"
evicting pod "nginx-deploy-646dfb87b-52cqf"
evicting pod "nginx-deploy-646dfb87b-h2mxc"
evicting pod "nginx-deploy-646dfb87b-h98ns"
evicting pod "nginx-deploy-646dfb87b-wltv4"
evicting pod "os-7df59bfb4f-5xn7h"
evicting pod "os-7df59bfb4f-cv2x2"
pod/nginx-deploy-646dfb87b-52cqf evicted
pod/nginx-deploy-646dfb87b-wltv4 evicted
pod/nginx-deploy-646dfb87b-h2mxc evicted
pod/nginx-deploy-646dfb87b-h98ns evicted
pod/os-7df59bfb4f-jwrgm evicted
pod/busybox-64dbb4f97f-5pq8g evicted
pod/busybox-64dbb4f97f-9tkwg evicted
pod/os-7df59bfb4f-5xn7h evicted
pod/busybox-64dbb4f97f-fffqp evicted
pod/os-7df59bfb4f-cv2x2 evicted
node/centos-7-56 evicted
[root@centos-7-51 ~]# kubectl get pod -A -o wide | grep centos-7-56
kube-system            kube-flannel-ds-944j9                       1/1     Running   0          15m     10.4.7.56     centos-7-56   <none>           <none>
kube-system            kube-proxy-rjlc7                            1/1     Running   0          19m     10.4.7.56     centos-7-56   <none>           <none>
kube-system            traefik-ingress-controller-w8w2c            1/1     Running   0          15m     172.16.5.2    centos-7-56   <none>           <none>
[root@centos-7-56 ~]# kubeadm reset # reset the node
[root@centos-7-56 ~]# iptables -F && iptables -t nat -F && iptables -t mangle -F && iptables -X
[root@centos-7-56 ~]# ipvsadm -C # clear the ipvs rules
[root@centos-7-56 ~]# rm -fr /etc/cni/net.d  # remove the CNI configuration
[root@centos-7-51 ~]# kubectl delete node centos-7-56 # delete the node object from the cluster
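After the reset it is worth confirming that the node is gone and that the evicted replicas were rescheduled onto the remaining workers (a hedged check, not part of the original transcript):

kubectl get node                            # centos-7-56 should no longer be listed
kubectl get pod -o wide | grep -c Running   # all 30 replicas should be Running on centos-7-54/55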

3.3. Upgrading the Cluster

In day-to-day operations the Kubernetes version needs to be upgraded from time to time. There are two kinds of upgrade: a minor-version upgrade, e.g. 1.17.x to 1.18.x, and a patch-version upgrade, e.g. 1.17.x to 1.17.y. Note that minor-version upgrades must not skip a release: you cannot go from 1.y.x straight to 1.y+2.x! This exercise upgrades the cluster from 1.17 to 1.18. Reference: https://v1-18.docs.kubernetes.io/zh/docs/tasks/administer-cluster/kubeadm/kubeadm-upgrade. The Kubernetes 1.18 release notes (https://github.com/kubernetes/kubernetes/blob/master/CHANGELOG/CHANGELOG-1.18.md) show no change in supported Docker versions, so Docker is not upgraded in this exercise.

[root@centos-7-51 ~]# yum list --showduplicates kubeadm  | grep 1.18 # upgrade target: the latest 1.18.12
kubeadm.x86_64                       1.18.0-0                        kubernetes 
kubeadm.x86_64                       1.18.1-0                        kubernetes 
kubeadm.x86_64                       1.18.2-0                        kubernetes 
kubeadm.x86_64                       1.18.3-0                        kubernetes 
kubeadm.x86_64                       1.18.4-0                        kubernetes 
kubeadm.x86_64                       1.18.4-1                        kubernetes 
kubeadm.x86_64                       1.18.5-0                        kubernetes 
kubeadm.x86_64                       1.18.6-0                        kubernetes 
kubeadm.x86_64                       1.18.8-0                        kubernetes 
kubeadm.x86_64                       1.18.9-0                        kubernetes 
kubeadm.x86_64                       1.18.10-0                       kubernetes 
kubeadm.x86_64                       1.18.12-0                       kubernetes
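The same repository also carries matching kubelet and kubectl builds; a quick illustrative check before starting:

yum list --showduplicates kubelet kubectl | grep 1.18.12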

3.3.1. Upgrading the First Master

[root@centos-7-51 ~]# yum install -y kubeadm-1.18.12-0
[root@centos-7-51 ~]# kubeadm version # verify the version
kubeadm version: &version.Info{Major:"1", Minor:"18", GitVersion:"v1.18.12", GitCommit:"7cd5e9086de8ae25d6a1514d0c87bac67ca4a481", GitTreeState:"clean", BuildDate:"2020-11-12T09:16:40Z", GoVersion:"go1.13.15", Compiler:"gc", Platform:"linux/amd64"}
[root@centos-7-51 ~]# kubectl get node 
NAME          STATUS   ROLES    AGE   VERSION
centos-7-51   Ready    master   35h   v1.17.14
centos-7-52   Ready    master   35h   v1.17.14
centos-7-53   Ready    master   35h   v1.17.14
centos-7-54   Ready    <none>   35h   v1.17.14
centos-7-55   Ready    <none>   35h   v1.17.14
[root@centos-7-51 ~]# kubectl drain centos-7-51 --ignore-daemonsets # drain centos-7-51
node/centos-7-51 cordoned
node/centos-7-51 drained
[root@centos-7-51 ~]# kubectl get node 
NAME          STATUS                     ROLES    AGE   VERSION
centos-7-51   Ready,SchedulingDisabled   master   35h   v1.17.14
centos-7-52   Ready                      master   35h   v1.17.14
centos-7-53   Ready                      master   35h   v1.17.14
centos-7-54   Ready                      <none>   35h   v1.17.14
centos-7-55   Ready                      <none>   35h   v1.17.14
[root@centos-7-51 ~]# kubectl get pod -A -o wide | grep centos-7-51 # the dashboard pod is here because it was explicitly allowed to run on masters; it can be ignored during the upgrade
kube-system            etcd-centos-7-51                            1/1     Running   1          35h   10.4.7.51     centos-7-51   <none>           <none>
kube-system            kube-apiserver-centos-7-51                  1/1     Running   1          35h   10.4.7.51     centos-7-51   <none>           <none>
kube-system            kube-controller-manager-centos-7-51         1/1     Running   2          35h   10.4.7.51     centos-7-51   <none>           <none>
kube-system            kube-flannel-ds-c2bs6                       1/1     Running   1          34h   10.4.7.51     centos-7-51   <none>           <none>
kube-system            kube-proxy-f6bmd                            1/1     Running   0          13h   10.4.7.51     centos-7-51   <none>           <none>
kube-system            kube-scheduler-centos-7-51                  1/1     Running   1          35h   10.4.7.51     centos-7-51   <none>           <none>
kubernetes-dashboard   kubernetes-dashboard-ffb9ffc86-2np2t        1/1     Running   0          15h   172.16.0.3    centos-7-51   <none>           <none>
[root@centos-7-51 ~]# kubeadm upgrade plan # check the upgrade plan
[upgrade/config] Making sure the configuration is correct:
[upgrade/config] Reading configuration from the cluster...
[upgrade/config] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[preflight] Running pre-flight checks.
[upgrade] Running cluster health checks
[upgrade] Fetching available versions to upgrade to
[upgrade/versions] Cluster version: v1.17.14
[upgrade/versions] kubeadm version: v1.18.12
I1206 09:27:00.744351   33247 version.go:252] remote version is much newer: v1.19.4; falling back to: stable-1.18
[upgrade/versions] Latest stable version: v1.18.12
[upgrade/versions] Latest stable version: v1.18.12
[upgrade/versions] Latest version in the v1.17 series: v1.17.14
[upgrade/versions] Latest version in the v1.17 series: v1.17.14

Components that must be upgraded manually after you have upgraded the control plane with 'kubeadm upgrade apply':
COMPONENT   CURRENT        AVAILABLE
Kubelet     5 x v1.17.14   v1.18.12

Upgrade to the latest stable version:

COMPONENT            CURRENT    AVAILABLE
API Server           v1.17.14   v1.18.12
Controller Manager   v1.17.14   v1.18.12
Scheduler            v1.17.14   v1.18.12
Kube Proxy           v1.17.14   v1.18.12
CoreDNS              1.6.5      1.6.7
Etcd                 3.4.3      3.4.3-0

You can now apply the upgrade by executing the following command:

    kubeadm upgrade apply v1.18.12
# the upgrade renews the certificates automatically and pulls images from the same repository used at init time
[root@centos-7-51 ~]# kubeadm upgrade apply v1.18.12
[upgrade/config] Making sure the configuration is correct:
[upgrade/config] Reading configuration from the cluster...
[upgrade/config] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[preflight] Running pre-flight checks.
[upgrade] Running cluster health checks
[upgrade/version] You have chosen to change the cluster version to "v1.18.12"
[upgrade/versions] Cluster version: v1.17.14
[upgrade/versions] kubeadm version: v1.18.12
[upgrade/confirm] Are you sure you want to proceed with the upgrade? [y/N]: y
[upgrade/prepull] Will prepull images for components [kube-apiserver kube-controller-manager kube-scheduler etcd]
[upgrade/prepull] Prepulling image for component etcd.
[upgrade/prepull] Prepulling image for component kube-apiserver.
[upgrade/prepull] Prepulling image for component kube-controller-manager.
[upgrade/prepull] Prepulling image for component kube-scheduler.
[apiclient] Found 0 Pods for label selector k8s-app=upgrade-prepull-etcd
[apiclient] Found 3 Pods for label selector k8s-app=upgrade-prepull-kube-controller-manager
[apiclient] Found 3 Pods for label selector k8s-app=upgrade-prepull-kube-apiserver
[apiclient] Found 0 Pods for label selector k8s-app=upgrade-prepull-kube-scheduler
[apiclient] Found 3 Pods for label selector k8s-app=upgrade-prepull-etcd
[apiclient] Found 3 Pods for label selector k8s-app=upgrade-prepull-kube-scheduler
[upgrade/prepull] Prepulled image for component etcd.
[upgrade/prepull] Prepulled image for component kube-apiserver.
[upgrade/prepull] Prepulled image for component kube-scheduler.
[upgrade/prepull] Prepulled image for component kube-controller-manager.
[upgrade/prepull] Successfully prepulled the images for all the control plane components
[upgrade/apply] Upgrading your Static Pod-hosted control plane to version "v1.18.12"...
Static pod: kube-apiserver-centos-7-51 hash: d66ea877d4c9baceca5434f6c40e56c3
Static pod: kube-controller-manager-centos-7-51 hash: 3b690a04ed3bb330968e5355fcb77dfe
Static pod: kube-scheduler-centos-7-51 hash: 92ea98b5d89a947c103103de725f5e88
[upgrade/etcd] Upgrading to TLS for etcd
[upgrade/etcd] Non fatal issue encountered during upgrade: the desired etcd version for this Kubernetes version "v1.18.12" is "3.4.3-0", but the current etcd version is "3.4.3". Won't downgrade etcd, instead just continue
[upgrade/staticpods] Writing new Static Pod manifests to "/etc/kubernetes/tmp/kubeadm-upgraded-manifests011210605"
W1206 09:31:26.483819   35018 manifests.go:225] the default kube-apiserver authorization-mode is "Node,RBAC"; using "Node,RBAC"
[upgrade/staticpods] Preparing for "kube-apiserver" upgrade
[upgrade/staticpods] Renewing apiserver certificate
[upgrade/staticpods] Renewing apiserver-kubelet-client certificate
[upgrade/staticpods] Renewing front-proxy-client certificate
[upgrade/staticpods] Renewing apiserver-etcd-client certificate
[upgrade/staticpods] Moved new manifest to "/etc/kubernetes/manifests/kube-apiserver.yaml" and backed up old manifest to "/etc/kubernetes/tmp/kubeadm-backup-manifests-2020-12-06-09-31-24/kube-apiserver.yaml"
[upgrade/staticpods] Waiting for the kubelet to restart the component
[upgrade/staticpods] This might take a minute or longer depending on the component/version gap (timeout 5m0s)
Static pod: kube-apiserver-centos-7-51 hash: d66ea877d4c9baceca5434f6c40e56c3
Static pod: kube-apiserver-centos-7-51 hash: d66ea877d4c9baceca5434f6c40e56c3
Static pod: kube-apiserver-centos-7-51 hash: d176ad7b601e72e4540b622eb6333914
[apiclient] Found 3 Pods for label selector component=kube-apiserver
[upgrade/staticpods] Component "kube-apiserver" upgraded successfully!
[upgrade/staticpods] Preparing for "kube-controller-manager" upgrade
[upgrade/staticpods] Renewing controller-manager.conf certificate
[upgrade/staticpods] Moved new manifest to "/etc/kubernetes/manifests/kube-controller-manager.yaml" and backed up old manifest to "/etc/kubernetes/tmp/kubeadm-backup-manifests-2020-12-06-09-31-24/kube-controller-manager.yaml"
[upgrade/staticpods] Waiting for the kubelet to restart the component
[upgrade/staticpods] This might take a minute or longer depending on the component/version gap (timeout 5m0s)
Static pod: kube-controller-manager-centos-7-51 hash: 3b690a04ed3bb330968e5355fcb77dfe
Static pod: kube-controller-manager-centos-7-51 hash: 51215cf52dca86b9d58c8d50b2498fbe
[apiclient] Found 3 Pods for label selector component=kube-controller-manager
[upgrade/staticpods] Component "kube-controller-manager" upgraded successfully!
[upgrade/staticpods] Preparing for "kube-scheduler" upgrade
[upgrade/staticpods] Renewing scheduler.conf certificate
[upgrade/staticpods] Moved new manifest to "/etc/kubernetes/manifests/kube-scheduler.yaml" and backed up old manifest to "/etc/kubernetes/tmp/kubeadm-backup-manifests-2020-12-06-09-31-24/kube-scheduler.yaml"
[upgrade/staticpods] Waiting for the kubelet to restart the component
[upgrade/staticpods] This might take a minute or longer depending on the component/version gap (timeout 5m0s)
Static pod: kube-scheduler-centos-7-51 hash: 92ea98b5d89a947c103103de725f5e88
Static pod: kube-scheduler-centos-7-51 hash: 6b11dd84db49a8fbdf45993beee95da7
[apiclient] Found 3 Pods for label selector component=kube-scheduler
[upgrade/staticpods] Component "kube-scheduler" upgraded successfully!
[upload-config] Storing the configuration used in ConfigMap "kubeadm-config" in the "kube-system" Namespace
[kubelet] Creating a ConfigMap "kubelet-config-1.18" in namespace kube-system with the configuration for the kubelets in the cluster
[kubelet-start] Downloading configuration for the kubelet from the "kubelet-config-1.18" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to get nodes
[bootstrap-token] configured RBAC rules to allow Node Bootstrap tokens to post CSRs in order for nodes to get long term certificate credentials
[bootstrap-token] configured RBAC rules to allow the csrapprover controller automatically approve CSRs from a Node Bootstrap Token
[bootstrap-token] configured RBAC rules to allow certificate rotation for all node client certificates in the cluster
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy

[upgrade/successful] SUCCESS! Your cluster was upgraded to "v1.18.12". Enjoy!

[upgrade/kubelet] Now that your control plane is upgraded, please proceed with upgrading your kubelets if you haven't already done so.
[root@centos-7-51 ~]# docker image ls | grep 1.18 
registry.aliyuncs.com/google_containers/kube-proxy                v1.18.12            06f1dd86004c        3 weeks ago         117MB
registry.aliyuncs.com/google_containers/kube-apiserver            v1.18.12            d4e7de4ee6a8        3 weeks ago         173MB
registry.aliyuncs.com/google_containers/kube-controller-manager   v1.18.12            37efdbf07b2a        3 weeks ago         162MB
registry.aliyuncs.com/google_containers/kube-scheduler            v1.18.12            fb649979593e        3 weeks ago         95.3MB
[root@centos-7-51 ~]# kubectl get cs # see section 2.3.6 for the fix
NAME                 STATUS      MESSAGE                                                                                     ERROR
controller-manager   Unhealthy   Get http://127.0.0.1:10252/healthz: dial tcp 127.0.0.1:10252: connect: connection refused   
scheduler            Unhealthy   Get http://127.0.0.1:10251/healthz: dial tcp 127.0.0.1:10251: connect: connection refused   
etcd-0               Healthy     {"health":"true"}                                                                           
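The Unhealthy entries usually mean the kube-scheduler and kube-controller-manager static pod manifests pass --port=0, which disables the insecure healthz ports (10251/10252) that kubectl get cs probes; section 2.3.6 covers the fix applied here. The gist, as a sketch (check the manifests before editing them):

sed -i '/- --port=0/s/^/#/' /etc/kubernetes/manifests/kube-scheduler.yaml
sed -i '/- --port=0/s/^/#/' /etc/kubernetes/manifests/kube-controller-manager.yaml
# kubelet recreates the static pods automatically once the manifests change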
[root@centos-7-51 ~]# kubectl get node 
NAME          STATUS                     ROLES    AGE   VERSION
centos-7-51   Ready,SchedulingDisabled   master   35h   v1.17.14
centos-7-52   Ready                      master   35h   v1.17.14
centos-7-53   Ready                      master   35h   v1.17.14
centos-7-54   Ready                      <none>   35h   v1.17.14
centos-7-55   Ready                      <none>   35h   v1.17.14
[root@centos-7-51 ~]# kubectl get cs 
NAME                 STATUS    MESSAGE             ERROR
scheduler            Healthy   ok                  
controller-manager   Healthy   ok                  
etcd-0               Healthy   {"health":"true"}   
[root@centos-7-51 ~]# yum install -y kubelet-1.18.12-0 kubectl-1.18.12-0 # upgrade kubelet and kubectl
[root@centos-7-51 ~]# systemctl daemon-reload;systemctl restart kubelet
[root@centos-7-51 ~]# systemctl status kubelet.service 
● kubelet.service - kubelet: The Kubernetes Node Agent
   Loaded: loaded (/usr/lib/systemd/system/kubelet.service; enabled; vendor preset: disabled)
  Drop-In: /usr/lib/systemd/system/kubelet.service.d
           └─10-kubeadm.conf
   Active: active (running) since Sun 2020-12-06 09:42:12 CST; 6s ago
...
# flannel was installed at its latest version, so it does not need updating
[root@centos-7-51 ~]# kubectl uncordon centos-7-51 # bring the master back into scheduling
node/centos-7-51 uncordoned
[root@centos-7-51 ~]# kubectl get node 
NAME          STATUS   ROLES    AGE   VERSION
centos-7-51   Ready    master   35h   v1.17.14
centos-7-52   Ready    master   35h   v1.17.14
centos-7-53   Ready    master   35h   v1.17.14
centos-7-54   Ready    <none>   35h   v1.17.14
centos-7-55   Ready    <none>   35h   v1.17.14
# centos-7-51 may briefly keep reporting v1.17.14 until the upgraded kubelet re-registers; it shows v1.18.12 shortly afterwards (see the node list in 3.3.2)

3.3.2. Upgrading the Other Masters

centos-7-52 is used as the example here; the steps on centos-7-53 are identical.

[root@centos-7-52 ~]# yum install -y kubeadm-1.18.12-0
[root@centos-7-52 ~]# kubeadm version
kubeadm version: &version.Info{Major:"1", Minor:"18", GitVersion:"v1.18.12", GitCommit:"7cd5e9086de8ae25d6a1514d0c87bac67ca4a481", GitTreeState:"clean", BuildDate:"2020-11-12T09:16:40Z", GoVersion:"go1.13.15", Compiler:"gc", Platform:"linux/amd64"}
[root@centos-7-52 ~]# kubectl drain centos-7-52 --ignore-daemonsets
[root@centos-7-52 ~]# kubeadm upgrade plan
[root@centos-7-52 ~]# kubeadm upgrade node # note: this step differs from the first master (upgrade node instead of upgrade apply)!
[root@centos-7-52 ~]# kubectl get cs # kube-scheduler.yaml and kube-controller-manager.yaml need the same fix here as well
[root@centos-7-52 ~]# yum install -y kubelet-1.18.12-0 kubectl-1.18.12-0
[root@centos-7-52 ~]# systemctl daemon-reload;systemctl restart kubelet
[root@centos-7-52 ~]# kubectl uncordon centos-7-52
node/centos-7-52 uncordoned
[root@centos-7-52 ~]# kubectl get node
NAME          STATUS   ROLES    AGE   VERSION
centos-7-51   Ready    master   36h   v1.18.12
centos-7-52   Ready    master   36h   v1.18.12
centos-7-53   Ready    master   36h   v1.17.14
centos-7-54   Ready    <none>   36h   v1.17.14
centos-7-55   Ready    <none>   36h   v1.17.14
[root@centos-7-52 ~]# kubectl get cs
NAME                 STATUS    MESSAGE             ERROR
controller-manager   Healthy   ok                  
scheduler            Healthy   ok                  
etcd-0               Healthy   {"health":"true"}

3.3.3. Upgrading the Worker Nodes

With only a few nodes, upgrade them one at a time; with many nodes, upgrade them in batches at a fixed ratio. Pods must be drained from a node while it is upgraded. The steps are exactly the same on every node.

[root@centos-7-54 ~]# yum install -y kubeadm-1.18.12-0 
[root@centos-7-51 ~]# kubectl drain centos-7-54 --ignore-daemonsets --delete-local-data
[root@centos-7-54 ~]# kubeadm upgrade node # upgrade the node's kubelet configuration
[upgrade] Reading configuration from the cluster...
[upgrade] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'
[upgrade] Skipping phase. Not a control plane node.
[kubelet-start] Downloading configuration for the kubelet from the "kubelet-config-1.18" ConfigMap in the kube-system namespace
[kubelet-start] Writing kubelet configuration to file "/var/lib/kubelet/config.yaml"
[upgrade] The configuration for this node was successfully updated!
[upgrade] Now you should go ahead and upgrade the kubelet package using your package manager.
[root@centos-7-54 ~]# yum install -y kubelet-1.18.12-0
[root@centos-7-54 ~]# systemctl daemon-reload;systemctl restart kubelet
[root@centos-7-51 ~]# kubectl uncordon centos-7-54
node/centos-7-54 uncordoned
[root@centos-7-51 ~]# kubectl get node
NAME          STATUS   ROLES    AGE   VERSION
centos-7-51   Ready    master   36h   v1.18.12
centos-7-52   Ready    master   36h   v1.18.12
centos-7-53   Ready    master   36h   v1.18.12
centos-7-54   Ready    <none>   36h   v1.18.12
centos-7-55   Ready    <none>   36h   v1.17.14

3.4. Certificate Renewal

Certificates in a kubeadm cluster are rarely renewed by hand, because every version upgrade renews them automatically. This section nevertheless demonstrates renewing them with kubeadm. The CA certificates are valid for 10 years and normally never need replacing, so they are skipped in this exercise. Note that kubeadm cannot manage certificates signed by an external CA! The officially recommended way to keep certificates current is simply to upgrade the cluster regularly.

kubeadm cluster certificate management: https://kubernetes.io/zh/docs/tasks/administer-cluster/kubeadm/kubeadm-certs/

kubelet certificate rotation: https://kubernetes.io/zh/docs/tasks/tls/certificate-rotation/ (kubeadm configures the kubelet to rotate its certificates automatically!)

CA certificate rotation: https://kubernetes.io/zh/docs/tasks/tls/manual-rotation-of-ca-certificates
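When kubelet client-certificate rotation is active (kubeadm enables it by default), the kubelet keeps a symlink named kubelet-client-current.pem pointing at its newest client certificate, which is an easy thing to spot-check (illustrative):

ls -l /var/lib/kubelet/pki/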

[root@centos-7-51 ~]# kubeadm alpha certs check-expiration # check when the current certificates expire
[check-expiration] Reading configuration from the cluster...
[check-expiration] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'

CERTIFICATE                EXPIRES                  RESIDUAL TIME   CERTIFICATE AUTHORITY   EXTERNALLY MANAGED
admin.conf                 Dec 06, 2021 01:31 UTC   364d                                    no      
apiserver                  Dec 06, 2021 01:31 UTC   364d            ca                      no      
apiserver-etcd-client      Dec 06, 2021 01:31 UTC   364d            etcd-ca                 no      
apiserver-kubelet-client   Dec 06, 2021 01:31 UTC   364d            ca                      no      
controller-manager.conf    Dec 06, 2021 01:31 UTC   364d                                    no      
etcd-healthcheck-client    Dec 04, 2021 13:49 UTC   363d            etcd-ca                 no      
etcd-peer                  Dec 04, 2021 13:49 UTC   363d            etcd-ca                 no      
etcd-server                Dec 04, 2021 13:49 UTC   363d            etcd-ca                 no      
front-proxy-client         Dec 06, 2021 01:31 UTC   364d            front-proxy-ca          no      
scheduler.conf             Dec 06, 2021 01:31 UTC   364d                                    no      

CERTIFICATE AUTHORITY   EXPIRES                  RESIDUAL TIME   EXTERNALLY MANAGED
ca                      Dec 02, 2030 13:49 UTC   9y              no      
etcd-ca                 Dec 02, 2030 13:49 UTC   9y              no      
front-proxy-ca          Dec 02, 2030 13:49 UTC   9y              no      
[root@centos-7-51 ~]# kubeadm alpha certs renew --help  # any individual certificate type can be renewed
This command is not meant to be run on its own. See list of available subcommands.

Usage:
  kubeadm alpha certs renew [flags]
  kubeadm alpha certs renew [command]

Available Commands:
  admin.conf               Renew the certificate embedded in the kubeconfig file for the admin to use and for kubeadm itself
  all                      Renew all available certificates
  apiserver                Renew the certificate for serving the Kubernetes API
  apiserver-etcd-client    Renew the certificate the apiserver uses to access etcd
  apiserver-kubelet-client Renew the certificate for the API server to connect to kubelet
  controller-manager.conf  Renew the certificate embedded in the kubeconfig file for the controller manager to use
  etcd-healthcheck-client  Renew the certificate for liveness probes to healthcheck etcd
  etcd-peer                Renew the certificate for etcd nodes to communicate with each other
  etcd-server              Renew the certificate for serving etcd
  front-proxy-client       Renew the certificate for the front proxy client
  scheduler.conf           Renew the certificate embedded in the kubeconfig file for the scheduler manager to use

Flags:
  -h, --help   help for renew

Global Flags:
      --add-dir-header           If true, adds the file directory to the header
      --log-file string          If non-empty, use this log file
      --log-file-max-size uint   Defines the maximum size a log file can grow to. Unit is megabytes. If the value is 0, the maximum file size is unlimited. (default 1800)
      --rootfs string            [EXPERIMENTAL] The path to the 'real' host root filesystem.
      --skip-headers             If true, avoid header prefixes in the log messages
      --skip-log-headers         If true, avoid headers when opening log files
  -v, --v Level                  number for the log level verbosity

Use "kubeadm alpha certs renew [command] --help" for more information about a command.
[root@centos-7-51 ~]# kubeadm alpha certs renew all  # renew all certificates; run this on every master
[renew] Reading configuration from the cluster...
[renew] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'

certificate embedded in the kubeconfig file for the admin to use and for kubeadm itself renewed
certificate for serving the Kubernetes API renewed
certificate the apiserver uses to access etcd renewed
certificate for the API server to connect to kubelet renewed
certificate embedded in the kubeconfig file for the controller manager to use renewed
certificate for liveness probes to healthcheck etcd renewed
certificate for etcd nodes to communicate with each other renewed
certificate for serving etcd renewed
certificate for the front proxy client renewed
certificate embedded in the kubeconfig file for the scheduler manager to use renewed
[root@centos-7-51 ~]# kubeadm alpha certs check-expiration
[check-expiration] Reading configuration from the cluster...
[check-expiration] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -oyaml'

CERTIFICATE                EXPIRES                  RESIDUAL TIME   CERTIFICATE AUTHORITY   EXTERNALLY MANAGED
admin.conf                 Dec 06, 2021 02:35 UTC   364d                                    no      
apiserver                  Dec 06, 2021 02:35 UTC   364d            ca                      no      
apiserver-etcd-client      Dec 06, 2021 02:35 UTC   364d            etcd-ca                 no      
apiserver-kubelet-client   Dec 06, 2021 02:35 UTC   364d            ca                      no      
controller-manager.conf    Dec 06, 2021 02:35 UTC   364d                                    no      
etcd-healthcheck-client    Dec 06, 2021 02:35 UTC   364d            etcd-ca                 no      
etcd-peer                  Dec 06, 2021 02:35 UTC   364d            etcd-ca                 no      
etcd-server                Dec 06, 2021 02:35 UTC   364d            etcd-ca                 no      
front-proxy-client         Dec 06, 2021 02:35 UTC   364d            front-proxy-ca          no      
scheduler.conf             Dec 06, 2021 02:35 UTC   364d                                    no      

CERTIFICATE AUTHORITY   EXPIRES                  RESIDUAL TIME   EXTERNALLY MANAGED
ca                      Dec 02, 2030 13:49 UTC   9y              no      
etcd-ca                 Dec 02, 2030 13:49 UTC   9y              no      
front-proxy-ca          Dec 02, 2030 13:49 UTC   9y              no   
# afterwards, restart the Kubernetes control-plane components one by one
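The renewed certificates only take effect once the control-plane components restart. A common way to bounce the static pods on each master, together with refreshing the local kubeconfig, is sketched below; the 30-second pause is an arbitrary value giving kubelet time to notice the manifests disappearing:

cp /etc/kubernetes/admin.conf $HOME/.kube/config     # the admin kubeconfig embeds a renewed client certificate
mkdir -p /etc/kubernetes/manifests-backup            # manifests-backup is an arbitrary directory name
mv /etc/kubernetes/manifests/*.yaml /etc/kubernetes/manifests-backup/
sleep 30                                             # wait for kubelet to stop the static pods
mv /etc/kubernetes/manifests-backup/*.yaml /etc/kubernetes/manifests/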