etcd备份
所有master节点上先复制出容器里的etcdctl
# Copy etcdctl out of the first container listed by `docker ps -a` whose line
# matches k8s_etcd, so the binary is available on the host.
docker cp "$(docker ps -a | awk '/k8s_etcd/ {print $1; exit}'):/usr/local/bin/etcdctl" \
  /usr/local/bin/etcdctl
编写一个简单别名,记得替换对应的ip
# Write the etcd_v3 helper alias into a profile snippet.
# The heredoc delimiter is quoted, so the ${...} references are stored literally
# and are only expanded when the snippet is sourced.
# Replace the endpoint IPs with those of your own etcd members.
cat >/etc/profile.d/etcd.sh <<'EOF'
ETCD_CERET_DIR=/etc/kubernetes/pki/etcd/
ETCD_CA_FILE=ca.crt
ETCD_KEY_FILE=healthcheck-client.key
ETCD_CERT_FILE=healthcheck-client.crt
ETCD_EP=https://192.168.33.101:2379,https://192.168.33.102:2379,https://192.168.33.103:2379

alias etcd_v3="ETCDCTL_API=3 \
    etcdctl \
    --cert ${ETCD_CERET_DIR}/${ETCD_CERT_FILE} \
    --key ${ETCD_CERET_DIR}/${ETCD_KEY_FILE} \
    --cacert ${ETCD_CERET_DIR}/${ETCD_CA_FILE} \
    --endpoints $ETCD_EP"
EOF
# Load the alias into the current interactive shell.
source /etc/profile.d/etcd.sh
etcd_v3 endpoint status --write-out=table
+-----------------------------+------------------+---------+---------+-----------+-----------+------------+
|          ENDPOINT           |        ID        | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+-----------------------------+------------------+---------+---------+-----------+-----------+------------+
| https://192.168.33.101:2379 | c724c500884441af |  3.3.17 |  1.6 MB |      true |         7 |       1865 |
| https://192.168.33.102:2379 | 3dcceec24ad5c5d4 |  3.3.17 |  1.6 MB |     false |         7 |       1865 |
| https://192.168.33.103:2379 | bc21062efb4a5d4c |  3.3.17 |  1.5 MB |     false |         7 |       1865 |
+-----------------------------+------------------+---------+---------+-----------+-----------+------------+
etcd_v3 endpoint health --write-out=table
+-----------------------------+--------+-------------+-------+
|          ENDPOINT           | HEALTH |    TOOK     | ERROR |
+-----------------------------+--------+-------------+-------+
| https://192.168.33.103:2379 | true   | 19.288026ms |       |
| https://192.168.33.102:2379 | true   | 19.2603ms   |       |
| https://192.168.33.101:2379 | true   | 22.490443ms |       |
+-----------------------------+--------+-------------+-------+
配置etcd备份脚本
记得替换对应的ip
# Install the etcd backup script. The heredoc delimiter is quoted, so nothing
# inside it is expanded at install time.
mkdir -p /opt/etcd
cat >/opt/etcd/etcd_cron.sh <<'EOF'
#!/bin/bash
#
# etcd snapshot helper.
#
# Usage:
#   etcd_cron.sh <file>          take one snapshot and write it to <file>
#   etcd_cron.sh -c N [-d DIR]   take a timestamped snapshot in DIR (default
#                                $bak_dir) and keep only the N newest ones
#
# Environment overrides: bak_dir, cert_dir, endpoints, bak_count.
set -e
source /etc/profile
export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/root/bin

# Client URLs of every etcd member; replace with the IPs of your own cluster.
readonly ALL_ENDPOINTS='https://192.168.33.101:2379,https://192.168.33.102:2379,https://192.168.33.103:2379'

: "${bak_dir:=/root/}"   # default backup directory; must already exist unless -d is used
: "${cert_dir:=/etc/kubernetes/pki/etcd/}"
bak_prefix='etcd-'
bak_suffix='.db'

# Prints usage examples to stderr.
usage() {
  printf '%s\n' \
    'Examples:' \
    '# just save once' \
    "bash $0 /tmp/etcd.db" \
    '# save in contab and keep 5' \
    "bash $0 -c 5" >&2
}

# Runs etcdctl (v3 API) with the TLS files from cert_dir; all arguments are
# passed through. A function is used instead of a shell alias because aliases
# are not expanded in non-interactive shells such as cron jobs.
etcd::ctl() {
  ETCDCTL_API=3 etcdctl \
    --cert="${cert_dir%/}/server.crt" \
    --key="${cert_dir%/}/server.key" \
    --cacert="${cert_dir%/}/ca.crt" \
    "$@"
}

# Prints the endpoint of the first member whose status line contains "true"
# (the IS LEADER column of `endpoint status`).
etcd::leader() {
  etcd::ctl --endpoints="$ALL_ENDPOINTS" endpoint status \
    | awk -F',' '/true/ {print $1; exit}'
}

# Saves a timestamped snapshot into bak_dir, then removes every snapshot except
# the bak_count most recently modified ones.
etcd::cron::save() {
  local snapshot
  local -a stale=()

  # Never continue after a failed cd: the cleanup below would otherwise run in
  # whatever directory the script was started from.
  cd "$bak_dir" || { echo "cannot cd to $bak_dir" >&2; return 1; }

  snapshot="${bak_prefix}$(date +%Y-%m-%d-%H-%M)${bak_suffix}"
  etcd::ctl --endpoints="$endpoints" snapshot save "$snapshot"

  # Newest first by mtime; everything after the first bak_count entries is
  # stale. Snapshot names are generated above and contain no whitespace, so
  # reading the ls output line by line is safe. 10# forces base 10 so a value
  # such as 08 is not parsed as octal.
  mapfile -t stale < <(ls -t -- "$bak_prefix"*"$bak_suffix" \
    | tail -n "+$((10#$bak_count + 1))")
  if (( ${#stale[@]} > 0 )); then
    rm -f -- "${stale[@]}"
  fi
}

# Resolves the target endpoint, then runs either retention mode (-c given) or a
# one-off snapshot into the file named by the remaining arguments.
main() {
  if [[ -z "${endpoints:-}" ]]; then
    endpoints=$(etcd::leader)
  fi
  if [[ -z "$endpoints" ]]; then
    echo 'cannot determine the etcd leader endpoint' >&2
    exit 1
  fi

  if [[ -n "${bak_count:-}" ]]; then
    etcd::cron::save
  elif (( $# > 0 )); then
    etcd::ctl --endpoints="$endpoints" snapshot save "$@"
  else
    usage
    exit 1
  fi
}

# Normalise the command line into positional parameters ($1, $2, ...).
# Testing getopt inside `if` keeps `set -e` from aborting before usage is shown.
if ! temp=$(getopt -n "$0" -o c:d: -u -- "$@"); then
  usage
  exit 1
fi
# shellcheck disable=SC2086  # getopt -u emits unquoted output meant to be word-split
set -- $temp

# -c  number of backup copies to keep
# -d  directory in which backups are stored
while true; do
  case "$1" in
    -c)
      # The first value wins; a bak_count already set in the environment is kept.
      : "${bak_count:=$2}"
      shift 2
      ;;
    -d)
      mkdir -p -- "$2"
      bak_dir=$2
      shift 2
      ;;
    --)
      shift
      break
      ;;
    '')
      break
      ;;
    *)
      echo "Internal error!" >&2
      exit 1
      ;;
  esac
done

if [[ -n "${bak_count:-}" && ! "$bak_count" =~ ^[0-9]+$ ]]; then
  echo 'the value of the -c must be number' >&2
  exit 1
fi

main "$@"
EOF

备份
bash /opt/etcd/etcd_cron.sh -c 4 -d /opt/etcd/

定时备份
执行 crontab -e,添加下面的内容,每小时备份一次并自动保留四个备份副本
# Hourly etcd backup that keeps the four newest snapshots. POSIX redirection is
# used because cron runs entries with /bin/sh by default, where `&>` is not portable.
0 * * * * bash /opt/etcd/etcd_cron.sh -c 4 -d /opt/etcd/ >/dev/null 2>&1
etcd还原
举个例子,先查看default下资源,发现一个pod 一个deployment 一个svc
kubectl get all

先备份一下当前的信息
bash /opt/etcd/etcd_cron.sh -c 4 -d /tmp

将备份信息发给其他2个master节点
删掉pod deployment svc
还原备份记录到新的存储目录
在3个etcd节点都执行一遍还原操作
master01
# Restore the snapshot into a new data directory for member master01
# (peer URL https://192.168.33.101:2380).
ETCDCTL_API=3 etcdctl snapshot restore /tmp/etcd-2020-06-10-07-21.db \
  --name=master01 \
  --data-dir=/var/lib/etcd-from-backup \
  --initial-cluster=master01=https://192.168.33.101:2380,master02=https://192.168.33.102:2380,master03=https://192.168.33.103:2380 \
  --initial-cluster-token=etcd-cluster-1 \
  --initial-advertise-peer-urls=https://192.168.33.101:2380 \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt \
  --cert=/etc/kubernetes/pki/etcd/server.crt \
  --key=/etc/kubernetes/pki/etcd/server.key
master02
# Restore the snapshot into a new data directory for member master02
# (peer URL https://192.168.33.102:2380).
ETCDCTL_API=3 etcdctl snapshot restore /tmp/etcd-2020-06-10-07-21.db \
  --name=master02 \
  --data-dir=/var/lib/etcd-from-backup \
  --initial-cluster=master01=https://192.168.33.101:2380,master02=https://192.168.33.102:2380,master03=https://192.168.33.103:2380 \
  --initial-cluster-token=etcd-cluster-1 \
  --initial-advertise-peer-urls=https://192.168.33.102:2380 \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt \
  --cert=/etc/kubernetes/pki/etcd/server.crt \
  --key=/etc/kubernetes/pki/etcd/server.key
master03
# Restore the snapshot into a new data directory for member master03
# (peer URL https://192.168.33.103:2380).
ETCDCTL_API=3 etcdctl snapshot restore /tmp/etcd-2020-06-10-07-21.db \
  --name=master03 \
  --data-dir=/var/lib/etcd-from-backup \
  --initial-cluster=master01=https://192.168.33.101:2380,master02=https://192.168.33.102:2380,master03=https://192.168.33.103:2380 \
  --initial-cluster-token=etcd-cluster-1 \
  --initial-advertise-peer-urls=https://192.168.33.103:2380 \
  --cacert=/etc/kubernetes/pki/etcd/ca.crt \
  --cert=/etc/kubernetes/pki/etcd/server.crt \
  --key=/etc/kubernetes/pki/etcd/server.key
统一修改3个节点上的 /etc/kubernetes/manifests/etcd.yaml
更新 --data-dir
--data-dir=/var/lib/etcd-from-backup
添加 initial-cluster-token
--initial-cluster-token=etcd-cluster-1
更新 --initial-cluster
--initial-cluster=master01=https://192.168.33.101:2380,master02=https://192.168.33.102:2380,master03=https://192.168.33.103:2380
更新 --initial-cluster-state
- --initial-cluster-state=new

更新目录
# Excerpt of /etc/kubernetes/manifests/etcd.yaml; nesting indentation is not shown here.
# NOTE(review): presumably volumeMounts belongs to the etcd container and
# hostNetwork / priorityClassName / volumes to the pod spec — confirm against the real file.
volumeMounts:
# Container-side path of the data volume; same path as the --data-dir used for the restore.
- mountPath: /var/lib/etcd-from-backup
name: etcd-data
- mountPath: /etc/kubernetes/pki/etcd
name: etcd-certs
hostNetwork: true
priorityClassName: system-cluster-critical
volumes:
# Host-side directory backing etcd-data; same path as the restore target.
- hostPath:
path: /var/lib/etcd-from-backup
type: DirectoryOrCreate
name: etcd-data
- hostPath:
path: /etc/kubernetes/pki/etcd
type: DirectoryOrCreate
name: etcd-certs

然后保存退出即可
之后发现已还原
如果遇到3个etcd 一直处于pending时,重启主机即可。
