1. Get the installation source code

git clone --single-branch --branch release-1.7 https://github.com/rook/rook.git
Directory tree:
[root@master1 examples]# tree kubernetes/
kubernetes/
|-- ceph
|   |-- ceph-client.yaml
|   |-- cluster-external-management.yaml
|   |-- cluster-external.yaml
|   |-- cluster-on-local-pvc.yaml
|   |-- cluster-on-pvc.yaml
|   |-- cluster-stretched-aws.yaml
|   |-- cluster-stretched.yaml
|   |-- cluster-test.yaml
|   |-- cluster.yaml
|   |-- common-external.yaml
|   |-- common-second-cluster.yaml
|   |-- common.yaml
|   |-- crds.yaml
|   |-- create-external-cluster-resources.py
|   |-- create-external-cluster-resources.sh
|   |-- csi
|   |   |-- cephfs
|   |   |   |-- kube-registry.yaml
|   |   |   |-- pod.yaml
|   |   |   |-- pvc-clone.yaml
|   |   |   |-- pvc-restore.yaml
|   |   |   |-- pvc.yaml
|   |   |   |-- snapshotclass.yaml
|   |   |   |-- snapshot.yaml
|   |   |   |-- storageclass-ec.yaml
|   |   |   `-- storageclass.yaml
|   |   |-- rbd
|   |   |   |-- pod.yaml
|   |   |   |-- pvc-clone.yaml
|   |   |   |-- pvc-restore.yaml
|   |   |   |-- pvc.yaml
|   |   |   |-- snapshotclass.yaml
|   |   |   |-- snapshot.yaml
|   |   |   |-- storageclass-ec.yaml
|   |   |   |-- storageclass-test.yaml
|   |   |   `-- storageclass.yaml
|   |   `-- template
|   |       |-- cephfs
|   |       |   |-- csi-cephfsplugin-provisioner-dep.yaml
|   |       |   |-- csi-cephfsplugin-svc.yaml
|   |       |   `-- csi-cephfsplugin.yaml
|   |       `-- rbd
|   |           |-- csi-rbdplugin-provisioner-dep.yaml
|   |           |-- csi-rbdplugin-svc.yaml
|   |           `-- csi-rbdplugin.yaml
|   |-- dashboard-external-https.yaml
|   |-- dashboard-external-http.yaml
|   |-- dashboard-ingress-https.yaml
|   |-- dashboard-loadbalancer.yaml
|   |-- direct-mount.yaml
|   |-- filesystem-ec.yaml
|   |-- filesystem-mirror.yaml
|   |-- filesystem-test.yaml
|   |-- filesystem.yaml
|   |-- flex
|   |   |-- kube-registry.yaml
|   |   |-- storageclass-ec.yaml
|   |   |-- storageclass-test.yaml
|   |   `-- storageclass.yaml
|   |-- images.txt
|   |-- import-external-cluster.sh
|   |-- monitoring
|   |   |-- csi-metrics-service-monitor.yaml
|   |   |-- prometheus-ceph-v14-rules-external.yaml
|   |   |-- prometheus-ceph-v14-rules.yaml
|   |   |-- prometheus-ceph-v15-rules-external.yaml -> prometheus-ceph-v14-rules-external.yaml
|   |   |-- prometheus-ceph-v15-rules.yaml -> prometheus-ceph-v14-rules.yaml
|   |   |-- prometheus-ceph-v16-rules-external.yaml -> prometheus-ceph-v14-rules-external.yaml
|   |   |-- prometheus-ceph-v16-rules.yaml -> prometheus-ceph-v14-rules.yaml
|   |   |-- prometheus-service.yaml
|   |   |-- prometheus.yaml
|   |   |-- rbac.yaml
|   |   `-- service-monitor.yaml
|   |-- nfs-test.yaml
|   |-- nfs.yaml
|   |-- object-bucket-claim-delete.yaml
|   |-- object-bucket-claim-retain.yaml
|   |-- object-ec.yaml
|   |-- object-external.yaml
|   |-- object-multisite-pull-realm.yaml
|   |-- object-multisite.yaml
|   |-- object-openshift.yaml
|   |-- object-test.yaml
|   |-- object-user.yaml
|   |-- object.yaml
|   |-- operator-openshift.yaml
|   |-- operator.yaml
|   |-- osd-purge.yaml
|   |-- pool-ec.yaml
|   |-- pool-test.yaml
|   |-- pool.yaml
|   |-- pre-k8s-1.16
|   |   `-- crds.yaml
|   |-- rbdmirror.yaml
|   |-- rgw-external.yaml
|   |-- scc.yaml
|   |-- storageclass-bucket-delete.yaml
|   |-- storageclass-bucket-retain.yaml
|   |-- test-data
|   |   `-- ceph-status-out
|   |-- toolbox-job.yaml
|   `-- toolbox.yaml
|-- mysql.yaml
|-- README.md
`-- wordpress.yaml
Each storage driver directory contains the manifests needed to install and deploy that driver, and each is built around one core component: the operator.

2. Deploy the Rook custom resource definitions

kubectl apply -f crds.yaml
crds.yaml contains the Ceph-related custom resource definitions, such as cephclusters, cephclients, volumes, cephfilesystems, cephobjectstores, objectbuckets, and objectbucketclaims:

[root@master1 examples]# kubectl get customresourcedefinitions.apiextensions.k8s.io
NAME CREATED AT
cephblockpools.ceph.rook.io 2021-09-14T09:40:32Z
cephclients.ceph.rook.io 2021-09-14T09:40:32Z
cephclusters.ceph.rook.io 2021-09-14T09:40:32Z
cephfilesystemmirrors.ceph.rook.io 2021-09-14T09:40:32Z
cephfilesystems.ceph.rook.io 2021-09-14T09:40:32Z
cephnfses.ceph.rook.io 2021-09-14T09:40:33Z
cephobjectrealms.ceph.rook.io 2021-09-14T09:40:33Z
cephobjectstores.ceph.rook.io 2021-09-14T09:40:33Z
cephobjectstoreusers.ceph.rook.io 2021-09-14T09:40:33Z
cephobjectzonegroups.ceph.rook.io 2021-09-14T09:40:33Z
cephobjectzones.ceph.rook.io 2021-09-14T09:40:33Z
cephrbdmirrors.ceph.rook.io 2021-09-14T09:40:33Z
objectbucketclaims.objectbucket.io 2021-09-14T09:40:33Z
objectbuckets.objectbucket.io 2021-09-14T09:40:33Z
volumereplicationclasses.replication.storage.openshift.io 2021-09-14T09:40:33Z
volumereplications.replication.storage.openshift.io 2021-09-14T09:40:33Z
volumes.rook.io 2021-09-14T09:40:33Z
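To see just the Ceph resource kinds, you can also query by API group:
kubectl api-resources --api-group=ceph.rook.io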

3. Deploy the cluster RBAC resources

kubectl apply -f common.yaml
This generates the RBAC-related objects, including Role, ClusterRole, ClusterRoleBinding, and ServiceAccount; the corresponding permissions can be inspected with the commands below.
kubectl get clusterrole
kubectl get clusterrolebindings.rbac.authorization.k8s.io
kubectl get serviceaccounts -n rook-ceph
ClusterRole and ClusterRoleBinding are cluster-scoped resources in Kubernetes, while ServiceAccount is a namespace-scoped resource.
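The namespaced Role and RoleBinding objects mentioned above live in the rook-ceph namespace and can be listed the same way:
kubectl get roles,rolebindings -n rook-ceph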
Note: to understand Role, ClusterRole, ClusterRoleBinding, and ServiceAccount, see my earlier write-up on RBAC access control: https://www.yuque.com/junmoxiao-pwkpf/zpmbo6/ktd3gi

4. Deploy the operator

kubectl apply -f operator.yaml
This creates a Deployment whose pod manages the Ceph cluster; its configuration is held in a ConfigMap. Both can be checked as follows.
[root@master1 ceph]# kubectl get cm rook-ceph-operator-config -n rook-ceph
NAME DATA AGE
rook-ceph-operator-config 15 60m

[root@master1 ceph]# kubectl get deployment rook-ceph-operator -n rook-ceph
NAME READY UP-TO-DATE AVAILABLE AGE
rook-ceph-operator 1/1 1 1 60m
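To inspect the individual operator settings, dump the ConfigMap contents:
kubectl get cm rook-ceph-operator-config -n rook-ceph -o yaml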

5. Create the cluster

kubectl apply -f cluster.yaml
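For orientation, these are the cluster.yaml fields this walkthrough leans on most. A trimmed sketch of the CephCluster spec (values are illustrative, not a complete manifest):

apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  name: rook-ceph
  namespace: rook-ceph
spec:
  mon:
    count: 3             # three monitors for quorum
  dashboard:
    enabled: true
  storage:
    useAllNodes: true    # let rook scan every schedulable node for devices
    useAllDevices: true  # claim every empty, unformatted device it finds

The useAllNodes/useAllDevices switches are what drive the automatic disk discovery discussed in step 9.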

6. Cluster creation process

The operator logs show the Ceph cluster being initialized; initialization is driven by the rook operator, which automatically orchestrates the bootstrap of the mon, mgr, and osd roles in turn.
kubectl logs deploy/rook-ceph-operator -n rook-ceph -f

7. Pull the CSI images

At this point the cluster automatically creates the pods that make up Ceph itself, including the mon, mgr, and osd roles. In addition, a CSI driver is deployed on every node, in two flavors: rbd and cephfs. Both driver images are hosted on Google's registry, so they normally have to be pulled by hand from China. First, list the image names:
[root@master1 ceph]# for i in $(kubectl get pods -n rook-ceph -o jsonpath='{.items[*].spec.containers[*].image}'); do echo ${i} | grep gcr.io; done
k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.2.0
k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.2.0
k8s.gcr.io/sig-storage/csi-attacher:v3.2.1
k8s.gcr.io/sig-storage/csi-snapshotter:v4.1.1
k8s.gcr.io/sig-storage/csi-resizer:v1.2.0
k8s.gcr.io/sig-storage/csi-provisioner:v2.2.2
k8s.gcr.io/sig-storage/csi-attacher:v3.2.1
k8s.gcr.io/sig-storage/csi-snapshotter:v4.1.1
k8s.gcr.io/sig-storage/csi-resizer:v1.2.0
k8s.gcr.io/sig-storage/csi-provisioner:v2.2.2
k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.2.0
k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.2.0
k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.2.0
k8s.gcr.io/sig-storage/csi-provisioner:v2.2.2
k8s.gcr.io/sig-storage/csi-resizer:v1.2.0
k8s.gcr.io/sig-storage/csi-attacher:v3.2.1
k8s.gcr.io/sig-storage/csi-snapshotter:v4.1.1
k8s.gcr.io/sig-storage/csi-provisioner:v2.2.2
k8s.gcr.io/sig-storage/csi-resizer:v1.2.0
k8s.gcr.io/sig-storage/csi-attacher:v3.2.1
k8s.gcr.io/sig-storage/csi-snapshotter:v4.1.1
k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.2.0

Pull them manually from a domestic (China) mirror instead, using the following script to substitute the images:
#!/bin/bash
# Pull the CSI images from an Aliyun mirror, retag them to their
# upstream k8s.gcr.io names, then drop the mirror tag.
image_list=(
  csi-node-driver-registrar:v2.2.0
  csi-resizer:v1.2.0
  csi-provisioner:v2.2.2
  csi-snapshotter:v4.1.1
  csi-attacher:v3.2.1
)

aliyuncs="registry.aliyuncs.com/it00021hot"
google_gcr="k8s.gcr.io/sig-storage"

for image in "${image_list[@]}"; do
  docker image pull "${aliyuncs}/${image}"
  docker image tag "${aliyuncs}/${image}" "${google_gcr}/${image}"
  docker image rm "${aliyuncs}/${image}"
  echo "${aliyuncs}/${image} -> ${google_gcr}/${image} downloaded."
done
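When the script finishes, confirm the retagged images are present locally:
docker images | grep sig-storage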

Confirm that the pods are running:
[root@master1 ceph]# kubectl get pods -n rook-ceph
NAME READY STATUS RESTARTS AGE
csi-cephfsplugin-czpst 3/3 Running 0 56m
csi-cephfsplugin-djcsd 3/3 Running 0 56m
csi-cephfsplugin-provisioner-7dcc8ff54d-4kvg5 6/6 Running 0 56m
csi-cephfsplugin-provisioner-7dcc8ff54d-fgswf 6/6 Running 0 56m
csi-cephfsplugin-v2rvs 3/3 Running 0 56m
csi-rbdplugin-5gj7t 3/3 Running 0 56m
csi-rbdplugin-h49df 3/3 Running 0 56m
csi-rbdplugin-provisioner-d8bcc5fc4-2znkc 6/6 Running 0 56m
csi-rbdplugin-provisioner-d8bcc5fc4-j5fd7 6/6 Running 0 56m
csi-rbdplugin-v9ffd 3/3 Running 0 56m
rook-ceph-crashcollector-master2-79959c4c4d-7m46b 1/1 Running 0 54m
rook-ceph-crashcollector-node1-8876bb88c-9sbnr 1/1 Running 0 54m
rook-ceph-crashcollector-node2-6f5c95d7f8-s5mr4 1/1 Running 0 54m
rook-ceph-mgr-a-7db599c577-bj9c9 1/1 Running 0 54m
rook-ceph-mon-a-6bd6bd4988-qfkrn 1/1 Running 0 58m
rook-ceph-mon-b-678c474548-65vkz 1/1 Running 0 55m
rook-ceph-mon-c-7594996d65-2gwmn 1/1 Running 0 55m
rook-ceph-operator-7c55c7cdcb-7r2gt 1/1 Running 0 75m
rook-ceph-osd-0-65d45d5468-2cxt4 1/1 Running 0 54m
rook-ceph-osd-1-5685dcb85-g4qxq 1/1 Running 0 54m
rook-ceph-osd-2-64f9d67455-cq44z 1/1 Running 0 54m
rook-ceph-osd-prepare-master2-g25wl 0/1 Completed 0 54m
rook-ceph-osd-prepare-node1-lmj47 0/1 Completed 0 54m
rook-ceph-osd-prepare-node2-n9wjl 0/1 Completed 0 54m

8. Adding the master node as an OSD

By default the master node does not join the Ceph cluster in the OSD storage role. Why? The master carries the default NoSchedule taint, so OSD pods are never scheduled onto it. There are two ways to fix this:
1. Set the OSD scheduling parameters: add tolerations for the master's taint under the placement section of cluster.yaml (see the sketch after this list). This is the method I use here, because the alternative of removing the taint lets ordinary production pods land on the master, which can trigger cascading failures.
If you are unsure how to write the tolerations, run
[root@master1 ceph]# kubectl explain CephCluster.spec.placement.toleration
to see the field documentation.

2. Remove the master's taint (not recommended):
kubectl taint node master1 node-role.kubernetes.io/master:NoSchedule-
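A minimal sketch of method 1, assuming you want every rook daemon (not only the OSDs) to tolerate the master taint; merge this under spec: in cluster.yaml:

placement:
  all:
    tolerations:
      - key: node-role.kubernetes.io/master
        operator: Exists
        effect: NoSchedule

Scoping it to placement.osd instead of placement.all keeps the other daemons off the master.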

9. Tune the OSD discovery interval

The configuration uses all nodes and all of their devices, and rook automatically scans every node for devices: as soon as a node or disk is added, it gets added to the Ceph cluster. The operator does this through a rook-discover container that rescans periodically; the interval is controlled by the ROOK_DISCOVER_DEVICES_INTERVAL parameter.
The interval defaults to 60m; shorten it to 60s, then re-apply operator.yaml.
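In my copy of operator.yaml the interval is an environment variable on the rook-ceph-operator container; a sketch of the relevant excerpt (verify against your operator.yaml, where the variable may ship commented out):

env:
  - name: ROOK_DISCOVER_DEVICES_INTERVAL
    value: "60s"   # default is 60m; rescan for new disks every minute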
As you can see, without any manual intervention, the moment a node joins the cluster or a disk is plugged in, it is discovered and added to the cluster automatically.
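One way to watch a freshly added disk turn into an OSD (the app=rook-ceph-osd label is an assumption; confirm with kubectl get pods --show-labels):
kubectl get pods -n rook-ceph -l app=rook-ceph-osd -w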