date: 2021-04-20
title: Integrating Ceph with Kubernetes # title
tags: storage, k8s persistence # tags
categories: k8s # category
This post walks through integrating Ceph with k8s, covering volumes, PVC, and SC (StorageClass).
How to install the Ceph and k8s clusters themselves is out of scope here; see my earlier posts or set the clusters up by whatever means you prefer.
Reference: official documentation
Using Ceph with volumes
Reference: official documentation
Install the Ceph client tools on the k8s nodes
Install on every k8s node.
$ yum -y install ceph-common
# If the package cannot be found, configure the Ceph repository first
$ cat > /etc/yum.repos.d/ceph.repo << EOF
[ceph-noarch]
name=ceph-noarch
baseurl=https://mirrors.aliyun.com/ceph/rpm-nautilus/el7/noarch/
enabled=1
gpgcheck=0
[ceph-x86_64]
name=ceph-x86_64
baseurl=https://mirrors.aliyun.com/ceph/rpm-nautilus/el7/x86_64/
enabled=1
gpgcheck=0
EOF
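Once ceph-common is installed, it is worth confirming on each node that the client binary works and that the rbd kernel module is available, since the in-tree rbd volume plugin maps images through the kernel client (a quick sanity check, not strictly required):
$ ceph --version
$ modprobe rbd && lsmod | grep rbd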
Create the pool, RBD image, and auth user in the Ceph cluster
Run the following on any Ceph admin node.
# Create the pool
$ ceph osd pool create kubernetes 16 16
# Initialize the pool
$ rbd pool init kubernetes
# Create the RBD image
$ rbd create -p kubernetes --image-feature layering rbd.img --size 10G
# Create the authorized user
$ ceph auth get-or-create client.kubernetes mon 'profile rbd' osd 'profile rbd pool=kubernetes' mgr 'profile rbd pool=kubernetes'
[client.kubernetes]
key = AQCgD3xgde3HDRAA4iqESawxR8LgB3mAZ70fWQ==
# Confirm the user's capabilities
$ ceph auth get client.kubernetes
exported keyring for client.kubernetes
[client.kubernetes]
key = AQCgD3xgde3HDRAA4iqESawxR8LgB3mAZ70fWQ==
caps mgr = "profile rbd pool=kubernetes"
caps mon = "profile rbd"
caps osd = "profile rbd pool=kubernetes"
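Before moving on, you can double-check the image that was just created. Only the layering feature should be enabled, which matches the --image-feature layering flag above; older kernel clients (such as the stock CentOS 7 kernel) can only map images with this feature set:
$ rbd info kubernetes/rbd.img
# expect size 10 GiB and features: layering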
Create the Secret object
Perform the following in the k8s cluster.
# base64-encode the Ceph auth key created above
$ echo 'AQCgD3xgde3HDRAA4iqESawxR8LgB3mAZ70fWQ==' | base64
QVFDZ0QzeGdkZTNIRFJBQTRpcUVTYXd4UjhMZ0IzbUFaNzBmV1E9PQo=
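To sanity-check the value, decode it back. Also note that plain echo appends a trailing newline that ends up inside the encoded string (echo -n avoids this); it did not cause problems in this walkthrough, but echo -n is the safer habit:
$ echo 'QVFDZ0QzeGdkZTNIRFJBQTRpcUVTYXd4UjhMZ0IzbUFaNzBmV1E9PQo=' | base64 -d
# should print the original key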
# Write the Secret manifest
cat > ceph_secret.yml << EOF
apiVersion: v1
kind: Secret
metadata:
name: ceph-secret
type: "kubernetes.io/rbd"
data:
key: QVFDZ0QzeGdkZTNIRFJBQTRpcUVTYXd4UjhMZ0IzbUFaNzBmV1E9PQo=
EOF
# The key above is the base64-encoded Ceph auth key
# Create the Secret object
$ kubectl apply -f ceph_secret.yml
secret/ceph-secret created
# Confirm it was created
$ kubectl get secret/ceph-secret -o yaml
apiVersion: v1
data:
key: QVFDZ0QzeGdkZTNIRFJBQTRpcUVTYXd4UjhMZ0IzbUFaNzBmV1E9PQo=
kind: Secret
metadata:
............. # remaining output omitted
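A quick way to confirm the key stored in the Secret decodes back to the Ceph key:
$ kubectl get secret ceph-secret -o jsonpath='{.data.key}' | base64 -d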
Create a Deployment in k8s that uses the Ceph block device
The yaml below assumes you are already familiar with k8s, so I won't explain it field by field.
$ cat > nginx.yaml << EOF
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: web-nginx
labels:
k8s.cn/layer: web
spec:
replicas: 1
selector:
matchLabels:
k8s.cn/layer: web
template:
metadata:
labels:
k8s.cn/layer: web
spec:
containers:
- image: nginx
imagePullPolicy: IfNotPresent
name: nginx
ports:
- containerPort: 80
name: www
protocol: TCP
volumeMounts:
- mountPath: /data
name: ceph-demo
volumes:
- name: ceph-demo
rbd:
monitors:
- 192.168.20.10:6789
- 192.168.20.5:6789
- 192.168.20.6:6789
pool: kubernetes
image: rbd.img
fsType: ext4
user: kubernetes
secretRef:
name: ceph-secret
EOF
# Create the Deployment
$ kubectl apply -f nginx.yaml
# Wait a moment, then confirm the container is running
$ kubectl get pods
NAME READY STATUS RESTARTS AGE
web-nginx-6bf57f9cf7-dfvv4 1/1 Running 0 8s
Verify inside the container
# List the pods
$ kubectl get pods
NAME READY STATUS RESTARTS AGE
web-nginx-6bf57f9cf7-dfvv4 1/1 Running 0 13s
# Enter the pod
$ kubectl exec -it pod/web-nginx-6bf57f9cf7-dfvv4 -- /bin/bash
# Check how the /data directory is mounted
root@web-nginx-6bf57f9cf7-dfvv4:/# df -hT data
Filesystem Type Size Used Avail Use% Mounted on
/dev/rbd0 ext4 9.8G 37M 9.7G 1% /data
Verify how the block device gets mounted
The in-container check above confirms the block device is mounted into the pod; now let's look at how it actually gets there.
# Find the node the pod is running on
$ kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
web-nginx-6bf57f9cf7-dfvv4 1/1 Running 0 11m 10.100.15.202 centos-20-4 <none> <none>
# Check on the centos-20-4 host node
$ df -hT | grep rbd # the image is mounted on the host first, then mapped into the pod
/dev/rbd0 ext4 9.8G 37M 9.7G 1% /var/lib/kubelet/plugins/kubernetes.io/rbd/mounts/kubernetes-image-rbd.img
Conclusion 1: after I deleted the pod and let the Deployment controller recreate it, the data in rbd.img was still there. In other words, if the RBD image has no filesystem yet, it is formatted with the filesystem type specified in the manifest; if a filesystem already exists, it is simply mounted and used.
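A minimal way to reproduce the check behind conclusion 1 (the file name test.txt is just an example; substitute your own pod name):
$ kubectl exec web-nginx-6bf57f9cf7-dfvv4 -- sh -c 'echo hello > /data/test.txt'
$ kubectl delete pod web-nginx-6bf57f9cf7-dfvv4
# wait for the Deployment to recreate the pod, then confirm the file survived
$ kubectl exec deploy/web-nginx -- cat /data/test.txt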
Conclusion 2: the Deployment above does not support multiple replicas; scaling up the replica count produces errors, because one RBD image can only be mapped on a single host. So if you want several pods to share one RBD block device, the only option is to pin them to a fixed node. Below is the second Deployment I created, which uses the nodeName field to force it onto the centos-20-4 host (everything is identical except the object names):
$ cat nginx2.yaml
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: web-nginx2
labels:
k8s.cn/layer: web2
spec:
replicas: 1
selector:
matchLabels:
k8s.cn/layer: web2
template:
metadata:
labels:
k8s.cn/layer: web2
spec:
nodeName: centos-20-4
containers:
- image: nginx
imagePullPolicy: IfNotPresent
name: nginx2
ports:
- containerPort: 80
name: www
protocol: TCP
volumeMounts:
- mountPath: /data
name: ceph-demo2
volumes:
- name: ceph-demo2
rbd:
monitors:
- 192.168.20.10:6789
- 192.168.20.5:6789
- 192.168.20.6:6789
pool: kubernetes
image: rbd.img
fsType: ext4
user: kubernetes
secretRef:
name: ceph-secret
Integrating Ceph with PV/PVC
Preparation
Following the "Using Ceph with volumes" section above, create the pool, the auth user, and the Secret (if they already exist, simply reuse them).
# Create an RBD image on a Ceph node
$ rbd create -p kubernetes --image-feature layering demo-1.img --size 10G
# Confirm the image exists
$ rbd -p kubernetes ls
demo-1.img
rbd.img
Create the PV
I won't explain every field of the following yaml files here; if anything is unclear, please look it up elsewhere.
# Define the yaml file
$ cat > pv.yaml << EOF
apiVersion: v1
kind: PersistentVolume
metadata:
name: rbd-demo
spec:
accessModes:
- ReadWriteOnce
capacity:
storage: 10G
rbd:
monitors:
- 192.168.20.10:6789
- 192.168.20.5:6789
- 192.168.20.6:6789
pool: kubernetes
image: demo-1.img
fsType: ext4
user: kubernetes
secretRef:
name: ceph-secret
persistentVolumeReclaimPolicy: Retain
storageClassName: rbd
EOF
# Create the PV
$ kubectl apply -f pv.yaml
persistentvolume/rbd-demo created
# Confirm the PV was created
$ kubectl get pv
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
rbd-demo 10G RWO Retain Available rbd 13s
Define a PVC that references the PV
# Define the PVC yaml file
$ cat > pvc.yaml << EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: pvc-demo
spec:
accessModes:
- ReadWriteOnce # the access mode must match the PV
volumeName: rbd-demo # the name of the PV
resources:
requests:
storage: 10G
storageClassName: rbd # must match the storageClassName field in the PV yaml
EOF
# Create the PVC
$ kubectl apply -f pvc.yaml
# Confirm the PVC was created
$ kubectl get pvc
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
pvc-demo Bound rbd-demo 10G RWO rbd 100s
Confirm the PVC and PV are in the Bound state
$ kubectl get pv,pvc
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
persistentvolume/rbd-demo 10G RWO Retain Bound default/pvc-demo rbd 3m43s
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
persistentvolumeclaim/pvc-demo Bound rbd-demo 10G RWO rbd 32s
Create a Deployment that mounts the PVC
$ cat > nginx.yaml << EOF
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: web-nginx
labels:
k8s.cn/layer: web
spec:
replicas: 1
selector:
matchLabels:
k8s.cn/layer: web
template:
metadata:
labels:
k8s.cn/layer: web
spec:
containers:
- image: nginx
imagePullPolicy: IfNotPresent
name: nginx
ports:
- containerPort: 80
name: www
protocol: TCP
volumeMounts:
- mountPath: /data
name: rbd # must match the volumes.name field below
volumes:
- name: rbd
persistentVolumeClaim:
claimName: pvc-demo # the name of the PVC
EOF
# Create the Deployment
$ kubectl apply -f nginx.yaml
deployment.apps/web-nginx created
# Confirm the Deployment is up
$ kubectl get pods
NAME READY STATUS RESTARTS AGE
web-nginx-57b545d4d8-c9zk4 1/1 Running 0 4m
Verify the mount
$ kubectl exec -it web-nginx-57b545d4d8-c9zk4 -- /bin/bash
root@web-nginx-57b545d4d8-c9zk4:/# df -hT /data
Filesystem Type Size Used Avail Use% Mounted on
/dev/rbd0 ext4 9.8G 37M 9.7G 1% /data
# Find the node the pod is running on
$ kubectl get pods -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
web-nginx-57b545d4d8-c9zk4 1/1 Running 0 6m27s 10.100.15.205 centos-20-4 <none> <none>
# Check the mount on the host
$ df -hT | grep rbd
/dev/rbd0 ext4 9.8G 37M 9.7G 1% /var/lib/kubelet/plugins/kubernetes.io/rbd/mounts/kubernetes-image-demo-1.img
# The mount directory on the host and the /data directory in the pod have exactly the same contents.
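If you want to see the kernel RBD mapping itself on the node, the standard rbd tooling shows it (run on the centos-20-4 host):
$ rbd showmapped    # lists mapped images, e.g. kubernetes/demo-1.img on /dev/rbd0
$ lsblk /dev/rbd0   # confirm the device and its mountpoint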
Architecture of the Ceph and SC integration
Reference: official documentation.
From k8s v1.13 onward, Ceph block device images can be used through the ceph-csi driver: it dynamically provisions RBD images to back k8s volumes and maps those RBD images as block devices on the worker nodes that run pods referencing the RBD-backed volumes (or mounts the filesystem contained in the image).
Below is an architecture diagram of how k8s works with the ceph-csi driver:
In the diagram above, after k8s receives a user request it goes through ceph-csi to dynamically create the PV and PVC as well as the block image in the Ceph cluster, and mounts it on the node where the pod runs.
Deploy the ceph-csi driver
1. Query the Ceph cluster information (run on a Ceph admin node)
$ ceph mon dump
dumped monmap epoch 3
epoch 3
fsid d94fee92-ef1a-4f1f-80a5-1c7e1caf4a4a # we need this id
last_changed 2021-04-14 17:58:46.874896
created 2021-04-14 17:54:27.836955
min_mon_release 14 (nautilus)
# as well as the monitor addresses below
0: [v2:192.168.20.10:3300/0,v1:192.168.20.10:6789/0] mon.centos-20-10
1: [v2:192.168.20.5:3300/0,v1:192.168.20.5:6789/0] mon.centos-20-5
2: [v2:192.168.20.6:3300/0,v1:192.168.20.6:6789/0] mon.centos-20-6
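If you only need the cluster id, ceph fsid prints it directly, which is handy when filling in the ConfigMap below:
$ ceph fsid
# prints the fsid shown above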
2. Create the ceph-csi configuration file (run on a k8s cluster node)
# Define the ConfigMap file (the # annotations inside config.json below are explanatory only; remove them before applying, since JSON has no comment syntax)
$ cat <<EOF > csi-config-map.yaml
---
apiVersion: v1
kind: ConfigMap
data:
config.json: |-
[
{
"clusterID": "d94fee92-ef1a-4f1f-80a5-1c7e1caf4a4a", # 替换为你查出来的ceph集群id
"monitors": [ # 将下面替换为你ceph集群的monitor地址
"192.168.20.5:6789",
"192.168.20.6:6789",
"192.168.20.10:6789"
]
}
]
metadata:
name: ceph-csi-config
EOF
# Create the ConfigMap
$ kubectl apply -f csi-config-map.yaml
3. Define the credentials
# Look up the key of the kubernetes user in the Ceph cluster
# If PVC creation fails later, you can try replacing the kubernetes user with the admin user;
# normally the kubernetes user works fine
$ ceph auth get-or-create client.kubernetes mon 'profile rbd' osd 'profile rbd pool=kubernetes' mgr 'profile rbd pool=kubernetes'
[client.kubernetes]
key = AQDMi35gIuRmIxAApx47Id2rsMPF33R5r4jrwQ==
# Write the Secret file on the k8s side (userID/userKey must match the user and key returned above)
$ cat <<EOF > csi-rbd-secret.yaml
---
apiVersion: v1
kind: Secret
metadata:
name: csi-rbd-secret
namespace: default
stringData:
userID: kubernetes
userKey: AQCadX5ggqz0GRAAuQuf/Ks3B7aJoK5L3SqXDQ==
EOF
# Create the Secret
$ kubectl apply -f csi-rbd-secret.yaml
# ceph-csi also needs an additional ConfigMap that defines the Key Management Service (KMS) provider details.
# If no KMS is set up, place an empty config in the csi-kms-config-map
# yaml file, or refer to the examples at https://github.com/ceph/ceph-csi/tree/master/examples/kms
# Here we simply create an empty ConfigMap, as follows:
$ cat <<EOF > csi-kms-config-map.yaml
---
apiVersion: v1
kind: ConfigMap
data:
config.json: |-
{}
metadata:
name: ceph-csi-encryption-kms-config
EOF
# Create the ConfigMap
$ kubectl apply -f csi-kms-config-map.yaml
4. Create the RBAC authorization objects
$ kubectl apply -f https://raw.githubusercontent.com/ceph/ceph-csi/master/deploy/rbd/kubernetes/csi-provisioner-rbac.yaml
$ kubectl apply -f https://raw.githubusercontent.com/ceph/ceph-csi/master/deploy/rbd/kubernetes/csi-nodeplugin-rbac.yaml
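Among other things, these two manifests create the rbd-csi-provisioner and rbd-csi-nodeplugin ServiceAccounts that the Deployment and DaemonSet below reference; a quick check that they exist:
$ kubectl get serviceaccount rbd-csi-provisioner rbd-csi-nodeplugin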
5. Install the ceph-csi driver plugin
# Below are the official yaml files; note: they have issues (explained next)
$ wget https://raw.githubusercontent.com/ceph/ceph-csi/master/deploy/rbd/kubernetes/csi-rbdplugin-provisioner.yaml
$ wget https://raw.githubusercontent.com/ceph/ceph-csi/master/deploy/rbd/kubernetes/csi-rbdplugin.yaml
There are two known problems with the official yaml files:
- Problem 1: kubernetes/csi-rbdplugin-provisioner.yaml configures pod anti-affinity and a replica count of 3. If your k8s cluster has only two worker nodes, one pod will fail to schedule; in my case (1 master and 2 workers) one replica stayed unschedulable after applying it. Don't be tempted to lower the replica count to 2 (it appears to be involved in leader election, so 3 replicas are still recommended); instead add a toleration so the pod can be scheduled onto the master node. If your cluster has three or more worker nodes, you can ignore this problem.
- Problem 2: the official yaml files pin the quay.io/cephcsi/cephcsi images to the canary tag, which is a development/testing build. If this is for production, go to the official image registry, find the latest stable release, and replace the tag accordingly (a sed sketch for the tag swap follows this list).
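If you prefer to patch the official files in place rather than editing them by hand, a sed one-liner like this should do the tag swap (v3.3.0 is the stable tag used in my modified files below):
$ sed -i 's#quay.io/cephcsi/cephcsi:canary#quay.io/cephcsi/cephcsi:v3.3.0#g' csi-rbdplugin-provisioner.yaml csi-rbdplugin.yaml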
Below are my two modified yaml files (the only changes are the added toleration and the quay.io/cephcsi/cephcsi image tag):
$ cat csi-rbdplugin-provisioner.yaml
---
kind: Service
apiVersion: v1
metadata:
name: csi-rbdplugin-provisioner
labels:
app: csi-metrics
spec:
selector:
app: csi-rbdplugin-provisioner
ports:
- name: http-metrics
port: 8080
protocol: TCP
targetPort: 8680
---
kind: Deployment
apiVersion: apps/v1
metadata:
name: csi-rbdplugin-provisioner
spec:
replicas: 3
selector:
matchLabels:
app: csi-rbdplugin-provisioner
template:
metadata:
labels:
app: csi-rbdplugin-provisioner
spec:
tolerations: # this is the only field I added
- effect: NoSchedule
key: node-role.kubernetes.io/master
operator: Exists
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app
operator: In
values:
- csi-rbdplugin-provisioner
topologyKey: "kubernetes.io/hostname"
serviceAccountName: rbd-csi-provisioner
priorityClassName: system-cluster-critical
containers:
- name: csi-provisioner
image: k8s.gcr.io/sig-storage/csi-provisioner:v2.0.4
args:
- "--csi-address=$(ADDRESS)"
- "--v=5"
- "--timeout=150s"
- "--retry-interval-start=500ms"
- "--leader-election=true"
# set it to true to use topology based provisioning
- "--feature-gates=Topology=false"
# if fstype is not specified in storageclass, ext4 is default
- "--default-fstype=ext4"
- "--extra-create-metadata=true"
env:
- name: ADDRESS
value: unix:///csi/csi-provisioner.sock
imagePullPolicy: "IfNotPresent"
volumeMounts:
- name: socket-dir
mountPath: /csi
- name: csi-snapshotter
image: k8s.gcr.io/sig-storage/csi-snapshotter:v4.0.0
args:
- "--csi-address=$(ADDRESS)"
- "--v=5"
- "--timeout=150s"
- "--leader-election=true"
env:
- name: ADDRESS
value: unix:///csi/csi-provisioner.sock
imagePullPolicy: "IfNotPresent"
securityContext:
privileged: true
volumeMounts:
- name: socket-dir
mountPath: /csi
- name: csi-attacher
image: k8s.gcr.io/sig-storage/csi-attacher:v3.0.2
args:
- "--v=5"
- "--csi-address=$(ADDRESS)"
- "--leader-election=true"
- "--retry-interval-start=500ms"
env:
- name: ADDRESS
value: /csi/csi-provisioner.sock
imagePullPolicy: "IfNotPresent"
volumeMounts:
- name: socket-dir
mountPath: /csi
- name: csi-resizer
image: k8s.gcr.io/sig-storage/csi-resizer:v1.0.1
args:
- "--csi-address=$(ADDRESS)"
- "--v=5"
- "--timeout=150s"
- "--leader-election"
- "--retry-interval-start=500ms"
- "--handle-volume-inuse-error=false"
env:
- name: ADDRESS
value: unix:///csi/csi-provisioner.sock
imagePullPolicy: "IfNotPresent"
volumeMounts:
- name: socket-dir
mountPath: /csi
- name: csi-rbdplugin
securityContext:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
# for stable functionality replace canary with latest release version
image: quay.io/cephcsi/cephcsi:v3.3.0
args:
- "--nodeid=$(NODE_ID)"
- "--type=rbd"
- "--controllerserver=true"
- "--endpoint=$(CSI_ENDPOINT)"
- "--v=5"
- "--drivername=rbd.csi.ceph.com"
- "--pidlimit=-1"
- "--rbdhardmaxclonedepth=8"
- "--rbdsoftmaxclonedepth=4"
env:
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
- name: NODE_ID
valueFrom:
fieldRef:
fieldPath: spec.nodeName
# - name: POD_NAMESPACE
# valueFrom:
# fieldRef:
# fieldPath: spec.namespace
# - name: KMS_CONFIGMAP_NAME
# value: encryptionConfig
- name: CSI_ENDPOINT
value: unix:///csi/csi-provisioner.sock
imagePullPolicy: "IfNotPresent"
volumeMounts:
- name: socket-dir
mountPath: /csi
- mountPath: /dev
name: host-dev
- mountPath: /sys
name: host-sys
- mountPath: /lib/modules
name: lib-modules
readOnly: true
- name: ceph-csi-config
mountPath: /etc/ceph-csi-config/
- name: ceph-csi-encryption-kms-config
mountPath: /etc/ceph-csi-encryption-kms-config/
- name: keys-tmp-dir
mountPath: /tmp/csi/keys
- name: csi-rbdplugin-controller
securityContext:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
# for stable functionality replace canary with latest release version
image: quay.io/cephcsi/cephcsi:v3.3.0
args:
- "--type=controller"
- "--v=5"
- "--drivername=rbd.csi.ceph.com"
- "--drivernamespace=$(DRIVER_NAMESPACE)"
env:
- name: DRIVER_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
imagePullPolicy: "IfNotPresent"
volumeMounts:
- name: ceph-csi-config
mountPath: /etc/ceph-csi-config/
- name: keys-tmp-dir
mountPath: /tmp/csi/keys
- name: liveness-prometheus
image: quay.io/cephcsi/cephcsi:v3.3.0
args:
- "--type=liveness"
- "--endpoint=$(CSI_ENDPOINT)"
- "--metricsport=8680"
- "--metricspath=/metrics"
- "--polltime=60s"
- "--timeout=3s"
env:
- name: CSI_ENDPOINT
value: unix:///csi/csi-provisioner.sock
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
volumeMounts:
- name: socket-dir
mountPath: /csi
imagePullPolicy: "IfNotPresent"
volumes:
- name: host-dev
hostPath:
path: /dev
- name: host-sys
hostPath:
path: /sys
- name: lib-modules
hostPath:
path: /lib/modules
- name: socket-dir
emptyDir: {
medium: "Memory"
}
- name: ceph-csi-config
configMap:
name: ceph-csi-config
- name: ceph-csi-encryption-kms-config
configMap:
name: ceph-csi-encryption-kms-config
- name: keys-tmp-dir
emptyDir: {
medium: "Memory"
}
$ cat csi-rbdplugin.yaml
---
kind: DaemonSet
apiVersion: apps/v1
metadata:
name: csi-rbdplugin
spec:
selector:
matchLabels:
app: csi-rbdplugin
template:
metadata:
labels:
app: csi-rbdplugin
spec:
serviceAccountName: rbd-csi-nodeplugin
hostNetwork: true
hostPID: true
priorityClassName: system-node-critical
# to use e.g. Rook orchestrated cluster, and mons' FQDN is
# resolved through k8s service, set dns policy to cluster first
dnsPolicy: ClusterFirstWithHostNet
containers:
- name: driver-registrar
# This is necessary only for systems with SELinux, where
# non-privileged sidecar containers cannot access unix domain socket
# created by privileged CSI driver container.
securityContext:
privileged: true
image: k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.0.1
args:
- "--v=5"
- "--csi-address=/csi/csi.sock"
- "--kubelet-registration-path=/var/lib/kubelet/plugins/rbd.csi.ceph.com/csi.sock"
env:
- name: KUBE_NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
volumeMounts:
- name: socket-dir
mountPath: /csi
- name: registration-dir
mountPath: /registration
- name: csi-rbdplugin
securityContext:
privileged: true
capabilities:
add: ["SYS_ADMIN"]
allowPrivilegeEscalation: true
# for stable functionality replace canary with latest release version
image: quay.io/cephcsi/cephcsi:v3.3.0
args:
- "--nodeid=$(NODE_ID)"
- "--type=rbd"
- "--nodeserver=true"
- "--endpoint=$(CSI_ENDPOINT)"
- "--v=5"
- "--drivername=rbd.csi.ceph.com"
# If topology based provisioning is desired, configure required
# node labels representing the nodes topology domain
# and pass the label names below, for CSI to consume and advertise
# its equivalent topology domain
# - "--domainlabels=failure-domain/region,failure-domain/zone"
env:
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
- name: NODE_ID
valueFrom:
fieldRef:
fieldPath: spec.nodeName
# - name: POD_NAMESPACE
# valueFrom:
# fieldRef:
# fieldPath: spec.namespace
# - name: KMS_CONFIGMAP_NAME
# value: encryptionConfig
- name: CSI_ENDPOINT
value: unix:///csi/csi.sock
imagePullPolicy: "IfNotPresent"
volumeMounts:
- name: socket-dir
mountPath: /csi
- mountPath: /dev
name: host-dev
- mountPath: /sys
name: host-sys
- mountPath: /run/mount
name: host-mount
- mountPath: /lib/modules
name: lib-modules
readOnly: true
- name: ceph-csi-config
mountPath: /etc/ceph-csi-config/
- name: ceph-csi-encryption-kms-config
mountPath: /etc/ceph-csi-encryption-kms-config/
- name: plugin-dir
mountPath: /var/lib/kubelet/plugins
mountPropagation: "Bidirectional"
- name: mountpoint-dir
mountPath: /var/lib/kubelet/pods
mountPropagation: "Bidirectional"
- name: keys-tmp-dir
mountPath: /tmp/csi/keys
- name: liveness-prometheus
securityContext:
privileged: true
image: quay.io/cephcsi/cephcsi:v3.3.0
args:
- "--type=liveness"
- "--endpoint=$(CSI_ENDPOINT)"
- "--metricsport=8680"
- "--metricspath=/metrics"
- "--polltime=60s"
- "--timeout=3s"
env:
- name: CSI_ENDPOINT
value: unix:///csi/csi.sock
- name: POD_IP
valueFrom:
fieldRef:
fieldPath: status.podIP
volumeMounts:
- name: socket-dir
mountPath: /csi
imagePullPolicy: "IfNotPresent"
volumes:
- name: socket-dir
hostPath:
path: /var/lib/kubelet/plugins/rbd.csi.ceph.com
type: DirectoryOrCreate
- name: plugin-dir
hostPath:
path: /var/lib/kubelet/plugins
type: Directory
- name: mountpoint-dir
hostPath:
path: /var/lib/kubelet/pods
type: DirectoryOrCreate
- name: registration-dir
hostPath:
path: /var/lib/kubelet/plugins_registry/
type: Directory
- name: host-dev
hostPath:
path: /dev
- name: host-sys
hostPath:
path: /sys
- name: host-mount
hostPath:
path: /run/mount
- name: lib-modules
hostPath:
path: /lib/modules
- name: ceph-csi-config
configMap:
name: ceph-csi-config
- name: ceph-csi-encryption-kms-config
configMap:
name: ceph-csi-encryption-kms-config
- name: keys-tmp-dir
emptyDir: {
medium: "Memory"
}
---
# This is a service to expose the liveness metrics
apiVersion: v1
kind: Service
metadata:
name: csi-metrics-rbdplugin
labels:
app: csi-metrics
spec:
ports:
- name: http-metrics
port: 8080
protocol: TCP
targetPort: 8680
selector:
app: csi-rbdplugin
The two yaml files above also need several official k8s images, so I rented a cloud host and pulled them manually. Before installing the plugin, I suggest importing the images from my Baidu netdisk (extraction code: 1234) into your k8s environment; if you have another way to pull the official k8s images, just ignore this suggestion.
Once the images are imported, run the following two commands to create the resources:
$ kubectl apply -f csi-rbdplugin-provisioner.yaml
$ kubectl apply -f csi-rbdplugin.yaml
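After a minute or two the driver pods should all be Running; a quick check using the labels from the manifests above:
$ kubectl get pods -l app=csi-rbdplugin-provisioner
$ kubectl get pods -l app=csi-rbdplugin -o wide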
Create the StorageClass
You can define multiple storage classes; for example, if your Ceph cluster has both SSD and HDD media, you can create two storage classes, one per storage medium (a sketch of a second class follows the example below).
# Define the yaml file
$ cat <<EOF > csi-rbd-sc.yaml
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: csi-rbd-sc
provisioner: rbd.csi.ceph.com # the CSI driver to use
parameters:
clusterID: d94fee92-ef1a-4f1f-80a5-1c7e1caf4a4a # your Ceph cluster id, obtainable with ceph -s (or ceph fsid)
pool: kubernetes # which pool in the Ceph cluster to use
imageFeatures: layering
csi.storage.k8s.io/provisioner-secret-name: csi-rbd-secret # the user and key used to access the Ceph cluster (we stored them in a Secret, so just reference that Secret here)
csi.storage.k8s.io/provisioner-secret-namespace: default # the namespace the Secret above lives in
csi.storage.k8s.io/node-stage-secret-name: csi-rbd-secret # the Secret used at mount (node-stage) time, usually the same as above
csi.storage.k8s.io/node-stage-secret-namespace: default # same as above
csi.storage.k8s.io/controller-expand-secret-name: csi-rbd-secret
csi.storage.k8s.io/controller-expand-secret-namespace: default
reclaimPolicy: Delete # the reclaim policy
allowVolumeExpansion: true
mountOptions:
- discard
EOF
# Create the resource
$ kubectl apply -f csi-rbd-sc.yaml
# Confirm the SC was created
$ kubectl get sc
NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE
csi-rbd-sc rbd.csi.ceph.com Delete Immediate true 2s
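For the SSD/HDD scenario mentioned earlier, a second class looks almost identical; only the name and the pool change (the kubernetes-ssd pool below is hypothetical and would have to exist in the Ceph cluster, e.g. backed by an SSD-only CRUSH rule):
---
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: csi-rbd-sc-ssd
provisioner: rbd.csi.ceph.com
parameters:
  clusterID: d94fee92-ef1a-4f1f-80a5-1c7e1caf4a4a
  pool: kubernetes-ssd   # hypothetical SSD-backed pool
  imageFeatures: layering
  csi.storage.k8s.io/provisioner-secret-name: csi-rbd-secret
  csi.storage.k8s.io/provisioner-secret-namespace: default
  csi.storage.k8s.io/node-stage-secret-name: csi-rbd-secret
  csi.storage.k8s.io/node-stage-secret-namespace: default
  csi.storage.k8s.io/controller-expand-secret-name: csi-rbd-secret
  csi.storage.k8s.io/controller-expand-secret-namespace: default
reclaimPolicy: Delete
allowVolumeExpansion: true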
Create a PVC to dynamically request storage
A PVC can request storage at the block level or at the filesystem level. Filesystem level is generally recommended: if you request a raw block device, it can only be mapped into the pod as a block device, and you would still have to format it yourself after the pod starts, which is rarely what you want. If you want to see how to request a block device, refer to the official documentation (it is almost the same: change Filesystem below to Block, and when the pod uses the volume it can no longer use volumeMounts but needs volumeDevices instead; a sketch follows the filesystem example below):
# Request filesystem-level storage
$ cat <<EOF > rbd-pvc.yaml
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: rbd-pvc
spec:
accessModes:
- ReadWriteOnce
volumeMode: Filesystem
resources:
requests:
storage: 1Gi
storageClassName: csi-rbd-sc
EOF
# Create the PVC
$ kubectl apply -f rbd-pvc.yaml
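As mentioned above, the block-mode variant only changes volumeMode and how the pod consumes the volume; a minimal sketch (the names rbd-pvc-block and block-demo are just examples):
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: rbd-pvc-block
spec:
  accessModes:
    - ReadWriteOnce
  volumeMode: Block              # raw block device instead of a filesystem
  resources:
    requests:
      storage: 1Gi
  storageClassName: csi-rbd-sc
---
apiVersion: v1
kind: Pod
metadata:
  name: block-demo
spec:
  containers:
    - name: demo
      image: nginx
      volumeDevices:             # note: volumeDevices, not volumeMounts
        - name: data
          devicePath: /dev/xvda  # the raw device shows up at this path in the container
  volumes:
    - name: data
      persistentVolumeClaim:
        claimName: rbd-pvc-block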
Inspect the SC, PV, and PVC objects
Once the PVC is created, it asks the SC to provision a matching PV, and the PV and PVC are bound. From the user's point of view the whole process is automatic.
$ kubectl get pv,pvc,sc
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
persistentvolume/pvc-74eb57c1-9cb5-442f-8dfe-71f2e869f4df 1Gi RWO Delete Bound default/rbd-pvc csi-rbd-sc 18s
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
persistentvolumeclaim/rbd-pvc Bound pvc-74eb57c1-9cb5-442f-8dfe-71f2e869f4df 1Gi RWO csi-rbd-sc 18s
NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE
storageclass.storage.k8s.io/csi-rbd-sc rbd.csi.ceph.com Delete Immediate true 75m
# On the Ceph side there is also a corresponding block image
$ rbd -p kubernetes ls
csi-vol-1310f455-a180-11eb-9aeb-6e7f74732123
At this point the full PV/PVC/SC workflow is in place, which is also the most practical storage solution in production.
Create a pod that mounts the PVC
Now define a Deployment that mounts and uses the PVC we created above.
$ cat > nginx.yaml << EOF
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: web-nginx
labels:
k8s.cn/layer: web
spec:
replicas: 1
selector:
matchLabels:
k8s.cn/layer: web
template:
metadata:
labels:
k8s.cn/layer: web
spec:
containers:
- image: nginx
imagePullPolicy: IfNotPresent
name: nginx
ports:
- containerPort: 80
name: www
protocol: TCP
volumeMounts:
- mountPath: /data
name: rbd # must match the volume name defined below
volumes:
- name: rbd
persistentVolumeClaim:
claimName: rbd-pvc # the name of the PVC
EOF
# Create the Deployment
$ kubectl apply -f nginx.yaml
Verify the storage mount
# Look up the pod name yourself and enter the pod
$ kubectl exec -it web-nginx-7698cd7569-5bjf7 -- /bin/bash
root@web-nginx-7698cd7569-5bjf7:/# df -hT /data # check the /data directory
Filesystem Type Size Used Avail Use% Mounted on
/dev/rbd0 ext4 976M 2.6M 958M 1% /data
At this point the SC works, but since we verified it with a Deployment its value isn't fully on display: we still had to create the PVC ourselves before using it. If you consume storage through StatefulSets instead, the process becomes more automated; let's try that next.
StorageClass best practice
In one sentence: StorageClass shows its full power only when combined with StatefulSets.
The yaml below is adapted from the official documentation.
# Define the StatefulSet
$ cat > nginx.yaml << EOF
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: web
spec:
selector:
matchLabels:
app: nginx # has to match .spec.template.metadata.labels
serviceName: "nginx"
replicas: 3 # by default is 1
template:
metadata:
labels:
app: nginx # has to match .spec.selector.matchLabels
spec:
terminationGracePeriodSeconds: 10
containers:
- name: nginx
image: nginx
ports:
- containerPort: 80
name: web
volumeMounts:
- name: www
mountPath: /usr/share/nginx/html
volumeClaimTemplates: # the key field: the volume claim template is used to create the corresponding PVCs and PVs automatically
- metadata:
name: www
spec:
accessModes: [ "ReadWriteOnce" ]
storageClassName: "csi-rbd-sc"
resources:
requests:
storage: 1Gi
EOF
# Create the StatefulSet
$ kubectl apply -f nginx.yaml
# Inspect the created resources (you can see the corresponding PVs and PVCs were created and bound automatically)
$ kubectl get sts,pvc,pv
NAME READY AGE
statefulset.apps/web 3/3 20m
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
persistentvolumeclaim/www-web-0 Bound pvc-aff8fcc7-41bb-467a-a3d7-e29bbdede904 1Gi RWO csi-rbd-sc 20m
persistentvolumeclaim/www-web-1 Bound pvc-cc929a1b-0401-48aa-bf2a-dc2dca28079a 1Gi RWO csi-rbd-sc 20m
persistentvolumeclaim/www-web-2 Bound pvc-b077642f-b802-4b1a-b02b-df02d55a8891 1Gi RWO csi-rbd-sc 19m
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
persistentvolume/pvc-aff8fcc7-41bb-467a-a3d7-e29bbdede904 1Gi RWO Delete Bound default/www-web-0 csi-rbd-sc 20m
persistentvolume/pvc-b077642f-b802-4b1a-b02b-df02d55a8891 1Gi RWO Delete Bound default/www-web-2 csi-rbd-sc 19m
persistentvolume/pvc-cc929a1b-0401-48aa-bf2a-dc2dca28079a 1Gi RWO Delete Bound default/www-web-1 csi-rbd-sc 20m
$ kubectl get pods | grep web
web-0 1/1 Running 0 19m
web-1 1/1 Running 0 19m
web-2 1/1 Running 0 18m
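Each replica got its own PVC and therefore its own RBD image; you can confirm this back on the Ceph side, where you should now see one csi-vol-* image per PVC:
$ rbd -p kubernetes ls | grep csi-vol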