docker bridge模式下会在创建容器之前先:
1、创建bridge
2、创建veth pair,把一端连到bridge
3、创建一个新的net ns
4、把veth另一端移动到net ns里
5、(我认为会)将runC的spec中network namespace的path填写为刚创建的net ns的path
6、(我认为会)runC不会创建新的net ns,而是复用该net ns,新建其他的ns
但是在看源码时发现,第5、6步并不是像我想的那样做的,docker实际生成的config.json是这样的:
/run/containerd/io.containerd.runtime.v1.linux/moby/$container_id/config.json
{
"ociVersion": "1.0.1-dev",
"process": {
"user": {
"uid": 0,
"gid": 0,
"additionalGids": [
10
]
},
"args": [
"sleep",
"24h"
],
"env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"HOSTNAME=790b6f02b024"
],
"cwd": "/",
"oomScoreAdj": 0
},
"root": {
"path": "/var/lib/docker/aufs/mnt/071238d45a177890efadf9dbd1f66c505808e77e4c3bba6355323081ac269351"
},
"hostname": "790b6f02b024",
"mounts": [
{
"destination": "/proc",
"type": "proc",
"source": "proc",
"options": [
"nosuid",
"noexec",
"nodev"
]
},
{
"destination": "/dev",
"type": "tmpfs",
"source": "tmpfs",
"options": [
"nosuid",
"strictatime",
"mode=755",
"size=65536k"
]
},
{
"destination": "/dev/pts",
"type": "devpts",
"source": "devpts",
"options": [
"nosuid",
"noexec",
"newinstance",
"ptmxmode=0666",
"mode=0620",
"gid=5"
]
},
{
"destination": "/sys",
"type": "sysfs",
"source": "sysfs",
"options": [
"nosuid",
"noexec",
"nodev",
"ro"
]
},
{
"destination": "/sys/fs/cgroup",
"type": "cgroup",
"source": "cgroup",
"options": [
"ro",
"nosuid",
"noexec",
"nodev"
]
},
{
"destination": "/dev/mqueue",
"type": "mqueue",
"source": "mqueue",
"options": [
"nosuid",
"noexec",
"nodev"
]
},
{
"destination": "/etc/resolv.conf",
"type": "bind",
"source": "/var/lib/docker/containers/790b6f02b02464bd9301125fb83eee5c9527f411433346dd194b74eb5f096e44/resolv.conf",
"options": [
"rbind",
"rprivate"
]
},
{
"destination": "/etc/hostname",
"type": "bind",
"source": "/var/lib/docker/containers/790b6f02b02464bd9301125fb83eee5c9527f411433346dd194b74eb5f096e44/hostname",
"options": [
"rbind",
"rprivate"
]
},
{
"destination": "/etc/hosts",
"type": "bind",
"source": "/var/lib/docker/containers/790b6f02b02464bd9301125fb83eee5c9527f411433346dd194b74eb5f096e44/hosts",
"options": [
"rbind",
"rprivate"
]
},
{
"destination": "/dev/shm",
"type": "bind",
"source": "/var/lib/docker/containers/790b6f02b02464bd9301125fb83eee5c9527f411433346dd194b74eb5f096e44/mounts/shm",
"options": [
"rbind",
"rprivate"
]
}
],
"hooks": {
"prestart": [
{
"path": "/proc/2325/exe",
"args": [
"libnetwork-setkey",
"790b6f02b02464bd9301125fb83eee5c9527f411433346dd194b74eb5f096e44",
"c2a68d82d6cfefcc3404f3a2ce3a5bba4204f518d27da191ae7f7359619cd6c3"
]
}
]
},
"linux": {
"resources": {
"devices": [
{
"allow": false,
"access": "rwm"
},
{
"allow": true,
"type": "c",
"major": 1,
"minor": 5,
"access": "rwm"
},
{
"allow": true,
"type": "c",
"major": 1,
"minor": 3,
"access": "rwm"
},
{
"allow": true,
"type": "c",
"major": 1,
"minor": 9,
"access": "rwm"
},
{
"allow": true,
"type": "c",
"major": 1,
"minor": 8,
"access": "rwm"
},
{
"allow": true,
"type": "c",
"major": 5,
"minor": 0,
"access": "rwm"
},
{
"allow": true,
"type": "c",
"major": 5,
"minor": 1,
"access": "rwm"
},
{
"allow": false,
"type": "c",
"major": 10,
"minor": 229,
"access": "rwm"
}
],
"memory": {
"disableOOMKiller": false
},
"cpu": {
"shares": 0
},
"pids": {
"limit": 0
},
"blockIO": {
"weight": 0
}
},
"cgroupsPath": "/docker/790b6f02b02464bd9301125fb83eee5c9527f411433346dd194b74eb5f096e44",
"namespaces": [
{
"type": "mount"
},
{
"type": "network"
},
{
"type": "uts"
},
{
"type": "pid"
},
{
"type": "ipc"
}
],
"maskedPaths": [
"/proc/asound",
"/proc/acpi",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/proc/scsi",
"/sys/firmware"
],
"readonlyPaths": [
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
]
}
}
runC对于有path的ns会选择setns(加入已有的ns),对于没有path的ns会选择新建。
docker中有关spec构造的源码是daemon/oci_linux.go#setNamespaces。
对于default(bridge)模式,并不会设置ns的path。
// network
if !c.Config.NetworkDisabled {
ns := specs.LinuxNamespace{Type: "network"}
parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
if parts[0] == "container" {
nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
if err != nil {
return err
}
ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
if userNS {
// to share a net namespace, they must also share a user namespace
nsUser := specs.LinuxNamespace{Type: "user"}
nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", nc.State.GetPID())
setNamespace(s, nsUser)
}
} else if c.HostConfig.NetworkMode.IsHost() {
ns.Path = c.NetworkSettings.SandboxKey
}
setNamespace(s, ns)
}
那么问题来了:runC为什么没有使用docker创建的net ns,而是选择自己新建net ns呢?还是说它其实用了docker创建的net ns,只是我的理解有问题?