docker bridge模式下会在创建容器之前先:
    1、创建bridge
    2、创建veth pair,把一端连到bridge
    3、创建一个新的net ns
    4、把veth另一端移动到net ns里
    5、(我认为会)将runC的spec中network namespace的path填写为刚创建的net ns的path
    6、(我认为会)runC不会创建新的net ns,而是复用该net ns,新建其他的ns

    但是在看源码时发现,第5、6步并不是像我想的那样做的,docker实际生成的config.json是这样的:
    /run/containerd/io.containerd.runtime.v1.linux/moby/$container_id/config.json

    1. {
    2. "ociVersion": "1.0.1-dev",
    3. "process": {
    4. "user": {
    5. "uid": 0,
    6. "gid": 0,
    7. "additionalGids": [
    8. 10
    9. ]
    10. },
    11. "args": [
    12. "sleep",
    13. "24h"
    14. ],
    15. "env": [
    16. "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
    17. "HOSTNAME=790b6f02b024"
    18. ],
    19. "cwd": "/",
    20. "oomScoreAdj": 0
    21. },
    22. "root": {
    23. "path": "/var/lib/docker/aufs/mnt/071238d45a177890efadf9dbd1f66c505808e77e4c3bba6355323081ac269351"
    24. },
    25. "hostname": "790b6f02b024",
    26. "mounts": [
    27. {
    28. "destination": "/proc",
    29. "type": "proc",
    30. "source": "proc",
    31. "options": [
    32. "nosuid",
    33. "noexec",
    34. "nodev"
    35. ]
    36. },
    37. {
    38. "destination": "/dev",
    39. "type": "tmpfs",
    40. "source": "tmpfs",
    41. "options": [
    42. "nosuid",
    43. "strictatime",
    44. "mode=755",
    45. "size=65536k"
    46. ]
    47. },
    48. {
    49. "destination": "/dev/pts",
    50. "type": "devpts",
    51. "source": "devpts",
    52. "options": [
    53. "nosuid",
    54. "noexec",
    55. "newinstance",
    56. "ptmxmode=0666",
    57. "mode=0620",
    58. "gid=5"
    59. ]
    60. },
    61. {
    62. "destination": "/sys",
    63. "type": "sysfs",
    64. "source": "sysfs",
    65. "options": [
    66. "nosuid",
    67. "noexec",
    68. "nodev",
    69. "ro"
    70. ]
    71. },
    72. {
    73. "destination": "/sys/fs/cgroup",
    74. "type": "cgroup",
    75. "source": "cgroup",
    76. "options": [
    77. "ro",
    78. "nosuid",
    79. "noexec",
    80. "nodev"
    81. ]
    82. },
    83. {
    84. "destination": "/dev/mqueue",
    85. "type": "mqueue",
    86. "source": "mqueue",
    87. "options": [
    88. "nosuid",
    89. "noexec",
    90. "nodev"
    91. ]
    92. },
    93. {
    94. "destination": "/etc/resolv.conf",
    95. "type": "bind",
    96. "source": "/var/lib/docker/containers/790b6f02b02464bd9301125fb83eee5c9527f411433346dd194b74eb5f096e44/resolv.conf",
    97. "options": [
    98. "rbind",
    99. "rprivate"
    100. ]
    101. },
    102. {
    103. "destination": "/etc/hostname",
    104. "type": "bind",
    105. "source": "/var/lib/docker/containers/790b6f02b02464bd9301125fb83eee5c9527f411433346dd194b74eb5f096e44/hostname",
    106. "options": [
    107. "rbind",
    108. "rprivate"
    109. ]
    110. },
    111. {
    112. "destination": "/etc/hosts",
    113. "type": "bind",
    114. "source": "/var/lib/docker/containers/790b6f02b02464bd9301125fb83eee5c9527f411433346dd194b74eb5f096e44/hosts",
    115. "options": [
    116. "rbind",
    117. "rprivate"
    118. ]
    119. },
    120. {
    121. "destination": "/dev/shm",
    122. "type": "bind",
    123. "source": "/var/lib/docker/containers/790b6f02b02464bd9301125fb83eee5c9527f411433346dd194b74eb5f096e44/mounts/shm",
    124. "options": [
    125. "rbind",
    126. "rprivate"
    127. ]
    128. }
    129. ],
    130. "hooks": {
    131. "prestart": [
    132. {
    133. "path": "/proc/2325/exe",
    134. "args": [
    135. "libnetwork-setkey",
    136. "790b6f02b02464bd9301125fb83eee5c9527f411433346dd194b74eb5f096e44",
    137. "c2a68d82d6cfefcc3404f3a2ce3a5bba4204f518d27da191ae7f7359619cd6c3"
    138. ]
    139. }
    140. ]
    141. },
    142. "linux": {
    143. "resources": {
    144. "devices": [
    145. {
    146. "allow": false,
    147. "access": "rwm"
    148. },
    149. {
    150. "allow": true,
    151. "type": "c",
    152. "major": 1,
    153. "minor": 5,
    154. "access": "rwm"
    155. },
    156. {
    157. "allow": true,
    158. "type": "c",
    159. "major": 1,
    160. "minor": 3,
    161. "access": "rwm"
    162. },
    163. {
    164. "allow": true,
    165. "type": "c",
    166. "major": 1,
    167. "minor": 9,
    168. "access": "rwm"
    169. },
    170. {
    171. "allow": true,
    172. "type": "c",
    173. "major": 1,
    174. "minor": 8,
    175. "access": "rwm"
    176. },
    177. {
    178. "allow": true,
    179. "type": "c",
    180. "major": 5,
    181. "minor": 0,
    182. "access": "rwm"
    183. },
    184. {
    185. "allow": true,
    186. "type": "c",
    187. "major": 5,
    188. "minor": 1,
    189. "access": "rwm"
    190. },
    191. {
    192. "allow": false,
    193. "type": "c",
    194. "major": 10,
    195. "minor": 229,
    196. "access": "rwm"
    197. }
    198. ],
    199. "memory": {
    200. "disableOOMKiller": false
    201. },
    202. "cpu": {
    203. "shares": 0
    204. },
    205. "pids": {
    206. "limit": 0
    207. },
    208. "blockIO": {
    209. "weight": 0
    210. }
    211. },
    212. "cgroupsPath": "/docker/790b6f02b02464bd9301125fb83eee5c9527f411433346dd194b74eb5f096e44",
    213. "namespaces": [
    214. {
    215. "type": "mount"
    216. },
    217. {
    218. "type": "network"
    219. },
    220. {
    221. "type": "uts"
    222. },
    223. {
    224. "type": "pid"
    225. },
    226. {
    227. "type": "ipc"
    228. }
    229. ],
    230. "maskedPaths": [
    231. "/proc/asound",
    232. "/proc/acpi",
    233. "/proc/kcore",
    234. "/proc/keys",
    235. "/proc/latency_stats",
    236. "/proc/timer_list",
    237. "/proc/timer_stats",
    238. "/proc/sched_debug",
    239. "/proc/scsi",
    240. "/sys/firmware"
    241. ],
    242. "readonlyPaths": [
    243. "/proc/bus",
    244. "/proc/fs",
    245. "/proc/irq",
    246. "/proc/sys",
    247. "/proc/sysrq-trigger"
    248. ]
    249. }
    250. }

    runC对于指定了path的ns会选择setns(加入已有的ns),对于没有指定path的ns则选择新建。

    docker中有关spec构造的源码是daemon/oci_linux.go#setNamespaces。
    对于default(bridge)模式,并不会设置ns的path。

    1. // network
    2. if !c.Config.NetworkDisabled {
    3. ns := specs.LinuxNamespace{Type: "network"}
    4. parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
    5. if parts[0] == "container" {
    6. nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
    7. if err != nil {
    8. return err
    9. }
    10. ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
    11. if userNS {
    12. // to share a net namespace, they must also share a user namespace
    13. nsUser := specs.LinuxNamespace{Type: "user"}
    14. nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", nc.State.GetPID())
    15. setNamespace(s, nsUser)
    16. }
    17. } else if c.HostConfig.NetworkMode.IsHost() {
    18. ns.Path = c.NetworkSettings.SandboxKey
    19. }
    20. setNamespace(s, ns)
    21. }

    那么问题来了:runC为什么没有使用docker创建的net ns,而是选择自己新建net ns呢?还是说它其实用了docker创建的net ns,只是我的理解有问题?