容器处理同样从路由开始追踪,通过路由代码可以看到核心处理组件为 daemon.Daemon 实例。当请求到达 HTTP Server 后,根据匹配的路由处理方法进行预处理(参数是否正确、配置文件格式是否正确等)后,最终由 Backend 实例进行处理。
container-routes.svg
图 1:Container 路由

container.Backend 接口定义如下,根据不同的方法将接口归类,以 17 ~ 31 行的 stateBackend 为例,它定义了容器状态相关的方法,其他类似。

  1. // execBackend includes functions to implement to provide exec functionality.
  2. type execBackend interface {
  3. ContainerExecCreate(name string, config *types.ExecConfig) (string, error)
  4. ContainerExecInspect(id string) (*backend.ExecInspect, error)
  5. ContainerExecResize(name string, height, width int) error
  6. ContainerExecStart(ctx context.Context, name string, stdin io.Reader, stdout io.Writer, stderr io.Writer) error
  7. ExecExists(name string) (bool, error)
  8. }
  9. // copyBackend includes functions to implement to provide container copy functionality.
  10. type copyBackend interface {
  11. ContainerArchivePath(name string, path string) (content io.ReadCloser, stat *types.ContainerPathStat, err error)
  12. ContainerCopy(name string, res string) (io.ReadCloser, error)
  13. ContainerExport(name string, out io.Writer) error
  14. ContainerExtractToDir(name, path string, copyUIDGID, noOverwriteDirNonDir bool, content io.Reader) error
  15. ContainerStatPath(name string, path string) (stat *types.ContainerPathStat, err error)
  16. }
  17. // stateBackend includes functions to implement to provide container state lifecycle functionality.
  18. type stateBackend interface {
  19. ContainerCreate(config types.ContainerCreateConfig) (container.ContainerCreateCreatedBody, error)
  20. ContainerKill(name string, sig uint64) error
  21. ContainerPause(name string) error
  22. ContainerRename(oldName, newName string) error
  23. ContainerResize(name string, height, width int) error
  24. ContainerRestart(name string, seconds *int) error
  25. ContainerRm(name string, config *types.ContainerRmConfig) error
  26. ContainerStart(name string, hostConfig *container.HostConfig, checkpoint string, checkpointDir string) error
  27. ContainerStop(name string, seconds *int) error
  28. ContainerUnpause(name string) error
  29. ContainerUpdate(name string, hostConfig *container.HostConfig) (container.ContainerUpdateOKBody, error)
  30. ContainerWait(ctx context.Context, name string, condition containerpkg.WaitCondition) (<-chan containerpkg.StateStatus, error)
  31. }
  32. // monitorBackend includes functions to implement to provide containers monitoring functionality.
  33. type monitorBackend interface {
  34. ContainerChanges(name string) ([]archive.Change, error)
  35. ContainerInspect(name string, size bool, version string) (interface{}, error)
  36. ContainerLogs(ctx context.Context, name string, config *types.ContainerLogsOptions) (msgs <-chan *backend.LogMessage, tty bool, err error)
  37. ContainerStats(ctx context.Context, name string, config *backend.ContainerStatsConfig) error
  38. ContainerTop(name string, psArgs string) (*container.ContainerTopOKBody, error)
  39. Containers(config *types.ContainerListOptions) ([]*types.Container, error)
  40. }
  41. // attachBackend includes function to implement to provide container attaching functionality.
  42. type attachBackend interface {
  43. ContainerAttach(name string, c *backend.ContainerAttachConfig) error
  44. }
  45. // systemBackend includes functions to implement to provide system wide containers functionality
  46. type systemBackend interface {
  47. ContainersPrune(ctx context.Context, pruneFilters filters.Args) (*types.ContainersPruneReport, error)
  48. }
  49. type commitBackend interface {
  50. CreateImageFromContainer(name string, config *backend.CreateImageConfig) (imageID string, err error)
  51. }
  52. // Backend is all the methods that need to be implemented to provide container specific functionality.
  53. type Backend interface {
  54. commitBackend
  55. execBackend
  56. copyBackend
  57. stateBackend
  58. monitorBackend
  59. attachBackend
  60. systemBackend
  61. }

Create a Container

Configuration

创建一个容器时,需要根据配置创建一个容器实例,但是,并不运行这个容器实例。创建时需要根据用户配置来设置容器实例,核心配置为 ContainerCreateConfig,主要包含了一般性配置项、主机配置项、网络配置及镜像运行平台配置。

  // ContainerCreateConfig groups the parameters used to create a container:
  // its name, the general container config, the host-specific config, the
  // networking config, the target platform, and the AdjustCPUShares flag.
  1. type ContainerCreateConfig struct {
  2. Name string
  3. Config *container.Config
  4. HostConfig *container.HostConfig
  5. NetworkingConfig *network.NetworkingConfig
  6. Platform *specs.Platform
  7. AdjustCPUShares bool
  8. }

Config

一般配置中包含了容器配置、运行的基础信息,如:主机名、域名、标准输入输出、环境变量、卷存储信息等等,其定义如下。

  1. type Config struct {
  2. Hostname string // Hostname
  3. Domainname string // Domainname
  4. User string // User that will run the command(s) inside the container, also support user:group
  5. AttachStdin bool // Attach the standard input, makes possible user interaction
  6. AttachStdout bool // Attach the standard output
  7. AttachStderr bool // Attach the standard error
  8. ExposedPorts nat.PortSet `json:",omitempty"` // List of exposed ports
  9. Tty bool // Attach standard streams to a tty, including stdin if it is not closed.
  10. OpenStdin bool // Open stdin
  11. StdinOnce bool // If true, close stdin after the 1 attached client disconnects.
  12. Env []string // List of environment variable to set in the container
  13. Cmd strslice.StrSlice // Command to run when starting the container
  14. Healthcheck *HealthConfig `json:",omitempty"` // Healthcheck describes how to check the container is healthy
  15. ArgsEscaped bool `json:",omitempty"` // True if command is already escaped (meaning treat as a command line) (Windows specific).
  16. Image string // Name of the image as it was passed by the operator (e.g. could be symbolic)
  17. Volumes map[string]struct{} // List of volumes (mounts) used for the container
  18. WorkingDir string // Current directory (PWD) in the command will be launched
  19. Entrypoint strslice.StrSlice // Entrypoint to run when starting the container
  20. NetworkDisabled bool `json:",omitempty"` // Is network disabled
  21. MacAddress string `json:",omitempty"` // Mac Address of the container
  22. OnBuild []string // ONBUILD metadata that were defined on the image Dockerfile
  23. Labels map[string]string // List of labels set to this container
  24. StopSignal string `json:",omitempty"` // Signal to stop a container
  25. StopTimeout *int `json:",omitempty"` // Timeout (in seconds) to stop a container
  26. Shell strslice.StrSlice `json:",omitempty"` // Shell for shell-form of RUN, CMD, ENTRYPOINT
  27. }

Host Config

HostConfig 用于设置容器运行的宿主平台配置选项,这些选项与容器运行的宿主机是强关联的,或者说是容器的非通用设置部分。

  1. type HostConfig struct {
  2. // Applicable to all platforms
  3. Binds []string // List of volume bindings for this container
  4. ContainerIDFile string // File (path) where the containerId is written
  5. LogConfig LogConfig // Configuration of the logs for this container
  6. NetworkMode NetworkMode // Network mode to use for the container
  7. PortBindings nat.PortMap // Port mapping between the exposed port (container) and the host
  8. RestartPolicy RestartPolicy // Restart policy to be used for the container
  9. AutoRemove bool // Automatically remove container when it exits
  10. VolumeDriver string // Name of the volume driver used to mount volumes
  11. VolumesFrom []string // List of volumes to take from other container
  12. // Applicable to UNIX platforms
  13. CapAdd strslice.StrSlice // List of kernel capabilities to add to the container
  14. CapDrop strslice.StrSlice // List of kernel capabilities to remove from the container
  15. CgroupnsMode CgroupnsMode // Cgroup namespace mode to use for the container
  16. DNS []string `json:"Dns"` // List of DNS server to lookup
  17. DNSOptions []string `json:"DnsOptions"` // List of DNSOption to look for
  18. DNSSearch []string `json:"DnsSearch"` // List of DNSSearch to look for
  19. ExtraHosts []string // List of extra hosts
  20. GroupAdd []string // List of additional groups that the container process will run as
  21. IpcMode IpcMode // IPC namespace to use for the container
  22. Cgroup CgroupSpec // Cgroup to use for the container
  23. Links []string // List of links (in the name:alias form)
  24. OomScoreAdj int // Container preference for OOM-killing
  25. PidMode PidMode // PID namespace to use for the container
  26. Privileged bool // Is the container in privileged mode
  27. PublishAllPorts bool // Should docker publish all exposed port for the container
  28. ReadonlyRootfs bool // Is the container root filesystem in read-only
  29. SecurityOpt []string // List of string values to customize labels for MLS systems, such as SELinux.
  30. StorageOpt map[string]string `json:",omitempty"` // Storage driver options per container.
  31. Tmpfs map[string]string `json:",omitempty"` // List of tmpfs (mounts) used for the container
  32. UTSMode UTSMode // UTS namespace to use for the container
  33. UsernsMode UsernsMode // The user namespace to use for the container
  34. ShmSize int64 // Total shm memory usage
  35. Sysctls map[string]string `json:",omitempty"` // List of Namespaced sysctls used for the container
  36. Runtime string `json:",omitempty"` // Runtime to use with this container
  37. // Applicable to Windows
  38. ConsoleSize [2]uint // Initial console size (height,width)
  39. Isolation Isolation // Isolation technology of the container (e.g. default, hyperv)
  40. // Contains container's resources (cgroups, ulimits)
  41. Resources
  42. // Mounts specs used by the container
  43. Mounts []mount.Mount `json:",omitempty"`
  44. // MaskedPaths is the list of paths to be masked inside the container (this overrides the default set of paths)
  45. MaskedPaths []string
  46. // ReadonlyPaths is the list of paths to be set as read-only inside the container (this overrides the default set of paths)
  47. ReadonlyPaths []string
  48. // Run a custom init inside the container, if null, use the daemon's configured settings
  49. Init *bool `json:",omitempty"`
  50. }

Network Config

NetworkingConfig 用于配置容器的 NIC 接口配置,其定义如下所示,EndpointSettings 暂时不在这里展开。

  1. type NetworkingConfig struct {
  2. EndpointsConfig map[string]*EndpointSettings // Endpoint configs for each connecting network
  3. }

Network Mode

NetworkMode 是以 string 为底层类型定义的新类型(并非严格意义上的类型别名),通过在该类型上附加方法可以让代码逻辑变得更加清晰,提升可读性,同时并不影响真实配置项。

  1. // NetworkMode represents the container network stack.
  // It is a defined type with underlying type string, which is what allows
  // the helper methods below to be attached to it.
  2. type NetworkMode string
  3. // IsNone indicates whether container isn't using a network stack.
  4. func (n NetworkMode) IsNone() bool {
  5. return n == "none"
  6. }
  7. // IsDefault indicates whether container uses the default network stack.
  8. func (n NetworkMode) IsDefault() bool {
  9. return n == "default"
  10. }
  11. // IsPrivate indicates whether container uses its private network stack.
  // It is true for every mode that is neither host nor container mode.
  // NOTE(review): IsHost is called here but is not defined in this listing.
  12. func (n NetworkMode) IsPrivate() bool {
  13. return !(n.IsHost() || n.IsContainer())
  14. }
  15. // IsContainer indicates whether container uses a container network stack.
  // The mode must have the form "container:<something>"; only the first ':'
  // is used as the separator.
  16. func (n NetworkMode) IsContainer() bool {
  17. parts := strings.SplitN(string(n), ":", 2)
  18. return len(parts) > 1 && parts[0] == "container"
  19. }
  20. // ConnectedContainer is the id of the container which network this container is connected to.
  // It returns everything after the first ':' and "" when there is no ':'.
  // The prefix is not checked to be "container" here.
  21. func (n NetworkMode) ConnectedContainer() string {
  22. parts := strings.SplitN(string(n), ":", 2)
  23. if len(parts) > 1 {
  24. return parts[1]
  25. }
  26. return ""
  27. }
  28. // UserDefined indicates user-created network
  // It returns the mode itself when IsUserDefined reports true, "" otherwise.
  // NOTE(review): IsUserDefined is called here but is not defined in this listing.
  29. func (n NetworkMode) UserDefined() string {
  30. if n.IsUserDefined() {
  31. return string(n)
  32. }
  33. return ""
  34. }

Container Creation

首先,进行镜像与运行平台检查,如果镜像与运行平台信息不符,提供警告信息。注意触发条件是配置中没有运行平台信息且镜像名称不为空。

  1. if opts.params.Platform == nil && opts.params.Config.Image != "" {
  2. if img, _ := daemon.imageService.GetImage(opts.params.Config.Image, opts.params.Platform); img != nil {
  3. p := platforms.DefaultSpec()
  4. imgPlat := v1.Platform{
  5. OS: img.OS,
  6. Architecture: img.Architecture,
  7. Variant: img.Variant,
  8. }
  9. if !images.OnlyPlatformWithFallback(p).Match(imgPlat) {
  10. warnings = append(warnings, fmt.Sprintf("The requested image's platform (%s) does not match the detected host platform (%s) and no specific platform was requested", platforms.Format(imgPlat), platforms.Format(p)))
  11. }
  12. }
  13. }

接下来,将对网络配置进行检查,检查是否有不正确的 IP 地址配置存在,注意在返回错误信息时,会将报警信息同时返回。

  1. err = verifyNetworkingConfig(opts.params.NetworkingConfig)
  2. if err != nil {
  3. return containertypes.ContainerCreateCreatedBody{Warnings: warnings}, errdefs.InvalidParameter(err)
  4. }

然后,根据宿主机平台进行进一步设置,注意第 4 行的 adaptContainerSettings 是与操作系统相关的方法,需要根据目标操作系统继续跟进。

  1. if opts.params.HostConfig == nil {
  2. opts.params.HostConfig = &containertypes.HostConfig{}
  3. }
  4. err = daemon.adaptContainerSettings(opts.params.HostConfig, opts.params.AdjustCPUShares)

最后,执行容器创建方法。

  1. ctr, err := daemon.create(opts)

Merge Configuration

执行容器创建前,会进一步根据容器用户配置与镜像配置进行配置合并操作,如果用户配置中没有配置诸如 User、ExposedPorts 等等时,会根据镜像默认配置进行补全,细节部分如下。

  // merge completes userConf in place with values taken from imageConf.
  // Values the user already set are kept; only missing ones are filled in
  // (see the per-field notes below). It always returns nil.
  1. func merge(userConf, imageConf *containertypes.Config) error {
  2. if userConf.User == "" {
  3. userConf.User = imageConf.User
  4. }
  // ExposedPorts: take the image's set when the user has none, otherwise add
  // the image ports that are missing from the user's set.
  5. if len(userConf.ExposedPorts) == 0 {
  6. userConf.ExposedPorts = imageConf.ExposedPorts
  7. } else if imageConf.ExposedPorts != nil {
  8. for port := range imageConf.ExposedPorts {
  9. if _, exists := userConf.ExposedPorts[port]; !exists {
  10. userConf.ExposedPorts[port] = struct{}{}
  11. }
  12. }
  13. }
  // Env: take the image's list when the user has none, otherwise append each
  // image entry whose key (the text before the first '=') is not already
  // present among the user's entries.
  14. if len(userConf.Env) == 0 {
  15. userConf.Env = imageConf.Env
  16. } else {
  17. for _, imageEnv := range imageConf.Env {
  18. found := false
  19. imageEnvKey := strings.Split(imageEnv, "=")[0]
  20. for _, userEnv := range userConf.Env {
  21. userEnvKey := strings.Split(userEnv, "=")[0]
  22. if isWindows {
  23. // Case insensitive environment variables on Windows
  24. imageEnvKey = strings.ToUpper(imageEnvKey)
  25. userEnvKey = strings.ToUpper(userEnvKey)
  26. }
  27. if imageEnvKey == userEnvKey {
  28. found = true
  29. break
  30. }
  31. }
  32. if !found {
  33. userConf.Env = append(userConf.Env, imageEnv)
  34. }
  35. }
  36. }
  // Labels: make sure the user's map exists, then add image labels whose key
  // the user did not set.
  37. if userConf.Labels == nil {
  38. userConf.Labels = map[string]string{}
  39. }
  40. for l, v := range imageConf.Labels {
  41. if _, ok := userConf.Labels[l]; !ok {
  42. userConf.Labels[l] = v
  43. }
  44. }
  // Cmd/Entrypoint: the image's Cmd is inherited only when the user set neither
  // an entrypoint nor a command. The image's Entrypoint is inherited only when
  // the user's is nil; a non-nil but empty entrypoint is left as is.
  45. if len(userConf.Entrypoint) == 0 {
  46. if len(userConf.Cmd) == 0 {
  47. userConf.Cmd = imageConf.Cmd
  48. }
  49. if userConf.Entrypoint == nil {
  50. userConf.Entrypoint = imageConf.Entrypoint
  51. }
  52. }
  // Healthcheck: use the image's config wholesale when the user has none,
  // otherwise fill in only the fields the user left empty or zero.
  53. if imageConf.Healthcheck != nil {
  54. if userConf.Healthcheck == nil {
  55. userConf.Healthcheck = imageConf.Healthcheck
  56. } else {
  57. if len(userConf.Healthcheck.Test) == 0 {
  58. userConf.Healthcheck.Test = imageConf.Healthcheck.Test
  59. }
  60. if userConf.Healthcheck.Interval == 0 {
  61. userConf.Healthcheck.Interval = imageConf.Healthcheck.Interval
  62. }
  63. if userConf.Healthcheck.Timeout == 0 {
  64. userConf.Healthcheck.Timeout = imageConf.Healthcheck.Timeout
  65. }
  66. if userConf.Healthcheck.StartPeriod == 0 {
  67. userConf.Healthcheck.StartPeriod = imageConf.Healthcheck.StartPeriod
  68. }
  69. if userConf.Healthcheck.Retries == 0 {
  70. userConf.Healthcheck.Retries = imageConf.Healthcheck.Retries
  71. }
  72. }
  73. }
  74. if userConf.WorkingDir == "" {
  75. userConf.WorkingDir = imageConf.WorkingDir
  76. }
  // Volumes: take the image's map when the user has none, otherwise copy every
  // image entry into the user's map (an existing key is overwritten).
  77. if len(userConf.Volumes) == 0 {
  78. userConf.Volumes = imageConf.Volumes
  79. } else {
  80. for k, v := range imageConf.Volumes {
  81. userConf.Volumes[k] = v
  82. }
  83. }
  84. if userConf.StopSignal == "" {
  85. userConf.StopSignal = imageConf.StopSignal
  86. }
  87. return nil
  88. }

ID & Name

创建容器时,会生成一个随机的定长字符串,方法如下所示,注意使用的 rand.Read 方法可以确保这个字符串是安全的随机。

  // GenerateRandomID returns the hex encoding of 32 random bytes, i.e. a
  // 64-character string. An ID whose truncated form (TruncateID, defined
  // elsewhere) parses as a base-10 integer is discarded and a new one is drawn.
  // It panics if rand.Read returns an error.
  // NOTE(review): the imported rand package is not visible in this listing;
  // presumably crypto/rand — confirm.
  1. func GenerateRandomID() string {
  2. b := make([]byte, 32)
  3. for {
  4. if _, err := rand.Read(b); err != nil {
  5. panic(err) // This shouldn't happen
  6. }
  7. id := hex.EncodeToString(b)
  8. // if we try to parse the truncated for as an int and we don't have
  9. // an error then the value is all numeric and causes issues when
  10. // used as a hostname. ref #3869
  11. if _, err := strconv.ParseInt(TruncateID(id), 10, 64); err == nil {
  12. continue
  13. }
  14. return id
  15. }
  16. }

如果容器配置中没有设置容器名称,那么会通过如下算法随机生成一个容器名称,第 4 行的过滤有些致敬前辈的意味,这个随机的名称就是我们看到的奇怪的容器名称。

  // GetRandomName returns a name of the form "<left>_<right>", where both parts
  // are picked at random from the left and right lists (declared elsewhere).
  // The combination "boring_wozniak" is rejected and drawn again. When retry is
  // greater than zero, a random digit in 0-9 is appended to the name.
  1. func GetRandomName(retry int) string {
  2. begin:
  3. name := left[rand.Intn(len(left))] + "_" + right[rand.Intn(len(right))] //nolint:gosec // G404: Use of weak random number generator (math/rand instead of crypto/rand)
  4. if name == "boring_wozniak" /* Steve Wozniak is not boring */ {
  5. goto begin
  6. }
  7. if retry > 0 {
  8. name += strconv.Itoa(rand.Intn(10)) //nolint:gosec // G404: Use of weak random number generator (math/rand instead of crypto/rand)
  9. }
  10. return name
  11. }

当然,如果还有进一步的兴趣,可以自己去研究一下这些名人信息😊,下面仅摘录一部分。

  1. // Maria Gaetana Agnesi - Italian mathematician, philosopher, theologian and humanitarian. She was the first woman to write a mathematics handbook and the first woman appointed as a Mathematics Professor at a University. https://en.wikipedia.org/wiki/Maria_Gaetana_Agnesi
  2. "agnesi",
  3. // Muhammad ibn Jābir al-Ḥarrānī al-Battānī was a founding father of astronomy. https://en.wikipedia.org/wiki/Mu%E1%B8%A5ammad_ibn_J%C4%81bir_al-%E1%B8%A4arr%C4%81n%C4%AB_al-Batt%C4%81n%C4%AB
  4. "albattani",
  5. // Frances E. Allen, became the first female IBM Fellow in 1989. In 2006, she became the first female recipient of the ACM's Turing Award. https://en.wikipedia.org/wiki/Frances_E._Allen
  6. "allen",
  7. // June Almeida - Scottish virologist who took the first pictures of the rubella virus - https://en.wikipedia.org/wiki/June_Almeida
  8. "almeida",
  9. // Kathleen Antonelli, American computer programmer and one of the six original programmers of the ENIAC - https://en.wikipedia.org/wiki/Kathleen_Antonelli
  10. "antonelli",
  11. // Archimedes was a physicist, engineer and mathematician who invented too many things to list them here. https://en.wikipedia.org/wiki/Archimedes
  12. "archimedes",

比如理工科都熟悉的阿基米得:
image.png
图 2:Archimedes Thoughtful

View DB

在创建容器 ID、Name 时,会使用一个 ViewDB 对这些已经生成的信息进行保存,每次生成时还需要进行检查,确保没有相同的 ID、Name 出现。

  1. err := daemon.containersReplica.ReserveName(name, id);

ViewDB 接口及相关数据结构如下,根据注释信息可以看到 ViewDB 是一个内存数据库,它的实现使用了 github.com/hashicorp/go-memdb

  1. // ViewDB provides an in-memory transactional (ACID) container Store
  2. type ViewDB interface {
  3. Snapshot() View
  4. Save(*Container) error
  5. Delete(*Container) error
  6. ReserveName(name, containerID string) error
  7. ReleaseName(name string) error
  8. }
  9. // View can be used by readers to avoid locking
  10. type View interface {
  11. All() ([]Snapshot, error)
  12. Get(id string) (*Snapshot, error)
  13. GetID(name string) (string, error)
  14. GetAllNames() map[string][]string
  15. }
  16. type Snapshot struct {
  17. types.Container
  18. // additional info queries need to filter on
  19. // preserve nanosec resolution for queries
  20. CreatedAt time.Time
  21. StartedAt time.Time
  22. Name string
  23. Pid int
  24. ExitCode int
  25. Running bool
  26. Paused bool
  27. Managed bool
  28. ExposedPorts nat.PortSet
  29. PortBindings nat.PortSet
  30. Health string
  31. HostConfig struct {
  32. Isolation string
  33. }
  34. }

Container

接下来就是容器实例的创建及配置工作了,首先是容器根目录,简单通过 daemon.repository 和容器 ID 拼接而成。

  // containerRoot returns the root directory of the container with the given
  // id: daemon.repository joined with id.
  1. func (daemon *Daemon) containerRoot(id string) string {
  2. return filepath.Join(daemon.repository, id)
  3. }

然后通过 NewBaseContainer 创建 Container 实例

  // NewBaseContainer returns a Container with ID and Root set from the
  // arguments and with State, ExecCommands, MountPoints, StreamConfig and
  // attachContext initialized to fresh, empty values.
  1. func NewBaseContainer(id, root string) *Container {
  2. return &Container{
  3. ID: id,
  4. State: NewState(),
  5. ExecCommands: exec.NewStore(),
  6. Root: root,
  7. MountPoints: make(map[string]*volumemounts.MountPoint),
  8. StreamConfig: stream.NewConfig(),
  9. attachContext: &attachContext{},
  10. }
  11. }

最后,配置 Container 实例其他域成员,下面代码中的 base 就是创建的 Container 实例。

  1. base := daemon.newBaseContainer(id)
  2. base.Created = time.Now().UTC()
  3. base.Managed = managed
  4. base.Path = entrypoint
  5. base.Args = args // FIXME: de-duplicate from config
  6. base.Config = config
  7. base.HostConfig = &containertypes.HostConfig{}
  8. base.ImageID = imgID
  9. base.NetworkSettings = &network.Settings{IsAnonymousEndpoint: noExplicitName}
  10. base.Name = name
  11. base.Driver = daemon.imageService.GraphDriverName()
  12. base.OS = operatingSystem

RW Layer

容器实例创建完毕后,需要准备该容器实例运行时所需的可写层,可写层通过 Daemon 常驻进程实例中的 ImageService 创建。

  1. rwLayer, err := daemon.imageService.CreateLayer(ctr, setupInitLayer(daemon.idMapping))

创建可写层主要依赖 layer.Store 接口的实现,通过镜像获取到的 ChainID 作为 CreateRWLayer 的 parent 参数,如图 3 所示。
container-layer-store-interface.svg
图 3:CreateRWLayer 示意图

layer.ChainID 是以 digest.Digest 为底层类型定义的类型,也就是最终仍然是一个字符串,可以用于唯一标识一个层(镜像 RootFS 的 ChainID 即标识该镜像的文件系统层)。

  1. // ChainID is the content-addressable ID of a layer.
  2. type ChainID digest.Digest

Layer Store

CreateLayer 方法是创建可写层的唯一方法,第 17 行的 container.ID 是一个 64 个字符的十六进制随机字符串(见前文 GenerateRandomID,形式上与 SHA256 摘要相同),也是容器唯一标识信息,这样可以保证可写层与容器的唯一对应关系。

  // CreateLayer creates the read-write layer for the given container through
  // the layer store, using container.ID as the layer's name.
  // When container.ImageID is set, the image is looked up in the image store
  // and its RootFS chain ID becomes the parent; a lookup error is returned as
  // is. When ImageID is empty, the zero ChainID is passed as parent.
  // initFunc is handed to the layer store as the InitFunc option.
  // Inside this function the parameter name "container" shadows the package
  // of the same name.
  1. func (i *ImageService) CreateLayer(container *container.Container, initFunc layer.MountInit) (layer.RWLayer, error) {
  2. var layerID layer.ChainID
  3. if container.ImageID != "" {
  4. img, err := i.imageStore.Get(container.ImageID)
  5. if err != nil {
  6. return nil, err
  7. }
  8. layerID = img.RootFS.ChainID()
  9. }
  // container.HostConfig is dereferenced below without a nil check.
  10. rwLayerOpts := &layer.CreateRWLayerOpts{
  11. MountLabel: container.MountLabel,
  12. InitFunc: initFunc,
  13. StorageOpt: container.HostConfig.StorageOpt,
  14. }
  // NOTE(review): nothing is indexed by OS in the code shown here; the comment
  // below looks like a leftover from an earlier version — confirm.
  15. // Indexing by OS is safe here as validation of OS has already been performed in create() (the only
  16. // caller), and guaranteed non-nil
  17. return i.layerStore.CreateRWLayer(container.ID, layerID, rwLayerOpts)
  18. }

在创建 layer.Store 实例时,newStoreFromGraphDriver 是核心方法,这个方法会创建存在于宿主机文件系统中的 Metadata Store 实例,并与 Graph Driver 关联。创建过程中,会将已经存在的层进行加载,见代码 18 ~ 36 行。

  // newStoreFromGraphDriver builds a layer store on top of the given graph
  // driver, with a metadata store created from root via newFSMetadataStore.
  // It then loads the layers and mounts listed by the metadata store.
  // An error is returned only if the metadata store cannot be created or
  // listed; failures to load an individual layer or mount are logged at debug
  // level and otherwise ignored.
  1. func newStoreFromGraphDriver(root string, driver graphdriver.Driver) (Store, error) {
  // Capabilities stay at their zero value unless the driver implements
  // graphdriver.CapabilityDriver.
  2. caps := graphdriver.Capabilities{}
  3. if capDriver, ok := driver.(graphdriver.CapabilityDriver); ok {
  4. caps = capDriver.Capabilities()
  5. }
  6. ms, err := newFSMetadataStore(root)
  7. if err != nil {
  8. return nil, err
  9. }
  // useTarSplit is turned on unless the driver reports ReproducesExactDiffs.
  10. ls := &layerStore{
  11. store: ms,
  12. driver: driver,
  13. layerMap: map[ChainID]*roLayer{},
  14. mounts: map[string]*mountedLayer{},
  15. locker: locker.New(),
  16. useTarSplit: !caps.ReproducesExactDiffs,
  17. }
  18. ids, mounts, err := ms.List()
  19. if err != nil {
  20. return nil, err
  21. }
  // Load every listed layer; a layer that fails to load is skipped. Each
  // loaded layer that has a parent adds one reference to that parent.
  22. for _, id := range ids {
  23. l, err := ls.loadLayer(id)
  24. if err != nil {
  25. logrus.Debugf("Failed to load layer %s: %s", id, err)
  26. continue
  27. }
  28. if l.parent != nil {
  29. l.parent.referenceCount++
  30. }
  31. }
  32. for _, mount := range mounts {
  33. if err := ls.loadMount(mount); err != nil {
  34. logrus.Debugf("Failed to load mount %s: %s", mount, err)
  35. }
  36. }
  37. return ls, nil
  38. }

创建过程关键部分如图 4 所示。
container-layer-store.svg
图 4:layer.Store 创建示意图

Setup Initial Layer

执行初始化操作的 setupInitLayer 是与宿主机操作系统相关的方法,以 Unix 版本实现为例,这个方法返回一个闭包方法来执行最终的初始化操作。

  // setupInitLayer returns a function that initializes the filesystem at the
  // path it is given by calling initlayer.Setup with idMapping.RootPair().
  // idMapping is captured by the returned closure and read when it runs.
  1. func setupInitLayer(idMapping *idtools.IdentityMapping) func(containerfs.ContainerFS) error {
  2. return func(initPath containerfs.ContainerFS) error {
  3. return initlayer.Setup(initPath, idMapping.RootPair())
  4. }
  5. }

Unix 版本的 Setup 操作主要是对文件、目录、符号链接等进行创建并设置合理的权限,详细步骤如下所示。

  // Setup makes sure a fixed set of paths exists below the root of initLayerFs.
  // Each table entry maps a path to "dir", "file", or any other string, which
  // is then used as the target of a symlink created at that path.
  // Missing parent directories and "dir" entries are created with mode 0755
  // and rootIdentity via idtools.MkdirAllAndChownNew. "file" entries are
  // created with os.OpenFile and then chowned to rootIdentity.
  // The first error from Stat (other than "not exist"), directory creation,
  // file creation or symlink creation is returned. The table is a map, so
  // entries are processed in no particular order.
  1. func Setup(initLayerFs containerfs.ContainerFS, rootIdentity idtools.Identity) error {
  2. // Since all paths are local to the container, we can just extract initLayerFs.Path()
  3. initLayer := initLayerFs.Path()
  4. for pth, typ := range map[string]string{
  5. "/dev/pts": "dir",
  6. "/dev/shm": "dir",
  7. "/proc": "dir",
  8. "/sys": "dir",
  9. "/.dockerenv": "file",
  10. "/etc/resolv.conf": "file",
  11. "/etc/hosts": "file",
  12. "/etc/hostname": "file",
  13. "/dev/console": "file",
  14. "/etc/mtab": "/proc/mounts",
  15. } {
  // Unlink is attempted on every prefix of the path, including the path
  // itself; its result is discarded.
  // NOTE(review): presumably this clears non-directory entries that are in the
  // way, since unlink does not remove directories — confirm.
  16. parts := strings.Split(pth, "/")
  17. prev := "/"
  18. for _, p := range parts[1:] {
  19. prev = filepath.Join(prev, p)
  20. unix.Unlink(filepath.Join(initLayer, prev))
  21. }
  22. if _, err := os.Stat(filepath.Join(initLayer, pth)); err != nil {
  23. if os.IsNotExist(err) {
  24. if err := idtools.MkdirAllAndChownNew(filepath.Join(initLayer, filepath.Dir(pth)), 0755, rootIdentity); err != nil {
  25. return err
  26. }
  27. switch typ {
  28. case "dir":
  29. if err := idtools.MkdirAllAndChownNew(filepath.Join(initLayer, pth), 0755, rootIdentity); err != nil {
  30. return err
  31. }
  32. case "file":
  33. f, err := os.OpenFile(filepath.Join(initLayer, pth), os.O_CREATE, 0755)
  34. if err != nil {
  35. return err
  36. }
  // The results of Chown and Close are not checked.
  37. f.Chown(rootIdentity.UID, rootIdentity.GID)
  38. f.Close()
  39. default:
  40. if err := os.Symlink(typ, filepath.Join(initLayer, pth)); err != nil {
  41. return err
  42. }
  43. }
  44. } else {
  45. return err
  46. }
  47. }
  48. }
  49. // Layer is ready to use, if it wasn't before.
  50. return nil
  51. }

Start a Container

Containerd

启动容器的过程中,会使用到 containerd,图 5 以 Unix 版本实现中第一步创建容器(确实是在运行容器过程中创建)为例展示 Daemon 实例与 containerd 的交互过程。
container-containerd-create.svg
图 5:containerd 交互示意图

创建 containerd 客户端的代码在 initLibcontainerd 中,下面代码为 Unix 版本实现。

  // initLibcontainerd creates the containerd client with remote.NewClient and
  // stores it in daemon.containerd. The client is given daemon.containerdCli,
  // the directory "<ExecRoot>/containerd", the configured containerd
  // namespace, and the daemon itself. The error from NewClient is returned
  // unchanged.
  1. func (daemon *Daemon) initLibcontainerd(ctx context.Context) error {
  2. var err error
  3. daemon.containerd, err = remote.NewClient(
  4. ctx,
  5. daemon.containerdCli,
  6. filepath.Join(daemon.configStore.ExecRoot, "containerd"),
  7. daemon.configStore.ContainerdNamespace,
  8. daemon,
  9. )
  10. return err
  11. }

在 containerd 中的接口类型 containers.Store 用于与容器底层数据库进行交互。

  1. type Store interface {
  2. // Get a container using the id.
  3. //
  4. // Container object is returned on success. If the id is not known to the
  5. // store, an error will be returned.
  6. Get(ctx context.Context, id string) (Container, error)
  7. // List returns containers that match one or more of the provided filters.
  8. List(ctx context.Context, filters ...string) ([]Container, error)
  9. // Create a container in the store from the provided container.
  10. Create(ctx context.Context, container Container) (Container, error)
  11. // Update the container with the provided container object. ID must be set.
  12. //
  13. // If one or more fieldpaths are provided, only the field corresponding to
  14. // the fieldpaths will be mutated.
  15. Update(ctx context.Context, container Container, fieldpaths ...string) (Container, error)
  16. // Delete a container using the id.
  17. //
  18. // nil will be returned on success. If the container is not known to the
  19. // store, ErrNotFound will be returned.
  20. Delete(ctx context.Context, id string) error
  21. }

最终的 Create 方法通过 gRPC 调用执行。

  // Create invokes the "/containerd.services.containers.v1.Containers/Create"
  // method on the client connection with request in and the given call
  // options. It returns the response on success, or nil and the error
  // reported by Invoke.
  1. func (c *containersClient) Create(ctx context.Context, in *CreateContainerRequest, opts ...grpc.CallOption) (*CreateContainerResponse, error) {
  2. out := new(CreateContainerResponse)
  3. err := c.cc.Invoke(ctx, "/containerd.services.containers.v1.Containers/Create", in, out, opts...)
  4. if err != nil {
  5. return nil, err
  6. }
  7. return out, nil
  8. }

container.xml