- 运行时文件/var/lib/docker
- api/server/router/container/container_routes.go#containerRouter.postContainersStart
- 1) daemon/start.go#Daemon.ContainerStart
- 1.1) daemon/container.go#Daemon.GetContainer
- 1.2) daemon/start.go#Daemon.containerStart
- 1.2.1) daemon/container_operations.go#Daemon.initializingNetworking
- 1.2.1.1) daemon/container_operations.go#Daemon.allocateNetwork
- —1.2.1.1.1) daemon/container_operations.go#Daemon.updateContainerNetworkSettings
- —1.2.1.1.2) daemon/container_operations.go#Daemon.connectToNetwork
- ——1.2.1.1.2.1) daemon/container_operations.go#findAndAttachNetwork
- ——1.2.1.1.2.2) daemon/container_operations.go#Daemon.getNetworkSandbox
- ——1.2.1.1.2.3) daemon/network.go#buildCreateEndpointOptions
- ——1.2.1.1.2.4) libnetwork/network.go#network.CreateEndpoint
- ——— 1.2.1.1.2.4.1) libnetwork/drivers/bridge/bridge.go#driver.CreateEndpoint
- ——1.2.1.1.2.5) daemon/container_operations.go#Daemon.updateEndpointNetworkSettings
- ——1.2.1.1.2.6) daemon/container_operations.go#Daemon.buildSandboxOptions
- ——1.2.1.1.2.7-创建NetNS) libnetwork/controller.go#controller.NewSandbox
- ———1.2.1.1.2.7.1) libnetwork/sandbox_dns_unix.go#sandbox.setupResolutionFiles
- ————1.2.1.1.2.7.1.1) libnetwork/sandbox_dns_unix.go#sandbox.buildHostsFile
- ————1.2.1.1.2.7.1.2) libnetwork/sandbox_dns_unix.go#sandbox.setupDNS
- ———1.2.1.1.2.7.2) libnetwork/osl/namespace_linux.go#NewSandbox
- ————1.2.1.1.2.7.2.1) libnetwork/osl/namespace_linux.go#createNetworkNamespace
- ——1.2.1.1.2.8) daemon/container_operations.go#updateSandboxNetworkSettings
- ——1.2.1.1.2.9) daemon/network.go#buildJoinOptions
- ——1.2.1.1.2.10) libnetwork/endpoint.go#endpoint.Join
- ———1.2.1.1.2.10.1) libnetwork/drivers/bridge/bridge.go#driver#Join
- ———1.2.1.1.2.10.2) libnetwork/sandbox.go#sandbox.populateNetworkResources
- ————1.2.1.1.2.10.2.1) libnetwork/osl/interface_linux.go#networkNamespace#AddInterface
- —1.2.1.1.3) container/container.go#Container.WriteHostConfig
- 1.2.1.2) container/container_unix.go#Container.BuildHostnameFile
- 1.2.2) daemon/oci_linux.go#Daemon.createSpec
- 1.2.2.1) oci/defaults.go#DefaultSpec
- 1.2.2.2-Link实现) daemon/oci_linux.go#Daemon.populateCommonSpec
- 1.2.2.3) daemon/oci_linux.go#setNamespaces
- 1.2.2.4) daemon/container_operations_unix.go#setupContainerMountsRoot
- 1.2.2.5) daemon/volumes_unix.go#setupMounts
- —1.2.2.5.1) container/container_unix.go#Container.NetworkMounts
- 1.2.3) libcontainerd/client_daemon.go#client.Create
- 1.2.4) libcontainerd/client_daemon.go#client.Start
- 1.2.1) daemon/container_operations.go#Daemon.initializingNetworking
运行时文件/var/lib/docker
├── aufs/
│ ├── diff/
│ │ ├── 3e5dad646755b80ceeb2776db26d93cd8b2a45b66c670173eb0a5def51b27a26/
│ │ ├── 890f6393af02ab70fac8d9de2e9d37b92fc0a461d72bf3a17b73e1f809e1b01a/
│ │ ├── 9f2849b92cad358be0cfc0cbada718203184791d8b9dc4d02fb966ab7ef7f703/
│ │ ├── 9f2849b92cad358be0cfc0cbada718203184791d8b9dc4d02fb966ab7ef7f703-init/
│ │ ├── b2fe7fefbb164b8a8adb84273b94eeccc6794c231fc3958bc98a35cc72225cbb/
│ │ ├── d0ff844478a02c0f0544115e77626cf4f0c4a7996a6a6764eea2a2dd29201910/
│ │ └── d0ff844478a02c0f0544115e77626cf4f0c4a7996a6a6764eea2a2dd29201910-init/
│ ├── layers/
│ │ ├── 3e5dad646755b80ceeb2776db26d93cd8b2a45b66c670173eb0a5def51b27a26
│ │ ├── 890f6393af02ab70fac8d9de2e9d37b92fc0a461d72bf3a17b73e1f809e1b01a
│ │ ├── 9f2849b92cad358be0cfc0cbada718203184791d8b9dc4d02fb966ab7ef7f703
│ │ ├── 9f2849b92cad358be0cfc0cbada718203184791d8b9dc4d02fb966ab7ef7f703-init
│ │ ├── b2fe7fefbb164b8a8adb84273b94eeccc6794c231fc3958bc98a35cc72225cbb
│ │ ├── d0ff844478a02c0f0544115e77626cf4f0c4a7996a6a6764eea2a2dd29201910
│ │ └── d0ff844478a02c0f0544115e77626cf4f0c4a7996a6a6764eea2a2dd29201910-init
│ └── mnt/
│ ├── 3e5dad646755b80ceeb2776db26d93cd8b2a45b66c670173eb0a5def51b27a26/
│ ├── 890f6393af02ab70fac8d9de2e9d37b92fc0a461d72bf3a17b73e1f809e1b01a/
│ ├── 9f2849b92cad358be0cfc0cbada718203184791d8b9dc4d02fb966ab7ef7f703/
│ ├── 9f2849b92cad358be0cfc0cbada718203184791d8b9dc4d02fb966ab7ef7f703-init/
│ ├── b2fe7fefbb164b8a8adb84273b94eeccc6794c231fc3958bc98a35cc72225cbb/
│ ├── d0ff844478a02c0f0544115e77626cf4f0c4a7996a6a6764eea2a2dd29201910/
│ └── d0ff844478a02c0f0544115e77626cf4f0c4a7996a6a6764eea2a2dd29201910-init/
├── builder/
│ └── fscache.db
├── buildkit/
│ ├── content/
│ │ └── ingest/
│ ├── executor/
│ ├── cache.db
│ ├── metadata.db
│ └── snapshots.db
├── containerd/
│ └── daemon/
│ ├── io.containerd.content.v1.content/
│ ├── io.containerd.metadata.v1.bolt/
│ ├── io.containerd.runtime.v1.linux/
│ ├── io.containerd.runtime.v2.task/
│ ├── io.containerd.snapshotter.v1.btrfs/
│ ├── io.containerd.snapshotter.v1.native/
│ ├── io.containerd.snapshotter.v1.overlayfs/
│ └── tmpmounts/
├── containers/
│ ├── a8205034ae074d1fbc73327fa348ffaac3a1568d6a3de1953aeb49ea99dcfdb0/
│ │ ├── checkpoints/
│ │ ├── mounts/
│ │ ├── a8205034ae074d1fbc73327fa348ffaac3a1568d6a3de1953aeb49ea99dcfdb0-json.log
│ │ ├── config.v2.json
│ │ ├── hostconfig.json
│ │ ├── hostname
│ │ ├── hosts
│ │ ├── resolv.conf
│ │ └── resolv.conf.hash
│ └── bf1b6932346bfd4793f4ebea51ca7a0ce5d460620b07bea3b3a6e1beb9e502b8/
│ ├── checkpoints/
│ ├── mounts/
│ ├── bf1b6932346bfd4793f4ebea51ca7a0ce5d460620b07bea3b3a6e1beb9e502b8-json.log
│ ├── config.v2.json
│ ├── hostconfig.json
│ ├── hostname
│ ├── hosts
│ ├── resolv.conf
│ └── resolv.conf.hash
├── image/
│ ├── aufs/
│ │ ├── distribution/
│ │ ├── imagedb/
│ │ ├── layerdb/
│ │ └── repositories.json
│ └── overlay/
│ ├── distribution/
│ ├── imagedb/
│ ├── layerdb/
│ └── repositories.json
├── network/
│ └── files/
│ └── local-kv.db
├── overlay/
├── plugins/
│ ├── storage/
│ │ └── blobs/
│ └── tmp/
├── runtimes/
├── swarm/
├── tmp/
├── trust/
└── volumes/
└── metadata.db
api/server/router/container/container_routes.go#containerRouter.postContainersStart
func (s *containerRouter) postContainersStart(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
// If contentLength is -1, we can assumed chunked encoding
// or more technically that the length is unknown
// https://golang.org/src/pkg/net/http/request.go#L139
// net/http otherwise seems to swallow any headers related to chunked encoding
// including r.TransferEncoding
// allow a nil body for backwards compatibility
version := httputils.VersionFromContext(ctx)
var hostConfig *container.HostConfig
// A non-nil json object is at least 7 characters.
if r.ContentLength > 7 || r.ContentLength == -1 {
if versions.GreaterThanOrEqualTo(version, "1.24") {
return bodyOnStartError{}
}
if err := httputils.CheckForJSON(r); err != nil {
return err
}
c, err := s.decoder.DecodeHostConfig(r.Body)
if err != nil {
return err
}
hostConfig = c
}
if err := httputils.ParseForm(r); err != nil {
return err
}
checkpoint := r.Form.Get("checkpoint")
checkpointDir := r.Form.Get("checkpoint-dir")
// ********************************** NOTICE ********************************** //
if err := s.backend.ContainerStart(vars["name"], hostConfig, checkpoint, checkpointDir); err != nil {
// ********************************** NOTICE ********************************** //
return err
}
w.WriteHeader(http.StatusNoContent)
return nil
}
1) daemon/start.go#Daemon.ContainerStart
// ContainerStart starts a container.
func (daemon *Daemon) ContainerStart(name string, hostConfig *containertypes.HostConfig, checkpoint string, checkpointDir string) error {
if checkpoint != "" && !daemon.HasExperimental() {
return errdefs.InvalidParameter(errors.New("checkpoint is only supported in experimental mode"))
}
// ********************************** NOTICE ********************************** //
container, err := daemon.GetContainer(name)
// ********************************** NOTICE ********************************** //
if err != nil {
return err
}
validateState := func() error {
container.Lock()
defer container.Unlock()
if container.Paused {
return errdefs.Conflict(errors.New("cannot start a paused container, try unpause instead"))
}
if container.Running {
return containerNotModifiedError{running: true}
}
if container.RemovalInProgress || container.Dead {
return errdefs.Conflict(errors.New("container is marked for removal and cannot be started"))
}
return nil
}
if err := validateState(); err != nil {
return err
}
// Windows does not have the backwards compatibility issue here.
if runtime.GOOS != "windows" {
// This is kept for backward compatibility - hostconfig should be passed when
// creating a container, not during start.
if hostConfig != nil {
logrus.Warn("DEPRECATED: Setting host configuration options when the container starts is deprecated and has been removed in Docker 1.12")
oldNetworkMode := container.HostConfig.NetworkMode
if err := daemon.setSecurityOptions(container, hostConfig); err != nil {
return errdefs.InvalidParameter(err)
}
if err := daemon.mergeAndVerifyLogConfig(&hostConfig.LogConfig); err != nil {
return errdefs.InvalidParameter(err)
}
if err := daemon.setHostConfig(container, hostConfig); err != nil {
return errdefs.InvalidParameter(err)
}
newNetworkMode := container.HostConfig.NetworkMode
if string(oldNetworkMode) != string(newNetworkMode) {
// if user has change the network mode on starting, clean up the
// old networks. It is a deprecated feature and has been removed in Docker 1.12
container.NetworkSettings.Networks = nil
if err := container.CheckpointTo(daemon.containersReplica); err != nil {
return errdefs.System(err)
}
}
container.InitDNSHostConfig()
}
} else {
if hostConfig != nil {
return errdefs.InvalidParameter(errors.New("Supplying a hostconfig on start is not supported. It should be supplied on create"))
}
}
// check if hostConfig is in line with the current system settings.
// It may happen cgroups are umounted or the like.
if _, err = daemon.verifyContainerSettings(container.OS, container.HostConfig, nil, false); err != nil {
return errdefs.InvalidParameter(err)
}
// Adapt for old containers in case we have updates in this function and
// old containers never have chance to call the new function in create stage.
if hostConfig != nil {
if err := daemon.adaptContainerSettings(container.HostConfig, false); err != nil {
return errdefs.InvalidParameter(err)
}
}
// ********************************** NOTICE ********************************** //
return daemon.containerStart(container, checkpoint, checkpointDir, true)
// ********************************** NOTICE ********************************** //
}
1.1) daemon/container.go#Daemon.GetContainer
func (daemon *Daemon) GetContainer(prefixOrName string) (*container.Container, error) {
if len(prefixOrName) == 0 {
return nil, errors.WithStack(invalidIdentifier(prefixOrName))
}
if containerByID := daemon.containers.Get(prefixOrName); containerByID != nil {
// prefix is an exact match to a full container ID
return containerByID, nil
}
// GetByName will match only an exact name provided; we ignore errors
if containerByName, _ := daemon.GetByName(prefixOrName); containerByName != nil {
// prefix is an exact match to a full container Name
return containerByName, nil
}
containerID, indexError := daemon.idIndex.Get(prefixOrName)
if indexError != nil {
// When truncindex defines an error type, use that instead
if indexError == truncindex.ErrNotExist {
return nil, containerNotFound(prefixOrName)
}
return nil, errdefs.System(indexError)
}
// ********************************** NOTICE ********************************** //
return daemon.containers.Get(containerID), nil
// ********************************** NOTICE ********************************** //
}
1.2) daemon/start.go#Daemon.containerStart
func (daemon *Daemon) containerStart(container *container.Container, checkpoint string, checkpointDir string, resetRestartManager bool) (err error) {
start := time.Now()
container.Lock()
defer container.Unlock()
if resetRestartManager && container.Running { // skip this check if already in restarting step and resetRestartManager==false
return nil
}
if container.RemovalInProgress || container.Dead {
return errdefs.Conflict(errors.New("container is marked for removal and cannot be started"))
}
if checkpointDir != "" {
// TODO(mlaventure): how would we support that?
return errdefs.Forbidden(errors.New("custom checkpointdir is not supported"))
}
// if we encounter an error during start we need to ensure that any other
// setup has been cleaned up properly
defer func() {
if err != nil {
container.SetError(err)
// if no one else has set it, make sure we don't leave it at zero
if container.ExitCode() == 0 {
container.SetExitCode(128)
}
if err := container.CheckpointTo(daemon.containersReplica); err != nil {
logrus.Errorf("%s: failed saving state on start failure: %v", container.ID, err)
}
container.Reset(false)
daemon.Cleanup(container)
// if containers AutoRemove flag is set, remove it after clean up
if container.HostConfig.AutoRemove {
container.Unlock()
if err := daemon.ContainerRm(container.ID, &types.ContainerRmConfig{ForceRemove: true, RemoveVolume: true}); err != nil {
logrus.Errorf("can't remove container %s: %v", container.ID, err)
}
container.Lock()
}
}
}()
if err := daemon.conditionalMountOnStart(container); err != nil {
return err
}
// ********************************** NOTICE ********************************** //
if err := daemon.initializeNetworking(container); err != nil {
// ********************************** NOTICE ********************************** //
return err
}
// ********************************** NOTICE ********************************** //
spec, err := daemon.createSpec(container)
// ********************************** NOTICE ********************************** //
if err != nil {
return errdefs.System(err)
}
if resetRestartManager {
container.ResetRestartManager(true)
container.HasBeenManuallyStopped = false
}
if daemon.saveApparmorConfig(container); err != nil {
return err
}
if checkpoint != "" {
checkpointDir, err = getCheckpointDir(checkpointDir, checkpoint, container.Name, container.ID, container.CheckpointDir(), false)
if err != nil {
return err
}
}
createOptions, err := daemon.getLibcontainerdCreateOptions(container)
if err != nil {
return err
}
// ********************************** NOTICE ********************************** //
err = daemon.containerd.Create(context.Background(), container.ID, spec, createOptions)
// ********************************** NOTICE ********************************** //
if err != nil {
return translateContainerdStartErr(container.Path, container.SetExitCode, err)
}
// TODO(mlaventure): we need to specify checkpoint options here
// ********************************** NOTICE ********************************** //
pid, err := daemon.containerd.Start(context.Background(), container.ID, checkpointDir,
container.StreamConfig.Stdin() != nil || container.Config.Tty,
container.InitializeStdio)
// ********************************** NOTICE ********************************** //
if err != nil {
if err := daemon.containerd.Delete(context.Background(), container.ID); err != nil {
logrus.WithError(err).WithField("container", container.ID).
Error("failed to delete failed start container")
}
return translateContainerdStartErr(container.Path, container.SetExitCode, err)
}
container.SetRunning(pid, true)
container.HasBeenStartedBefore = true
daemon.setStateCounter(container)
daemon.initHealthMonitor(container)
if err := container.CheckpointTo(daemon.containersReplica); err != nil {
logrus.WithError(err).WithField("container", container.ID).
Errorf("failed to store container")
}
daemon.LogContainerEvent(container, "start")
containerActions.WithValues("start").UpdateSince(start)
return nil
}
1.2.1) daemon/container_operations.go#Daemon.initializingNetworking
func (daemon *Daemon) initializeNetworking(container *container.Container) error {
var err error
// container模式
if container.HostConfig.NetworkMode.IsContainer() {
// we need to get the hosts files from the container to join
nc, err := daemon.getNetworkedContainer(container.ID, container.HostConfig.NetworkMode.ConnectedContainer())
if err != nil {
return err
}
err = daemon.initializeNetworkingPaths(container, nc)
if err != nil {
return err
}
container.Config.Hostname = nc.Config.Hostname
container.Config.Domainname = nc.Config.Domainname
return nil
}
// host模式
if container.HostConfig.NetworkMode.IsHost() {
if container.Config.Hostname == "" {
container.Config.Hostname, err = os.Hostname()
if err != nil {
return err
}
}
}
// 其他模式,包括bridge和none
// ********************************** NOTICE ********************************** //
if err := daemon.allocateNetwork(container); err != nil {
// ********************************** NOTICE ********************************** //
return err
}
// ********************************** NOTICE ********************************** //
return container.BuildHostnameFile()
// ********************************** NOTICE ********************************** //
}
1.2.1.1) daemon/container_operations.go#Daemon.allocateNetwork
func (daemon *Daemon) allocateNetwork(container *container.Container) error {
start := time.Now()
controller := daemon.netController
if daemon.netController == nil {
return nil
}
// Cleanup any stale sandbox left over due to ungraceful daemon shutdown
if err := controller.SandboxDestroy(container.ID); err != nil {
logrus.Errorf("failed to cleanup up stale network sandbox for container %s", container.ID)
}
if container.Config.NetworkDisabled || container.HostConfig.NetworkMode.IsContainer() {
return nil
}
updateSettings := false
if len(container.NetworkSettings.Networks) == 0 {
// ********************************** NOTICE ********************************** //
daemon.updateContainerNetworkSettings(container, nil)
// ********************************** NOTICE ********************************** //
updateSettings = true
}
// 如果有bridge网络,那么连接即可(默认是有的)
// always connect default network first since only default
// network mode support link and we need do some setting
// on sandbox initialize for link, but the sandbox only be initialized
// on first network connecting.
defaultNetName := runconfig.DefaultDaemonNetworkMode().NetworkName()
if nConf, ok := container.NetworkSettings.Networks[defaultNetName]; ok {
cleanOperationalData(nConf)
// ********************************** NOTICE ********************************** //
if err := daemon.connectToNetwork(container, defaultNetName, nConf.EndpointSettings, updateSettings); err != nil {
// ********************************** NOTICE ********************************** //
return err
}
}
// 下面不用看了
// the intermediate map is necessary because "connectToNetwork" modifies "container.NetworkSettings.Networks"
networks := make(map[string]*network.EndpointSettings)
for n, epConf := range container.NetworkSettings.Networks {
if n == defaultNetName {
continue
}
networks[n] = epConf
}
for netName, epConf := range networks {
cleanOperationalData(epConf)
if err := daemon.connectToNetwork(container, netName, epConf.EndpointSettings, updateSettings); err != nil {
return err
}
}
// If the container is not to be connected to any network,
// create its network sandbox now if not present
if len(networks) == 0 {
if nil == daemon.getNetworkSandbox(container) {
options, err := daemon.buildSandboxOptions(container)
if err != nil {
return err
}
sb, err := daemon.netController.NewSandbox(container.ID, options...)
if err != nil {
return err
}
updateSandboxNetworkSettings(container, sb)
defer func() {
if err != nil {
sb.Delete()
}
}()
}
}
// ********************************** NOTICE ********************************** //
if _, err := container.WriteHostConfig(); err != nil {
// ********************************** NOTICE ********************************** //
return err
}
networkActions.WithValues("allocate").UpdateSince(start)
return nil
}
—1.2.1.1.1) daemon/container_operations.go#Daemon.updateContainerNetworkSettings
// updateContainerNetworkSettings updates the network settings
// 第二个参数为nil
func (daemon *Daemon) updateContainerNetworkSettings(container *container.Container, endpointsConfig map[string]*networktypes.EndpointSettings) {
var n libnetwork.Network
mode := container.HostConfig.NetworkMode
if container.Config.NetworkDisabled || mode.IsContainer() {
return
}
networkName := mode.NetworkName()
if mode.IsDefault() {
// ********************************** NOTICE ********************************** //
networkName = daemon.netController.Config().Daemon.DefaultNetwork
// ********************************** NOTICE ********************************** //
}
if mode.IsUserDefined() {
var err error
n, err = daemon.FindNetwork(networkName)
if err == nil {
networkName = n.Name()
}
}
if container.NetworkSettings == nil {
container.NetworkSettings = &network.Settings{}
}
if len(endpointsConfig) > 0 {
if container.NetworkSettings.Networks == nil {
container.NetworkSettings.Networks = make(map[string]*network.EndpointSettings)
}
for name, epConfig := range endpointsConfig {
container.NetworkSettings.Networks[name] = &network.EndpointSettings{
EndpointSettings: epConfig,
}
}
}
if container.NetworkSettings.Networks == nil {
// ********************************** NOTICE ********************************** //
container.NetworkSettings.Networks = make(map[string]*network.EndpointSettings)
// ********************************** NOTICE ********************************** //
container.NetworkSettings.Networks[networkName] = &network.EndpointSettings{
EndpointSettings: &networktypes.EndpointSettings{},
}
}
// Convert any settings added by client in default name to
// engine's default network name key
if mode.IsDefault() {
if nConf, ok := container.NetworkSettings.Networks[mode.NetworkName()]; ok {
container.NetworkSettings.Networks[networkName] = nConf
delete(container.NetworkSettings.Networks, mode.NetworkName())
}
}
if !mode.IsUserDefined() {
return
}
// Make sure to internally store the per network endpoint config by network name
if _, ok := container.NetworkSettings.Networks[networkName]; ok {
return
}
if n != nil {
if nwConfig, ok := container.NetworkSettings.Networks[n.ID()]; ok {
container.NetworkSettings.Networks[networkName] = nwConfig
delete(container.NetworkSettings.Networks, n.ID())
return
}
}
}
—1.2.1.1.2) daemon/container_operations.go#Daemon.connectToNetwork
func (daemon *Daemon) connectToNetwork(container *container.Container, idOrName string, endpointConfig *networktypes.EndpointSettings, updateSettings bool) (err error) {
start := time.Now()
if container.HostConfig.NetworkMode.IsContainer() {
return runconfig.ErrConflictSharedNetwork
}
if containertypes.NetworkMode(idOrName).IsBridge() &&
daemon.configStore.DisableBridge {
container.Config.NetworkDisabled = true
return nil
}
if endpointConfig == nil {
endpointConfig = &networktypes.EndpointSettings{}
}
// ********************************** NOTICE ********************************** //
n, config, err := daemon.findAndAttachNetwork(container, idOrName, endpointConfig)
// ********************************** NOTICE ********************************** //
if err != nil {
return err
}
if n == nil {
return nil
}
var operIPAM bool
if config != nil {
if epConfig, ok := config.EndpointsConfig[n.Name()]; ok {
if endpointConfig.IPAMConfig == nil ||
(endpointConfig.IPAMConfig.IPv4Address == "" &&
endpointConfig.IPAMConfig.IPv6Address == "" &&
len(endpointConfig.IPAMConfig.LinkLocalIPs) == 0) {
operIPAM = true
}
// copy IPAMConfig and NetworkID from epConfig via AttachNetwork
endpointConfig.IPAMConfig = epConfig.IPAMConfig
endpointConfig.NetworkID = epConfig.NetworkID
}
}
err = daemon.updateNetworkConfig(container, n, endpointConfig, updateSettings)
if err != nil {
return err
}
controller := daemon.netController
// ********************************** NOTICE ********************************** //
sb := daemon.getNetworkSandbox(container)
// ********************************** NOTICE ********************************** //
createOptions, err := buildCreateEndpointOptions(container, n, endpointConfig, sb, daemon.configStore.DNS)
// ********************************** NOTICE ********************************** //
if err != nil {
return err
}
endpointName := strings.TrimPrefix(container.Name, "/")
// ********************************** NOTICE ********************************** //
ep, err := n.CreateEndpoint(endpointName, createOptions...)
// ********************************** NOTICE ********************************** //
if err != nil {
return err
}
defer func() {
if err != nil {
if e := ep.Delete(false); e != nil {
logrus.Warnf("Could not rollback container connection to network %s", idOrName)
}
}
}()
container.NetworkSettings.Networks[n.Name()] = &network.EndpointSettings{
EndpointSettings: endpointConfig,
IPAMOperational: operIPAM,
}
if _, ok := container.NetworkSettings.Networks[n.ID()]; ok {
delete(container.NetworkSettings.Networks, n.ID())
}
// ********************************** NOTICE ********************************** //
if err := daemon.updateEndpointNetworkSettings(container, n, ep); err != nil {
// ********************************** NOTICE ********************************** //
return err
}
if sb == nil {
// ********************************** NOTICE ********************************** //
options, err := daemon.buildSandboxOptions(container)
// ********************************** NOTICE ********************************** //
if err != nil {
return err
}
// ********************************** NOTICE ********************************** //
sb, err = controller.NewSandbox(container.ID, options...)
// ********************************** NOTICE ********************************** //
if err != nil {
return err
}
// ********************************** NOTICE ********************************** //
updateSandboxNetworkSettings(container, sb)
// ********************************** NOTICE ********************************** //
}
// ********************************** NOTICE ********************************** //
joinOptions, err := buildJoinOptions(container.NetworkSettings, n)
// ********************************** NOTICE ********************************** //
if err != nil {
return err
}
// ********************************** NOTICE ********************************** //
if err := ep.Join(sb, joinOptions...); err != nil {
// ********************************** NOTICE ********************************** //
return err
}
if !container.Managed {
// add container name/alias to DNS
if err := daemon.ActivateContainerServiceBinding(container.Name); err != nil {
return fmt.Errorf("Activate container service binding for %s failed: %v", container.Name, err)
}
}
if err := updateJoinInfo(container.NetworkSettings, n, ep); err != nil {
return fmt.Errorf("Updating join info failed: %v", err)
}
container.NetworkSettings.Ports = getPortMapInfo(sb)
daemon.LogNetworkEventWithAttributes(n, "connect", map[string]string{"container": container.ID})
networkActions.WithValues("connect").UpdateSince(start)
return nil
}
——1.2.1.1.2.1) daemon/container_operations.go#findAndAttachNetwork
func (daemon *Daemon) findAndAttachNetwork(container *container.Container, idOrName string, epConfig *networktypes.EndpointSettings) (libnetwork.Network, *networktypes.NetworkingConfig, error) {
id := getNetworkID(idOrName, epConfig)
n, err := daemon.FindNetwork(id)
if err != nil {
// We should always be able to find the network for a
// managed container.
if container.Managed {
return nil, nil, err
}
}
// If we found a network and if it is not dynamically created
// we should never attempt to attach to that network here.
if n != nil {
if container.Managed || !n.Info().Dynamic() {
return n, nil, nil
}
}
var addresses []string
if epConfig != nil && epConfig.IPAMConfig != nil {
if epConfig.IPAMConfig.IPv4Address != "" {
addresses = append(addresses, epConfig.IPAMConfig.IPv4Address)
}
if epConfig.IPAMConfig.IPv6Address != "" {
addresses = append(addresses, epConfig.IPAMConfig.IPv6Address)
}
}
var (
config *networktypes.NetworkingConfig
retryCount int
)
if n == nil && daemon.attachableNetworkLock != nil {
daemon.attachableNetworkLock.Lock(id)
defer daemon.attachableNetworkLock.Unlock(id)
}
for {
// In all other cases, attempt to attach to the network to
// trigger attachment in the swarm cluster manager.
if daemon.clusterProvider != nil {
var err error
config, err = daemon.clusterProvider.AttachNetwork(id, container.ID, addresses)
if err != nil {
return nil, nil, err
}
}
n, err = daemon.FindNetwork(id)
if err != nil {
if daemon.clusterProvider != nil {
if err := daemon.clusterProvider.DetachNetwork(id, container.ID); err != nil {
logrus.Warnf("Could not rollback attachment for container %s to network %s: %v", container.ID, idOrName, err)
}
}
// Retry network attach again if we failed to
// find the network after successful
// attachment because the only reason that
// would happen is if some other container
// attached to the swarm scope network went down
// and removed the network while we were in
// the process of attaching.
if config != nil {
if _, ok := err.(libnetwork.ErrNoSuchNetwork); ok {
if retryCount >= 5 {
return nil, nil, fmt.Errorf("could not find network %s after successful attachment", idOrName)
}
retryCount++
continue
}
}
return nil, nil, err
}
break
}
// This container has attachment to a swarm scope
// network. Update the container network settings accordingly.
container.NetworkSettings.HasSwarmEndpoint = true
return n, config, nil
}
——1.2.1.1.2.2) daemon/container_operations.go#Daemon.getNetworkSandbox
func (daemon *Daemon) getNetworkSandbox(container *container.Container) libnetwork.Sandbox {
var sb libnetwork.Sandbox
daemon.netController.WalkSandboxes(func(s libnetwork.Sandbox) bool {
if s.ContainerID() == container.ID {
sb = s
return true
}
return false
})
return sb
}
——1.2.1.1.2.3) daemon/network.go#buildCreateEndpointOptions
// buildCreateEndpointOptions builds endpoint options from a given network.
func buildCreateEndpointOptions(c *container.Container, n libnetwork.Network, epConfig *network.EndpointSettings, sb libnetwork.Sandbox, daemonDNS []string) ([]libnetwork.EndpointOption, error) {
var (
bindings = make(nat.PortMap)
pbList []networktypes.PortBinding
exposeList []networktypes.TransportPort
createOptions []libnetwork.EndpointOption
)
defaultNetName := runconfig.DefaultDaemonNetworkMode().NetworkName()
if (!c.EnableServiceDiscoveryOnDefaultNetwork() && n.Name() == defaultNetName) ||
c.NetworkSettings.IsAnonymousEndpoint {
createOptions = append(createOptions, libnetwork.CreateOptionAnonymous())
}
if epConfig != nil {
ipam := epConfig.IPAMConfig
if ipam != nil {
var (
ipList []net.IP
ip, ip6, linkip net.IP
)
for _, ips := range ipam.LinkLocalIPs {
if linkip = net.ParseIP(ips); linkip == nil && ips != "" {
return nil, errors.Errorf("Invalid link-local IP address: %s", ipam.LinkLocalIPs)
}
ipList = append(ipList, linkip)
}
if ip = net.ParseIP(ipam.IPv4Address); ip == nil && ipam.IPv4Address != "" {
return nil, errors.Errorf("Invalid IPv4 address: %s)", ipam.IPv4Address)
}
if ip6 = net.ParseIP(ipam.IPv6Address); ip6 == nil && ipam.IPv6Address != "" {
return nil, errors.Errorf("Invalid IPv6 address: %s)", ipam.IPv6Address)
}
createOptions = append(createOptions,
libnetwork.CreateOptionIpam(ip, ip6, ipList, nil))
}
for _, alias := range epConfig.Aliases {
createOptions = append(createOptions, libnetwork.CreateOptionMyAlias(alias))
}
for k, v := range epConfig.DriverOpts {
createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(options.Generic{k: v}))
}
}
if c.NetworkSettings.Service != nil {
svcCfg := c.NetworkSettings.Service
var vip string
if svcCfg.VirtualAddresses[n.ID()] != nil {
vip = svcCfg.VirtualAddresses[n.ID()].IPv4
}
var portConfigs []*libnetwork.PortConfig
for _, portConfig := range svcCfg.ExposedPorts {
portConfigs = append(portConfigs, &libnetwork.PortConfig{
Name: portConfig.Name,
Protocol: libnetwork.PortConfig_Protocol(portConfig.Protocol),
TargetPort: portConfig.TargetPort,
PublishedPort: portConfig.PublishedPort,
})
}
createOptions = append(createOptions, libnetwork.CreateOptionService(svcCfg.Name, svcCfg.ID, net.ParseIP(vip), portConfigs, svcCfg.Aliases[n.ID()]))
}
if !containertypes.NetworkMode(n.Name()).IsUserDefined() {
createOptions = append(createOptions, libnetwork.CreateOptionDisableResolution())
}
// configs that are applicable only for the endpoint in the network
// to which container was connected to on docker run.
// Ideally all these network-specific endpoint configurations must be moved under
// container.NetworkSettings.Networks[n.Name()]
if n.Name() == c.HostConfig.NetworkMode.NetworkName() ||
(n.Name() == defaultNetName && c.HostConfig.NetworkMode.IsDefault()) {
if c.Config.MacAddress != "" {
mac, err := net.ParseMAC(c.Config.MacAddress)
if err != nil {
return nil, err
}
genericOption := options.Generic{
netlabel.MacAddress: mac,
}
createOptions = append(createOptions, libnetwork.EndpointOptionGeneric(genericOption))
}
}
// Port-mapping rules belong to the container & applicable only to non-internal networks
portmaps := getSandboxPortMapInfo(sb)
if n.Info().Internal() || len(portmaps) > 0 {
return createOptions, nil
}
if c.HostConfig.PortBindings != nil {
for p, b := range c.HostConfig.PortBindings {
bindings[p] = []nat.PortBinding{}
for _, bb := range b {
bindings[p] = append(bindings[p], nat.PortBinding{
HostIP: bb.HostIP,
HostPort: bb.HostPort,
})
}
}
}
portSpecs := c.Config.ExposedPorts
ports := make([]nat.Port, len(portSpecs))
var i int
for p := range portSpecs {
ports[i] = p
i++
}
nat.SortPortMap(ports, bindings)
for _, port := range ports {
expose := networktypes.TransportPort{}
expose.Proto = networktypes.ParseProtocol(port.Proto())
expose.Port = uint16(port.Int())
exposeList = append(exposeList, expose)
pb := networktypes.PortBinding{Port: expose.Port, Proto: expose.Proto}
binding := bindings[port]
for i := 0; i < len(binding); i++ {
pbCopy := pb.GetCopy()
newP, err := nat.NewPort(nat.SplitProtoPort(binding[i].HostPort))
var portStart, portEnd int
if err == nil {
portStart, portEnd, err = newP.Range()
}
if err != nil {
return nil, errors.Wrapf(err, "Error parsing HostPort value (%s)", binding[i].HostPort)
}
pbCopy.HostPort = uint16(portStart)
pbCopy.HostPortEnd = uint16(portEnd)
pbCopy.HostIP = net.ParseIP(binding[i].HostIP)
pbList = append(pbList, pbCopy)
}
if c.HostConfig.PublishAllPorts && len(binding) == 0 {
pbList = append(pbList, pb)
}
}
var dns []string
if len(c.HostConfig.DNS) > 0 {
dns = c.HostConfig.DNS
} else if len(daemonDNS) > 0 {
dns = daemonDNS
}
if len(dns) > 0 {
createOptions = append(createOptions,
libnetwork.CreateOptionDNS(dns))
}
createOptions = append(createOptions,
libnetwork.CreateOptionPortMapping(pbList),
libnetwork.CreateOptionExposedPorts(exposeList))
return createOptions, nil
}
——1.2.1.1.2.4) libnetwork/network.go#network.CreateEndpoint
func (n *network) CreateEndpoint(name string, options ...EndpointOption) (Endpoint, error) {
var err error
if !config.IsValidName(name) {
return nil, ErrInvalidName(name)
}
if n.ConfigOnly() {
return nil, types.ForbiddenErrorf("cannot create endpoint on configuration-only network")
}
if _, err = n.EndpointByName(name); err == nil {
return nil, types.ForbiddenErrorf("endpoint with name %s already exists in network %s", name, n.Name())
}
n.ctrlr.networkLocker.Lock(n.id)
defer n.ctrlr.networkLocker.Unlock(n.id)
// ********************************** NOTICE ********************************** //
return n.createEndpoint(name, options...)
// ********************************** NOTICE ********************************** //
}
func (n *network) createEndpoint(name string, options ...EndpointOption) (Endpoint, error) {
var err error
ep := &endpoint{name: name, generic: make(map[string]interface{}), iface: &endpointInterface{}}
ep.id = stringid.GenerateRandomID()
// Initialize ep.network with a possibly stale copy of n. We need this to get network from
// store. But once we get it from store we will have the most uptodate copy possibly.
ep.network = n
ep.locator = n.getController().clusterHostID()
ep.network, err = ep.getNetworkFromStore()
if err != nil {
return nil, fmt.Errorf("failed to get network during CreateEndpoint: %v", err)
}
n = ep.network
ep.processOptions(options...)
for _, llIPNet := range ep.Iface().LinkLocalAddresses() {
if !llIPNet.IP.IsLinkLocalUnicast() {
return nil, types.BadRequestErrorf("invalid link local IP address: %v", llIPNet.IP)
}
}
if opt, ok := ep.generic[netlabel.MacAddress]; ok {
if mac, ok := opt.(net.HardwareAddr); ok {
ep.iface.mac = mac
}
}
ipam, cap, err := n.getController().getIPAMDriver(n.ipamType)
if err != nil {
return nil, err
}
if cap.RequiresMACAddress {
if ep.iface.mac == nil {
ep.iface.mac = netutils.GenerateRandomMAC()
}
if ep.ipamOptions == nil {
ep.ipamOptions = make(map[string]string)
}
ep.ipamOptions[netlabel.MacAddress] = ep.iface.mac.String()
}
// ********************************** NOTICE ********************************** //
if err = ep.assignAddress(ipam, true, n.enableIPv6 && !n.postIPv6); err != nil {
// ********************************** NOTICE ********************************** //
return nil, err
}
defer func() {
if err != nil {
ep.releaseAddress()
}
}()
// ********************************** NOTICE ********************************** //
if err = n.addEndpoint(ep); err != nil {
// ********************************** NOTICE ********************************** //
return nil, err
}
defer func() {
if err != nil {
if e := ep.deleteEndpoint(false); e != nil {
logrus.Warnf("cleaning up endpoint failed %s : %v", name, e)
}
}
}()
// We should perform updateToStore call right after addEndpoint
// in order to have iface properly configured
if err = n.getController().updateToStore(ep); err != nil {
return nil, err
}
defer func() {
if err != nil {
if e := n.getController().deleteFromStore(ep); e != nil {
logrus.Warnf("error rolling back endpoint %s from store: %v", name, e)
}
}
}()
if err = ep.assignAddress(ipam, false, n.enableIPv6 && n.postIPv6); err != nil {
return nil, err
}
// Watch for service records
n.getController().watchSvcRecord(ep)
defer func() {
if err != nil {
n.getController().unWatchSvcRecord(ep)
}
}()
// Increment endpoint count to indicate completion of endpoint addition
if err = n.getEpCnt().IncEndpointCnt(); err != nil {
return nil, err
}
return ep, nil
}
func (n *network) addEndpoint(ep *endpoint) error {
d, err := n.driver(true)
if err != nil {
return fmt.Errorf("failed to add endpoint: %v", err)
}
// ********************************** NOTICE ********************************** //
err = d.CreateEndpoint(n.id, ep.id, ep.Interface(), ep.generic)
// ********************************** NOTICE ********************************** //
if err != nil {
return types.InternalErrorf("failed to create endpoint %s on network %s: %v",
ep.Name(), n.Name(), err)
}
return nil
}
——— 1.2.1.1.2.4.1) libnetwork/drivers/bridge/bridge.go#driver.CreateEndpoint
func (d *driver) CreateEndpoint(nid, eid string, ifInfo driverapi.InterfaceInfo, epOptions map[string]interface{}) error {
defer osl.InitOSContext()()
if ifInfo == nil {
return errors.New("invalid interface info passed")
}
// Get the network handler and make sure it exists
d.Lock()
n, ok := d.networks[nid]
dconfig := d.config
d.Unlock()
if !ok {
return types.NotFoundErrorf("network %s does not exist", nid)
}
if n == nil {
return driverapi.ErrNoNetwork(nid)
}
// Sanity check
n.Lock()
if n.id != nid {
n.Unlock()
return InvalidNetworkIDError(nid)
}
n.Unlock()
// Check if endpoint id is good and retrieve correspondent endpoint
ep, err := n.getEndpoint(eid)
if err != nil {
return err
}
// Endpoint with that id exists either on desired or other sandbox
if ep != nil {
return driverapi.ErrEndpointExists(eid)
}
// Try to convert the options to endpoint configuration
epConfig, err := parseEndpointOptions(epOptions)
if err != nil {
return err
}
// Create and add the endpoint
n.Lock()
endpoint := &bridgeEndpoint{id: eid, nid: nid, config: epConfig}
n.endpoints[eid] = endpoint
n.Unlock()
// On failure make sure to remove the endpoint
defer func() {
if err != nil {
n.Lock()
delete(n.endpoints, eid)
n.Unlock()
}
}()
// Generate a name for what will be the host side pipe interface
hostIfName, err := netutils.GenerateIfaceName(d.nlh, vethPrefix, vethLen)
if err != nil {
return err
}
// Generate a name for what will be the sandbox side pipe interface
containerIfName, err := netutils.GenerateIfaceName(d.nlh, vethPrefix, vethLen)
if err != nil {
return err
}
// Generate and add the interface pipe host <-> sandbox
// ********************************** NOTICE ********************************** //
veth := &netlink.Veth{
LinkAttrs: netlink.LinkAttrs{Name: hostIfName, TxQLen: 0},
PeerName: containerIfName}
if err = d.nlh.LinkAdd(veth); err != nil {
// ********************************** NOTICE ********************************** //
return types.InternalErrorf("failed to add the host (%s) <=> sandbox (%s) pair interfaces: %v", hostIfName, containerIfName, err)
}
// Get the host side pipe interface handler
host, err := d.nlh.LinkByName(hostIfName)
if err != nil {
return types.InternalErrorf("failed to find host side interface %s: %v", hostIfName, err)
}
defer func() {
if err != nil {
if err := d.nlh.LinkDel(host); err != nil {
logrus.WithError(err).Warnf("Failed to delete host side interface (%s)'s link", hostIfName)
}
}
}()
// Get the sandbox side pipe interface handler
sbox, err := d.nlh.LinkByName(containerIfName)
if err != nil {
return types.InternalErrorf("failed to find sandbox side interface %s: %v", containerIfName, err)
}
defer func() {
if err != nil {
if err := d.nlh.LinkDel(sbox); err != nil {
logrus.WithError(err).Warnf("Failed to delete sandbox side interface (%s)'s link", containerIfName)
}
}
}()
n.Lock()
config := n.config
n.Unlock()
// Add bridge inherited attributes to pipe interfaces
if config.Mtu != 0 {
err = d.nlh.LinkSetMTU(host, config.Mtu)
if err != nil {
return types.InternalErrorf("failed to set MTU on host interface %s: %v", hostIfName, err)
}
err = d.nlh.LinkSetMTU(sbox, config.Mtu)
if err != nil {
return types.InternalErrorf("failed to set MTU on sandbox interface %s: %v", containerIfName, err)
}
}
// Attach host side pipe interface into the bridge
// ********************************** NOTICE ********************************** //
if err = addToBridge(d.nlh, hostIfName, config.BridgeName); err != nil {
// ********************************** NOTICE ********************************** //
return fmt.Errorf("adding interface %s to bridge %s failed: %v", hostIfName, config.BridgeName, err)
}
if !dconfig.EnableUserlandProxy {
err = setHairpinMode(d.nlh, host, true)
if err != nil {
return err
}
}
// Store the sandbox side pipe interface parameters
endpoint.srcName = containerIfName
endpoint.macAddress = ifInfo.MacAddress()
endpoint.addr = ifInfo.Address()
endpoint.addrv6 = ifInfo.AddressIPv6()
// Set the sbox's MAC if not provided. If specified, use the one configured by user, otherwise generate one based on IP.
if endpoint.macAddress == nil {
endpoint.macAddress = electMacAddress(epConfig, endpoint.addr.IP)
if err = ifInfo.SetMacAddress(endpoint.macAddress); err != nil {
return err
}
}
// Up the host interface after finishing all netlink configuration
if err = d.nlh.LinkSetUp(host); err != nil {
return fmt.Errorf("could not set link up for host interface %s: %v", hostIfName, err)
}
if endpoint.addrv6 == nil && config.EnableIPv6 {
var ip6 net.IP
network := n.bridge.bridgeIPv6
if config.AddressIPv6 != nil {
network = config.AddressIPv6
}
ones, _ := network.Mask.Size()
if ones > 80 {
err = types.ForbiddenErrorf("Cannot self generate an IPv6 address on network %v: At least 48 host bits are needed.", network)
return err
}
ip6 = make(net.IP, len(network.IP))
copy(ip6, network.IP)
for i, h := range endpoint.macAddress {
ip6[i+10] = h
}
endpoint.addrv6 = &net.IPNet{IP: ip6, Mask: network.Mask}
if err = ifInfo.SetIPAddress(endpoint.addrv6); err != nil {
return err
}
}
if err = d.storeUpdate(endpoint); err != nil {
return fmt.Errorf("failed to save bridge endpoint %.7s to store: %v", endpoint.id, err)
}
return nil
}
func addToBridge(nlh *netlink.Handle, ifaceName, bridgeName string) error {
link, err := nlh.LinkByName(ifaceName)
if err != nil {
return fmt.Errorf("could not find interface %s: %v", ifaceName, err)
}
if err = nlh.LinkSetMaster(link,
&netlink.Bridge{LinkAttrs: netlink.LinkAttrs{Name: bridgeName}}); err != nil {
logrus.Debugf("Failed to add %s to bridge via netlink.Trying ioctl: %v", ifaceName, err)
iface, err := net.InterfaceByName(ifaceName)
if err != nil {
return fmt.Errorf("could not find network interface %s: %v", ifaceName, err)
}
master, err := net.InterfaceByName(bridgeName)
if err != nil {
return fmt.Errorf("could not find bridge %s: %v", bridgeName, err)
}
return ioctlAddToBridge(iface, master)
}
return nil
}
——1.2.1.1.2.5) daemon/container_operations.go#Daemon.updateEndpointNetworkSettings
func (daemon *Daemon) updateEndpointNetworkSettings(container *container.Container, n libnetwork.Network, ep libnetwork.Endpoint) error {
if err := buildEndpointInfo(container.NetworkSettings, n, ep); err != nil {
return err
}
if container.HostConfig.NetworkMode == runconfig.DefaultDaemonNetworkMode() {
container.NetworkSettings.Bridge = daemon.configStore.BridgeConfig.Iface
}
return nil
}
——1.2.1.1.2.6) daemon/container_operations.go#Daemon.buildSandboxOptions
func (daemon *Daemon) buildSandboxOptions(container *container.Container) ([]libnetwork.SandboxOption, error) {
var (
sboxOptions []libnetwork.SandboxOption
err error
dns []string
dnsOptions []string
bindings = make(nat.PortMap)
pbList []types.PortBinding
exposeList []types.TransportPort
)
defaultNetName := runconfig.DefaultDaemonNetworkMode().NetworkName()
sboxOptions = append(sboxOptions, libnetwork.OptionHostname(container.Config.Hostname),
libnetwork.OptionDomainname(container.Config.Domainname))
if container.HostConfig.NetworkMode.IsHost() {
sboxOptions = append(sboxOptions, libnetwork.OptionUseDefaultSandbox())
} else {
// OptionUseExternalKey is mandatory for userns support.
// But optional for non-userns support
sboxOptions = append(sboxOptions, libnetwork.OptionUseExternalKey())
}
if err = daemon.setupPathsAndSandboxOptions(container, &sboxOptions); err != nil {
return nil, err
}
if len(container.HostConfig.DNS) > 0 {
dns = container.HostConfig.DNS
} else if len(daemon.configStore.DNS) > 0 {
dns = daemon.configStore.DNS
}
for _, d := range dns {
sboxOptions = append(sboxOptions, libnetwork.OptionDNS(d))
}
dnsSearch := daemon.getDNSSearchSettings(container)
for _, ds := range dnsSearch {
sboxOptions = append(sboxOptions, libnetwork.OptionDNSSearch(ds))
}
if len(container.HostConfig.DNSOptions) > 0 {
dnsOptions = container.HostConfig.DNSOptions
} else if len(daemon.configStore.DNSOptions) > 0 {
dnsOptions = daemon.configStore.DNSOptions
}
for _, ds := range dnsOptions {
sboxOptions = append(sboxOptions, libnetwork.OptionDNSOptions(ds))
}
if container.NetworkSettings.SecondaryIPAddresses != nil {
name := container.Config.Hostname
if container.Config.Domainname != "" {
name = name + "." + container.Config.Domainname
}
for _, a := range container.NetworkSettings.SecondaryIPAddresses {
sboxOptions = append(sboxOptions, libnetwork.OptionExtraHost(name, a.Addr))
}
}
for _, extraHost := range container.HostConfig.ExtraHosts {
// allow IPv6 addresses in extra hosts; only split on first ":"
if _, err := opts.ValidateExtraHost(extraHost); err != nil {
return nil, err
}
parts := strings.SplitN(extraHost, ":", 2)
sboxOptions = append(sboxOptions, libnetwork.OptionExtraHost(parts[0], parts[1]))
}
if container.HostConfig.PortBindings != nil {
for p, b := range container.HostConfig.PortBindings {
bindings[p] = []nat.PortBinding{}
for _, bb := range b {
bindings[p] = append(bindings[p], nat.PortBinding{
HostIP: bb.HostIP,
HostPort: bb.HostPort,
})
}
}
}
portSpecs := container.Config.ExposedPorts
ports := make([]nat.Port, len(portSpecs))
var i int
for p := range portSpecs {
ports[i] = p
i++
}
nat.SortPortMap(ports, bindings)
for _, port := range ports {
expose := types.TransportPort{}
expose.Proto = types.ParseProtocol(port.Proto())
expose.Port = uint16(port.Int())
exposeList = append(exposeList, expose)
pb := types.PortBinding{Port: expose.Port, Proto: expose.Proto}
binding := bindings[port]
for i := 0; i < len(binding); i++ {
pbCopy := pb.GetCopy()
newP, err := nat.NewPort(nat.SplitProtoPort(binding[i].HostPort))
var portStart, portEnd int
if err == nil {
portStart, portEnd, err = newP.Range()
}
if err != nil {
return nil, fmt.Errorf("Error parsing HostPort value(%s):%v", binding[i].HostPort, err)
}
pbCopy.HostPort = uint16(portStart)
pbCopy.HostPortEnd = uint16(portEnd)
pbCopy.HostIP = net.ParseIP(binding[i].HostIP)
pbList = append(pbList, pbCopy)
}
if container.HostConfig.PublishAllPorts && len(binding) == 0 {
pbList = append(pbList, pb)
}
}
sboxOptions = append(sboxOptions,
libnetwork.OptionPortMapping(pbList),
libnetwork.OptionExposedPorts(exposeList))
// Legacy Link feature is supported only for the default bridge network.
// return if this call to build join options is not for default bridge network
// Legacy Link is only supported by docker run --link
bridgeSettings, ok := container.NetworkSettings.Networks[defaultNetName]
if !ok || bridgeSettings.EndpointSettings == nil {
return sboxOptions, nil
}
if bridgeSettings.EndpointID == "" {
return sboxOptions, nil
}
var (
childEndpoints, parentEndpoints []string
cEndpointID string
)
children := daemon.children(container)
for linkAlias, child := range children {
if !isLinkable(child) {
return nil, fmt.Errorf("Cannot link to %s, as it does not belong to the default network", child.Name)
}
_, alias := path.Split(linkAlias)
// allow access to the linked container via the alias, real name, and container hostname
aliasList := alias + " " + child.Config.Hostname
// only add the name if alias isn't equal to the name
if alias != child.Name[1:] {
aliasList = aliasList + " " + child.Name[1:]
}
sboxOptions = append(sboxOptions, libnetwork.OptionExtraHost(aliasList, child.NetworkSettings.Networks[defaultNetName].IPAddress))
cEndpointID = child.NetworkSettings.Networks[defaultNetName].EndpointID
if cEndpointID != "" {
childEndpoints = append(childEndpoints, cEndpointID)
}
}
for alias, parent := range daemon.parents(container) {
if daemon.configStore.DisableBridge || !container.HostConfig.NetworkMode.IsPrivate() {
continue
}
_, alias = path.Split(alias)
logrus.Debugf("Update /etc/hosts of %s for alias %s with ip %s", parent.ID, alias, bridgeSettings.IPAddress)
sboxOptions = append(sboxOptions, libnetwork.OptionParentUpdate(
parent.ID,
alias,
bridgeSettings.IPAddress,
))
if cEndpointID != "" {
parentEndpoints = append(parentEndpoints, cEndpointID)
}
}
linkOptions := options.Generic{
netlabel.GenericData: options.Generic{
"ParentEndpoints": parentEndpoints,
"ChildEndpoints": childEndpoints,
},
}
sboxOptions = append(sboxOptions, libnetwork.OptionGeneric(linkOptions))
return sboxOptions, nil
}
——1.2.1.1.2.7-创建NetNS) libnetwork/controller.go#controller.NewSandbox
// NetworkController provides the interface for controller instance which manages
// networks.
type NetworkController interface {
// ID provides a unique identity for the controller
ID() string
// BuiltinDrivers returns list of builtin drivers
BuiltinDrivers() []string
// BuiltinIPAMDrivers returns list of builtin ipam drivers
BuiltinIPAMDrivers() []string
// Config method returns the bootup configuration for the controller
Config() config.Config
// Create a new network. The options parameter carries network specific options.
NewNetwork(networkType, name string, id string, options ...NetworkOption) (Network, error)
// Networks returns the list of Network(s) managed by this controller.
Networks() []Network
// WalkNetworks uses the provided function to walk the Network(s) managed by this controller.
WalkNetworks(walker NetworkWalker)
// NetworkByName returns the Network which has the passed name. If not found, the error ErrNoSuchNetwork is returned.
NetworkByName(name string) (Network, error)
// NetworkByID returns the Network which has the passed id. If not found, the error ErrNoSuchNetwork is returned.
NetworkByID(id string) (Network, error)
// NewSandbox creates a new network sandbox for the passed container id
NewSandbox(containerID string, options ...SandboxOption) (Sandbox, error)
// Sandboxes returns the list of Sandbox(s) managed by this controller.
Sandboxes() []Sandbox
// WalkSandboxes uses the provided function to walk the Sandbox(s) managed by this controller.
WalkSandboxes(walker SandboxWalker)
// SandboxByID returns the Sandbox which has the passed id. If not found, a types.NotFoundError is returned.
SandboxByID(id string) (Sandbox, error)
// SandboxDestroy destroys a sandbox given a container ID
SandboxDestroy(id string) error
// Stop network controller
Stop()
// ReloadConfiguration updates the controller configuration
ReloadConfiguration(cfgOptions ...config.Option) error
// SetClusterProvider sets cluster provider
SetClusterProvider(provider cluster.Provider)
// Wait for agent initialization complete in libnetwork controller
AgentInitWait()
// Wait for agent to stop if running
AgentStopWait()
// SetKeys configures the encryption key for gossip and overlay data path
SetKeys(keys []*types.EncryptionKey) error
// StartDiagnostic start the network diagnostic mode
StartDiagnostic(port int)
// StopDiagnostic start the network diagnostic mode
StopDiagnostic()
// IsDiagnosticEnabled returns true if the diagnostic is enabled
IsDiagnosticEnabled() bool
}
// NewSandbox creates a new sandbox for the passed container id
func (c *controller) NewSandbox(containerID string, options ...SandboxOption) (Sandbox, error) {
if containerID == "" {
return nil, types.BadRequestErrorf("invalid container ID")
}
var sb *sandbox
c.Lock()
for _, s := range c.sandboxes {
if s.containerID == containerID {
// If not a stub, then we already have a complete sandbox.
if !s.isStub {
sbID := s.ID()
c.Unlock()
return nil, types.ForbiddenErrorf("container %s is already present in sandbox %s", containerID, sbID)
}
// We already have a stub sandbox from the
// store. Make use of it so that we don't lose
// the endpoints from store but reset the
// isStub flag.
sb = s
sb.isStub = false
break
}
}
c.Unlock()
sandboxID := stringid.GenerateRandomID()
if runtime.GOOS == "windows" {
sandboxID = containerID
}
// Create sandbox and process options first. Key generation depends on an option
if sb == nil {
sb = &sandbox{
id: sandboxID,
containerID: containerID,
endpoints: []*endpoint{},
epPriority: map[string]int{},
populatedEndpoints: map[string]struct{}{},
config: containerConfig{},
controller: c,
extDNS: []extDNSEntry{},
}
}
sb.processOptions(options...)
c.Lock()
if sb.ingress && c.ingressSandbox != nil {
c.Unlock()
return nil, types.ForbiddenErrorf("ingress sandbox already present")
}
if sb.ingress {
c.ingressSandbox = sb
sb.config.hostsPath = filepath.Join(c.cfg.Daemon.DataDir, "/network/files/hosts")
sb.config.resolvConfPath = filepath.Join(c.cfg.Daemon.DataDir, "/network/files/resolv.conf")
sb.id = "ingress_sbox"
} else if sb.loadBalancerNID != "" {
sb.id = "lb_" + sb.loadBalancerNID
}
c.Unlock()
var err error
defer func() {
if err != nil {
c.Lock()
if sb.ingress {
c.ingressSandbox = nil
}
c.Unlock()
}
}()
// ********************************** NOTICE ********************************** //
if err = sb.setupResolutionFiles(); err != nil {
// ********************************** NOTICE ********************************** //
return nil, err
}
if sb.config.useDefaultSandBox {
c.sboxOnce.Do(func() {
c.defOsSbox, err = osl.NewSandbox(sb.Key(), false, false)
})
if err != nil {
c.sboxOnce = sync.Once{}
return nil, fmt.Errorf("failed to create default sandbox: %v", err)
}
sb.osSbox = c.defOsSbox
}
if sb.osSbox == nil && !sb.config.useExternalKey {
// ********************************** NOTICE ********************************** //
if sb.osSbox, err = osl.NewSandbox(sb.Key(), !sb.config.useDefaultSandBox, false);
err != nil {
// ********************************** NOTICE ********************************** //
return nil, fmt.Errorf("failed to create new osl sandbox: %v", err)
}
}
if sb.osSbox != nil {
// Apply operating specific knobs on the load balancer sandbox
sb.osSbox.ApplyOSTweaks(sb.oslTypes)
}
c.Lock()
c.sandboxes[sb.id] = sb
c.Unlock()
defer func() {
if err != nil {
c.Lock()
delete(c.sandboxes, sb.id)
c.Unlock()
}
}()
err = sb.storeUpdate()
if err != nil {
return nil, fmt.Errorf("failed to update the store state of sandbox: %v", err)
}
return sb, nil
}
———1.2.1.1.2.7.1) libnetwork/sandbox_dns_unix.go#sandbox.setupResolutionFiles
func (sb *sandbox) setupResolutionFiles() error {
// ********************************** NOTICE ********************************** //
if err := sb.buildHostsFile(); err != nil {
// ********************************** NOTICE ********************************** //
return err
}
if err := sb.updateParentHosts(); err != nil {
return err
}
// ********************************** NOTICE ********************************** //
return sb.setupDNS()
// ********************************** NOTICE ********************************** //
}
————1.2.1.1.2.7.1.1) libnetwork/sandbox_dns_unix.go#sandbox.buildHostsFile
func (sb *sandbox) buildHostsFile() error {
if sb.config.hostsPath == "" {
sb.config.hostsPath = defaultPrefix + "/" + sb.id + "/hosts"
}
dir, _ := filepath.Split(sb.config.hostsPath)
if err := createBasePath(dir); err != nil {
return err
}
// This is for the host mode networking
if sb.config.useDefaultSandBox && len(sb.config.extraHosts) == 0 {
// We are working under the assumption that the origin file option had been properly expressed by the upper layer
// if not here we are going to error out
if err := copyFile(sb.config.originHostsPath, sb.config.hostsPath); err != nil && !os.IsNotExist(err) {
return types.InternalErrorf("could not copy source hosts file %s to %s: %v", sb.config.originHostsPath, sb.config.hostsPath, err)
}
return nil
}
extraContent := make([]etchosts.Record, 0, len(sb.config.extraHosts))
for _, extraHost := range sb.config.extraHosts {
extraContent = append(extraContent, etchosts.Record{Hosts: extraHost.name, IP: extraHost.IP})
}
return etchosts.Build(sb.config.hostsPath, "", sb.config.hostName, sb.config.domainName, extraContent)
}
————1.2.1.1.2.7.1.2) libnetwork/sandbox_dns_unix.go#sandbox.setupDNS
func (sb *sandbox) setupDNS() error {
var newRC *resolvconf.File
if sb.config.resolvConfPath == "" {
sb.config.resolvConfPath = defaultPrefix + "/" + sb.id + "/resolv.conf"
}
sb.config.resolvConfHashFile = sb.config.resolvConfPath + ".hash"
dir, _ := filepath.Split(sb.config.resolvConfPath)
if err := createBasePath(dir); err != nil {
return err
}
// When the user specify a conainter in the host namespace and do no have any dns option specified
// we just copy the host resolv.conf from the host itself
if sb.config.useDefaultSandBox &&
len(sb.config.dnsList) == 0 && len(sb.config.dnsSearchList) == 0 && len(sb.config.dnsOptionsList) == 0 {
// We are working under the assumption that the origin file option had been properly expressed by the upper layer
// if not here we are going to error out
if err := copyFile(sb.config.originResolvConfPath, sb.config.resolvConfPath); err != nil {
if !os.IsNotExist(err) {
return fmt.Errorf("could not copy source resolv.conf file %s to %s: %v", sb.config.originResolvConfPath, sb.config.resolvConfPath, err)
}
logrus.Infof("%s does not exist, we create an empty resolv.conf for container", sb.config.originResolvConfPath)
if err := createFile(sb.config.resolvConfPath); err != nil {
return err
}
}
return nil
}
originResolvConfPath := sb.config.originResolvConfPath
if originResolvConfPath == "" {
// if not specified fallback to default /etc/resolv.conf
originResolvConfPath = resolvconf.DefaultResolvConf
}
currRC, err := resolvconf.GetSpecific(originResolvConfPath)
if err != nil {
if !os.IsNotExist(err) {
return err
}
// it's ok to continue if /etc/resolv.conf doesn't exist, default resolvers (Google's Public DNS)
// will be used
currRC = &resolvconf.File{}
logrus.Infof("/etc/resolv.conf does not exist")
}
if len(sb.config.dnsList) > 0 || len(sb.config.dnsSearchList) > 0 || len(sb.config.dnsOptionsList) > 0 {
var (
err error
dnsList = resolvconf.GetNameservers(currRC.Content, types.IP)
dnsSearchList = resolvconf.GetSearchDomains(currRC.Content)
dnsOptionsList = resolvconf.GetOptions(currRC.Content)
)
if len(sb.config.dnsList) > 0 {
dnsList = sb.config.dnsList
}
if len(sb.config.dnsSearchList) > 0 {
dnsSearchList = sb.config.dnsSearchList
}
if len(sb.config.dnsOptionsList) > 0 {
dnsOptionsList = sb.config.dnsOptionsList
}
newRC, err = resolvconf.Build(sb.config.resolvConfPath, dnsList, dnsSearchList, dnsOptionsList)
if err != nil {
return err
}
// After building the resolv.conf from the user config save the
// external resolvers in the sandbox. Note that --dns 127.0.0.x
// config refers to the loopback in the container namespace
sb.setExternalResolvers(newRC.Content, types.IPv4, false)
} else {
// If the host resolv.conf file has 127.0.0.x container should
// use the host resolver for queries. This is supported by the
// docker embedded DNS server. Hence save the external resolvers
// before filtering it out.
sb.setExternalResolvers(currRC.Content, types.IPv4, true)
// Replace any localhost/127.* (at this point we have no info about ipv6, pass it as true)
if newRC, err = resolvconf.FilterResolvDNS(currRC.Content, true); err != nil {
return err
}
// No contention on container resolv.conf file at sandbox creation
if err := ioutil.WriteFile(sb.config.resolvConfPath, newRC.Content, filePerm); err != nil {
return types.InternalErrorf("failed to write unhaltered resolv.conf file content when setting up dns for sandbox %s: %v", sb.ID(), err)
}
}
// Write hash
if err := ioutil.WriteFile(sb.config.resolvConfHashFile, []byte(newRC.Hash), filePerm); err != nil {
return types.InternalErrorf("failed to write resolv.conf hash file when setting up dns for sandbox %s: %v", sb.ID(), err)
}
return nil
}
———1.2.1.1.2.7.2) libnetwork/osl/namespace_linux.go#NewSandbox
// NewSandbox provides a new sandbox instance created in an os specific way
// provided a key which uniquely identifies the sandbox
func NewSandbox(key string, osCreate, isRestore bool) (Sandbox, error) {
if !isRestore {
// ********************************** NOTICE ********************************** //
err := createNetworkNamespace(key, osCreate)
// ********************************** NOTICE ********************************** //
if err != nil {
return nil, err
}
} else {
once.Do(createBasePath)
}
n := &networkNamespace{path: key, isDefault: !osCreate, nextIfIndex: make(map[string]int)}
// ********************************** NOTICE ********************************** //
sboxNs, err := netns.GetFromPath(n.path)
// ********************************** NOTICE ********************************** //
if err != nil {
return nil, fmt.Errorf("failed get network namespace %q: %v", n.path, err)
}
defer sboxNs.Close()
n.nlHandle, err = netlink.NewHandleAt(sboxNs, syscall.NETLINK_ROUTE)
if err != nil {
return nil, fmt.Errorf("failed to create a netlink handle: %v", err)
}
err = n.nlHandle.SetSocketTimeout(ns.NetlinkSocketsTimeout)
if err != nil {
logrus.Warnf("Failed to set the timeout on the sandbox netlink handle sockets: %v", err)
}
// In live-restore mode, IPV6 entries are getting cleaned up due to below code
// We should retain IPV6 configurations in live-restore mode when Docker Daemon
// comes back. It should work as it is on other cases
// As starting point, disable IPv6 on all interfaces
if !isRestore && !n.isDefault {
err = setIPv6(n.path, "all", false)
if err != nil {
logrus.Warnf("Failed to disable IPv6 on all interfaces on network namespace %q: %v", n.path, err)
}
}
if err = n.loopbackUp(); err != nil {
n.nlHandle.Delete()
return nil, err
}
return n, nil
}
————1.2.1.1.2.7.2.1) libnetwork/osl/namespace_linux.go#createNetworkNamespace
func createNetworkNamespace(path string, osCreate bool) error {
// ********************************** NOTICE ********************************** //
if err := createNamespaceFile(path); err != nil {
// ********************************** NOTICE ********************************** //
return err
}
cmd := &exec.Cmd{
Path: reexec.Self(),
Args: append([]string{"netns-create"}, path),
Stdout: os.Stdout,
Stderr: os.Stderr,
}
if osCreate {
cmd.SysProcAttr = &syscall.SysProcAttr{}
cmd.SysProcAttr.Cloneflags = syscall.CLONE_NEWNET
}
if err := cmd.Run(); err != nil {
return fmt.Errorf("namespace creation reexec command failed: %v", err)
}
return nil
}
func createNamespaceFile(path string) (err error) {
var f *os.File
once.Do(createBasePath)
// Remove it from garbage collection list if present
removeFromGarbagePaths(path)
// If the path is there unmount it first
unmountNamespaceFile(path)
// wait for garbage collection to complete if it is in progress
// before trying to create the file.
gpmWg.Wait()
if f, err = os.Create(path); err == nil {
f.Close()
}
return err
}
func createBasePath() {
err := os.MkdirAll(basePath(), 0755)
if err != nil {
panic("Could not create net namespace path directory")
}
// Start the garbage collection go routine
go removeUnusedPaths()
}
// prefix是/var/run/docker
func basePath() string {
return filepath.Join(prefix, "netns")
}
func init() {
reexec.Register("netns-create", reexecCreateNamespace)
}
func reexecCreateNamespace() {
if len(os.Args) < 2 {
logrus.Fatal("no namespace path provided")
}
if err := mountNetworkNamespace("/proc/self/ns/net", os.Args[1]); err != nil {
logrus.Fatal(err)
}
}
func mountNetworkNamespace(basePath string, lnPath string) error {
return syscall.Mount(basePath, lnPath, "bind", syscall.MS_BIND, "")
}
——1.2.1.1.2.8) daemon/container_operations.go#updateSandboxNetworkSettings
// updateSandboxNetworkSettings updates the sandbox ID and Key.
func updateSandboxNetworkSettings(c *container.Container, sb libnetwork.Sandbox) error {
c.NetworkSettings.SandboxID = sb.ID()
c.NetworkSettings.SandboxKey = sb.Key()
return nil
}
——1.2.1.1.2.9) daemon/network.go#buildJoinOptions
// buildJoinOptions builds endpoint Join options from a given network.
func buildJoinOptions(networkSettings *internalnetwork.Settings, n interface {
Name() string
}) ([]libnetwork.EndpointOption, error) {
var joinOptions []libnetwork.EndpointOption
if epConfig, ok := networkSettings.Networks[n.Name()]; ok {
for _, str := range epConfig.Links {
name, alias, err := opts.ParseLink(str)
if err != nil {
return nil, err
}
joinOptions = append(joinOptions, libnetwork.CreateOptionAlias(name, alias))
}
for k, v := range epConfig.DriverOpts {
joinOptions = append(joinOptions, libnetwork.EndpointOptionGeneric(options.Generic{k: v}))
}
}
return joinOptions, nil
}
——1.2.1.1.2.10) libnetwork/endpoint.go#endpoint.Join
func (ep *endpoint) Join(sbox Sandbox, options ...EndpointOption) error {
if sbox == nil {
return types.BadRequestErrorf("endpoint cannot be joined by nil container")
}
sb, ok := sbox.(*sandbox)
if !ok {
return types.BadRequestErrorf("not a valid Sandbox interface")
}
sb.joinLeaveStart()
defer sb.joinLeaveEnd()
// ********************************** NOTICE ********************************** //
return ep.sbJoin(sb, options...)
// ********************************** NOTICE ********************************** //
}
func (ep *endpoint) sbJoin(sb *sandbox, options ...EndpointOption) (err error) {
n, err := ep.getNetworkFromStore()
if err != nil {
return fmt.Errorf("failed to get network from store during join: %v", err)
}
ep, err = n.getEndpointFromStore(ep.ID())
if err != nil {
return fmt.Errorf("failed to get endpoint from store during join: %v", err)
}
ep.Lock()
if ep.sandboxID != "" {
ep.Unlock()
return types.ForbiddenErrorf("another container is attached to the same network endpoint")
}
ep.network = n
ep.sandboxID = sb.ID()
ep.joinInfo = &endpointJoinInfo{}
epid := ep.id
ep.Unlock()
defer func() {
if err != nil {
ep.Lock()
ep.sandboxID = ""
ep.Unlock()
}
}()
nid := n.ID()
ep.processOptions(options...)
d, err := n.driver(true)
if err != nil {
return fmt.Errorf("failed to get driver during join: %v", err)
}
// ********************************** NOTICE ********************************** //
err = d.Join(nid, epid, sb.Key(), ep, sb.Labels())
// ********************************** NOTICE ********************************** //
if err != nil {
return err
}
defer func() {
if err != nil {
if e := d.Leave(nid, epid); e != nil {
logrus.Warnf("driver leave failed while rolling back join: %v", e)
}
}
}()
// Watch for service records
if !n.getController().isAgent() {
n.getController().watchSvcRecord(ep)
}
if doUpdateHostsFile(n, sb) {
address := ""
if ip := ep.getFirstInterfaceAddress(); ip != nil {
address = ip.String()
}
if err = sb.updateHostsFile(address); err != nil {
return err
}
}
if err = sb.updateDNS(n.enableIPv6); err != nil {
return err
}
// Current endpoint providing external connectivity for the sandbox
extEp := sb.getGatewayEndpoint()
// ********************************** NOTICE ********************************** //
sb.addEndpoint(ep)
// ********************************** NOTICE ********************************** //
defer func() {
if err != nil {
sb.removeEndpoint(ep)
}
}()
// ********************************** NOTICE ********************************** //
if err = sb.populateNetworkResources(ep); err != nil {
// ********************************** NOTICE ********************************** //
return err
}
if err = n.getController().updateToStore(ep); err != nil {
return err
}
if err = ep.addDriverInfoToCluster(); err != nil {
return err
}
defer func() {
if err != nil {
if e := ep.deleteDriverInfoFromCluster(); e != nil {
logrus.Errorf("Could not delete endpoint state for endpoint %s from cluster on join failure: %v", ep.Name(), e)
}
}
}()
// Load balancing endpoints should never have a default gateway nor
// should they alter the status of a network's default gateway
if ep.loadBalancer && !sb.ingress {
return nil
}
if sb.needDefaultGW() && sb.getEndpointInGWNetwork() == nil {
return sb.setupDefaultGW()
}
moveExtConn := sb.getGatewayEndpoint() != extEp
if moveExtConn {
if extEp != nil {
logrus.Debugf("Revoking external connectivity on endpoint %s (%s)", extEp.Name(), extEp.ID())
extN, err := extEp.getNetworkFromStore()
if err != nil {
return fmt.Errorf("failed to get network from store for revoking external connectivity during join: %v", err)
}
extD, err := extN.driver(true)
if err != nil {
return fmt.Errorf("failed to get driver for revoking external connectivity during join: %v", err)
}
if err = extD.RevokeExternalConnectivity(extEp.network.ID(), extEp.ID()); err != nil {
return types.InternalErrorf(
"driver failed revoking external connectivity on endpoint %s (%s): %v",
extEp.Name(), extEp.ID(), err)
}
defer func() {
if err != nil {
if e := extD.ProgramExternalConnectivity(extEp.network.ID(), extEp.ID(), sb.Labels()); e != nil {
logrus.Warnf("Failed to roll-back external connectivity on endpoint %s (%s): %v",
extEp.Name(), extEp.ID(), e)
}
}
}()
}
if !n.internal {
logrus.Debugf("Programming external connectivity on endpoint %s (%s)", ep.Name(), ep.ID())
if err = d.ProgramExternalConnectivity(n.ID(), ep.ID(), sb.Labels()); err != nil {
return types.InternalErrorf(
"driver failed programming external connectivity on endpoint %s (%s): %v",
ep.Name(), ep.ID(), err)
}
}
}
if !sb.needDefaultGW() {
if e := sb.clearDefaultGW(); e != nil {
logrus.Warnf("Failure while disconnecting sandbox %s (%s) from gateway network: %v",
sb.ID(), sb.ContainerID(), e)
}
}
return nil
}
———1.2.1.1.2.10.1) libnetwork/drivers/bridge/bridge.go#driver#Join
// Join method is invoked when a Sandbox is attached to an endpoint.
func (d *driver) Join(nid, eid string, sboxKey string, jinfo driverapi.JoinInfo, options map[string]interface{}) error {
defer osl.InitOSContext()()
network, err := d.getNetwork(nid)
if err != nil {
return err
}
endpoint, err := network.getEndpoint(eid)
if err != nil {
return err
}
if endpoint == nil {
return EndpointNotFoundError(eid)
}
endpoint.containerConfig, err = parseContainerOptions(options)
if err != nil {
return err
}
iNames := jinfo.InterfaceName()
containerVethPrefix := defaultContainerVethPrefix
if network.config.ContainerIfacePrefix != "" {
containerVethPrefix = network.config.ContainerIfacePrefix
}
err = iNames.SetNames(endpoint.srcName, containerVethPrefix)
if err != nil {
return err
}
err = jinfo.SetGateway(network.bridge.gatewayIPv4)
if err != nil {
return err
}
err = jinfo.SetGatewayIPv6(network.bridge.gatewayIPv6)
if err != nil {
return err
}
return nil
}
———1.2.1.1.2.10.2) libnetwork/sandbox.go#sandbox.populateNetworkResources
func (sb *sandbox) populateNetworkResources(ep *endpoint) error {
sb.Lock()
if sb.osSbox == nil {
sb.Unlock()
return nil
}
inDelete := sb.inDelete
sb.Unlock()
ep.Lock()
joinInfo := ep.joinInfo
i := ep.iface
lbModeIsDSR := ep.network.loadBalancerMode == loadBalancerModeDSR
ep.Unlock()
if ep.needResolver() {
sb.startResolver(false)
}
if i != nil && i.srcName != "" {
var ifaceOptions []osl.IfaceOption
ifaceOptions = append(ifaceOptions, sb.osSbox.InterfaceOptions().Address(i.addr), sb.osSbox.InterfaceOptions().Routes(i.routes))
if i.addrv6 != nil && i.addrv6.IP.To16() != nil {
ifaceOptions = append(ifaceOptions, sb.osSbox.InterfaceOptions().AddressIPv6(i.addrv6))
}
if len(i.llAddrs) != 0 {
ifaceOptions = append(ifaceOptions, sb.osSbox.InterfaceOptions().LinkLocalAddresses(i.llAddrs))
}
if i.mac != nil {
ifaceOptions = append(ifaceOptions, sb.osSbox.InterfaceOptions().MacAddress(i.mac))
}
// ********************************** NOTICE ********************************** //
if err := sb.osSbox.AddInterface(i.srcName, i.dstPrefix, ifaceOptions...); err != nil {
// ********************************** NOTICE ********************************** //
return fmt.Errorf("failed to add interface %s to sandbox: %v", i.srcName, err)
}
if len(ep.virtualIP) > 0 && lbModeIsDSR {
if sb.loadBalancerNID == "" {
if err := sb.osSbox.DisableARPForVIP(i.srcName); err != nil {
return fmt.Errorf("failed disable ARP for VIP: %v", err)
}
}
ipNet := &net.IPNet{IP: ep.virtualIP, Mask: net.CIDRMask(32, 32)}
if err := sb.osSbox.AddAliasIP(sb.osSbox.GetLoopbackIfaceName(), ipNet); err != nil {
return fmt.Errorf("failed to add virtual ip %v to loopback: %v", ipNet, err)
}
}
}
if joinInfo != nil {
// Set up non-interface routes.
for _, r := range joinInfo.StaticRoutes {
if err := sb.osSbox.AddStaticRoute(r); err != nil {
return fmt.Errorf("failed to add static route %s: %v", r.Destination.String(), err)
}
}
}
if ep == sb.getGatewayEndpoint() {
if err := sb.updateGateway(ep); err != nil {
return err
}
}
// Make sure to add the endpoint to the populated endpoint set
// before populating loadbalancers.
sb.Lock()
sb.populatedEndpoints[ep.ID()] = struct{}{}
sb.Unlock()
// Populate load balancer only after updating all the other
// information including gateway and other routes so that
// loadbalancers are populated all the network state is in
// place in the sandbox.
sb.populateLoadBalancers(ep)
// Only update the store if we did not come here as part of
// sandbox delete. If we came here as part of delete then do
// not bother updating the store. The sandbox object will be
// deleted anyway
if !inDelete {
return sb.storeUpdate()
}
return nil
}
————1.2.1.1.2.10.2.1) libnetwork/osl/interface_linux.go#networkNamespace#AddInterface
func (n *networkNamespace) AddInterface(srcName, dstPrefix string, options ...IfaceOption) error {
i := &nwIface{srcName: srcName, dstName: dstPrefix, ns: n}
i.processInterfaceOptions(options...)
if i.master != "" {
i.dstMaster = n.findDst(i.master, true)
if i.dstMaster == "" {
return fmt.Errorf("could not find an appropriate master %q for %q",
i.master, i.srcName)
}
}
n.Lock()
if n.isDefault {
i.dstName = i.srcName
} else {
i.dstName = fmt.Sprintf("%s%d", dstPrefix, n.nextIfIndex[dstPrefix])
n.nextIfIndex[dstPrefix]++
}
path := n.path
isDefault := n.isDefault
nlh := n.nlHandle
nlhHost := ns.NlHandle()
n.Unlock()
// If it is a bridge interface we have to create the bridge inside
// the namespace so don't try to lookup the interface using srcName
if i.bridge {
link := &netlink.Bridge{
LinkAttrs: netlink.LinkAttrs{
Name: i.srcName,
},
}
if err := nlh.LinkAdd(link); err != nil {
return fmt.Errorf("failed to create bridge %q: %v", i.srcName, err)
}
} else {
// Find the network interface identified by the SrcName attribute.
iface, err := nlhHost.LinkByName(i.srcName)
if err != nil {
return fmt.Errorf("failed to get link by name %q: %v", i.srcName, err)
}
// Move the network interface to the destination
// namespace only if the namespace is not a default
// type
if !isDefault {
newNs, err := netns.GetFromPath(path)
if err != nil {
return fmt.Errorf("failed get network namespace %q: %v", path, err)
}
defer newNs.Close()
if err := nlhHost.LinkSetNsFd(iface, int(newNs)); err != nil {
return fmt.Errorf("failed to set namespace on link %q: %v", i.srcName, err)
}
}
}
// Find the network interface identified by the SrcName attribute.
iface, err := nlh.LinkByName(i.srcName)
if err != nil {
return fmt.Errorf("failed to get link by name %q: %v", i.srcName, err)
}
// Down the interface before configuring
if err := nlh.LinkSetDown(iface); err != nil {
return fmt.Errorf("failed to set link down: %v", err)
}
// Configure the interface now this is moved in the proper namespace.
if err := configureInterface(nlh, iface, i); err != nil {
// If configuring the device fails move it back to the host namespace
// and change the name back to the source name. This allows the caller
// to properly cleanup the interface. Its important especially for
// interfaces with global attributes, ex: vni id for vxlan interfaces.
if nerr := nlh.LinkSetName(iface, i.SrcName()); nerr != nil {
logrus.Errorf("renaming interface (%s->%s) failed, %v after config error %v", i.DstName(), i.SrcName(), nerr, err)
}
if nerr := nlh.LinkSetNsFd(iface, ns.ParseHandlerInt()); nerr != nil {
logrus.Errorf("moving interface %s to host ns failed, %v, after config error %v", i.SrcName(), nerr, err)
}
return err
}
// Up the interface.
cnt := 0
for err = nlh.LinkSetUp(iface); err != nil && cnt < 3; cnt++ {
logrus.Debugf("retrying link setup because of: %v", err)
time.Sleep(10 * time.Millisecond)
err = nlh.LinkSetUp(iface)
}
if err != nil {
return fmt.Errorf("failed to set link up: %v", err)
}
// Set the routes on the interface. This can only be done when the interface is up.
if err := setInterfaceRoutes(nlh, iface, i); err != nil {
return fmt.Errorf("error setting interface %q routes to %q: %v", iface.Attrs().Name, i.Routes(), err)
}
n.Lock()
n.iFaces = append(n.iFaces, i)
n.Unlock()
n.checkLoV6()
return nil
}
—1.2.1.1.3) container/container.go#Container.WriteHostConfig
// WriteHostConfig saves the host configuration on disk for the container,
// and returns a deep copy of the saved object. Callers must hold a Container lock.
func (container *Container) WriteHostConfig() (*containertypes.HostConfig, error) {
var (
buf bytes.Buffer
deepCopy containertypes.HostConfig
)
pth, err := container.HostConfigPath()
if err != nil {
return nil, err
}
f, err := ioutils.NewAtomicFileWriter(pth, 0644)
if err != nil {
return nil, err
}
defer f.Close()
w := io.MultiWriter(&buf, f)
if err := json.NewEncoder(w).Encode(&container.HostConfig); err != nil {
return nil, err
}
if err := json.NewDecoder(&buf).Decode(&deepCopy); err != nil {
return nil, err
}
return &deepCopy, nil
}
1.2.1.2) container/container_unix.go#Container.BuildHostnameFile
// BuildHostnameFile writes the container's hostname file.
func (container *Container) BuildHostnameFile() error {
hostnamePath, err := container.GetRootResourcePath("hostname")
if err != nil {
return err
}
container.HostnamePath = hostnamePath
return ioutil.WriteFile(container.HostnamePath, []byte(container.Config.Hostname+"\n"), 0644)
}
1.2.2) daemon/oci_linux.go#Daemon.createSpec
func (daemon *Daemon) createSpec(c *container.Container) (retSpec *specs.Spec, err error) {
// ********************************** NOTICE ********************************** //
s := oci.DefaultSpec()
// ********************************** NOTICE ********************************** //
if err := daemon.populateCommonSpec(&s, c); err != nil {
// ********************************** NOTICE ********************************** //
return nil, err
}
var cgroupsPath string
scopePrefix := "docker"
parent := "/docker"
useSystemd := UsingSystemd(daemon.configStore)
if useSystemd {
parent = "system.slice"
}
if c.HostConfig.CgroupParent != "" {
parent = c.HostConfig.CgroupParent
} else if daemon.configStore.CgroupParent != "" {
parent = daemon.configStore.CgroupParent
}
if useSystemd {
cgroupsPath = parent + ":" + scopePrefix + ":" + c.ID
logrus.Debugf("createSpec: cgroupsPath: %s", cgroupsPath)
} else {
cgroupsPath = filepath.Join(parent, c.ID)
}
s.Linux.CgroupsPath = cgroupsPath
if err := setResources(&s, c.HostConfig.Resources); err != nil {
return nil, fmt.Errorf("linux runtime spec resources: %v", err)
}
// We merge the sysctls injected above with the HostConfig (latter takes
// precedence for backwards-compatibility reasons).
for k, v := range c.HostConfig.Sysctls {
s.Linux.Sysctl[k] = v
}
p := s.Linux.CgroupsPath
if useSystemd {
initPath, err := cgroups.GetInitCgroup("cpu")
if err != nil {
return nil, err
}
_, err = cgroups.GetOwnCgroup("cpu")
if err != nil {
return nil, err
}
p = filepath.Join(initPath, s.Linux.CgroupsPath)
}
// Clean path to guard against things like ../../../BAD
parentPath := filepath.Dir(p)
if !filepath.IsAbs(parentPath) {
parentPath = filepath.Clean("/" + parentPath)
}
if err := daemon.initCgroupsPath(parentPath); err != nil {
return nil, fmt.Errorf("linux init cgroups path: %v", err)
}
if err := setDevices(&s, c); err != nil {
return nil, fmt.Errorf("linux runtime spec devices: %v", err)
}
if err := daemon.setRlimits(&s, c); err != nil {
return nil, fmt.Errorf("linux runtime spec rlimits: %v", err)
}
if err := setUser(&s, c); err != nil {
return nil, fmt.Errorf("linux spec user: %v", err)
}
// ********************************** NOTICE ********************************** //
if err := setNamespaces(daemon, &s, c); err != nil {
// ********************************** NOTICE ********************************** //
return nil, fmt.Errorf("linux spec namespaces: %v", err)
}
capabilities, err := caps.TweakCapabilities(oci.DefaultCapabilities(), c.HostConfig.CapAdd, c.HostConfig.CapDrop, c.HostConfig.Capabilities, c.HostConfig.Privileged)
if err != nil {
return nil, fmt.Errorf("linux spec capabilities: %v", err)
}
if err := oci.SetCapabilities(&s, capabilities); err != nil {
return nil, fmt.Errorf("linux spec capabilities: %v", err)
}
if err := setSeccomp(daemon, &s, c); err != nil {
return nil, fmt.Errorf("linux seccomp: %v", err)
}
// ********************************** NOTICE ********************************** //
if err := daemon.setupContainerMountsRoot(c); err != nil {
// ********************************** NOTICE ********************************** //
return nil, err
}
if err := daemon.setupIpcDirs(c); err != nil {
return nil, err
}
defer func() {
if err != nil {
daemon.cleanupSecretDir(c)
}
}()
if err := daemon.setupSecretDir(c); err != nil {
return nil, err
}
// ********************************** NOTICE ********************************** //
ms, err := daemon.setupMounts(c)
// ********************************** NOTICE ********************************** //
if err != nil {
return nil, err
}
if !c.HostConfig.IpcMode.IsPrivate() && !c.HostConfig.IpcMode.IsEmpty() {
ms = append(ms, c.IpcMounts()...)
}
tmpfsMounts, err := c.TmpfsMounts()
if err != nil {
return nil, err
}
ms = append(ms, tmpfsMounts...)
secretMounts, err := c.SecretMounts()
if err != nil {
return nil, err
}
ms = append(ms, secretMounts...)
sort.Sort(mounts(ms))
if err := setMounts(daemon, &s, c, ms); err != nil {
return nil, fmt.Errorf("linux mounts: %v", err)
}
for _, ns := range s.Linux.Namespaces {
if ns.Type == "network" && ns.Path == "" && !c.Config.NetworkDisabled {
target := filepath.Join("/proc", strconv.Itoa(os.Getpid()), "exe")
s.Hooks = &specs.Hooks{
Prestart: []specs.Hook{{
Path: target,
Args: []string{"libnetwork-setkey", "-exec-root=" + daemon.configStore.GetExecRoot(), c.ID, daemon.netController.ID()},
}},
}
}
}
if apparmor.IsEnabled() {
var appArmorProfile string
if c.AppArmorProfile != "" {
appArmorProfile = c.AppArmorProfile
} else if c.HostConfig.Privileged {
appArmorProfile = "unconfined"
} else {
appArmorProfile = "docker-default"
}
if appArmorProfile == "docker-default" {
// Unattended upgrades and other fun services can unload AppArmor
// profiles inadvertently. Since we cannot store our profile in
// /etc/apparmor.d, nor can we practically add other ways of
// telling the system to keep our profile loaded, in order to make
// sure that we keep the default profile enabled we dynamically
// reload it if necessary.
if err := ensureDefaultAppArmorProfile(); err != nil {
return nil, err
}
}
s.Process.ApparmorProfile = appArmorProfile
}
s.Process.SelinuxLabel = c.GetProcessLabel()
s.Process.NoNewPrivileges = c.NoNewPrivileges
s.Process.OOMScoreAdj = &c.HostConfig.OomScoreAdj
s.Linux.MountLabel = c.MountLabel
// Set the masked and readonly paths with regard to the host config options if they are set.
if c.HostConfig.MaskedPaths != nil {
s.Linux.MaskedPaths = c.HostConfig.MaskedPaths
}
if c.HostConfig.ReadonlyPaths != nil {
s.Linux.ReadonlyPaths = c.HostConfig.ReadonlyPaths
}
if daemon.configStore.Rootless {
if err := specconv.ToRootless(&s); err != nil {
return nil, err
}
}
return &s, nil
}
1.2.2.1) oci/defaults.go#DefaultSpec
// DefaultSpec returns the default spec used by docker for the current Platform
func DefaultSpec() specs.Spec {
return DefaultOSSpec(runtime.GOOS)
}
// DefaultLinuxSpec create a default spec for running Linux containers
func DefaultLinuxSpec() specs.Spec {
s := specs.Spec{
Version: specs.Version,
Process: &specs.Process{
Capabilities: &specs.LinuxCapabilities{
Bounding: DefaultCapabilities(),
Permitted: DefaultCapabilities(),
Inheritable: DefaultCapabilities(),
Effective: DefaultCapabilities(),
},
},
Root: &specs.Root{},
}
s.Mounts = []specs.Mount{
{
Destination: "/proc",
Type: "proc",
Source: "proc",
Options: []string{"nosuid", "noexec", "nodev"},
},
{
Destination: "/dev",
Type: "tmpfs",
Source: "tmpfs",
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
},
{
Destination: "/dev/pts",
Type: "devpts",
Source: "devpts",
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
},
{
Destination: "/sys",
Type: "sysfs",
Source: "sysfs",
Options: []string{"nosuid", "noexec", "nodev", "ro"},
},
{
Destination: "/sys/fs/cgroup",
Type: "cgroup",
Source: "cgroup",
Options: []string{"ro", "nosuid", "noexec", "nodev"},
},
{
Destination: "/dev/mqueue",
Type: "mqueue",
Source: "mqueue",
Options: []string{"nosuid", "noexec", "nodev"},
},
{
Destination: "/dev/shm",
Type: "tmpfs",
Source: "shm",
Options: []string{"nosuid", "noexec", "nodev", "mode=1777"},
},
}
s.Linux = &specs.Linux{
MaskedPaths: []string{
"/proc/asound",
"/proc/acpi",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/proc/scsi",
"/sys/firmware",
},
ReadonlyPaths: []string{
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger",
},
Namespaces: []specs.LinuxNamespace{
{Type: "mount"},
{Type: "network"},
{Type: "uts"},
{Type: "pid"},
{Type: "ipc"},
},
// Devices implicitly contains the following devices:
// null, zero, full, random, urandom, tty, console, and ptmx.
// ptmx is a bind mount or symlink of the container's ptmx.
// See also: https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#default-devices
Devices: []specs.LinuxDevice{},
Resources: &specs.LinuxResources{
Devices: []specs.LinuxDeviceCgroup{
{
Allow: false,
Access: "rwm",
},
{
Allow: true,
Type: "c",
Major: iPtr(1),
Minor: iPtr(5),
Access: "rwm",
},
{
Allow: true,
Type: "c",
Major: iPtr(1),
Minor: iPtr(3),
Access: "rwm",
},
{
Allow: true,
Type: "c",
Major: iPtr(1),
Minor: iPtr(9),
Access: "rwm",
},
{
Allow: true,
Type: "c",
Major: iPtr(1),
Minor: iPtr(8),
Access: "rwm",
},
{
Allow: true,
Type: "c",
Major: iPtr(5),
Minor: iPtr(0),
Access: "rwm",
},
{
Allow: true,
Type: "c",
Major: iPtr(5),
Minor: iPtr(1),
Access: "rwm",
},
{
Allow: false,
Type: "c",
Major: iPtr(10),
Minor: iPtr(229),
Access: "rwm",
},
},
},
}
// For LCOW support, populate a blank Windows spec
if runtime.GOOS == "windows" {
s.Windows = &specs.Windows{}
}
return s
}
1.2.2.2-Link实现) daemon/oci_linux.go#Daemon.populateCommonSpec
func (daemon *Daemon) populateCommonSpec(s *specs.Spec, c *container.Container) error {
if c.BaseFS == nil {
return errors.New("populateCommonSpec: BaseFS of container " + c.ID + " is unexpectedly nil")
}
// ********************************** NOTICE ********************************** //
linkedEnv, err := daemon.setupLinkedContainers(c)
// ********************************** NOTICE ********************************** //
if err != nil {
return err
}
s.Root = &specs.Root{
Path: c.BaseFS.Path(),
Readonly: c.HostConfig.ReadonlyRootfs,
}
if err := c.SetupWorkingDirectory(daemon.idMapping.RootPair()); err != nil {
return err
}
cwd := c.Config.WorkingDir
if len(cwd) == 0 {
cwd = "/"
}
s.Process.Args = append([]string{c.Path}, c.Args...)
// only add the custom init if it is specified and the container is running in its
// own private pid namespace. It does not make sense to add if it is running in the
// host namespace or another container's pid namespace where we already have an init
if c.HostConfig.PidMode.IsPrivate() {
if (c.HostConfig.Init != nil && *c.HostConfig.Init) ||
(c.HostConfig.Init == nil && daemon.configStore.Init) {
s.Process.Args = append([]string{inContainerInitPath, "--", c.Path}, c.Args...)
path := daemon.configStore.InitPath
if path == "" {
path, err = exec.LookPath(daemonconfig.DefaultInitBinary)
if err != nil {
return err
}
}
s.Mounts = append(s.Mounts, specs.Mount{
Destination: inContainerInitPath,
Type: "bind",
Source: path,
Options: []string{"bind", "ro"},
})
}
}
s.Process.Cwd = cwd
// ********************************** NOTICE ********************************** //
s.Process.Env = c.CreateDaemonEnvironment(c.Config.Tty, linkedEnv)
// ********************************** NOTICE ********************************** //
s.Process.Terminal = c.Config.Tty
s.Hostname = c.Config.Hostname
// There isn't a field in the OCI for the NIS domainname, but luckily there
// is a sysctl which has an identical effect to setdomainname(2) so there's
// no explicit need for runtime support.
s.Linux.Sysctl = make(map[string]string)
if c.Config.Domainname != "" {
s.Linux.Sysctl["kernel.domainname"] = c.Config.Domainname
}
return nil
}
1.2.2.3) daemon/oci_linux.go#setNamespaces
func setNamespaces(daemon *Daemon, s *specs.Spec, c *container.Container) error {
userNS := false
// user
if c.HostConfig.UsernsMode.IsPrivate() {
uidMap := daemon.idMapping.UIDs()
if uidMap != nil {
userNS = true
ns := specs.LinuxNamespace{Type: "user"}
setNamespace(s, ns)
s.Linux.UIDMappings = specMapping(uidMap)
s.Linux.GIDMappings = specMapping(daemon.idMapping.GIDs())
}
}
// network
if !c.Config.NetworkDisabled {
ns := specs.LinuxNamespace{Type: "network"}
parts := strings.SplitN(string(c.HostConfig.NetworkMode), ":", 2)
if parts[0] == "container" {
nc, err := daemon.getNetworkedContainer(c.ID, c.HostConfig.NetworkMode.ConnectedContainer())
if err != nil {
return err
}
ns.Path = fmt.Sprintf("/proc/%d/ns/net", nc.State.GetPID())
if userNS {
// to share a net namespace, they must also share a user namespace
nsUser := specs.LinuxNamespace{Type: "user"}
nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", nc.State.GetPID())
setNamespace(s, nsUser)
}
} else if c.HostConfig.NetworkMode.IsHost() {
ns.Path = c.NetworkSettings.SandboxKey
}
// ********************************** NOTICE ********************************** //
setNamespace(s, ns)
// ********************************** NOTICE ********************************** //
}
// ipc
ipcMode := c.HostConfig.IpcMode
switch {
case ipcMode.IsContainer():
ns := specs.LinuxNamespace{Type: "ipc"}
ic, err := daemon.getIpcContainer(ipcMode.Container())
if err != nil {
return err
}
ns.Path = fmt.Sprintf("/proc/%d/ns/ipc", ic.State.GetPID())
setNamespace(s, ns)
if userNS {
// to share an IPC namespace, they must also share a user namespace
nsUser := specs.LinuxNamespace{Type: "user"}
nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", ic.State.GetPID())
setNamespace(s, nsUser)
}
case ipcMode.IsHost():
oci.RemoveNamespace(s, specs.LinuxNamespaceType("ipc"))
case ipcMode.IsEmpty():
// A container was created by an older version of the daemon.
// The default behavior used to be what is now called "shareable".
fallthrough
case ipcMode.IsPrivate(), ipcMode.IsShareable(), ipcMode.IsNone():
ns := specs.LinuxNamespace{Type: "ipc"}
setNamespace(s, ns)
default:
return fmt.Errorf("Invalid IPC mode: %v", ipcMode)
}
// pid
if c.HostConfig.PidMode.IsContainer() {
ns := specs.LinuxNamespace{Type: "pid"}
pc, err := daemon.getPidContainer(c)
if err != nil {
return err
}
ns.Path = fmt.Sprintf("/proc/%d/ns/pid", pc.State.GetPID())
setNamespace(s, ns)
if userNS {
// to share a PID namespace, they must also share a user namespace
nsUser := specs.LinuxNamespace{Type: "user"}
nsUser.Path = fmt.Sprintf("/proc/%d/ns/user", pc.State.GetPID())
setNamespace(s, nsUser)
}
} else if c.HostConfig.PidMode.IsHost() {
oci.RemoveNamespace(s, specs.LinuxNamespaceType("pid"))
} else {
ns := specs.LinuxNamespace{Type: "pid"}
setNamespace(s, ns)
}
// uts
if c.HostConfig.UTSMode.IsHost() {
oci.RemoveNamespace(s, specs.LinuxNamespaceType("uts"))
s.Hostname = ""
}
return nil
}
1.2.2.3.1) daemon/oci_linux.go#setNamespace
func setNamespace(s *specs.Spec, ns specs.LinuxNamespace) {
for i, n := range s.Linux.Namespaces {
if n.Type == ns.Type {
s.Linux.Namespaces[i] = ns
return
}
}
s.Linux.Namespaces = append(s.Linux.Namespaces, ns)
}
1.2.2.4) daemon/container_operations_unix.go#setupContainerMountsRoot
func (daemon *Daemon) setupContainerMountsRoot(c *container.Container) error {
// get the root mount path so we can make it unbindable
p, err := c.MountsResourcePath("")
if err != nil {
return err
}
return idtools.MkdirAllAndChown(p, 0700, daemon.idMapping.RootPair())
}
1.2.2.5) daemon/volumes_unix.go#setupMounts
// setupMounts iterates through each of the mount points for a container and
// calls Setup() on each. It also looks to see if is a network mount such as
// /etc/resolv.conf, and if it is not, appends it to the array of mounts.
func (daemon *Daemon) setupMounts(c *container.Container) ([]container.Mount, error) {
var mounts []container.Mount
// TODO: tmpfs mounts should be part of Mountpoints
tmpfsMounts := make(map[string]bool)
tmpfsMountInfo, err := c.TmpfsMounts()
if err != nil {
return nil, err
}
for _, m := range tmpfsMountInfo {
tmpfsMounts[m.Destination] = true
}
for _, m := range c.MountPoints {
if tmpfsMounts[m.Destination] {
continue
}
// ********************************** NOTICE ********************************** //
if err := daemon.lazyInitializeVolume(c.ID, m); err != nil {
// ********************************** NOTICE ********************************** //
return nil, err
}
// If the daemon is being shutdown, we should not let a container start if it is trying to
// mount the socket the daemon is listening on. During daemon shutdown, the socket
// (/var/run/docker.sock by default) doesn't exist anymore causing the call to m.Setup to
// create at directory instead. This in turn will prevent the daemon to restart.
checkfunc := func(m *volumemounts.MountPoint) error {
if _, exist := daemon.hosts[m.Source]; exist && daemon.IsShuttingDown() {
return fmt.Errorf("Could not mount %q to container while the daemon is shutting down", m.Source)
}
return nil
}
path, err := m.Setup(c.MountLabel, daemon.idMapping.RootPair(), checkfunc)
if err != nil {
return nil, err
}
if !c.TrySetNetworkMount(m.Destination, path) {
mnt := container.Mount{
Source: path,
Destination: m.Destination,
Writable: m.RW,
Propagation: string(m.Propagation),
}
if m.Spec.Type == mounttypes.TypeBind && m.Spec.BindOptions != nil {
mnt.NonRecursive = m.Spec.BindOptions.NonRecursive
}
if m.Volume != nil {
attributes := map[string]string{
"driver": m.Volume.DriverName(),
"container": c.ID,
"destination": m.Destination,
"read/write": strconv.FormatBool(m.RW),
"propagation": string(m.Propagation),
}
daemon.LogVolumeEvent(m.Volume.Name(), "mount", attributes)
}
mounts = append(mounts, mnt)
}
}
mounts = sortMounts(mounts)
// ********************************** NOTICE ********************************** //
netMounts := c.NetworkMounts()
// ********************************** NOTICE ********************************** //
// if we are going to mount any of the network files from container
// metadata, the ownership must be set properly for potential container
// remapped root (user namespaces)
rootIDs := daemon.idMapping.RootPair()
for _, mount := range netMounts {
// we should only modify ownership of network files within our own container
// metadata repository. If the user specifies a mount path external, it is
// up to the user to make sure the file has proper ownership for userns
if strings.Index(mount.Source, daemon.repository) == 0 {
if err := os.Chown(mount.Source, rootIDs.UID, rootIDs.GID); err != nil {
return nil, err
}
}
}
return append(mounts, netMounts...), nil
}
—1.2.2.5.1) container/container_unix.go#Container.NetworkMounts
// NetworkMounts returns the list of network mounts.
func (container *Container) NetworkMounts() []Mount {
var mounts []Mount
shared := container.HostConfig.NetworkMode.IsContainer()
parser := volumemounts.NewParser(container.OS)
if container.ResolvConfPath != "" {
if _, err := os.Stat(container.ResolvConfPath); err != nil {
logrus.Warnf("ResolvConfPath set to %q, but can't stat this filename (err = %v); skipping", container.ResolvConfPath, err)
} else {
writable := !container.HostConfig.ReadonlyRootfs
if m, exists := container.MountPoints["/etc/resolv.conf"]; exists {
writable = m.RW
} else {
label.Relabel(container.ResolvConfPath, container.MountLabel, shared)
}
mounts = append(mounts, Mount{
Source: container.ResolvConfPath,
Destination: "/etc/resolv.conf",
Writable: writable,
Propagation: string(parser.DefaultPropagationMode()),
})
}
}
if container.HostnamePath != "" {
if _, err := os.Stat(container.HostnamePath); err != nil {
logrus.Warnf("HostnamePath set to %q, but can't stat this filename (err = %v); skipping", container.HostnamePath, err)
} else {
writable := !container.HostConfig.ReadonlyRootfs
if m, exists := container.MountPoints["/etc/hostname"]; exists {
writable = m.RW
} else {
label.Relabel(container.HostnamePath, container.MountLabel, shared)
}
mounts = append(mounts, Mount{
Source: container.HostnamePath,
Destination: "/etc/hostname",
Writable: writable,
Propagation: string(parser.DefaultPropagationMode()),
})
}
}
if container.HostsPath != "" {
if _, err := os.Stat(container.HostsPath); err != nil {
logrus.Warnf("HostsPath set to %q, but can't stat this filename (err = %v); skipping", container.HostsPath, err)
} else {
writable := !container.HostConfig.ReadonlyRootfs
if m, exists := container.MountPoints["/etc/hosts"]; exists {
writable = m.RW
} else {
label.Relabel(container.HostsPath, container.MountLabel, shared)
}
mounts = append(mounts, Mount{
Source: container.HostsPath,
Destination: "/etc/hosts",
Writable: writable,
Propagation: string(parser.DefaultPropagationMode()),
})
}
}
return mounts
}
1.2.3) libcontainerd/client_daemon.go#client.Create
// Client provides access to containerd features.
type Client interface {
Version(ctx context.Context) (containerd.Version, error)
Restore(ctx context.Context, containerID string, attachStdio StdioCallback) (alive bool, pid int, err error)
Create(ctx context.Context, containerID string, spec *specs.Spec, runtimeOptions interface{}) error
Start(ctx context.Context, containerID, checkpointDir string, withStdin bool, attachStdio StdioCallback) (pid int, err error)
SignalProcess(ctx context.Context, containerID, processID string, signal int) error
Exec(ctx context.Context, containerID, processID string, spec *specs.Process, withStdin bool, attachStdio StdioCallback) (int, error)
ResizeTerminal(ctx context.Context, containerID, processID string, width, height int) error
CloseStdin(ctx context.Context, containerID, processID string) error
Pause(ctx context.Context, containerID string) error
Resume(ctx context.Context, containerID string) error
Stats(ctx context.Context, containerID string) (*Stats, error)
ListPids(ctx context.Context, containerID string) ([]uint32, error)
Summary(ctx context.Context, containerID string) ([]Summary, error)
DeleteTask(ctx context.Context, containerID string) (uint32, time.Time, error)
Delete(ctx context.Context, containerID string) error
Status(ctx context.Context, containerID string) (Status, error)
UpdateResources(ctx context.Context, containerID string, resources *Resources) error
CreateCheckpoint(ctx context.Context, containerID, checkpointDir string, exit bool) error
}
type client struct {
sync.RWMutex // protects containers map
client *containerd.Client
stateDir string
logger *logrus.Entry
ns string
backend Backend
eventQ queue
// 容器缓存,放在内存里的
containers map[string]*container
}
type container struct {
mu sync.Mutex
bundleDir string
ctr containerd.Container
task containerd.Task
execs map[string]containerd.Process
oomKilled bool
}
func (c *client) Create(ctx context.Context, id string, ociSpec *specs.Spec, runtimeOptions interface{}) error {
if ctr := c.getContainer(id); ctr != nil {
return errors.WithStack(newConflictError("id already in use"))
}
// ********************************** NOTICE ********************************** //
bdir, err := prepareBundleDir(filepath.Join(c.stateDir, id), ociSpec)
// ********************************** NOTICE ********************************** //
if err != nil {
return errdefs.System(errors.Wrap(err, "prepare bundle dir failed"))
}
c.logger.WithField("bundle", bdir).WithField("root", ociSpec.Root.Path).Debug("bundle dir created")
// ********************************** NOTICE ********************************** //
cdCtr, err := c.client.NewContainer(ctx, id,
containerd.WithSpec(ociSpec),
// TODO(mlaventure): when containerd support lcow, revisit runtime value
containerd.WithRuntime(fmt.Sprintf("io.containerd.runtime.v1.%s", runtime.GOOS), runtimeOptions))
// ********************************** NOTICE ********************************** //
if err != nil {
return wrapError(err)
}
c.Lock()
c.containers[id] = &container{
bundleDir: b
dir,
ctr: cdCtr,
}
c.Unlock()
return nil
}
1.2.4) libcontainerd/client_daemon.go#client.Start
// Start create and start a task for the specified containerd id
func (c *client) Start(ctx context.Context, id, checkpointDir string, withStdin bool, attachStdio StdioCallback) (int, error) {
ctr := c.getContainer(id)
if ctr == nil {
return -1, errors.WithStack(newNotFoundError("no such container"))
}
// ********************************** NOTICE ********************************** //
if t := ctr.getTask(); t != nil {
// ********************************** NOTICE ********************************** //
return -1, errors.WithStack(newConflictError("container already started"))
}
var (
cp *types.Descriptor
t containerd.Task
rio cio.IO
err error
stdinCloseSync = make(chan struct{})
)
if checkpointDir != "" {
// write checkpoint to the content store
tar := archive.Diff(ctx, "", checkpointDir)
cp, err = c.writeContent(ctx, images.MediaTypeContainerd1Checkpoint, checkpointDir, tar)
// remove the checkpoint when we're done
defer func() {
if cp != nil {
err := c.client.ContentStore().Delete(context.Background(), cp.Digest)
if err != nil {
c.logger.WithError(err).WithFields(logrus.Fields{
"ref": checkpointDir,
"digest": cp.Digest,
}).Warnf("failed to delete temporary checkpoint entry")
}
}
}()
if err := tar.Close(); err != nil {
return -1, errors.Wrap(err, "failed to close checkpoint tar stream")
}
if err != nil {
return -1, errors.Wrapf(err, "failed to upload checkpoint to containerd")
}
}
spec, err := ctr.ctr.Spec(ctx)
if err != nil {
return -1, errors.Wrap(err, "failed to retrieve spec")
}
uid, gid := getSpecUser(spec)
// ********************************** NOTICE ********************************** //
t, err = ctr.ctr.NewTask(ctx,
func(id string) (cio.IO, error) {
fifos := newFIFOSet(ctr.bundleDir, InitProcessName, withStdin, spec.Process.Terminal)
rio, err = c.createIO(fifos, id, InitProcessName, stdinCloseSync, attachStdio)
return rio, err
},
func(_ context.Context, _ *containerd.Client, info *containerd.TaskInfo) error {
info.Checkpoint = cp
info.Options = &runctypes.CreateOptions{
IoUid: uint32(uid),
IoGid: uint32(gid),
NoPivotRoot: os.Getenv("DOCKER_RAMDISK") != "",
}
return nil
})
// ********************************** NOTICE ********************************** //
if err != nil {
close(stdinCloseSync)
if rio != nil {
rio.Cancel()
rio.Close()
}
return -1, wrapError(err)
}
ctr.setTask(t)
// Signal c.createIO that it can call CloseIO
close(stdinCloseSync)
// ********************************** NOTICE ********************************** //
if err := t.Start(ctx); err != nil {
// ********************************** NOTICE ********************************** //
if _, err := t.Delete(ctx); err != nil {
c.logger.WithError(err).WithField("container", id).
Error("failed to delete task after fail start")
}
ctr.setTask(nil)
return -1, wrapError(err)
}
return int(t.Pid()), nil
}