docker容器stop流程

从API route开始看StopContainer接口的调用过程。

// NewRouter returns a container router that holds the given backend and
// decoder. The router's route table is populated via initRoutes before the
// router is handed back to the caller.
func NewRouter(b Backend, decoder httputils.ContainerDecoder) router.Router {
	cr := new(containerRouter)
	cr.backend = b
	cr.decoder = decoder
	cr.initRoutes()
	return cr
}
...
// initRoutes assigns the container router's route table to r.routes.
//
// This is an excerpt: the `...` lines stand for route entries that were
// elided, leaving only the entry relevant to stopping a container.
func (r *containerRouter) initRoutes() {
   
   
    r.routes = []router.Route{
   
   
        ...
        // POST /containers/{name}/stop is handled by postContainersStop.
        router.NewPostRoute("/containers/{name:.*}/stop", r.postContainersStop),
        ...
    }
}
// postContainersStop is the HTTP handler registered for the container stop
// route. It asks the backend to stop the container named by vars["name"] and
// replies with 204 No Content on success; a backend error is returned
// unchanged to the caller.
//
// NOTE(review): this is an excerpt; `seconds` is defined in the elided part
// (`...`) — presumably parsed from the request, confirm against the full source.
func (s *containerRouter) postContainersStop(ctx context.Context, w http.ResponseWriter, r *http.Request, vars map[string]string) error {
   
   
    ...
	if err := s.backend.ContainerStop(vars["name"], seconds); err != nil {
   
   
		return err
	}
	// Success carries no response body, only the status code.
	w.WriteHeader(http.StatusNoContent)
	return nil
}
// start is shown here only as an excerpt (the `...` lines are elided code).
// The visible part constructs the daemon object with daemon.NewDaemon from
// the CLI's configuration and a plugin store.
//
// NOTE(review): presumably this daemon is what ends up serving as the
// router's backend — confirm against the full source.
func (cli *DaemonCli) start(opts *daemonOptions) (err error) {
   
   
    ...
    d, err := daemon.NewDaemon(ctx, cli.Config, pluginStore)
    ...
}
// ContainerStop resolves name to a container and stops it.
//
// If the container does not stop gracefully within the timeout (given in
// seconds), it is forcefully terminated (killed). A nil timeout falls back to
// the value reported by the container's StopTimeout(). A negative timeout
// means "no timeout": no forceful termination is performed.
//
// Errors:
//   - the lookup error from GetContainer is returned unchanged;
//   - containerNotModifiedError{running: false} when the container is not running;
//   - a system error wrapping the failure when the stop itself fails.
func (daemon *Daemon) ContainerStop(name string, timeout *int) error {
	ctr, err := daemon.GetContainer(name)
	if err != nil {
		return err
	}
	if !ctr.IsRunning() {
		return containerNotModifiedError{running: false}
	}

	// Resolve the effective timeout without touching the caller's pointer.
	var seconds int
	if timeout != nil {
		seconds = *timeout
	} else {
		seconds = ctr.StopTimeout()
	}

	stopErr := daemon.containerStop(ctr, seconds)
	if stopErr == nil {
		return nil
	}
	return errdefs.System(errors.Wrapf(stopErr, "cannot stop container: %s", name))
}
// containerStop stops a running container in three stages: send the
// container's stop signal, wait for it to leave the running state, and fall
// back to a forced kill when the wait fails.
//
// seconds bounds the wait in stage 2; a negative value waits without a
// timeout. A container that is not running is a no-op and returns nil. The
// only error returned is a failure of the signal-9 fallback in stage 1;
// failures of the final forced kill are logged, not returned.
func (daemon *Daemon) containerStop(container *containerpkg.Container, seconds int) error {
	if !container.IsRunning() {
		return nil
	}

	sig := container.StopSignal()

	// Stage 1: deliver the stop signal.
	if sigErr := daemon.killPossiblyDeadProcess(container, sig); sigErr != nil {
		// The error is intentionally not returned right away. The most likely
		// cause is that the container stopped between the IsRunning() check
		// above and the signal delivery, and since the error is environment
		// specific we cannot tell "already dead" apart from a real failure.
		// Allow up to 2 more seconds; if the container is still running after
		// that, treat the error as genuine and force kill.
		graceCtx, graceCancel := context.WithTimeout(context.Background(), 2*time.Second)
		defer graceCancel()

		graceStatus := <-container.Wait(graceCtx, containerpkg.WaitConditionNotRunning)
		if graceStatus.Err() != nil {
			logrus.Infof("Container failed to stop after sending signal %d to the process, force killing", sig)
			if killErr := daemon.killPossiblyDeadProcess(container, 9); killErr != nil {
				return killErr
			}
		}
	}

	// Stage 2: wait for the process to exit on its own, bounded by seconds
	// unless a negative value was given.
	waitCtx := context.Background()
	if seconds >= 0 {
		timedCtx, cancel := context.WithTimeout(waitCtx, time.Duration(seconds)*time.Second)
		defer cancel()
		waitCtx = timedCtx
	}

	exitStatus := <-container.Wait(waitCtx, containerpkg.WaitConditionNotRunning)
	if exitStatus.Err() != nil {
		logrus.Infof("Container %v failed to exit within %d seconds of signal %d - using the force", container.ID, seconds, sig)
		// Stage 3: the container did not exit in time, so force kill it.
		if killErr := daemon.Kill(container); killErr != nil {
			// Block until the container is no longer running; the result is ignored.
			<-container.Wait(context.Background(), containerpkg.WaitConditionNotRunning)
			// Only log: what matters is that the container stopped, not which call stopped it.
			logrus.Warn(killErr)
		}
	}

	daemon.LogContainerEvent(container, "stop")
	return nil
}

container.StopSignal() 优先用容器指定的信号,如果没有则默认是SIGTERM。信号发送成功后,按上层(kubelet)指定的超时时间等待容器退出;只有在信号发送失败时,才会先额外等待最多2s,若容器仍未退出则直接发送信号9(SIGKILL),然后再进入上述超时等待。
如果容器始终未退出,daemon.Kill(container) 给容器发送SIGKILL信号,强制容器退出。

这里涉及容器的两种启动方式:

  • shell格式

PID1进程为 /bin/sh -c,
因为/bin/sh不会转发信号至任何子进程。所以我们的应用将永远不会收到SIGTERM信号。显然要解决这个问题,就需要将我们的进程作为PID1进程运行。

  • exec格式

PID1进程为应用程序执行文件(脚本或二进制),我们的程序可以直接捕获docker stop命令发送的SIGTERM信号。

先看下强制终止(Kill)的过程

// Kill forcefully terminates a container.
//
// It first sends SIGKILL through killPossiblyDeadProcess, then calls
// killProcessDirectly as a last resort, and finally blocks without a timeout
// until the container is no longer running.
//
// It returns errNotRunning when the container is not running, nil when
// either kill attempt reports that the process no longer exists, and
// otherwise the error from the failing kill attempt.
func (daemon *Daemon) Kill(container *containerpkg.Container) error {
	if !container.IsRunning() {
		return errNotRunning(container.ID)
	}

	// Step 1: send SIGKILL.
	if sigErr := daemon.killPossiblyDeadProcess(container, int(syscall.SIGKILL)); sigErr != nil {
		// A missing process means there is nothing left to kill.
		if isErrNoSuchProcess(sigErr) {
			return nil
		}

		// Any other error is not returned immediately: the container may have
		// stopped between the IsRunning() check above and the signal
		// delivery, and the error is environment specific, so "already dead"
		// cannot be told apart from a real failure. Allow up to 2 more
		// seconds; if the container is still running after that, the error
		// is probably valid and is returned to the caller.
		graceCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
		defer cancel()

		graceStatus := <-container.Wait(graceCtx, containerpkg.WaitConditionNotRunning)
		if graceStatus.Err() != nil {
			return sigErr
		}
	}

	// Step 2: as a last resort, try to kill the process directly.
	directErr := killProcessDirectly(container)
	switch {
	case directErr == nil:
		// Continue to the final wait below.
	case isErrNoSuchProcess(directErr):
		return nil
	default:
		return directErr
	}

	// Wait for exit with no timeout; the returned status is ignored.
	<-container.Wait(context.Background(), containerpkg.WaitConditionNotRunning)

	return nil
}

killWithSignal() 先从容器层面尝试停止容器,如果容器是 Restarting 状态,就放弃这次的Kill操作。
如果容器是 Paused 状态,先执行Resume,在容器恢复后,立刻发送SIGKILL。

等待2s,容器状态没有转成 NotRunning, 就直接给容器中的进程发送SIGKILL。到这里再等上10s,如果容器还不退,就查询容器的1号进程,发送SIGKILL。

<-container.Wait 发送完SIGKILL后,开始阻塞等, 这次没有设置超时,就是死等, 这时当前goroutine 握着一把容器级别的锁(state.Lock()) 。

TODO: daemon.containerd.Resume()

// killWithSignal sends the container the given signal. This wrapper for the
// host specific kill command prepares the container before attempting
// to send the signal. An error is returned if the container is paused
// or not running, or if there is a problem returned from the
// underlying kill command.
func (daemon *Daemon) killWithSignal(container *containerpkg.Container, sig int) error {
   
   
	logrus.Debugf("Sending kill signal %d to container %s", sig, container.ID)
	container.Lock()
	defer container.Unlock()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值