Golang服务器热重启、热升级、热更新(safe and graceful hot-restart/reload http server)详解 - sunsky303 - 博客园 - 图1

    package main

    import (
    	"context"
    	"errors"
    	"flag"
    	"log"
    	"net"
    	"net/http"
    	"os"
    	"os/exec"
    	"os/signal"
    	"syscall"
    	"time"
    )

    var (
    	server   *http.Server
    	listener net.Listener
    	graceful = flag.Bool("graceful", false, "listen on fd open 3 (internal use only)")
    )

    // handler serves the demo endpoint: it sleeps for 20 seconds and then
    // writes a fixed greeting. The long delay presumably exists so that a
    // request is still in flight when a restart signal is sent.
    func handler(w http.ResponseWriter, r *http.Request) {
    	time.Sleep(20 * time.Second)
    	if _, err := w.Write([]byte("hello world233333!!!!")); err != nil {
    		log.Printf("handler: write response: %v", err)
    	}
    }

    func main() {flag.Parse()

    1. http.HandleFunc("/hello", handler)
    2. server \= &http.Server{Addr: ":9999"} var err error if \*graceful {
    3. log.Print("main: Listening to existing file descriptor 3.") // cmd.ExtraFiles: If non-nil, entry i becomes file descriptor 3+i. // when we put socket FD at the first entry, it will always be 3(0+3)

    // 为什么是 3 呢,而不是 1 0 或者其他数字?这是因为父进程里给了个 fd 给子进程了 而子进程里 0,1,2 是预留给 标准输入、输出和错误的,所以父进程给的第一个 fd 在子进程里顺序排就是从 3 开始了;如果 fork 的时候 cmd.ExtraFiles 给了两个文件句柄,那么子进程里还可以用 4 开始,就看你开了几个子进程自增就行。因为我这里就开一个子进程所以把 3 写死了。l, err = net.FileListener(f)这一步只是把 fd 描述符包装进TCPListener这个结构体。
    f := os.NewFile(3, “”)
         // 先复制 fd 到新的 fd, 然后设置子进程 exec 时自动关闭父进程的 fd, 即 “F_DUPFD_CLOEXEC”
    listener, err =
    net.FileListener(f)
    } else {log.Print(“main: Listening on a new file descriptor.”)
    listener, err \= net.Listen(“tcp”, server.Addr)
    } if err != nil {
    log.Fatalf(“listener error: %v”, err)
    }

    1. go func() { // server.Shutdown() stops Serve() immediately, thus server.Serve() should not be in main goroutine
    2. err = server.Serve(listener)
    3. log.Printf("server.Serve err: %v\\n", err)
    4. }()
    5. signalHandler()
    6. log.Printf("signal end")

    }

    func reload() error {
    tl, ok :\= listener.(*net.TCPListener) if !ok {return errors.New(“listener is not tcp listener”)
    }

    1. f, err :\= tl.File() if err != nil { return err
    2. }
    3. args :\= \[\]string{"\-graceful"}
    4. **cmd :****\= exec.Command(os.Args\[0****\], args...)**
    5. cmd.Stdout \= os.Stdout
    6. cmd.Stderr \= os.Stderr // put socket FD at the first entry
    7. cmd.ExtraFiles = \[\]\*os.File{f} return cmd.Start()

    }

    func signalHandler() {ch :\= make(chan os.Signal, 1)
    signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM, syscall.SIGUSR2) for {
    sig :\= <-ch
    log.Printf(“signal: %v”, sig) // timeout context for shutdown
    ctx, _ := context.WithTimeout(context.Background(), 20*time.Second) switch sig { case syscall.SIGINT, syscall.SIGTERM: // stop
    log.Printf(“stop”)
    signal.Stop(ch)
    server.Shutdown(ctx)
    log.Printf(“graceful shutdown”) return
    case syscall.SIGUSR2: // reload
    log.Printf(“reload”)
    err :\= reload() if err != nil {
    log.Fatalf(“graceful restart error: %v”, err)
    }
    server.Shutdown(ctx)
    log.Printf(“graceful reload”) return }
    }
    }

    Golang服务器热重启、热升级、热更新(safe and graceful hot-restart/reload http server)详解 - sunsky303 - 博客园 - 图2

    Golang服务器热重启、热升级、热更新(safe and graceful hot-restart/reload http server)详解 - sunsky303 - 博客园 - 图3

    package main

    import (
    	"net"
    	"net/http"
    	"time"
    	"log"
    	"syscall"
    	"os"
    	"os/signal"
    	"context"
    	"fmt"
    	"os/exec"
    	"flag"
    )
    // Package-level state shared by main.
    var (
    	// listener is the socket the HTTP server accepts connections on.
    	listener net.Listener
    	// err holds the result of creating or inheriting the listener.
    	err error
    	// server is the HTTP server instance.
    	server http.Server
    	// graceful is set via -g when the process should use the listening
    	// socket already open on file descriptor 3 (internal use only).
    	graceful = flag.Bool("g", false, "listen on fd open 3 (internal use only)")
    )

    // MyHandler is a field-less http.Handler used by the demo server.
    type MyHandler struct{}

    // ServeHTTP logs the start of the request, sleeps for 10 seconds, writes a
    // fixed response body and then logs completion together with the process
    // id, so the console shows which process served each request.
    func (MyHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
    	// NOTE: the start line also prints a second timestamp labelled
    	// "request done at"; it is kept unchanged so the output matches the
    	// console log shown in this document.
    	fmt.Println("request start at", time.Now(), r.URL.Path+"?"+r.URL.RawQuery, "request done at", time.Now(), "pid:", os.Getpid())
    	time.Sleep(10 * time.Second)
    	w.Write([]byte("this is test response"))
    	fmt.Println("request done at", time.Now(), "pid:", os.Getpid())
    }

    func main() {
    flag.Parse()
    fmt.Println(“start-up at” , time.Now(), graceful)
    if
    graceful {
    f := os.NewFile(3, “”)
    listener, err = net.FileListener(f)
    fmt.Printf( “graceful-reborn %v %v %#v \n”, f.Fd(), f.Name(), listener)
    }else{
    listener, err = net.Listen(“tcp”, “:1111”)
    tcp, := listener.(*net.TCPListener)
    fd,
    := tcp.File()
    fmt.Printf( “first-boot %v %v %#v \n “, fd.Fd(),fd.Name(), listener)
    }

    server := http.Server{
    Handler: &MyHandler{},
    ReadTimeout: 6 * time.Second,
    }
    log.Printf(“Actual pid is %d\n”, syscall.Getpid())
    if err != nil {
    println(err)
    return
    }
    log.Printf(“ listener: %v\n”, listener)
    go func(){// 不要阻塞主进程
    err := server.Serve(listener)
    if err != nil {
    log.Println(err)
    }
    }()

    //signals
    func(){
    ch := make(chan os.Signal, 1)
    signal.Notify(ch, syscall.SIGHUP, syscall.SIGTERM)
    for{// 阻塞主进程, 不停的监听系统信号
    sig := <- ch
    log.Printf(“signal: %v”, sig)
    ctx, _ := context.WithTimeout(context.Background(), 20time.Second)
    switch sig {
    case syscall.SIGTERM, syscall.SIGHUP:
    println(“signal cause reloading”)
    signal.Stop(ch)
    {//fork new child process
    tl, ok := listener.(
    net.TCPListener)
    if !ok {
    fmt.Println(“listener is not tcp listener”)
    return
    }
    currentFD, err := tl.File()
    if err != nil {
    fmt.Println(“acquiring listener file failed”)
    return
    }
    cmd := exec.Command(os.Args[0], “-g”)
    cmd.ExtraFiles, cmd.Stdout,cmd.Stderr = []*os.File{currentFD} ,os.Stdout, os.Stderr
    err = cmd.Start()

    1. if err != nil {
    2. fmt.Println("cmd.Start fail: ", err)
    3. return
    4. }
    5. fmt.Println("forked new pid : ",cmd.Process.Pid)
    6. }
    7. server.Shutdown(ctx)
    8. fmt.Println("graceful shutdown at ", time.Now())
    9. }
    10. }

    }()
    }

    Golang服务器热重启、热升级、热更新(safe and graceful hot-restart/reload http server)详解 - sunsky303 - 博客园 - 图4

    Golang服务器热重启、热升级、热更新(safe and graceful hot-restart/reload http server)详解 - sunsky303 - 博客园 - 图5

    qiangjian@sun-pro:/data1/works/IdeaProjects/go_core$ go run src/wright/hotrestart/booter.go
    start-up at 2018-10-12 15:29:34.586269 +0800 CST m=+0.004439497 false
    first-boot 5 tcp:[::]:1111-> &net.TCPListener{fd:(*net.netFD)(0xc00010e000)}
    2018/10/12 15:29:34 Actual pid is 10771
    2018/10/12 15:29:34 listener: &{0xc00010e000}
    request start at 2018-10-12 15:29:40.287928 +0800 CST m=+5.705965906 /aa/bb?c=d request done at 2018-10-12 15:29:40.287929 +0800 CST m=+5.705966554 pid: 10771
    2018/10/12 15:29:49 signal: terminated
    signal cause reloading
    forked new pid : 10775 start-up at 2018-10-12 15:29:49.689064 +0800 CST m=+0.001613279 true graceful-reborn 3 &net.TCPListener{fd:(*net.netFD)(0xc0000ec000)} 2018/10/12 15:29:49 Actual pid is 10775
    2018/10/12 15:29:49 listener: &{0xc0000ec000}
    request done at 2018-10-12 15:29:50.288525 +0800 CST m=+15.706330718 pid: 10771
    2018/10/12 15:29:50 http: Server closed
    request start at 2018-10-12 15:29:50.290622 +0800 CST m=+15.708426906 /aa/bb?c=d request done at 2018-10-12 15:29:50.290623 +0800 CST m=+15.708428113 pid: 10771 request start at 2018-10-12 15:29:50.290713 +0800 CST m=+0.603248262 /aa/bb?c=d request done at 2018-10-12 15:29:50.290714 +0800 CST m=+0.603249293 pid: 10775 request done at 2018-10-12 15:30:00.293988 +0800 CST m=+10.606290169 pid: 10775 request done at 2018-10-12 15:30:00.294043 +0800 CST m=+25.711615717 pid: 10771 request start at 2018-10-12 15:30:00.295554 +0800 CST m=+10.607856283 /aa/bb?c=d request done at 2018-10-12 15:30:00.295555 +0800 CST m=+10.607857307 pid: 10775 request start at 2018-10-12 15:30:00.29558 +0800 CST m=+10.607881997 /aa/bb?c=d request done at 2018-10-12 15:30:00.295581 +0800 CST m=+10.607883004 pid: 10775 graceful shutdown at 2018-10-12 15:30:00.79544 +0800 CST m=+26.213000502

    Golang服务器热重启、热升级、热更新(safe and graceful hot-restart/reload http server)详解 - sunsky303 - 博客园 - 图6

    Golang服务器热重启、热升级、热更新(safe and graceful hot-restart/reload http server)详解 - sunsky303 - 博客园 - 图7

    ab -v -k -c2 -n100 ‘127.0.0.1:1111/aa/bb?c=d’ This is ApacheBench, Version 2.3 <$Revision: 1826891 $> Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
    Licensed to The Apache Software Foundation, http://www.apache.org/
    Benchmarking 127.0.0.1 (be patient)…^C

    Server Software:
    Server Hostname: 127.0.0.1 Server Port: 1111 Document Path: /aa/bb?c=d
    Document Length: 21 bytes

    Concurrency Level: 2 Time taken for tests: 48.292 seconds
    Complete requests: 7 Failed requests: 0 Total transferred: 966 bytes
    HTML transferred: 147 bytes
    Requests per second: 0.14 [#/sec] (mean)
    Time per request: 13797.702 [ms] (mean)
    Time per request: 6898.851 [ms] (mean, across all concurrent requests)
    Transfer rate: 0.02 [Kbytes/sec] received

    Golang服务器热重启、热升级、热更新(safe and graceful hot-restart/reload http server)详解 - sunsky303 - 博客园 - 图8

    Golang服务器热重启、热升级、热更新(safe and graceful hot-restart/reload http server)详解 - sunsky303 - 博客园 - 图9

    // 还有一种方式去 fork,和上面本质一样:
    execSpec := &syscall.ProcAttr{
    	Env:   os.Environ(),
    	Files: []uintptr{os.Stdin.Fd(), os.Stdout.Fd(), os.Stderr.Fd(), lFd},
    }
    pid, err := syscall.ForkExec(os.Args[0], os.Args, execSpec)

    Golang服务器热重启、热升级、热更新(safe and graceful hot-restart/reload http server)详解 - sunsky303 - 博客园 - 图10

    可以看出: ab 测试器 Failed 为 0,且 console 中显示老请求处理完后才 shutdown,即在 kill 触发 reload 后,请求无论是老进程的旧请求,还是 fork 子进程后的新请求,全都处理成功,没有失败的。 这就是我们说的热重启!

    父进程退出之后,子进程会挂到 1 号进程上面。这种情况下使用 systemd 和 supervisord 等管理程序会显示进程处于 failed 的状态。解决这个问题有两个方法:
    https://www.cnblogs.com/sunsky303/p/9778466.html