package main

import (
	"context"
	"errors"
	"flag"
	"log"
	"net"
	"net/http"
	"os"
	"os/exec"
	"os/signal"
	"syscall"
	"time"
)

var (
	// server is the HTTP server; main assigns it before serving.
	server *http.Server
	// listener is the socket the server accepts on. reload passes it on to
	// the child process.
	listener net.Listener
	// graceful tells the process to serve on inherited file descriptor 3
	// instead of opening a new socket. reload sets it when starting the child.
	graceful = flag.Bool("graceful", false, "listen on fd open 3 (internal use only)")
)
// handler replies to a request with a fixed body after sleeping for 20
// seconds. The long delay presumably exists so that a request is still in
// flight when a stop or reload signal arrives.
//
// The request parameter is a *http.Request so that handler can be registered
// with http.HandleFunc.
func handler(w http.ResponseWriter, r *http.Request) {
	time.Sleep(20 * time.Second)
	if _, err := w.Write([]byte("hello world233333!!!!")); err != nil {
		// The client may have gone away during the sleep; report it rather
		// than dropping the error silently.
		log.Printf("handler: writing response: %v", err)
	}
}
func main() {flag.Parse()
http.HandleFunc("/hello", handler)
server \= &http.Server{Addr: ":9999"} var err error if \*graceful {
log.Print("main: Listening to existing file descriptor 3.") // cmd.ExtraFiles: If non-nil, entry i becomes file descriptor 3+i. // when we put socket FD at the first entry, it will always be 3(0+3)
// 为什么是 3 呢,而不是 1 0 或者其他数字?这是因为父进程里给了个 fd 给子进程了 而子进程里 0,1,2 是预留给 标准输入、输出和错误的,所以父进程给的第一个 fd 在子进程里顺序排就是从 3 开始了;如果 fork 的时候 cmd.ExtraFiles 给了两个文件句柄,那么子进程里还可以用 4 开始,就看你开了几个子进程自增就行。因为我这里就开一个子进程所以把 3 写死了。l, err = net.FileListener(f)
这一步只是把 fd 描述符包装进TCPListener
这个结构体。
f := os.NewFile(3, “”)
// 先复制 fd 到新的 fd, 然后设置子进程 exec 时自动关闭父进程的 fd, 即 “F_DUPFD_CLOEXEC”
listener, err = net.FileListener(f)
} else {log.Print(“main: Listening on a new file descriptor.”)
listener, err \= net.Listen(“tcp”, server.Addr)
} if err != nil {
log.Fatalf(“listener error: %v”, err)
}
go func() { // server.Shutdown() stops Serve() immediately, thus server.Serve() should not be in main goroutine
err = server.Serve(listener)
log.Printf("server.Serve err: %v\\n", err)
}()
signalHandler()
log.Printf("signal end")
}
func reload() error {
tl, ok :\= listener.(*net.TCPListener) if !ok {return errors.New(“listener is not tcp listener”)
}
f, err :\= tl.File() if err != nil { return err
}
args :\= \[\]string{"\-graceful"}
**cmd :****\= exec.Command(os.Args\[0****\], args...)**
cmd.Stdout \= os.Stdout
cmd.Stderr \= os.Stderr // put socket FD at the first entry
cmd.ExtraFiles = \[\]\*os.File{f} return cmd.Start()
}
func signalHandler() {ch :\= make(chan os.Signal, 1)
signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM, syscall.SIGUSR2) for {
sig :\= <-ch
log.Printf(“signal: %v”, sig) // timeout context for shutdown
ctx, _ := context.WithTimeout(context.Background(), 20*time.Second) switch sig { case syscall.SIGINT, syscall.SIGTERM: // stop
log.Printf(“stop”)
signal.Stop(ch)
server.Shutdown(ctx)
log.Printf(“graceful shutdown”) return
case syscall.SIGUSR2: // reload
log.Printf(“reload”)
err :\= reload() if err != nil {
log.Fatalf(“graceful restart error: %v”, err)
}
server.Shutdown(ctx)
log.Printf(“graceful reload”) return }
}
}
package main
import (
	"net"
	"net/http"
	"time"
	"log"
	"syscall"
	"os"
	"os/signal"
	"context"
	"fmt"
	"os/exec"
	"flag"
)
// Package-level state of the hot-restart demo.
var (
	// listener is the socket the HTTP server accepts on.
	listener net.Listener
	// err receives the result of setting up listener.
	err error
	// server is a package-level HTTP server value.
	server http.Server
	// graceful tells the process to serve on inherited file descriptor 3
	// instead of opening a new socket.
	graceful = flag.Bool("g", false, "listen on fd open 3 (internal use only)")
)
// MyHandler is the stateless http.Handler used by the demo server.
type MyHandler struct{}

// Compile-time check that MyHandler satisfies http.Handler.
var _ http.Handler = MyHandler{}

// ServeHTTP logs the start of the request, sleeps for 10 seconds, writes a
// fixed body and logs completion together with the process id, so the output
// shows which process served which request.
//
// NOTE(review): the start line also prints a "request done at" label and
// timestamp. That looks like a copy-paste slip, but it is kept as is because
// the sample output in this article was produced with this exact format.
func (MyHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	fmt.Println("request start at", time.Now(), r.URL.Path+"?"+r.URL.RawQuery, "request done at", time.Now(), "pid:", os.Getpid())
	time.Sleep(10 * time.Second)
	if _, werr := w.Write([]byte("this is test response")); werr != nil {
		// The client may have disconnected during the sleep.
		log.Printf("writing response: %v", werr)
	}
	fmt.Println("request done at", time.Now(), "pid:", os.Getpid())
}
func main() {
flag.Parse()
fmt.Println(“start-up at” , time.Now(), graceful)
if graceful {
f := os.NewFile(3, “”)
listener, err = net.FileListener(f)
fmt.Printf( “graceful-reborn %v %v %#v \n”, f.Fd(), f.Name(), listener)
}else{
listener, err = net.Listen(“tcp”, “:1111”)
tcp, := listener.(*net.TCPListener)
fd, := tcp.File()
fmt.Printf( “first-boot %v %v %#v \n “, fd.Fd(),fd.Name(), listener)
}
server := http.Server{
Handler: &MyHandler{},
ReadTimeout: 6 * time.Second,
}
log.Printf(“Actual pid is %d\n”, syscall.Getpid())
if err != nil {
println(err)
return
}
log.Printf(“ listener: %v\n”, listener)
go func(){// 不要阻塞主进程
err := server.Serve(listener)
if err != nil {
log.Println(err)
}
}()
//signals
func(){
ch := make(chan os.Signal, 1)
signal.Notify(ch, syscall.SIGHUP, syscall.SIGTERM)
for{// 阻塞主进程, 不停的监听系统信号
sig := <- ch
log.Printf(“signal: %v”, sig)
ctx, _ := context.WithTimeout(context.Background(), 20time.Second)
switch sig {
case syscall.SIGTERM, syscall.SIGHUP:
println(“signal cause reloading”)
signal.Stop(ch)
{//fork new child process
tl, ok := listener.(net.TCPListener)
if !ok {
fmt.Println(“listener is not tcp listener”)
return
}
currentFD, err := tl.File()
if err != nil {
fmt.Println(“acquiring listener file failed”)
return
}
cmd := exec.Command(os.Args[0], “-g”)
cmd.ExtraFiles, cmd.Stdout,cmd.Stderr = []*os.File{currentFD} ,os.Stdout, os.Stderr
err = cmd.Start()
if err != nil {
fmt.Println("cmd.Start fail: ", err)
return
}
fmt.Println("forked new pid : ",cmd.Process.Pid)
}
server.Shutdown(ctx)
fmt.Println("graceful shutdown at ", time.Now())
}
}
}()
}
qiangjian@sun-pro:/data1/works/IdeaProjects/go_core$ go run src/wright/hotrestart/booter.go
start-up at 2018-10-12 15:29:34.586269 +0800 CST m=+0.004439497 false
first-boot 5 tcp:[::]:1111-> &net.TCPListener{fd:(*net.netFD)(0xc00010e000)}
2018/10/12 15:29:34 Actual pid is 10771
2018/10/12 15:29:34 listener: &{0xc00010e000}
request start at 2018-10-12 15:29:40.287928 +0800 CST m=+5.705965906 /aa/bb?c=d request done at 2018-10-12 15:29:40.287929 +0800 CST m=+5.705966554 pid: 10771
2018/10/12 15:29:49 signal: terminated
signal cause reloading
forked new pid : 10775
start-up at 2018-10-12 15:29:49.689064 +0800 CST m=+0.001613279 true
graceful-reborn 3 &net.TCPListener{fd:(*net.netFD)(0xc0000ec000)}
2018/10/12 15:29:49 Actual pid is 10775
2018/10/12 15:29:49 listener: &{0xc0000ec000}
request done at 2018-10-12 15:29:50.288525 +0800 CST m=+15.706330718 pid: 10771
2018/10/12 15:29:50 http: Server closed
request start at 2018-10-12 15:29:50.290622 +0800 CST m=+15.708426906 /aa/bb?c=d request done at 2018-10-12 15:29:50.290623 +0800 CST m=+15.708428113 pid: 10771
request start at 2018-10-12 15:29:50.290713 +0800 CST m=+0.603248262 /aa/bb?c=d request done at 2018-10-12 15:29:50.290714 +0800 CST m=+0.603249293 pid: 10775
request done at 2018-10-12 15:30:00.293988 +0800 CST m=+10.606290169 pid: 10775
request done at 2018-10-12 15:30:00.294043 +0800 CST m=+25.711615717 pid: 10771
request start at 2018-10-12 15:30:00.295554 +0800 CST m=+10.607856283 /aa/bb?c=d request done at 2018-10-12 15:30:00.295555 +0800 CST m=+10.607857307 pid: 10775
request start at 2018-10-12 15:30:00.29558 +0800 CST m=+10.607881997 /aa/bb?c=d request done at 2018-10-12 15:30:00.295581 +0800 CST m=+10.607883004 pid: 10775
graceful shutdown at 2018-10-12 15:30:00.79544 +0800 CST m=+26.213000502
ab -v -k -c2 -n100 '127.0.0.1:1111/aa/bb?c=d'
This is ApacheBench, Version 2.3 <$Revision: 1826891 $>
Copyright 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/
Licensed to The Apache Software Foundation, http://www.apache.org/
Benchmarking 127.0.0.1 (be patient)...^C
Server Software:
Server Hostname: 127.0.0.1
Server Port: 1111
Document Path: /aa/bb?c=d
Document Length: 21 bytes
Concurrency Level: 2
Time taken for tests: 48.292 seconds
Complete requests: 7
Failed requests: 0
Total transferred: 966 bytes
HTML transferred: 147 bytes
Requests per second: 0.14 [#/sec] (mean)
Time per request: 13797.702 [ms] (mean)
Time per request: 6898.851 [ms] (mean, across all concurrent requests)
Transfer rate: 0.02 [Kbytes/sec] received
// 还有一种方式去 fork,和上面本质一样:
execSpec :\= &syscall.ProcAttr{Env: os.Environ(),
Files: []uintptr{os.Stdin.Fd(), os.Stdout.Fd(), os.Stderr.Fd(), lFd},
}
pid, err :\= syscall.ForkExec(os.Args[0], os.Args, execSpec)
可以看出:ab 的测试结果中 Failed requests 为 0,并且控制台输出显示老进程是在处理完已有请求之后才 shutdown 的。也就是说,在 kill 触发 reload 之后,无论是老进程中尚未完成的旧请求,还是 fork 出子进程之后到来的新请求,全部处理成功,没有一个失败。这就是我们所说的热重启!
父进程退出之后,子进程会挂到 1 号进程上面。这种情况下使用 systemd 和 supervisord 等管理程序会显示进程处于 failed 的状态。解决这个问题有两个方法:
https://www.cnblogs.com/sunsky303/p/9778466.html