Add feature for graceful shutdown of HTTP server

If the watchdog is sent SIGTERM from an external process then it
should stop accepting new connections and attempt to finish the
work in progress. This change makes use of the new ability in Go
1.9 and onwards to cancel an HTTP server gracefully.

The write_timeout duration is used as a grace period to allow all
in-flight requests to complete. The pattern is taken directly from
the official example in the Golang documentation. [1]

Further tuning and testing may be needed for Windows containers which
have a different set of signals for closing work. This change aims
to cover the majority use-case for Linux containers.

The HTTP health-check is also invalidated on shutdown: it now succeeds
only when the watchdog is accepting connections AND the existing lock
file is present.

Tested with Kubernetes by deploying a custom watchdog and the
fprocess of `env`. The log message was observed when scaling down and
connections stopped being accepted on the terminating replica.

Also corrects some typos from previous PR.

[1] https://golang.org/pkg/net/http/#Server.Shutdown

Signed-off-by: Alex Ellis (VMware) <alexellis2@gmail.com>
This commit is contained in:
Alex Ellis (VMware) 2018-03-20 09:45:31 +00:00 committed by Alex Ellis
parent 6921e4fc0f
commit de2c74fcdb
2 changed files with 60 additions and 19 deletions

View File

@ -5,20 +5,25 @@ package main
import ( import (
"bytes" "bytes"
"context"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"log" "log"
"net/http" "net/http"
"os" "os"
"os/exec" "os/exec"
"os/signal"
"path/filepath" "path/filepath"
"strings" "strings"
"sync" "sync"
"syscall"
"time" "time"
"github.com/openfaas/faas/watchdog/types" "github.com/openfaas/faas/watchdog/types"
) )
var acceptingConnections bool
// buildFunctionInput for a GET method this is an empty byte array. // buildFunctionInput for a GET method this is an empty byte array.
func buildFunctionInput(config *WatchdogConfig, r *http.Request) ([]byte, error) { func buildFunctionInput(config *WatchdogConfig, r *http.Request) ([]byte, error) {
var res []byte var res []byte
@ -260,25 +265,20 @@ func lockFilePresent() bool {
return true return true
} }
func createLockFile() error { func createLockFile() (string, error) {
path := filepath.Join(os.TempDir(), ".lock") path := filepath.Join(os.TempDir(), ".lock")
log.Printf("Writing lock-file to: %s\n", path) log.Printf("Writing lock-file to: %s\n", path)
writeErr := ioutil.WriteFile(path, []byte{}, 0660) writeErr := ioutil.WriteFile(path, []byte{}, 0660)
return writeErr acceptingConnections = true
}
func removeLockFile() error { return path, writeErr
path := filepath.Join(os.TempDir(), ".lock")
log.Printf("Removing lock-file : %s\n", path)
removeErr := os.Remove(path)
return removeErr
} }
func makeHealthHandler() func(http.ResponseWriter, *http.Request) { func makeHealthHandler() func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
switch r.Method { switch r.Method {
case http.MethodGet: case http.MethodGet:
if lockFilePresent() == false { if acceptingConnections == false || lockFilePresent() == false {
w.WriteHeader(http.StatusInternalServerError) w.WriteHeader(http.StatusInternalServerError)
return return
} }
@ -288,9 +288,7 @@ func makeHealthHandler() func(http.ResponseWriter, *http.Request) {
break break
default: default:
w.WriteHeader(http.StatusMethodNotAllowed) w.WriteHeader(http.StatusMethodNotAllowed)
} }
} }
} }
@ -312,6 +310,8 @@ func makeRequestHandler(config *WatchdogConfig) func(http.ResponseWriter, *http.
} }
func main() { func main() {
acceptingConnections = false
osEnv := types.OsEnv{} osEnv := types.OsEnv{}
readConfig := ReadConfig{} readConfig := ReadConfig{}
config := readConfig.Read(osEnv) config := readConfig.Read(osEnv)
@ -335,15 +335,46 @@ func main() {
http.HandleFunc("/", makeRequestHandler(&config)) http.HandleFunc("/", makeRequestHandler(&config))
if config.suppressLock == false { if config.suppressLock == false {
path := filepath.Join(os.TempDir(), ".lock") path, writeErr := createLockFile()
log.Printf("Writing lock-file to: %s\n", path)
writeErr := ioutil.WriteFile(path, []byte{}, 0660)
if writeErr != nil { if writeErr != nil {
log.Panicf("Cannot write %s. To disable lock-file set env suppress_lock=true.\n Error: %s.\n", path, writeErr.Error()) log.Panicf("Cannot write %s. To disable lock-file set env suppress_lock=true.\n Error: %s.\n", path, writeErr.Error())
} }
} else { } else {
log.Println("Warning: \"suppress_lock\" is enabled. No automated health-checks will be in place for your function.") log.Println("Warning: \"suppress_lock\" is enabled. No automated health-checks will be in place for your function.")
acceptingConnections = true
} }
log.Fatal(s.ListenAndServe()) listenUntilShutdown(config.writeTimeout, s)
}
func listenUntilShutdown(shutdownTimeout time.Duration, s *http.Server) {
idleConnsClosed := make(chan struct{})
go func() {
sig := make(chan os.Signal, 1)
signal.Notify(sig, syscall.SIGTERM)
<-sig
log.Printf("SIGTERM received.. shutting down server")
acceptingConnections = false
if err := s.Shutdown(context.Background()); err != nil {
// Error from closing listeners, or context timeout:
log.Printf("Error in Shutdown: %v", err)
}
<-time.Tick(shutdownTimeout)
close(idleConnsClosed)
}()
if err := s.ListenAndServe(); err != http.ErrServerClosed {
log.Printf("Error ListenAndServe: %v", err)
close(idleConnsClosed)
}
<-idleConnsClosed
} }

View File

@ -11,6 +11,7 @@ import (
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"os" "os"
"path/filepath"
"strings" "strings"
"testing" "testing"
"time" "time"
@ -361,11 +362,13 @@ func TestHandler_StatusOKForGETAndNoBody(t *testing.T) {
} }
} }
func TestHealthHandler_SatusOK_LockFilePresent(t *testing.T) { func TestHealthHandler_StatusOK_LockFilePresent(t *testing.T) {
rr := httptest.NewRecorder() rr := httptest.NewRecorder()
if lockFilePresent() == false { present := lockFilePresent()
if err := createLockFile(); err != nil {
if present == false {
if _, err := createLockFile(); err != nil {
t.Fatal(err) t.Fatal(err)
} }
} }
@ -402,7 +405,7 @@ func TestHealthHandler_StatusInternalServerError_LockFileNotPresent(t *testing.T
required := http.StatusInternalServerError required := http.StatusInternalServerError
if status := rr.Code; status != required { if status := rr.Code; status != required {
t.Errorf("handler retruned wrong status code: got %v, but wanted %v", status, required) t.Errorf("handler returned wrong status code - got: %v, want: %v", status, required)
} }
} }
@ -426,3 +429,10 @@ func TestHealthHandler_SatusMethoNotAllowed_ForWriteableVerbs(t *testing.T) {
} }
} }
} }
func removeLockFile() error {
path := filepath.Join(os.TempDir(), ".lock")
log.Printf("Removing lock-file : %s\n", path)
removeErr := os.Remove(path)
return removeErr
}