From 682b0274b5ff4535d1c4d088650cf6d13e508b31 Mon Sep 17 00:00:00 2001 From: Kevin Lingerfelt Date: Wed, 20 Jun 2018 17:32:44 -0700 Subject: [PATCH] Add controller admin servers and readiness probes (#1168) * Add controller admin servers and readiness probes * Tweak readiness probes to be more sane * Refactor based on review feedback Signed-off-by: Kevin Lingerfelt --- cli/cmd/testdata/install_default.golden | 52 ++++++++++++- cli/cmd/testdata/install_output.golden | 52 ++++++++++++- cli/install/template.go | 52 ++++++++++++- controller/api/public/grpc_server_test.go | 5 +- controller/api/public/stat_summary_test.go | 5 +- controller/cmd/destination/main.go | 12 +-- controller/cmd/proxy-api/main.go | 4 +- controller/cmd/public-api/main.go | 13 ++-- controller/cmd/tap/main.go | 13 ++-- .../destination/endpoints_watcher_test.go | 5 +- controller/k8s/api.go | 9 ++- controller/k8s/api_test.go | 5 +- controller/script/simulate-proxy/main.go | 5 +- controller/tap/server_test.go | 5 +- pkg/admin/admin.go | 78 +++++++++++++++++++ pkg/prometheus/prometheus.go | 7 -- test/install_test.go | 3 +- web/main.go | 9 +-- 18 files changed, 261 insertions(+), 73 deletions(-) create mode 100644 pkg/admin/admin.go diff --git a/cli/cmd/testdata/install_default.golden b/cli/cmd/testdata/install_default.golden index f9d41362a5e4b..3cb44dac5a353 100755 --- a/cli/cmd/testdata/install_default.golden +++ b/cli/cmd/testdata/install_default.golden @@ -148,12 +148,22 @@ spec: - -logtostderr=true image: gcr.io/runconduit/controller:undefined imagePullPolicy: IfNotPresent + livenessProbe: + httpGet: + path: /ping + port: 9995 + initialDelaySeconds: 10 name: public-api ports: - containerPort: 8085 name: http - containerPort: 9995 name: admin-http + readinessProbe: + failureThreshold: 7 + httpGet: + path: /ready + port: 9995 resources: {} - args: - destination @@ -162,12 +172,22 @@ spec: - -logtostderr=true image: gcr.io/runconduit/controller:undefined imagePullPolicy: IfNotPresent + livenessProbe: + httpGet: + path: /ping + port: 9999 + initialDelaySeconds: 10 name: destination ports: - containerPort: 8089 name: grpc - containerPort: 9999 name: admin-http + readinessProbe: + failureThreshold: 7 + httpGet: + path: /ready + port: 9999 resources: {} - args: - proxy-api @@ -176,12 +196,22 @@ spec: - -logtostderr=true image: gcr.io/runconduit/controller:undefined imagePullPolicy: IfNotPresent + livenessProbe: + httpGet: + path: /ping + port: 9996 + initialDelaySeconds: 10 name: proxy-api ports: - containerPort: 8086 name: grpc - containerPort: 9996 name: admin-http + readinessProbe: + failureThreshold: 7 + httpGet: + path: /ready + port: 9996 resources: {} - args: - tap @@ -189,12 +219,22 @@ spec: - -logtostderr=true image: gcr.io/runconduit/controller:undefined imagePullPolicy: IfNotPresent + livenessProbe: + httpGet: + path: /ping + port: 9998 + initialDelaySeconds: 10 name: tap ports: - containerPort: 8088 name: grpc - containerPort: 9998 name: admin-http + readinessProbe: + failureThreshold: 7 + httpGet: + path: /ready + port: 9998 resources: {} - env: - name: CONDUIT_PROXY_LOG @@ -306,12 +346,22 @@ spec: - -log-level=info image: gcr.io/runconduit/web:undefined imagePullPolicy: IfNotPresent + livenessProbe: + httpGet: + path: /ping + port: 9994 + initialDelaySeconds: 10 name: web ports: - containerPort: 8084 name: http - containerPort: 9994 name: admin-http + readinessProbe: + failureThreshold: 7 + httpGet: + path: /ready + port: 9994 resources: {} - env: - name: CONDUIT_PROXY_LOG @@ -666,7 +716,7 @@ spec: httpGet: path: /api/health port: 3000 - initialDelaySeconds: 60 + initialDelaySeconds: 30 periodSeconds: 10 timeoutSeconds: 30 resources: {} diff --git a/cli/cmd/testdata/install_output.golden b/cli/cmd/testdata/install_output.golden index 94ae4f2afad13..f56a9bb38501b 100644 --- a/cli/cmd/testdata/install_output.golden +++ b/cli/cmd/testdata/install_output.golden @@ -149,12 +149,22 @@ spec: - -logtostderr=true image: ControllerImage imagePullPolicy: ImagePullPolicy + livenessProbe: + httpGet: + path: /ping + port: 9995 + initialDelaySeconds: 10 name: public-api ports: - containerPort: 8085 name: http - containerPort: 9995 name: admin-http + readinessProbe: + failureThreshold: 7 + httpGet: + path: /ready + port: 9995 resources: {} - args: - destination @@ -163,12 +173,22 @@ spec: - -logtostderr=true image: ControllerImage imagePullPolicy: ImagePullPolicy + livenessProbe: + httpGet: + path: /ping + port: 9999 + initialDelaySeconds: 10 name: destination ports: - containerPort: 8089 name: grpc - containerPort: 9999 name: admin-http + readinessProbe: + failureThreshold: 7 + httpGet: + path: /ready + port: 9999 resources: {} - args: - proxy-api @@ -177,12 +197,22 @@ spec: - -logtostderr=true image: ControllerImage imagePullPolicy: ImagePullPolicy + livenessProbe: + httpGet: + path: /ping + port: 9996 + initialDelaySeconds: 10 name: proxy-api ports: - containerPort: 123 name: grpc - containerPort: 9996 name: admin-http + readinessProbe: + failureThreshold: 7 + httpGet: + path: /ready + port: 9996 resources: {} - args: - tap @@ -190,12 +220,22 @@ spec: - -logtostderr=true image: ControllerImage imagePullPolicy: ImagePullPolicy + livenessProbe: + httpGet: + path: /ping + port: 9998 + initialDelaySeconds: 10 name: tap ports: - containerPort: 8088 name: grpc - containerPort: 9998 name: admin-http + readinessProbe: + failureThreshold: 7 + httpGet: + path: /ready + port: 9998 resources: {} - env: - name: CONDUIT_PROXY_LOG @@ -308,12 +348,22 @@ spec: - -log-level=ControllerLogLevel image: WebImage imagePullPolicy: ImagePullPolicy + livenessProbe: + httpGet: + path: /ping + port: 9994 + initialDelaySeconds: 10 name: web ports: - containerPort: 8084 name: http - containerPort: 9994 name: admin-http + readinessProbe: + failureThreshold: 7 + httpGet: + path: /ready + port: 9994 resources: {} - env: - name: CONDUIT_PROXY_LOG @@ -670,7 +720,7 @@ spec: httpGet: path: /api/health port: 3000 - initialDelaySeconds: 60 + initialDelaySeconds: 30 periodSeconds: 10 timeoutSeconds: 30 resources: {} diff --git a/cli/install/template.go b/cli/install/template.go index db0098ad1e00c..774dd2ebd56ba 100644 --- a/cli/install/template.go +++ b/cli/install/template.go @@ -152,6 +152,16 @@ spec: - "-controller-namespace={{.Namespace}}" - "-log-level={{.ControllerLogLevel}}" - "-logtostderr=true" + livenessProbe: + httpGet: + path: /ping + port: 9995 + initialDelaySeconds: 10 + readinessProbe: + httpGet: + path: /ready + port: 9995 + failureThreshold: 7 - name: destination ports: - name: grpc @@ -165,6 +175,16 @@ spec: - "-enable-tls={{.EnableTLS}}" - "-log-level={{.ControllerLogLevel}}" - "-logtostderr=true" + livenessProbe: + httpGet: + path: /ping + port: 9999 + initialDelaySeconds: 10 + readinessProbe: + httpGet: + path: /ready + port: 9999 + failureThreshold: 7 - name: proxy-api ports: - name: grpc @@ -178,6 +198,16 @@ spec: - "-addr=:{{.ProxyAPIPort}}" - "-log-level={{.ControllerLogLevel}}" - "-logtostderr=true" + livenessProbe: + httpGet: + path: /ping + port: 9996 + initialDelaySeconds: 10 + readinessProbe: + httpGet: + path: /ready + port: 9996 + failureThreshold: 7 - name: tap ports: - name: grpc @@ -190,6 +220,16 @@ spec: - "tap" - "-log-level={{.ControllerLogLevel}}" - "-logtostderr=true" + livenessProbe: + httpGet: + path: /ping + port: 9998 + initialDelaySeconds: 10 + readinessProbe: + httpGet: + path: /ready + port: 9998 + failureThreshold: 7 ### Web ### --- @@ -249,6 +289,16 @@ spec: - "-uuid={{.UUID}}" - "-controller-namespace={{.Namespace}}" - "-log-level={{.ControllerLogLevel}}" + livenessProbe: + httpGet: + path: /ping + port: 9994 + initialDelaySeconds: 10 + readinessProbe: + httpGet: + path: /ready + port: 9994 + failureThreshold: 7 ### Prometheus ### --- @@ -502,7 +552,7 @@ spec: httpGet: path: /api/health port: 3000 - initialDelaySeconds: 60 + initialDelaySeconds: 30 timeoutSeconds: 30 failureThreshold: 10 periodSeconds: 10 diff --git a/controller/api/public/grpc_server_test.go b/controller/api/public/grpc_server_test.go index 7fab8e734ea4e..d8346a75eafa7 100644 --- a/controller/api/public/grpc_server_test.go +++ b/controller/api/public/grpc_server_test.go @@ -161,10 +161,7 @@ spec: []string{}, ) - err = k8sAPI.Sync() - if err != nil { - t.Fatalf("k8sAPI.Sync() returned an error: %s", err) - } + k8sAPI.Sync(nil) rsp, err := fakeGrpcServer.ListPods(context.TODO(), &pb.Empty{}) if err != exp.err { diff --git a/controller/api/public/stat_summary_test.go b/controller/api/public/stat_summary_test.go index c283a1dc27864..746618f0d8cdf 100644 --- a/controller/api/public/stat_summary_test.go +++ b/controller/api/public/stat_summary_test.go @@ -69,10 +69,7 @@ func testStatSummary(t *testing.T, expectations []statSumExpected) { []string{}, ) - err = k8sAPI.Sync() - if err != nil { - t.Fatalf("k8sAPI.Sync() returned an error: %s", err) - } + k8sAPI.Sync(nil) rsp, err := fakeGrpcServer.StatSummary(context.TODO(), &exp.req) if err != exp.err { diff --git a/controller/cmd/destination/main.go b/controller/cmd/destination/main.go index 422b1a9dbdb15..c5622817de929 100644 --- a/controller/cmd/destination/main.go +++ b/controller/cmd/destination/main.go @@ -8,7 +8,7 @@ import ( "github.com/runconduit/conduit/controller/destination" "github.com/runconduit/conduit/controller/k8s" - "github.com/runconduit/conduit/pkg/prometheus" + "github.com/runconduit/conduit/pkg/admin" "github.com/runconduit/conduit/pkg/version" log "github.com/sirupsen/logrus" ) @@ -47,25 +47,21 @@ func main() { ) done := make(chan struct{}) + ready := make(chan struct{}) server, lis, err := destination.NewServer(*addr, *k8sDNSZone, *enableTLS, k8sAPI, done) if err != nil { log.Fatal(err) } - go func() { - err := k8sAPI.Sync() - if err != nil { - log.Fatal(err.Error()) - } - }() + go k8sAPI.Sync(ready) go func() { log.Infof("starting gRPC server on %s", *addr) server.Serve(lis) }() - go prometheus.NewMetricsServer(*metricsAddr) + go admin.StartServer(*metricsAddr, ready) <-stop diff --git a/controller/cmd/proxy-api/main.go b/controller/cmd/proxy-api/main.go index f0be8d9f000ce..b3e210c7821ca 100644 --- a/controller/cmd/proxy-api/main.go +++ b/controller/cmd/proxy-api/main.go @@ -8,7 +8,7 @@ import ( "github.com/runconduit/conduit/controller/api/proxy" "github.com/runconduit/conduit/controller/destination" - "github.com/runconduit/conduit/pkg/prometheus" + "github.com/runconduit/conduit/pkg/admin" "github.com/runconduit/conduit/pkg/version" log "github.com/sirupsen/logrus" ) @@ -49,7 +49,7 @@ func main() { server.Serve(lis) }() - go prometheus.NewMetricsServer(*metricsAddr) + go admin.StartServer(*metricsAddr, nil) <-stop diff --git a/controller/cmd/public-api/main.go b/controller/cmd/public-api/main.go index 9cad02d3892a7..947a7660414e3 100644 --- a/controller/cmd/public-api/main.go +++ b/controller/cmd/public-api/main.go @@ -12,7 +12,7 @@ import ( "github.com/runconduit/conduit/controller/api/public" "github.com/runconduit/conduit/controller/k8s" "github.com/runconduit/conduit/controller/tap" - "github.com/runconduit/conduit/pkg/prometheus" + "github.com/runconduit/conduit/pkg/admin" "github.com/runconduit/conduit/pkg/version" log "github.com/sirupsen/logrus" ) @@ -75,19 +75,16 @@ func main() { strings.Split(*ignoredNamespaces, ","), ) - go func() { - err := k8sAPI.Sync() - if err != nil { - log.Fatal(err.Error()) - } - }() + ready := make(chan struct{}) + + go k8sAPI.Sync(ready) go func() { log.Infof("starting HTTP server on %+v", *addr) server.ListenAndServe() }() - go prometheus.NewMetricsServer(*metricsAddr) + go admin.StartServer(*metricsAddr, ready) <-stop diff --git a/controller/cmd/tap/main.go b/controller/cmd/tap/main.go index 9529e16f846dc..b90d878a9ee21 100644 --- a/controller/cmd/tap/main.go +++ b/controller/cmd/tap/main.go @@ -8,7 +8,7 @@ import ( "github.com/runconduit/conduit/controller/k8s" "github.com/runconduit/conduit/controller/tap" - "github.com/runconduit/conduit/pkg/prometheus" + "github.com/runconduit/conduit/pkg/admin" "github.com/runconduit/conduit/pkg/version" log "github.com/sirupsen/logrus" ) @@ -52,19 +52,16 @@ func main() { log.Fatal(err.Error()) } - go func() { - err := k8sAPI.Sync() - if err != nil { - log.Fatal(err.Error()) - } - }() + ready := make(chan struct{}) + + go k8sAPI.Sync(ready) go func() { log.Println("starting gRPC server on", *addr) server.Serve(lis) }() - go prometheus.NewMetricsServer(*metricsAddr) + go admin.StartServer(*metricsAddr, ready) <-stop diff --git a/controller/destination/endpoints_watcher_test.go b/controller/destination/endpoints_watcher_test.go index 24ca48d0cda46..024c048db26e9 100644 --- a/controller/destination/endpoints_watcher_test.go +++ b/controller/destination/endpoints_watcher_test.go @@ -111,10 +111,7 @@ spec: watcher := newEndpointsWatcher(k8sAPI) - err = k8sAPI.Sync() - if err != nil { - t.Fatalf("k8sAPI.Sync() returned an error: %s", err) - } + k8sAPI.Sync(nil) listener, cancelFn := newCollectUpdateListener() defer cancelFn() diff --git a/controller/k8s/api.go b/controller/k8s/api.go index 46e1cdee57207..41e141109ddcd 100644 --- a/controller/k8s/api.go +++ b/controller/k8s/api.go @@ -2,7 +2,6 @@ package k8s import ( "context" - "errors" "fmt" "time" @@ -96,7 +95,7 @@ func NewAPI(k8sClient kubernetes.Interface, resources ...ApiResource) *API { // Sync waits for all informers to be synced. // For servers, call this asynchronously. // For testing, call this synchronously. -func (api *API) Sync() error { +func (api *API) Sync(readyCh chan<- struct{}) { api.sharedInformers.Start(nil) ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) @@ -104,11 +103,13 @@ func (api *API) Sync() error { log.Infof("waiting for caches to sync") if !cache.WaitForCacheSync(ctx.Done(), api.syncChecks...) { - return errors.New("timed out waiting for caches to sync") + log.Fatal("failed to sync caches") } log.Infof("caches synced") - return nil + if readyCh != nil { + close(readyCh) + } } func (api *API) NS() coreinformers.NamespaceInformer { diff --git a/controller/k8s/api_test.go b/controller/k8s/api_test.go index f50a5e8f0bf7f..e4e0feae0fd57 100644 --- a/controller/k8s/api_test.go +++ b/controller/k8s/api_test.go @@ -35,10 +35,7 @@ func newAPI(resourceConfigs []string, extraConfigs ...string) (*API, []runtime.O return nil, nil, fmt.Errorf("NewFakeAPI returned an error: %s", err) } - err = api.Sync() - if err != nil { - return nil, nil, fmt.Errorf("api.Sync() returned an error: %s", err) - } + api.Sync(nil) return api, k8sResults, nil } diff --git a/controller/script/simulate-proxy/main.go b/controller/script/simulate-proxy/main.go index f07b5b43997b8..9bd07d6ee7b69 100644 --- a/controller/script/simulate-proxy/main.go +++ b/controller/script/simulate-proxy/main.go @@ -620,10 +620,7 @@ func main() { k8s.Deploy, k8s.Pod, ) - err = k8sAPI.Sync() - if err != nil { - log.Fatal(err.Error()) - } + k8sAPI.Sync(nil) proxyCount := endPort - startPort + 1 simulatedPods := getDeploymentByPod(k8sAPI, proxyCount) diff --git a/controller/tap/server_test.go b/controller/tap/server_test.go index 082847559a633..59b3378e156f5 100644 --- a/controller/tap/server_test.go +++ b/controller/tap/server_test.go @@ -170,10 +170,7 @@ status: go func() { server.Serve(listener) }() defer server.GracefulStop() - err = k8sAPI.Sync() - if err != nil { - t.Fatalf("k8sAPI.Sync() returned an error: %s", err) - } + k8sAPI.Sync(nil) client, conn, err := NewClient(listener.Addr().String()) if err != nil { diff --git a/pkg/admin/admin.go b/pkg/admin/admin.go new file mode 100644 index 0000000000000..b5ec6a08a76fb --- /dev/null +++ b/pkg/admin/admin.go @@ -0,0 +1,78 @@ +package admin + +import ( + "net/http" + "sync" + "time" + + "github.com/prometheus/client_golang/prometheus/promhttp" + log "github.com/sirupsen/logrus" +) + +type handler struct { + promHandler http.Handler + ready bool + sync.RWMutex +} + +func StartServer(addr string, readyCh <-chan struct{}) { + log.Infof("starting admin server on %s", addr) + + h := &handler{ + promHandler: promhttp.Handler(), + ready: readyCh == nil, + } + + if readyCh != nil { + go func() { + <-readyCh + h.setReady(true) + }() + } + + s := &http.Server{ + Addr: addr, + Handler: h, + ReadTimeout: 10 * time.Second, + WriteTimeout: 10 * time.Second, + } + + log.Fatal(s.ListenAndServe()) +} + +func (h *handler) ServeHTTP(w http.ResponseWriter, req *http.Request) { + switch req.URL.Path { + case "/metrics": + h.promHandler.ServeHTTP(w, req) + case "/ping": + h.servePing(w, req) + case "/ready": + h.serveReady(w, req) + default: + http.NotFound(w, req) + } +} + +func (h *handler) servePing(w http.ResponseWriter, req *http.Request) { + w.Write([]byte("pong\n")) +} + +func (h *handler) serveReady(w http.ResponseWriter, req *http.Request) { + if h.getReady() { + w.Write([]byte("ok\n")) + } else { + http.Error(w, "unready", http.StatusServiceUnavailable) + } +} + +func (h *handler) getReady() bool { + h.RLock() + defer h.RUnlock() + return h.ready +} + +func (h *handler) setReady(ready bool) { + h.Lock() + defer h.Unlock() + h.ready = ready +} diff --git a/pkg/prometheus/prometheus.go b/pkg/prometheus/prometheus.go index a2d4096b66b1f..86386248a00d8 100644 --- a/pkg/prometheus/prometheus.go +++ b/pkg/prometheus/prometheus.go @@ -6,7 +6,6 @@ import ( grpc_prometheus "github.com/grpc-ecosystem/go-grpc-prometheus" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" - log "github.com/sirupsen/logrus" "google.golang.org/grpc" ) @@ -72,9 +71,3 @@ func WithTelemetry(handler http.Handler) http.HandlerFunc { promhttp.InstrumentHandlerResponseSize(responseSize, promhttp.InstrumentHandlerCounter(counter, handler))) } - -func NewMetricsServer(metricsAddr string) { - log.Infof("serving scrapable metrics on %s", metricsAddr) - http.Handle("/metrics", promhttp.Handler()) - http.ListenAndServe(metricsAddr, nil) -} diff --git a/test/install_test.go b/test/install_test.go index e1f0a7cc5bb8b..5ebbc329a62fe 100644 --- a/test/install_test.go +++ b/test/install_test.go @@ -84,8 +84,7 @@ func TestInstall(t *testing.T) { } // Tests Pods and Deployments - // The Grafana readiness check can take 1+ min; hence the high timeout here - err = TestHelper.RetryFor(2*time.Minute, func() error { + err = TestHelper.RetryFor(1*time.Minute, func() error { for deploy, replicas := range conduitDeployReplicas { if err := TestHelper.CheckPods(TestHelper.GetConduitNamespace(), deploy, replicas); err != nil { return fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err) diff --git a/web/main.go b/web/main.go index d1189a765b72e..50b30a2ddc864 100644 --- a/web/main.go +++ b/web/main.go @@ -4,14 +4,13 @@ import ( "context" "flag" "net" - "net/http" "os" "os/signal" "syscall" "time" - "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/runconduit/conduit/controller/api/public" + "github.com/runconduit/conduit/pkg/admin" "github.com/runconduit/conduit/pkg/version" "github.com/runconduit/conduit/web/srv" log "github.com/sirupsen/logrus" @@ -59,11 +58,7 @@ func main() { server.ListenAndServe() }() - go func() { - log.Infof("serving scrapable metrics on %+v", *metricsAddr) - http.Handle("/metrics", promhttp.Handler()) - http.ListenAndServe(*metricsAddr, nil) - }() + go admin.StartServer(*metricsAddr, nil) <-stop