From d850b02b667dfdbbf08f4dcccf2046c5165d8209 Mon Sep 17 00:00:00 2001 From: Andrew Seigner Date: Tue, 5 Mar 2019 19:49:38 -0800 Subject: [PATCH] Introduce logging and restart integration tests (#2414) The integration tests deploy complete Linkerd environments into Kubernetes, but do not check if the components are logging errors or restarting. Introduce integration tests to validation that all expected control-plane containers (including `linkerd-proxy` and `linkerd-init`) are found, logging no errors, and not restarting. Fixes #2348 Signed-off-by: Andrew Seigner --- test/install_test.go | 94 +++++++++++++++++++++++++++++++---- testutil/kubernetes_helper.go | 31 +++++++++++- 2 files changed, 114 insertions(+), 11 deletions(-) diff --git a/test/install_test.go b/test/install_test.go index d07ab079b91d4..bd93332563a72 100644 --- a/test/install_test.go +++ b/test/install_test.go @@ -3,6 +3,7 @@ package test import ( "fmt" "os" + "regexp" "strconv" "strings" "testing" @@ -11,6 +12,15 @@ import ( "github.com/linkerd/linkerd2/testutil" ) +type deploySpec struct { + replicas int + containers []string +} + +const ( + proxyContainer = "linkerd-proxy" +) + ////////////////////// /// TEST SETUP /// ////////////////////// @@ -31,12 +41,31 @@ var ( "linkerd-web", } - linkerdDeployReplicas = map[string]int{ - "linkerd-controller": 1, - "linkerd-grafana": 1, - "linkerd-prometheus": 1, - "linkerd-web": 1, + linkerdDeployReplicas = map[string]deploySpec{ + "linkerd-controller": {1, []string{"destination", "public-api", "tap"}}, + "linkerd-grafana": {1, []string{}}, + "linkerd-prometheus": {1, []string{}}, + "linkerd-web": {1, []string{"web"}}, } + + // Linkerd commonly logs these errors during testing, remove these once + // they're addressed. + knownErrorsRegex = regexp.MustCompile(strings.Join([]string{ + // TLS not ready at startup + `.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! admin={bg=tls-config} linkerd2_proxy::transport::tls::config error loading /var/linkerd-io/identity/certificate\.crt: No such file or directory \(os error 2\)`, + `.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! admin={bg=tls-config} linkerd2_proxy::transport::tls::config error loading /var/linkerd-io/trust-anchors/trust-anchors\.pem: No such file or directory \(os error 2\)`, + `.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy WARN admin={bg=tls-config} linkerd2_proxy::transport::tls::config error reloading TLS config: Io\("/var/linkerd-io/identity/certificate\.crt", Some\(2\)\), falling back`, + `.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy WARN admin={bg=tls-config} linkerd2_proxy::transport::tls::config error reloading TLS config: Io\("/var/linkerd-io/trust-anchors/trust-anchors\.pem", Some\(2\)\), falling back`, + + // k8s hitting readiness endpoints before components are ready + `.* linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! proxy={server=in listen=0\.0\.0\.0:4143 remote=.*} linkerd2_proxy::proxy::http::router service error: an error occurred trying to connect: Connection refused \(os error 111\) \(address: 127\.0\.0\.1:.*\)`, + `.* linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! proxy={server=out listen=127\.0\.0\.1:4140 remote=.*} linkerd2_proxy::proxy::http::router service error: an error occurred trying to connect: Connection refused \(os error 111\) \(address: .*:4191\)`, + + `.* linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! admin={server=metrics listen=0.0.0.0:4191 remote=.*} linkerd2_proxy::control::serve_http error serving metrics: Error { kind: Shutdown, cause: Os { code: 107, kind: NotConnected, message: "Transport endpoint is not connected" } }`, + + `.* linkerd-controller-.*-.* tap time=".*" level=error msg="\[.*\] encountered an error: rpc error: code = Canceled desc = context canceled"`, + `.* linkerd-web-.*-.* linkerd-proxy WARN trust_dns_proto::xfer::dns_exchange failed to associate send_message response to the sender`, + }, "|")) ) ////////////////////// @@ -83,7 +112,7 @@ func TestInstall(t *testing.T) { } if TestHelper.TLS() { cmd = append(cmd, []string{"--tls", "optional"}...) - linkerdDeployReplicas["linkerd-ca"] = 1 + linkerdDeployReplicas["linkerd-ca"] = deploySpec{1, []string{"ca"}} } if TestHelper.SingleNamespace() { cmd = append(cmd, "--single-namespace") @@ -113,11 +142,11 @@ func TestInstall(t *testing.T) { } // Tests Pods and Deployments - for deploy, replicas := range linkerdDeployReplicas { - if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, replicas); err != nil { + for deploy, spec := range linkerdDeployReplicas { + if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, spec.replicas); err != nil { t.Fatal(fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err)) } - if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, replicas); err != nil { + if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, spec.replicas); err != nil { t.Fatal(fmt.Errorf("Error validating deploy [%s]:\n%s", deploy, err)) } } @@ -261,3 +290,50 @@ func TestCheckProxy(t *testing.T) { t.Fatal(err.Error()) } } + +func TestLogs(t *testing.T) { + controllerRegex := regexp.MustCompile("level=(panic|fatal|error|warn)") + proxyRegex := regexp.MustCompile(fmt.Sprintf("%s (ERR|WARN)", proxyContainer)) + + for deploy, spec := range linkerdDeployReplicas { + deploy := strings.TrimPrefix(deploy, "linkerd-") + containers := append(spec.containers, proxyContainer) + + for _, container := range containers { + errRegex := controllerRegex + if container == proxyContainer { + errRegex = proxyRegex + } + + outputStream, err := TestHelper.LinkerdRunStream( + "logs", "--no-color", + "--control-plane-component", deploy, + "--container", container, + ) + if err != nil { + t.Errorf("Error running command:\n%s", err) + } + defer outputStream.Stop() + // Ignore the error returned, since ReadUntil will return an error if it + // does not return 10,000 after 1 second. We don't need 10,000 log lines. + outputLines, _ := outputStream.ReadUntil(10000, 2*time.Second) + if len(outputLines) == 0 { + t.Errorf("No logs found for %s/%s", deploy, container) + } + + for _, line := range outputLines { + if errRegex.MatchString(line) && !knownErrorsRegex.MatchString(line) { + t.Errorf("Found error in %s/%s log: %s", deploy, container, line) + } + } + } + } +} + +func TestRestarts(t *testing.T) { + for deploy, spec := range linkerdDeployReplicas { + if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, spec.replicas); err != nil { + t.Fatal(fmt.Errorf("Error validating pods [%s]:\n%s", deploy, err)) + } + } +} diff --git a/testutil/kubernetes_helper.go b/testutil/kubernetes_helper.go index 4689d40ec98d3..eb027c005a76f 100644 --- a/testutil/kubernetes_helper.go +++ b/testutil/kubernetes_helper.go @@ -87,6 +87,13 @@ func (h *KubernetesHelper) KubectlApply(stdin string, namespace string) (string, return string(out), err } +// Kubectl executes an arbitrary Kubectl command +func (h *KubernetesHelper) Kubectl(arg ...string) (string, error) { + cmd := exec.Command("kubectl", arg...) + out, err := cmd.CombinedOutput() + return string(out), err +} + // getDeployments gets all deployments with a count of their ready replicas in // the specified namespace. func (h *KubernetesHelper) getDeployments(namespace string) (map[string]int, error) { @@ -127,9 +134,12 @@ func (h *KubernetesHelper) CheckDeployment(namespace string, deploymentName stri } // CheckPods checks that a deployment in a namespace contains the expected -// number of pods in the Running state. +// number of pods in the Running state, and that no pods have been restarted. func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, replicas int) error { - return h.retryFor(3*time.Minute, func() error { + var checkedPods []corev1.Pod + + err := h.retryFor(3*time.Minute, func() error { + checkedPods = []corev1.Pod{} pods, err := h.clientset.CoreV1().Pods(namespace).List(metav1.ListOptions{}) if err != nil { return err @@ -138,6 +148,8 @@ func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, re var deploymentReplicas int for _, pod := range pods.Items { if strings.HasPrefix(pod.Name, deploymentName) { + checkedPods = append(checkedPods, pod) + deploymentReplicas++ if pod.Status.Phase != "Running" { return fmt.Errorf("Pod [%s] in namespace [%s] is not running", @@ -159,6 +171,21 @@ func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, re return nil }) + + if err != nil { + return err + } + + for _, pod := range checkedPods { + for _, status := range append(pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses...) { + if status.RestartCount != 0 { + return fmt.Errorf("Container [%s] in pod [%s] in namespace [%s] has restart count [%d]", + status.Name, pod.Name, pod.Namespace, status.RestartCount) + } + } + } + + return nil } // CheckService checks that a service exists in a namespace.