Skip to content

Commit

Permalink
Introduce logging and restart integration tests (#2414)
Browse files Browse the repository at this point in the history
The integration tests deploy complete Linkerd environments into
Kubernetes, but do not check if the components are logging errors or
restarting.

Introduce integration tests to validate that all expected
control-plane containers (including `linkerd-proxy` and `linkerd-init`)
are found, logging no errors, and not restarting.

Fixes #2348

Signed-off-by: Andrew Seigner <siggy@buoyant.io>
  • Loading branch information
siggy committed Mar 6, 2019
1 parent 0dcd69c commit d850b02
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 11 deletions.
94 changes: 85 additions & 9 deletions test/install_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package test
import (
"fmt"
"os"
"regexp"
"strconv"
"strings"
"testing"
Expand All @@ -11,6 +12,15 @@ import (
"github.com/linkerd/linkerd2/testutil"
)

// deploySpec describes the expected state of a single control-plane
// deployment: how many replicas it runs, and which application containers
// (beyond the injected linkerd-proxy sidecar) its pods are expected to
// contain.
type deploySpec struct {
	// replicas is the expected number of pod replicas for the deployment.
	replicas int
	// containers lists the expected non-proxy container names; the
	// linkerd-proxy sidecar is appended separately by the tests.
	containers []string
}

const (
	// proxyContainer is the name of the sidecar proxy container injected
	// into every control-plane pod.
	proxyContainer = "linkerd-proxy"
)

//////////////////////
/// TEST SETUP ///
//////////////////////
Expand All @@ -31,12 +41,31 @@ var (
"linkerd-web",
}

linkerdDeployReplicas = map[string]int{
"linkerd-controller": 1,
"linkerd-grafana": 1,
"linkerd-prometheus": 1,
"linkerd-web": 1,
linkerdDeployReplicas = map[string]deploySpec{
"linkerd-controller": {1, []string{"destination", "public-api", "tap"}},
"linkerd-grafana": {1, []string{}},
"linkerd-prometheus": {1, []string{}},
"linkerd-web": {1, []string{"web"}},
}

// Linkerd commonly logs these errors during testing, remove these once
// they're addressed.
knownErrorsRegex = regexp.MustCompile(strings.Join([]string{
// TLS not ready at startup
`.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! admin={bg=tls-config} linkerd2_proxy::transport::tls::config error loading /var/linkerd-io/identity/certificate\.crt: No such file or directory \(os error 2\)`,
`.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! admin={bg=tls-config} linkerd2_proxy::transport::tls::config error loading /var/linkerd-io/trust-anchors/trust-anchors\.pem: No such file or directory \(os error 2\)`,
`.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy WARN admin={bg=tls-config} linkerd2_proxy::transport::tls::config error reloading TLS config: Io\("/var/linkerd-io/identity/certificate\.crt", Some\(2\)\), falling back`,
`.*-tls linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy WARN admin={bg=tls-config} linkerd2_proxy::transport::tls::config error reloading TLS config: Io\("/var/linkerd-io/trust-anchors/trust-anchors\.pem", Some\(2\)\), falling back`,

// k8s hitting readiness endpoints before components are ready
`.* linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! proxy={server=in listen=0\.0\.0\.0:4143 remote=.*} linkerd2_proxy::proxy::http::router service error: an error occurred trying to connect: Connection refused \(os error 111\) \(address: 127\.0\.0\.1:.*\)`,
`.* linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! proxy={server=out listen=127\.0\.0\.1:4140 remote=.*} linkerd2_proxy::proxy::http::router service error: an error occurred trying to connect: Connection refused \(os error 111\) \(address: .*:4191\)`,

`.* linkerd-(ca|controller|grafana|prometheus|web)-.*-.* linkerd-proxy ERR! admin={server=metrics listen=0.0.0.0:4191 remote=.*} linkerd2_proxy::control::serve_http error serving metrics: Error { kind: Shutdown, cause: Os { code: 107, kind: NotConnected, message: "Transport endpoint is not connected" } }`,

`.* linkerd-controller-.*-.* tap time=".*" level=error msg="\[.*\] encountered an error: rpc error: code = Canceled desc = context canceled"`,
`.* linkerd-web-.*-.* linkerd-proxy WARN trust_dns_proto::xfer::dns_exchange failed to associate send_message response to the sender`,
}, "|"))
)

//////////////////////
Expand Down Expand Up @@ -83,7 +112,7 @@ func TestInstall(t *testing.T) {
}
if TestHelper.TLS() {
cmd = append(cmd, []string{"--tls", "optional"}...)
linkerdDeployReplicas["linkerd-ca"] = 1
linkerdDeployReplicas["linkerd-ca"] = deploySpec{1, []string{"ca"}}
}
if TestHelper.SingleNamespace() {
cmd = append(cmd, "--single-namespace")
Expand Down Expand Up @@ -113,11 +142,11 @@ func TestInstall(t *testing.T) {
}

// Tests Pods and Deployments
for deploy, replicas := range linkerdDeployReplicas {
if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, replicas); err != nil {
for deploy, spec := range linkerdDeployReplicas {
if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, spec.replicas); err != nil {
t.Fatal(fmt.Errorf("Error validating pods for deploy [%s]:\n%s", deploy, err))
}
if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, replicas); err != nil {
if err := TestHelper.CheckDeployment(TestHelper.GetLinkerdNamespace(), deploy, spec.replicas); err != nil {
t.Fatal(fmt.Errorf("Error validating deploy [%s]:\n%s", deploy, err))
}
}
Expand Down Expand Up @@ -261,3 +290,50 @@ func TestCheckProxy(t *testing.T) {
t.Fatal(err.Error())
}
}

// TestLogs streams the logs of every expected control-plane container
// (including the linkerd-proxy sidecar) and fails if any line matches an
// error/warning pattern that is not in the knownErrorsRegex allowlist.
func TestLogs(t *testing.T) {
	// Controller containers log via logrus; the proxy logs via env_logger.
	controllerRegex := regexp.MustCompile("level=(panic|fatal|error|warn)")
	proxyRegex := regexp.MustCompile(fmt.Sprintf("%s (ERR|WARN)", proxyContainer))

	for deploy, spec := range linkerdDeployReplicas {
		deploy := strings.TrimPrefix(deploy, "linkerd-")

		// Build a fresh slice rather than appending to spec.containers,
		// which aliases the shared package-level linkerdDeployReplicas map.
		containers := make([]string, 0, len(spec.containers)+1)
		containers = append(containers, spec.containers...)
		containers = append(containers, proxyContainer)

		for _, container := range containers {
			errRegex := controllerRegex
			if container == proxyContainer {
				errRegex = proxyRegex
			}

			outputStream, err := TestHelper.LinkerdRunStream(
				"logs", "--no-color",
				"--control-plane-component", deploy,
				"--container", container,
			)
			if err != nil {
				t.Errorf("Error running command:\n%s", err)
				// The stream is not usable on error; skip this container
				// instead of dereferencing a nil stream below.
				continue
			}

			// Ignore the error returned, since ReadUntil will return an error
			// if it does not collect 10,000 lines within the timeout. We don't
			// need 10,000 log lines.
			outputLines, _ := outputStream.ReadUntil(10000, 2*time.Second)

			// Stop the stream now rather than via defer: a defer inside this
			// double loop would keep every stream open until the test returns.
			outputStream.Stop()

			if len(outputLines) == 0 {
				t.Errorf("No logs found for %s/%s", deploy, container)
			}

			for _, line := range outputLines {
				if errRegex.MatchString(line) && !knownErrorsRegex.MatchString(line) {
					t.Errorf("Found error in %s/%s log: %s", deploy, container, line)
				}
			}
		}
	}
}

func TestRestarts(t *testing.T) {
for deploy, spec := range linkerdDeployReplicas {
if err := TestHelper.CheckPods(TestHelper.GetLinkerdNamespace(), deploy, spec.replicas); err != nil {
t.Fatal(fmt.Errorf("Error validating pods [%s]:\n%s", deploy, err))
}
}
}
31 changes: 29 additions & 2 deletions testutil/kubernetes_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,13 @@ func (h *KubernetesHelper) KubectlApply(stdin string, namespace string) (string,
return string(out), err
}

// Kubectl runs the kubectl binary with the given arguments, returning its
// combined stdout/stderr output as a string along with any execution error.
func (h *KubernetesHelper) Kubectl(arg ...string) (string, error) {
	out, err := exec.Command("kubectl", arg...).CombinedOutput()
	return string(out), err
}

// getDeployments gets all deployments with a count of their ready replicas in
// the specified namespace.
func (h *KubernetesHelper) getDeployments(namespace string) (map[string]int, error) {
Expand Down Expand Up @@ -127,9 +134,12 @@ func (h *KubernetesHelper) CheckDeployment(namespace string, deploymentName stri
}

// CheckPods checks that a deployment in a namespace contains the expected
// number of pods in the Running state.
// number of pods in the Running state, and that no pods have been restarted.
func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, replicas int) error {
return h.retryFor(3*time.Minute, func() error {
var checkedPods []corev1.Pod

err := h.retryFor(3*time.Minute, func() error {
checkedPods = []corev1.Pod{}
pods, err := h.clientset.CoreV1().Pods(namespace).List(metav1.ListOptions{})
if err != nil {
return err
Expand All @@ -138,6 +148,8 @@ func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, re
var deploymentReplicas int
for _, pod := range pods.Items {
if strings.HasPrefix(pod.Name, deploymentName) {
checkedPods = append(checkedPods, pod)

deploymentReplicas++
if pod.Status.Phase != "Running" {
return fmt.Errorf("Pod [%s] in namespace [%s] is not running",
Expand All @@ -159,6 +171,21 @@ func (h *KubernetesHelper) CheckPods(namespace string, deploymentName string, re

return nil
})

if err != nil {
return err
}

for _, pod := range checkedPods {
for _, status := range append(pod.Status.ContainerStatuses, pod.Status.InitContainerStatuses...) {
if status.RestartCount != 0 {
return fmt.Errorf("Container [%s] in pod [%s] in namespace [%s] has restart count [%d]",
status.Name, pod.Name, pod.Namespace, status.RestartCount)
}
}
}

return nil
}

// CheckService checks that a service exists in a namespace.
Expand Down

0 comments on commit d850b02

Please sign in to comment.