diff --git a/pkg/webhook/client.go b/pkg/webhook/client.go index 270e0903d..c4d3686af 100644 --- a/pkg/webhook/client.go +++ b/pkg/webhook/client.go @@ -11,7 +11,7 @@ import ( snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned" ) -var snclient *snclientset.Clientset +var snclient snclientset.Interface var kubeclient *kubernetes.Clientset func SetupInClusterClient() error { diff --git a/pkg/webhook/validate.go b/pkg/webhook/validate.go index 5e298c1dc..0bb9a42e9 100644 --- a/pkg/webhook/validate.go +++ b/pkg/webhook/validate.go @@ -47,9 +47,48 @@ func validateSriovOperatorConfig(cr *sriovnetworkv1.SriovOperatorConfig, operati warnings = append(warnings, "Node draining is disabled for applying SriovNetworkNodePolicy, it may result in workload interruption.") } + err := validateSriovOperatorConfigDisableDrainWhileUpdating(cr) + if err != nil { + return false, warnings, err + } + return true, warnings, nil } +// validateSriovOperatorConfigDisableDrainWhileUpdating checks if the user is setting `.Spec.DisableDrain` from false to true while +// operator is updating one or more nodes. Disabling the drain at this stage would prevent the operator to uncordon a node at +// the end of the update operation, keeping nodes un-schedulable until manual intervention. +func validateSriovOperatorConfigDisableDrainWhileUpdating(cr *sriovnetworkv1.SriovOperatorConfig) error { + + if !cr.Spec.DisableDrain { + return nil + } + + previousConfig, err := snclient.SriovnetworkV1().SriovOperatorConfigs(cr.Namespace).Get(context.Background(), cr.Name, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("can't validate SriovOperatorConfig[%s] DisableDrain against its previous value: %q", cr.Name, err) + } + + if previousConfig.Spec.DisableDrain == cr.Spec.DisableDrain { + // DisableDrain didn't change + return nil + } + + // DisableDrain has been changed `false -> true`, check if any node is updating + nodeStates, err := snclient.SriovnetworkV1().SriovNetworkNodeStates(namespace).List(context.Background(), metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("can't validate SriovOperatorConfig[%s] DisableDrain transition to true: %q", cr.Name, err) + } + + for _, nodeState := range nodeStates.Items { + if nodeState.Status.SyncStatus == "InProgress" { + return fmt.Errorf("can't set Spec.DisableDrain = true while node[%s] is updating", nodeState.Name) + } + } + + return nil +} + func validateSriovNetworkNodePolicy(cr *sriovnetworkv1.SriovNetworkNodePolicy, operation v1.Operation) (bool, []string, error) { glog.V(2).Infof("validateSriovNetworkNodePolicy: %v", cr) var warnings []string diff --git a/pkg/webhook/validate_test.go b/pkg/webhook/validate_test.go index 028d62a5e..44d6414d9 100644 --- a/pkg/webhook/validate_test.go +++ b/pkg/webhook/validate_test.go @@ -1,6 +1,7 @@ package webhook import ( + "context" "fmt" "os" "testing" @@ -14,6 +15,8 @@ import ( . "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + + fakesnclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned/fake" ) func TestMain(m *testing.M) { @@ -164,6 +167,51 @@ func TestValidateSriovOperatorConfigWithDefaultOperatorConfig(t *testing.T) { g.Expect(ok).To(Equal(true)) } +func TestValidateSriovOperatorConfigDisableDrain(t *testing.T) { + g := NewGomegaWithT(t) + + config := &SriovOperatorConfig{ + ObjectMeta: metav1.ObjectMeta{ + Name: "default", + }, + Spec: SriovOperatorConfigSpec{ + ConfigDaemonNodeSelector: map[string]string{}, + DisableDrain: false, + EnableInjector: func() *bool { b := true; return &b }(), + EnableOperatorWebhook: func() *bool { b := true; return &b }(), + LogLevel: 2, + }, + } + + nodeState := &SriovNetworkNodeState{ + ObjectMeta: metav1.ObjectMeta{Name: "worker-1"}, + Status: SriovNetworkNodeStateStatus{ + SyncStatus: "InProgress", + }, + } + + snclient = fakesnclientset.NewSimpleClientset( + config, + nodeState, + ) + + config.Spec.DisableDrain = true + + ok, _, err := validateSriovOperatorConfig(config, "UPDATE") + g.Expect(err).To(MatchError("can't set Spec.DisableDrain = true while node[worker-1] is updating")) + g.Expect(ok).To(Equal(false)) + + // Simulate node update finished + nodeState.Status.SyncStatus = "Succeeded" + snclient.SriovnetworkV1().SriovNetworkNodeStates(namespace). + Update(context.Background(), nodeState, metav1.UpdateOptions{}) + + ok, _, err = validateSriovOperatorConfig(config, "UPDATE") + g.Expect(err).NotTo(HaveOccurred()) + g.Expect(ok).To(Equal(true)) + +} + func TestValidateSriovNetworkNodePolicyWithDefaultPolicy(t *testing.T) { var err error var ok bool