Skip to content

Commit

Permalink
webhook: Avoid setting DisableDrain while updating
Browse files Browse the repository at this point in the history
Disable the drain while a node is updating its configuration
prevents the config-daemon to uncordon the node, leading
to a node that needs to be manually uncordoned.

Signed-off-by: Andrea Panattoni <apanatto@redhat.com>
  • Loading branch information
zeeke committed Jun 28, 2023
1 parent 8ea81c0 commit 3715423
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pkg/webhook/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned"
)

var snclient *snclientset.Clientset
var snclient snclientset.Interface
var kubeclient *kubernetes.Clientset

func SetupInClusterClient() error {
Expand Down
39 changes: 39 additions & 0 deletions pkg/webhook/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,48 @@ func validateSriovOperatorConfig(cr *sriovnetworkv1.SriovOperatorConfig, operati
warnings = append(warnings, "Node draining is disabled for applying SriovNetworkNodePolicy, it may result in workload interruption.")
}

err := validateSriovOperatorConfigDisableDrainWhileUpdating(cr)
if err != nil {
return false, warnings, err
}

return true, warnings, nil
}

// validateSriovOperatorConfigDisableDrainWhileUpdating checks if the user is setting `.Spec.DisableDrain` from false to true while
// operator is updating one or more nodes. Disabling the drain at this stage would prevent the operator to uncordon a node at
// the end of the update operation, keeping nodes un-schedulable until manual intervention.
func validateSriovOperatorConfigDisableDrainWhileUpdating(cr *sriovnetworkv1.SriovOperatorConfig) error {

Check failure on line 61 in pkg/webhook/validate.go

View workflow job for this annotation

GitHub Actions / Golangci-lint

unnecessary leading newline (whitespace)

if !cr.Spec.DisableDrain {
return nil
}

previousConfig, err := snclient.SriovnetworkV1().SriovOperatorConfigs(cr.Namespace).Get(context.Background(), cr.Name, metav1.GetOptions{})
if err != nil {
return fmt.Errorf("can't validate SriovOperatorConfig[%s] DisableDrain against its previous value: %q", cr.Name, err)
}

if previousConfig.Spec.DisableDrain == cr.Spec.DisableDrain {
// DisableDrain didn't change
return nil
}

// DisableDrain has been changed `false -> true`, check if any node is updating
nodeStates, err := snclient.SriovnetworkV1().SriovNetworkNodeStates(namespace).List(context.Background(), metav1.ListOptions{})
if err != nil {
return fmt.Errorf("can't validate SriovOperatorConfig[%s] DisableDrain transition to true: %q", cr.Name, err)
}

for _, nodeState := range nodeStates.Items {
if nodeState.Status.SyncStatus == "InProgress" {
return fmt.Errorf("can't set Spec.DisableDrain = true while node[%s] is updating", nodeState.Name)
}
}

return nil
}

func validateSriovNetworkNodePolicy(cr *sriovnetworkv1.SriovNetworkNodePolicy, operation v1.Operation) (bool, []string, error) {
glog.V(2).Infof("validateSriovNetworkNodePolicy: %v", cr)
var warnings []string
Expand Down
48 changes: 48 additions & 0 deletions pkg/webhook/validate_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package webhook

import (
"context"
"fmt"
"os"
"testing"
Expand All @@ -14,6 +15,8 @@ import (

. "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts"

fakesnclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned/fake"
)

func TestMain(m *testing.M) {
Expand Down Expand Up @@ -164,6 +167,51 @@ func TestValidateSriovOperatorConfigWithDefaultOperatorConfig(t *testing.T) {
g.Expect(ok).To(Equal(true))
}

func TestValidateSriovOperatorConfigDisableDrain(t *testing.T) {
g := NewGomegaWithT(t)

config := &SriovOperatorConfig{
ObjectMeta: metav1.ObjectMeta{
Name: "default",
},
Spec: SriovOperatorConfigSpec{
ConfigDaemonNodeSelector: map[string]string{},
DisableDrain: false,
EnableInjector: func() *bool { b := true; return &b }(),
EnableOperatorWebhook: func() *bool { b := true; return &b }(),
LogLevel: 2,
},
}

nodeState := &SriovNetworkNodeState{
ObjectMeta: metav1.ObjectMeta{Name: "worker-1"},
Status: SriovNetworkNodeStateStatus{
SyncStatus: "InProgress",
},
}

snclient = fakesnclientset.NewSimpleClientset(
config,
nodeState,
)

config.Spec.DisableDrain = true

ok, _, err := validateSriovOperatorConfig(config, "UPDATE")
g.Expect(err).To(MatchError("can't set Spec.DisableDrain = true while node[worker-1] is updating"))
g.Expect(ok).To(Equal(false))

// Simulate node update finished
nodeState.Status.SyncStatus = "Succeeded"
snclient.SriovnetworkV1().SriovNetworkNodeStates(namespace).
Update(context.Background(), nodeState, metav1.UpdateOptions{})

ok, _, err = validateSriovOperatorConfig(config, "UPDATE")
g.Expect(err).NotTo(HaveOccurred())
g.Expect(ok).To(Equal(true))

Check failure on line 212 in pkg/webhook/validate_test.go

View workflow job for this annotation

GitHub Actions / Golangci-lint

unnecessary trailing newline (whitespace)
}

func TestValidateSriovNetworkNodePolicyWithDefaultPolicy(t *testing.T) {
var err error
var ok bool
Expand Down

0 comments on commit 3715423

Please sign in to comment.