From 69b14ce44e61aad31371a0efd1c7615ce5332451 Mon Sep 17 00:00:00 2001 From: Tobias Giese Date: Tue, 23 Jul 2024 21:47:59 +0200 Subject: [PATCH] chore: add feature flag for mellanox fw reset To not enable the new feature by default we want to add a feature flag first. Signed-off-by: Tobias Giese --- cmd/sriov-network-config-daemon/start.go | 20 ++++++++++++++++++++ pkg/consts/constants.go | 3 +++ pkg/daemon/daemon.go | 16 ++++++++++++++++ pkg/daemon/daemon_test.go | 9 ++++++--- pkg/plugins/mellanox/mellanox_plugin.go | 6 +++++- pkg/vars/vars.go | 3 +++ 6 files changed, 53 insertions(+), 4 deletions(-) diff --git a/cmd/sriov-network-config-daemon/start.go b/cmd/sriov-network-config-daemon/start.go index efe35bcbf..2d894d4e8 100644 --- a/cmd/sriov-network-config-daemon/start.go +++ b/cmd/sriov-network-config-daemon/start.go @@ -25,7 +25,9 @@ import ( "time" "github.com/spf13/cobra" + apierrors "k8s.io/apimachinery/pkg/api/errors" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/rest" @@ -41,6 +43,7 @@ import ( snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/daemon" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" @@ -276,6 +279,22 @@ func runStartCmd(cmd *cobra.Command, args []string) error { } go nodeWriter.Run(stopCh, refreshCh, syncCh) + // Init feature gates once to prevent race conditions. + defaultConfig := &sriovnetworkv1.SriovOperatorConfig{} + err = kClient.Get(context.Background(), types.NamespacedName{Namespace: vars.Namespace, Name: consts.DefaultConfigName}, defaultConfig) + if err != nil { + if apierrors.IsNotFound(err) { + log.Log.Info("default SriovOperatorConfig object not found. waiting for creation.") + return nil + } + // Error reading the object - requeue the request. + log.Log.Error(err, "Failed to get default SriovOperatorConfig object") + return err + } + featureGates := featuregate.New() + featureGates.Init(defaultConfig.Spec.FeatureGates) + log.Log.Info("Enabled featureGates", "featureGates", featureGates.String()) + setupLog.V(0).Info("Starting SriovNetworkConfigDaemon") err = daemon.New( kClient, @@ -288,6 +307,7 @@ func runStartCmd(cmd *cobra.Command, args []string) error { syncCh, refreshCh, eventRecorder, + featureGates, startOpts.disabledPlugins, ).Run(stopCh, exitCh) if err != nil { diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index cbfe9ad98..5abfa53c6 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -141,6 +141,9 @@ const ( // The path to the file on the host filesystem that contains the IB GUID distribution for IB VFs InfinibandGUIDConfigFilePath = SriovConfBasePath + "/infiniband/guids" + + // MellanoxFirmwareResetFeatureGate: enables the firmware reset via mstfwreset before a reboot + MellanoxFirmwareResetFeatureGate = "mellanoxFirmwareReset" ) const ( diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 0fd1ec2cb..6a8e0c3ae 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -5,6 +5,7 @@ import ( "fmt" "math/rand" "os/exec" + "reflect" "sync" "time" @@ -23,6 +24,7 @@ import ( snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned" sninformer "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/informers/externalversions" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" @@ -82,6 +84,8 @@ type Daemon struct { workqueue workqueue.RateLimitingInterface eventRecorder *EventRecorder + + featureGate featuregate.FeatureGate } func New( @@ -95,6 +99,7 @@ func New( syncCh <-chan struct{}, refreshCh chan<- Message, er *EventRecorder, + featureGates featuregate.FeatureGate, disabledPlugins []string, ) *Daemon { return &Daemon{ @@ -113,6 +118,7 @@ func New( &workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(updateDelay), 1)}, workqueue.NewItemExponentialFailureRateLimiter(1*time.Second, maxUpdateBackoff)), "SriovNetworkNodeState"), eventRecorder: er, + featureGate: featureGates, disabledPlugins: disabledPlugins, } } @@ -286,6 +292,7 @@ func (dn *Daemon) operatorConfigAddHandler(obj interface{}) { } func (dn *Daemon) operatorConfigChangeHandler(old, new interface{}) { + oldCfg := old.(*sriovnetworkv1.SriovOperatorConfig) newCfg := new.(*sriovnetworkv1.SriovOperatorConfig) if newCfg.Namespace != vars.Namespace || newCfg.Name != consts.DefaultConfigName { log.Log.V(2).Info("unsupported SriovOperatorConfig", "namespace", newCfg.Namespace, "name", newCfg.Name) @@ -299,6 +306,15 @@ func (dn *Daemon) operatorConfigChangeHandler(old, new interface{}) { dn.disableDrain = newDisableDrain log.Log.Info("Set Disable Drain", "value", dn.disableDrain) } + + if !reflect.DeepEqual(oldCfg.Spec.FeatureGates, newCfg.Spec.FeatureGates) { + dn.featureGate.Init(newCfg.Spec.FeatureGates) + log.Log.Info("Enabled featureGates", "featureGates", dn.featureGate.String()) + } + + if dn.featureGate.IsEnabled(consts.MellanoxFirmwareResetFeatureGate) { + vars.MlxPluginFwReset = true + } } func (dn *Daemon) nodeStateSyncHandler() error { diff --git a/pkg/daemon/daemon_test.go b/pkg/daemon/daemon_test.go index a1e29dcb2..d89e91991 100644 --- a/pkg/daemon/daemon_test.go +++ b/pkg/daemon/daemon_test.go @@ -18,18 +18,18 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock" - "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/fakefilesystem" - snclient "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned" snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned/fake" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate" mock_helper "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper/mock" + mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/fake" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/generic" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" + "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/fakefilesystem" ) func TestConfigDaemon(t *testing.T) { @@ -151,6 +151,8 @@ var _ = Describe("Config Daemon", func() { vendorHelper.EXPECT().PrepareNMUdevRule([]string{"0x1014", "0x154c"}).Return(nil).AnyTimes() vendorHelper.EXPECT().PrepareVFRepUdevRule().Return(nil).AnyTimes() + featureGates := featuregate.New() + sut = New( kClient, snclient, @@ -162,6 +164,7 @@ var _ = Describe("Config Daemon", func() { syncCh, refreshCh, er, + featureGates, nil, ) diff --git a/pkg/plugins/mellanox/mellanox_plugin.go b/pkg/plugins/mellanox/mellanox_plugin.go index f3099f756..8996abe02 100644 --- a/pkg/plugins/mellanox/mellanox_plugin.go +++ b/pkg/plugins/mellanox/mellanox_plugin.go @@ -8,6 +8,7 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" mlx "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vendors/mellanox" ) @@ -198,7 +199,10 @@ func (p *MellanoxPlugin) Apply() error { if err := p.helpers.MlxConfigFW(attributesToChange); err != nil { return err } - return p.helpers.MlxResetFW(pciAddressesToReset) + if vars.MlxPluginFwReset { + return p.helpers.MlxResetFW(pciAddressesToReset) + } + return nil } // nicHasExternallyManagedPFs returns true if one of the ports(interface) of the NIC is marked as externally managed diff --git a/pkg/vars/vars.go b/pkg/vars/vars.go index 9321261dc..fc7108ed8 100644 --- a/pkg/vars/vars.go +++ b/pkg/vars/vars.go @@ -54,6 +54,9 @@ var ( // ManageSoftwareBridges global variable which reflects state of manageSoftwareBridges feature ManageSoftwareBridges = false + // MlxPluginFwReset global variable enables mstfwreset before rebooting a node on VF changes + MlxPluginFwReset = false + // FilesystemRoot used by test to mock interactions with filesystem FilesystemRoot = ""