diff --git a/api/v1/helper.go b/api/v1/helper.go index ba2429caf..664531260 100644 --- a/api/v1/helper.go +++ b/api/v1/helper.go @@ -20,7 +20,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" logf "sigs.k8s.io/controller-runtime/pkg/log" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/render" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) const ( @@ -47,6 +49,8 @@ var log = logf.Log.WithName("sriovnetwork") // Vendor ID, Physical Function Device ID, Virtual Function Device ID var NicIDMap = []string{} +var InitialState SriovNetworkNodeState + // NetFilterType Represents the NetFilter tags to be used type NetFilterType int @@ -211,6 +215,80 @@ func GetVfDeviceID(deviceID string) string { return "" } +func IsSwitchdevModeSpec(spec SriovNetworkNodeStateSpec) bool { + for _, iface := range spec.Interfaces { + if iface.EswitchMode == ESwithModeSwitchDev { + return true + } + } + return false +} + +func FindInterface(interfaces Interfaces, name string) (iface Interface, err error) { + for _, i := range interfaces { + if i.Name == name { + return i, nil + } + } + return Interface{}, fmt.Errorf("unable to find interface: %v", name) +} + +func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool { + if ifaceSpec.Mtu > 0 { + mtu := ifaceSpec.Mtu + if mtu != ifaceStatus.Mtu { + log.V(2).Info("NeedToUpdateSriov(): MTU needs update", "desired", mtu, "current", ifaceStatus.Mtu) + return true + } + } + + if ifaceSpec.NumVfs != ifaceStatus.NumVfs { + log.V(2).Info("NeedToUpdateSriov(): NumVfs needs update", "desired", ifaceSpec.NumVfs, "current", ifaceStatus.NumVfs) + return true + } + if ifaceSpec.NumVfs > 0 { + for _, vfStatus := range ifaceStatus.VFs { + ingroup := false + for _, groupSpec := range ifaceSpec.VfGroups { + if IndexInRange(vfStatus.VfID, groupSpec.VfRange) { + ingroup = true + if groupSpec.DeviceType != consts.DeviceTypeNetDevice { + if groupSpec.DeviceType != vfStatus.Driver { + log.V(2).Info("NeedToUpdateSriov(): Driver needs update", + "desired", groupSpec.DeviceType, "current", vfStatus.Driver) + return true + } + } else { + if StringInArray(vfStatus.Driver, vars.DpdkDrivers) { + log.V(2).Info("NeedToUpdateSriov(): Driver needs update", + "desired", groupSpec.DeviceType, "current", vfStatus.Driver) + return true + } + if vfStatus.Mtu != 0 && groupSpec.Mtu != 0 && vfStatus.Mtu != groupSpec.Mtu { + log.V(2).Info("NeedToUpdateSriov(): VF MTU needs update", + "vf", vfStatus.VfID, "desired", groupSpec.Mtu, "current", vfStatus.Mtu) + return true + } + + // this is needed to be sure the admin mac address is configured as expected + if ifaceSpec.ExternallyManaged { + log.V(2).Info("NeedToUpdateSriov(): need to update the device as it's externally manage", + "device", ifaceStatus.PciAddress) + return true + } + } + break + } + } + if !ingroup && StringInArray(vfStatus.Driver, vars.DpdkDrivers) { + // VF which has DPDK driver loaded but not in any group, needs to be reset to default driver. + return true + } + } + } + return false +} + type ByPriority []SriovNetworkNodePolicy func (a ByPriority) Len() int { diff --git a/cmd/sriov-network-config-daemon/service.go b/cmd/sriov-network-config-daemon/service.go index fc9cfc6d9..3305d7e8e 100644 --- a/cmd/sriov-network-config-daemon/service.go +++ b/cmd/sriov-network-config-daemon/service.go @@ -24,15 +24,16 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" + snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/generic" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/virtual" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/systemd" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/version" - - snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" ) var ( @@ -57,11 +58,16 @@ func runServiceCmd(cmd *cobra.Command, args []string) error { setupLog.V(2).Info("sriov-config-service", "version", version.Version) setupLog.V(0).Info("Starting sriov-config-service") + + // Mark that we are running on host + vars.UsingSystemdMode = true + vars.InChroot = true + supportedNicIds, err := systemd.ReadSriovSupportedNics() if err != nil { setupLog.Error(err, "failed to read list of supported nic ids") sriovResult := &systemd.SriovResult{ - SyncStatus: "Failed", + SyncStatus: consts.SyncStatusFailed, LastSyncError: fmt.Sprintf("failed to read list of supported nic ids: %v", err), } err = systemd.WriteSriovResult(sriovResult) @@ -78,7 +84,7 @@ func runServiceCmd(cmd *cobra.Command, args []string) error { if _, err := os.Stat(systemd.SriovSystemdConfigPath); !errors.Is(err, os.ErrNotExist) { setupLog.Error(err, "failed to read the sriov configuration file", "path", systemd.SriovSystemdConfigPath) sriovResult := &systemd.SriovResult{ - SyncStatus: "Failed", + SyncStatus: consts.SyncStatusFailed, LastSyncError: fmt.Sprintf("failed to read the sriov configuration file in path %s: %v", systemd.SriovSystemdConfigPath, err), } err = systemd.WriteSriovResult(sriovResult) @@ -91,67 +97,64 @@ func runServiceCmd(cmd *cobra.Command, args []string) error { nodeStateSpec = &systemd.SriovConfig{ Spec: sriovv1.SriovNetworkNodeStateSpec{}, UnsupportedNics: false, - PlatformType: utils.Baremetal, + PlatformType: consts.Baremetal, } } setupLog.V(2).Info("sriov-config-service", "config", nodeStateSpec) - - storeManager, err := utils.NewStoreManager(true) + hostHelpers, err := helper.NewDefaultHostHelpers() if err != nil { - setupLog.Error(err, "failed to create store manager") - return err + setupLog.Error(err, "failed to create hostHelpers") + return updateSriovResultErr("failed to create hostHelpers") } - // Load kernel modules - hostManager := host.NewHostManager(true) - _, err = hostManager.TryEnableRdma() + platformHelper, err := platforms.NewDefaultPlatformHelper() + if err != nil { + setupLog.Error(err, "failed to create platformHelpers") + return updateSriovResultErr("failed to create platformHelpers") + } + + _, err = hostHelpers.TryEnableRdma() if err != nil { setupLog.Error(err, "warning, failed to enable RDMA") } - hostManager.TryEnableTun() - hostManager.TryEnableVhostNet() + hostHelpers.TryEnableTun() + hostHelpers.TryEnableVhostNet() var configPlugin plugin.VendorPlugin var ifaceStatuses []sriovv1.InterfaceExt - if nodeStateSpec.PlatformType == utils.Baremetal { + if nodeStateSpec.PlatformType == consts.Baremetal { // Bare metal support - ifaceStatuses, err = utils.DiscoverSriovDevices(nodeStateSpec.UnsupportedNics, storeManager) + vars.DevMode = nodeStateSpec.UnsupportedNics + ifaceStatuses, err = hostHelpers.DiscoverSriovDevices(hostHelpers) if err != nil { setupLog.Error(err, "failed to discover sriov devices on the host") - return fmt.Errorf("sriov-config-service: failed to discover sriov devices on the host: %v", err) + return updateSriovResultErr(fmt.Sprintf("sriov-config-service: failed to discover sriov devices on the host: %v", err)) } // Create the generic plugin - configPlugin, err = generic.NewGenericPlugin(true, hostManager, storeManager) + configPlugin, err = generic.NewGenericPlugin(hostHelpers) if err != nil { setupLog.Error(err, "failed to create generic plugin") - return fmt.Errorf("sriov-config-service failed to create generic plugin %v", err) - } - } else if nodeStateSpec.PlatformType == utils.VirtualOpenStack { - // Openstack support - metaData, networkData, err := utils.GetOpenstackData(false) - if err != nil { - setupLog.Error(err, "failed to read OpenStack data") - return fmt.Errorf("sriov-config-service failed to read OpenStack data: %v", err) + return updateSriovResultErr(fmt.Sprintf("sriov-config-service failed to create generic plugin %v", err)) } - - openStackDevicesInfo, err := utils.CreateOpenstackDevicesInfo(metaData, networkData) + } else if nodeStateSpec.PlatformType == consts.VirtualOpenStack { + err = platformHelper.CreateOpenstackDevicesInfo() if err != nil { setupLog.Error(err, "failed to read OpenStack data") - return fmt.Errorf("sriov-config-service failed to read OpenStack data: %v", err) + return updateSriovResultErr(fmt.Sprintf("sriov-config-service failed to read OpenStack data: %v", err)) } - ifaceStatuses, err = utils.DiscoverSriovDevicesVirtual(openStackDevicesInfo) + ifaceStatuses, err = platformHelper.DiscoverSriovDevicesVirtual() if err != nil { setupLog.Error(err, "failed to read OpenStack data") - return fmt.Errorf("sriov-config-service: failed to read OpenStack data: %v", err) + return updateSriovResultErr(fmt.Sprintf("sriov-config-service: failed to read OpenStack data: %v", err)) } // Create the virtual plugin - configPlugin, err = virtual.NewVirtualPlugin(true) + configPlugin, err = virtual.NewVirtualPlugin(hostHelpers) if err != nil { setupLog.Error(err, "failed to create virtual plugin") - return fmt.Errorf("sriov-config-service: failed to create virtual plugin %v", err) + return updateSriovResultErr(fmt.Sprintf("sriov-config-service: failed to create virtual plugin %v", err)) } } @@ -163,18 +166,18 @@ func runServiceCmd(cmd *cobra.Command, args []string) error { _, _, err = configPlugin.OnNodeStateChange(nodeState) if err != nil { setupLog.Error(err, "failed to run OnNodeStateChange to update the generic plugin status") - return fmt.Errorf("sriov-config-service: failed to run OnNodeStateChange to update the generic plugin status %v", err) + return updateSriovResultErr(fmt.Sprintf("sriov-config-service: failed to run OnNodeStateChange to update the generic plugin status %v", err)) } sriovResult := &systemd.SriovResult{ - SyncStatus: "Succeeded", + SyncStatus: consts.SyncStatusSucceeded, LastSyncError: "", } err = configPlugin.Apply() if err != nil { setupLog.Error(err, "failed to run apply node configuration") - sriovResult.SyncStatus = "Failed" + sriovResult.SyncStatus = consts.SyncStatusFailed sriovResult.LastSyncError = err.Error() } @@ -187,3 +190,17 @@ func runServiceCmd(cmd *cobra.Command, args []string) error { setupLog.V(0).Info("shutting down sriov-config-service") return nil } + +func updateSriovResultErr(errMsg string) error { + sriovResult := &systemd.SriovResult{ + SyncStatus: consts.SyncStatusFailed, + LastSyncError: errMsg, + } + + err := systemd.WriteSriovResult(sriovResult) + if err != nil { + log.Log.Error(err, "failed to write sriov result file", "content", *sriovResult) + return fmt.Errorf("sriov-config-service failed to write sriov result file with content %v error: %v", *sriovResult, err) + } + return nil +} diff --git a/cmd/sriov-network-config-daemon/start.go b/cmd/sriov-network-config-daemon/start.go index a7feffbde..1d7d4d00c 100644 --- a/cmd/sriov-network-config-daemon/start.go +++ b/cmd/sriov-network-config-daemon/start.go @@ -24,8 +24,6 @@ import ( "strings" "time" - configv1 "github.com/openshift/api/config/v1" - mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" "github.com/spf13/cobra" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" @@ -35,12 +33,17 @@ import ( "k8s.io/client-go/util/connrotation" "sigs.k8s.io/controller-runtime/pkg/log" + configv1 "github.com/openshift/api/config/v1" + mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/daemon" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/version" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) var ( @@ -70,8 +73,11 @@ func runStartCmd(cmd *cobra.Command, args []string) error { snolog.InitLog() setupLog := log.Log.WithName("sriov-network-config-daemon") - // To help debugging, immediately log version - setupLog.V(2).Info("sriov-network-config-daemon", "version", version.Version) + // Mark that we are running inside a container + vars.UsingSystemdMode = false + if startOpts.systemd { + vars.UsingSystemdMode = true + } if startOpts.nodeName == "" { name, ok := os.LookupEnv("NODE_NAME") @@ -80,6 +86,7 @@ func runStartCmd(cmd *cobra.Command, args []string) error { } startOpts.nodeName = name } + vars.NodeName = startOpts.nodeName // This channel is used to ensure all spawned goroutines exit when we exit. stopCh := make(chan struct{}) @@ -102,7 +109,9 @@ func runStartCmd(cmd *cobra.Command, args []string) error { var config *rest.Config var err error - if os.Getenv("CLUSTER_TYPE") == utils.ClusterTypeOpenshift { + // On openshift we use the kubeconfig from kubelet on the node where the daemon is running + // this allow us to improve security as every daemon has access only to its own node + if vars.ClusterType == consts.ClusterTypeOpenshift { kubeconfig, err := clientcmd.LoadFromFile("/host/etc/kubernetes/kubeconfig") if err != nil { setupLog.Error(err, "failed to load kubelet kubeconfig") @@ -110,7 +119,7 @@ func runStartCmd(cmd *cobra.Command, args []string) error { clusterName := kubeconfig.Contexts[kubeconfig.CurrentContext].Cluster apiURL := kubeconfig.Clusters[clusterName].Server - url, err := url.Parse(apiURL) + urlPath, err := url.Parse(apiURL) if err != nil { setupLog.Error(err, "failed to parse api url from kubelet kubeconfig") } @@ -118,8 +127,14 @@ func runStartCmd(cmd *cobra.Command, args []string) error { // The kubernetes in-cluster functions don't let you override the apiserver // directly; gotta "pass" it via environment vars. setupLog.V(0).Info("overriding kubernetes api", "new-url", apiURL) - os.Setenv("KUBERNETES_SERVICE_HOST", url.Hostname()) - os.Setenv("KUBERNETES_SERVICE_PORT", url.Port()) + err = os.Setenv("KUBERNETES_SERVICE_HOST", urlPath.Hostname()) + if err != nil { + setupLog.Error(err, "failed to set KUBERNETES_SERVICE_HOST environment variable") + } + err = os.Setenv("KUBERNETES_SERVICE_PORT", urlPath.Port()) + if err != nil { + setupLog.Error(err, "failed to set KUBERNETES_SERVICE_PORT environment variable") + } } kubeconfig := os.Getenv("KUBECONFIG") @@ -134,57 +149,72 @@ func runStartCmd(cmd *cobra.Command, args []string) error { return err } + vars.Config = config + vars.Scheme = scheme.Scheme + closeAllConns, err := updateDialer(config) if err != nil { return err } - sriovnetworkv1.AddToScheme(scheme.Scheme) - mcfgv1.AddToScheme(scheme.Scheme) - configv1.Install(scheme.Scheme) + err = sriovnetworkv1.AddToScheme(scheme.Scheme) + if err != nil { + setupLog.Error(err, "failed to load sriov network CRDs to scheme") + return err + } + + err = mcfgv1.AddToScheme(scheme.Scheme) + if err != nil { + setupLog.Error(err, "failed to load machine config CRDs to scheme") + return err + } + + err = configv1.Install(scheme.Scheme) + if err != nil { + setupLog.Error(err, "failed to load openshift config CRDs to scheme") + return err + } snclient := snclientset.NewForConfigOrDie(config) kubeclient := kubernetes.NewForConfigOrDie(config) - openshiftContext, err := utils.NewOpenshiftContext(config, scheme.Scheme) + + hostHelpers, err := helper.NewDefaultHostHelpers() + if err != nil { + setupLog.Error(err, "failed to create hostHelpers") + return err + } + + platformHelper, err := platforms.NewDefaultPlatformHelper() if err != nil { + setupLog.Error(err, "failed to create platformHelper") return err } config.Timeout = 5 * time.Second writerclient := snclientset.NewForConfigOrDie(config) - mode := os.Getenv("DEV_MODE") - devMode := false - if mode == "TRUE" { - devMode = true - setupLog.V(0).Info("dev mode enabled") - } - - eventRecorder := daemon.NewEventRecorder(writerclient, startOpts.nodeName, kubeclient) + eventRecorder := daemon.NewEventRecorder(writerclient, kubeclient) defer eventRecorder.Shutdown() setupLog.V(0).Info("starting node writer") - nodeWriter := daemon.NewNodeStateStatusWriter(writerclient, startOpts.nodeName, closeAllConns, eventRecorder, devMode) - - destdir := os.Getenv("DEST_DIR") - if destdir == "" { - destdir = "/host/tmp" - } - - platformType := utils.Baremetal + nodeWriter := daemon.NewNodeStateStatusWriter(writerclient, + closeAllConns, + eventRecorder, + hostHelpers, + platformHelper) nodeInfo, err := kubeclient.CoreV1().Nodes().Get(context.Background(), startOpts.nodeName, v1.GetOptions{}) if err == nil { - for key, pType := range utils.PlatformMap { + for key, pType := range vars.PlatformsMap { if strings.Contains(strings.ToLower(nodeInfo.Spec.ProviderID), strings.ToLower(key)) { - platformType = pType + vars.PlatformType = pType } } } else { setupLog.Error(err, "failed to fetch node state, exiting", "node-name", startOpts.nodeName) return err } - setupLog.Info("Running on", "platform", platformType.String()) + setupLog.Info("Running on", "platform", vars.PlatformType.String()) var namespace = os.Getenv("NAMESPACE") if err := sriovnetworkv1.InitNicIDMapFromConfigMap(kubeclient, namespace); err != nil { @@ -195,27 +225,24 @@ func runStartCmd(cmd *cobra.Command, args []string) error { eventRecorder.SendEvent("ConfigDaemonStart", "Config Daemon starting") // block the deamon process until nodeWriter finish first its run - err = nodeWriter.RunOnce(destdir, platformType) + err = nodeWriter.RunOnce() if err != nil { setupLog.Error(err, "failed to run writer") return err } - go nodeWriter.Run(stopCh, refreshCh, syncCh, platformType) + go nodeWriter.Run(stopCh, refreshCh, syncCh) setupLog.V(0).Info("Starting SriovNetworkConfigDaemon") err = daemon.New( - startOpts.nodeName, snclient, kubeclient, - openshiftContext, + hostHelpers, + platformHelper, exitCh, stopCh, syncCh, refreshCh, - platformType, - startOpts.systemd, eventRecorder, - devMode, ).Run(stopCh, exitCh) if err != nil { setupLog.Error(err, "failed to run daemon") diff --git a/controllers/sriovnetworkpoolconfig_controller.go b/controllers/sriovnetworkpoolconfig_controller.go index de8fb32b2..fd4643476 100644 --- a/controllers/sriovnetworkpoolconfig_controller.go +++ b/controllers/sriovnetworkpoolconfig_controller.go @@ -17,15 +17,16 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/render" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) // SriovNetworkPoolConfigReconciler reconciles a SriovNetworkPoolConfig object type SriovNetworkPoolConfigReconciler struct { client.Client - Scheme *runtime.Scheme - OpenshiftContext *utils.OpenshiftContext + Scheme *runtime.Scheme + PlatformHelper platforms.Interface } //+kubebuilder:rbac:groups=sriovnetwork.openshift.io,resources=sriovnetworkpoolconfigs,verbs=get;list;watch;create;update;patch;delete @@ -44,8 +45,8 @@ type SriovNetworkPoolConfigReconciler struct { func (r *SriovNetworkPoolConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := log.FromContext(ctx).WithValues("sriovnetworkpoolconfig", req.NamespacedName) isHypershift := false - if r.OpenshiftContext.IsOpenshiftCluster() { - if r.OpenshiftContext.IsHypershift() { + if r.PlatformHelper.IsOpenshiftCluster() { + if r.PlatformHelper.IsHypershift() { isHypershift = true } logger = logger.WithValues("isHypershift", isHypershift) @@ -78,7 +79,7 @@ func (r *SriovNetworkPoolConfigReconciler) Reconcile(ctx context.Context, req ct return reconcile.Result{}, err } } - if utils.ClusterType == utils.ClusterTypeOpenshift { + if vars.ClusterType == constants.ClusterTypeOpenshift { if !isHypershift { if err = r.syncOvsHardwareOffloadMachineConfigs(ctx, instance, false); err != nil { return reconcile.Result{}, err @@ -92,7 +93,7 @@ func (r *SriovNetworkPoolConfigReconciler) Reconcile(ctx context.Context, req ct if sriovnetworkv1.StringInArray(sriovnetworkv1.POOLCONFIGFINALIZERNAME, instance.ObjectMeta.Finalizers) { // our finalizer is present, so lets handle any external dependency logger.Info("delete SriovNetworkPoolConfig CR", "Namespace", instance.Namespace, "Name", instance.Name) - if utils.ClusterType == utils.ClusterTypeOpenshift && !isHypershift { + if vars.ClusterType == constants.ClusterTypeOpenshift && !isHypershift { if err = r.syncOvsHardwareOffloadMachineConfigs(ctx, instance, true); err != nil { // if fail to delete the external dependency here, return with error // so that it can be retried diff --git a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index 4c453b6de..1e4c7728b 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -39,17 +39,19 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" apply "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/apply" - constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + consts "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" render "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/render" utils "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) // SriovOperatorConfigReconciler reconciles a SriovOperatorConfig object type SriovOperatorConfigReconciler struct { client.Client - Scheme *runtime.Scheme - OpenshiftContext *utils.OpenshiftContext + Scheme *runtime.Scheme + PlatformHelper platforms.Interface } //+kubebuilder:rbac:groups=sriovnetwork.openshift.io,resources=sriovoperatorconfigs,verbs=get;list;watch;create;update;patch;delete @@ -70,13 +72,12 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. logger.Info("Reconciling SriovOperatorConfig") - enableAdmissionController := os.Getenv("ADMISSION_CONTROLLERS_ENABLED") == trueString - if !enableAdmissionController { + if !vars.EnableAdmissionController { logger.Info("SR-IOV Network Resource Injector and Operator Webhook are disabled.") } defaultConfig := &sriovnetworkv1.SriovOperatorConfig{} err := r.Get(ctx, types.NamespacedName{ - Name: constants.DefaultConfigName, Namespace: namespace}, defaultConfig) + Name: consts.DefaultConfigName, Namespace: namespace}, defaultConfig) if err != nil { if apierrors.IsNotFound(err) { singleNode, err := utils.IsSingleNodeCluster(r.Client) @@ -86,10 +87,10 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. // Default Config object not found, create it. defaultConfig.SetNamespace(namespace) - defaultConfig.SetName(constants.DefaultConfigName) + defaultConfig.SetName(consts.DefaultConfigName) defaultConfig.Spec = sriovnetworkv1.SriovOperatorConfigSpec{ - EnableInjector: func() *bool { b := enableAdmissionController; return &b }(), - EnableOperatorWebhook: func() *bool { b := enableAdmissionController; return &b }(), + EnableInjector: func() *bool { b := vars.EnableAdmissionController; return &b }(), + EnableOperatorWebhook: func() *bool { b := vars.EnableAdmissionController; return &b }(), ConfigDaemonNodeSelector: map[string]string{}, LogLevel: 2, DisableDrain: singleNode, @@ -99,7 +100,7 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. err = r.Create(ctx, defaultConfig) if err != nil { logger.Error(err, "Failed to create default Operator Config", "Namespace", - namespace, "Name", constants.DefaultConfigName) + namespace, "Name", consts.DefaultConfigName) return reconcile.Result{}, err } return reconcile.Result{}, nil @@ -129,9 +130,9 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. snolog.SetLogLevel(defaultConfig.Spec.LogLevel) // For Openshift we need to create the systemd files using a machine config - if utils.ClusterType == utils.ClusterTypeOpenshift { + if vars.ClusterType == consts.ClusterTypeOpenshift { // TODO: add support for hypershift as today there is no MCO on hypershift clusters - if r.OpenshiftContext.IsHypershift() { + if r.PlatformHelper.IsHypershift() { return ctrl.Result{}, fmt.Errorf("systemd mode is not supported on hypershift") } @@ -139,7 +140,7 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl. return reconcile.Result{}, err } } - return reconcile.Result{RequeueAfter: constants.ResyncPeriod}, nil + return reconcile.Result{RequeueAfter: consts.ResyncPeriod}, nil } // SetupWithManager sets up the controller with the Manager. @@ -192,7 +193,7 @@ func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(ctx context.Context, data.Data["SRIOVCNIImage"] = os.Getenv("SRIOV_CNI_IMAGE") data.Data["SRIOVInfiniBandCNIImage"] = os.Getenv("SRIOV_INFINIBAND_CNI_IMAGE") data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION") - data.Data["ClusterType"] = utils.ClusterType + data.Data["ClusterType"] = vars.ClusterType data.Data["DevMode"] = os.Getenv("DEV_MODE") data.Data["ImagePullSecrets"] = GetImagePullSecrets() if dc.Spec.ConfigurationMode == sriovnetworkv1.SystemdConfigurationMode { @@ -208,7 +209,7 @@ func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(ctx context.Context, logger.V(1).Info("New cni bin found", "CNIBinPath", envCniBinPath) data.Data["CNIBinPath"] = envCniBinPath } - objs, err := render.RenderDir(constants.ConfigDaemonPath, &data) + objs, err := render.RenderDir(consts.ConfigDaemonPath, &data) if err != nil { logger.Error(err, "Fail to render config daemon manifests") return err @@ -251,7 +252,7 @@ func (r *SriovOperatorConfigReconciler) syncWebhookObjs(ctx context.Context, dc data.Data["NetworkResourcesInjectorImage"] = os.Getenv("NETWORK_RESOURCES_INJECTOR_IMAGE") data.Data["SriovNetworkWebhookImage"] = os.Getenv("SRIOV_NETWORK_WEBHOOK_IMAGE") data.Data["ReleaseVersion"] = os.Getenv("RELEASEVERSION") - data.Data["ClusterType"] = utils.ClusterType + data.Data["ClusterType"] = vars.ClusterType data.Data["DevMode"] = os.Getenv("DEV_MODE") data.Data["ImagePullSecrets"] = GetImagePullSecrets() data.Data["CertManagerEnabled"] = strings.ToLower(os.Getenv("ADMISSION_CONTROLLERS_CERTIFICATES_CERT_MANAGER_ENABLED")) == trueString @@ -261,8 +262,8 @@ func (r *SriovOperatorConfigReconciler) syncWebhookObjs(ctx context.Context, dc data.Data["InjectorWebhookCA"] = os.Getenv("ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT") data.Data["ExternalControlPlane"] = false - if r.OpenshiftContext.IsOpenshiftCluster() { - external := r.OpenshiftContext.IsHypershift() + if r.PlatformHelper.IsOpenshiftCluster() { + external := r.PlatformHelper.IsHypershift() data.Data["ExternalControlPlane"] = external } @@ -273,7 +274,7 @@ func (r *SriovOperatorConfigReconciler) syncWebhookObjs(ctx context.Context, dc } // Delete injector webhook - if !*dc.Spec.EnableInjector && path == constants.InjectorWebHookPath { + if !*dc.Spec.EnableInjector && path == consts.InjectorWebHookPath { for _, obj := range objs { err = r.deleteWebhookObject(ctx, obj) if err != nil { @@ -286,7 +287,7 @@ func (r *SriovOperatorConfigReconciler) syncWebhookObjs(ctx context.Context, dc continue } // Delete operator webhook - if !*dc.Spec.EnableOperatorWebhook && path == constants.OperatorWebHookPath { + if !*dc.Spec.EnableOperatorWebhook && path == consts.OperatorWebHookPath { for _, obj := range objs { err = r.deleteWebhookObject(ctx, obj) if err != nil { @@ -347,7 +348,7 @@ func (r *SriovOperatorConfigReconciler) syncOpenShiftSystemdService(ctx context. if cr.Spec.ConfigurationMode != sriovnetworkv1.SystemdConfigurationMode { obj := &machinev1.MachineConfig{} - err := r.Get(context.TODO(), types.NamespacedName{Name: constants.SystemdServiceOcpMachineConfigName}, obj) + err := r.Get(context.TODO(), types.NamespacedName{Name: consts.SystemdServiceOcpMachineConfigName}, obj) if err != nil { if apierrors.IsNotFound(err) { return nil @@ -370,7 +371,7 @@ func (r *SriovOperatorConfigReconciler) syncOpenShiftSystemdService(ctx context. logger.Info("Start to sync config systemd machine config for openshift") data := render.MakeRenderData() data.Data["LogLevel"] = cr.Spec.LogLevel - objs, err := render.RenderDir(constants.SystemdServiceOcpPath, &data) + objs, err := render.RenderDir(consts.SystemdServiceOcpPath, &data) if err != nil { logger.Error(err, "Fail to render config daemon manifests") return err diff --git a/controllers/suite_test.go b/controllers/suite_test.go index 4fe8a3eb8..0493ab7c6 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -23,13 +23,11 @@ import ( "testing" "time" - netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + "github.com/golang/mock/gomock" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - openshiftconfigv1 "github.com/openshift/api/config/v1" - mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" - "go.uber.org/zap/zapcore" + "go.uber.org/zap/zapcore" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/scheme" @@ -39,10 +37,15 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" + netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + openshiftconfigv1 "github.com/openshift/api/config/v1" + mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" + + //+kubebuilder:scaffold:imports sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" - //+kubebuilder:scaffold:imports + mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift" ) // These tests use Ginkgo (BDD-style Go testing framework). Refer to @@ -129,17 +132,24 @@ var _ = BeforeSuite(func(done Done) { }).SetupWithManager(k8sManager) Expect(err).ToNot(HaveOccurred()) + t := GinkgoT() + mockCtrl := gomock.NewController(t) + platformHelper := mock_platforms.NewMockInterface(mockCtrl) + platformHelper.EXPECT().GetFlavor().Return(openshift.OpenshiftFlavorDefault).AnyTimes() + platformHelper.EXPECT().IsOpenshiftCluster().Return(false).AnyTimes() + platformHelper.EXPECT().IsHypershift().Return(false).AnyTimes() + err = (&SriovOperatorConfigReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), - OpenshiftContext: &utils.OpenshiftContext{OpenshiftFlavor: utils.OpenshiftFlavorDefault}, + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + PlatformHelper: platformHelper, }).SetupWithManager(k8sManager) Expect(err).ToNot(HaveOccurred()) err = (&SriovNetworkPoolConfigReconciler{ - Client: k8sManager.GetClient(), - Scheme: k8sManager.GetScheme(), - OpenshiftContext: &utils.OpenshiftContext{OpenshiftFlavor: utils.OpenshiftFlavorDefault}, + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + PlatformHelper: platformHelper, }).SetupWithManager(k8sManager) Expect(err).ToNot(HaveOccurred()) diff --git a/main.go b/main.go index 7d62af130..07f3f70be 100644 --- a/main.go +++ b/main.go @@ -27,6 +27,9 @@ import ( mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" "k8s.io/apimachinery/pkg/api/errors" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" + // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) // to ensure that exec-entrypoint and run can make use of them. _ "k8s.io/client-go/plugin/pkg/client/auth" @@ -90,12 +93,6 @@ func main() { os.Exit(1) } - openshiftContext, err := utils.NewOpenshiftContext(restConfig, scheme) - if err != nil { - setupLog.Error(err, "couldn't create openshift context") - os.Exit(1) - } - le := leaderelection.GetLeaderElectionConfig(kubeClient, enableLeaderElection) namespace := os.Getenv("NAMESPACE") @@ -137,6 +134,16 @@ func main() { os.Exit(1) } + // Initial global info + vars.Config = restConfig + vars.Scheme = mgrGlobal.GetScheme() + + platformsHelper, err := platforms.NewDefaultPlatformHelper() + if err != nil { + setupLog.Error(err, "couldn't create openshift context") + os.Exit(1) + } + if err = (&controllers.SriovNetworkReconciler{ Client: mgrGlobal.GetClient(), Scheme: mgrGlobal.GetScheme(), @@ -159,17 +166,17 @@ func main() { os.Exit(1) } if err = (&controllers.SriovOperatorConfigReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - OpenshiftContext: openshiftContext, + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + PlatformHelper: platformsHelper, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "SriovOperatorConfig") os.Exit(1) } if err = (&controllers.SriovNetworkPoolConfigReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), - OpenshiftContext: openshiftContext, + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + PlatformHelper: platformsHelper, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "SriovNetworkPoolConfig") os.Exit(1) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 913cad805..290b4400e 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -39,12 +39,13 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned" sninformer "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/informers/externalversions" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/service" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/systemd" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) const ( @@ -62,26 +63,17 @@ type Message struct { } type Daemon struct { - // name is the node name. - name string - - platform utils.PlatformType - - useSystemdService bool - - devMode bool - client snclientset.Interface // kubeClient allows interaction with Kubernetes, including the node we are running on. kubeClient kubernetes.Interface - openshiftContext *utils.OpenshiftContext - nodeState *sriovnetworkv1.SriovNetworkNodeState enabledPlugins map[string]plugin.VendorPlugin - serviceManager service.ServiceManager + HostHelpers helper.HostHelpersInterface + + platformHelpers platforms.Interface // channel used by callbacks to signal Run() of an error exitCh chan<- error @@ -109,22 +101,15 @@ type Daemon struct { mcpName string - storeManager utils.StoreManagerInterface - - hostManager host.HostManagerInterface - eventRecorder *EventRecorder } const ( - udevScriptsPath = "/bindata/scripts/load-udev.sh" - annoKey = "sriovnetwork.openshift.io/state" - annoIdle = "Idle" - annoDraining = "Draining" - annoMcpPaused = "Draining_MCP_Paused" - syncStatusSucceeded = "Succeeded" - syncStatusFailed = "Failed" - syncStatusInProgress = "InProgress" + udevScriptsPath = "/bindata/scripts/load-udev.sh" + annoKey = "sriovnetwork.openshift.io/state" + annoIdle = "Idle" + annoDraining = "Draining" + annoMcpPaused = "Draining_MCP_Paused" ) var namespace = os.Getenv("NAMESPACE") @@ -141,33 +126,26 @@ func (w writer) Write(p []byte) (n int, err error) { } func New( - nodeName string, client snclientset.Interface, kubeClient kubernetes.Interface, - openshiftContext *utils.OpenshiftContext, + hostHelpers helper.HostHelpersInterface, + platformHelper platforms.Interface, exitCh chan<- error, stopCh <-chan struct{}, syncCh <-chan struct{}, refreshCh chan<- Message, - platformType utils.PlatformType, - useSystemdService bool, er *EventRecorder, - devMode bool, ) *Daemon { return &Daemon{ - name: nodeName, - platform: platformType, - useSystemdService: useSystemdService, - devMode: devMode, - client: client, - kubeClient: kubeClient, - openshiftContext: openshiftContext, - serviceManager: service.NewServiceManager("/host"), - exitCh: exitCh, - stopCh: stopCh, - syncCh: syncCh, - refreshCh: refreshCh, - nodeState: &sriovnetworkv1.SriovNetworkNodeState{}, + client: client, + kubeClient: kubeClient, + HostHelpers: hostHelpers, + platformHelpers: platformHelper, + exitCh: exitCh, + stopCh: stopCh, + syncCh: syncCh, + refreshCh: refreshCh, + nodeState: &sriovnetworkv1.SriovNetworkNodeState{}, drainer: &drain.Helper{ Client: kubeClient, Force: true, @@ -195,38 +173,30 @@ func New( // Run the config daemon func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error { - log.Log.V(0).Info("Run()", "node", dn.name) + log.Log.V(0).Info("Run()", "node", vars.NodeName) - if utils.ClusterType == utils.ClusterTypeOpenshift { - log.Log.V(0).Info("Run(): start daemon.", "openshiftFlavor", dn.openshiftContext.OpenshiftFlavor) + if vars.ClusterType == consts.ClusterTypeOpenshift { + log.Log.V(0).Info("Run(): start daemon.", "openshiftFlavor", dn.platformHelpers.GetFlavor()) } else { log.Log.V(0).Info("Run(): start daemon.") } - if dn.useSystemdService { - log.Log.V(0).Info("Run(): daemon running in systemd mode") - } - // Only watch own SriovNetworkNodeState CR - defer utilruntime.HandleCrash() - defer dn.workqueue.ShutDown() - - hostManager := host.NewHostManager(dn.useSystemdService) - dn.hostManager = hostManager - if !dn.useSystemdService { - dn.hostManager.TryEnableRdma() - dn.hostManager.TryEnableTun() - dn.hostManager.TryEnableVhostNet() - err := systemd.CleanSriovFilesFromHost(utils.ClusterType == utils.ClusterTypeOpenshift) + if !vars.UsingSystemdMode { + log.Log.V(0).Info("Run(): daemon running in daemon mode") + dn.HostHelpers.TryEnableRdma() + dn.HostHelpers.TryEnableTun() + dn.HostHelpers.TryEnableVhostNet() + err := systemd.CleanSriovFilesFromHost(vars.ClusterType == consts.ClusterTypeOpenshift) if err != nil { log.Log.Error(err, "failed to remove all the systemd sriov files") } + } else { + log.Log.V(0).Info("Run(): daemon running in systemd mode") } - storeManager, err := utils.NewStoreManager(false) - if err != nil { - return err - } - dn.storeManager = storeManager + // Only watch own SriovNetworkNodeState CR + defer utilruntime.HandleCrash() + defer dn.workqueue.ShutDown() if err := dn.prepareNMUdevRule(); err != nil { log.Log.Error(err, "failed to prepare udev files to disable network manager on requested VFs") @@ -241,7 +211,7 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error { time.Second*15, namespace, func(lo *metav1.ListOptions) { - lo.FieldSelector = "metadata.name=" + dn.name + lo.FieldSelector = "metadata.name=" + vars.NodeName lo.TimeoutSeconds = &timeout }, ) @@ -300,7 +270,7 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error { log.Log.Error(err, "got an error") if more { dn.refreshCh <- Message{ - syncStatus: syncStatusFailed, + syncStatus: consts.SyncStatusFailed, lastSyncError: err.Error(), } } @@ -358,7 +328,7 @@ func (dn *Daemon) processNextWorkItem() bool { if err != nil { // Ereport error message, and put the item back to work queue for retry. dn.refreshCh <- Message{ - syncStatus: syncStatusFailed, + syncStatus: consts.SyncStatusFailed, lastSyncError: err.Error(), } <-dn.syncCh @@ -384,9 +354,9 @@ func (dn *Daemon) nodeAddHandler(obj interface{}) { } func (dn *Daemon) nodeUpdateHandler(old, new interface{}) { - node, err := dn.nodeLister.Get(dn.name) + node, err := dn.nodeLister.Get(vars.NodeName) if errors.IsNotFound(err) { - log.Log.V(2).Info("nodeUpdateHandler(): node has been deleted", "name", dn.name) + log.Log.V(2).Info("nodeUpdateHandler(): node has been deleted", "name", vars.NodeName) return } dn.node = node.DeepCopy() @@ -399,7 +369,7 @@ func (dn *Daemon) nodeUpdateHandler(old, new interface{}) { // Checking if other nodes are draining for _, otherNode := range nodes { - if otherNode.GetName() == dn.name { + if otherNode.GetName() == vars.NodeName { continue } @@ -438,24 +408,24 @@ func (dn *Daemon) nodeStateSyncHandler() error { var err error // Get the latest NodeState var latestState *sriovnetworkv1.SriovNetworkNodeState - var sriovResult = &systemd.SriovResult{SyncStatus: syncStatusSucceeded, LastSyncError: ""} - latestState, err = dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(context.Background(), dn.name, metav1.GetOptions{}) + var sriovResult = &systemd.SriovResult{SyncStatus: consts.SyncStatusSucceeded, LastSyncError: ""} + latestState, err = dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(context.Background(), vars.NodeName, metav1.GetOptions{}) if err != nil { - log.Log.Error(err, "nodeStateSyncHandler(): Failed to fetch node state", "name", dn.name) + log.Log.Error(err, "nodeStateSyncHandler(): Failed to fetch node state", "name", vars.NodeName) return err } latest := latestState.GetGeneration() log.Log.V(0).Info("nodeStateSyncHandler(): new generation", "generation", latest) - if utils.ClusterType == utils.ClusterTypeOpenshift && !dn.openshiftContext.IsHypershift() { + if vars.ClusterType == consts.ClusterTypeOpenshift && !dn.platformHelpers.IsHypershift() { if err = dn.getNodeMachinePool(); err != nil { return err } } if dn.nodeState.GetGeneration() == latest { - if dn.useSystemdService { - serviceEnabled, err := dn.serviceManager.IsServiceEnabled(systemd.SriovServicePath) + if vars.UsingSystemdMode { + serviceEnabled, err := dn.HostHelpers.IsServiceEnabled(systemd.SriovServicePath) if err != nil { log.Log.Error(err, "nodeStateSyncHandler(): failed to check if sriov-config service exist on host") return err @@ -465,7 +435,7 @@ func (dn *Daemon) nodeStateSyncHandler() error { // this is only for k8s base environments, for openshift the sriov-operator creates a machine config to will apply // the system service and reboot the node the config-daemon doesn't need to do anything. if !serviceEnabled { - sriovResult = &systemd.SriovResult{SyncStatus: syncStatusFailed, + sriovResult = &systemd.SriovResult{SyncStatus: consts.SyncStatusFailed, LastSyncError: "sriov-config systemd service is not available on node"} } else { sriovResult, err = systemd.ReadSriovResult() @@ -474,12 +444,12 @@ func (dn *Daemon) nodeStateSyncHandler() error { return err } } - if sriovResult.LastSyncError != "" || sriovResult.SyncStatus == syncStatusFailed { + if sriovResult.LastSyncError != "" || sriovResult.SyncStatus == consts.SyncStatusFailed { log.Log.Info("nodeStateSyncHandler(): sync failed systemd service error", "last-sync-error", sriovResult.LastSyncError) // add the error but don't requeue dn.refreshCh <- Message{ - syncStatus: syncStatusFailed, + syncStatus: consts.SyncStatusFailed, lastSyncError: sriovResult.LastSyncError, } <-dn.syncCh @@ -488,9 +458,9 @@ func (dn *Daemon) nodeStateSyncHandler() error { } log.Log.V(0).Info("nodeStateSyncHandler(): Interface not changed") if latestState.Status.LastSyncError != "" || - latestState.Status.SyncStatus != syncStatusSucceeded { + latestState.Status.SyncStatus != consts.SyncStatusSucceeded { dn.refreshCh <- Message{ - syncStatus: syncStatusSucceeded, + syncStatus: consts.SyncStatusSucceeded, lastSyncError: "", } // wait for writer to refresh the status @@ -501,7 +471,7 @@ func (dn *Daemon) nodeStateSyncHandler() error { } if latestState.GetGeneration() == 1 && len(latestState.Spec.Interfaces) == 0 { - err = dn.storeManager.ClearPCIAddressFolder() + err = dn.HostHelpers.ClearPCIAddressFolder() if err != nil { log.Log.Error(err, "failed to clear the PCI address configuration") return err @@ -522,7 +492,7 @@ func (dn *Daemon) nodeStateSyncHandler() error { } dn.refreshCh <- Message{ - syncStatus: syncStatusInProgress, + syncStatus: consts.SyncStatusInProgress, lastSyncError: "", } // wait for writer to refresh status then pull again the latest node state @@ -531,16 +501,16 @@ func (dn *Daemon) nodeStateSyncHandler() error { // we need to load the latest status to our object // if we don't do it we can have a race here where the user remove the virtual functions but the operator didn't // trigger the refresh - updatedState, err := dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(context.Background(), dn.name, metav1.GetOptions{}) + updatedState, err := dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(context.Background(), vars.NodeName, metav1.GetOptions{}) if err != nil { - log.Log.Error(err, "nodeStateSyncHandler(): Failed to fetch node state", "name", dn.name) + log.Log.Error(err, "nodeStateSyncHandler(): Failed to fetch node state", "name", vars.NodeName) return err } latestState.Status = updatedState.Status // load plugins if it has not loaded if len(dn.enabledPlugins) == 0 { - dn.enabledPlugins, err = enablePlugins(dn.platform, dn.useSystemdService, latestState, dn.hostManager, dn.storeManager) + dn.enabledPlugins, err = enablePlugins(latestState, dn.HostHelpers) if err != nil { log.Log.Error(err, "nodeStateSyncHandler(): failed to enable vendor plugins") return err @@ -571,8 +541,9 @@ func (dn *Daemon) nodeStateSyncHandler() error { // When running using systemd check if the applied configuration is the latest one // or there is a new config we need to apply // When using systemd configuration we write the file - if dn.useSystemdService { - systemdConfModified, err := systemd.WriteConfFile(latestState, dn.devMode, dn.platform) + if vars.UsingSystemdMode { + log.Log.V(0).Info("nodeStateSyncHandler(): writing systemd config file to host") + systemdConfModified, err := systemd.WriteConfFile(latestState) if err != nil { log.Log.Error(err, "nodeStateSyncHandler(): failed to write configuration file for systemd mode") return err @@ -610,7 +581,7 @@ func (dn *Daemon) nodeStateSyncHandler() error { } } } - if dn.openshiftContext.IsOpenshiftCluster() && !dn.openshiftContext.IsHypershift() { + if dn.platformHelpers.IsOpenshiftCluster() && !dn.platformHelpers.IsHypershift() { if err = dn.getNodeMachinePool(); err != nil { return err } @@ -628,7 +599,7 @@ func (dn *Daemon) nodeStateSyncHandler() error { } } - if dn.openshiftContext.IsOpenshiftCluster() && !dn.openshiftContext.IsHypershift() { + if dn.platformHelpers.IsOpenshiftCluster() && !dn.platformHelpers.IsHypershift() { log.Log.Info("nodeStateSyncHandler(): pause MCP") if err := dn.pauseMCP(); err != nil { return err @@ -645,7 +616,7 @@ func (dn *Daemon) nodeStateSyncHandler() error { } } - if !reqReboot && !dn.useSystemdService { + if !reqReboot && !vars.UsingSystemdMode { // For BareMetal machines apply the generic plugin selectedPlugin, ok := dn.enabledPlugins[GenericPluginName] if ok { @@ -672,7 +643,7 @@ func (dn *Daemon) nodeStateSyncHandler() error { if reqReboot { log.Log.Info("nodeStateSyncHandler(): reboot node") dn.eventRecorder.SendEvent("RebootNode", "Reboot node has been initiated") - rebootNode() + dn.rebootNode() return nil } @@ -689,7 +660,7 @@ func (dn *Daemon) nodeStateSyncHandler() error { } } else { if !dn.nodeHasAnnotation(annoKey, annoIdle) { - if err := dn.annotateNode(dn.name, annoIdle); err != nil { + if err := dn.annotateNode(vars.NodeName, annoIdle); err != nil { log.Log.Error(err, "nodeStateSyncHandler(): failed to annotate node") return err } @@ -697,14 +668,14 @@ func (dn *Daemon) nodeStateSyncHandler() error { } log.Log.Info("nodeStateSyncHandler(): sync succeeded") dn.nodeState = latestState.DeepCopy() - if dn.useSystemdService { + if vars.UsingSystemdMode { dn.refreshCh <- Message{ syncStatus: sriovResult.SyncStatus, lastSyncError: sriovResult.LastSyncError, } } else { dn.refreshCh <- Message{ - syncStatus: syncStatusSucceeded, + syncStatus: consts.SyncStatusSucceeded, lastSyncError: "", } } @@ -739,16 +710,16 @@ func (dn *Daemon) completeDrain() error { } } - if dn.openshiftContext.IsOpenshiftCluster() && !dn.openshiftContext.IsHypershift() { + if dn.platformHelpers.IsOpenshiftCluster() && !dn.platformHelpers.IsHypershift() { log.Log.Info("completeDrain(): resume MCP", "mcp-name", dn.mcpName) pausePatch := []byte("{\"spec\":{\"paused\":false}}") - if _, err := dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigPools().Patch(context.Background(), dn.mcpName, types.MergePatchType, pausePatch, metav1.PatchOptions{}); err != nil { + if _, err := dn.platformHelpers.GetMcClient().MachineconfigurationV1().MachineConfigPools().Patch(context.Background(), dn.mcpName, types.MergePatchType, pausePatch, metav1.PatchOptions{}); err != nil { log.Log.Error(err, "completeDrain(): failed to resume MCP", "mcp-name", dn.mcpName) return err } } - if err := dn.annotateNode(dn.name, annoIdle); err != nil { + if err := dn.annotateNode(vars.NodeName, annoIdle); err != nil { log.Log.Error(err, "completeDrain(): failed to annotate node") return err } @@ -763,7 +734,7 @@ func (dn *Daemon) restartDevicePluginPod() error { var podToDelete string pods, err := dn.kubeClient.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{ LabelSelector: "app=sriov-device-plugin", - FieldSelector: "spec.nodeName=" + dn.name, + FieldSelector: "spec.nodeName=" + vars.NodeName, }) if err != nil { if errors.IsNotFound(err) { @@ -812,9 +783,9 @@ func (dn *Daemon) restartDevicePluginPod() error { return nil } -func rebootNode() { +func (dn *Daemon) rebootNode() { log.Log.Info("rebootNode(): trigger node reboot") - exit, err := utils.Chroot("/host") + exit, err := dn.HostHelpers.Chroot(consts.Host) if err != nil { log.Log.Error(err, "rebootNode(): chroot command failed") } @@ -837,7 +808,7 @@ func rebootNode() { func (dn *Daemon) annotateNode(node, value string) error { log.Log.Info("annotateNode(): Annotate node", "name", node, "value", value) - oldNode, err := dn.kubeClient.CoreV1().Nodes().Get(context.Background(), dn.name, metav1.GetOptions{}) + oldNode, err := dn.kubeClient.CoreV1().Nodes().Get(context.Background(), vars.NodeName, metav1.GetOptions{}) if err != nil { log.Log.Error(err, "annotateNode(): Failed to get node, retrying", "name", node) return err @@ -863,7 +834,7 @@ func (dn *Daemon) annotateNode(node, value string) error { return err } _, err = dn.kubeClient.CoreV1().Nodes().Patch(context.Background(), - dn.name, + vars.NodeName, types.StrategicMergePatchType, patchBytes, metav1.PatchOptions{}) @@ -881,7 +852,7 @@ func (dn *Daemon) getNodeMachinePool() error { log.Log.Error(nil, "getNodeMachinePool(): Failed to find the the desiredConfig Annotation") return fmt.Errorf("getNodeMachinePool(): Failed to find the the desiredConfig Annotation") } - mc, err := dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigs().Get(context.TODO(), desiredConfig, metav1.GetOptions{}) + mc, err := dn.platformHelpers.GetMcClient().MachineconfigurationV1().MachineConfigs().Get(context.TODO(), desiredConfig, metav1.GetOptions{}) if err != nil { log.Log.Error(err, "getNodeMachinePool(): Failed to get the desired Machine Config") return err @@ -907,7 +878,7 @@ func (dn *Daemon) getDrainLock(ctx context.Context, done chan bool) { }, Client: dn.kubeClient.CoordinationV1(), LockConfig: resourcelock.ResourceLockConfig{ - Identity: dn.name, + Identity: vars.NodeName, }, } @@ -930,7 +901,7 @@ func (dn *Daemon) getDrainLock(ctx context.Context, done chan bool) { } if dn.drainable { log.Log.V(2).Info("getDrainLock(): no other node is draining") - err = dn.annotateNode(dn.name, annoDraining) + err = dn.annotateNode(vars.NodeName, annoDraining) if err != nil { log.Log.Error(err, "getDrainLock(): failed to annotate node") continue @@ -952,7 +923,7 @@ func (dn *Daemon) pauseMCP() error { log.Log.Info("pauseMCP(): pausing MCP") var err error - mcpInformerFactory := mcfginformers.NewSharedInformerFactory(dn.openshiftContext.McClient, + mcpInformerFactory := mcfginformers.NewSharedInformerFactory(dn.platformHelpers.GetMcClient(), time.Second*30, ) mcpInformer := mcpInformerFactory.Machineconfiguration().V1().MachineConfigPools().Informer() @@ -967,7 +938,7 @@ func (dn *Daemon) pauseMCP() error { return } // Always get the latest object - newMcp, err := dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigPools().Get(ctx, dn.mcpName, metav1.GetOptions{}) + newMcp, err := dn.platformHelpers.GetMcClient().MachineconfigurationV1().MachineConfigPools().Get(ctx, dn.mcpName, metav1.GetOptions{}) if err != nil { log.Log.V(2).Error(err, "pauseMCP(): Failed to get MCP", "mcp-name", dn.mcpName) return @@ -987,12 +958,12 @@ func (dn *Daemon) pauseMCP() error { } log.Log.Info("pauseMCP(): pause MCP", "mcp-name", dn.mcpName) pausePatch := []byte("{\"spec\":{\"paused\":true}}") - _, err = dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigPools().Patch(context.Background(), dn.mcpName, types.MergePatchType, pausePatch, metav1.PatchOptions{}) + _, err = dn.platformHelpers.GetMcClient().MachineconfigurationV1().MachineConfigPools().Patch(context.Background(), dn.mcpName, types.MergePatchType, pausePatch, metav1.PatchOptions{}) if err != nil { log.Log.V(2).Error(err, "pauseMCP(): failed to pause MCP", "mcp-name", dn.mcpName) return } - err = dn.annotateNode(dn.name, annoMcpPaused) + err = dn.annotateNode(vars.NodeName, annoMcpPaused) if err != nil { log.Log.V(2).Error(err, "pauseMCP(): Failed to annotate node") return @@ -1003,12 +974,12 @@ func (dn *Daemon) pauseMCP() error { if paused { log.Log.Info("pauseMCP(): MCP is processing, resume MCP", "mcp-name", dn.mcpName) pausePatch := []byte("{\"spec\":{\"paused\":false}}") - _, err = dn.openshiftContext.McClient.MachineconfigurationV1().MachineConfigPools().Patch(context.Background(), dn.mcpName, types.MergePatchType, pausePatch, metav1.PatchOptions{}) + _, err = dn.platformHelpers.GetMcClient().MachineconfigurationV1().MachineConfigPools().Patch(context.Background(), dn.mcpName, types.MergePatchType, pausePatch, metav1.PatchOptions{}) if err != nil { log.Log.V(2).Error(err, "pauseMCP(): fail to resume MCP", "mcp-name", dn.mcpName) return } - err = dn.annotateNode(dn.name, annoDraining) + err = dn.annotateNode(vars.NodeName, annoDraining) if err != nil { log.Log.V(2).Error(err, "pauseMCP(): Failed to annotate node") return @@ -1057,7 +1028,7 @@ func (dn *Daemon) drainNode() error { log.Log.Error(err, "cordon failed, retrying") return false, nil } - err = drain.RunNodeDrain(dn.drainer, dn.name) + err = drain.RunNodeDrain(dn.drainer, vars.NodeName) if err == nil { return true, nil } @@ -1077,28 +1048,29 @@ func (dn *Daemon) drainNode() error { return nil } +// TODO: move this to host interface func (dn *Daemon) tryCreateSwitchdevUdevRule() error { log.Log.V(2).Info("tryCreateSwitchdevUdevRule()") nodeState, nodeStateErr := dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get( context.Background(), - dn.name, + vars.NodeName, metav1.GetOptions{}, ) if nodeStateErr != nil { - log.Log.Error(nodeStateErr, "could not fetch node state, skip updating switchdev udev rules", "name", dn.name) + log.Log.Error(nodeStateErr, "could not fetch node state, skip updating switchdev udev rules", "name", vars.NodeName) return nil } var newContent string - filePath := path.Join(utils.FilesystemRoot, "/host/etc/udev/rules.d/20-switchdev.rules") + filePath := path.Join(vars.FilesystemRoot, "/host/etc/udev/rules.d/20-switchdev.rules") for _, ifaceStatus := range nodeState.Status.Interfaces { if ifaceStatus.EswitchMode == sriovnetworkv1.ESwithModeSwitchDev { - switchID, err := utils.GetPhysSwitchID(ifaceStatus.Name) + switchID, err := dn.HostHelpers.GetPhysSwitchID(ifaceStatus.Name) if err != nil { return err } - portName, err := utils.GetPhysPortName(ifaceStatus.Name) + portName, err := dn.HostHelpers.GetPhysPortName(ifaceStatus.Name) if err != nil { return err } @@ -1126,7 +1098,7 @@ func (dn *Daemon) tryCreateSwitchdevUdevRule() error { } var stdout, stderr bytes.Buffer - cmd := exec.Command("/bin/bash", path.Join(utils.FilesystemRoot, udevScriptsPath)) + cmd := exec.Command("/bin/bash", path.Join(vars.FilesystemRoot, udevScriptsPath)) cmd.Stdout = &stdout cmd.Stderr = &stderr if err := cmd.Run(); err != nil { @@ -1158,5 +1130,5 @@ func (dn *Daemon) prepareNMUdevRule() error { supportedVfIds = append(supportedVfIds, vfID) } - return utils.PrepareNMUdevRule(supportedVfIds) + return dn.HostHelpers.PrepareNMUdevRule(supportedVfIds) } diff --git a/pkg/daemon/daemon_test.go b/pkg/daemon/daemon_test.go index 5300a1a65..b804888fa 100644 --- a/pkg/daemon/daemon_test.go +++ b/pkg/daemon/daemon_test.go @@ -5,6 +5,7 @@ import ( "flag" "testing" + "github.com/golang/mock/gomock" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" @@ -12,14 +13,18 @@ import ( fakek8s "k8s.io/client-go/kubernetes/fake" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/fakefilesystem" snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned" fakesnclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned/fake" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + mock_helper "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper/mock" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/fake" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/generic" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) var FakeSupportedNicIDs corev1.ConfigMap = corev1.ConfigMap{ @@ -95,29 +100,44 @@ var _ = Describe("Config Daemon", func() { } var err error - utils.FilesystemRoot, cleanFakeFs, err = fakeFs.Use() + vars.FilesystemRoot, cleanFakeFs, err = fakeFs.Use() Expect(err).ToNot(HaveOccurred()) + vars.UsingSystemdMode = false + vars.NodeName = "test-node" + vars.PlatformType = consts.Baremetal + kubeClient := fakek8s.NewSimpleClientset(&FakeSupportedNicIDs, &SriovDevicePluginPod) client := fakesnclientset.NewSimpleClientset() err = sriovnetworkv1.InitNicIDMapFromConfigMap(kubeClient, namespace) Expect(err).ToNot(HaveOccurred()) - er := NewEventRecorder(client, "test-node", kubeClient) + er := NewEventRecorder(client, kubeClient) + + t := GinkgoT() + mockCtrl := gomock.NewController(t) + platformHelper := mock_platforms.NewMockInterface(mockCtrl) + platformHelper.EXPECT().GetFlavor().Return(openshift.OpenshiftFlavorDefault).AnyTimes() + platformHelper.EXPECT().IsOpenshiftCluster().Return(false).AnyTimes() + platformHelper.EXPECT().IsHypershift().Return(false).AnyTimes() - sut = New("test-node", + vendorHelper := mock_helper.NewMockHostHelpersInterface(mockCtrl) + vendorHelper.EXPECT().TryEnableRdma().Return(true, nil).AnyTimes() + vendorHelper.EXPECT().TryEnableVhostNet().AnyTimes() + vendorHelper.EXPECT().TryEnableTun().AnyTimes() + vendorHelper.EXPECT().PrepareNMUdevRule([]string{"0x1014", "0x154c"}).Return(nil).AnyTimes() + + sut = New( client, kubeClient, - &utils.OpenshiftContext{IsOpenShiftCluster: false, OpenshiftFlavor: ""}, + vendorHelper, + platformHelper, exitCh, stopCh, syncCh, refreshCh, - utils.Baremetal, - false, er, - false, ) sut.enabledPlugins = map[string]plugin.VendorPlugin{generic.PluginName: &fake.FakePlugin{}} @@ -236,66 +256,6 @@ var _ = Describe("Config Daemon", func() { Expect(sut.nodeState.GetGeneration()).To(BeNumerically("==", 777)) }) }) - - Context("isNodeDraining", func() { - - It("for a non-Openshift cluster", func() { - sut.openshiftContext = &utils.OpenshiftContext{IsOpenShiftCluster: false} - - sut.node = &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-node", - Annotations: map[string]string{}}} - - Expect(sut.isNodeDraining()).To(BeFalse()) - - sut.node.Annotations["sriovnetwork.openshift.io/state"] = "Draining" - Expect(sut.isNodeDraining()).To(BeTrue()) - - sut.node.Annotations["sriovnetwork.openshift.io/state"] = "Draining_MCP_Paused" - Expect(sut.isNodeDraining()).To(BeTrue()) - }) - - It("for an Openshift cluster", func() { - sut.openshiftContext = &utils.OpenshiftContext{ - IsOpenShiftCluster: true, - OpenshiftFlavor: utils.OpenshiftFlavorDefault, - } - - sut.node = &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-node", - Annotations: map[string]string{}}} - - Expect(sut.isNodeDraining()).To(BeFalse()) - - sut.node.Annotations["sriovnetwork.openshift.io/state"] = "Draining" - Expect(sut.isNodeDraining()).To(BeTrue()) - - sut.node.Annotations["sriovnetwork.openshift.io/state"] = "Draining_MCP_Paused" - Expect(sut.isNodeDraining()).To(BeTrue()) - }) - - It("for an Openshift Hypershift cluster", func() { - sut.openshiftContext = &utils.OpenshiftContext{ - IsOpenShiftCluster: true, - OpenshiftFlavor: utils.OpenshiftFlavorHypershift, - } - - sut.node = &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-node", - Annotations: map[string]string{}}} - - Expect(sut.isNodeDraining()).To(BeFalse()) - - sut.node.Annotations["sriovnetwork.openshift.io/state"] = "Draining" - Expect(sut.isNodeDraining()).To(BeTrue()) - - sut.node.Annotations["sriovnetwork.openshift.io/state"] = "Draining_MCP_Paused" - Expect(sut.isNodeDraining()).To(BeTrue()) - }) - }) }) func createSriovNetworkNodeState(c snclientset.Interface, nodeState *sriovnetworkv1.SriovNetworkNodeState) error { diff --git a/pkg/daemon/event_recorder.go b/pkg/daemon/event_recorder.go index 2860cf84a..ed9b34ee7 100644 --- a/pkg/daemon/event_recorder.go +++ b/pkg/daemon/event_recorder.go @@ -12,24 +12,23 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) type EventRecorder struct { client snclientset.Interface - node string eventRecorder record.EventRecorder eventBroadcaster record.EventBroadcaster } // NewEventRecorder Create a new EventRecorder -func NewEventRecorder(c snclientset.Interface, n string, kubeclient kubernetes.Interface) *EventRecorder { +func NewEventRecorder(c snclientset.Interface, kubeclient kubernetes.Interface) *EventRecorder { eventBroadcaster := record.NewBroadcaster() eventBroadcaster.StartStructuredLogging(4) eventBroadcaster.StartRecordingToSink(&typedv1core.EventSinkImpl{Interface: kubeclient.CoreV1().Events("")}) eventRecorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "config-daemon"}) return &EventRecorder{ client: c, - node: n, eventRecorder: eventRecorder, eventBroadcaster: eventBroadcaster, } @@ -37,9 +36,9 @@ func NewEventRecorder(c snclientset.Interface, n string, kubeclient kubernetes.I // SendEvent Send an Event on the NodeState object func (e *EventRecorder) SendEvent(eventType string, msg string) { - nodeState, err := e.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(context.Background(), e.node, metav1.GetOptions{}) + nodeState, err := e.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(context.Background(), vars.NodeName, metav1.GetOptions{}) if err != nil { - log.Log.V(2).Error(err, "SendEvent(): Failed to fetch node state, skip SendEvent", "name", e.node) + log.Log.V(2).Error(err, "SendEvent(): Failed to fetch node state, skip SendEvent", "name", vars.NodeName) return } e.eventRecorder.Event(nodeState, corev1.EventTypeNormal, eventType, msg) diff --git a/pkg/daemon/plugin.go b/pkg/daemon/plugin.go index 09c69271c..38e1d9d73 100644 --- a/pkg/daemon/plugin.go +++ b/pkg/daemon/plugin.go @@ -6,17 +6,18 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" genericplugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/generic" intelplugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/intel" k8splugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/k8s" mellanoxplugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/mellanox" virtualplugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/virtual" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) -var VendorPluginMap = map[string]func() (plugin.VendorPlugin, error){ +var VendorPluginMap = map[string]func(helpers helper.HostHelpersInterface) (plugin.VendorPlugin, error){ "8086": intelplugin.NewIntelPlugin, "15b3": mellanoxplugin.NewMellanoxPlugin, } @@ -29,33 +30,33 @@ var ( K8sPlugin = k8splugin.NewK8sPlugin ) -func enablePlugins(platform utils.PlatformType, useSystemdService bool, ns *sriovnetworkv1.SriovNetworkNodeState, hostManager host.HostManagerInterface, storeManager utils.StoreManagerInterface) (map[string]plugin.VendorPlugin, error) { +func enablePlugins(ns *sriovnetworkv1.SriovNetworkNodeState, helpers helper.HostHelpersInterface) (map[string]plugin.VendorPlugin, error) { log.Log.Info("enableVendorPlugins(): enabling plugins") enabledPlugins := map[string]plugin.VendorPlugin{} - if platform == utils.VirtualOpenStack { - virtualPlugin, err := VirtualPlugin(false) + if vars.PlatformType == consts.VirtualOpenStack { + virtualPlugin, err := VirtualPlugin(helpers) if err != nil { log.Log.Error(err, "enableVendorPlugins(): failed to load the virtual plugin") return nil, err } enabledPlugins[virtualPlugin.Name()] = virtualPlugin } else { - enabledVendorPlugins, err := registerVendorPlugins(ns) + enabledVendorPlugins, err := registerVendorPlugins(ns, helpers) if err != nil { return nil, err } enabledPlugins = enabledVendorPlugins - if utils.ClusterType != utils.ClusterTypeOpenshift { - k8sPlugin, err := K8sPlugin(useSystemdService) + if vars.ClusterType != consts.ClusterTypeOpenshift { + k8sPlugin, err := K8sPlugin(helpers) if err != nil { log.Log.Error(err, "enableVendorPlugins(): failed to load the k8s plugin") return nil, err } enabledPlugins[k8sPlugin.Name()] = k8sPlugin } - genericPlugin, err := GenericPlugin(false, hostManager, storeManager) + genericPlugin, err := GenericPlugin(helpers) if err != nil { log.Log.Error(err, "enableVendorPlugins(): failed to load the generic plugin") return nil, err @@ -71,12 +72,12 @@ func enablePlugins(platform utils.PlatformType, useSystemdService bool, ns *srio return enabledPlugins, nil } -func registerVendorPlugins(ns *sriovnetworkv1.SriovNetworkNodeState) (map[string]plugin.VendorPlugin, error) { +func registerVendorPlugins(ns *sriovnetworkv1.SriovNetworkNodeState, helpers helper.HostHelpersInterface) (map[string]plugin.VendorPlugin, error) { vendorPlugins := map[string]plugin.VendorPlugin{} for _, iface := range ns.Status.Interfaces { if val, ok := VendorPluginMap[iface.Vendor]; ok { - plug, err := val() + plug, err := val(helpers) if err != nil { log.Log.Error(err, "registerVendorPlugins(): failed to load plugin", "plugin-name", plug.Name()) return vendorPlugins, fmt.Errorf("registerVendorPlugins(): failed to load the %s plugin error: %v", plug.Name(), err) diff --git a/pkg/daemon/writer.go b/pkg/daemon/writer.go index a9e65a417..1a83c77d8 100644 --- a/pkg/daemon/writer.go +++ b/pkg/daemon/writer.go @@ -16,7 +16,10 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) const ( @@ -25,62 +28,51 @@ const ( ) type NodeStateStatusWriter struct { - client snclientset.Interface - node string - status sriovnetworkv1.SriovNetworkNodeStateStatus - OnHeartbeatFailure func() - openStackDevicesInfo utils.OSPDevicesInfo - withUnsupportedDevices bool - storeManager utils.StoreManagerInterface - eventRecorder *EventRecorder + client snclientset.Interface + status sriovnetworkv1.SriovNetworkNodeStateStatus + OnHeartbeatFailure func() + platformHelper platforms.Interface + hostHelper helper.HostHelpersInterface + eventRecorder *EventRecorder } // NewNodeStateStatusWriter Create a new NodeStateStatusWriter -func NewNodeStateStatusWriter(c snclientset.Interface, n string, f func(), er *EventRecorder, devMode bool) *NodeStateStatusWriter { +func NewNodeStateStatusWriter(c snclientset.Interface, + f func(), er *EventRecorder, + hostHelper helper.HostHelpersInterface, + platformHelper platforms.Interface) *NodeStateStatusWriter { return &NodeStateStatusWriter{ - client: c, - node: n, - OnHeartbeatFailure: f, - eventRecorder: er, - withUnsupportedDevices: devMode, + client: c, + OnHeartbeatFailure: f, + eventRecorder: er, + hostHelper: hostHelper, + platformHelper: platformHelper, } } // RunOnce initial the interface status for both baremetal and virtual environments -func (w *NodeStateStatusWriter) RunOnce(destDir string, platformType utils.PlatformType) error { +func (w *NodeStateStatusWriter) RunOnce() error { log.Log.V(0).Info("RunOnce()") msg := Message{} - storeManager, err := utils.NewStoreManager(false) - if err != nil { - log.Log.Error(err, "failed to create store manager") - return err - } - w.storeManager = storeManager - - if platformType == utils.VirtualOpenStack { - ns, err := w.getCheckPointNodeState(destDir) + if vars.PlatformType == consts.VirtualOpenStack { + ns, err := w.getCheckPointNodeState() if err != nil { return err } if ns == nil { - metaData, networkData, err := utils.GetOpenstackData(true) - if err != nil { - log.Log.Error(err, "RunOnce(): failed to read OpenStack data") - } - - w.openStackDevicesInfo, err = utils.CreateOpenstackDevicesInfo(metaData, networkData) + err = w.platformHelper.CreateOpenstackDevicesInfo() if err != nil { return err } } else { - w.openStackDevicesInfo = utils.CreateOpenstackDevicesInfoFromNodeStatus(ns) + w.platformHelper.CreateOpenstackDevicesInfoFromNodeStatus(ns) } } log.Log.V(0).Info("RunOnce(): first poll for nic status") - if err := w.pollNicStatus(platformType); err != nil { + if err := w.pollNicStatus(); err != nil { log.Log.Error(err, "RunOnce(): first poll failed") } @@ -88,12 +80,12 @@ func (w *NodeStateStatusWriter) RunOnce(destDir string, platformType utils.Platf if err != nil { log.Log.Error(err, "RunOnce(): first writing to node status failed") } - return w.writeCheckpointFile(ns, destDir) + return w.writeCheckpointFile(ns) } // Run reads from the writer channel and sets the interface status. It will // return if the stop channel is closed. Intended to be run via a goroutine. -func (w *NodeStateStatusWriter) Run(stop <-chan struct{}, refresh <-chan Message, syncCh chan<- struct{}, platformType utils.PlatformType) error { +func (w *NodeStateStatusWriter) Run(stop <-chan struct{}, refresh <-chan Message, syncCh chan<- struct{}) error { log.Log.V(0).Info("Run(): start writer") msg := Message{} @@ -104,7 +96,7 @@ func (w *NodeStateStatusWriter) Run(stop <-chan struct{}, refresh <-chan Message return nil case msg = <-refresh: log.Log.V(0).Info("Run(): refresh trigger") - if err := w.pollNicStatus(platformType); err != nil { + if err := w.pollNicStatus(); err != nil { continue } _, err := w.setNodeStateStatus(msg) @@ -114,7 +106,7 @@ func (w *NodeStateStatusWriter) Run(stop <-chan struct{}, refresh <-chan Message syncCh <- struct{}{} case <-time.After(30 * time.Second): log.Log.V(2).Info("Run(): period refresh") - if err := w.pollNicStatus(platformType); err != nil { + if err := w.pollNicStatus(); err != nil { continue } w.setNodeStateStatus(msg) @@ -122,15 +114,15 @@ func (w *NodeStateStatusWriter) Run(stop <-chan struct{}, refresh <-chan Message } } -func (w *NodeStateStatusWriter) pollNicStatus(platformType utils.PlatformType) error { +func (w *NodeStateStatusWriter) pollNicStatus() error { log.Log.V(2).Info("pollNicStatus()") var iface []sriovnetworkv1.InterfaceExt var err error - if platformType == utils.VirtualOpenStack { - iface, err = utils.DiscoverSriovDevicesVirtual(w.openStackDevicesInfo) + if vars.PlatformType == consts.VirtualOpenStack { + iface, err = w.platformHelper.DiscoverSriovDevicesVirtual() } else { - iface, err = utils.DiscoverSriovDevices(w.withUnsupportedDevices, w.storeManager) + iface, err = w.hostHelper.DiscoverSriovDevices(w.hostHelper) } if err != nil { return err @@ -177,7 +169,7 @@ func (w *NodeStateStatusWriter) updateNodeStateStatusRetry(f func(*sriovnetworkv func (w *NodeStateStatusWriter) setNodeStateStatus(msg Message) (*sriovnetworkv1.SriovNetworkNodeState, error) { nodeState, err := w.updateNodeStateStatusRetry(func(nodeState *sriovnetworkv1.SriovNetworkNodeState) { nodeState.Status.Interfaces = w.status.Interfaces - if msg.lastSyncError != "" || msg.syncStatus == syncStatusSucceeded { + if msg.lastSyncError != "" || msg.syncStatus == consts.SyncStatusSucceeded { // clear lastSyncError when sync Succeeded nodeState.Status.LastSyncError = msg.lastSyncError } @@ -215,33 +207,33 @@ func (w *NodeStateStatusWriter) getNodeState() (*sriovnetworkv1.SriovNetworkNode var lastErr error var n *sriovnetworkv1.SriovNetworkNodeState err := wait.PollImmediate(10*time.Second, 5*time.Minute, func() (bool, error) { - n, lastErr = w.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(context.Background(), w.node, metav1.GetOptions{}) + n, lastErr = w.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(context.Background(), vars.NodeName, metav1.GetOptions{}) if lastErr == nil { return true, nil } - log.Log.Error(lastErr, "getNodeState(): Failed to fetch node state, close all connections and retry...", "name", w.node) + log.Log.Error(lastErr, "getNodeState(): Failed to fetch node state, close all connections and retry...", "name", vars.NodeName) // Use the Get() also as an client-go keepalive indicator for the TCP connection. w.OnHeartbeatFailure() return false, nil }) if err != nil { if err == wait.ErrWaitTimeout { - return nil, errors.Wrapf(lastErr, "Timed out trying to fetch node %s", w.node) + return nil, errors.Wrapf(lastErr, "Timed out trying to fetch node %s", vars.NodeName) } return nil, err } return n, nil } -func (w *NodeStateStatusWriter) writeCheckpointFile(ns *sriovnetworkv1.SriovNetworkNodeState, destDir string) error { - configdir := filepath.Join(destDir, CheckpointFileName) +func (w *NodeStateStatusWriter) writeCheckpointFile(ns *sriovnetworkv1.SriovNetworkNodeState) error { + configdir := filepath.Join(vars.Destdir, CheckpointFileName) file, err := os.OpenFile(configdir, os.O_RDWR|os.O_CREATE, 0644) if err != nil { return err } defer file.Close() log.Log.Info("writeCheckpointFile(): try to decode the checkpoint file") - if err = json.NewDecoder(file).Decode(&utils.InitialState); err != nil { + if err = json.NewDecoder(file).Decode(&sriovnetworkv1.InitialState); err != nil { log.Log.V(2).Error(err, "writeCheckpointFile(): fail to decode, writing new file instead") log.Log.Info("writeCheckpointFile(): write checkpoint file") if err = file.Truncate(0); err != nil { @@ -253,14 +245,14 @@ func (w *NodeStateStatusWriter) writeCheckpointFile(ns *sriovnetworkv1.SriovNetw if err = json.NewEncoder(file).Encode(*ns); err != nil { return err } - utils.InitialState = *ns + sriovnetworkv1.InitialState = *ns } return nil } -func (w *NodeStateStatusWriter) getCheckPointNodeState(destDir string) (*sriovnetworkv1.SriovNetworkNodeState, error) { +func (w *NodeStateStatusWriter) getCheckPointNodeState() (*sriovnetworkv1.SriovNetworkNodeState, error) { log.Log.Info("getCheckPointNodeState()") - configdir := filepath.Join(destDir, CheckpointFileName) + configdir := filepath.Join(vars.Destdir, CheckpointFileName) file, err := os.OpenFile(configdir, os.O_RDONLY, 0644) if err != nil { if os.IsNotExist(err) { @@ -269,9 +261,9 @@ func (w *NodeStateStatusWriter) getCheckPointNodeState(destDir string) (*sriovne return nil, err } defer file.Close() - if err = json.NewDecoder(file).Decode(&utils.InitialState); err != nil { + if err = json.NewDecoder(file).Decode(&sriovnetworkv1.InitialState); err != nil { return nil, err } - return &utils.InitialState, nil + return &sriovnetworkv1.InitialState, nil } diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index 44069407e..88aa38549 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -3,6 +3,7 @@ package generic import ( "bytes" "errors" + "fmt" "os/exec" "reflect" "strconv" @@ -12,10 +13,12 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" + mlx "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vendors/mellanox" ) var PluginName = "generic_plugin" @@ -54,34 +57,33 @@ type GenericPlugin struct { LastState *sriovnetworkv1.SriovNetworkNodeState DriverStateMap DriverStateMapType DesiredKernelArgs map[string]bool - RunningOnHost bool - HostManager host.HostManagerInterface - StoreManager utils.StoreManagerInterface + pfsToSkip map[string]bool + helpers helper.HostHelpersInterface } const scriptsPath = "bindata/scripts/enable-kargs.sh" // Initialize our plugin and set up initial values -func NewGenericPlugin(runningOnHost bool, hostManager host.HostManagerInterface, storeManager utils.StoreManagerInterface) (plugin.VendorPlugin, error) { +func NewGenericPlugin(helpers helper.HostHelpersInterface) (plugin.VendorPlugin, error) { driverStateMap := make(map[uint]*DriverState) driverStateMap[Vfio] = &DriverState{ DriverName: vfioPciDriver, - DeviceType: constants.DeviceTypeVfioPci, + DeviceType: consts.DeviceTypeVfioPci, VdpaType: "", NeedDriverFunc: needDriverCheckDeviceType, DriverLoaded: false, } driverStateMap[VirtioVdpa] = &DriverState{ DriverName: virtioVdpaDriver, - DeviceType: constants.DeviceTypeNetDevice, - VdpaType: constants.VdpaTypeVirtio, + DeviceType: consts.DeviceTypeNetDevice, + VdpaType: consts.VdpaTypeVirtio, NeedDriverFunc: needDriverCheckVdpaType, DriverLoaded: false, } driverStateMap[VhostVdpa] = &DriverState{ DriverName: vhostVdpaDriver, - DeviceType: constants.DeviceTypeNetDevice, - VdpaType: constants.VdpaTypeVhost, + DeviceType: consts.DeviceTypeNetDevice, + VdpaType: consts.VdpaTypeVhost, NeedDriverFunc: needDriverCheckVdpaType, DriverLoaded: false, } @@ -90,9 +92,8 @@ func NewGenericPlugin(runningOnHost bool, hostManager host.HostManagerInterface, SpecVersion: "1.0", DriverStateMap: driverStateMap, DesiredKernelArgs: make(map[string]bool), - RunningOnHost: runningOnHost, - HostManager: hostManager, - StoreManager: storeManager, + pfsToSkip: make(map[string]bool), + helpers: helpers, }, nil } @@ -109,9 +110,6 @@ func (p *GenericPlugin) Spec() string { // OnNodeStateChange Invoked when SriovNetworkNodeState CR is created or updated, return if need drain and/or reboot node func (p *GenericPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState) (needDrain bool, needReboot bool, err error) { log.Log.Info("generic-plugin OnNodeStateChange()") - needDrain = false - needReboot = false - err = nil p.DesireState = new needDrain = p.needDrainNode(new.Spec.Interfaces, new.Status.Interfaces) @@ -130,7 +128,7 @@ func (p *GenericPlugin) syncDriverState() error { for _, driverState := range p.DriverStateMap { if !driverState.DriverLoaded && driverState.NeedDriverFunc(p.DesireState, driverState) { log.Log.V(2).Info("loading driver", "name", driverState.DriverName) - if err := p.HostManager.LoadKernelModule(driverState.DriverName); err != nil { + if err := p.helpers.LoadKernelModule(driverState.DriverName); err != nil { log.Log.Error(err, "generic-plugin syncDriverState(): fail to load kmod", "name", driverState.DriverName) return err } @@ -156,27 +154,19 @@ func (p *GenericPlugin) Apply() error { return err } - // Create a map with all the PFs we will need to configure - // we need to create it here before we access the host file system using the chroot function - // because the skipConfigVf needs the mstconfig package that exist only inside the sriov-config-daemon file system - pfsToSkip, err := utils.GetPfsToSkip(p.DesireState) - if err != nil { - return err - } - // When calling from systemd do not try to chroot - if !p.RunningOnHost { - exit, err := utils.Chroot("/host") + if !vars.UsingSystemdMode { + exit, err := p.helpers.Chroot(consts.Host) if err != nil { return err } defer exit() } - if err := utils.SyncNodeState(p.DesireState, pfsToSkip); err != nil { + if err := p.helpers.ConfigSriovInterfaces(p.helpers, p.DesireState.Spec.Interfaces, p.DesireState.Status.Interfaces, p.pfsToSkip); err != nil { // Catch the "cannot allocate memory" error and try to use PCI realloc if errors.Is(err, syscall.ENOMEM) { - p.addToDesiredKernelArgs(utils.KernelArgPciRealloc) + p.addToDesiredKernelArgs(consts.KernelArgPciRealloc) } return err } @@ -217,7 +207,7 @@ func setKernelArg(karg string) (bool, error) { if err := cmd.Run(); err != nil { // if grubby is not there log and assume kernel args are set correctly. - if isCommandNotFound(err) { + if utils.IsCommandNotFound(err) { log.Log.Error(err, "generic-plugin setKernelArg(): grubby or ostree command not found. Please ensure that kernel arg are set", "kargs", karg) return false, nil @@ -236,15 +226,6 @@ func setKernelArg(karg string) (bool, error) { return false, err } -func isCommandNotFound(err error) bool { - if exitErr, ok := err.(*exec.ExitError); ok { - if status, ok := exitErr.Sys().(syscall.WaitStatus); ok && status.ExitStatus() == 127 { - return true - } - } - return false -} - // addToDesiredKernelArgs Should be called to queue a kernel arg to be added to the node. func (p *GenericPlugin) addToDesiredKernelArgs(karg string) { if _, ok := p.DesiredKernelArgs[karg]; !ok { @@ -259,12 +240,12 @@ func (p *GenericPlugin) syncDesiredKernelArgs() (bool, error) { if len(p.DesiredKernelArgs) == 0 { return false, nil } - kargs, err := utils.GetCurrentKernelArgs(false) + kargs, err := p.helpers.GetCurrentKernelArgs() if err != nil { return false, err } for desiredKarg, attempted := range p.DesiredKernelArgs { - set := utils.IsKernelArgsSet(kargs, desiredKarg) + set := p.helpers.IsKernelArgsSet(kargs, desiredKarg) if !set { if attempted { log.Log.V(2).Info("generic-plugin syncDesiredKernelArgs(): previously attempted to set kernel arg", @@ -302,7 +283,7 @@ func (p *GenericPlugin) needDrainNode(desired sriovnetworkv1.Interfaces, current "address", iface.PciAddress) break } - if utils.NeedUpdate(&iface, &ifaceStatus) { + if sriovnetworkv1.NeedToUpdateSriov(&iface, &ifaceStatus) { log.Log.V(2).Info("generic-plugin needDrainNode(): need drain, for PCI address request update", "address", iface.PciAddress) needDrain = true @@ -314,7 +295,7 @@ func (p *GenericPlugin) needDrainNode(desired sriovnetworkv1.Interfaces, current } if !configured && ifaceStatus.NumVfs > 0 { // load the PF info - pfStatus, exist, err := p.StoreManager.LoadPfsStatus(ifaceStatus.PciAddress) + pfStatus, exist, err := p.helpers.LoadPfsStatus(ifaceStatus.PciAddress) if err != nil { log.Log.Error(err, "generic-plugin needDrainNode(): failed to load info about PF status for pci device", "address", ifaceStatus.PciAddress) @@ -347,8 +328,8 @@ func (p *GenericPlugin) needDrainNode(desired sriovnetworkv1.Interfaces, current func (p *GenericPlugin) addVfioDesiredKernelArg(state *sriovnetworkv1.SriovNetworkNodeState) { driverState := p.DriverStateMap[Vfio] if !driverState.DriverLoaded && driverState.NeedDriverFunc(state, driverState) { - p.addToDesiredKernelArgs(utils.KernelArgIntelIommu) - p.addToDesiredKernelArgs(utils.KernelArgIommuPt) + p.addToDesiredKernelArgs(consts.KernelArgIntelIommu) + p.addToDesiredKernelArgs(consts.KernelArgIommuPt) } } @@ -366,7 +347,16 @@ func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeSta needReboot = true } - updateNode, err = utils.WriteSwitchdevConfFile(state) + // Create a map with all the PFs we will need to configure + // we need to create it here before we access the host file system using the chroot function + // because the skipConfigVf needs the mstconfig package that exist only inside the sriov-config-daemon file system + pfsToSkip, err := getPfsToSkip(p.DesireState, p.helpers) + if err != nil { + return false, err + } + p.pfsToSkip = pfsToSkip + + updateNode, err = p.helpers.WriteSwitchdevConfFile(state, p.pfsToSkip) if err != nil { log.Log.Error(err, "generic-plugin needRebootNode(): fail to write switchdev device config file") return false, err @@ -379,6 +369,53 @@ func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeSta return needReboot, nil } +// getPfsToSkip return a map of devices pci addresses to should be configured via systemd instead if the legacy mode +// we skip devices in switchdev mode and Bluefield card in ConnectX mode +func getPfsToSkip(ns *sriovnetworkv1.SriovNetworkNodeState, mlxHelper mlx.MellanoxInterface) (map[string]bool, error) { + pfsToSkip := map[string]bool{} + for _, ifaceStatus := range ns.Status.Interfaces { + for _, iface := range ns.Spec.Interfaces { + if iface.PciAddress == ifaceStatus.PciAddress { + skip, err := skipConfigVf(iface, ifaceStatus, mlxHelper) + if err != nil { + log.Log.Error(err, "GetPfsToSkip(): fail to check for skip VFs", "device", iface.PciAddress) + return pfsToSkip, err + } + pfsToSkip[iface.PciAddress] = skip + break + } + } + } + + return pfsToSkip, nil +} + +// skipConfigVf Use systemd service to configure switchdev mode or BF-2 NICs in OpenShift +func skipConfigVf(ifSpec sriovnetworkv1.Interface, ifStatus sriovnetworkv1.InterfaceExt, mlxHelper mlx.MellanoxInterface) (bool, error) { + if ifSpec.EswitchMode == sriovnetworkv1.ESwithModeSwitchDev { + log.Log.V(2).Info("skipConfigVf(): skip config VF for switchdev device") + return true, nil + } + + // NVIDIA BlueField 2 and BlueField3 in OpenShift + if vars.ClusterType == consts.ClusterTypeOpenshift && ifStatus.Vendor == mlx.VendorMellanox && (ifStatus.DeviceID == mlx.DeviceBF2 || ifStatus.DeviceID == mlx.DeviceBF3) { + // TODO: remove this when switch to the systemd configuration support. + mode, err := mlxHelper.GetMellanoxBlueFieldMode(ifStatus.PciAddress) + if err != nil { + return false, fmt.Errorf("failed to read Mellanox Bluefield card mode for %s,%v", ifStatus.PciAddress, err) + } + + if mode == mlx.BluefieldConnectXMode { + return false, nil + } + + log.Log.V(2).Info("skipConfigVf(): skip config VF for Bluefiled card on DPU mode") + return true, nil + } + + return false, nil +} + // ////////////// for testing purposes only /////////////////////// func (p *GenericPlugin) getDriverStateMap() DriverStateMapType { return p.DriverStateMap diff --git a/pkg/plugins/generic/generic_plugin_test.go b/pkg/plugins/generic/generic_plugin_test.go index e1211b392..881fc87d4 100644 --- a/pkg/plugins/generic/generic_plugin_test.go +++ b/pkg/plugins/generic/generic_plugin_test.go @@ -8,9 +8,8 @@ import ( . "github.com/onsi/gomega" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - mock_host "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/mock" + mock_helper "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper/mock" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" - mock_utils "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils/mock" ) func TestGenericPlugin(t *testing.T) { @@ -24,16 +23,16 @@ var _ = Describe("Generic plugin", func() { genericPlugin plugin.VendorPlugin err error ctrl *gomock.Controller - mockHost *mock_host.MockHostManagerInterface - mockStore *mock_utils.MockStoreManagerInterface + hostHelper *mock_helper.MockHostHelpersInterface ) BeforeEach(func() { t = GinkgoT() ctrl = gomock.NewController(t) - mockHost = mock_host.NewMockHostManagerInterface(ctrl) - mockStore = mock_utils.NewMockStoreManagerInterface(ctrl) - genericPlugin, err = NewGenericPlugin(false, mockHost, mockStore) + + hostHelper = mock_helper.NewMockHostHelpersInterface(ctrl) + + genericPlugin, err = NewGenericPlugin(hostHelper) Expect(err).ToNot(HaveOccurred()) }) @@ -79,6 +78,7 @@ var _ = Describe("Generic plugin", func() { }, } + hostHelper.EXPECT().WriteSwitchdevConfFile(networkNodeState, map[string]bool{"0000:00:00.0": false}).Return(false, nil) needDrain, needReboot, err := genericPlugin.OnNodeStateChange(networkNodeState) Expect(err).ToNot(HaveOccurred()) Expect(needReboot).To(BeFalse()) @@ -134,6 +134,7 @@ var _ = Describe("Generic plugin", func() { }, } + hostHelper.EXPECT().WriteSwitchdevConfFile(networkNodeState, map[string]bool{"0000:00:00.0": false}).Return(false, nil) needDrain, needReboot, err := genericPlugin.OnNodeStateChange(networkNodeState) Expect(err).ToNot(HaveOccurred()) Expect(needReboot).To(BeFalse()) diff --git a/pkg/plugins/intel/intel_plugin.go b/pkg/plugins/intel/intel_plugin.go index bf032652e..1c64a47fb 100644 --- a/pkg/plugins/intel/intel_plugin.go +++ b/pkg/plugins/intel/intel_plugin.go @@ -4,6 +4,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" ) @@ -16,7 +17,7 @@ type IntelPlugin struct { LastState *sriovnetworkv1.SriovNetworkNodeState } -func NewIntelPlugin() (plugin.VendorPlugin, error) { +func NewIntelPlugin(helpers helper.HostHelpersInterface) (plugin.VendorPlugin, error) { return &IntelPlugin{ PluginName: PluginName, SpecVersion: "1.0", diff --git a/pkg/plugins/k8s/k8s_plugin.go b/pkg/plugins/k8s/k8s_plugin.go index 4cbcc818e..1bf70eab1 100644 --- a/pkg/plugins/k8s/k8s_plugin.go +++ b/pkg/plugins/k8s/k8s_plugin.go @@ -6,13 +6,14 @@ import ( "path" "strings" - "github.com/coreos/go-systemd/v22/unit" "sigs.k8s.io/controller-runtime/pkg/log" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host" plugins "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/service" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) var PluginName = "k8s_plugin" @@ -20,17 +21,16 @@ var PluginName = "k8s_plugin" type K8sPlugin struct { PluginName string SpecVersion string - serviceManager service.ServiceManager - switchdevBeforeNMRunScript *service.ScriptManifestFile - switchdevAfterNMRunScript *service.ScriptManifestFile - switchdevUdevScript *service.ScriptManifestFile - switchdevBeforeNMService *service.Service - switchdevAfterNMService *service.Service - openVSwitchService *service.Service - networkManagerService *service.Service - sriovService *service.Service + switchdevBeforeNMRunScript *host.ScriptManifestFile + switchdevAfterNMRunScript *host.ScriptManifestFile + switchdevUdevScript *host.ScriptManifestFile + switchdevBeforeNMService *host.Service + switchdevAfterNMService *host.Service + openVSwitchService *host.Service + networkManagerService *host.Service + sriovService *host.Service updateTarget *k8sUpdateTarget - useSystemdService bool + hostHelper helper.HostHelpersInterface } type k8sUpdateTarget struct { @@ -40,7 +40,7 @@ type k8sUpdateTarget struct { switchdevAfterNMRunScript bool switchdevUdevScript bool sriovScript bool - systemServices []*service.Service + systemServices []*host.Service } func (u *k8sUpdateTarget) needUpdate() bool { @@ -58,7 +58,7 @@ func (u *k8sUpdateTarget) reset() { u.switchdevAfterNMRunScript = false u.switchdevUdevScript = false u.sriovScript = false - u.systemServices = []*service.Service{} + u.systemServices = []*host.Service{} } func (u *k8sUpdateTarget) String() string { @@ -92,18 +92,15 @@ const ( configuresSwitchdevBeforeNMScript = switchdevManifestPath + "files/switchdev-configuration-before-nm.sh.yaml" configuresSwitchdevAfterNMScript = switchdevManifestPath + "files/switchdev-configuration-after-nm.sh.yaml" switchdevRenamingUdevScript = switchdevManifestPath + "files/switchdev-vf-link-name.sh.yaml" - - chroot = "/host" ) // Initialize our plugin and set up initial values -func NewK8sPlugin(useSystemdService bool) (plugins.VendorPlugin, error) { +func NewK8sPlugin(helper helper.HostHelpersInterface) (plugins.VendorPlugin, error) { k8sPluging := &K8sPlugin{ - PluginName: PluginName, - SpecVersion: "1.0", - serviceManager: service.NewServiceManager(chroot), - updateTarget: &k8sUpdateTarget{}, - useSystemdService: useSystemdService, + PluginName: PluginName, + SpecVersion: "1.0", + hostHelper: helper, + updateTarget: &k8sUpdateTarget{}, } return k8sPluging, k8sPluging.readManifestFiles() @@ -128,11 +125,11 @@ func (p *K8sPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState) p.updateTarget.reset() // TODO add check for enableOvsOffload in OperatorConfig later // Update services if switchdev required - if !p.useSystemdService && !utils.IsSwitchdevModeSpec(new.Spec) { + if !vars.UsingSystemdMode && !sriovnetworkv1.IsSwitchdevModeSpec(new.Spec) { return } - if utils.IsSwitchdevModeSpec(new.Spec) { + if sriovnetworkv1.IsSwitchdevModeSpec(new.Spec) { // Check services err = p.switchDevServicesStateUpdate() if err != nil { @@ -141,7 +138,7 @@ func (p *K8sPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeState) } } - if p.useSystemdService { + if vars.UsingSystemdMode { // Check sriov service err = p.sriovServiceStateUpdate() if err != nil { @@ -170,14 +167,14 @@ func (p *K8sPlugin) Apply() error { return err } - if p.useSystemdService { + if vars.UsingSystemdMode { if err := p.updateSriovService(); err != nil { return err } } for _, systemService := range p.updateTarget.systemServices { - if err := p.updateSystemService(systemService); err != nil { + if err := p.hostHelper.UpdateSystemService(systemService); err != nil { return err } } @@ -187,26 +184,20 @@ func (p *K8sPlugin) Apply() error { func (p *K8sPlugin) readSwitchdevManifest() error { // Read switchdev service - switchdevBeforeNMService, err := service.ReadServiceManifestFile(switchdevBeforeNMUnitFile) + switchdevBeforeNMService, err := p.hostHelper.ReadServiceManifestFile(switchdevBeforeNMUnitFile) if err != nil { return err } - switchdevAfterNMService, err := service.ReadServiceManifestFile(switchdevAfterNMUnitFile) + switchdevAfterNMService, err := p.hostHelper.ReadServiceManifestFile(switchdevAfterNMUnitFile) if err != nil { return err } - // Remove run condition form the service - conditionOpt := &unit.UnitOption{ - Section: "Unit", - Name: "ConditionPathExists", - Value: "!/etc/ignition-machine-config-encapsulated.json", - } - switchdevBeforeNMService, err = service.RemoveFromService(switchdevBeforeNMService, conditionOpt) + switchdevBeforeNMService, err = p.hostHelper.RemoveFromService(switchdevBeforeNMService, host.ConditionOpt) if err != nil { return err } - switchdevAfterNMService, err = service.RemoveFromService(switchdevAfterNMService, conditionOpt) + switchdevAfterNMService, err = p.hostHelper.RemoveFromService(switchdevAfterNMService, host.ConditionOpt) if err != nil { return err } @@ -214,11 +205,11 @@ func (p *K8sPlugin) readSwitchdevManifest() error { p.switchdevAfterNMService = switchdevAfterNMService // Read switchdev run script - switchdevBeforeNMRunScript, err := service.ReadScriptManifestFile(configuresSwitchdevBeforeNMScript) + switchdevBeforeNMRunScript, err := p.hostHelper.ReadScriptManifestFile(configuresSwitchdevBeforeNMScript) if err != nil { return err } - switchdevAfterNMRunScript, err := service.ReadScriptManifestFile(configuresSwitchdevAfterNMScript) + switchdevAfterNMRunScript, err := p.hostHelper.ReadScriptManifestFile(configuresSwitchdevAfterNMScript) if err != nil { return err } @@ -226,7 +217,7 @@ func (p *K8sPlugin) readSwitchdevManifest() error { p.switchdevAfterNMRunScript = switchdevAfterNMRunScript // Read switchdev udev script - switchdevUdevScript, err := service.ReadScriptManifestFile(switchdevRenamingUdevScript) + switchdevUdevScript, err := p.hostHelper.ReadScriptManifestFile(switchdevRenamingUdevScript) if err != nil { return err } @@ -236,7 +227,7 @@ func (p *K8sPlugin) readSwitchdevManifest() error { } func (p *K8sPlugin) readNetworkManagerManifest() error { - networkManagerService, err := service.ReadServiceInjectionManifestFile(networkManagerUnitFile) + networkManagerService, err := p.hostHelper.ReadServiceInjectionManifestFile(networkManagerUnitFile) if err != nil { return err } @@ -246,7 +237,7 @@ func (p *K8sPlugin) readNetworkManagerManifest() error { } func (p *K8sPlugin) readOpenVSwitchdManifest() error { - openVSwitchService, err := service.ReadServiceInjectionManifestFile(ovsUnitFile) + openVSwitchService, err := p.hostHelper.ReadServiceInjectionManifestFile(ovsUnitFile) if err != nil { return err } @@ -256,7 +247,7 @@ func (p *K8sPlugin) readOpenVSwitchdManifest() error { } func (p *K8sPlugin) readSriovServiceManifest() error { - sriovService, err := service.ReadServiceManifestFile(sriovUnitFile) + sriovService, err := p.hostHelper.ReadServiceManifestFile(sriovUnitFile) if err != nil { return err } @@ -322,7 +313,7 @@ func (p *K8sPlugin) switchdevServiceStateUpdate() error { func (p *K8sPlugin) sriovServiceStateUpdate() error { log.Log.Info("sriovServiceStateUpdate()") - isServiceEnabled, err := p.serviceManager.IsServiceEnabled(p.sriovService.Path) + isServiceEnabled, err := p.hostHelper.IsServiceEnabled(p.sriovService.Path) if err != nil { return err } @@ -340,12 +331,12 @@ func (p *K8sPlugin) sriovServiceStateUpdate() error { return nil } -func (p *K8sPlugin) getSwitchDevSystemServices() []*service.Service { - return []*service.Service{p.networkManagerService, p.openVSwitchService} +func (p *K8sPlugin) getSwitchDevSystemServices() []*host.Service { + return []*host.Service{p.networkManagerService, p.openVSwitchService} } -func (p *K8sPlugin) isSwitchdevScriptNeedUpdate(scriptObj *service.ScriptManifestFile) (needUpdate bool, err error) { - data, err := os.ReadFile(path.Join(chroot, scriptObj.Path)) +func (p *K8sPlugin) isSwitchdevScriptNeedUpdate(scriptObj *host.ScriptManifestFile) (needUpdate bool, err error) { + data, err := os.ReadFile(path.Join(consts.Host, scriptObj.Path)) if err != nil { if !os.IsNotExist(err) { return false, err @@ -357,8 +348,8 @@ func (p *K8sPlugin) isSwitchdevScriptNeedUpdate(scriptObj *service.ScriptManifes return false, nil } -func (p *K8sPlugin) isSwitchdevServiceNeedUpdate(serviceObj *service.Service) (needUpdate bool, err error) { - swdService, err := p.serviceManager.ReadService(serviceObj.Path) +func (p *K8sPlugin) isSwitchdevServiceNeedUpdate(serviceObj *host.Service) (needUpdate bool, err error) { + swdService, err := p.hostHelper.ReadService(serviceObj.Path) if err != nil { if !os.IsNotExist(err) { return false, err @@ -366,7 +357,7 @@ func (p *K8sPlugin) isSwitchdevServiceNeedUpdate(serviceObj *service.Service) (n // service not exists return true, nil } else { - needChange, err := service.CompareServices(swdService, serviceObj) + needChange, err := p.hostHelper.CompareServices(swdService, serviceObj) if err != nil { return false, err } @@ -374,16 +365,16 @@ func (p *K8sPlugin) isSwitchdevServiceNeedUpdate(serviceObj *service.Service) (n } } -func (p *K8sPlugin) isSystemServiceNeedUpdate(serviceObj *service.Service) bool { +func (p *K8sPlugin) isSystemServiceNeedUpdate(serviceObj *host.Service) bool { log.Log.Info("isSystemServiceNeedUpdate()") - systemService, err := p.serviceManager.ReadService(serviceObj.Path) + systemService, err := p.hostHelper.ReadService(serviceObj.Path) if err != nil { log.Log.Error(err, "k8s-plugin isSystemServiceNeedUpdate(): failed to read sriov-config service file, ignoring", "path", serviceObj.Path) return false } if systemService != nil { - needChange, err := service.CompareServices(systemService, serviceObj) + needChange, err := p.hostHelper.CompareServices(systemService, serviceObj) if err != nil { log.Log.Error(err, "k8s-plugin isSystemServiceNeedUpdate(): failed to compare sriov-config service, ignoring") return false @@ -395,9 +386,9 @@ func (p *K8sPlugin) isSystemServiceNeedUpdate(serviceObj *service.Service) bool } func (p *K8sPlugin) systemServicesStateUpdate() error { - var services []*service.Service + var services []*host.Service for _, systemService := range p.getSwitchDevSystemServices() { - exist, err := p.serviceManager.IsServiceExist(systemService.Path) + exist, err := p.hostHelper.IsServiceExist(systemService.Path) if err != nil { return err } @@ -431,7 +422,7 @@ func (p *K8sPlugin) switchDevServicesStateUpdate() error { func (p *K8sPlugin) updateSriovService() error { if p.updateTarget.sriovScript { - err := p.serviceManager.EnableService(p.sriovService) + err := p.hostHelper.EnableService(p.sriovService) if err != nil { return err } @@ -442,21 +433,21 @@ func (p *K8sPlugin) updateSriovService() error { func (p *K8sPlugin) updateSwitchdevService() error { if p.updateTarget.switchdevBeforeNMService { - err := p.serviceManager.EnableService(p.switchdevBeforeNMService) + err := p.hostHelper.EnableService(p.switchdevBeforeNMService) if err != nil { return err } } if p.updateTarget.switchdevAfterNMService { - err := p.serviceManager.EnableService(p.switchdevAfterNMService) + err := p.hostHelper.EnableService(p.switchdevAfterNMService) if err != nil { return err } } if p.updateTarget.switchdevBeforeNMRunScript { - err := os.WriteFile(path.Join(chroot, p.switchdevBeforeNMRunScript.Path), + err := os.WriteFile(path.Join(consts.Host, p.switchdevBeforeNMRunScript.Path), []byte(p.switchdevBeforeNMRunScript.Contents.Inline), 0755) if err != nil { return err @@ -464,7 +455,7 @@ func (p *K8sPlugin) updateSwitchdevService() error { } if p.updateTarget.switchdevAfterNMRunScript { - err := os.WriteFile(path.Join(chroot, p.switchdevAfterNMRunScript.Path), + err := os.WriteFile(path.Join(consts.Host, p.switchdevAfterNMRunScript.Path), []byte(p.switchdevAfterNMRunScript.Contents.Inline), 0755) if err != nil { return err @@ -472,7 +463,7 @@ func (p *K8sPlugin) updateSwitchdevService() error { } if p.updateTarget.switchdevUdevScript { - err := os.WriteFile(path.Join(chroot, p.switchdevUdevScript.Path), + err := os.WriteFile(path.Join(consts.Host, p.switchdevUdevScript.Path), []byte(p.switchdevUdevScript.Contents.Inline), 0755) if err != nil { return err @@ -481,32 +472,3 @@ func (p *K8sPlugin) updateSwitchdevService() error { return nil } - -func (p *K8sPlugin) updateSystemService(serviceObj *service.Service) error { - systemService, err := p.serviceManager.ReadService(serviceObj.Path) - if err != nil { - return err - } - if systemService == nil { - // Invalid case to reach here - return fmt.Errorf("k8s-plugin updateSystemService(): can't update non-existing service %q", serviceObj.Name) - } - serviceOptions, err := unit.Deserialize(strings.NewReader(serviceObj.Content)) - if err != nil { - return err - } - updatedService, err := service.AppendToService(systemService, serviceOptions...) - if err != nil { - return err - } - - return p.serviceManager.EnableService(updatedService) -} - -func (p *K8sPlugin) SetSystemdFlag() { - p.useSystemdService = true -} - -func (p *K8sPlugin) IsSystemService() bool { - return p.useSystemdService -} diff --git a/pkg/plugins/mellanox/mellanox_plugin.go b/pkg/plugins/mellanox/mellanox_plugin.go index 5f9f9f56a..d36996ca6 100644 --- a/pkg/plugins/mellanox/mellanox_plugin.go +++ b/pkg/plugins/mellanox/mellanox_plugin.go @@ -2,15 +2,13 @@ package mellanox import ( "fmt" - "strconv" - "strings" "sigs.k8s.io/controller-runtime/pkg/log" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + mlx "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vendors/mellanox" ) var PluginName = "mellanox_plugin" @@ -18,36 +16,21 @@ var PluginName = "mellanox_plugin" type MellanoxPlugin struct { PluginName string SpecVersion string + helpers helper.HostHelpersInterface } -type mlnxNic struct { - enableSriov bool - totalVfs int - linkTypeP1 string - linkTypeP2 string -} - -const ( - PreconfiguredLinkType = "Preconfigured" - UknownLinkType = "Uknown" - TotalVfs = "NUM_OF_VFS" - EnableSriov = "SRIOV_EN" - LinkTypeP1 = "LINK_TYPE_P1" - LinkTypeP2 = "LINK_TYPE_P2" - MellanoxVendorID = "15b3" -) - -var attributesToChange map[string]mlnxNic +var attributesToChange map[string]mlx.MlxNic var mellanoxNicsStatus map[string]map[string]sriovnetworkv1.InterfaceExt var mellanoxNicsSpec map[string]sriovnetworkv1.Interface // Initialize our plugin and set up initial values -func NewMellanoxPlugin() (plugin.VendorPlugin, error) { +func NewMellanoxPlugin(helpers helper.HostHelpersInterface) (plugin.VendorPlugin, error) { mellanoxNicsStatus = map[string]map[string]sriovnetworkv1.InterfaceExt{} return &MellanoxPlugin{ PluginName: PluginName, SpecVersion: "1.0", + helpers: helpers, }, nil } @@ -68,18 +51,18 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS needDrain = false needReboot = false err = nil - attributesToChange = map[string]mlnxNic{} + attributesToChange = map[string]mlx.MlxNic{} mellanoxNicsSpec = map[string]sriovnetworkv1.Interface{} processedNics := map[string]bool{} // Read mellanox NIC status once if len(mellanoxNicsStatus) == 0 { for _, iface := range new.Status.Interfaces { - if iface.Vendor != MellanoxVendorID { + if iface.Vendor != mlx.MellanoxVendorID { continue } - pciPrefix := getPciAddressPrefix(iface.PciAddress) + pciPrefix := mlx.GetPciAddressPrefix(iface.PciAddress) if ifaces, ok := mellanoxNicsStatus[pciPrefix]; ok { ifaces[iface.PciAddress] = iface } else { @@ -90,14 +73,14 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS // Add only mellanox cards that required changes in the map, to help track dual port NICs for _, iface := range new.Spec.Interfaces { - pciPrefix := getPciAddressPrefix(iface.PciAddress) + pciPrefix := mlx.GetPciAddressPrefix(iface.PciAddress) if _, ok := mellanoxNicsStatus[pciPrefix]; !ok { continue } mellanoxNicsSpec[iface.PciAddress] = iface } - if utils.IsKernelLockdownMode(false) { + if p.helpers.IsKernelLockdownMode() { if len(mellanoxNicsSpec) > 0 { log.Log.Info("Lockdown mode detected, failing on interface update for mellanox devices") return false, false, fmt.Errorf("mellanox device detected when in lockdown mode") @@ -107,25 +90,25 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS } for _, ifaceSpec := range mellanoxNicsSpec { - pciPrefix := getPciAddressPrefix(ifaceSpec.PciAddress) + pciPrefix := mlx.GetPciAddressPrefix(ifaceSpec.PciAddress) // skip processed nics, help not running the same logic 2 times for dual port NICs if _, ok := processedNics[pciPrefix]; ok { continue } processedNics[pciPrefix] = true - fwCurrent, fwNext, err := getMlnxNicFwData(ifaceSpec.PciAddress) + fwCurrent, fwNext, err := p.helpers.GetMlxNicFwData(ifaceSpec.PciAddress) if err != nil { return false, false, err } - isDualPort := isDualPort(ifaceSpec.PciAddress) + isDualPort := mlx.IsDualPort(ifaceSpec.PciAddress, mellanoxNicsStatus) // Attributes to change - attrs := &mlnxNic{totalVfs: -1} + attrs := &mlx.MlxNic{TotalVfs: -1} var changeWithoutReboot bool var totalVfs int - totalVfs, needReboot, changeWithoutReboot = handleTotalVfs(fwCurrent, fwNext, attrs, ifaceSpec, isDualPort) - sriovEnNeedReboot, sriovEnChangeWithoutReboot := handleEnableSriov(totalVfs, fwCurrent, fwNext, attrs) + totalVfs, needReboot, changeWithoutReboot = mlx.HandleTotalVfs(fwCurrent, fwNext, attrs, ifaceSpec, isDualPort, mellanoxNicsSpec) + sriovEnNeedReboot, sriovEnChangeWithoutReboot := mlx.HandleEnableSriov(totalVfs, fwCurrent, fwNext, attrs) needReboot = needReboot || sriovEnNeedReboot changeWithoutReboot = changeWithoutReboot || sriovEnChangeWithoutReboot @@ -133,10 +116,10 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS if ifaceSpec.ExternallyManaged && needReboot { return true, true, fmt.Errorf( "interface %s required a change in the TotalVfs but the policy is externally managed failing: firmware TotalVf %d requested TotalVf %d", - ifaceSpec.PciAddress, fwCurrent.totalVfs, totalVfs) + ifaceSpec.PciAddress, fwCurrent.TotalVfs, totalVfs) } - needLinkChange, err := handleLinkType(pciPrefix, fwCurrent, attrs) + needLinkChange, err := mlx.HandleLinkType(pciPrefix, fwCurrent, attrs, mellanoxNicsSpec, mellanoxNicsStatus) if err != nil { return false, false, err } @@ -162,14 +145,14 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS continue } - _, fwNext, err := getMlnxNicFwData(pciAddress) + _, fwNext, err := p.helpers.GetMlxNicFwData(pciAddress) if err != nil { return false, false, err } - if fwNext.totalVfs > 0 || fwNext.enableSriov { - attributesToChange[pciAddress] = mlnxNic{totalVfs: 0} - log.Log.V(2).Info("Changing TotalVfs to 0, doesn't require rebooting", "fwNext.totalVfs", fwNext.totalVfs) + if fwNext.TotalVfs > 0 || fwNext.EnableSriov { + attributesToChange[pciAddress] = mlx.MlxNic{TotalVfs: 0} + log.Log.V(2).Info("Changing TotalVfs to 0, doesn't require rebooting", "fwNext.totalVfs", fwNext.TotalVfs) } } @@ -182,251 +165,10 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS // Apply config change func (p *MellanoxPlugin) Apply() error { - if utils.IsKernelLockdownMode(false) { + if p.helpers.IsKernelLockdownMode() { log.Log.Info("mellanox-plugin Apply() - skipping due to lockdown mode") return nil } log.Log.Info("mellanox-plugin Apply()") - return configFW() -} - -func configFW() error { - log.Log.Info("mellanox-plugin configFW()") - for pciAddr, fwArgs := range attributesToChange { - cmdArgs := []string{"-d", pciAddr, "-y", "set"} - if fwArgs.enableSriov { - cmdArgs = append(cmdArgs, fmt.Sprintf("%s=True", EnableSriov)) - } else if fwArgs.totalVfs == 0 { - cmdArgs = append(cmdArgs, fmt.Sprintf("%s=False", EnableSriov)) - } - if fwArgs.totalVfs > -1 { - cmdArgs = append(cmdArgs, fmt.Sprintf("%s=%d", TotalVfs, fwArgs.totalVfs)) - } - if len(fwArgs.linkTypeP1) > 0 { - cmdArgs = append(cmdArgs, fmt.Sprintf("%s=%s", LinkTypeP1, fwArgs.linkTypeP1)) - } - if len(fwArgs.linkTypeP2) > 0 { - cmdArgs = append(cmdArgs, fmt.Sprintf("%s=%s", LinkTypeP2, fwArgs.linkTypeP2)) - } - - log.Log.V(2).Info("mellanox-plugin: configFW()", "cmd-args", cmdArgs) - if len(cmdArgs) <= 4 { - continue - } - _, err := utils.RunCommand("mstconfig", cmdArgs...) - if err != nil { - log.Log.Error(err, "mellanox-plugin configFW(): failed") - return err - } - } - return nil -} - -func getMlnxNicFwData(pciAddress string) (current, next *mlnxNic, err error) { - log.Log.Info("mellanox-plugin getMlnxNicFwData()", "device", pciAddress) - err = nil - attrs := []string{TotalVfs, EnableSriov, LinkTypeP1, LinkTypeP2} - - out, err := utils.MstConfigReadData(pciAddress) - if err != nil { - log.Log.Error(err, "mellanox-plugin getMlnxNicFwData(): failed") - return - } - mstCurrentData, mstNextData := utils.ParseMstconfigOutput(out, attrs) - current, err = mlnxNicFromMap(mstCurrentData) - if err != nil { - log.Log.Error(err, "mellanox-plugin mlnxNicFromMap() for current mstconfig data failed") - return - } - next, err = mlnxNicFromMap(mstNextData) - if err != nil { - log.Log.Error(err, "mellanox-plugin mlnxNicFromMap() for next mstconfig data failed") - } - return -} - -func mlnxNicFromMap(mstData map[string]string) (*mlnxNic, error) { - log.Log.Info("mellanox-plugin mlnxNicFromMap()", "data", mstData) - fwData := &mlnxNic{} - if strings.Contains(mstData[EnableSriov], "True") { - fwData.enableSriov = true - } - i, err := strconv.Atoi(mstData[TotalVfs]) - if err != nil { - return nil, err - } - - fwData.totalVfs = i - fwData.linkTypeP1 = getLinkType(mstData[LinkTypeP1]) - if linkTypeP2, ok := mstData[LinkTypeP2]; ok { - fwData.linkTypeP2 = getLinkType(linkTypeP2) - } - - return fwData, nil -} - -func getPciAddressPrefix(pciAddress string) string { - return pciAddress[:len(pciAddress)-1] -} - -func isDualPort(pciAddress string) bool { - log.Log.Info("mellanox-plugin isDualPort()", "address", pciAddress) - pciAddressPrefix := getPciAddressPrefix(pciAddress) - return len(mellanoxNicsStatus[pciAddressPrefix]) > 1 -} - -func getLinkType(linkType string) string { - log.Log.Info("mellanox-plugin getLinkType()", "link-type", linkType) - if strings.Contains(linkType, constants.LinkTypeETH) { - return constants.LinkTypeETH - } else if strings.Contains(linkType, constants.LinkTypeIB) { - return constants.LinkTypeIB - } else if len(linkType) > 0 { - log.Log.Error(nil, "mellanox-plugin getLinkType(): link type is not one of [ETH, IB], treating as unknown", - "link-type", linkType) - return UknownLinkType - } else { - log.Log.Info("mellanox-plugin getLinkType(): LINK_TYPE_P* attribute was not found, treating as preconfigured link type") - return PreconfiguredLinkType - } -} - -func isLinkTypeRequireChange(iface sriovnetworkv1.Interface, ifaceStatus sriovnetworkv1.InterfaceExt, fwLinkType string) (bool, error) { - log.Log.Info("mellanox-plugin isLinkTypeRequireChange()", "device", iface.PciAddress) - if iface.LinkType != "" && !strings.EqualFold(ifaceStatus.LinkType, iface.LinkType) { - if !strings.EqualFold(iface.LinkType, constants.LinkTypeETH) && !strings.EqualFold(iface.LinkType, constants.LinkTypeIB) { - return false, fmt.Errorf("mellanox-plugin OnNodeStateChange(): Not supported link type: %s,"+ - " supported link types: [eth, ETH, ib, and IB]", iface.LinkType) - } - if fwLinkType == UknownLinkType { - return false, fmt.Errorf("mellanox-plugin OnNodeStateChange(): Unknown link type: %s", fwLinkType) - } - if fwLinkType == PreconfiguredLinkType { - return false, fmt.Errorf("mellanox-plugin OnNodeStateChange(): Network card %s does not support link type change", iface.PciAddress) - } - - return true, nil - } - - return false, nil -} - -func getOtherPortSpec(pciAddress string) *sriovnetworkv1.Interface { - log.Log.Info("mellanox-plugin getOtherPortSpec()", "pciAddress", pciAddress) - pciAddrPrefix := getPciAddressPrefix(pciAddress) - pciAddrSuffix := pciAddress[len(pciAddrPrefix):] - - if pciAddrSuffix == "0" { - iface := mellanoxNicsSpec[pciAddrPrefix+"1"] - return &iface - } - - iface := mellanoxNicsSpec[pciAddrPrefix+"0"] - return &iface -} - -// handleTotalVfs return required total VFs or max (required VFs for dual port NIC) and needReboot if totalVfs will change -func handleTotalVfs(fwCurrent, fwNext, attrs *mlnxNic, ifaceSpec sriovnetworkv1.Interface, isDualPort bool) ( - totalVfs int, needReboot, changeWithoutReboot bool) { - totalVfs = ifaceSpec.NumVfs - // Check if the other port is changing the number of VF - if isDualPort { - otherIfaceSpec := getOtherPortSpec(ifaceSpec.PciAddress) - if otherIfaceSpec != nil { - if otherIfaceSpec.NumVfs > totalVfs { - totalVfs = otherIfaceSpec.NumVfs - } - } - } - - // if the PF is externally managed we just need to check the totalVfs requested in the policy is not higher than - // the configured amount - if ifaceSpec.ExternallyManaged { - if totalVfs > fwCurrent.totalVfs { - log.Log.Error(nil, "The nic is externallyManaged and TotalVfs configured on the system is lower then requested VFs, failing configuration", - "current", fwCurrent.totalVfs, "requested", totalVfs) - attrs.totalVfs = totalVfs - needReboot = true - changeWithoutReboot = false - } - return - } - - if fwCurrent.totalVfs != totalVfs { - log.Log.V(2).Info("Changing TotalVfs, needs reboot", "current", fwCurrent.totalVfs, "requested", totalVfs) - attrs.totalVfs = totalVfs - needReboot = true - } - - // Remove policy then re-apply it - if !needReboot && fwNext.totalVfs != totalVfs { - log.Log.V(2).Info("Changing TotalVfs to same as Next Boot value, doesn't require rebooting", - "current", fwCurrent.totalVfs, "next", fwNext.totalVfs, "requested", totalVfs) - attrs.totalVfs = totalVfs - changeWithoutReboot = true - } - - return -} - -// handleEnableSriov based on totalVfs it decide to disable (totalVfs=0) or enable (totalVfs changed from 0) sriov -// and need reboot if enableSriov will change -func handleEnableSriov(totalVfs int, fwCurrent, fwNext, attrs *mlnxNic) (needReboot, changeWithoutReboot bool) { - if totalVfs == 0 && fwCurrent.enableSriov { - log.Log.V(2).Info("disabling Sriov, needs reboot") - attrs.enableSriov = false - return true, false - } else if totalVfs > 0 && !fwCurrent.enableSriov { - log.Log.V(2).Info("enabling Sriov, needs reboot") - attrs.enableSriov = true - return true, false - } else if totalVfs > 0 && !fwNext.enableSriov { - attrs.enableSriov = true - return false, true - } - - return false, false -} - -func getIfaceStatus(pciAddress string) sriovnetworkv1.InterfaceExt { - return mellanoxNicsStatus[getPciAddressPrefix(pciAddress)][pciAddress] -} - -// handleLinkType based on existing linkType and requested link -func handleLinkType(pciPrefix string, fwData, attr *mlnxNic) (bool, error) { - needReboot := false - - pciAddress := pciPrefix + "0" - if firstPortSpec, ok := mellanoxNicsSpec[pciAddress]; ok { - ifaceStatus := getIfaceStatus(pciAddress) - needChange, err := isLinkTypeRequireChange(firstPortSpec, ifaceStatus, fwData.linkTypeP1) - if err != nil { - return false, err - } - - if needChange { - log.Log.V(2).Info("Changing LinkTypeP1, needs reboot", - "from", fwData.linkTypeP1, "to", firstPortSpec.LinkType) - attr.linkTypeP1 = firstPortSpec.LinkType - needReboot = true - } - } - - pciAddress = pciPrefix + "1" - if secondPortSpec, ok := mellanoxNicsSpec[pciAddress]; ok { - ifaceStatus := getIfaceStatus(pciAddress) - needChange, err := isLinkTypeRequireChange(secondPortSpec, ifaceStatus, fwData.linkTypeP2) - if err != nil { - return false, err - } - - if needChange { - log.Log.V(2).Info("Changing LinkTypeP2, needs reboot", - "from", fwData.linkTypeP2, "to", secondPortSpec.LinkType) - attr.linkTypeP2 = secondPortSpec.LinkType - needReboot = true - } - } - - return needReboot, nil + return p.helpers.MlxConfigFW(attributesToChange) } diff --git a/pkg/plugins/mock/mock_plugin.go b/pkg/plugins/mock/mock_plugin.go new file mode 100644 index 000000000..b821139c5 --- /dev/null +++ b/pkg/plugins/mock/mock_plugin.go @@ -0,0 +1,93 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: plugin.go + +// Package mock_plugin is a generated GoMock package. +package mock_plugin + +import ( + reflect "reflect" + + gomock "github.com/golang/mock/gomock" + v1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" +) + +// MockVendorPlugin is a mock of VendorPlugin interface. +type MockVendorPlugin struct { + ctrl *gomock.Controller + recorder *MockVendorPluginMockRecorder +} + +// MockVendorPluginMockRecorder is the mock recorder for MockVendorPlugin. +type MockVendorPluginMockRecorder struct { + mock *MockVendorPlugin +} + +// NewMockVendorPlugin creates a new mock instance. +func NewMockVendorPlugin(ctrl *gomock.Controller) *MockVendorPlugin { + mock := &MockVendorPlugin{ctrl: ctrl} + mock.recorder = &MockVendorPluginMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockVendorPlugin) EXPECT() *MockVendorPluginMockRecorder { + return m.recorder +} + +// Apply mocks base method. +func (m *MockVendorPlugin) Apply() error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Apply") + ret0, _ := ret[0].(error) + return ret0 +} + +// Apply indicates an expected call of Apply. +func (mr *MockVendorPluginMockRecorder) Apply() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Apply", reflect.TypeOf((*MockVendorPlugin)(nil).Apply)) +} + +// Name mocks base method. +func (m *MockVendorPlugin) Name() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Name") + ret0, _ := ret[0].(string) + return ret0 +} + +// Name indicates an expected call of Name. +func (mr *MockVendorPluginMockRecorder) Name() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Name", reflect.TypeOf((*MockVendorPlugin)(nil).Name)) +} + +// OnNodeStateChange mocks base method. +func (m *MockVendorPlugin) OnNodeStateChange(new *v1.SriovNetworkNodeState) (bool, bool, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "OnNodeStateChange", new) + ret0, _ := ret[0].(bool) + ret1, _ := ret[1].(bool) + ret2, _ := ret[2].(error) + return ret0, ret1, ret2 +} + +// OnNodeStateChange indicates an expected call of OnNodeStateChange. +func (mr *MockVendorPluginMockRecorder) OnNodeStateChange(new interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "OnNodeStateChange", reflect.TypeOf((*MockVendorPlugin)(nil).OnNodeStateChange), new) +} + +// Spec mocks base method. +func (m *MockVendorPlugin) Spec() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Spec") + ret0, _ := ret[0].(string) + return ret0 +} + +// Spec indicates an expected call of Spec. +func (mr *MockVendorPluginMockRecorder) Spec() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Spec", reflect.TypeOf((*MockVendorPlugin)(nil).Spec)) +} diff --git a/pkg/plugins/plugin.go b/pkg/plugins/plugin.go index 1723ca610..276e37cf2 100644 --- a/pkg/plugins/plugin.go +++ b/pkg/plugins/plugin.go @@ -4,6 +4,7 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" ) +//go:generate ../../bin/mockgen -destination mock/mock_plugin.go -source plugin.go type VendorPlugin interface { // Return the name of plugin Name() string diff --git a/pkg/plugins/virtual/virtual_plugin.go b/pkg/plugins/virtual/virtual_plugin.go index 9fb6cb774..06a57a80f 100644 --- a/pkg/plugins/virtual/virtual_plugin.go +++ b/pkg/plugins/virtual/virtual_plugin.go @@ -6,10 +6,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host" + consts "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) var PluginName = "virtual_plugin" @@ -21,8 +21,7 @@ type VirtualPlugin struct { DesireState *sriovnetworkv1.SriovNetworkNodeState LastState *sriovnetworkv1.SriovNetworkNodeState LoadVfioDriver uint - RunningOnHost bool - HostManager host.HostManagerInterface + helpers helper.HostHelpersInterface } const ( @@ -32,13 +31,12 @@ const ( ) // Initialize our plugin and set up initial values -func NewVirtualPlugin(runningOnHost bool) (plugin.VendorPlugin, error) { +func NewVirtualPlugin(helper helper.HostHelpersInterface) (plugin.VendorPlugin, error) { return &VirtualPlugin{ PluginName: PluginName, SpecVersion: "1.0", LoadVfioDriver: unloaded, - RunningOnHost: runningOnHost, - HostManager: host.NewHostManager(runningOnHost), + helpers: helper, }, nil } @@ -79,12 +77,12 @@ func (p *VirtualPlugin) Apply() error { // This is the case for OpenStack deployments where the underlying virtualization platform is KVM. // NOTE: if VFIO was already loaded for some reason, we will not try to load it again with the new options. kernelArgs := "enable_unsafe_noiommu_mode=1" - if err := p.HostManager.LoadKernelModule("vfio", kernelArgs); err != nil { + if err := p.helpers.LoadKernelModule("vfio", kernelArgs); err != nil { log.Log.Error(err, "virtual-plugin Apply(): fail to load vfio kmod") return err } - if err := p.HostManager.LoadKernelModule("vfio_pci"); err != nil { + if err := p.helpers.LoadKernelModule("vfio_pci"); err != nil { log.Log.Error(err, "virtual-plugin Apply(): fail to load vfio_pci kmod") return err } @@ -98,12 +96,12 @@ func (p *VirtualPlugin) Apply() error { return nil } } - exit, err := utils.Chroot("/host") + exit, err := p.helpers.Chroot(consts.Host) if err != nil { return err } defer exit() - if err := utils.SyncNodeStateVirtual(p.DesireState); err != nil { + if err := syncNodeStateVirtual(p.DesireState, p.helpers); err != nil { return err } p.LastState = &sriovnetworkv1.SriovNetworkNodeState{} @@ -122,7 +120,62 @@ func (p *VirtualPlugin) IsSystemService() bool { func needVfioDriver(state *sriovnetworkv1.SriovNetworkNodeState) bool { for _, iface := range state.Spec.Interfaces { for i := range iface.VfGroups { - if iface.VfGroups[i].DeviceType == constants.DeviceTypeVfioPci { + if iface.VfGroups[i].DeviceType == consts.DeviceTypeVfioPci { + return true + } + } + } + return false +} + +// syncNodeStateVirtual attempt to update the node state to match the desired state in virtual platforms +func syncNodeStateVirtual(newState *sriovnetworkv1.SriovNetworkNodeState, helpers helper.HostHelpersInterface) error { + var err error + for _, ifaceStatus := range newState.Status.Interfaces { + for _, iface := range newState.Spec.Interfaces { + if iface.PciAddress == ifaceStatus.PciAddress { + if !needUpdateVirtual(&iface, &ifaceStatus) { + log.Log.V(2).Info("syncNodeStateVirtual(): no need update interface", "address", iface.PciAddress) + break + } + if err = helpers.ConfigSriovDeviceVirtual(&iface); err != nil { + log.Log.Error(err, "syncNodeStateVirtual(): fail to config sriov interface", "address", iface.PciAddress) + return err + } + break + } + } + } + return nil +} + +func needUpdateVirtual(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetworkv1.InterfaceExt) bool { + // The device MTU is set by the platform + // The NumVfs is always 1 + if iface.NumVfs > 0 { + for _, vf := range ifaceStatus.VFs { + ingroup := false + for _, group := range iface.VfGroups { + if sriovnetworkv1.IndexInRange(vf.VfID, group.VfRange) { + ingroup = true + if group.DeviceType != consts.DeviceTypeNetDevice { + if group.DeviceType != vf.Driver { + log.Log.V(2).Info("needUpdateVirtual(): Driver needs update", + "desired", group.DeviceType, "current", vf.Driver) + return true + } + } else { + if sriovnetworkv1.StringInArray(vf.Driver, vars.DpdkDrivers) { + log.Log.V(2).Info("needUpdateVirtual(): Driver needs update", + "desired", group.DeviceType, "current", vf.Driver) + return true + } + } + break + } + } + if !ingroup && sriovnetworkv1.StringInArray(vf.Driver, vars.DpdkDrivers) { + // VF which has DPDK driver loaded but not in any group, needs to be reset to default driver. return true } } diff --git a/pkg/systemd/systemd.go b/pkg/systemd/systemd.go index 87ae19a11..f682d85f5 100644 --- a/pkg/systemd/systemd.go +++ b/pkg/systemd/systemd.go @@ -25,30 +25,33 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) const ( - SriovSystemdConfigPath = utils.SriovConfBasePath + "/sriov-interface-config.yaml" - SriovSystemdResultPath = utils.SriovConfBasePath + "/sriov-interface-result.yaml" - sriovSystemdSupportedNicPath = utils.SriovConfBasePath + "/sriov-supported-nics-ids.yaml" + SriovSystemdConfigPath = consts.SriovConfBasePath + "/sriov-interface-config.yaml" + SriovSystemdResultPath = consts.SriovConfBasePath + "/sriov-interface-result.yaml" + sriovSystemdSupportedNicPath = consts.SriovConfBasePath + "/sriov-supported-nics-ids.yaml" sriovSystemdServiceBinaryPath = "/var/lib/sriov/sriov-network-config-daemon" - SriovHostSystemdConfigPath = "/host" + SriovSystemdConfigPath - SriovHostSystemdResultPath = "/host" + SriovSystemdResultPath - sriovHostSystemdSupportedNicPath = "/host" + sriovSystemdSupportedNicPath - sriovHostSystemdServiceBinaryPath = "/host" + sriovSystemdServiceBinaryPath + SriovHostSystemdConfigPath = consts.Host + SriovSystemdConfigPath + SriovHostSystemdResultPath = consts.Host + SriovSystemdResultPath + sriovHostSystemdSupportedNicPath = consts.Host + sriovSystemdSupportedNicPath + sriovHostSystemdServiceBinaryPath = consts.Host + sriovSystemdServiceBinaryPath SriovServicePath = "/etc/systemd/system/sriov-config.service" - SriovHostServicePath = "/host" + SriovServicePath + SriovHostServicePath = consts.Host + SriovServicePath - HostSriovConfBasePath = "/host" + utils.SriovConfBasePath + HostSriovConfBasePath = consts.Host + consts.SriovConfBasePath ) +// TODO: move this to the host interface also + type SriovConfig struct { Spec sriovnetworkv1.SriovNetworkNodeStateSpec `yaml:"spec"` UnsupportedNics bool `yaml:"unsupportedNics"` - PlatformType utils.PlatformType `yaml:"platformType"` + PlatformType consts.PlatformTypes `yaml:"platformType"` } type SriovResult struct { @@ -67,15 +70,15 @@ func ReadConfFile() (spec *SriovConfig, err error) { return spec, err } -func WriteConfFile(newState *sriovnetworkv1.SriovNetworkNodeState, unsupportedNics bool, platformType utils.PlatformType) (bool, error) { +func WriteConfFile(newState *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { newFile := false // remove the device plugin revision as we don't need it here newState.Spec.DpConfigVersion = "" sriovConfig := &SriovConfig{ newState.Spec, - unsupportedNics, - platformType, + vars.DevMode, + vars.PlatformType, } _, err := os.Stat(SriovHostSystemdConfigPath) diff --git a/pkg/webhook/validate.go b/pkg/webhook/validate.go index 366881dc6..64457d210 100644 --- a/pkg/webhook/validate.go +++ b/pkg/webhook/validate.go @@ -16,8 +16,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" - constants "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" - "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" ) const ( @@ -35,7 +35,7 @@ func validateSriovOperatorConfig(cr *sriovnetworkv1.SriovOperatorConfig, operati log.Log.V(2).Info("validateSriovOperatorConfig", "object", cr) var warnings []string - if cr.GetName() != constants.DefaultConfigName { + if cr.GetName() != consts.DefaultConfigName { return false, warnings, fmt.Errorf("only default SriovOperatorConfig is used") } @@ -95,7 +95,7 @@ func validateSriovNetworkNodePolicy(cr *sriovnetworkv1.SriovNetworkNodePolicy, o log.Log.V(2).Info("validateSriovNetworkNodePolicy", "object", cr) var warnings []string - if cr.GetName() == constants.DefaultPolicyName && cr.GetNamespace() == os.Getenv("NAMESPACE") { + if cr.GetName() == consts.DefaultPolicyName && cr.GetNamespace() == os.Getenv("NAMESPACE") { if operation == v1.Delete { // reject deletion of default policy return false, warnings, fmt.Errorf("default SriovNetworkNodePolicy shouldn't be deleted") @@ -193,19 +193,19 @@ func staticValidateSriovNetworkNodePolicy(cr *sriovnetworkv1.SriovNetworkNodePol // To configure RoCE on baremetal or virtual machine: // BM: DeviceType = netdevice && isRdma = true // VM: DeviceType = vfio-pci && isRdma = false - if cr.Spec.DeviceType == constants.DeviceTypeVfioPci && cr.Spec.IsRdma { + if cr.Spec.DeviceType == consts.DeviceTypeVfioPci && cr.Spec.IsRdma { return false, fmt.Errorf("'deviceType: vfio-pci' conflicts with 'isRdma: true'; Set 'deviceType' to (string)'netdevice' Or Set 'isRdma' to (bool)'false'") } - if strings.EqualFold(cr.Spec.LinkType, constants.LinkTypeIB) && !cr.Spec.IsRdma { + if strings.EqualFold(cr.Spec.LinkType, consts.LinkTypeIB) && !cr.Spec.IsRdma { return false, fmt.Errorf("'linkType: ib or IB' requires 'isRdma: true'; Set 'isRdma' to (bool)'true'") } // vdpa: deviceType must be set to 'netdevice' - if cr.Spec.DeviceType != constants.DeviceTypeNetDevice && (cr.Spec.VdpaType == constants.VdpaTypeVirtio || cr.Spec.VdpaType == constants.VdpaTypeVhost) { + if cr.Spec.DeviceType != consts.DeviceTypeNetDevice && (cr.Spec.VdpaType == consts.VdpaTypeVirtio || cr.Spec.VdpaType == consts.VdpaTypeVhost) { return false, fmt.Errorf("'deviceType: %s' conflicts with '%s'; Set 'deviceType' to (string)'netdevice' Or Remove 'vdpaType'", cr.Spec.DeviceType, cr.Spec.VdpaType) } // vdpa: device must be configured in switchdev mode - if (cr.Spec.VdpaType == constants.VdpaTypeVirtio || cr.Spec.VdpaType == constants.VdpaTypeVhost) && cr.Spec.EswitchMode != sriovnetworkv1.ESwithModeSwitchDev { + if (cr.Spec.VdpaType == consts.VdpaTypeVirtio || cr.Spec.VdpaType == consts.VdpaTypeVhost) && cr.Spec.EswitchMode != sriovnetworkv1.ESwithModeSwitchDev { return false, fmt.Errorf("vdpa requires the device to be configured in switchdev mode") } @@ -297,7 +297,7 @@ func validatePolicyForNodeState(policy *sriovnetworkv1.SriovNetworkNodePolicy, s if err == nil { interfaceSelected = true interfaceSelectedForNode = true - if policy.GetName() != constants.DefaultPolicyName && policy.Spec.NumVfs == 0 { + if policy.GetName() != consts.DefaultPolicyName && policy.Spec.NumVfs == 0 { return nil, fmt.Errorf("numVfs(%d) in CR %s is not allowed", policy.Spec.NumVfs, policy.GetName()) } if policy.Spec.NumVfs > iface.TotalVfs && iface.Vendor == IntelID { @@ -322,7 +322,7 @@ func validatePolicyForNodeState(policy *sriovnetworkv1.SriovNetworkNodePolicy, s } } // vdpa: only mellanox cards are supported - if (policy.Spec.VdpaType == constants.VdpaTypeVirtio || policy.Spec.VdpaType == constants.VdpaTypeVhost) && iface.Vendor != MellanoxID { + if (policy.Spec.VdpaType == consts.VdpaTypeVirtio || policy.Spec.VdpaType == consts.VdpaTypeVhost) && iface.Vendor != MellanoxID { return nil, fmt.Errorf("vendor(%s) in CR %s not supported for vdpa interface(%s)", iface.Vendor, policy.GetName(), iface.Name) } } else { @@ -440,7 +440,7 @@ func validateNicModel(selector *sriovnetworkv1.SriovNetworkNicSelector, iface *s } // Check the vendor and device ID of the VF only if we are on a virtual environment - for key := range utils.PlatformMap { + for key := range vars.PlatformsMap { if strings.Contains(strings.ToLower(node.Spec.ProviderID), strings.ToLower(key)) && selector.NetFilter != "" && selector.NetFilter == iface.NetFilter && sriovnetworkv1.IsVfSupportedModel(iface.Vendor, iface.DeviceID) { diff --git a/test/util/cluster/cluster.go b/test/util/cluster/cluster.go index fdf06bdf7..af2230cff 100644 --- a/test/util/cluster/cluster.go +++ b/test/util/cluster/cluster.go @@ -17,6 +17,7 @@ import ( runtimeclient "sigs.k8s.io/controller-runtime/pkg/client" sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" testclient "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/client" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/nodes" "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/pod" @@ -323,7 +324,7 @@ func GetNodeSecureBootState(clients *testclient.ClientSet, nodeName, namespace s podDefinition.Namespace = namespace volume := corev1.Volume{Name: "host", VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: "/"}}} - mount := corev1.VolumeMount{Name: "host", MountPath: "/host"} + mount := corev1.VolumeMount{Name: "host", MountPath: consts.Host} podDefinition = pod.RedefineWithMount(podDefinition, volume, mount) created, err := clients.Pods(namespace).Create(context.Background(), podDefinition, metav1.CreateOptions{}) if err != nil {