From bc4b37164299990c544625ed97e340471ed31ebd Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 3 Aug 2023 19:35:42 +0300 Subject: [PATCH 01/13] externally managed api Signed-off-by: Sebastian Sch --- api/v1/helper.go | 13 ++--- api/v1/sriovnetworknodepolicy_types.go | 2 + api/v1/sriovnetworknodestate_types.go | 48 ++++++++++--------- ...openshift.io_sriovnetworknodepolicies.yaml | 4 ++ ...k.openshift.io_sriovnetworknodestates.yaml | 4 ++ 5 files changed, 42 insertions(+), 29 deletions(-) diff --git a/api/v1/helper.go b/api/v1/helper.go index bdaa71a36..910592c74 100644 --- a/api/v1/helper.go +++ b/api/v1/helper.go @@ -282,12 +282,13 @@ func (p *SriovNetworkNodePolicy) Apply(state *SriovNetworkNodeState, equalPriori if s.Selected(&iface) { log.Info("Update interface", "name:", iface.Name) result := Interface{ - PciAddress: iface.PciAddress, - Mtu: p.Spec.Mtu, - Name: iface.Name, - LinkType: p.Spec.LinkType, - EswitchMode: p.Spec.EswitchMode, - NumVfs: p.Spec.NumVfs, + PciAddress: iface.PciAddress, + Mtu: p.Spec.Mtu, + Name: iface.Name, + LinkType: p.Spec.LinkType, + EswitchMode: p.Spec.EswitchMode, + NumVfs: p.Spec.NumVfs, + ExternallyManaged: p.Spec.ExternallyManaged, } if p.Spec.NumVfs > 0 { group, err := p.generateVfGroup(&iface) diff --git a/api/v1/sriovnetworknodepolicy_types.go b/api/v1/sriovnetworknodepolicy_types.go index 17e22d2fa..15503feb4 100644 --- a/api/v1/sriovnetworknodepolicy_types.go +++ b/api/v1/sriovnetworknodepolicy_types.go @@ -59,6 +59,8 @@ type SriovNetworkNodePolicySpec struct { VdpaType string `json:"vdpaType,omitempty"` // Exclude device's NUMA node when advertising this resource by SRIOV network device plugin. Default to false. ExcludeTopology bool `json:"excludeTopology,omitempty"` + // don't create the virtual function only allocated them to the device plugin. Defaults to false. + ExternallyManaged bool `json:"externallyManaged,omitempty"` } type SriovNetworkNicSelector struct { diff --git a/api/v1/sriovnetworknodestate_types.go b/api/v1/sriovnetworknodestate_types.go index 1203abdf2..a653b391f 100644 --- a/api/v1/sriovnetworknodestate_types.go +++ b/api/v1/sriovnetworknodestate_types.go @@ -29,14 +29,17 @@ type SriovNetworkNodeStateSpec struct { Interfaces Interfaces `json:"interfaces,omitempty"` } +type Interfaces []Interface + type Interface struct { - PciAddress string `json:"pciAddress"` - NumVfs int `json:"numVfs,omitempty"` - Mtu int `json:"mtu,omitempty"` - Name string `json:"name,omitempty"` - LinkType string `json:"linkType,omitempty"` - EswitchMode string `json:"eSwitchMode,omitempty"` - VfGroups []VfGroup `json:"vfGroups,omitempty"` + PciAddress string `json:"pciAddress"` + NumVfs int `json:"numVfs,omitempty"` + Mtu int `json:"mtu,omitempty"` + Name string `json:"name,omitempty"` + LinkType string `json:"linkType,omitempty"` + EswitchMode string `json:"eSwitchMode,omitempty"` + VfGroups []VfGroup `json:"vfGroups,omitempty"` + ExternallyManaged bool `json:"externallyManaged,omitempty"` } type VfGroup struct { @@ -49,23 +52,22 @@ type VfGroup struct { VdpaType string `json:"vdpaType,omitempty"` } -type Interfaces []Interface - type InterfaceExt struct { - Name string `json:"name,omitempty"` - Mac string `json:"mac,omitempty"` - Driver string `json:"driver,omitempty"` - PciAddress string `json:"pciAddress"` - Vendor string `json:"vendor,omitempty"` - DeviceID string `json:"deviceID,omitempty"` - NetFilter string `json:"netFilter,omitempty"` - Mtu int `json:"mtu,omitempty"` - NumVfs int `json:"numVfs,omitempty"` - LinkSpeed string `json:"linkSpeed,omitempty"` - LinkType string `json:"linkType,omitempty"` - EswitchMode string `json:"eSwitchMode,omitempty"` - TotalVfs int `json:"totalvfs,omitempty"` - VFs []VirtualFunction `json:"Vfs,omitempty"` + Name string `json:"name,omitempty"` + Mac string `json:"mac,omitempty"` + Driver string `json:"driver,omitempty"` + PciAddress string `json:"pciAddress"` + Vendor string `json:"vendor,omitempty"` + DeviceID string `json:"deviceID,omitempty"` + NetFilter string `json:"netFilter,omitempty"` + Mtu int `json:"mtu,omitempty"` + NumVfs int `json:"numVfs,omitempty"` + LinkSpeed string `json:"linkSpeed,omitempty"` + LinkType string `json:"linkType,omitempty"` + EswitchMode string `json:"eSwitchMode,omitempty"` + ExternallyManaged bool `json:"externallyManaged,omitempty"` + TotalVfs int `json:"totalvfs,omitempty"` + VFs []VirtualFunction `json:"Vfs,omitempty"` } type InterfaceExts []InterfaceExt diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml index a81c6a227..d5ae89cca 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml @@ -53,6 +53,10 @@ spec: description: Exclude device's NUMA node when advertising this resource by SRIOV network device plugin. Default to false. type: boolean + externallyManaged: + description: don't create the virtual function only allocated them + to the device plugin. Defaults to false. + type: boolean isRdma: description: RDMA mode. Defaults to false. type: boolean diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml index 1e7ef32b0..8e756f681 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -50,6 +50,8 @@ spec: properties: eSwitchMode: type: string + externallyManaged: + type: boolean linkType: type: string mtu: @@ -125,6 +127,8 @@ spec: type: string eSwitchMode: type: string + externallyManaged: + type: boolean linkSpeed: type: string linkType: From 55ae84d67804f612b01393b11675f26642c82a83 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 3 Aug 2023 19:35:54 +0300 Subject: [PATCH 02/13] documentation update Signed-off-by: Sebastian Sch --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index 7117ab61e..3a588a87e 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,23 @@ VF groups** (when #-notation is used in pfName field) are merged, otherwise only the highest priority policy is applied. In case of same-priority policies and overlapping VF groups, only the last processed policy is applied. +#### Externally Manage virtual functions + +When `ExternallyManage` is request on a policy the operator will only skip the virtual function creation. +The operator will only bind the virtual functions to the requested driver and expose them via the device plugin. +Another difference when this field is requested in the policy is that when this policy is removed the operator +will not remove the virtual functions from the policy. + +*Note:* This means the user must create the virtual functions before they apply the policy or the webhook will reject +the policy creation. + +It's possible to use something like nmstate kubernetes-nmstate or just a simple systemd file to create +the virtual functions on boot. + +This feature was created to support deployments where the user want to use some of the virtual funtions for the host +communication like storage network or out of band managment and the virtual functions must exist on boot and not only +after the operator and config-daemon are running. + ## Components and design This operator is split into 2 components: From 455951e8590d6485714a3fd1828d2fc66dea3401 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 3 Aug 2023 19:36:29 +0300 Subject: [PATCH 03/13] validation webhook update Signed-off-by: Sebastian Sch --- pkg/webhook/validate.go | 38 ++++ pkg/webhook/validate_test.go | 339 +++++++++++++++++++++++++++++++++++ 2 files changed, 377 insertions(+) diff --git a/pkg/webhook/validate.go b/pkg/webhook/validate.go index 571d93d37..624577df2 100644 --- a/pkg/webhook/validate.go +++ b/pkg/webhook/validate.go @@ -210,6 +210,13 @@ func staticValidateSriovNetworkNodePolicy(cr *sriovnetworkv1.SriovNetworkNodePol if (cr.Spec.VdpaType == constants.VdpaTypeVirtio || cr.Spec.VdpaType == constants.VdpaTypeVhost) && cr.Spec.EswitchMode != sriovnetworkv1.ESwithModeSwitchDev { return false, fmt.Errorf("vdpa requires the device to be configured in switchdev mode") } + + // Externally created: we don't support ExternallyManaged + EswitchMode + //TODO: if needed we will need to add this in the future as today EswitchMode is for HWOFFLOAD + if cr.Spec.ExternallyManaged && cr.Spec.EswitchMode == sriovnetworkv1.ESwithModeSwitchDev { + return false, fmt.Errorf("ExternallyManaged doesn't support the device to be configured in switchdev mode") + } + return true, nil } @@ -285,6 +292,21 @@ func validatePolicyForNodeState(policy *sriovnetworkv1.SriovNetworkNodePolicy, s if policy.Spec.NumVfs > MlxMaxVFs && iface.Vendor == MellanoxID { return fmt.Errorf("numVfs(%d) in CR %s exceed the maximum allowed value(%d)", policy.Spec.NumVfs, policy.GetName(), MlxMaxVFs) } + + // Externally create validations + if policy.Spec.ExternallyManaged { + if policy.Spec.NumVfs > iface.NumVfs { + return fmt.Errorf("numVfs(%d) in CR %s is higher than the virtual functions allocated for the PF externally value(%d)", policy.Spec.NumVfs, policy.GetName(), iface.NumVfs) + } + + if policy.Spec.Mtu != 0 && policy.Spec.Mtu > iface.Mtu { + return fmt.Errorf("MTU(%d) in CR %s is higher than the MTU for the PF externally value(%d)", policy.Spec.Mtu, policy.GetName(), iface.Mtu) + } + + if policy.Spec.LinkType != "" && strings.ToLower(policy.Spec.LinkType) != strings.ToLower(iface.LinkType) { + return fmt.Errorf("LinkType(%d) in CR %s is not equal to the LinkType for the PF externally value(%d)", policy.Spec.Mtu, policy.GetName(), iface.Mtu) + } + } // vdpa: only mellanox cards are supported if (policy.Spec.VdpaType == constants.VdpaTypeVirtio || policy.Spec.VdpaType == constants.VdpaTypeVhost) && iface.Vendor != MellanoxID { return fmt.Errorf("vendor(%s) in CR %s not supported for vdpa", iface.Vendor, policy.GetName()) @@ -326,6 +348,22 @@ func validatePfNames(current *sriovnetworkv1.SriovNetworkNodePolicy, previous *s // since it should already be evaluated in previous run. preName, preRngSt, preRngEnd, _ := sriovnetworkv1.ParsePFName(prePf) if curName == preName { + // reject policy with externallyManage if there is a policy on the same PF without it + if current.Spec.ExternallyManaged != previous.Spec.ExternallyManaged { + return fmt.Errorf("externallyManage is inconsistent with existing policy %s", previous.GetName()) + } + + // reject policy with externallyManage if there is a policy on the same PF with switch dev + if current.Spec.ExternallyManaged && previous.Spec.EswitchMode == sriovnetworkv1.ESwithModeSwitchDev { + return fmt.Errorf("externallyManage overlap with switchdev mode in existing policy %s", previous.GetName()) + } + + // reject policy with externallyManage if there is a policy on the same PF with switch dev + if previous.Spec.ExternallyManaged && current.Spec.EswitchMode == sriovnetworkv1.ESwithModeSwitchDev { + return fmt.Errorf("switchdev overlap with externallyManage mode in existing policy %s", previous.GetName()) + } + + // Check for overlapping ranges if curRngEnd < preRngSt || curRngSt > preRngEnd { return nil } else { diff --git a/pkg/webhook/validate_test.go b/pkg/webhook/validate_test.go index 965504770..743000862 100644 --- a/pkg/webhook/validate_test.go +++ b/pkg/webhook/validate_test.go @@ -74,6 +74,7 @@ func newNodeState() *SriovNetworkNodeState { Vendor: "8086", NumVfs: 4, TotalVfs: 64, + LinkType: "ETH", }, { VFs: []VirtualFunction{ @@ -87,6 +88,7 @@ func newNodeState() *SriovNetworkNodeState { Vendor: "8086", NumVfs: 4, TotalVfs: 64, + LinkType: "ETH", }, { VFs: []VirtualFunction{ @@ -100,6 +102,7 @@ func newNodeState() *SriovNetworkNodeState { Vendor: "8086", NumVfs: 4, TotalVfs: 64, + LinkType: "ETH", }, }, }, @@ -283,6 +286,318 @@ func TestValidatePolicyForNodeStateWithInvalidNumVfsPolicy(t *testing.T) { g.Expect(err).To(MatchError(ContainSubstring(fmt.Sprintf("numVfs(%d) in CR %s exceed the maximum allowed value(%d)", policy.Spec.NumVfs, policy.GetName(), state.Status.Interfaces[0].TotalVfs)))) } +func TestValidatePolicyForNodeStateWithInvalidNumVfsExternallyCreated(t *testing.T) { + state := newNodeState() + policy := &SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p1", + }, + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + PfNames: []string{"ens803f0"}, + RootDevices: []string{"0000:86:00.0"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 5, + Priority: 99, + ResourceName: "p0", + ExternallyManaged: true, + }, + } + g := NewGomegaWithT(t) + err := validatePolicyForNodeState(policy, state, NewNode()) + g.Expect(err).To(MatchError(ContainSubstring(fmt.Sprintf("numVfs(%d) in CR %s is higher than the virtual functions allocated for the PF externally value(%d)", policy.Spec.NumVfs, policy.GetName(), state.Status.Interfaces[0].NumVfs)))) +} + +func TestValidatePolicyForNodeStateWithValidNumVfsExternallyCreated(t *testing.T) { + state := newNodeState() + policy := &SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p1", + }, + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + PfNames: []string{"ens803f0"}, + RootDevices: []string{"0000:86:00.0"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 4, + Priority: 99, + ResourceName: "p0", + ExternallyManaged: true, + }, + } + g := NewGomegaWithT(t) + err := validatePolicyForNodeState(policy, state, NewNode()) + g.Expect(err).ToNot(HaveOccurred()) +} + +func TestValidatePolicyForNodeStateWithValidLowerNumVfsExternallyCreated(t *testing.T) { + state := newNodeState() + policy := &SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p1", + }, + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + PfNames: []string{"ens803f0"}, + RootDevices: []string{"0000:86:00.0"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 3, + Priority: 99, + ResourceName: "p0", + ExternallyManaged: true, + }, + } + g := NewGomegaWithT(t) + err := validatePolicyForNodeState(policy, state, NewNode()) + g.Expect(err).ToNot(HaveOccurred()) +} + +func TestValidatePolicyForNodePolicyWithOutExternallyManageConflict(t *testing.T) { + appliedPolicy := newNodePolicy() + appliedPolicy.Spec.ExternallyManaged = true + policy := &SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p0", + }, + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + PfNames: []string{"ens803f1#3-4"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 63, + Priority: 99, + ResourceName: "p0", + ExternallyManaged: true, + }, + } + g := NewGomegaWithT(t) + err := validatePolicyForNodePolicy(policy, appliedPolicy) + g.Expect(err).ToNot(HaveOccurred()) +} + +func TestValidatePolicyForNodePolicyWithExternallyManageConflict(t *testing.T) { + appliedPolicy := newNodePolicy() + policy := &SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p0", + }, + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + PfNames: []string{"ens803f1#3-4"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 63, + Priority: 99, + ResourceName: "p0", + ExternallyManaged: true, + }, + } + g := NewGomegaWithT(t) + err := validatePolicyForNodePolicy(policy, appliedPolicy) + g.Expect(err).To(MatchError(ContainSubstring(fmt.Sprintf("externallyManage is inconsistent with existing policy %s", appliedPolicy.ObjectMeta.Name)))) +} + +func TestValidatePolicyForNodePolicyWithExternallyManageConflictWithSwitchDev(t *testing.T) { + appliedPolicy := newNodePolicy() + appliedPolicy.Spec.EswitchMode = ESwithModeSwitchDev + + policy := &SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p0", + }, + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + PfNames: []string{"ens803f1#3-4"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 63, + Priority: 99, + ResourceName: "p0", + ExternallyManaged: true, + }, + } + g := NewGomegaWithT(t) + err := validatePolicyForNodePolicy(policy, appliedPolicy) + g.Expect(err).To(HaveOccurred()) +} + +func TestValidatePolicyForNodePolicyWithSwitchDevConflictWithExternallyManage(t *testing.T) { + appliedPolicy := newNodePolicy() + appliedPolicy.Spec.ExternallyManaged = true + + policy := &SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p0", + }, + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + PfNames: []string{"ens803f1#3-4"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 63, + Priority: 99, + ResourceName: "p0", + EswitchMode: ESwithModeSwitchDev, + }, + } + g := NewGomegaWithT(t) + err := validatePolicyForNodePolicy(policy, appliedPolicy) + g.Expect(err).To(HaveOccurred()) +} + +func TestValidatePolicyForNodeStateWithExternallyManageAndMTU(t *testing.T) { + state := newNodeState() + policy := &SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p1", + }, + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + PfNames: []string{"ens803f0"}, + RootDevices: []string{"0000:86:00.0"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 4, + Priority: 99, + ResourceName: "p0", + ExternallyManaged: true, + Mtu: 1500, + }, + } + g := NewGomegaWithT(t) + err := validatePolicyForNodeState(policy, state, NewNode()) + g.Expect(err).ToNot(HaveOccurred()) +} + +func TestValidatePolicyForNodeStateWithExternallyManageAndDifferentMTU(t *testing.T) { + state := newNodeState() + policy := &SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p1", + }, + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + PfNames: []string{"ens803f0"}, + RootDevices: []string{"0000:86:00.0"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 4, + Priority: 99, + ResourceName: "p0", + ExternallyManaged: true, + Mtu: 9000, + }, + } + g := NewGomegaWithT(t) + err := validatePolicyForNodeState(policy, state, NewNode()) + g.Expect(err).To(HaveOccurred()) +} + +func TestValidatePolicyForNodeStateWithExternallyManageAndLinkType(t *testing.T) { + state := newNodeState() + policy := &SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p1", + }, + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + PfNames: []string{"ens803f0"}, + RootDevices: []string{"0000:86:00.0"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 4, + Priority: 99, + ResourceName: "p0", + ExternallyManaged: true, + LinkType: "ETH", + }, + } + g := NewGomegaWithT(t) + err := validatePolicyForNodeState(policy, state, NewNode()) + g.Expect(err).ToNot(HaveOccurred()) + + policy.Spec.LinkType = "eth" + err = validatePolicyForNodeState(policy, state, NewNode()) + g.Expect(err).ToNot(HaveOccurred()) + + policy.Spec.LinkType = "ETH" + state.Status.Interfaces[0].LinkType = "eth" + err = validatePolicyForNodeState(policy, state, NewNode()) + g.Expect(err).ToNot(HaveOccurred()) +} + +func TestValidatePolicyForNodeStateWithExternallyManageAndDifferentLinkType(t *testing.T) { + state := newNodeState() + policy := &SriovNetworkNodePolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: "p1", + }, + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + PfNames: []string{"ens803f0"}, + RootDevices: []string{"0000:86:00.0"}, + Vendor: "8086", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 4, + Priority: 99, + ResourceName: "p0", + ExternallyManaged: true, + Mtu: 9000, + LinkType: "IB", + }, + } + g := NewGomegaWithT(t) + err := validatePolicyForNodeState(policy, state, NewNode()) + g.Expect(err).To(HaveOccurred()) +} + func TestValidatePolicyForNodePolicyWithOverlappedVfRange(t *testing.T) { appliedPolicy := newNodePolicy() policy := &SriovNetworkNodePolicy{ @@ -632,6 +947,30 @@ func TestStaticValidateSriovNetworkNodePolicyVhostVdpaMustSpecifySwitchDev(t *te g.Expect(ok).To(Equal(false)) } +func TestStaticValidateSriovNetworkNodePolicyWithExternallyCreatedAndSwitchDev(t *testing.T) { + policy := &SriovNetworkNodePolicy{ + Spec: SriovNetworkNodePolicySpec{ + DeviceType: "netdevice", + NicSelector: SriovNetworkNicSelector{ + Vendor: "8086", + DeviceID: "158b", + }, + NodeSelector: map[string]string{ + "feature.node.kubernetes.io/network-sriov.capable": "true", + }, + NumVfs: 63, + Priority: 99, + ResourceName: "p0", + EswitchMode: "switchdev", + ExternallyManaged: true, + }, + } + g := NewGomegaWithT(t) + ok, err := staticValidateSriovNetworkNodePolicy(policy) + g.Expect(err).To(HaveOccurred()) + g.Expect(ok).To(BeFalse()) +} + func TestValidatePolicyForNodeStateVirtioVdpaWithNotSupportedVendor(t *testing.T) { state := newNodeState() policy := &SriovNetworkNodePolicy{ From cf4a728983e9bb37adfc3ebf6728dd087edc4cf3 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 3 Aug 2023 19:37:11 +0300 Subject: [PATCH 04/13] new store package to save per pci policy info Signed-off-by: Sebastian Sch --- pkg/utils/mock/mock_store.go | 79 +++++++++++++++++++ pkg/utils/store.go | 145 +++++++++++++++++++++++++++++++++++ 2 files changed, 224 insertions(+) create mode 100644 pkg/utils/mock/mock_store.go create mode 100644 pkg/utils/store.go diff --git a/pkg/utils/mock/mock_store.go b/pkg/utils/mock/mock_store.go new file mode 100644 index 000000000..1527f611e --- /dev/null +++ b/pkg/utils/mock/mock_store.go @@ -0,0 +1,79 @@ +// Code generated by MockGen. DO NOT EDIT. +// Source: store.go + +// Package mock_utils is a generated GoMock package. +package mock_utils + +import ( + reflect "reflect" + + gomock "github.com/golang/mock/gomock" + v1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" +) + +// MockStoreManagerInterface is a mock of StoreManagerInterface interface. +type MockStoreManagerInterface struct { + ctrl *gomock.Controller + recorder *MockStoreManagerInterfaceMockRecorder +} + +// MockStoreManagerInterfaceMockRecorder is the mock recorder for MockStoreManagerInterface. +type MockStoreManagerInterfaceMockRecorder struct { + mock *MockStoreManagerInterface +} + +// NewMockStoreManagerInterface creates a new mock instance. +func NewMockStoreManagerInterface(ctrl *gomock.Controller) *MockStoreManagerInterface { + mock := &MockStoreManagerInterface{ctrl: ctrl} + mock.recorder = &MockStoreManagerInterfaceMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockStoreManagerInterface) EXPECT() *MockStoreManagerInterfaceMockRecorder { + return m.recorder +} + +// ClearPCIAddressFolder mocks base method. +func (m *MockStoreManagerInterface) ClearPCIAddressFolder() error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ClearPCIAddressFolder") + ret0, _ := ret[0].(error) + return ret0 +} + +// ClearPCIAddressFolder indicates an expected call of ClearPCIAddressFolder. +func (mr *MockStoreManagerInterfaceMockRecorder) ClearPCIAddressFolder() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ClearPCIAddressFolder", reflect.TypeOf((*MockStoreManagerInterface)(nil).ClearPCIAddressFolder)) +} + +// LoadPfsStatus mocks base method. +func (m *MockStoreManagerInterface) LoadPfsStatus(pciAddress string) (*v1.Interface, bool, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "LoadPfsStatus", pciAddress) + ret0, _ := ret[0].(*v1.Interface) + ret1, _ := ret[1].(bool) + ret2, _ := ret[2].(error) + return ret0, ret1, ret2 +} + +// LoadPfsStatus indicates an expected call of LoadPfsStatus. +func (mr *MockStoreManagerInterfaceMockRecorder) LoadPfsStatus(pciAddress interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LoadPfsStatus", reflect.TypeOf((*MockStoreManagerInterface)(nil).LoadPfsStatus), pciAddress) +} + +// SaveLastPfAppliedStatus mocks base method. +func (m *MockStoreManagerInterface) SaveLastPfAppliedStatus(pciAddress string, PfInfo *v1.Interface) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "SaveLastPfAppliedStatus", pciAddress, PfInfo) + ret0, _ := ret[0].(error) + return ret0 +} + +// SaveLastPfAppliedStatus indicates an expected call of SaveLastPfAppliedStatus. +func (mr *MockStoreManagerInterfaceMockRecorder) SaveLastPfAppliedStatus(pciAddress, PfInfo interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SaveLastPfAppliedStatus", reflect.TypeOf((*MockStoreManagerInterface)(nil).SaveLastPfAppliedStatus), pciAddress, PfInfo) +} diff --git a/pkg/utils/store.go b/pkg/utils/store.go new file mode 100644 index 000000000..4db7b86ce --- /dev/null +++ b/pkg/utils/store.go @@ -0,0 +1,145 @@ +package utils + +import ( + "encoding/json" + "fmt" + "os" + "path" + "path/filepath" + + "github.com/golang/glog" + + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" +) + +const ( + SriovConfBasePath = "/etc/sriov-operator" + PfAppliedConfig = SriovConfBasePath + "/pci" +) + +// Contains all the file storing on the host +// +//go:generate ../../bin/mockgen -destination mock/mock_store.go -source store.go +type StoreManagerInterface interface { + ClearPCIAddressFolder() error + SaveLastPfAppliedStatus(pciAddress string, PfInfo *sriovnetworkv1.Interface) error + LoadPfsStatus(pciAddress string) (*sriovnetworkv1.Interface, bool, error) +} + +type StoreManager struct { + RunOnHost bool +} + +// NewStoreManager: create the initial folders needed to store the info about the PF +// and return a storeManager struct that implements the StoreManagerInterface interface +func NewStoreManager(runOnHost bool) (StoreManagerInterface, error) { + if err := createOperatorConfigFolderIfNeeded(runOnHost); err != nil { + return nil, err + } + + return &StoreManager{runOnHost}, nil +} + +// createOperatorConfigFolderIfNeeded: create the operator base folder on the host +// together with the pci folder to save the PF status objects as json files +func createOperatorConfigFolderIfNeeded(runOnHost bool) error { + hostExtension := getHostExtension(runOnHost) + SriovConfBasePathUse := filepath.Join(hostExtension, SriovConfBasePath) + _, err := os.Stat(SriovConfBasePathUse) + if err != nil { + if os.IsNotExist(err) { + err = os.MkdirAll(SriovConfBasePathUse, os.ModeDir) + if err != nil { + return fmt.Errorf("failed to create the sriov config folder on host in path %s: %v", SriovConfBasePathUse, err) + } + } else { + return fmt.Errorf("failed to check if the sriov config folder on host in path %s exist: %v", SriovConfBasePathUse, err) + } + } + + PfAppliedConfigUse := filepath.Join(hostExtension, PfAppliedConfig) + _, err = os.Stat(PfAppliedConfigUse) + if err != nil { + if os.IsNotExist(err) { + err = os.MkdirAll(PfAppliedConfigUse, os.ModeDir) + if err != nil { + return fmt.Errorf("failed to create the pci folder on host in path %s: %v", PfAppliedConfigUse, err) + } + } else { + return fmt.Errorf("failed to check if the pci folder on host in path %s exist: %v", PfAppliedConfigUse, err) + } + } + + return nil +} + +// ClearPCIAddressFolder: removes all the PFs storage information +func (s *StoreManager) ClearPCIAddressFolder() error { + hostExtension := getHostExtension(s.RunOnHost) + PfAppliedConfigUse := filepath.Join(hostExtension, PfAppliedConfig) + _, err := os.Stat(PfAppliedConfigUse) + if err != nil { + if os.IsNotExist(err) { + return nil + } + return fmt.Errorf("failed to check the pci address folder path %s", PfAppliedConfigUse) + } + + err = os.RemoveAll(PfAppliedConfigUse) + if err != nil { + return fmt.Errorf("failed to remove the PCI address folder on path %s: %v", PfAppliedConfigUse, err) + } + + err = os.Mkdir(PfAppliedConfigUse, os.ModeDir) + if err != nil { + return fmt.Errorf("failed to create the pci folder on host in path %s: %v", PfAppliedConfigUse, err) + } + + return nil +} + +// SaveLastPfAppliedStatus will save the PF object as a json into the /etc/sriov-operator/pci/ +// this function must be called after running the chroot function +func (s *StoreManager) SaveLastPfAppliedStatus(pciAddress string, PfInfo *sriovnetworkv1.Interface) error { + data, err := json.Marshal(PfInfo) + if err != nil { + glog.Errorf("failed to marshal PF status %+v: %v", *PfInfo, err) + return err + } + + hostExtension := getHostExtension(s.RunOnHost) + pathFile := filepath.Join(hostExtension, PfAppliedConfig, pciAddress) + err = os.WriteFile(pathFile, data, 0644) + return err +} + +// LoadPfsStatus convert the /etc/sriov-operator/pci/ json to pfstatus +// returns false if the file doesn't exist. +func (s *StoreManager) LoadPfsStatus(pciAddress string) (*sriovnetworkv1.Interface, bool, error) { + hostExtension := getHostExtension(s.RunOnHost) + pathFile := filepath.Join(hostExtension, PfAppliedConfig, pciAddress) + pfStatus := &sriovnetworkv1.Interface{} + data, err := os.ReadFile(pathFile) + if err != nil { + if os.IsNotExist(err) { + return nil, false, nil + } + glog.Errorf("failed to read PF status from path %s: %v", pathFile, err) + return nil, false, err + } + + err = json.Unmarshal(data, pfStatus) + if err != nil { + glog.Errorf("failed to unmarshal PF status %s: %v", data, err) + return nil, false, err + } + + return pfStatus, true, nil +} + +func getHostExtension(runOnHost bool) string { + if !runOnHost { + return path.Join(FilesystemRoot, "/host") + } + return FilesystemRoot +} From 5a74fc33c96681734a3018a113eee3ce139d042d Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 3 Aug 2023 19:37:34 +0300 Subject: [PATCH 05/13] update host package and fix mocks Signed-off-by: Sebastian Sch --- pkg/host/host.go | 180 ++++++++++++++--------------- pkg/host/mock/mock_host.go | 226 ++++++++++++++++++------------------- 2 files changed, 203 insertions(+), 203 deletions(-) diff --git a/pkg/host/host.go b/pkg/host/host.go index 37af5ddba..482607d07 100644 --- a/pkg/host/host.go +++ b/pkg/host/host.go @@ -51,17 +51,17 @@ type HostManagerInterface interface { // private functions // part of the interface for the mock generation LoadKernelModule(name string, args ...string) error - isKernelModuleLoaded(string) (bool, error) - isRHELSystem() (bool, error) - isUbuntuSystem() (bool, error) - isCoreOS() (bool, error) - rdmaIsLoaded() (bool, error) - enableRDMA(string, string, string) (bool, error) - installRDMA(string) error - triggerUdevEvent() error - reloadDriver(string) error - enableRDMAOnRHELMachine() (bool, error) - getOSPrettyName() (string, error) + IsKernelModuleLoaded(string) (bool, error) + IsRHELSystem() (bool, error) + IsUbuntuSystem() (bool, error) + IsCoreOS() (bool, error) + RdmaIsLoaded() (bool, error) + EnableRDMA(string, string, string) (bool, error) + InstallRDMA(string) error + TriggerUdevEvent() error + ReloadDriver(string) error + EnableRDMAOnRHELMachine() (bool, error) + GetOSPrettyName() (string, error) } type HostManager struct { @@ -78,11 +78,11 @@ func NewHostManager(runOnHost bool) HostManagerInterface { func (h *HostManager) LoadKernelModule(name string, args ...string) error { glog.Infof("LoadKernelModule(): try to load kernel module %s with arguments '%s'", name, args) - chrootDefinition := getChrootExtention(h.RunOnHost) + chrootDefinition := getChrootExtension(h.RunOnHost) cmdArgs := strings.Join(args, " ") // check if the driver is already loaded in to the system - isLoaded, err := h.isKernelModuleLoaded(name) + isLoaded, err := h.IsKernelModuleLoaded(name) if err != nil { glog.Errorf("LoadKernelModule(): failed to check if kernel module %s is already loaded", name) } @@ -99,23 +99,23 @@ func (h *HostManager) LoadKernelModule(name string, args ...string) error { return nil } -func (h *HostManager) isKernelModuleLoaded(kernelModuleName string) (bool, error) { - glog.Infof("isKernelModuleLoaded(): check if kernel module %s is loaded", kernelModuleName) - chrootDefinition := getChrootExtention(h.RunOnHost) +func (h *HostManager) IsKernelModuleLoaded(kernelModuleName string) (bool, error) { + glog.Infof("IsKernelModuleLoaded(): check if kernel module %s is loaded", kernelModuleName) + chrootDefinition := getChrootExtension(h.RunOnHost) stdout, stderr, err := h.cmd.Run("/bin/sh", "-c", fmt.Sprintf("%s lsmod | grep \"^%s\"", chrootDefinition, kernelModuleName)) if err != nil && stderr.Len() != 0 { - glog.Errorf("isKernelModuleLoaded(): failed to check if kernel module %s is loaded: error: %v stderr %s", kernelModuleName, err, stderr.String()) + glog.Errorf("IsKernelModuleLoaded(): failed to check if kernel module %s is loaded: error: %v stderr %s", kernelModuleName, err, stderr.String()) return false, err } - glog.V(2).Infof("isKernelModuleLoaded(): %v", stdout.String()) + glog.V(2).Infof("IsKernelModuleLoaded(): %v", stdout.String()) if stderr.Len() != 0 { - glog.Errorf("isKernelModuleLoaded(): failed to check if kernel module %s is loaded: error: %v stderr %s", kernelModuleName, err, stderr.String()) + glog.Errorf("IsKernelModuleLoaded(): failed to check if kernel module %s is loaded: error: %v stderr %s", kernelModuleName, err, stderr.String()) return false, fmt.Errorf(stderr.String()) } if stdout.Len() != 0 { - glog.Infof("isKernelModuleLoaded(): kernel module %s already loaded", kernelModuleName) + glog.Infof("IsKernelModuleLoaded(): kernel module %s already loaded", kernelModuleName) return true, nil } @@ -136,7 +136,7 @@ func (h *HostManager) TryEnableVhostNet() { func (h *HostManager) TryEnableRdma() (bool, error) { glog.V(2).Infof("tryEnableRdma()") - chrootDefinition := getChrootExtention(h.RunOnHost) + chrootDefinition := getChrootExtension(h.RunOnHost) // check if the driver is already loaded in to the system _, stderr, mlx4Err := h.cmd.Run("/bin/sh", "-c", fmt.Sprintf("grep --quiet 'mlx4_en' <(%s lsmod)", chrootDefinition)) @@ -156,7 +156,7 @@ func (h *HostManager) TryEnableRdma() (bool, error) { return false, nil } - isRhelSystem, err := h.isRHELSystem() + isRhelSystem, err := h.IsRHELSystem() if err != nil { glog.Errorf("tryEnableRdma(): failed to check if the machine is base on RHEL: %v", err) return false, err @@ -164,20 +164,20 @@ func (h *HostManager) TryEnableRdma() (bool, error) { // RHEL check if isRhelSystem { - return h.enableRDMAOnRHELMachine() + return h.EnableRDMAOnRHELMachine() } - isUbuntuSystem, err := h.isUbuntuSystem() + isUbuntuSystem, err := h.IsUbuntuSystem() if err != nil { glog.Errorf("tryEnableRdma(): failed to check if the machine is base on Ubuntu: %v", err) return false, err } if isUbuntuSystem { - return h.enableRDMAOnUbuntuMachine() + return h.EnableRDMAOnUbuntuMachine() } - osName, err := h.getOSPrettyName() + osName, err := h.GetOSPrettyName() if err != nil { glog.Errorf("tryEnableRdma(): failed to check OS name: %v", err) return false, err @@ -187,19 +187,19 @@ func (h *HostManager) TryEnableRdma() (bool, error) { return false, fmt.Errorf("unable to load RDMA unsupported OS: %s", osName) } -func (h *HostManager) enableRDMAOnRHELMachine() (bool, error) { - glog.Infof("enableRDMAOnRHELMachine()") - isCoreOsSystem, err := h.isCoreOS() +func (h *HostManager) EnableRDMAOnRHELMachine() (bool, error) { + glog.Infof("EnableRDMAOnRHELMachine()") + isCoreOsSystem, err := h.IsCoreOS() if err != nil { - glog.Errorf("enableRDMAOnRHELMachine(): failed to check if the machine runs CoreOS: %v", err) + glog.Errorf("EnableRDMAOnRHELMachine(): failed to check if the machine runs CoreOS: %v", err) return false, err } // CoreOS check if isCoreOsSystem { - isRDMALoaded, err := h.rdmaIsLoaded() + isRDMALoaded, err := h.RdmaIsLoaded() if err != nil { - glog.Errorf("enableRDMAOnRHELMachine(): failed to check if RDMA kernel modules are loaded: %v", err) + glog.Errorf("EnableRDMAOnRHELMachine(): failed to check if RDMA kernel modules are loaded: %v", err) return false, err } @@ -207,26 +207,26 @@ func (h *HostManager) enableRDMAOnRHELMachine() (bool, error) { } // RHEL - glog.Infof("enableRDMAOnRHELMachine(): enabling RDMA on RHEL machine") - isRDMAEnable, err := h.enableRDMA(rhelRDMAConditionFile, rhelRDMAServiceName, rhelPackageManager) + glog.Infof("EnableRDMAOnRHELMachine(): enabling RDMA on RHEL machine") + isRDMAEnable, err := h.EnableRDMA(rhelRDMAConditionFile, rhelRDMAServiceName, rhelPackageManager) if err != nil { - glog.Errorf("enableRDMAOnRHELMachine(): failed to enable RDMA on RHEL machine: %v", err) + glog.Errorf("EnableRDMAOnRHELMachine(): failed to enable RDMA on RHEL machine: %v", err) return false, err } // check if we need to install rdma-core package if isRDMAEnable { - isRDMALoaded, err := h.rdmaIsLoaded() + isRDMALoaded, err := h.RdmaIsLoaded() if err != nil { - glog.Errorf("enableRDMAOnRHELMachine(): failed to check if RDMA kernel modules are loaded: %v", err) + glog.Errorf("EnableRDMAOnRHELMachine(): failed to check if RDMA kernel modules are loaded: %v", err) return false, err } // if ib kernel module is not loaded trigger a loading if isRDMALoaded { - err = h.triggerUdevEvent() + err = h.TriggerUdevEvent() if err != nil { - glog.Errorf("enableRDMAOnRHELMachine() failed to trigger udev event: %v", err) + glog.Errorf("EnableRDMAOnRHELMachine() failed to trigger udev event: %v", err) return false, err } } @@ -235,27 +235,27 @@ func (h *HostManager) enableRDMAOnRHELMachine() (bool, error) { return true, nil } -func (h *HostManager) enableRDMAOnUbuntuMachine() (bool, error) { - glog.Infof("enableRDMAOnUbuntuMachine(): enabling RDMA on RHEL machine") - isRDMAEnable, err := h.enableRDMA(ubuntuRDMAConditionFile, ubuntuRDMAServiceName, ubuntuPackageManager) +func (h *HostManager) EnableRDMAOnUbuntuMachine() (bool, error) { + glog.Infof("EnableRDMAOnUbuntuMachine(): enabling RDMA on RHEL machine") + isRDMAEnable, err := h.EnableRDMA(ubuntuRDMAConditionFile, ubuntuRDMAServiceName, ubuntuPackageManager) if err != nil { - glog.Errorf("enableRDMAOnUbuntuMachine(): failed to enable RDMA on Ubuntu machine: %v", err) + glog.Errorf("EnableRDMAOnUbuntuMachine(): failed to enable RDMA on Ubuntu machine: %v", err) return false, err } // check if we need to install rdma-core package if isRDMAEnable { - isRDMALoaded, err := h.rdmaIsLoaded() + isRDMALoaded, err := h.RdmaIsLoaded() if err != nil { - glog.Errorf("enableRDMAOnUbuntuMachine(): failed to check if RDMA kernel modules are loaded: %v", err) + glog.Errorf("EnableRDMAOnUbuntuMachine(): failed to check if RDMA kernel modules are loaded: %v", err) return false, err } // if ib kernel module is not loaded trigger a loading if isRDMALoaded { - err = h.triggerUdevEvent() + err = h.TriggerUdevEvent() if err != nil { - glog.Errorf("enableRDMAOnUbuntuMachine() failed to trigger udev event: %v", err) + glog.Errorf("EnableRDMAOnUbuntuMachine() failed to trigger udev event: %v", err) return false, err } } @@ -264,27 +264,27 @@ func (h *HostManager) enableRDMAOnUbuntuMachine() (bool, error) { return true, nil } -func (h *HostManager) isRHELSystem() (bool, error) { - glog.Infof("isRHELSystem(): checking for RHEL machine") +func (h *HostManager) IsRHELSystem() (bool, error) { + glog.Infof("IsRHELSystem(): checking for RHEL machine") path := redhatReleaseFile if !h.RunOnHost { path = pathlib.Join(hostPathFromDaemon, path) } if _, err := os.Stat(path); err != nil { if os.IsNotExist(err) { - glog.V(2).Infof("isRHELSystem() not a RHEL machine") + glog.V(2).Infof("IsRHELSystem() not a RHEL machine") return false, nil } - glog.Errorf("isRHELSystem() failed to check for os release file on path %s: %v", path, err) + glog.Errorf("IsRHELSystem() failed to check for os release file on path %s: %v", path, err) return false, err } return true, nil } -func (h *HostManager) isCoreOS() (bool, error) { - glog.Infof("isCoreOS(): checking for CoreOS machine") +func (h *HostManager) IsCoreOS() (bool, error) { + glog.Infof("IsCoreOS(): checking for CoreOS machine") path := redhatReleaseFile if !h.RunOnHost { path = pathlib.Join(hostPathFromDaemon, path) @@ -292,7 +292,7 @@ func (h *HostManager) isCoreOS() (bool, error) { data, err := os.ReadFile(path) if err != nil { - glog.Errorf("isCoreOS(): failed to read RHEL release file on path %s: %v", path, err) + glog.Errorf("IsCoreOS(): failed to read RHEL release file on path %s: %v", path, err) return false, err } @@ -303,8 +303,8 @@ func (h *HostManager) isCoreOS() (bool, error) { return false, nil } -func (h *HostManager) isUbuntuSystem() (bool, error) { - glog.Infof("isUbuntuSystem(): checking for Ubuntu machine") +func (h *HostManager) IsUbuntuSystem() (bool, error) { + glog.Infof("IsUbuntuSystem(): checking for Ubuntu machine") path := genericOSReleaseFile if !h.RunOnHost { path = pathlib.Join(hostPathFromDaemon, path) @@ -312,17 +312,17 @@ func (h *HostManager) isUbuntuSystem() (bool, error) { if _, err := os.Stat(path); err != nil { if os.IsNotExist(err) { - glog.Errorf("isUbuntuSystem() os-release on path %s doesn't exist: %v", path, err) + glog.Errorf("IsUbuntuSystem() os-release on path %s doesn't exist: %v", path, err) return false, err } - glog.Errorf("isUbuntuSystem() failed to check for os release file on path %s: %v", path, err) + glog.Errorf("IsUbuntuSystem() failed to check for os release file on path %s: %v", path, err) return false, err } stdout, stderr, err := h.cmd.Run("/bin/sh", "-c", fmt.Sprintf("grep -i --quiet 'ubuntu' %s", path)) if err != nil && stderr.Len() != 0 { - glog.Errorf("isUbuntuSystem(): failed to check for ubuntu operating system name in os-releasae file: error: %v stderr %s", err, stderr.String()) + glog.Errorf("IsUbuntuSystem(): failed to check for ubuntu operating system name in os-releasae file: error: %v stderr %s", err, stderr.String()) return false, fmt.Errorf(stderr.String()) } @@ -333,14 +333,14 @@ func (h *HostManager) isUbuntuSystem() (bool, error) { return false, nil } -func (h *HostManager) rdmaIsLoaded() (bool, error) { - glog.V(2).Infof("rdmaIsLoaded()") - chrootDefinition := getChrootExtention(h.RunOnHost) +func (h *HostManager) RdmaIsLoaded() (bool, error) { + glog.V(2).Infof("RdmaIsLoaded()") + chrootDefinition := getChrootExtension(h.RunOnHost) // check if the driver is already loaded in to the system _, stderr, err := h.cmd.Run("/bin/sh", "-c", fmt.Sprintf("grep --quiet '\\(^ib\\|^rdma\\)' <(%s lsmod)", chrootDefinition)) if err != nil && stderr.Len() != 0 { - glog.Errorf("rdmaIsLoaded(): fail to check if ib and rdma kernel modules are loaded: error: %v stderr %s", err, stderr.String()) + glog.Errorf("RdmaIsLoaded(): fail to check if ib and rdma kernel modules are loaded: error: %v stderr %s", err, stderr.String()) return false, fmt.Errorf(stderr.String()) } @@ -351,61 +351,61 @@ func (h *HostManager) rdmaIsLoaded() (bool, error) { return true, nil } -func (h *HostManager) enableRDMA(conditionFilePath, serviceName, packageManager string) (bool, error) { +func (h *HostManager) EnableRDMA(conditionFilePath, serviceName, packageManager string) (bool, error) { path := conditionFilePath if !h.RunOnHost { path = pathlib.Join(hostPathFromDaemon, path) } - glog.Infof("enableRDMA(): checking for service file on path %s", path) + glog.Infof("EnableRDMA(): checking for service file on path %s", path) if _, err := os.Stat(path); err != nil { if os.IsNotExist(err) { - glog.V(2).Infof("enableRDMA(): RDMA server doesn't exist") - err = h.installRDMA(packageManager) + glog.V(2).Infof("EnableRDMA(): RDMA server doesn't exist") + err = h.InstallRDMA(packageManager) if err != nil { - glog.Errorf("enableRDMA() failed to install RDMA package: %v", err) + glog.Errorf("EnableRDMA() failed to install RDMA package: %v", err) return false, err } - err = h.triggerUdevEvent() + err = h.TriggerUdevEvent() if err != nil { - glog.Errorf("enableRDMA() failed to trigger udev event: %v", err) + glog.Errorf("EnableRDMA() failed to trigger udev event: %v", err) return false, err } return false, nil } - glog.Errorf("enableRDMA() failed to check for os release file on path %s: %v", path, err) + glog.Errorf("EnableRDMA() failed to check for os release file on path %s: %v", path, err) return false, err } - glog.Infof("enableRDMA(): service %s.service installed", serviceName) + glog.Infof("EnableRDMA(): service %s.service installed", serviceName) return true, nil } -func (h *HostManager) installRDMA(packageManager string) error { - glog.Infof("installRDMA(): installing RDMA") - chrootDefinition := getChrootExtention(h.RunOnHost) +func (h *HostManager) InstallRDMA(packageManager string) error { + glog.Infof("InstallRDMA(): installing RDMA") + chrootDefinition := getChrootExtension(h.RunOnHost) stdout, stderr, err := h.cmd.Run("/bin/sh", "-c", fmt.Sprintf("%s %s install -y rdma-core", chrootDefinition, packageManager)) if err != nil && stderr.Len() != 0 { - glog.Errorf("installRDMA(): failed to install RDMA package output %s: error %v stderr %s", stdout.String(), err, stderr.String()) + glog.Errorf("InstallRDMA(): failed to install RDMA package output %s: error %v stderr %s", stdout.String(), err, stderr.String()) return err } return nil } -func (h *HostManager) triggerUdevEvent() error { - glog.Infof("triggerUdevEvent(): installing RDMA") +func (h *HostManager) TriggerUdevEvent() error { + glog.Infof("TriggerUdevEvent(): installing RDMA") - err := h.reloadDriver("mlx4_en") + err := h.ReloadDriver("mlx4_en") if err != nil { return err } - err = h.reloadDriver("mlx5_core") + err = h.ReloadDriver("mlx5_core") if err != nil { return err } @@ -413,30 +413,30 @@ func (h *HostManager) triggerUdevEvent() error { return nil } -func (h *HostManager) reloadDriver(driverName string) error { - glog.Infof("reloadDriver(): reload driver %s", driverName) - chrootDefinition := getChrootExtention(h.RunOnHost) +func (h *HostManager) ReloadDriver(driverName string) error { + glog.Infof("ReloadDriver(): reload driver %s", driverName) + chrootDefinition := getChrootExtension(h.RunOnHost) _, stderr, err := h.cmd.Run("/bin/sh", "-c", fmt.Sprintf("%s modprobe -r %s && %s modprobe %s", chrootDefinition, driverName, chrootDefinition, driverName)) if err != nil && stderr.Len() != 0 { - glog.Errorf("installRDMA(): failed to reload %s kernel module: error %v stderr %s", driverName, err, stderr.String()) + glog.Errorf("InstallRDMA(): failed to reload %s kernel module: error %v stderr %s", driverName, err, stderr.String()) return err } return nil } -func (h *HostManager) getOSPrettyName() (string, error) { +func (h *HostManager) GetOSPrettyName() (string, error) { path := genericOSReleaseFile if !h.RunOnHost { path = pathlib.Join(hostPathFromDaemon, path) } - glog.Infof("getOSPrettyName(): getting os name from os-release file") + glog.Infof("GetOSPrettyName(): getting os name from os-release file") stdout, stderr, err := h.cmd.Run("/bin/sh", "-c", fmt.Sprintf("cat %s | grep PRETTY_NAME | cut -c 13-", path)) if err != nil && stderr.Len() != 0 { - glog.Errorf("isUbuntuSystem(): failed to check for ubuntu operating system name in os-releasae file: error: %v stderr %s", err, stderr.String()) + glog.Errorf("IsUbuntuSystem(): failed to check for ubuntu operating system name in os-releasae file: error: %v stderr %s", err, stderr.String()) return "", fmt.Errorf(stderr.String()) } @@ -447,9 +447,9 @@ func (h *HostManager) getOSPrettyName() (string, error) { return "", fmt.Errorf("failed to find pretty operating system name") } -func getChrootExtention(runOnHost bool) string { +func getChrootExtension(runOnHost bool) string { if !runOnHost { - return "chroot /host/" + return fmt.Sprintf("chroot %s/host", utils.FilesystemRoot) } - return "" + return utils.FilesystemRoot } diff --git a/pkg/host/mock/mock_host.go b/pkg/host/mock/mock_host.go index 06ed3858a..e09bfd3c8 100644 --- a/pkg/host/mock/mock_host.go +++ b/pkg/host/mock/mock_host.go @@ -33,222 +33,222 @@ func (m *MockHostManagerInterface) EXPECT() *MockHostManagerInterfaceMockRecorde return m.recorder } -// LoadKernelModule mocks base method. -func (m *MockHostManagerInterface) LoadKernelModule(name string, args ...string) error { +// EnableRDMA mocks base method. +func (m *MockHostManagerInterface) EnableRDMA(arg0, arg1, arg2 string) (bool, error) { m.ctrl.T.Helper() - varargs := []interface{}{name} - for _, a := range args { - varargs = append(varargs, a) - } - ret := m.ctrl.Call(m, "LoadKernelModule", varargs...) - ret0, _ := ret[0].(error) - return ret0 + ret := m.ctrl.Call(m, "EnableRDMA", arg0, arg1, arg2) + ret0, _ := ret[0].(bool) + ret1, _ := ret[1].(error) + return ret0, ret1 } -// LoadKernelModule indicates an expected call of LoadKernelModule. -func (mr *MockHostManagerInterfaceMockRecorder) LoadKernelModule(name interface{}, args ...interface{}) *gomock.Call { +// EnableRDMA indicates an expected call of EnableRDMA. +func (mr *MockHostManagerInterfaceMockRecorder) EnableRDMA(arg0, arg1, arg2 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - varargs := append([]interface{}{name}, args...) - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LoadKernelModule", reflect.TypeOf((*MockHostManagerInterface)(nil).LoadKernelModule), varargs...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EnableRDMA", reflect.TypeOf((*MockHostManagerInterface)(nil).EnableRDMA), arg0, arg1, arg2) } -// TryEnableRdma mocks base method. -func (m *MockHostManagerInterface) TryEnableRdma() (bool, error) { +// EnableRDMAOnRHELMachine mocks base method. +func (m *MockHostManagerInterface) EnableRDMAOnRHELMachine() (bool, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "TryEnableRdma") + ret := m.ctrl.Call(m, "EnableRDMAOnRHELMachine") ret0, _ := ret[0].(bool) ret1, _ := ret[1].(error) return ret0, ret1 } -// TryEnableRdma indicates an expected call of TryEnableRdma. -func (mr *MockHostManagerInterfaceMockRecorder) TryEnableRdma() *gomock.Call { +// EnableRDMAOnRHELMachine indicates an expected call of EnableRDMAOnRHELMachine. +func (mr *MockHostManagerInterfaceMockRecorder) EnableRDMAOnRHELMachine() *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "TryEnableRdma", reflect.TypeOf((*MockHostManagerInterface)(nil).TryEnableRdma)) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "EnableRDMAOnRHELMachine", reflect.TypeOf((*MockHostManagerInterface)(nil).EnableRDMAOnRHELMachine)) } -// TryEnableTun mocks base method. -func (m *MockHostManagerInterface) TryEnableTun() { +// GetOSPrettyName mocks base method. +func (m *MockHostManagerInterface) GetOSPrettyName() (string, error) { m.ctrl.T.Helper() - m.ctrl.Call(m, "TryEnableTun") + ret := m.ctrl.Call(m, "GetOSPrettyName") + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 } -// TryEnableTun indicates an expected call of TryEnableTun. -func (mr *MockHostManagerInterfaceMockRecorder) TryEnableTun() *gomock.Call { +// GetOSPrettyName indicates an expected call of GetOSPrettyName. +func (mr *MockHostManagerInterfaceMockRecorder) GetOSPrettyName() *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "TryEnableTun", reflect.TypeOf((*MockHostManagerInterface)(nil).TryEnableTun)) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetOSPrettyName", reflect.TypeOf((*MockHostManagerInterface)(nil).GetOSPrettyName)) } -// TryEnableVhostNet mocks base method. -func (m *MockHostManagerInterface) TryEnableVhostNet() { +// InstallRDMA mocks base method. +func (m *MockHostManagerInterface) InstallRDMA(arg0 string) error { m.ctrl.T.Helper() - m.ctrl.Call(m, "TryEnableVhostNet") + ret := m.ctrl.Call(m, "InstallRDMA", arg0) + ret0, _ := ret[0].(error) + return ret0 } -// TryEnableVhostNet indicates an expected call of TryEnableVhostNet. -func (mr *MockHostManagerInterfaceMockRecorder) TryEnableVhostNet() *gomock.Call { +// InstallRDMA indicates an expected call of InstallRDMA. +func (mr *MockHostManagerInterfaceMockRecorder) InstallRDMA(arg0 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "TryEnableVhostNet", reflect.TypeOf((*MockHostManagerInterface)(nil).TryEnableVhostNet)) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallRDMA", reflect.TypeOf((*MockHostManagerInterface)(nil).InstallRDMA), arg0) } -// enableRDMA mocks base method. -func (m *MockHostManagerInterface) enableRDMA(arg0, arg1, arg2 string) (bool, error) { +// IsCoreOS mocks base method. +func (m *MockHostManagerInterface) IsCoreOS() (bool, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "enableRDMA", arg0, arg1, arg2) + ret := m.ctrl.Call(m, "IsCoreOS") ret0, _ := ret[0].(bool) ret1, _ := ret[1].(error) return ret0, ret1 } -// enableRDMA indicates an expected call of enableRDMA. -func (mr *MockHostManagerInterfaceMockRecorder) enableRDMA(arg0, arg1, arg2 interface{}) *gomock.Call { +// IsCoreOS indicates an expected call of IsCoreOS. +func (mr *MockHostManagerInterfaceMockRecorder) IsCoreOS() *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "enableRDMA", reflect.TypeOf((*MockHostManagerInterface)(nil).enableRDMA), arg0, arg1, arg2) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsCoreOS", reflect.TypeOf((*MockHostManagerInterface)(nil).IsCoreOS)) } -// enableRDMAOnRHELMachine mocks base method. -func (m *MockHostManagerInterface) enableRDMAOnRHELMachine() (bool, error) { +// IsKernelModuleLoaded mocks base method. +func (m *MockHostManagerInterface) IsKernelModuleLoaded(arg0 string) (bool, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "enableRDMAOnRHELMachine") + ret := m.ctrl.Call(m, "IsKernelModuleLoaded", arg0) ret0, _ := ret[0].(bool) ret1, _ := ret[1].(error) return ret0, ret1 } -// enableRDMAOnRHELMachine indicates an expected call of enableRDMAOnRHELMachine. -func (mr *MockHostManagerInterfaceMockRecorder) enableRDMAOnRHELMachine() *gomock.Call { +// IsKernelModuleLoaded indicates an expected call of IsKernelModuleLoaded. +func (mr *MockHostManagerInterfaceMockRecorder) IsKernelModuleLoaded(arg0 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "enableRDMAOnRHELMachine", reflect.TypeOf((*MockHostManagerInterface)(nil).enableRDMAOnRHELMachine)) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsKernelModuleLoaded", reflect.TypeOf((*MockHostManagerInterface)(nil).IsKernelModuleLoaded), arg0) } -// getOSPrettyName mocks base method. -func (m *MockHostManagerInterface) getOSPrettyName() (string, error) { +// IsRHELSystem mocks base method. +func (m *MockHostManagerInterface) IsRHELSystem() (bool, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "getOSPrettyName") - ret0, _ := ret[0].(string) + ret := m.ctrl.Call(m, "IsRHELSystem") + ret0, _ := ret[0].(bool) ret1, _ := ret[1].(error) return ret0, ret1 } -// getOSPrettyName indicates an expected call of getOSPrettyName. -func (mr *MockHostManagerInterfaceMockRecorder) getOSPrettyName() *gomock.Call { +// IsRHELSystem indicates an expected call of IsRHELSystem. +func (mr *MockHostManagerInterfaceMockRecorder) IsRHELSystem() *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "getOSPrettyName", reflect.TypeOf((*MockHostManagerInterface)(nil).getOSPrettyName)) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsRHELSystem", reflect.TypeOf((*MockHostManagerInterface)(nil).IsRHELSystem)) } -// installRDMA mocks base method. -func (m *MockHostManagerInterface) installRDMA(arg0 string) error { +// IsUbuntuSystem mocks base method. +func (m *MockHostManagerInterface) IsUbuntuSystem() (bool, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "installRDMA", arg0) - ret0, _ := ret[0].(error) - return ret0 + ret := m.ctrl.Call(m, "IsUbuntuSystem") + ret0, _ := ret[0].(bool) + ret1, _ := ret[1].(error) + return ret0, ret1 } -// installRDMA indicates an expected call of installRDMA. -func (mr *MockHostManagerInterfaceMockRecorder) installRDMA(arg0 interface{}) *gomock.Call { +// IsUbuntuSystem indicates an expected call of IsUbuntuSystem. +func (mr *MockHostManagerInterfaceMockRecorder) IsUbuntuSystem() *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "installRDMA", reflect.TypeOf((*MockHostManagerInterface)(nil).installRDMA), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsUbuntuSystem", reflect.TypeOf((*MockHostManagerInterface)(nil).IsUbuntuSystem)) } -// isCoreOS mocks base method. -func (m *MockHostManagerInterface) isCoreOS() (bool, error) { +// LoadKernelModule mocks base method. +func (m *MockHostManagerInterface) LoadKernelModule(name string, args ...string) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "isCoreOS") - ret0, _ := ret[0].(bool) - ret1, _ := ret[1].(error) - return ret0, ret1 + varargs := []interface{}{name} + for _, a := range args { + varargs = append(varargs, a) + } + ret := m.ctrl.Call(m, "LoadKernelModule", varargs...) + ret0, _ := ret[0].(error) + return ret0 } -// isCoreOS indicates an expected call of isCoreOS. -func (mr *MockHostManagerInterfaceMockRecorder) isCoreOS() *gomock.Call { +// LoadKernelModule indicates an expected call of LoadKernelModule. +func (mr *MockHostManagerInterfaceMockRecorder) LoadKernelModule(name interface{}, args ...interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "isCoreOS", reflect.TypeOf((*MockHostManagerInterface)(nil).isCoreOS)) + varargs := append([]interface{}{name}, args...) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LoadKernelModule", reflect.TypeOf((*MockHostManagerInterface)(nil).LoadKernelModule), varargs...) } -// isKernelModuleLoaded mocks base method. -func (m *MockHostManagerInterface) isKernelModuleLoaded(arg0 string) (bool, error) { +// RdmaIsLoaded mocks base method. +func (m *MockHostManagerInterface) RdmaIsLoaded() (bool, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "isKernelModuleLoaded", arg0) + ret := m.ctrl.Call(m, "RdmaIsLoaded") ret0, _ := ret[0].(bool) ret1, _ := ret[1].(error) return ret0, ret1 } -// isKernelModuleLoaded indicates an expected call of isKernelModuleLoaded. -func (mr *MockHostManagerInterfaceMockRecorder) isKernelModuleLoaded(arg0 interface{}) *gomock.Call { +// RdmaIsLoaded indicates an expected call of RdmaIsLoaded. +func (mr *MockHostManagerInterfaceMockRecorder) RdmaIsLoaded() *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "isKernelModuleLoaded", reflect.TypeOf((*MockHostManagerInterface)(nil).isKernelModuleLoaded), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "RdmaIsLoaded", reflect.TypeOf((*MockHostManagerInterface)(nil).RdmaIsLoaded)) } -// isRHELSystem mocks base method. -func (m *MockHostManagerInterface) isRHELSystem() (bool, error) { +// ReloadDriver mocks base method. +func (m *MockHostManagerInterface) ReloadDriver(arg0 string) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "isRHELSystem") - ret0, _ := ret[0].(bool) - ret1, _ := ret[1].(error) - return ret0, ret1 + ret := m.ctrl.Call(m, "ReloadDriver", arg0) + ret0, _ := ret[0].(error) + return ret0 } -// isRHELSystem indicates an expected call of isRHELSystem. -func (mr *MockHostManagerInterfaceMockRecorder) isRHELSystem() *gomock.Call { +// ReloadDriver indicates an expected call of ReloadDriver. +func (mr *MockHostManagerInterfaceMockRecorder) ReloadDriver(arg0 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "isRHELSystem", reflect.TypeOf((*MockHostManagerInterface)(nil).isRHELSystem)) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReloadDriver", reflect.TypeOf((*MockHostManagerInterface)(nil).ReloadDriver), arg0) } -// isUbuntuSystem mocks base method. -func (m *MockHostManagerInterface) isUbuntuSystem() (bool, error) { +// TriggerUdevEvent mocks base method. +func (m *MockHostManagerInterface) TriggerUdevEvent() error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "isUbuntuSystem") - ret0, _ := ret[0].(bool) - ret1, _ := ret[1].(error) - return ret0, ret1 + ret := m.ctrl.Call(m, "TriggerUdevEvent") + ret0, _ := ret[0].(error) + return ret0 } -// isUbuntuSystem indicates an expected call of isUbuntuSystem. -func (mr *MockHostManagerInterfaceMockRecorder) isUbuntuSystem() *gomock.Call { +// TriggerUdevEvent indicates an expected call of TriggerUdevEvent. +func (mr *MockHostManagerInterfaceMockRecorder) TriggerUdevEvent() *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "isUbuntuSystem", reflect.TypeOf((*MockHostManagerInterface)(nil).isUbuntuSystem)) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "TriggerUdevEvent", reflect.TypeOf((*MockHostManagerInterface)(nil).TriggerUdevEvent)) } -// rdmaIsLoaded mocks base method. -func (m *MockHostManagerInterface) rdmaIsLoaded() (bool, error) { +// TryEnableRdma mocks base method. +func (m *MockHostManagerInterface) TryEnableRdma() (bool, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "rdmaIsLoaded") + ret := m.ctrl.Call(m, "TryEnableRdma") ret0, _ := ret[0].(bool) ret1, _ := ret[1].(error) return ret0, ret1 } -// rdmaIsLoaded indicates an expected call of rdmaIsLoaded. -func (mr *MockHostManagerInterfaceMockRecorder) rdmaIsLoaded() *gomock.Call { +// TryEnableRdma indicates an expected call of TryEnableRdma. +func (mr *MockHostManagerInterfaceMockRecorder) TryEnableRdma() *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "rdmaIsLoaded", reflect.TypeOf((*MockHostManagerInterface)(nil).rdmaIsLoaded)) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "TryEnableRdma", reflect.TypeOf((*MockHostManagerInterface)(nil).TryEnableRdma)) } -// reloadDriver mocks base method. -func (m *MockHostManagerInterface) reloadDriver(arg0 string) error { +// TryEnableTun mocks base method. +func (m *MockHostManagerInterface) TryEnableTun() { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "reloadDriver", arg0) - ret0, _ := ret[0].(error) - return ret0 + m.ctrl.Call(m, "TryEnableTun") } -// reloadDriver indicates an expected call of reloadDriver. -func (mr *MockHostManagerInterfaceMockRecorder) reloadDriver(arg0 interface{}) *gomock.Call { +// TryEnableTun indicates an expected call of TryEnableTun. +func (mr *MockHostManagerInterfaceMockRecorder) TryEnableTun() *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "reloadDriver", reflect.TypeOf((*MockHostManagerInterface)(nil).reloadDriver), arg0) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "TryEnableTun", reflect.TypeOf((*MockHostManagerInterface)(nil).TryEnableTun)) } -// triggerUdevEvent mocks base method. -func (m *MockHostManagerInterface) triggerUdevEvent() error { +// TryEnableVhostNet mocks base method. +func (m *MockHostManagerInterface) TryEnableVhostNet() { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "triggerUdevEvent") - ret0, _ := ret[0].(error) - return ret0 + m.ctrl.Call(m, "TryEnableVhostNet") } -// triggerUdevEvent indicates an expected call of triggerUdevEvent. -func (mr *MockHostManagerInterfaceMockRecorder) triggerUdevEvent() *gomock.Call { +// TryEnableVhostNet indicates an expected call of TryEnableVhostNet. +func (mr *MockHostManagerInterfaceMockRecorder) TryEnableVhostNet() *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "triggerUdevEvent", reflect.TypeOf((*MockHostManagerInterface)(nil).triggerUdevEvent)) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "TryEnableVhostNet", reflect.TypeOf((*MockHostManagerInterface)(nil).TryEnableVhostNet)) } From bf6cecd1b82bfa9f2744eae098879eca3df844a7 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 3 Aug 2023 19:37:50 +0300 Subject: [PATCH 06/13] update utils and systemd Signed-off-by: Sebastian Sch --- pkg/systemd/systemd.go | 6 ++-- pkg/utils/sriov.go | 2 -- pkg/utils/utils.go | 78 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 78 insertions(+), 8 deletions(-) diff --git a/pkg/systemd/systemd.go b/pkg/systemd/systemd.go index 03dc6ad2a..504fcf389 100644 --- a/pkg/systemd/systemd.go +++ b/pkg/systemd/systemd.go @@ -41,6 +41,8 @@ const ( SriovServicePath = "/etc/systemd/system/sriov-config.service" SriovHostServicePath = "/host" + SriovServicePath + + HostSriovConfBasePath = "/host" + utils.SriovConfBasePath ) type SriovConfig struct { @@ -80,8 +82,8 @@ func WriteConfFile(newState *sriovnetworkv1.SriovNetworkNodeState, unsupportedNi if err != nil { if os.IsNotExist(err) { // Create the sriov-operator folder on the host if it doesn't exist - if _, err := os.Stat(utils.HostSriovConfBasePath); os.IsNotExist(err) { - err = os.Mkdir(utils.HostSriovConfBasePath, os.ModeDir) + if _, err := os.Stat(HostSriovConfBasePath); os.IsNotExist(err) { + err = os.Mkdir(HostSriovConfBasePath, os.ModeDir) if err != nil { glog.Errorf("WriteConfFile(): fail to create sriov-operator folder: %v", err) return false, err diff --git a/pkg/utils/sriov.go b/pkg/utils/sriov.go index 4fd993a5c..df85cbf51 100644 --- a/pkg/utils/sriov.go +++ b/pkg/utils/sriov.go @@ -27,8 +27,6 @@ import ( ) const ( - SriovConfBasePath = "/etc/sriov-operator" - HostSriovConfBasePath = "/host" + SriovConfBasePath SriovSwitchDevConfPath = SriovConfBasePath + "/sriov_config.json" SriovHostSwitchDevConfPath = "/host" + SriovSwitchDevConfPath ) diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 231e2ab80..ab1a0e847 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -47,11 +47,14 @@ var ClusterType string var pfPhysPortNameRe = regexp.MustCompile(`p\d+`) +// FilesystemRoot used by test to mock interactions with filesystem +var FilesystemRoot = "" + func init() { ClusterType = os.Getenv("CLUSTER_TYPE") } -func DiscoverSriovDevices(withUnsupported bool) ([]sriovnetworkv1.InterfaceExt, error) { +func DiscoverSriovDevices(withUnsupported bool, storeManager StoreManagerInterface) ([]sriovnetworkv1.InterfaceExt, error) { glog.V(2).Info("DiscoverSriovDevices") pfList := []sriovnetworkv1.InterfaceExt{} @@ -122,6 +125,15 @@ func DiscoverSriovDevices(withUnsupported bool) ([]sriovnetworkv1.InterfaceExt, } iface.LinkType = getLinkType(iface) + pfStatus, exist, err := storeManager.LoadPfsStatus(iface.PciAddress) + if err != nil { + glog.Warningf("DiscoverSriovDevices(): failed to load PF status from disk: %v", err) + } + + if exist { + iface.ExternallyManaged = pfStatus.ExternallyManaged + } + if dputils.IsSriovPF(device.Address) { iface.TotalVfs = dputils.GetSriovVFcapacity(device.Address) iface.NumVfs = dputils.GetVFconfigured(device.Address) @@ -156,7 +168,13 @@ func ConfigSriovInterfaces(interfaces []sriovnetworkv1.Interface, ifaceStatuses glog.Warningf("cannot use mellanox devices when in kernel lockdown mode") return fmt.Errorf("cannot use mellanox devices when in kernel lockdown mode") } - var err error + + // we are already inside chroot, so we initialize the store as running on host + storeManager, err := NewStoreManager(true) + if err != nil { + return fmt.Errorf("SyncNodeState(): error initializing storeManager: %v", err) + } + for _, ifaceStatus := range ifaceStatuses { configured := false for _, iface := range interfaces { @@ -169,15 +187,34 @@ func ConfigSriovInterfaces(interfaces []sriovnetworkv1.Interface, ifaceStatuses if !NeedUpdate(&iface, &ifaceStatus) { glog.V(2).Infof("syncNodeState(): no need update interface %s", iface.PciAddress) + + // Save the PF status to the host + err = storeManager.SaveLastPfAppliedStatus(iface.PciAddress, &iface) + if err != nil { + glog.Errorf("SyncNodeState(): failed to save PF applied config to host: %v", err) + return err + } + break } if err = configSriovDevice(&iface, &ifaceStatus); err != nil { glog.Errorf("SyncNodeState(): fail to configure sriov interface %s: %v. resetting interface.", iface.PciAddress, err) - if resetErr := resetSriovDevice(ifaceStatus); resetErr != nil { - glog.Errorf("SyncNodeState(): fail to reset on error SR-IOV interface: %s", resetErr) + if iface.ExternallyManaged { + glog.Infof("SyncNodeState(): skipping device reset as the nic is marked as externally created") + } else { + if resetErr := resetSriovDevice(ifaceStatus); resetErr != nil { + glog.Errorf("SyncNodeState(): failed to reset on error SR-IOV interface: %s", resetErr) + } } return err } + + // Save the PF status to the host + err = storeManager.SaveLastPfAppliedStatus(iface.PciAddress, &iface) + if err != nil { + glog.Errorf("SyncNodeState(): failed to save PF applied config to host: %v", err) + return err + } break } } @@ -186,6 +223,27 @@ func ConfigSriovInterfaces(interfaces []sriovnetworkv1.Interface, ifaceStatuses continue } + // load the PF info + pfStatus, exist, err := storeManager.LoadPfsStatus(ifaceStatus.PciAddress) + if err != nil { + glog.Errorf("SyncNodeState(): failed to load info about PF status for pci address %s: %v", ifaceStatus.PciAddress, err) + return err + } + + if !exist { + glog.Infof("SyncNodeState(): PF name %s with pci address %s has VFs configured but they weren't created by the sriov operator. Skipping the device reset", + ifaceStatus.Name, + ifaceStatus.PciAddress) + continue + } + + if pfStatus.ExternallyManaged { + glog.Infof("SyncNodeState(): PF name %s with pci address %s was externally created skipping the device reset", + ifaceStatus.Name, + ifaceStatus.PciAddress) + continue + } + if err = resetSriovDevice(ifaceStatus); err != nil { return err } @@ -274,6 +332,12 @@ func NeedUpdate(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetworkv1.Int glog.V(2).Infof("NeedUpdate(): VF %d MTU needs update, desired=%d, current=%d", vf.VfID, group.Mtu, vf.Mtu) return true } + + // this is needed to be sure the admin mac address is configured as expected + if iface.ExternallyManaged { + glog.V(2).Infof("NeedUpdate(): need to update the device as it's externally manage for pci address %s", ifaceStatus.PciAddress) + return true + } } break } @@ -297,6 +361,12 @@ func configSriovDevice(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetwor } // set numVFs if iface.NumVfs != ifaceStatus.NumVfs { + if iface.ExternallyManaged { + errMsg := fmt.Sprintf("configSriovDevice(): number of request virtual functions %d is not equal to configured virtual functions %d but the policy is configured as ExternallyManaged for device %s", iface.NumVfs, ifaceStatus.NumVfs, iface.PciAddress) + glog.Error(errMsg) + return fmt.Errorf(errMsg) + } + err = setSriovNumVfs(iface.PciAddress, iface.NumVfs) if err != nil { glog.Errorf("configSriovDevice(): fail to set NumVfs for device %s", iface.PciAddress) From 8ada3978b7f5bc6f8ed7de489da1c99915170dd9 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 3 Aug 2023 19:38:06 +0300 Subject: [PATCH 07/13] add externally manage to plugins Signed-off-by: Sebastian Sch WIP Signed-off-by: Sebastian Sch --- pkg/plugins/generic/generic_plugin.go | 33 ++++++++++++++++++---- pkg/plugins/generic/generic_plugin_test.go | 20 +++++++++++-- pkg/plugins/mellanox/mellanox_plugin.go | 17 +++++++++++ 3 files changed, 62 insertions(+), 8 deletions(-) diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index bc5b1129e..9483a699d 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -54,12 +54,13 @@ type GenericPlugin struct { DriverStateMap DriverStateMapType RunningOnHost bool HostManager host.HostManagerInterface + StoreManager utils.StoreManagerInterface } const scriptsPath = "bindata/scripts/enable-kargs.sh" // Initialize our plugin and set up initial values -func NewGenericPlugin(runningOnHost bool) (plugin.VendorPlugin, error) { +func NewGenericPlugin(runningOnHost bool, hostManager host.HostManagerInterface, storeManager utils.StoreManagerInterface) (plugin.VendorPlugin, error) { driverStateMap := make(map[uint]*DriverState) driverStateMap[Vfio] = &DriverState{ DriverName: vfioPciDriver, @@ -82,13 +83,13 @@ func NewGenericPlugin(runningOnHost bool) (plugin.VendorPlugin, error) { NeedDriverFunc: needDriverCheckVdpaType, DriverLoaded: false, } - return &GenericPlugin{ PluginName: PluginName, SpecVersion: "1.0", DriverStateMap: driverStateMap, RunningOnHost: runningOnHost, - HostManager: host.NewHostManager(runningOnHost), + HostManager: hostManager, + StoreManager: storeManager, }, nil } @@ -110,7 +111,7 @@ func (p *GenericPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt err = nil p.DesireState = new - needDrain = needDrainNode(new.Spec.Interfaces, new.Status.Interfaces) + needDrain = p.needDrainNode(new.Spec.Interfaces, new.Status.Interfaces) needReboot = needRebootNode(new, p.DriverStateMap) if needReboot { @@ -233,8 +234,9 @@ func isCommandNotFound(err error) bool { return false } -func needDrainNode(desired sriovnetworkv1.Interfaces, current sriovnetworkv1.InterfaceExts) (needDrain bool) { +func (p *GenericPlugin) needDrainNode(desired sriovnetworkv1.Interfaces, current sriovnetworkv1.InterfaceExts) (needDrain bool) { glog.V(2).Infof("generic-plugin needDrainNode(): current state '%+v', desired state '%+v'", current, desired) + needDrain = false for _, ifaceStatus := range current { configured := false @@ -254,6 +256,27 @@ func needDrainNode(desired sriovnetworkv1.Interfaces, current sriovnetworkv1.Int } } if !configured && ifaceStatus.NumVfs > 0 { + // load the PF info + pfStatus, exist, err := p.StoreManager.LoadPfsStatus(ifaceStatus.PciAddress) + if err != nil { + glog.Errorf("generic-plugin needDrainNode(): failed to load info about PF status for pci address %s: %v", ifaceStatus.PciAddress, err) + continue + } + + if !exist { + glog.Infof("generic-plugin needDrainNode(): PF name %s with pci address %s has VFs configured but they weren't created by the sriov operator. Skipping drain", + ifaceStatus.Name, + ifaceStatus.PciAddress) + continue + } + + if pfStatus.ExternallyManaged { + glog.Infof("generic-plugin needDrainNode()(): PF name %s with pci address %s was externally created. Skipping drain", + ifaceStatus.Name, + ifaceStatus.PciAddress) + continue + } + glog.V(2).Infof("generic-plugin needDrainNode(): need drain, %v needs to be reset", ifaceStatus) needDrain = true return diff --git a/pkg/plugins/generic/generic_plugin_test.go b/pkg/plugins/generic/generic_plugin_test.go index dc8201448..e1211b392 100644 --- a/pkg/plugins/generic/generic_plugin_test.go +++ b/pkg/plugins/generic/generic_plugin_test.go @@ -3,11 +3,14 @@ package generic import ( "testing" + "github.com/golang/mock/gomock" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + mock_host "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/mock" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" + mock_utils "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils/mock" ) func TestGenericPlugin(t *testing.T) { @@ -16,10 +19,21 @@ func TestGenericPlugin(t *testing.T) { } var _ = Describe("Generic plugin", func() { - var genericPlugin plugin.VendorPlugin - var err error + var ( + t GinkgoTInterface + genericPlugin plugin.VendorPlugin + err error + ctrl *gomock.Controller + mockHost *mock_host.MockHostManagerInterface + mockStore *mock_utils.MockStoreManagerInterface + ) + BeforeEach(func() { - genericPlugin, err = NewGenericPlugin(false) + t = GinkgoT() + ctrl = gomock.NewController(t) + mockHost = mock_host.NewMockHostManagerInterface(ctrl) + mockStore = mock_utils.NewMockStoreManagerInterface(ctrl) + genericPlugin, err = NewGenericPlugin(false, mockHost, mockStore) Expect(err).ToNot(HaveOccurred()) }) diff --git a/pkg/plugins/mellanox/mellanox_plugin.go b/pkg/plugins/mellanox/mellanox_plugin.go index f6b8ab185..fe5f51a81 100644 --- a/pkg/plugins/mellanox/mellanox_plugin.go +++ b/pkg/plugins/mellanox/mellanox_plugin.go @@ -129,6 +129,11 @@ func (p *MellanoxPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeS needReboot = needReboot || sriovEnNeedReboot changeWithoutReboot = changeWithoutReboot || sriovEnChangeWithoutReboot + // failing as we can't the fwTotalVf is lower than the request one on a nic with externallyManage configured + if ifaceSpec.ExternallyManaged && needReboot { + return true, true, fmt.Errorf("interface %s required a change in the TotalVfs but the policy is externally managed failing: firmware TotalVf %d requested TotalVf %d", ifaceSpec.PciAddress, fwCurrent.totalVfs, totalVfs) + } + needLinkChange, err := handleLinkType(pciPrefix, fwCurrent, attrs) if err != nil { return false, false, err @@ -331,6 +336,18 @@ func handleTotalVfs(fwCurrent, fwNext, attrs *mlnxNic, ifaceSpec sriovnetworkv1. } } + // if the PF is externally managed we just need to check the totalVfs requested in the policy is not higher than + // the configured amount + if ifaceSpec.ExternallyManaged { + if totalVfs > fwCurrent.totalVfs { + glog.Errorf("The nic is externallyManaged and TotalVfs %d configured on the system is lower then requested %d, failing configuration", fwCurrent.totalVfs, totalVfs) + attrs.totalVfs = totalVfs + needReboot = true + changeWithoutReboot = false + } + return + } + if fwCurrent.totalVfs != totalVfs { glog.V(2).Infof("Changing TotalVfs %d to %d, needs reboot", fwCurrent.totalVfs, totalVfs) attrs.totalVfs = totalVfs From 241f4d70c7ad3c2266f9aca73c760652ac093dc7 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 3 Aug 2023 19:38:31 +0300 Subject: [PATCH 08/13] update daemon to support new externally managed field Signed-off-by: Sebastian Sch --- pkg/daemon/daemon.go | 48 ++++++++++++++++++++++++++++++--------- pkg/daemon/daemon_test.go | 13 +++++++---- pkg/daemon/plugin.go | 5 ++-- pkg/daemon/writer.go | 15 ++++++++---- 4 files changed, 58 insertions(+), 23 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 6535bff62..e93976af4 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -108,6 +108,10 @@ type Daemon struct { workqueue workqueue.RateLimitingInterface mcpName string + + storeManager utils.StoreManagerInterface + + hostManager host.HostManagerInterface } const ( @@ -123,9 +127,6 @@ const ( var namespace = os.Getenv("NAMESPACE") -// used by test to mock interactions with filesystem -var filesystemRoot string = "" - // writer implements io.Writer interface as a pass-through for glog. type writer struct { logFunc func(args ...interface{}) @@ -232,17 +233,24 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error { defer utilruntime.HandleCrash() defer dn.workqueue.ShutDown() + hostManager := host.NewHostManager(dn.useSystemdService) + dn.hostManager = hostManager if !dn.useSystemdService { - hostManager := host.NewHostManager(dn.useSystemdService) - hostManager.TryEnableRdma() - hostManager.TryEnableTun() - hostManager.TryEnableVhostNet() + dn.hostManager.TryEnableRdma() + dn.hostManager.TryEnableTun() + dn.hostManager.TryEnableVhostNet() err := systemd.CleanSriovFilesFromHost(utils.ClusterType == utils.ClusterTypeOpenshift) if err != nil { glog.Warningf("failed to remove all the systemd sriov files error: %v", err) } } + storeManager, err := utils.NewStoreManager(false) + if err != nil { + return err + } + dn.storeManager = storeManager + if err := dn.tryCreateUdevRuleWrapper(); err != nil { return err } @@ -515,6 +523,12 @@ func (dn *Daemon) nodeStateSyncHandler() error { } if latestState.GetGeneration() == 1 && len(latestState.Spec.Interfaces) == 0 { + err = dn.storeManager.ClearPCIAddressFolder() + if err != nil { + glog.Errorf("failed to clear the PCI address configuration: %v", err) + return err + } + glog.V(0).Infof("nodeStateSyncHandler(): Name: %s, Interface policy spec not yet set by controller", latestState.Name) if latestState.Status.SyncStatus != "Succeeded" { dn.refreshCh <- Message{ @@ -531,10 +545,22 @@ func (dn *Daemon) nodeStateSyncHandler() error { syncStatus: syncStatusInProgress, lastSyncError: "", } + // wait for writer to refresh status then pull again the latest node state + <-dn.syncCh + + // we need to load the latest status to our object + // if we don't do it we can have a race here where the user remove the virtual functions but the operator didn't + // trigger the refresh + updatedState, err := dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(context.Background(), dn.name, metav1.GetOptions{}) + if err != nil { + glog.Warningf("nodeStateSyncHandler(): Failed to fetch node state %s: %v", dn.name, err) + return err + } + latestState.Status = updatedState.Status // load plugins if it has not loaded if len(dn.enabledPlugins) == 0 { - dn.enabledPlugins, err = enablePlugins(dn.platform, dn.useSystemdService, latestState) + dn.enabledPlugins, err = enablePlugins(dn.platform, dn.useSystemdService, latestState, dn.hostManager, dn.storeManager) if err != nil { glog.Errorf("nodeStateSyncHandler(): failed to enable vendor plugins error: %v", err) return err @@ -1056,7 +1082,7 @@ func (dn *Daemon) drainNode() error { func tryCreateSwitchdevUdevRule(nodeState *sriovnetworkv1.SriovNetworkNodeState) error { glog.V(2).Infof("tryCreateSwitchdevUdevRule()") var newContent string - filePath := path.Join(filesystemRoot, "/host/etc/udev/rules.d/20-switchdev.rules") + filePath := path.Join(utils.FilesystemRoot, "/host/etc/udev/rules.d/20-switchdev.rules") for _, ifaceStatus := range nodeState.Status.Interfaces { if ifaceStatus.EswitchMode == sriovnetworkv1.ESwithModeSwitchDev { @@ -1092,7 +1118,7 @@ func tryCreateSwitchdevUdevRule(nodeState *sriovnetworkv1.SriovNetworkNodeState) } var stdout, stderr bytes.Buffer - cmd := exec.Command("/bin/bash", path.Join(filesystemRoot, udevScriptsPath)) + cmd := exec.Command("/bin/bash", path.Join(utils.FilesystemRoot, udevScriptsPath)) cmd.Stdout = &stdout cmd.Stderr = &stderr if err := cmd.Run(); err != nil { @@ -1113,7 +1139,7 @@ func tryCreateSwitchdevUdevRule(nodeState *sriovnetworkv1.SriovNetworkNodeState) func tryCreateNMUdevRule() error { glog.V(2).Infof("tryCreateNMUdevRule()") - dirPath := path.Join(filesystemRoot, "/host/etc/udev/rules.d") + dirPath := path.Join(utils.FilesystemRoot, "/host/etc/udev/rules.d") filePath := path.Join(dirPath, "10-nm-unmanaged.rules") // we need to remove the Red Hat Virtio network device from the udev rule configuration diff --git a/pkg/daemon/daemon_test.go b/pkg/daemon/daemon_test.go index 1aac8d43d..cdd1fe11b 100644 --- a/pkg/daemon/daemon_test.go +++ b/pkg/daemon/daemon_test.go @@ -85,7 +85,9 @@ var _ = Describe("Config Daemon", func() { fakeFs := &fakefilesystem.FS{ Dirs: []string{ "bindata/scripts", - "host", + "host/etc/sriov-operator", + "host/etc/sriov-operator/pci", + "host/etc/udev/rules.d", }, Symlinks: map[string]string{}, Files: map[string][]byte{ @@ -95,7 +97,7 @@ var _ = Describe("Config Daemon", func() { } var err error - filesystemRoot, cleanFakeFs, err = fakeFs.Use() + utils.FilesystemRoot, cleanFakeFs, err = fakeFs.Use() Expect(err).ToNot(HaveOccurred()) kubeClient := fakek8s.NewSimpleClientset(&FakeSupportedNicIDs, &SriovDevicePluginPod) @@ -120,7 +122,9 @@ var _ = Describe("Config Daemon", func() { sut.enabledPlugins = map[string]plugin.VendorPlugin{generic.PluginName: &fake.FakePlugin{}} go func() { - sut.Run(stopCh, exitCh) + defer GinkgoRecover() + err := sut.Run(stopCh, exitCh) + Expect(err).ToNot(HaveOccurred()) }() }) @@ -134,7 +138,6 @@ var _ = Describe("Config Daemon", func() { }) Context("Should", func() { - It("restart sriov-device-plugin pod", func() { _, err := sut.kubeClient.CoreV1().Nodes(). @@ -234,7 +237,7 @@ var _ = Describe("Config Daemon", func() { It("configure udev rules on host", func() { - networkManagerUdevRulePath := path.Join(filesystemRoot, "host/etc/udev/rules.d/10-nm-unmanaged.rules") + networkManagerUdevRulePath := path.Join(utils.FilesystemRoot, "host/etc/udev/rules.d/10-nm-unmanaged.rules") expectedContents := `ACTION=="add|change|move", ATTRS{device}=="0x1014|0x154c", ENV{NM_UNMANAGED}="1" SUBSYSTEM=="net", ACTION=="add|move", ATTRS{phys_switch_id}!="", ATTR{phys_port_name}=="pf*vf*", ENV{NM_UNMANAGED}="1" diff --git a/pkg/daemon/plugin.go b/pkg/daemon/plugin.go index 9639db88e..ef48b2817 100644 --- a/pkg/daemon/plugin.go +++ b/pkg/daemon/plugin.go @@ -6,6 +6,7 @@ import ( "github.com/golang/glog" sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host" plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" genericplugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/generic" intelplugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/intel" @@ -28,7 +29,7 @@ var ( K8sPlugin = k8splugin.NewK8sPlugin ) -func enablePlugins(platform utils.PlatformType, useSystemdService bool, ns *sriovnetworkv1.SriovNetworkNodeState) (map[string]plugin.VendorPlugin, error) { +func enablePlugins(platform utils.PlatformType, useSystemdService bool, ns *sriovnetworkv1.SriovNetworkNodeState, hostManager host.HostManagerInterface, storeManager utils.StoreManagerInterface) (map[string]plugin.VendorPlugin, error) { glog.Infof("enableVendorPlugins(): enabling plugins") enabledPlugins := map[string]plugin.VendorPlugin{} @@ -54,7 +55,7 @@ func enablePlugins(platform utils.PlatformType, useSystemdService bool, ns *srio } enabledPlugins[k8sPlugin.Name()] = k8sPlugin } - genericPlugin, err := GenericPlugin(false) + genericPlugin, err := GenericPlugin(false, hostManager, storeManager) if err != nil { glog.Errorf("enableVendorPlugins(): failed to load the generic plugin error: %v", err) return nil, err diff --git a/pkg/daemon/writer.go b/pkg/daemon/writer.go index 2a6fc6384..89abd8137 100644 --- a/pkg/daemon/writer.go +++ b/pkg/daemon/writer.go @@ -30,6 +30,7 @@ type NodeStateStatusWriter struct { OnHeartbeatFailure func() openStackDevicesInfo utils.OSPDevicesInfo withUnsupportedDevices bool + storeManager utils.StoreManagerInterface } // NewNodeStateStatusWriter Create a new NodeStateStatusWriter @@ -47,6 +48,13 @@ func (w *NodeStateStatusWriter) RunOnce(destDir string, platformType utils.Platf glog.V(0).Infof("RunOnce()") msg := Message{} + storeManager, err := utils.NewStoreManager(false) + if err != nil { + glog.Errorf("failed to create store manager: %v", err) + return err + } + w.storeManager = storeManager + if platformType == utils.VirtualOpenStack { ns, err := w.getCheckPointNodeState(destDir) if err != nil { @@ -100,10 +108,7 @@ func (w *NodeStateStatusWriter) Run(stop <-chan struct{}, refresh <-chan Message if err != nil { glog.Errorf("Run() refresh: writing to node status failed: %v", err) } - - if msg.syncStatus == syncStatusSucceeded || msg.syncStatus == syncStatusFailed { - syncCh <- struct{}{} - } + syncCh <- struct{}{} case <-time.After(30 * time.Second): glog.V(2).Info("Run(): period refresh") if err := w.pollNicStatus(platformType); err != nil { @@ -122,7 +127,7 @@ func (w *NodeStateStatusWriter) pollNicStatus(platformType utils.PlatformType) e if platformType == utils.VirtualOpenStack { iface, err = utils.DiscoverSriovDevicesVirtual(w.openStackDevicesInfo) } else { - iface, err = utils.DiscoverSriovDevices(w.withUnsupportedDevices) + iface, err = utils.DiscoverSriovDevices(w.withUnsupportedDevices, w.storeManager) } if err != nil { return err From 3ebe5f688a337ff9109492452958117395616b53 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 3 Aug 2023 19:38:45 +0300 Subject: [PATCH 09/13] copy crds to deployment Signed-off-by: Sebastian Sch --- .../sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml | 4 ++++ .../sriovnetwork.openshift.io_sriovnetworknodestates.yaml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml index a81c6a227..d5ae89cca 100644 --- a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml +++ b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodepolicies.yaml @@ -53,6 +53,10 @@ spec: description: Exclude device's NUMA node when advertising this resource by SRIOV network device plugin. Default to false. type: boolean + externallyManaged: + description: don't create the virtual function only allocated them + to the device plugin. Defaults to false. + type: boolean isRdma: description: RDMA mode. Defaults to false. type: boolean diff --git a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml index 1e7ef32b0..8e756f681 100644 --- a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml +++ b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovnetworknodestates.yaml @@ -50,6 +50,8 @@ spec: properties: eSwitchMode: type: string + externallyManaged: + type: boolean linkType: type: string mtu: @@ -125,6 +127,8 @@ spec: type: string eSwitchMode: type: string + externallyManaged: + type: boolean linkSpeed: type: string linkType: From 7edb9b81e114e95d5e052b5a52df9bcfa62b7648 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 3 Aug 2023 19:39:08 +0300 Subject: [PATCH 10/13] update systemd service main go with new store and host interfaces Signed-off-by: Sebastian Sch --- cmd/sriov-network-config-daemon/service.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cmd/sriov-network-config-daemon/service.go b/cmd/sriov-network-config-daemon/service.go index b300254ac..bbad72663 100644 --- a/cmd/sriov-network-config-daemon/service.go +++ b/cmd/sriov-network-config-daemon/service.go @@ -95,6 +95,11 @@ func runServiceCmd(cmd *cobra.Command, args []string) error { glog.V(2).Infof("sriov-config-service read config: %v", nodeStateSpec) + storeManager, err := utils.NewStoreManager(true) + if err != nil { + glog.Errorf("failed to create store manager: %v", err) + return err + } // Load kernel modules hostManager := host.NewHostManager(true) _, err = hostManager.TryEnableRdma() @@ -108,14 +113,14 @@ func runServiceCmd(cmd *cobra.Command, args []string) error { var ifaceStatuses []sriovv1.InterfaceExt if nodeStateSpec.PlatformType == utils.Baremetal { // Bare metal support - ifaceStatuses, err = utils.DiscoverSriovDevices(nodeStateSpec.UnsupportedNics) + ifaceStatuses, err = utils.DiscoverSriovDevices(nodeStateSpec.UnsupportedNics, storeManager) if err != nil { glog.Errorf("sriov-config-service: failed to discover sriov devices on the host: %v", err) return fmt.Errorf("sriov-config-service: failed to discover sriov devices on the host: %v", err) } // Create the generic plugin - configPlugin, err = generic.NewGenericPlugin(true) + configPlugin, err = generic.NewGenericPlugin(true, hostManager, storeManager) if err != nil { glog.Errorf("sriov-config-service: failed to create generic plugin %v", err) return fmt.Errorf("sriov-config-service failed to create generic plugin %v", err) From aeb60e2f64aa6f61d74613c68a3f7ba7cc520e3d Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 3 Aug 2023 19:39:17 +0300 Subject: [PATCH 11/13] functional tests Signed-off-by: Sebastian Sch --- test/conformance/tests/test_sriov_operator.go | 171 ++++++++++++++++-- 1 file changed, 158 insertions(+), 13 deletions(-) diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index a979ebe5f..27f016257 100644 --- a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -21,6 +21,8 @@ import ( rbacv1 "k8s.io/api/rbac/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/pointer" runtimeclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -907,12 +909,6 @@ var _ = Describe("[sriov] operator", func() { }) Describe("Custom SriovNetworkNodePolicy", func() { - var vfioNode string - var vfioNic sriovv1.InterfaceExt - execute.BeforeAll(func() { - vfioNode, vfioNic = sriovInfos.FindOneVfioSriovDevice() - }) - BeforeEach(func() { err := namespaces.Clean(operatorNamespace, namespaces.Test, clients, discovery.Enabled()) Expect(err).ToNot(HaveOccurred()) @@ -920,15 +916,17 @@ var _ = Describe("[sriov] operator", func() { }) Describe("Configuration", func() { - Context("Create vfio-pci node policy", func() { + var vfioNode string + var vfioNic sriovv1.InterfaceExt + BeforeEach(func() { if discovery.Enabled() { Skip("Test unsuitable to be run in discovery mode") } - if vfioNode == "" { - Skip("skip test as no vfio-pci capable PF was found") - } + + vfioNode, vfioNic = sriovInfos.FindOneVfioSriovDevice() + Expect(vfioNode).ToNot(Equal("")) }) It("Should be possible to create a vfio-pci resource", func() { @@ -964,14 +962,16 @@ var _ = Describe("[sriov] operator", func() { }) Context("PF Partitioning", func() { + var vfioNode string + var vfioNic sriovv1.InterfaceExt + // 27633 BeforeEach(func() { if discovery.Enabled() { Skip("Test unsuitable to be run in discovery mode") } - if vfioNode == "" { - Skip("skip test as no vfio-pci capable PF was found") - } + vfioNode, vfioNic = sriovInfos.FindOneVfioSriovDevice() + Expect(vfioNode).ToNot(Equal("")) }) It("Should be possible to partition the pf's vfs", func() { @@ -1784,6 +1784,137 @@ var _ = Describe("[sriov] operator", func() { Expect(err).ToNot(HaveOccurred()) }) }) + + Context("ExternallyManaged Validation", func() { + numVfs := 5 + var node string + var nic sriovv1.InterfaceExt + externallyManage := func(policy *sriovv1.SriovNetworkNodePolicy) { + policy.Spec.ExternallyManaged = true + } + + execute.BeforeAll(func() { + node, nic = sriovInfos.FindOneVfioSriovDevice() + Expect(node).ToNot(Equal("")) + }) + + It("Should not allow to create a policy if there are no vfs configured", func() { + resourceName := "test" + _, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, nic.Name, node, numVfs, resourceName, "netdevice", externallyManage) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("is higher than the virtual functions allocated for the PF externally")) + }) + + It("Should create a policy if the number of requested vfs is equal", func() { + resourceName := "testexternally" //nolint:goconst + By("allocating the 5 virtual functions to the selected device") + _, errOutput, err := runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("echo 5 > /host/sys/class/net/%s/device/sriov_numvfs", nic.Name)) + Expect(err).ToNot(HaveOccurred()) + Expect(errOutput).To(Equal("")) + + By("creating the policy that will use the 5 virtual functions we create manually on the system") + Eventually(func() error { + _, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, nic.Name, node, numVfs, resourceName, "netdevice", externallyManage) + return err + }, 1*time.Minute, time.Second).ShouldNot(HaveOccurred()) + + Eventually(func() int64 { + testedNode, err := clients.CoreV1Interface.Nodes().Get(context.Background(), node, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)] + allocatable, _ := resNum.AsInt64() + return allocatable + }, 2*time.Minute, time.Second).Should(Equal(int64(numVfs))) + + By("cleaning the manual sriov created") + _, errOutput, err = runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("echo 0 > /host/sys/class/net/%s/device/sriov_numvfs", nic.Name)) + Expect(err).ToNot(HaveOccurred()) + Expect(errOutput).To(Equal("")) + }) + + It("Should create a policy if the number of requested vfs is equal and not delete them when the policy is removed", func() { + resourceName := "testexternally" + var sriovPolicy *sriovv1.SriovNetworkNodePolicy + By("allocating the 5 virtual functions to the selected device") + _, errOutput, err := runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("echo 5 > /host/sys/class/net/%s/device/sriov_numvfs", nic.Name)) + Expect(err).ToNot(HaveOccurred()) + Expect(errOutput).To(Equal("")) + + By("creating the policy that will use the 5 virtual functions we create manually on the system") + Eventually(func() error { + sriovPolicy, err = network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, nic.Name, node, numVfs, resourceName, "netdevice", externallyManage) + return err + }, 2*time.Minute, time.Second).ShouldNot(HaveOccurred()) + + Eventually(func() int64 { + testedNode, err := clients.CoreV1Interface.Nodes().Get(context.Background(), node, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)] + allocatable, _ := resNum.AsInt64() + return allocatable + }, 3*time.Minute, time.Second).Should(Equal(int64(numVfs))) + + By("deleting the policy") + err = clients.Delete(context.Background(), sriovPolicy, &runtimeclient.DeleteOptions{}) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + + Eventually(func() int64 { + testedNode, err := clients.CoreV1Interface.Nodes().Get(context.Background(), node, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)] + allocatable, _ := resNum.AsInt64() + return allocatable + }, 2*time.Minute, time.Second).Should(Equal(int64(0))) + + By("checking the virtual functions are still on the host") + output, errOutput, err := runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("cat /host/sys/class/net/%s/device/sriov_numvfs", nic.Name)) + Expect(err).ToNot(HaveOccurred()) + Expect(errOutput).To(Equal("")) + Expect(output).To(ContainSubstring("5")) + + By("cleaning the manual sriov created") + _, errOutput, err = runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("echo 0 > /host/sys/class/net/%s/device/sriov_numvfs", nic.Name)) + Expect(err).ToNot(HaveOccurred()) + Expect(errOutput).To(Equal("")) + }) + + It("should reset the virtual functions if externallyManaged is false", func() { + resourceName := "testexternally" //nolint:goconst + + var sriovPolicy *sriovv1.SriovNetworkNodePolicy + By("creating the policy for 5 virtual functions") + sriovPolicy, err := network.CreateSriovPolicy(clients, "test-policy-", operatorNamespace, nic.Name, node, numVfs, resourceName, "netdevice") + Expect(err).ToNot(HaveOccurred()) + + Eventually(func() int64 { + testedNode, err := clients.CoreV1Interface.Nodes().Get(context.Background(), node, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)] + allocatable, _ := resNum.AsInt64() + return allocatable + }, 3*time.Minute, time.Second).Should(Equal(int64(numVfs))) + + By("deleting the policy") + err = clients.Delete(context.Background(), sriovPolicy, &runtimeclient.DeleteOptions{}) + Expect(err).ToNot(HaveOccurred()) + WaitForSRIOVStable() + + Eventually(func() int64 { + testedNode, err := clients.CoreV1Interface.Nodes().Get(context.Background(), node, metav1.GetOptions{}) + Expect(err).ToNot(HaveOccurred()) + resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)] + allocatable, _ := resNum.AsInt64() + return allocatable + }, 3*time.Minute, time.Second).Should(Equal(int64(0))) + + By("checking the virtual functions don't exist anymore on the system") + output, errOutput, err := runCommandOnConfigDaemon(node, "/bin/bash", "-c", fmt.Sprintf("cat /host/sys/class/net/%s/device/sriov_numvfs", nic.Name)) + Expect(err).ToNot(HaveOccurred()) + Expect(errOutput).To(Equal("")) + Expect(output).To(ContainSubstring("0")) + }) + }) }) }) @@ -2183,6 +2314,20 @@ func createVanillaNetworkPolicy(node string, sriovInfos *cluster.EnabledNodes, n }))) } +func runCommandOnConfigDaemon(nodeName string, command ...string) (string, string, error) { + pods := &corev1.PodList{} + label, err := labels.Parse("app=sriov-network-config-daemon") + Expect(err).ToNot(HaveOccurred()) + field, err := fields.ParseSelector(fmt.Sprintf("spec.nodeName=%s", nodeName)) + Expect(err).ToNot(HaveOccurred()) + err = clients.List(context.Background(), pods, &runtimeclient.ListOptions{Namespace: operatorNamespace, LabelSelector: label, FieldSelector: field}) + Expect(err).ToNot(HaveOccurred()) + Expect(len(pods.Items)).To(Equal(1)) + + output, errOutput, err := pod.ExecCommand(clients, &pods.Items[0], command...) + return output, errOutput, err +} + func defaultFilterPolicy(policy sriovv1.SriovNetworkNodePolicy) bool { return policy.Spec.DeviceType == "netdevice" } From 835f5ac1993c4a6a517ff89ed684179ef291c9e2 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Mon, 4 Sep 2023 15:56:52 +0300 Subject: [PATCH 12/13] improve udev rules to only disable nm on operator managed devices Signed-off-by: Sebastian Sch --- bindata/scripts/udev-find-sriov-pf.sh | 17 +++++ pkg/daemon/daemon.go | 84 +++++----------------- pkg/daemon/daemon_test.go | 20 ------ pkg/utils/utils.go | 100 +++++++++++++++++++++++--- 4 files changed, 128 insertions(+), 93 deletions(-) create mode 100755 bindata/scripts/udev-find-sriov-pf.sh diff --git a/bindata/scripts/udev-find-sriov-pf.sh b/bindata/scripts/udev-find-sriov-pf.sh new file mode 100755 index 000000000..ee708f903 --- /dev/null +++ b/bindata/scripts/udev-find-sriov-pf.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +cat <<'EOF' > /host/etc/udev/disable-nm-sriov.sh +#!/bin/bash +if [ ! -d "/sys/class/net/$1/device/physfn" ]; then + exit 0 +fi + +pf_path=$(readlink /sys/class/net/$1/device/physfn -n) +pf_pci_address=${pf_path##*../} + +if [ "$2" == "$pf_pci_address" ]; then + echo "NM_UNMANAGED=1" +fi +EOF + +chmod +x /host/etc/udev/disable-nm-sriov.sh diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index e93976af4..5c9f6f7bd 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -189,33 +189,6 @@ func New( } } -func (dn *Daemon) tryCreateUdevRuleWrapper() error { - ns, nodeStateErr := dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get( - context.Background(), - dn.name, - metav1.GetOptions{}, - ) - if nodeStateErr != nil { - glog.Warningf("Could not fetch node state %s: %v, skip updating switchdev udev rules", dn.name, nodeStateErr) - } else { - err := tryCreateSwitchdevUdevRule(ns) - if err != nil { - glog.Warningf("Failed to create switchdev udev rules: %v", err) - } - } - - // update udev rule only if we are on a BM environment - // for virtual environments we don't disable the vfs as they may be used by the platform/host - if dn.platform != utils.VirtualOpenStack { - err := tryCreateNMUdevRule() - if err != nil { - return err - } - } - - return nil -} - // Run the config daemon func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error { glog.V(0).Infof("Run(): node: %s", dn.name) @@ -251,8 +224,11 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error { } dn.storeManager = storeManager - if err := dn.tryCreateUdevRuleWrapper(); err != nil { - return err + if err := dn.prepareNMUdevRule(); err != nil { + glog.Warningf("failed to prepare udev files to disable network manager on requested VFs: %v", err) + } + if err := dn.tryCreateSwitchdevUdevRule(); err != nil { + glog.Warningf("failed to create udev files for switchdev") } var timeout int64 = 5 @@ -327,7 +303,7 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error { return err case <-time.After(30 * time.Second): glog.V(2).Info("Run(): period refresh") - if err := dn.tryCreateUdevRuleWrapper(); err != nil { + if err := dn.tryCreateSwitchdevUdevRule(); err != nil { glog.V(2).Info("Could not create udev rule: ", err) } } @@ -1079,8 +1055,18 @@ func (dn *Daemon) drainNode() error { return nil } -func tryCreateSwitchdevUdevRule(nodeState *sriovnetworkv1.SriovNetworkNodeState) error { +func (dn *Daemon) tryCreateSwitchdevUdevRule() error { glog.V(2).Infof("tryCreateSwitchdevUdevRule()") + nodeState, nodeStateErr := dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get( + context.Background(), + dn.name, + metav1.GetOptions{}, + ) + if nodeStateErr != nil { + glog.Warningf("Could not fetch node state %s: %v, skip updating switchdev udev rules", dn.name, nodeStateErr) + return nil + } + var newContent string filePath := path.Join(utils.FilesystemRoot, "/host/etc/udev/rules.d/20-switchdev.rules") @@ -1137,11 +1123,7 @@ func tryCreateSwitchdevUdevRule(nodeState *sriovnetworkv1.SriovNetworkNodeState) return nil } -func tryCreateNMUdevRule() error { - glog.V(2).Infof("tryCreateNMUdevRule()") - dirPath := path.Join(utils.FilesystemRoot, "/host/etc/udev/rules.d") - filePath := path.Join(dirPath, "10-nm-unmanaged.rules") - +func (dn *Daemon) prepareNMUdevRule() error { // we need to remove the Red Hat Virtio network device from the udev rule configuration // if we don't remove it when running the config-daemon on a virtual node it will disconnect the node after a reboot // even that the operator should not be installed on virtual environments that are not openstack @@ -1153,34 +1135,6 @@ func tryCreateNMUdevRule() error { } supportedVfIds = append(supportedVfIds, vfID) } - newContent := fmt.Sprintf("ACTION==\"add|change|move\", ATTRS{device}==\"%s\", ENV{NM_UNMANAGED}=\"1\"\n", strings.Join(supportedVfIds, "|")) - - // add NM udev rules for renaming VF rep - newContent = newContent + "SUBSYSTEM==\"net\", ACTION==\"add|move\", ATTRS{phys_switch_id}!=\"\", ATTR{phys_port_name}==\"pf*vf*\", ENV{NM_UNMANAGED}=\"1\"\n" - - oldContent, err := os.ReadFile(filePath) - // if oldContent = newContent, don't do anything - if err == nil && newContent == string(oldContent) { - return nil - } - - glog.V(2).Infof("Old udev content '%v' and new content '%v' differ. Writing to file %v.", - strings.TrimSuffix(string(oldContent), "\n"), - strings.TrimSuffix(newContent, "\n"), - filePath) - - err = os.MkdirAll(dirPath, os.ModePerm) - if err != nil && !os.IsExist(err) { - glog.Errorf("tryCreateNMUdevRule(): failed to create dir %s: %v", dirPath, err) - return err - } - // if the file does not exist or if oldContent != newContent - // write to file and create it if it doesn't exist - err = os.WriteFile(filePath, []byte(newContent), 0666) - if err != nil { - glog.Errorf("tryCreateNMUdevRule(): fail to write file: %v", err) - return err - } - return nil + return utils.PrepareNMUdevRule(supportedVfIds) } diff --git a/pkg/daemon/daemon_test.go b/pkg/daemon/daemon_test.go index cdd1fe11b..973882c4a 100644 --- a/pkg/daemon/daemon_test.go +++ b/pkg/daemon/daemon_test.go @@ -3,8 +3,6 @@ package daemon import ( "context" "flag" - "os" - "path" "testing" . "github.com/onsi/ginkgo/v2" @@ -234,17 +232,6 @@ var _ = Describe("Config Daemon", func() { Expect(sut.nodeState.GetGeneration()).To(BeNumerically("==", 777)) }) - - It("configure udev rules on host", func() { - - networkManagerUdevRulePath := path.Join(utils.FilesystemRoot, "host/etc/udev/rules.d/10-nm-unmanaged.rules") - - expectedContents := `ACTION=="add|change|move", ATTRS{device}=="0x1014|0x154c", ENV{NM_UNMANAGED}="1" -SUBSYSTEM=="net", ACTION=="add|move", ATTRS{phys_switch_id}!="", ATTR{phys_port_name}=="pf*vf*", ENV{NM_UNMANAGED}="1" -` - // No need to trigger any action on config-daemon, as it checks the file in the main loop - assertFileContents(networkManagerUdevRulePath, expectedContents) - }) }) Context("isNodeDraining", func() { @@ -321,10 +308,3 @@ func updateSriovNetworkNodeState(c snclientset.Interface, nodeState *sriovnetwor Update(context.Background(), nodeState, metav1.UpdateOptions{}) return err } - -func assertFileContents(path, contents string) { - Eventually(func() (string, error) { - ret, err := os.ReadFile(path) - return string(ret), err - }, "10s").WithOffset(1).Should(Equal(contents)) -} diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index ab1a0e847..0050725d9 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -8,6 +8,7 @@ import ( "net" "os" "os/exec" + "path" "path/filepath" "regexp" "strconv" @@ -40,6 +41,10 @@ const ( VendorMellanox = "15b3" DeviceBF2 = "a2d6" DeviceBF3 = "a2dc" + + udevFolder = "/etc/udev" + udevRulesFolder = udevFolder + "/rules.d" + udevDisableNM = "/bindata/scripts/udev-find-sriov-pf.sh" ) var InitialState sriovnetworkv1.SriovNetworkNodeState @@ -50,6 +55,8 @@ var pfPhysPortNameRe = regexp.MustCompile(`p\d+`) // FilesystemRoot used by test to mock interactions with filesystem var FilesystemRoot = "" +var SupportedVfIds []string + func init() { ClusterType = os.Getenv("CLUSTER_TYPE") } @@ -242,6 +249,11 @@ func ConfigSriovInterfaces(interfaces []sriovnetworkv1.Interface, ifaceStatuses ifaceStatus.Name, ifaceStatus.PciAddress) continue + } else { + err = RemoveUdevRule(ifaceStatus.PciAddress) + if err != nil { + return err + } } if err = resetSriovDevice(ifaceStatus); err != nil { @@ -362,15 +374,27 @@ func configSriovDevice(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetwor // set numVFs if iface.NumVfs != ifaceStatus.NumVfs { if iface.ExternallyManaged { - errMsg := fmt.Sprintf("configSriovDevice(): number of request virtual functions %d is not equal to configured virtual functions %d but the policy is configured as ExternallyManaged for device %s", iface.NumVfs, ifaceStatus.NumVfs, iface.PciAddress) - glog.Error(errMsg) - return fmt.Errorf(errMsg) - } + if iface.NumVfs > ifaceStatus.NumVfs { + errMsg := fmt.Sprintf("configSriovDevice(): number of request virtual functions %d is not equal to configured virtual functions %d but the policy is configured as ExternallyManaged for device %s", iface.NumVfs, ifaceStatus.NumVfs, iface.PciAddress) + glog.Error(errMsg) + return fmt.Errorf(errMsg) + } + } else { + // create the udev rule to disable all the vfs from network manager as this vfs are managed by the operator + err = AddUdevRule(iface.PciAddress) + if err != nil { + return err + } - err = setSriovNumVfs(iface.PciAddress, iface.NumVfs) - if err != nil { - glog.Errorf("configSriovDevice(): fail to set NumVfs for device %s", iface.PciAddress) - return err + err = setSriovNumVfs(iface.PciAddress, iface.NumVfs) + if err != nil { + err = RemoveUdevRule(iface.PciAddress) + if err != nil { + return err + } + glog.Errorf("configSriovDevice(): fail to set NumVfs for device %s", iface.PciAddress) + return err + } } } // set PF mtu @@ -886,3 +910,63 @@ func RebindVfToDefaultDriver(vfAddr string) error { glog.Warningf("RebindVfToDefaultDriver(): workaround implemented for VF %s", vfAddr) return nil } + +func PrepareNMUdevRule(supportedVfIds []string) error { + glog.V(2).Infof("PrepareNMUdevRule()") + dirPath := path.Join(FilesystemRoot, "/host/etc/udev/rules.d") + filePath := path.Join(dirPath, "10-nm-unmanaged.rules") + + // remove the old unmanaged rules file + if _, err := os.Stat(filePath); err == nil { + err = os.Remove(filePath) + if err != nil { + glog.Warningf("failed to remove the network manager global unmanaged rule on path %s: %v", filePath, err) + } + } + + // create the pf finder script for udev rules + var stdout, stderr bytes.Buffer + cmd := exec.Command("/bin/bash", path.Join(FilesystemRoot, udevDisableNM)) + cmd.Stdout = &stdout + cmd.Stderr = &stderr + if err := cmd.Run(); err != nil { + return err + } + glog.V(2).Infof("PrepareNMUdevRule(): %v", cmd.Stdout) + + //save the device list to use for udev rules + SupportedVfIds = supportedVfIds + return nil +} + +func AddUdevRule(pfPciAddress string) error { + glog.V(2).Infof("AddUdevRule(): %s", pfPciAddress) + pathFile := udevRulesFolder + udevRuleContent := fmt.Sprintf("SUBSYSTEM==\"net\", ACTION==\"add|change|move\", ATTRS{device}==\"%s\", IMPORT{program}=\"/etc/udev/disable-nm-sriov.sh $env{INTERFACE} %s\"", strings.Join(SupportedVfIds, "|"), pfPciAddress) + + err := os.MkdirAll(pathFile, os.ModePerm) + if err != nil && !os.IsExist(err) { + glog.Errorf("AddUdevRule(): failed to create dir %s: %v", pathFile, err) + return err + } + + filePath := path.Join(pathFile, fmt.Sprintf("10-nm-disable-%s.rules", pfPciAddress)) + // if the file does not exist or if oldContent != newContent + // write to file and create it if it doesn't exist + err = os.WriteFile(filePath, []byte(udevRuleContent), 0666) + if err != nil { + glog.Errorf("AddUdevRule(): fail to write file: %v", err) + return err + } + return nil +} + +func RemoveUdevRule(pfPciAddress string) error { + pathFile := udevRulesFolder + filePath := path.Join(pathFile, fmt.Sprintf("10-nm-disable-%s.rules", pfPciAddress)) + err := os.Remove(filePath) + if err != nil && !os.IsNotExist(err) { + return err + } + return nil +} From 53cfd3d6e8677eb32fc96ad55a50be758707bbb7 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Tue, 26 Sep 2023 18:10:00 +0300 Subject: [PATCH 13/13] Fix comments Signed-off-by: Sebastian Sch --- pkg/utils/mock/mock_store.go | 8 ++++---- pkg/utils/store.go | 8 ++++---- pkg/utils/utils.go | 18 ++++++++++-------- pkg/webhook/validate.go | 2 +- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/pkg/utils/mock/mock_store.go b/pkg/utils/mock/mock_store.go index 1527f611e..d6ff6b8fd 100644 --- a/pkg/utils/mock/mock_store.go +++ b/pkg/utils/mock/mock_store.go @@ -65,15 +65,15 @@ func (mr *MockStoreManagerInterfaceMockRecorder) LoadPfsStatus(pciAddress interf } // SaveLastPfAppliedStatus mocks base method. -func (m *MockStoreManagerInterface) SaveLastPfAppliedStatus(pciAddress string, PfInfo *v1.Interface) error { +func (m *MockStoreManagerInterface) SaveLastPfAppliedStatus(PfInfo *v1.Interface) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "SaveLastPfAppliedStatus", pciAddress, PfInfo) + ret := m.ctrl.Call(m, "SaveLastPfAppliedStatus", PfInfo) ret0, _ := ret[0].(error) return ret0 } // SaveLastPfAppliedStatus indicates an expected call of SaveLastPfAppliedStatus. -func (mr *MockStoreManagerInterfaceMockRecorder) SaveLastPfAppliedStatus(pciAddress, PfInfo interface{}) *gomock.Call { +func (mr *MockStoreManagerInterfaceMockRecorder) SaveLastPfAppliedStatus(PfInfo interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SaveLastPfAppliedStatus", reflect.TypeOf((*MockStoreManagerInterface)(nil).SaveLastPfAppliedStatus), pciAddress, PfInfo) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SaveLastPfAppliedStatus", reflect.TypeOf((*MockStoreManagerInterface)(nil).SaveLastPfAppliedStatus), PfInfo) } diff --git a/pkg/utils/store.go b/pkg/utils/store.go index 4db7b86ce..2356db137 100644 --- a/pkg/utils/store.go +++ b/pkg/utils/store.go @@ -22,7 +22,7 @@ const ( //go:generate ../../bin/mockgen -destination mock/mock_store.go -source store.go type StoreManagerInterface interface { ClearPCIAddressFolder() error - SaveLastPfAppliedStatus(pciAddress string, PfInfo *sriovnetworkv1.Interface) error + SaveLastPfAppliedStatus(PfInfo *sriovnetworkv1.Interface) error LoadPfsStatus(pciAddress string) (*sriovnetworkv1.Interface, bool, error) } @@ -82,7 +82,7 @@ func (s *StoreManager) ClearPCIAddressFolder() error { if os.IsNotExist(err) { return nil } - return fmt.Errorf("failed to check the pci address folder path %s", PfAppliedConfigUse) + return fmt.Errorf("failed to check the pci address folder path %s: %v", PfAppliedConfigUse, err) } err = os.RemoveAll(PfAppliedConfigUse) @@ -100,7 +100,7 @@ func (s *StoreManager) ClearPCIAddressFolder() error { // SaveLastPfAppliedStatus will save the PF object as a json into the /etc/sriov-operator/pci/ // this function must be called after running the chroot function -func (s *StoreManager) SaveLastPfAppliedStatus(pciAddress string, PfInfo *sriovnetworkv1.Interface) error { +func (s *StoreManager) SaveLastPfAppliedStatus(PfInfo *sriovnetworkv1.Interface) error { data, err := json.Marshal(PfInfo) if err != nil { glog.Errorf("failed to marshal PF status %+v: %v", *PfInfo, err) @@ -108,7 +108,7 @@ func (s *StoreManager) SaveLastPfAppliedStatus(pciAddress string, PfInfo *sriovn } hostExtension := getHostExtension(s.RunOnHost) - pathFile := filepath.Join(hostExtension, PfAppliedConfig, pciAddress) + pathFile := filepath.Join(hostExtension, PfAppliedConfig, PfInfo.PciAddress) err = os.WriteFile(pathFile, data, 0644) return err } diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 0050725d9..c9aecd7ed 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -45,6 +45,7 @@ const ( udevFolder = "/etc/udev" udevRulesFolder = udevFolder + "/rules.d" udevDisableNM = "/bindata/scripts/udev-find-sriov-pf.sh" + nmUdevRule = "SUBSYSTEM==\"net\", ACTION==\"add|change|move\", ATTRS{device}==\"%s\", IMPORT{program}=\"/etc/udev/disable-nm-sriov.sh $env{INTERFACE} %s\"" ) var InitialState sriovnetworkv1.SriovNetworkNodeState @@ -135,10 +136,10 @@ func DiscoverSriovDevices(withUnsupported bool, storeManager StoreManagerInterfa pfStatus, exist, err := storeManager.LoadPfsStatus(iface.PciAddress) if err != nil { glog.Warningf("DiscoverSriovDevices(): failed to load PF status from disk: %v", err) - } - - if exist { - iface.ExternallyManaged = pfStatus.ExternallyManaged + } else { + if exist { + iface.ExternallyManaged = pfStatus.ExternallyManaged + } } if dputils.IsSriovPF(device.Address) { @@ -196,7 +197,7 @@ func ConfigSriovInterfaces(interfaces []sriovnetworkv1.Interface, ifaceStatuses glog.V(2).Infof("syncNodeState(): no need update interface %s", iface.PciAddress) // Save the PF status to the host - err = storeManager.SaveLastPfAppliedStatus(iface.PciAddress, &iface) + err = storeManager.SaveLastPfAppliedStatus(&iface) if err != nil { glog.Errorf("SyncNodeState(): failed to save PF applied config to host: %v", err) return err @@ -217,7 +218,7 @@ func ConfigSriovInterfaces(interfaces []sriovnetworkv1.Interface, ifaceStatuses } // Save the PF status to the host - err = storeManager.SaveLastPfAppliedStatus(iface.PciAddress, &iface) + err = storeManager.SaveLastPfAppliedStatus(&iface) if err != nil { glog.Errorf("SyncNodeState(): failed to save PF applied config to host: %v", err) return err @@ -930,9 +931,10 @@ func PrepareNMUdevRule(supportedVfIds []string) error { cmd.Stdout = &stdout cmd.Stderr = &stderr if err := cmd.Run(); err != nil { + glog.Errorf("PrepareNMUdevRule(): failed to prepare nmUdevRule, stderr %s: %v", stderr.String(), err) return err } - glog.V(2).Infof("PrepareNMUdevRule(): %v", cmd.Stdout) + glog.V(2).Infof("PrepareNMUdevRule(): %v", stdout.String()) //save the device list to use for udev rules SupportedVfIds = supportedVfIds @@ -942,7 +944,7 @@ func PrepareNMUdevRule(supportedVfIds []string) error { func AddUdevRule(pfPciAddress string) error { glog.V(2).Infof("AddUdevRule(): %s", pfPciAddress) pathFile := udevRulesFolder - udevRuleContent := fmt.Sprintf("SUBSYSTEM==\"net\", ACTION==\"add|change|move\", ATTRS{device}==\"%s\", IMPORT{program}=\"/etc/udev/disable-nm-sriov.sh $env{INTERFACE} %s\"", strings.Join(SupportedVfIds, "|"), pfPciAddress) + udevRuleContent := fmt.Sprintf(nmUdevRule, strings.Join(SupportedVfIds, "|"), pfPciAddress) err := os.MkdirAll(pathFile, os.ModePerm) if err != nil && !os.IsExist(err) { diff --git a/pkg/webhook/validate.go b/pkg/webhook/validate.go index 624577df2..826b44516 100644 --- a/pkg/webhook/validate.go +++ b/pkg/webhook/validate.go @@ -304,7 +304,7 @@ func validatePolicyForNodeState(policy *sriovnetworkv1.SriovNetworkNodePolicy, s } if policy.Spec.LinkType != "" && strings.ToLower(policy.Spec.LinkType) != strings.ToLower(iface.LinkType) { - return fmt.Errorf("LinkType(%d) in CR %s is not equal to the LinkType for the PF externally value(%d)", policy.Spec.Mtu, policy.GetName(), iface.Mtu) + return fmt.Errorf("LinkType(%s) in CR %s is not equal to the LinkType for the PF externally value(%s)", policy.Spec.LinkType, policy.GetName(), iface.LinkType) } } // vdpa: only mellanox cards are supported