Skip to content

Commit

Permalink
Add more checks on generic plugin
Browse files Browse the repository at this point in the history
Today, we are missing 2 checks on settings that sriov netop is
configuring:
* PF is up
* GUID is set

Without checking for these 2, we risk that changes made by the user
directly to the system are not reconciled by the netop leaving the
system in a bad state.

This commit is adding those checks.

Signed-off-by: Vasilis Remmas <vremmas@nvidia.com>
  • Loading branch information
vasrem committed May 10, 2024
1 parent 8326f50 commit 0ecd227
Show file tree
Hide file tree
Showing 14 changed files with 845 additions and 35 deletions.
16 changes: 16 additions & 0 deletions api/v1/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,12 @@ func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool {
log.V(2).Info("NeedToUpdateSriov(): NumVfs needs update", "desired", ifaceSpec.NumVfs, "current", ifaceStatus.NumVfs)
return true
}

if ifaceStatus.LinkAdminState == "down" {
log.V(2).Info("NeedToUpdateSriov(): PF link status needs update", "desired to include", "up", "current", ifaceStatus.LinkAdminState)
return true
}

if ifaceSpec.NumVfs > 0 {
for _, vfStatus := range ifaceStatus.VFs {
ingroup := false
Expand Down Expand Up @@ -300,6 +306,16 @@ func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool {
return true
}

if (strings.EqualFold(ifaceStatus.LinkType, consts.LinkTypeETH) && groupSpec.IsRdma) || strings.EqualFold(ifaceStatus.LinkType, consts.LinkTypeIB) {
// We do this check only if a Node GUID is set to ensure that we were able to read the
// Node GUID. We intentionally skip empty Node GUID in vfStatus because this may happen
// when the VF is allocated to a workload.
if vfStatus.GUID == consts.UninitializedNodeGUID {
log.V(2).Info("NeedToUpdateSriov(): VF GUID needs update",
"vf", vfStatus.VfID, "current", vfStatus.GUID)
return true
}
}
// this is needed to be sure the admin mac address is configured as expected
if ifaceSpec.ExternallyManaged {
log.V(2).Info("NeedToUpdateSriov(): need to update the device as it's externally manage",
Expand Down
2 changes: 2 additions & 0 deletions api/v1/sriovnetworknodestate_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ type InterfaceExt struct {
NumVfs int `json:"numVfs,omitempty"`
LinkSpeed string `json:"linkSpeed,omitempty"`
LinkType string `json:"linkType,omitempty"`
LinkAdminState string `json:"linkAdminState,omitempty"`
EswitchMode string `json:"eSwitchMode,omitempty"`
ExternallyManaged bool `json:"externallyManaged,omitempty"`
TotalVfs int `json:"totalvfs,omitempty"`
Expand All @@ -82,6 +83,7 @@ type VirtualFunction struct {
Mtu int `json:"mtu,omitempty"`
VfID int `json:"vfID"`
VdpaType string `json:"vdpaType,omitempty"`
GUID string `json:"guid,omitempty"`
}

// SriovNetworkNodeStateStatus defines the observed state of SriovNetworkNodeState
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ spec:
type: string
driver:
type: string
guid:
type: string
mac:
type: string
mtu:
Expand Down Expand Up @@ -135,6 +137,8 @@ spec:
type: string
externallyManaged:
type: boolean
linkAdminState:
type: string
linkSpeed:
type: string
linkType:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ spec:
type: string
driver:
type: string
guid:
type: string
mac:
type: string
mtu:
Expand Down Expand Up @@ -135,6 +137,8 @@ spec:
type: string
externallyManaged:
type: boolean
linkAdminState:
type: string
linkSpeed:
type: string
linkType:
Expand Down
2 changes: 2 additions & 0 deletions pkg/consts/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ const (
LinkTypeIB = "IB"
LinkTypeETH = "ETH"

UninitializedNodeGUID = "0000:0000:0000:0000"

DeviceTypeVfioPci = "vfio-pci"
DeviceTypeNetDevice = "netdevice"
VdpaTypeVirtio = "virtio"
Expand Down
28 changes: 28 additions & 0 deletions pkg/helper/mock/mock_helper.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions pkg/host/internal/lib/netlink/mock/mock_netlink.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions pkg/host/internal/lib/netlink/netlink.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ type NetlinkLib interface {
// cmode argument should contain valid cmode value as uint8, modes are define in nl.DEVLINK_PARAM_CMODE_* constants
// value argument should have one of the following types: uint8, uint16, uint32, string, bool
DevlinkSetDeviceParam(bus string, device string, param string, cmode uint8, value interface{}) error
// RdmaLinkByName finds a link by name and returns a pointer to the object if
// found and nil error, otherwise returns error code.
RdmaLinkByName(name string) (*netlink.RdmaLink, error)
}

type libWrapper struct{}
Expand Down Expand Up @@ -142,3 +145,9 @@ func (w *libWrapper) DevlinkGetDeviceParamByName(bus string, device string, para
func (w *libWrapper) DevlinkSetDeviceParam(bus string, device string, param string, cmode uint8, value interface{}) error {
return netlink.DevlinkSetDeviceParam(bus, device, param, cmode, value)
}

// RdmaLinkByName finds a link by name and returns a pointer to the object if
// found and nil error, otherwise returns error code.
func (w *libWrapper) RdmaLinkByName(name string) (*netlink.RdmaLink, error) {
return netlink.RdmaLinkByName(name)
}
52 changes: 52 additions & 0 deletions pkg/host/internal/network/network.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package network

import (
"errors"
"fmt"
"io/fs"
"net"
"os"
"path/filepath"
"strconv"
Expand Down Expand Up @@ -187,6 +190,35 @@ func (n *network) GetNetDevMac(ifaceName string) string {
return link.Attrs().HardwareAddr.String()
}

// GetNetDevNodeGUID returns the network interface node GUID if device is RDMA capable otherwise returns empty string
func (n *network) GetNetDevNodeGUID(pciAddr string) string {
if len(pciAddr) == 0 {
return ""
}

rdmaDevicesPath := filepath.Join(vars.FilesystemRoot, consts.SysBusPciDevices, pciAddr, "infiniband")
rdmaDevices, err := os.ReadDir(rdmaDevicesPath)
if err != nil {
if !errors.Is(err, fs.ErrNotExist) {
log.Log.Error(err, "GetNetDevNodeGUID(): failed to read RDMA related directory", "pciAddr", pciAddr)
}
return ""
}

if len(rdmaDevices) != 1 {
log.Log.Error(err, "GetNetDevNodeGUID(): expected just one RDMA device", "pciAddr", pciAddr, "numOfDevices", len(rdmaDevices))
return ""
}

rdmaLink, err := n.netlinkLib.RdmaLinkByName(rdmaDevices[0].Name())
if err != nil {
log.Log.Error(err, "GetNetDevNodeGUID(): failed to get RDMA link", "pciAddr", pciAddr)
return ""
}

return rdmaLink.Attrs.NodeGuid
}

func (n *network) GetNetDevLinkSpeed(ifaceName string) string {
log.Log.V(2).Info("GetNetDevLinkSpeed(): get LinkSpeed", "device", ifaceName)
speedFilePath := filepath.Join(vars.FilesystemRoot, consts.SysClassNet, ifaceName, "speed")
Expand Down Expand Up @@ -329,3 +361,23 @@ func (n *network) EnableHwTcOffload(ifaceName string) error {
log.Log.V(0).Info("EnableHwTcOffload(): feature is still disabled, not supported by device", "device", ifaceName)
return nil
}

// GetNetDevLinkAdminState returns the admin state of the interface.
func (n *network) GetNetDevLinkAdminState(ifaceName string) string {
log.Log.V(2).Info("GetNetDevLinkAdminState(): get LinkAdminState", "device", ifaceName)
if len(ifaceName) == 0 {
return ""
}

link, err := n.netlinkLib.LinkByName(ifaceName)
if err != nil {
log.Log.Error(err, "GetNetDevLinkAdminState(): failed to get link", "device", ifaceName)
return ""
}

if link.Attrs().Flags&net.FlagUp == 0 {
return "down"
}

return "up"
}
36 changes: 36 additions & 0 deletions pkg/host/internal/network/network_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import (
ethtoolMockPkg "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/lib/ethtool/mock"
netlinkMockPkg "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/lib/netlink/mock"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/types"
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/fakefilesystem"
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/helpers"
)

func getDevlinkParam(t uint8, value interface{}) *netlink.DevlinkParam {
Expand Down Expand Up @@ -202,4 +204,38 @@ var _ = Describe("Network", func() {
Expect(n.EnableHwTcOffload("enp216s0f0np0")).To(MatchError(testErr))
})
})
Context("GetNetDevNodeGUID", func() {
It("Returns empty when pciAddr is empty", func() {
Expect(n.GetNetDevNodeGUID("")).To(Equal(""))
})
It("Returns empty when infiniband directory can't be read", func() {
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{
Dirs: []string{"/sys/bus/pci/devices/0000:4b:00.3/"},
})
Expect(n.GetNetDevNodeGUID("0000:4b:00.3")).To(Equal(""))
})
It("Returns empty when more than one RDMA devices are detected for pciAddr", func() {
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{
Dirs: []string{
"/sys/bus/pci/devices/0000:4b:00.3/infiniband/mlx5_2",
"/sys/bus/pci/devices/0000:4b:00.3/infiniband/mlx5_3",
},
})
Expect(n.GetNetDevNodeGUID("0000:4b:00.3")).To(Equal(""))
})
It("Returns empty when it fails to read RDMA link", func() {
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{
Dirs: []string{"/sys/bus/pci/devices/0000:4b:00.3/infiniband/mlx5_2"},
})
netlinkLibMock.EXPECT().RdmaLinkByName("mlx5_2").Return(nil, fmt.Errorf("some-error"))
Expect(n.GetNetDevNodeGUID("0000:4b:00.3")).To(Equal(""))
})
It("Returns populated node GUID on correct setup", func() {
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{
Dirs: []string{"/sys/bus/pci/devices/0000:4b:00.3/infiniband/mlx5_2"},
})
netlinkLibMock.EXPECT().RdmaLinkByName("mlx5_2").Return(&netlink.RdmaLink{Attrs: netlink.RdmaLinkAttrs{NodeGuid: "1122:3344:5566:7788"}}, nil)
Expect(n.GetNetDevNodeGUID("0000:4b:00.3")).To(Equal("1122:3344:5566:7788"))
})
})
})
23 changes: 13 additions & 10 deletions pkg/host/internal/sriov/sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package sriov
import (
"errors"
"fmt"
"net"
"os"
"path/filepath"
"strconv"
Expand Down Expand Up @@ -134,6 +135,7 @@ func (s *sriov) GetVfInfo(pciAddr string, devices []*ghw.PCIDevice) sriovnetwork
vf.Mac = link.Attrs().HardwareAddr.String()
}
}
vf.GUID = s.networkHelper.GetNetDevNodeGUID(pciAddr)

for _, device := range devices {
if pciAddr == device.Address {
Expand Down Expand Up @@ -259,15 +261,16 @@ func (s *sriov) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]sri
}

iface := sriovnetworkv1.InterfaceExt{
Name: pfNetName,
PciAddress: device.Address,
Driver: driver,
Vendor: device.Vendor.ID,
DeviceID: device.Product.ID,
Mtu: link.Attrs().MTU,
Mac: link.Attrs().HardwareAddr.String(),
LinkType: s.encapTypeToLinkType(link.Attrs().EncapType),
LinkSpeed: s.networkHelper.GetNetDevLinkSpeed(pfNetName),
Name: pfNetName,
PciAddress: device.Address,
Driver: driver,
Vendor: device.Vendor.ID,
DeviceID: device.Product.ID,
Mtu: link.Attrs().MTU,
Mac: link.Attrs().HardwareAddr.String(),
LinkType: s.encapTypeToLinkType(link.Attrs().EncapType),
LinkSpeed: s.networkHelper.GetNetDevLinkSpeed(pfNetName),
LinkAdminState: s.networkHelper.GetNetDevLinkAdminState(pfNetName),
}

pfStatus, exist, err := storeManager.LoadPfsStatus(iface.PciAddress)
Expand Down Expand Up @@ -571,7 +574,7 @@ func (s *sriov) configSriovDevice(iface *sriovnetworkv1.Interface, skipVFConfigu
if err != nil {
return err
}
if pfLink.Attrs().OperState != netlink.OperUp {
if pfLink.Attrs().Flags&net.FlagUp == 0 {
err = s.netlinkLib.LinkSetUp(pfLink)
if err != nil {
return err
Expand Down
28 changes: 28 additions & 0 deletions pkg/host/mock/mock_host.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 0ecd227

Please sign in to comment.