Skip to content

Commit

Permalink
Add support for switchdev configs to sriov pkg
Browse files Browse the repository at this point in the history
Signed-off-by: Yury Kulazhenkov <ykulazhenkov@nvidia.com>
  • Loading branch information
ykulazhenkov committed Feb 22, 2024
1 parent 057e540 commit 5282b87
Show file tree
Hide file tree
Showing 3 changed files with 233 additions and 16 deletions.
163 changes: 150 additions & 13 deletions pkg/host/internal/sriov/sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ type sriov struct {
kernelHelper types.KernelInterface
networkHelper types.NetworkInterface
udevHelper types.UdevInterface
vdpaHelper types.VdpaInterface
netlinkLib netlinkPkg.NetlinkLib
dputilsLib dputilsPkg.DPUtilsLib
}
Expand All @@ -44,12 +45,14 @@ func New(utilsHelper utils.CmdInterface,
kernelHelper types.KernelInterface,
networkHelper types.NetworkInterface,
udevHelper types.UdevInterface,
vdpaHelper types.VdpaInterface,
netlinkLib netlinkPkg.NetlinkLib,
dputilsLib dputilsPkg.DPUtilsLib) types.SriovInterface {
return &sriov{utilsHelper: utilsHelper,
kernelHelper: kernelHelper,
networkHelper: networkHelper,
udevHelper: udevHelper,
vdpaHelper: vdpaHelper,
netlinkLib: netlinkLib,
dputilsLib: dputilsLib,
}
Expand Down Expand Up @@ -77,22 +80,28 @@ func (s *sriov) SetSriovNumVfs(pciAddr string, numVfs int) error {

func (s *sriov) ResetSriovDevice(ifaceStatus sriovnetworkv1.InterfaceExt) error {
log.Log.V(2).Info("ResetSriovDevice(): reset SRIOV device", "address", ifaceStatus.PciAddress)
if err := s.SetSriovNumVfs(ifaceStatus.PciAddress, 0); err != nil {
return err
}
if ifaceStatus.LinkType == consts.LinkTypeETH {
var mtu int
eswitchMode := sriovnetworkv1.ESwithModeLegacy
is := sriovnetworkv1.InitialState.GetInterfaceStateByPciAddress(ifaceStatus.PciAddress)
if is != nil {
mtu = is.Mtu
eswitchMode = sriovnetworkv1.GetEswitchModeFromStatus(is)
} else {
mtu = 1500
}
log.Log.V(2).Info("ResetSriovDevice(): reset mtu", "value", mtu)
if err := s.networkHelper.SetNetdevMTU(ifaceStatus.PciAddress, mtu); err != nil {
return err
}
log.Log.V(2).Info("ResetSriovDevice(): reset eswitch mode and number of VFs", "mode", eswitchMode)
if err := s.setEswitchModeAndNumVFs(ifaceStatus.PciAddress, eswitchMode, 0); err != nil {
return err
}
} else if ifaceStatus.LinkType == consts.LinkTypeIB {
if err := s.SetSriovNumVfs(ifaceStatus.PciAddress, 0); err != nil {
return err
}
if err := s.networkHelper.SetNetdevMTU(ifaceStatus.PciAddress, 2048); err != nil {
return err
}
Expand Down Expand Up @@ -305,6 +314,9 @@ func (s *sriov) configSriovPFDevice(iface *sriovnetworkv1.Interface) error {
log.Log.Error(err, "configSriovPFDevice(): fail to set NumVfs for device", "device", iface.PciAddress)
return err
}
if err := s.configureHWOptionsForPF(iface); err != nil {
return err
}
err := s.addUdevRules(iface)
if err != nil {
log.Log.Error(err, "configSriovPFDevice(): fail to set add udev rules", "device", iface.PciAddress)
Expand All @@ -330,6 +342,41 @@ func (s *sriov) configSriovPFDevice(iface *sriovnetworkv1.Interface) error {
return nil
}

// configureHWOptionsForPF applies the HW options needed by PFs whose target
// eswitch mode is switchdev: it enables HW TC offload for the PF and makes sure
// the devlink "flow_steering_mode" parameter is set to "smfs".
// PFs with any other target mode are left untouched. When reading the parameter
// fails with EINVAL the device is treated as not supporting software flow
// steering and the function returns nil without changing anything else.
func (s *sriov) configureHWOptionsForPF(iface *sriovnetworkv1.Interface) error {
	log.Log.V(2).Info("configureHWOptionsForPF(): configure HW options for device",
		"device", iface.PciAddress)
	if sriovnetworkv1.GetEswitchModeFromSpec(iface) != sriovnetworkv1.ESwithModeSwitchDev {
		// we need to configure HW options only for PFs for which switchdev is a target mode
		return nil
	}
	if err := s.networkHelper.EnableHwTcOffload(iface.Name); err != nil {
		return err
	}
	desiredFlowSteeringMode := "smfs"
	currentFlowSteeringMode, err := s.networkHelper.GetDevlinkDeviceParam(iface.PciAddress, "flow_steering_mode")
	if err != nil {
		if errors.Is(err, syscall.EINVAL) {
			log.Log.V(2).Info("configureHWOptionsForPF(): software flow steering is not supported by the device, skip configuration",
				"device", iface.PciAddress)
			return nil
		}
		log.Log.Error(err, "configureHWOptionsForPF(): fail to read current flow steering mode for the device", "device", iface.PciAddress)
		return err
	}
	if currentFlowSteeringMode == desiredFlowSteeringMode {
		return nil
	}
	// flow steering mode can be changed only when NIC is in legacy mode
	if s.getCurrentEswitchMode(iface.PciAddress) != sriovnetworkv1.ESwithModeLegacy {
		// do not ignore a failed mode switch: changing the parameter below
		// relies on the NIC being in legacy mode
		if err := s.setEswitchModeAndNumVFs(iface.PciAddress, sriovnetworkv1.ESwithModeLegacy, 0); err != nil {
			log.Log.Error(err, "configureHWOptionsForPF(): fail to switch the device to legacy mode", "device", iface.PciAddress)
			return err
		}
	}
	if err := s.networkHelper.SetDevlinkDeviceParam(iface.PciAddress, "flow_steering_mode", desiredFlowSteeringMode); err != nil {
		log.Log.Error(err, "configureHWOptionsForPF(): fail to configure flow steering mode for the device", "device", iface.PciAddress)
		return err
	}
	return nil
}

func (s *sriov) checkExternallyManagedPF(iface *sriovnetworkv1.Interface) error {
log.Log.V(2).Info("checkExternallyManagedPF(): configure PF sriov device",
"device", iface.PciAddress)
Expand All @@ -341,6 +388,14 @@ func (s *sriov) checkExternallyManagedPF(iface *sriovnetworkv1.Interface) error
log.Log.Error(nil, errMsg)
return fmt.Errorf(errMsg)
}
currentEswitchMode := s.getCurrentEswitchMode(iface.PciAddress)
expectedEswitchMode := sriovnetworkv1.GetEswitchModeFromSpec(iface)
if currentEswitchMode != expectedEswitchMode {
errMsg := fmt.Sprintf("checkExternallyManagedPF(): requested ESwitchMode mode \"%s\" is not equal to configured \"%s\" "+
"but the policy is configured as ExternallyManaged for device %s", expectedEswitchMode, currentEswitchMode, iface.PciAddress)
log.Log.Error(nil, errMsg)
return fmt.Errorf(errMsg)
}
currentMtu := s.networkHelper.GetNetdevMTU(iface.PciAddress)
if iface.Mtu > 0 && iface.Mtu > currentMtu {
err := fmt.Errorf("checkExternallyManagedPF(): requested MTU(%d) is greater than configured MTU(%d) for device %s. cannot change MTU as policy is configured as ExternallyManaged",
Expand Down Expand Up @@ -434,7 +489,16 @@ func (s *sriov) configSriovVFDevices(iface *sriovnetworkv1.Interface) error {
if err = s.kernelHelper.UnbindDriverIfNeeded(addr, group.IsRdma); err != nil {
return err
}

// we set eswitch mode before this point and if the desired mode (and current at this point)
// is legacy, then the VDPA device has already been removed automatically,
// so we don't need to check it
if sriovnetworkv1.GetEswitchModeFromSpec(iface) == sriovnetworkv1.ESwithModeSwitchDev && group.VdpaType == "" {
if err := s.vdpaHelper.DeleteVDPADevice(addr); err != nil {
log.Log.Error(err, "configSriovVFDevices(): fail to delete VDPA device",
"device", addr)
return err
}
}
if !sriovnetworkv1.StringInArray(group.DeviceType, vars.DpdkDrivers) {
if err := s.kernelHelper.BindDefaultDriver(addr); err != nil {
log.Log.Error(err, "configSriovVFDevices(): fail to bind default driver for device", "device", addr)
Expand All @@ -447,6 +511,13 @@ func (s *sriov) configSriovVFDevices(iface *sriovnetworkv1.Interface) error {
return err
}
}
if sriovnetworkv1.GetEswitchModeFromSpec(iface) == sriovnetworkv1.ESwithModeSwitchDev && group.VdpaType != "" {
if err := s.vdpaHelper.CreateVDPADevice(addr, group.VdpaType); err != nil {
log.Log.Error(err, "configSriovVFDevices(): fail to create VDPA device",
"vdpaType", group.VdpaType, "device", addr)
return err
}
}
} else {
if err := s.kernelHelper.BindDpdkDriver(addr, group.DeviceType); err != nil {
log.Log.Error(err, "configSriovVFDevices(): fail to bind driver for device",
Expand Down Expand Up @@ -820,32 +891,98 @@ func (s *sriov) GetLinkType(name string) string {

// addUdevRules creates the udev rules required for the PF:
//   - rule to disable NetworkManager for VFs - for all modes
//   - rule to rename VF representors - only for switchdev mode
//
// The representor rule is built from the PF's physical port name and physical
// switch ID, so a failure to read either of them is returned to the caller.
func (s *sriov) addUdevRules(iface *sriovnetworkv1.Interface) error {
	log.Log.V(2).Info("addUdevRules(): add udev rules for device",
		"device", iface.PciAddress)
	if err := s.udevHelper.AddUdevRule(iface.PciAddress); err != nil {
		return err
	}
	if sriovnetworkv1.GetEswitchModeFromSpec(iface) != sriovnetworkv1.ESwithModeSwitchDev {
		// the representor rule is needed only when switchdev is the target mode
		return nil
	}
	portName, err := s.networkHelper.GetPhysPortName(iface.Name)
	if err != nil {
		return err
	}
	switchID, err := s.networkHelper.GetPhysSwitchID(iface.Name)
	if err != nil {
		return err
	}
	return s.udevHelper.AddVfRepresentorUdevRule(iface.PciAddress, iface.Name, switchID, portName)
}

// removeUdevRules removes all udev rules created by the operator for the PF:
// first the rule removed by RemoveUdevRule, then the VF representor rule.
// The first error stops the removal and is returned to the caller.
func (s *sriov) removeUdevRules(pciAddress string) error {
	log.Log.V(2).Info("removeUdevRules(): remove udev rules for device",
		"device", pciAddress)
	if err := s.udevHelper.RemoveUdevRule(pciAddress); err != nil {
		return err
	}
	return s.udevHelper.RemoveVfRepresentorUdevRule(pciAddress)
}

// create VFs on the PF
func (s *sriov) createVFs(iface *sriovnetworkv1.Interface) error {
expectedEswitchMode := sriovnetworkv1.GetEswitchModeFromSpec(iface)
log.Log.V(2).Info("createVFs(): configure VFs for device",
"device", iface.PciAddress, "count", iface.NumVfs)
if iface.NumVfs == s.dputilsLib.GetVFconfigured(iface.PciAddress) {
"device", iface.PciAddress, "count", iface.NumVfs, "mode", expectedEswitchMode)

if iface.NumVfs == s.dputilsLib.GetVFconfigured(iface.PciAddress) && expectedEswitchMode == s.getCurrentEswitchMode(iface.PciAddress) {
log.Log.V(2).Info("createVFs(): device is already configured",
"device", iface.PciAddress, "count", iface.NumVfs)
"device", iface.PciAddress, "count", iface.NumVfs, "mode", expectedEswitchMode)
return nil
}
// TODO add support for VF creation in switchdev mode
return s.SetSriovNumVfs(iface.PciAddress, iface.NumVfs)
return s.setEswitchModeAndNumVFs(iface.PciAddress, expectedEswitchMode, iface.NumVfs)
}

// getCurrentEswitchMode returns the eswitch mode reported by GetNicSriovMode
// for the device. If the lookup fails or yields an empty mode, the problem is
// logged and legacy mode is returned as a fallback.
func (s *sriov) getCurrentEswitchMode(pciAddr string) string {
	log.Log.V(2).Info("getCurrentEswitchMode(): get eswitch mode", "device", pciAddr)
	current, err := s.GetNicSriovMode(pciAddr)
	if err == nil && current != "" {
		return current
	}
	// no usable answer from the device: fall back to legacy
	log.Log.Error(err, "failed to get eswitch mode for the device, assume legacy mode")
	return sriovnetworkv1.ESwithModeLegacy
}

// setEswitchMode switches the device to the given eswitch mode.
// All VFs on the PF are unbound before the mode is changed; a failure of either
// step is logged and returned. The error from the mode change is wrapped with
// %w so callers can still inspect the underlying cause with errors.Is/errors.As.
func (s *sriov) setEswitchMode(pciAddr, eswitchMode string) error {
	log.Log.V(2).Info("setEswitchMode(): set eswitch mode", "device", pciAddr, "mode", eswitchMode)
	if err := s.unbindAllVFsOnPF(pciAddr); err != nil {
		log.Log.Error(err, "setEswitchMode(): failed to unbind VFs", "device", pciAddr, "mode", eswitchMode)
		return err
	}
	if err := s.SetNicSriovMode(pciAddr, eswitchMode); err != nil {
		err = fmt.Errorf("failed to switch NIC to SRIOV %s mode: %w", eswitchMode, err)
		log.Log.Error(err, "setEswitchMode(): failed to set mode", "device", pciAddr, "mode", eswitchMode)
		return err
	}
	return nil
}

// setEswitchModeAndNumVFs sets the number of VFs on the device and then leaves
// it in the desired eswitch mode. The sequence is: move to legacy mode if the
// device is not there yet, set the VF count, and finally move to switchdev
// when that is the desired mode. The first failing step aborts the sequence.
func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode string, numVFs int) error {
	log.Log.V(2).Info("setEswitchModeAndNumVFs(): configure VFs for device",
		"device", pciAddr, "count", numVFs, "mode", desiredEswitchMode)

	// VFs are always created while the NIC is in legacy mode, because some
	// drivers may not support VF creation in the switchdev mode
	if mode := s.getCurrentEswitchMode(pciAddr); mode != sriovnetworkv1.ESwithModeLegacy {
		if err := s.setEswitchMode(pciAddr, sriovnetworkv1.ESwithModeLegacy); err != nil {
			return err
		}
	}

	if err := s.SetSriovNumVfs(pciAddr, numVFs); err != nil {
		return err
	}

	if desiredEswitchMode != sriovnetworkv1.ESwithModeSwitchDev {
		// the NIC is already in legacy mode at this point, nothing more to do
		return nil
	}
	return s.setEswitchMode(pciAddr, sriovnetworkv1.ESwithModeSwitchDev)
}

// retrieve all VFs for the PF and unbind them from a driver
Expand Down
Loading

0 comments on commit 5282b87

Please sign in to comment.