From 976e6573ab0b5539270bd0735faf5a4048ef3994 Mon Sep 17 00:00:00 2001 From: Andrea Panattoni Date: Mon, 19 Aug 2024 12:25:11 +0200 Subject: [PATCH] eSwitch: switchdev and VF creation order Ice driver supports creating VFs after the eSwitcMode is set to switchdev. This is different than the preferred way for `mlx5` driver. Add specific functions to configure eSwitchMode and SriovNumVFs for each driver. Signed-off-by: Andrea Panattoni --- pkg/host/internal/sriov/sriov.go | 93 ++++++++++++++++++++++----- pkg/host/internal/sriov/sriov_test.go | 73 +++++++++++++++++++++ 2 files changed, 149 insertions(+), 17 deletions(-) diff --git a/pkg/host/internal/sriov/sriov.go b/pkg/host/internal/sriov/sriov.go index 586b4438f..379cf6a70 100644 --- a/pkg/host/internal/sriov/sriov.go +++ b/pkg/host/internal/sriov/sriov.go @@ -898,9 +898,14 @@ func (s *sriov) SetNicSriovMode(pciAddress string, mode string) error { dev, err := s.netlinkLib.DevLinkGetDeviceByName("pci", pciAddress) if err != nil { - return err + return fmt.Errorf("can't get devlink device [%s] to set eSwitch to [%s]: %w", pciAddress, mode, err) + } + + err = s.netlinkLib.DevLinkSetEswitchMode(dev, mode) + if err != nil { + return fmt.Errorf("can't set eSwitch mode to [%s] on device [%s]: %w", mode, pciAddress, err) } - return s.netlinkLib.DevLinkSetEswitchMode(dev, mode) + return nil } func (s *sriov) GetLinkType(name string) string { @@ -989,22 +994,41 @@ func (s *sriov) createVFs(iface *sriovnetworkv1.Interface) error { return s.setEswitchModeAndNumVFs(iface.PciAddress, expectedEswitchMode, iface.NumVfs) } -func (s *sriov) setEswitchMode(pciAddr, eswitchMode string) error { - log.Log.V(2).Info("setEswitchMode(): set eswitch mode", "device", pciAddr, "mode", eswitchMode) - if err := s.unbindAllVFsOnPF(pciAddr); err != nil { - log.Log.Error(err, "setEswitchMode(): failed to unbind VFs", "device", pciAddr, "mode", eswitchMode) +type setEswitchModeAndNumVFsFn func(string, string, int) error + +func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode string, numVFs int) error { + pfDriverName, err := s.dputilsLib.GetDriverName(pciAddr) + if err != nil { return err } - if err := s.SetNicSriovMode(pciAddr, eswitchMode); err != nil { - err = fmt.Errorf("failed to switch NIC to SRIOV %s mode: %v", eswitchMode, err) - log.Log.Error(err, "setEswitchMode(): failed to set mode", "device", pciAddr, "mode", eswitchMode) - return err + + log.Log.V(2).Info("setEswitchModeAndNumVFs(): configure VFs for device", + "device", pciAddr, "count", numVFs, "mode", desiredEswitchMode, "driver", pfDriverName) + + setEswitchModeAndNumVFsByDriverName := map[string]setEswitchModeAndNumVFsFn{ + "ice": s.setEswitchModeAndNumVFsIce, + "mlx5_core": s.setEswitchModeAndNumVFsMlx, } - return nil + + fn, ok := setEswitchModeAndNumVFsByDriverName[pfDriverName] + if !ok { + log.Log.V(2).Info("setEswitchModeAndNumVFs(): driver not found in the support list. Using fallback implementation", + "device", pciAddr, "driver", pfDriverName) + + // Fallback to mlx5 driver + fn = s.setEswitchModeAndNumVFsMlx + } + + return fn(pciAddr, desiredEswitchMode, numVFs) } -func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode string, numVFs int) error { - log.Log.V(2).Info("setEswitchModeAndNumVFs(): configure VFs for device", +// setEswitchModeAndNumVFsMlx configures PF eSwitch and sriov_numvfs in the following order: +// a. set eSwitchMode to legacy +// b. set the desired number of Virtual Functions +// c. unbind driver of all VFs +// d. set eSwitchMode to `switchdev` if requested +func (s *sriov) setEswitchModeAndNumVFsMlx(pciAddr string, desiredEswitchMode string, numVFs int) error { + log.Log.V(2).Info("setEswitchModeAndNumVFsMlx(): configure VFs for device", "device", pciAddr, "count", numVFs, "mode", desiredEswitchMode) // always switch NIC to the legacy mode before creating VFs. This is required because some drivers @@ -1015,7 +1039,11 @@ func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode strin if err := s.detachPFFromBridge(pciAddr); err != nil { return err } - if err := s.setEswitchMode(pciAddr, sriovnetworkv1.ESwithModeLegacy); err != nil { + if err := s.unbindAllVFsOnPF(pciAddr); err != nil { + log.Log.Error(err, "setEswitchModeAndNumVFsMlx(): failed to unbind VFs", "device", pciAddr, "mode", desiredEswitchMode) + return err + } + if err := s.SetNicSriovMode(pciAddr, sriovnetworkv1.ESwithModeLegacy); err != nil { return err } } @@ -1024,8 +1052,39 @@ func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode strin } if desiredEswitchMode == sriovnetworkv1.ESwithModeSwitchDev { - return s.setEswitchMode(pciAddr, sriovnetworkv1.ESwithModeSwitchDev) + if err := s.unbindAllVFsOnPF(pciAddr); err != nil { + log.Log.Error(err, "setEswitchModeAndNumVFsMlx(): failed to unbind VFs", "device", pciAddr, "mode", desiredEswitchMode) + return err + } + if err := s.SetNicSriovMode(pciAddr, desiredEswitchMode); err != nil { + return err + } + } + return nil +} + +// setEswitchModeAndNumVFsIce configures PF eSwitch and sriov_numvfs in the following order: +// a. set eSwitchMode to the desired mode if needed +// a1. set sriov_numvfs to 0 before updating the eSwitchMode +// b. set sriov_numvfs to the desired number of VFs +func (s *sriov) setEswitchModeAndNumVFsIce(pciAddr string, desiredEswitchMode string, numVFs int) error { + log.Log.V(2).Info("setEswitchModeAndNumVFsIce(): configure VFs for device", + "device", pciAddr, "count", numVFs, "mode", desiredEswitchMode) + + if s.GetNicSriovMode(pciAddr) != desiredEswitchMode { + if err := s.SetSriovNumVfs(pciAddr, 0); err != nil { + return err + } + + if err := s.SetNicSriovMode(pciAddr, desiredEswitchMode); err != nil { + return err + } } + + if err := s.SetSriovNumVfs(pciAddr, numVFs); err != nil { + return err + } + return nil } @@ -1047,11 +1106,11 @@ func (s *sriov) unbindAllVFsOnPF(addr string) error { log.Log.V(2).Info("unbindAllVFsOnPF(): unbind all VFs on PF", "device", addr) vfAddrs, err := s.dputilsLib.GetVFList(addr) if err != nil { - return fmt.Errorf("failed to read VF list: %v", err) + return fmt.Errorf("failed to read VF list for pci[%s]: %w", addr, err) } for _, vfAddr := range vfAddrs { if err := s.kernelHelper.Unbind(vfAddr); err != nil { - return fmt.Errorf("failed to unbind VF from the driver: %v", err) + return fmt.Errorf("failed to unbind VF from the driver PF[%s], PF[%s]: %w", addr, vfAddr, err) } } return nil diff --git a/pkg/host/internal/sriov/sriov_test.go b/pkg/host/internal/sriov/sriov_test.go index 6db7d9617..f30e93773 100644 --- a/pkg/host/internal/sriov/sriov_test.go +++ b/pkg/host/internal/sriov/sriov_test.go @@ -214,6 +214,7 @@ var _ = Describe("SRIOV", func() { dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(2) dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0) + dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil) netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return(&netlink.DevlinkDevice{ Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}}, nil) hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil) @@ -282,6 +283,7 @@ var _ = Describe("SRIOV", func() { dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1) dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0) + dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil) netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return(&netlink.DevlinkDevice{ Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}}, nil) hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil) @@ -333,6 +335,7 @@ var _ = Describe("SRIOV", func() { dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1) dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0) + dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil) hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil) hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.0").Return(nil) hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.0").Return(nil) @@ -393,6 +396,74 @@ var _ = Describe("SRIOV", func() { helpers.GinkgoAssertFileContentsEquals("/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs", "1") }) + It("should configure switchdev on ice driver", func() { + helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{ + Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0"}, + Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}}, + }) + + dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1) + dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0) + dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("ice", nil) + hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil) + hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.0").Return(nil) + hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.0").Return(nil) + hostMock.EXPECT().AddDisableNMUdevRule("0000:d8:00.0").Return(nil) + hostMock.EXPECT().AddPersistPFNameUdevRule("0000:d8:00.0", "enp216s0f0np0").Return(nil) + hostMock.EXPECT().EnableHwTcOffload("enp216s0f0np0").Return(nil) + hostMock.EXPECT().GetDevlinkDeviceParam("0000:d8:00.0", "flow_steering_mode").Return("", syscall.EINVAL) + dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).Times(1) + pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl) + netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(2) + netlinkLibMock.EXPECT().IsLinkAdminStateUp(pfLinkMock).Return(false) + netlinkLibMock.EXPECT().LinkSetUp(pfLinkMock).Return(nil) + netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return(&netlink.DevlinkDevice{ + Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}}, nil).Times(2) + netlinkLibMock.EXPECT().DevLinkSetEswitchMode(gomock.Any(), "switchdev").Return(nil) + + dputilsLibMock.EXPECT().GetVFID("0000:d8:00.2").Return(0, nil).Times(2) + hostMock.EXPECT().HasDriver("0000:d8:00.2").Return(false, "") + hostMock.EXPECT().BindDefaultDriver("0000:d8:00.2").Return(nil) + hostMock.EXPECT().HasDriver("0000:d8:00.2").Return(true, "test") + hostMock.EXPECT().UnbindDriverIfNeeded("0000:d8:00.2", true).Return(nil) + hostMock.EXPECT().BindDefaultDriver("0000:d8:00.2").Return(nil) + hostMock.EXPECT().SetNetdevMTU("0000:d8:00.2", 2000).Return(nil) + hostMock.EXPECT().GetInterfaceIndex("0000:d8:00.2").Return(42, nil).AnyTimes() + vf0LinkMock := netlinkMockPkg.NewMockLink(testCtrl) + vf0Mac, _ := net.ParseMAC("02:42:19:51:2f:af") + vf0LinkMock.EXPECT().Attrs().Return(&netlink.LinkAttrs{Name: "enp216s0f0_0", HardwareAddr: vf0Mac}) + netlinkLibMock.EXPECT().LinkByIndex(42).Return(vf0LinkMock, nil).AnyTimes() + netlinkLibMock.EXPECT().LinkSetVfHardwareAddr(vf0LinkMock, 0, vf0Mac).Return(nil) + hostMock.EXPECT().GetPhysPortName("enp216s0f0np0").Return("p0", nil) + hostMock.EXPECT().GetPhysSwitchID("enp216s0f0np0").Return("7cfe90ff2cc0", nil) + hostMock.EXPECT().AddVfRepresentorUdevRule("0000:d8:00.0", "enp216s0f0np0", "7cfe90ff2cc0", "p0").Return(nil) + hostMock.EXPECT().CreateVDPADevice("0000:d8:00.2", "vhost_vdpa") + hostMock.EXPECT().LoadUdevRules().Return(nil) + + storeManagerMode.EXPECT().SaveLastPfAppliedStatus(gomock.Any()).Return(nil) + + Expect(s.ConfigSriovInterfaces(storeManagerMode, + []sriovnetworkv1.Interface{{ + Name: "enp216s0f0np0", + PciAddress: "0000:d8:00.0", + NumVfs: 1, + LinkType: "ETH", + EswitchMode: "switchdev", + VfGroups: []sriovnetworkv1.VfGroup{ + { + VfRange: "0-0", + ResourceName: "test-resource0", + PolicyName: "test-policy0", + Mtu: 2000, + IsRdma: true, + VdpaType: "vhost_vdpa", + }}, + }}, + []sriovnetworkv1.InterfaceExt{{PciAddress: "0000:d8:00.0"}}, + false)).NotTo(HaveOccurred()) + helpers.GinkgoAssertFileContentsEquals("/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs", "1") + }) + It("externally managed - wrong VF count", func() { dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0) @@ -459,6 +530,7 @@ var _ = Describe("SRIOV", func() { hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil) hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.0").Return(nil) hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.0").Return(nil) + dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil) hostMock.EXPECT().SetNetdevMTU("0000:d8:00.0", 1500).Return(nil) Expect(s.ConfigSriovInterfaces(storeManagerMode, @@ -499,6 +571,7 @@ var _ = Describe("SRIOV", func() { dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(2) dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0) + dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil) netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return( &netlink.DevlinkDevice{Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}}, nil)