Skip to content

Commit

Permalink
Merge pull request #757 from zeeke/intel-switchdev
Browse files Browse the repository at this point in the history
Intel switchdev
  • Loading branch information
zeeke authored Sep 17, 2024
2 parents 3e93cfb + 976e657 commit e812d35
Show file tree
Hide file tree
Showing 5 changed files with 290 additions and 19 deletions.
93 changes: 76 additions & 17 deletions pkg/host/internal/sriov/sriov.go
Original file line number Diff line number Diff line change
Expand Up @@ -898,9 +898,14 @@ func (s *sriov) SetNicSriovMode(pciAddress string, mode string) error {

dev, err := s.netlinkLib.DevLinkGetDeviceByName("pci", pciAddress)
if err != nil {
return err
return fmt.Errorf("can't get devlink device [%s] to set eSwitch to [%s]: %w", pciAddress, mode, err)
}

err = s.netlinkLib.DevLinkSetEswitchMode(dev, mode)
if err != nil {
return fmt.Errorf("can't set eSwitch mode to [%s] on device [%s]: %w", mode, pciAddress, err)
}
return s.netlinkLib.DevLinkSetEswitchMode(dev, mode)
return nil
}

func (s *sriov) GetLinkType(name string) string {
Expand Down Expand Up @@ -989,22 +994,41 @@ func (s *sriov) createVFs(iface *sriovnetworkv1.Interface) error {
return s.setEswitchModeAndNumVFs(iface.PciAddress, expectedEswitchMode, iface.NumVfs)
}

func (s *sriov) setEswitchMode(pciAddr, eswitchMode string) error {
log.Log.V(2).Info("setEswitchMode(): set eswitch mode", "device", pciAddr, "mode", eswitchMode)
if err := s.unbindAllVFsOnPF(pciAddr); err != nil {
log.Log.Error(err, "setEswitchMode(): failed to unbind VFs", "device", pciAddr, "mode", eswitchMode)
type setEswitchModeAndNumVFsFn func(string, string, int) error

func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode string, numVFs int) error {
pfDriverName, err := s.dputilsLib.GetDriverName(pciAddr)
if err != nil {
return err
}
if err := s.SetNicSriovMode(pciAddr, eswitchMode); err != nil {
err = fmt.Errorf("failed to switch NIC to SRIOV %s mode: %v", eswitchMode, err)
log.Log.Error(err, "setEswitchMode(): failed to set mode", "device", pciAddr, "mode", eswitchMode)
return err

log.Log.V(2).Info("setEswitchModeAndNumVFs(): configure VFs for device",
"device", pciAddr, "count", numVFs, "mode", desiredEswitchMode, "driver", pfDriverName)

setEswitchModeAndNumVFsByDriverName := map[string]setEswitchModeAndNumVFsFn{
"ice": s.setEswitchModeAndNumVFsIce,
"mlx5_core": s.setEswitchModeAndNumVFsMlx,
}
return nil

fn, ok := setEswitchModeAndNumVFsByDriverName[pfDriverName]
if !ok {
log.Log.V(2).Info("setEswitchModeAndNumVFs(): driver not found in the support list. Using fallback implementation",
"device", pciAddr, "driver", pfDriverName)

// Fallback to mlx5 driver
fn = s.setEswitchModeAndNumVFsMlx
}

return fn(pciAddr, desiredEswitchMode, numVFs)
}

func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode string, numVFs int) error {
log.Log.V(2).Info("setEswitchModeAndNumVFs(): configure VFs for device",
// setEswitchModeAndNumVFsMlx configures PF eSwitch and sriov_numvfs in the following order:
// a. set eSwitchMode to legacy
// b. set the desired number of Virtual Functions
// c. unbind driver of all VFs
// d. set eSwitchMode to `switchdev` if requested
func (s *sriov) setEswitchModeAndNumVFsMlx(pciAddr string, desiredEswitchMode string, numVFs int) error {
log.Log.V(2).Info("setEswitchModeAndNumVFsMlx(): configure VFs for device",
"device", pciAddr, "count", numVFs, "mode", desiredEswitchMode)

// always switch NIC to the legacy mode before creating VFs. This is required because some drivers
Expand All @@ -1015,7 +1039,11 @@ func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode strin
if err := s.detachPFFromBridge(pciAddr); err != nil {
return err
}
if err := s.setEswitchMode(pciAddr, sriovnetworkv1.ESwithModeLegacy); err != nil {
if err := s.unbindAllVFsOnPF(pciAddr); err != nil {
log.Log.Error(err, "setEswitchModeAndNumVFsMlx(): failed to unbind VFs", "device", pciAddr, "mode", desiredEswitchMode)
return err
}
if err := s.SetNicSriovMode(pciAddr, sriovnetworkv1.ESwithModeLegacy); err != nil {
return err
}
}
Expand All @@ -1024,8 +1052,39 @@ func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode strin
}

if desiredEswitchMode == sriovnetworkv1.ESwithModeSwitchDev {
return s.setEswitchMode(pciAddr, sriovnetworkv1.ESwithModeSwitchDev)
if err := s.unbindAllVFsOnPF(pciAddr); err != nil {
log.Log.Error(err, "setEswitchModeAndNumVFsMlx(): failed to unbind VFs", "device", pciAddr, "mode", desiredEswitchMode)
return err
}
if err := s.SetNicSriovMode(pciAddr, desiredEswitchMode); err != nil {
return err
}
}
return nil
}

// setEswitchModeAndNumVFsIce configures PF eSwitch and sriov_numvfs in the following order:
// a. set eSwitchMode to the desired mode if needed
// a1. set sriov_numvfs to 0 before updating the eSwitchMode
// b. set sriov_numvfs to the desired number of VFs
func (s *sriov) setEswitchModeAndNumVFsIce(pciAddr string, desiredEswitchMode string, numVFs int) error {
log.Log.V(2).Info("setEswitchModeAndNumVFsIce(): configure VFs for device",
"device", pciAddr, "count", numVFs, "mode", desiredEswitchMode)

if s.GetNicSriovMode(pciAddr) != desiredEswitchMode {
if err := s.SetSriovNumVfs(pciAddr, 0); err != nil {
return err
}

if err := s.SetNicSriovMode(pciAddr, desiredEswitchMode); err != nil {
return err
}
}

if err := s.SetSriovNumVfs(pciAddr, numVFs); err != nil {
return err
}

return nil
}

Expand All @@ -1047,11 +1106,11 @@ func (s *sriov) unbindAllVFsOnPF(addr string) error {
log.Log.V(2).Info("unbindAllVFsOnPF(): unbind all VFs on PF", "device", addr)
vfAddrs, err := s.dputilsLib.GetVFList(addr)
if err != nil {
return fmt.Errorf("failed to read VF list: %v", err)
return fmt.Errorf("failed to read VF list for pci[%s]: %w", addr, err)
}
for _, vfAddr := range vfAddrs {
if err := s.kernelHelper.Unbind(vfAddr); err != nil {
return fmt.Errorf("failed to unbind VF from the driver: %v", err)
return fmt.Errorf("failed to unbind VF from the driver PF[%s], PF[%s]: %w", addr, vfAddr, err)
}
}
return nil
Expand Down
73 changes: 73 additions & 0 deletions pkg/host/internal/sriov/sriov_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ var _ = Describe("SRIOV", func() {

dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(2)
dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0)
dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil)
netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return(&netlink.DevlinkDevice{
Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}}, nil)
hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil)
Expand Down Expand Up @@ -282,6 +283,7 @@ var _ = Describe("SRIOV", func() {

dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1)
dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0)
dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil)
netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return(&netlink.DevlinkDevice{
Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}}, nil)
hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil)
Expand Down Expand Up @@ -333,6 +335,7 @@ var _ = Describe("SRIOV", func() {

dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1)
dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0)
dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil)
hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil)
hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.0").Return(nil)
hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.0").Return(nil)
Expand Down Expand Up @@ -393,6 +396,74 @@ var _ = Describe("SRIOV", func() {
helpers.GinkgoAssertFileContentsEquals("/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs", "1")
})

It("should configure switchdev on ice driver", func() {
helpers.GinkgoConfigureFakeFS(&fakefilesystem.FS{
Dirs: []string{"/sys/bus/pci/devices/0000:d8:00.0"},
Files: map[string][]byte{"/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs": {}},
})

dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(1)
dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0)
dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("ice", nil)
hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil)
hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.0").Return(nil)
hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.0").Return(nil)
hostMock.EXPECT().AddDisableNMUdevRule("0000:d8:00.0").Return(nil)
hostMock.EXPECT().AddPersistPFNameUdevRule("0000:d8:00.0", "enp216s0f0np0").Return(nil)
hostMock.EXPECT().EnableHwTcOffload("enp216s0f0np0").Return(nil)
hostMock.EXPECT().GetDevlinkDeviceParam("0000:d8:00.0", "flow_steering_mode").Return("", syscall.EINVAL)
dputilsLibMock.EXPECT().GetVFList("0000:d8:00.0").Return([]string{"0000:d8:00.2"}, nil).Times(1)
pfLinkMock := netlinkMockPkg.NewMockLink(testCtrl)
netlinkLibMock.EXPECT().LinkByName("enp216s0f0np0").Return(pfLinkMock, nil).Times(2)
netlinkLibMock.EXPECT().IsLinkAdminStateUp(pfLinkMock).Return(false)
netlinkLibMock.EXPECT().LinkSetUp(pfLinkMock).Return(nil)
netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return(&netlink.DevlinkDevice{
Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}}, nil).Times(2)
netlinkLibMock.EXPECT().DevLinkSetEswitchMode(gomock.Any(), "switchdev").Return(nil)

dputilsLibMock.EXPECT().GetVFID("0000:d8:00.2").Return(0, nil).Times(2)
hostMock.EXPECT().HasDriver("0000:d8:00.2").Return(false, "")
hostMock.EXPECT().BindDefaultDriver("0000:d8:00.2").Return(nil)
hostMock.EXPECT().HasDriver("0000:d8:00.2").Return(true, "test")
hostMock.EXPECT().UnbindDriverIfNeeded("0000:d8:00.2", true).Return(nil)
hostMock.EXPECT().BindDefaultDriver("0000:d8:00.2").Return(nil)
hostMock.EXPECT().SetNetdevMTU("0000:d8:00.2", 2000).Return(nil)
hostMock.EXPECT().GetInterfaceIndex("0000:d8:00.2").Return(42, nil).AnyTimes()
vf0LinkMock := netlinkMockPkg.NewMockLink(testCtrl)
vf0Mac, _ := net.ParseMAC("02:42:19:51:2f:af")
vf0LinkMock.EXPECT().Attrs().Return(&netlink.LinkAttrs{Name: "enp216s0f0_0", HardwareAddr: vf0Mac})
netlinkLibMock.EXPECT().LinkByIndex(42).Return(vf0LinkMock, nil).AnyTimes()
netlinkLibMock.EXPECT().LinkSetVfHardwareAddr(vf0LinkMock, 0, vf0Mac).Return(nil)
hostMock.EXPECT().GetPhysPortName("enp216s0f0np0").Return("p0", nil)
hostMock.EXPECT().GetPhysSwitchID("enp216s0f0np0").Return("7cfe90ff2cc0", nil)
hostMock.EXPECT().AddVfRepresentorUdevRule("0000:d8:00.0", "enp216s0f0np0", "7cfe90ff2cc0", "p0").Return(nil)
hostMock.EXPECT().CreateVDPADevice("0000:d8:00.2", "vhost_vdpa")
hostMock.EXPECT().LoadUdevRules().Return(nil)

storeManagerMode.EXPECT().SaveLastPfAppliedStatus(gomock.Any()).Return(nil)

Expect(s.ConfigSriovInterfaces(storeManagerMode,
[]sriovnetworkv1.Interface{{
Name: "enp216s0f0np0",
PciAddress: "0000:d8:00.0",
NumVfs: 1,
LinkType: "ETH",
EswitchMode: "switchdev",
VfGroups: []sriovnetworkv1.VfGroup{
{
VfRange: "0-0",
ResourceName: "test-resource0",
PolicyName: "test-policy0",
Mtu: 2000,
IsRdma: true,
VdpaType: "vhost_vdpa",
}},
}},
[]sriovnetworkv1.InterfaceExt{{PciAddress: "0000:d8:00.0"}},
false)).NotTo(HaveOccurred())
helpers.GinkgoAssertFileContentsEquals("/sys/bus/pci/devices/0000:d8:00.0/sriov_numvfs", "1")
})

It("externally managed - wrong VF count", func() {
dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0)

Expand Down Expand Up @@ -459,6 +530,7 @@ var _ = Describe("SRIOV", func() {
hostMock.EXPECT().RemoveDisableNMUdevRule("0000:d8:00.0").Return(nil)
hostMock.EXPECT().RemovePersistPFNameUdevRule("0000:d8:00.0").Return(nil)
hostMock.EXPECT().RemoveVfRepresentorUdevRule("0000:d8:00.0").Return(nil)
dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil)
hostMock.EXPECT().SetNetdevMTU("0000:d8:00.0", 1500).Return(nil)

Expect(s.ConfigSriovInterfaces(storeManagerMode,
Expand Down Expand Up @@ -499,6 +571,7 @@ var _ = Describe("SRIOV", func() {

dputilsLibMock.EXPECT().GetSriovVFcapacity("0000:d8:00.0").Return(2)
dputilsLibMock.EXPECT().GetVFconfigured("0000:d8:00.0").Return(0)
dputilsLibMock.EXPECT().GetDriverName("0000:d8:00.0").Return("mlx5_core", nil)
netlinkLibMock.EXPECT().DevLinkGetDeviceByName("pci", "0000:d8:00.0").Return(
&netlink.DevlinkDevice{Attrs: netlink.DevlinkDevAttrs{Eswitch: netlink.DevlinkDevEswitchAttr{Mode: "legacy"}}},
nil)
Expand Down
9 changes: 7 additions & 2 deletions test/conformance/tests/test_sriov_operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1822,6 +1822,7 @@ func findMainSriovDevice(executorPod *corev1.Pod, sriovDevices []*sriovv1.Interf
stdout, _, err = pod.ExecCommand(clients, executorPod, "ip", "link", "show", device.Name)
Expect(err).ToNot(HaveOccurred())
Expect(len(stdout)).Should(Not(Equal(0)), "Unable to query link state")

if strings.Contains(stdout, "state DOWN") {
continue // The interface is not active
}
Expand All @@ -1830,6 +1831,7 @@ func findMainSriovDevice(executorPod *corev1.Pod, sriovDevices []*sriovv1.Interf
return device
}
}

return nil
}

Expand Down Expand Up @@ -2124,7 +2126,7 @@ func createVanillaNetworkPolicy(node string, sriovInfos *cluster.EnabledNodes, n
})))
}

func runCommandOnConfigDaemon(nodeName string, command ...string) (string, string, error) {
func getConfigDaemonPod(nodeName string) *corev1.Pod {
pods := &corev1.PodList{}
label, err := labels.Parse("app=sriov-network-config-daemon")
Expect(err).ToNot(HaveOccurred())
Expand All @@ -2133,8 +2135,11 @@ func runCommandOnConfigDaemon(nodeName string, command ...string) (string, strin
err = clients.List(context.Background(), pods, &runtimeclient.ListOptions{Namespace: operatorNamespace, LabelSelector: label, FieldSelector: field})
Expect(err).ToNot(HaveOccurred())
Expect(len(pods.Items)).To(Equal(1))
return &pods.Items[0]
}

output, errOutput, err := pod.ExecCommand(clients, &pods.Items[0], command...)
func runCommandOnConfigDaemon(nodeName string, command ...string) (string, string, error) {
output, errOutput, err := pod.ExecCommand(clients, getConfigDaemonPod(nodeName), command...)
return output, errOutput, err
}

Expand Down
107 changes: 107 additions & 0 deletions test/conformance/tests/test_switchdev.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package tests

import (
"context"
"fmt"
"time"

corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/cluster"
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/discovery"
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/namespaces"
"github.com/k8snetworkplumbingwg/sriov-network-operator/test/util/network"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

var _ = Describe("[sriov] Switchdev", Ordered, func() {

BeforeAll(func() {
if cluster.VirtualCluster() {
Skip("IGB driver does not support switchdev driver model")
}

err := namespaces.Create(namespaces.Test, clients)
Expect(err).ToNot(HaveOccurred())

err = namespaces.Clean(operatorNamespace, namespaces.Test, clients, discovery.Enabled())
Expect(err).ToNot(HaveOccurred())

WaitForSRIOVStable()
})

It("create switchdev policies on supported devices", func() {
sriovInfos, err := cluster.DiscoverSriov(clients, operatorNamespace)
Expect(err).ToNot(HaveOccurred())
Expect(len(sriovInfos.Nodes)).ToNot(BeZero())

testNode, interfaces, err := sriovInfos.FindSriovDevicesAndNode()
Expect(err).ToNot(HaveOccurred())

// Avoid testing against primary NIC
interfaces, err = findUnusedSriovDevices(testNode, interfaces)
Expect(err).ToNot(HaveOccurred())

// Avoid testing the same NIC model more than once
interfaces = filterDuplicateNICModels(interfaces)

By(fmt.Sprintf("Testing on node %s, %d devices found", testNode, len(interfaces)))

for _, intf := range interfaces {
if !doesInterfaceSupportSwitchdev(intf) {
continue
}

By("Testing device " + nameVendorDeviceID(intf) + " on node " + testNode)
resourceName := "swtichdev" + intf.Name
_, err = network.CreateSriovPolicy(clients, "test-switchdev-policy-", operatorNamespace, intf.Name, testNode, 8, resourceName, "netdevice", func(snnp *sriovv1.SriovNetworkNodePolicy) {
snnp.Spec.EswitchMode = "switchdev"
})
Expect(err).ToNot(HaveOccurred())

WaitForSRIOVStable()

Eventually(func() int64 {
testedNode, err := clients.CoreV1Interface.Nodes().Get(context.Background(), testNode, metav1.GetOptions{})
Expect(err).ToNot(HaveOccurred())
resNum := testedNode.Status.Allocatable[corev1.ResourceName("openshift.io/"+resourceName)]
capacity, _ := resNum.AsInt64()
return capacity
}, 10*time.Minute, time.Second).Should(Equal(int64(8)))
}
})
})

func doesInterfaceSupportSwitchdev(intf *sriovv1.InterfaceExt) bool {
if intf.Driver == "mlx5_core" {
return true
}

if intf.Driver == "ice" {
return true
}

return false
}

func filterDuplicateNICModels(devices []*sriovv1.InterfaceExt) []*sriovv1.InterfaceExt {
foundVendorAndModels := map[string]bool{}
ret := []*sriovv1.InterfaceExt{}

for _, device := range devices {
vendorAndDevice := device.Vendor + "_" + device.DeviceID
if _, value := foundVendorAndModels[vendorAndDevice]; !value {
ret = append(ret, device)
foundVendorAndModels[vendorAndDevice] = true
}
}
return ret
}

func nameVendorDeviceID(intf *sriovv1.InterfaceExt) string {
return fmt.Sprintf("%s (%s:%s)", intf.Name, intf.Vendor, intf.DeviceID)
}
Loading

0 comments on commit e812d35

Please sign in to comment.