Skip to content

Commit

Permalink
improve udev rules to only disable nm on operator managed devices
Browse files Browse the repository at this point in the history
Signed-off-by: Sebastian Sch <sebassch@gmail.com>
  • Loading branch information
SchSeba committed Sep 12, 2023
1 parent aeb60e2 commit 835f5ac
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 93 deletions.
17 changes: 17 additions & 0 deletions bindata/scripts/udev-find-sriov-pf.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash

cat <<'EOF' > /host/etc/udev/disable-nm-sriov.sh
#!/bin/bash
if [ ! -d "/sys/class/net/$1/device/physfn" ]; then
exit 0
fi
pf_path=$(readlink /sys/class/net/$1/device/physfn -n)
pf_pci_address=${pf_path##*../}
if [ "$2" == "$pf_pci_address" ]; then
echo "NM_UNMANAGED=1"
fi
EOF

chmod +x /host/etc/udev/disable-nm-sriov.sh
84 changes: 19 additions & 65 deletions pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,33 +189,6 @@ func New(
}
}

func (dn *Daemon) tryCreateUdevRuleWrapper() error {
ns, nodeStateErr := dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(
context.Background(),
dn.name,
metav1.GetOptions{},
)
if nodeStateErr != nil {
glog.Warningf("Could not fetch node state %s: %v, skip updating switchdev udev rules", dn.name, nodeStateErr)
} else {
err := tryCreateSwitchdevUdevRule(ns)
if err != nil {
glog.Warningf("Failed to create switchdev udev rules: %v", err)
}
}

// update udev rule only if we are on a BM environment
// for virtual environments we don't disable the vfs as they may be used by the platform/host
if dn.platform != utils.VirtualOpenStack {
err := tryCreateNMUdevRule()
if err != nil {
return err
}
}

return nil
}

// Run the config daemon
func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error {
glog.V(0).Infof("Run(): node: %s", dn.name)
Expand Down Expand Up @@ -251,8 +224,11 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error {
}
dn.storeManager = storeManager

if err := dn.tryCreateUdevRuleWrapper(); err != nil {
return err
if err := dn.prepareNMUdevRule(); err != nil {
glog.Warningf("failed to prepare udev files to disable network manager on requested VFs: %v", err)
}
if err := dn.tryCreateSwitchdevUdevRule(); err != nil {
glog.Warningf("failed to create udev files for switchdev")
}

var timeout int64 = 5
Expand Down Expand Up @@ -327,7 +303,7 @@ func (dn *Daemon) Run(stopCh <-chan struct{}, exitCh <-chan error) error {
return err
case <-time.After(30 * time.Second):
glog.V(2).Info("Run(): period refresh")
if err := dn.tryCreateUdevRuleWrapper(); err != nil {
if err := dn.tryCreateSwitchdevUdevRule(); err != nil {
glog.V(2).Info("Could not create udev rule: ", err)
}
}
Expand Down Expand Up @@ -1079,8 +1055,18 @@ func (dn *Daemon) drainNode() error {
return nil
}

func tryCreateSwitchdevUdevRule(nodeState *sriovnetworkv1.SriovNetworkNodeState) error {
func (dn *Daemon) tryCreateSwitchdevUdevRule() error {
glog.V(2).Infof("tryCreateSwitchdevUdevRule()")
nodeState, nodeStateErr := dn.client.SriovnetworkV1().SriovNetworkNodeStates(namespace).Get(
context.Background(),
dn.name,
metav1.GetOptions{},
)
if nodeStateErr != nil {
glog.Warningf("Could not fetch node state %s: %v, skip updating switchdev udev rules", dn.name, nodeStateErr)
return nil
}

var newContent string
filePath := path.Join(utils.FilesystemRoot, "/host/etc/udev/rules.d/20-switchdev.rules")

Expand Down Expand Up @@ -1137,11 +1123,7 @@ func tryCreateSwitchdevUdevRule(nodeState *sriovnetworkv1.SriovNetworkNodeState)
return nil
}

func tryCreateNMUdevRule() error {
glog.V(2).Infof("tryCreateNMUdevRule()")
dirPath := path.Join(utils.FilesystemRoot, "/host/etc/udev/rules.d")
filePath := path.Join(dirPath, "10-nm-unmanaged.rules")

func (dn *Daemon) prepareNMUdevRule() error {
// we need to remove the Red Hat Virtio network device from the udev rule configuration
// if we don't remove it when running the config-daemon on a virtual node it will disconnect the node after a reboot
// even that the operator should not be installed on virtual environments that are not openstack
Expand All @@ -1153,34 +1135,6 @@ func tryCreateNMUdevRule() error {
}
supportedVfIds = append(supportedVfIds, vfID)
}
newContent := fmt.Sprintf("ACTION==\"add|change|move\", ATTRS{device}==\"%s\", ENV{NM_UNMANAGED}=\"1\"\n", strings.Join(supportedVfIds, "|"))

// add NM udev rules for renaming VF rep
newContent = newContent + "SUBSYSTEM==\"net\", ACTION==\"add|move\", ATTRS{phys_switch_id}!=\"\", ATTR{phys_port_name}==\"pf*vf*\", ENV{NM_UNMANAGED}=\"1\"\n"

oldContent, err := os.ReadFile(filePath)
// if oldContent = newContent, don't do anything
if err == nil && newContent == string(oldContent) {
return nil
}

glog.V(2).Infof("Old udev content '%v' and new content '%v' differ. Writing to file %v.",
strings.TrimSuffix(string(oldContent), "\n"),
strings.TrimSuffix(newContent, "\n"),
filePath)

err = os.MkdirAll(dirPath, os.ModePerm)
if err != nil && !os.IsExist(err) {
glog.Errorf("tryCreateNMUdevRule(): failed to create dir %s: %v", dirPath, err)
return err
}

// if the file does not exist or if oldContent != newContent
// write to file and create it if it doesn't exist
err = os.WriteFile(filePath, []byte(newContent), 0666)
if err != nil {
glog.Errorf("tryCreateNMUdevRule(): fail to write file: %v", err)
return err
}
return nil
return utils.PrepareNMUdevRule(supportedVfIds)
}
20 changes: 0 additions & 20 deletions pkg/daemon/daemon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ package daemon
import (
"context"
"flag"
"os"
"path"
"testing"

. "github.com/onsi/ginkgo/v2"
Expand Down Expand Up @@ -234,17 +232,6 @@ var _ = Describe("Config Daemon", func() {

Expect(sut.nodeState.GetGeneration()).To(BeNumerically("==", 777))
})

It("configure udev rules on host", func() {

networkManagerUdevRulePath := path.Join(utils.FilesystemRoot, "host/etc/udev/rules.d/10-nm-unmanaged.rules")

expectedContents := `ACTION=="add|change|move", ATTRS{device}=="0x1014|0x154c", ENV{NM_UNMANAGED}="1"
SUBSYSTEM=="net", ACTION=="add|move", ATTRS{phys_switch_id}!="", ATTR{phys_port_name}=="pf*vf*", ENV{NM_UNMANAGED}="1"
`
// No need to trigger any action on config-daemon, as it checks the file in the main loop
assertFileContents(networkManagerUdevRulePath, expectedContents)
})
})

Context("isNodeDraining", func() {
Expand Down Expand Up @@ -321,10 +308,3 @@ func updateSriovNetworkNodeState(c snclientset.Interface, nodeState *sriovnetwor
Update(context.Background(), nodeState, metav1.UpdateOptions{})
return err
}

func assertFileContents(path, contents string) {
Eventually(func() (string, error) {
ret, err := os.ReadFile(path)
return string(ret), err
}, "10s").WithOffset(1).Should(Equal(contents))
}
100 changes: 92 additions & 8 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"net"
"os"
"os/exec"
"path"
"path/filepath"
"regexp"
"strconv"
Expand Down Expand Up @@ -40,6 +41,10 @@ const (
VendorMellanox = "15b3"
DeviceBF2 = "a2d6"
DeviceBF3 = "a2dc"

udevFolder = "/etc/udev"
udevRulesFolder = udevFolder + "/rules.d"
udevDisableNM = "/bindata/scripts/udev-find-sriov-pf.sh"
)

var InitialState sriovnetworkv1.SriovNetworkNodeState
Expand All @@ -50,6 +55,8 @@ var pfPhysPortNameRe = regexp.MustCompile(`p\d+`)
// FilesystemRoot used by test to mock interactions with filesystem
var FilesystemRoot = ""

var SupportedVfIds []string

func init() {
ClusterType = os.Getenv("CLUSTER_TYPE")
}
Expand Down Expand Up @@ -242,6 +249,11 @@ func ConfigSriovInterfaces(interfaces []sriovnetworkv1.Interface, ifaceStatuses
ifaceStatus.Name,
ifaceStatus.PciAddress)
continue
} else {
err = RemoveUdevRule(ifaceStatus.PciAddress)
if err != nil {
return err
}
}

if err = resetSriovDevice(ifaceStatus); err != nil {
Expand Down Expand Up @@ -362,15 +374,27 @@ func configSriovDevice(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetwor
// set numVFs
if iface.NumVfs != ifaceStatus.NumVfs {
if iface.ExternallyManaged {
errMsg := fmt.Sprintf("configSriovDevice(): number of request virtual functions %d is not equal to configured virtual functions %d but the policy is configured as ExternallyManaged for device %s", iface.NumVfs, ifaceStatus.NumVfs, iface.PciAddress)
glog.Error(errMsg)
return fmt.Errorf(errMsg)
}
if iface.NumVfs > ifaceStatus.NumVfs {
errMsg := fmt.Sprintf("configSriovDevice(): number of request virtual functions %d is not equal to configured virtual functions %d but the policy is configured as ExternallyManaged for device %s", iface.NumVfs, ifaceStatus.NumVfs, iface.PciAddress)
glog.Error(errMsg)
return fmt.Errorf(errMsg)
}
} else {
// create the udev rule to disable all the vfs from network manager as this vfs are managed by the operator
err = AddUdevRule(iface.PciAddress)
if err != nil {
return err
}

err = setSriovNumVfs(iface.PciAddress, iface.NumVfs)
if err != nil {
glog.Errorf("configSriovDevice(): fail to set NumVfs for device %s", iface.PciAddress)
return err
err = setSriovNumVfs(iface.PciAddress, iface.NumVfs)
if err != nil {
err = RemoveUdevRule(iface.PciAddress)
if err != nil {
return err
}
glog.Errorf("configSriovDevice(): fail to set NumVfs for device %s", iface.PciAddress)
return err
}
}
}
// set PF mtu
Expand Down Expand Up @@ -886,3 +910,63 @@ func RebindVfToDefaultDriver(vfAddr string) error {
glog.Warningf("RebindVfToDefaultDriver(): workaround implemented for VF %s", vfAddr)
return nil
}

func PrepareNMUdevRule(supportedVfIds []string) error {
glog.V(2).Infof("PrepareNMUdevRule()")
dirPath := path.Join(FilesystemRoot, "/host/etc/udev/rules.d")
filePath := path.Join(dirPath, "10-nm-unmanaged.rules")

// remove the old unmanaged rules file
if _, err := os.Stat(filePath); err == nil {
err = os.Remove(filePath)
if err != nil {
glog.Warningf("failed to remove the network manager global unmanaged rule on path %s: %v", filePath, err)
}
}

// create the pf finder script for udev rules
var stdout, stderr bytes.Buffer
cmd := exec.Command("/bin/bash", path.Join(FilesystemRoot, udevDisableNM))
cmd.Stdout = &stdout
cmd.Stderr = &stderr
if err := cmd.Run(); err != nil {
return err
}
glog.V(2).Infof("PrepareNMUdevRule(): %v", cmd.Stdout)

//save the device list to use for udev rules
SupportedVfIds = supportedVfIds
return nil
}

func AddUdevRule(pfPciAddress string) error {
glog.V(2).Infof("AddUdevRule(): %s", pfPciAddress)
pathFile := udevRulesFolder
udevRuleContent := fmt.Sprintf("SUBSYSTEM==\"net\", ACTION==\"add|change|move\", ATTRS{device}==\"%s\", IMPORT{program}=\"/etc/udev/disable-nm-sriov.sh $env{INTERFACE} %s\"", strings.Join(SupportedVfIds, "|"), pfPciAddress)

err := os.MkdirAll(pathFile, os.ModePerm)
if err != nil && !os.IsExist(err) {
glog.Errorf("AddUdevRule(): failed to create dir %s: %v", pathFile, err)
return err
}

filePath := path.Join(pathFile, fmt.Sprintf("10-nm-disable-%s.rules", pfPciAddress))
// if the file does not exist or if oldContent != newContent
// write to file and create it if it doesn't exist
err = os.WriteFile(filePath, []byte(udevRuleContent), 0666)
if err != nil {
glog.Errorf("AddUdevRule(): fail to write file: %v", err)
return err
}
return nil
}

func RemoveUdevRule(pfPciAddress string) error {
pathFile := udevRulesFolder
filePath := path.Join(pathFile, fmt.Sprintf("10-nm-disable-%s.rules", pfPciAddress))
err := os.Remove(filePath)
if err != nil && !os.IsNotExist(err) {
return err
}
return nil
}

0 comments on commit 835f5ac

Please sign in to comment.