From fa195eed05703bdb0917bed899444b598e773223 Mon Sep 17 00:00:00 2001 From: Mamduh Alassi Date: Tue, 2 Mar 2021 09:26:57 +0200 Subject: [PATCH 1/4] Introduce service package to maintain services on vanilla Kubernetes Add pkg/service, which contains utils to read MCO files for switchdev services and scripts. This package is needed because vanilla Kubernetes doesn't support MCO, which is part of OpenShift Signed-off-by: Mamduh Alassi --- go.mod | 2 + pkg/service/service.go | 15 ++++ pkg/service/service_manager.go | 76 +++++++++++++++++ pkg/service/types.go | 23 +++++ pkg/service/utils.go | 149 +++++++++++++++++++++++++++++++++ 5 files changed, 265 insertions(+) create mode 100644 pkg/service/service.go create mode 100644 pkg/service/service_manager.go create mode 100644 pkg/service/types.go create mode 100644 pkg/service/utils.go diff --git a/go.mod b/go.mod index 9f4581916..0deb40c06 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/Masterminds/sprig v2.22.0+incompatible github.com/blang/semver v3.5.0+incompatible github.com/cenkalti/backoff v2.2.1+incompatible + github.com/coreos/go-systemd/v22 v22.0.0 github.com/fsnotify/fsnotify v1.4.9 github.com/go-logr/logr v0.2.1 github.com/go-logr/zapr v0.2.0 // indirect @@ -29,6 +30,7 @@ require ( golang.org/x/time v0.0.0-20191024005414-555d28b269f0 google.golang.org/genproto v0.0.0-20200610104632-a5b850bcf112 // indirect google.golang.org/protobuf v1.25.0 // indirect + gopkg.in/yaml.v2 v2.3.0 k8s.io/api v0.19.0 k8s.io/apimachinery v0.19.0 k8s.io/client-go v0.19.0 diff --git a/pkg/service/service.go b/pkg/service/service.go new file mode 100644 index 000000000..671d2e20a --- /dev/null +++ b/pkg/service/service.go @@ -0,0 +1,15 @@ +package service + +type Service struct { + Name string + Path string + Content string +} + +func NewService(name, path, content string) *Service { + return &Service{ + Name: name, + Path: path, + Content: content, + } +} diff --git a/pkg/service/service_manager.go 
b/pkg/service/service_manager.go new file mode 100644 index 000000000..ddd9a1f4e --- /dev/null +++ b/pkg/service/service_manager.go @@ -0,0 +1,76 @@ +package service + +import ( + "io/ioutil" + "os" + "os/exec" + "path" + "path/filepath" + + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" +) + +type ServiceManager interface { + IsServiceExist(string) (bool, error) + ReadService(string) (*Service, error) + EnableService(service *Service) error +} + +type serviceManager struct { + chroot string +} + +func NewServiceManager(chroot string) ServiceManager { + root := chroot + if root == "" { + root = "/" + } + return &serviceManager{root} +} + +// IsServiceExist checks whether a service file exists at the given path +func (sm *serviceManager) IsServiceExist(servicePath string) (bool, error) { + _, err := os.Stat(path.Join(sm.chroot, servicePath)) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + + return true, nil +} + +// ReadService read service from given path +func (sm *serviceManager) ReadService(servicePath string) (*Service, error) { + data, err := ioutil.ReadFile(path.Join(sm.chroot, servicePath)) + if err != nil { + return nil, err + } + + return &Service{ + Name: filepath.Base(servicePath), + Path: servicePath, + Content: string(data), + }, nil +} + +// EnableService creates service file and enables it with systemctl enable +func (sm *serviceManager) EnableService(service *Service) error { + // Write service file + err := ioutil.WriteFile(path.Join(sm.chroot, service.Path), []byte(service.Content), 0644) + if err != nil { + return err + } + + // Change root dir + exit, err := utils.Chroot(sm.chroot) + if err != nil { + return err + } + defer exit() + + // Enable service + cmd := exec.Command("systemctl", "enable", service.Name) + return cmd.Run() +} diff --git a/pkg/service/types.go b/pkg/service/types.go new file mode 100644 index 000000000..aaa879a4d --- /dev/null +++ b/pkg/service/types.go @@ -0,0 +1,23 @@ +package service 
+ +// ServiceInjectionManifestFile service injection manifest file structure +type ServiceInjectionManifestFile struct { + Name string + Dropins []struct { + Contents string + } +} + +// ServiceManifestFile service manifest file structure +type ServiceManifestFile struct { + Name string + Contents string +} + +// ScriptManifestFile script manifest file structure +type ScriptManifestFile struct { + Path string + Contents struct { + Inline string + } +} diff --git a/pkg/service/utils.go b/pkg/service/utils.go new file mode 100644 index 000000000..8b9e82c72 --- /dev/null +++ b/pkg/service/utils.go @@ -0,0 +1,149 @@ +package service + +import ( + "io/ioutil" + "strings" + + "github.com/coreos/go-systemd/v22/unit" + "gopkg.in/yaml.v2" +) + +const systemdDir = "/usr/lib/systemd/system/" + +// CompareServices compares 2 services and returns true if serviceA is missing at least one option of serviceB (i.e. serviceA needs an update) +func CompareServices(serviceA, serviceB *Service) (bool, error) { + optsA, err := unit.Deserialize(strings.NewReader(serviceA.Content)) + if err != nil { + return false, err + } + optsB, err := unit.Deserialize(strings.NewReader(serviceB.Content)) + if err != nil { + return false, err + } + +OUTER: + for _, optB := range optsB { + for _, optA := range optsA { + if optA.Match(optB) { + continue OUTER + } + } + + return true, nil + } + + return false, nil +} + +// RemoveFromService removes given fields from service +func RemoveFromService(service *Service, options ...*unit.UnitOption) (*Service, error) { + opts, err := unit.Deserialize(strings.NewReader(service.Content)) + if err != nil { + return nil, err + } + + var newServiceOptions []*unit.UnitOption +OUTER: + for _, opt := range opts { + for _, optRemove := range options { + if opt.Match(optRemove) { + continue OUTER + } + } + + newServiceOptions = append(newServiceOptions, opt) + } + + data, err := ioutil.ReadAll(unit.Serialize(newServiceOptions)) + if err != nil { + return nil, err + } + + return &Service{ + Name: service.Name, + Path: 
service.Path, + Content: string(data), + }, nil +} + +// AppendToService appends given fields to service +func AppendToService(service *Service, options ...*unit.UnitOption) (*Service, error) { + serviceOptions, err := unit.Deserialize(strings.NewReader(service.Content)) + if err != nil { + return nil, err + } + +OUTER: + for _, appendOpt := range options { + for _, opt := range serviceOptions { + if opt.Match(appendOpt) { + continue OUTER + } + } + serviceOptions = append(serviceOptions, appendOpt) + } + + data, err := ioutil.ReadAll(unit.Serialize(serviceOptions)) + if err != nil { + return nil, err + } + + return &Service{ + Name: service.Name, + Path: service.Path, + Content: string(data), + }, nil +} + +// ReadServiceInjectionManifestFile reads service injection file +func ReadServiceInjectionManifestFile(path string) (*Service, error) { + data, err := ioutil.ReadFile(path) + if err != nil { + return nil, err + } + + var serviceContent ServiceInjectionManifestFile + if err := yaml.Unmarshal(data, &serviceContent); err != nil { + return nil, err + } + + return &Service{ + Name: serviceContent.Name, + Path: systemdDir + serviceContent.Name, + Content: serviceContent.Dropins[0].Contents, + }, nil +} + +// ReadServiceManifestFile reads service file +func ReadServiceManifestFile(path string) (*Service, error) { + data, err := ioutil.ReadFile(path) + if err != nil { + return nil, err + } + + var serviceFile *ServiceManifestFile + if err := yaml.Unmarshal(data, &serviceFile); err != nil { + return nil, err + } + + return &Service{ + Name: serviceFile.Name, + Path: "/etc/systemd/system/" + serviceFile.Name, + Content: serviceFile.Contents, + }, nil +} + +// ReadScriptManifestFile reads script file +func ReadScriptManifestFile(path string) (*ScriptManifestFile, error) { + data, err := ioutil.ReadFile(path) + if err != nil { + return nil, err + } + + var scriptFile *ScriptManifestFile + if err := yaml.Unmarshal(data, &scriptFile); err != nil { + return nil, err + } + + 
return scriptFile, nil +} From 66f6f26e6a02323f0a89db253d9ee2be597d8f6f Mon Sep 17 00:00:00 2001 From: Mamduh Alassi Date: Tue, 2 Mar 2021 09:28:49 +0200 Subject: [PATCH 2/4] Move bindata/manifests/machine-config to bindata/manifests/switchdev-config Avoid confusion by machine-config which is related to Openshift MCO Signed-off-by: Mamduh Alassi --- .../files/configure-switchdev.sh.yaml | 0 .../files/switchdev-vf-link-name.sh.yaml | 0 .../machineconfigpool.yaml | 0 .../ovs-units/ovs-vswitchd.service.yaml | 0 .../switchdev-units/NetworkManager.service.yaml | 0 .../switchdev-units/switchdev-configuration.yaml | 0 controllers/sriovoperatorconfig_controller.go | 4 ++-- 7 files changed, 2 insertions(+), 2 deletions(-) rename bindata/manifests/{machine-config => switchdev-config}/files/configure-switchdev.sh.yaml (100%) rename bindata/manifests/{machine-config => switchdev-config}/files/switchdev-vf-link-name.sh.yaml (100%) rename bindata/manifests/{machine-config => switchdev-config}/machineconfigpool.yaml (100%) rename bindata/manifests/{machine-config => switchdev-config}/ovs-units/ovs-vswitchd.service.yaml (100%) rename bindata/manifests/{machine-config => switchdev-config}/switchdev-units/NetworkManager.service.yaml (100%) rename bindata/manifests/{machine-config => switchdev-config}/switchdev-units/switchdev-configuration.yaml (100%) diff --git a/bindata/manifests/machine-config/files/configure-switchdev.sh.yaml b/bindata/manifests/switchdev-config/files/configure-switchdev.sh.yaml similarity index 100% rename from bindata/manifests/machine-config/files/configure-switchdev.sh.yaml rename to bindata/manifests/switchdev-config/files/configure-switchdev.sh.yaml diff --git a/bindata/manifests/machine-config/files/switchdev-vf-link-name.sh.yaml b/bindata/manifests/switchdev-config/files/switchdev-vf-link-name.sh.yaml similarity index 100% rename from bindata/manifests/machine-config/files/switchdev-vf-link-name.sh.yaml rename to 
bindata/manifests/switchdev-config/files/switchdev-vf-link-name.sh.yaml diff --git a/bindata/manifests/machine-config/machineconfigpool.yaml b/bindata/manifests/switchdev-config/machineconfigpool.yaml similarity index 100% rename from bindata/manifests/machine-config/machineconfigpool.yaml rename to bindata/manifests/switchdev-config/machineconfigpool.yaml diff --git a/bindata/manifests/machine-config/ovs-units/ovs-vswitchd.service.yaml b/bindata/manifests/switchdev-config/ovs-units/ovs-vswitchd.service.yaml similarity index 100% rename from bindata/manifests/machine-config/ovs-units/ovs-vswitchd.service.yaml rename to bindata/manifests/switchdev-config/ovs-units/ovs-vswitchd.service.yaml diff --git a/bindata/manifests/machine-config/switchdev-units/NetworkManager.service.yaml b/bindata/manifests/switchdev-config/switchdev-units/NetworkManager.service.yaml similarity index 100% rename from bindata/manifests/machine-config/switchdev-units/NetworkManager.service.yaml rename to bindata/manifests/switchdev-config/switchdev-units/NetworkManager.service.yaml diff --git a/bindata/manifests/machine-config/switchdev-units/switchdev-configuration.yaml b/bindata/manifests/switchdev-config/switchdev-units/switchdev-configuration.yaml similarity index 100% rename from bindata/manifests/machine-config/switchdev-units/switchdev-configuration.yaml rename to bindata/manifests/switchdev-config/switchdev-units/switchdev-configuration.yaml diff --git a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index ea40d87d1..5a191c718 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -417,11 +417,11 @@ func (r *SriovOperatorConfigReconciler) syncOffloadMachineConfig(dc *sriovnetwor data.Data["HwOffloadNodeLabel"] = HwOffloadNodeLabel mcName := "00-" + HwOffloadNodeLabel mcpName := HwOffloadNodeLabel - mc, err := render.GenerateMachineConfig("bindata/manifests/machine-config", mcName, 
HwOffloadNodeLabel, dc.Spec.EnableOvsOffload, &data) + mc, err := render.GenerateMachineConfig("bindata/manifests/switchdev-config", mcName, HwOffloadNodeLabel, dc.Spec.EnableOvsOffload, &data) if err != nil { return err } - mcpRaw, err := render.RenderTemplate("bindata/manifests/machine-config/machineconfigpool.yaml", &data) + mcpRaw, err := render.RenderTemplate("bindata/manifests/switchdev-config/machineconfigpool.yaml", &data) if err != nil { return err } From 85cf7028753f9fcf63e17bdb45a085f996010257 Mon Sep 17 00:00:00 2001 From: Mamduh Alassi Date: Tue, 2 Mar 2021 09:31:00 +0200 Subject: [PATCH 3/4] Add Kubernetes plugin to maintain services for switchdev Openshift is handling switchdev by mco_plugin which uses Openshift MCO to handle the services, this commit introduce k8s_plugin which handle the same logic for vanilla Kubernetes that is missing MCO Signed-off-by: Mamduh Alassi --- Makefile | 2 +- bindata/manifests/daemon/daemonset.yaml | 4 + bindata/scripts/clean-k8s-services.sh | 36 +++ pkg/daemon/daemon.go | 2 + pkg/daemon/plugin.go | 1 + pkg/plugins/k8s/k8s_plugin.go | 395 ++++++++++++++++++++++++ pkg/plugins/mco/mco_plugin.go | 57 +--- pkg/utils/switchdev.go | 67 ++++ 8 files changed, 509 insertions(+), 55 deletions(-) create mode 100755 bindata/scripts/clean-k8s-services.sh create mode 100644 pkg/plugins/k8s/k8s_plugin.go create mode 100644 pkg/utils/switchdev.go diff --git a/Makefile b/Makefile index cecf58061..5c48e2128 100644 --- a/Makefile +++ b/Makefile @@ -63,7 +63,7 @@ _build-%: _plugin-%: vet @hack/build-plugins.sh $* -plugins: _plugin-intel _plugin-mellanox _plugin-generic _plugin-virtual _plugin-mco +plugins: _plugin-intel _plugin-mellanox _plugin-generic _plugin-virtual _plugin-mco _plugin-k8s clean: @rm -rf $(TARGET_DIR) diff --git a/bindata/manifests/daemon/daemonset.yaml b/bindata/manifests/daemon/daemonset.yaml index ddaca085d..aa6edb095 100644 --- a/bindata/manifests/daemon/daemonset.yaml +++ b/bindata/manifests/daemon/daemonset.yaml @@ 
-50,6 +50,10 @@ spec: volumeMounts: - name: host mountPath: /host + lifecycle: + preStop: + exec: + command: ["/bindata/scripts/clean-k8s-services.sh"] volumes: - name: host hostPath: diff --git a/bindata/scripts/clean-k8s-services.sh b/bindata/scripts/clean-k8s-services.sh new file mode 100755 index 000000000..49dc8139a --- /dev/null +++ b/bindata/scripts/clean-k8s-services.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +if [ "$CLUSTER_TYPE" == "openshift" ]; then + echo "openshift cluster" + exit +fi + +chroot_path="/host" + +function clean_services() { + # Remove switchdev service files + rm -f $chroot_path/etc/systemd/system/switchdev-configuration.service + rm -f $chroot_path/usr/local/bin/configure-switchdev.sh + rm -f $chroot_path/etc/switchdev.conf + rm -f $chroot_path/etc/udev/switchdev-vf-link-name.sh + + # clean NetworkManager and ovs-vswitchd services + network_manager_service=$chroot_path/usr/lib/systemd/system/NetworkManager.service + ovs_service=$chroot_path/usr/lib/systemd/system/ovs-vswitchd.service + + if [ -f $network_manager_service ]; then + sed -i.bak '/switchdev-configuration.service/d' $network_manager_service + fi + + if [ -f $ovs_service ]; then + sed -i.bak '/hw-offload/d' $ovs_service + fi +} + +clean_services +# Reload host services +chroot $chroot_path /bin/bash -c systemctl daemon-reload >/dev/null 2>&1 || true + +# Restart system services +chroot $chroot_path /bin/bash -c systemctl restart NetworkManager.service >/dev/null 2>&1 || true +chroot $chroot_path /bin/bash -c systemctl restart ovs-vswitchd.service >/dev/null 2>&1 || true diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index e28f85878..7815f8979 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -653,6 +653,8 @@ func (dn *Daemon) loadVendorPlugins(ns *sriovnetworkv1.SriovNetworkNodeState) er pl = registerPlugins(ns) if utils.ClusterType == utils.ClusterTypeOpenshift { pl = append(pl, McoPlugin) + } else { + pl = append(pl, K8sPlugin) } pl = append(pl, 
GenericPlugin) } diff --git a/pkg/daemon/plugin.go b/pkg/daemon/plugin.go index dd59de90f..ffab8607d 100644 --- a/pkg/daemon/plugin.go +++ b/pkg/daemon/plugin.go @@ -31,6 +31,7 @@ const ( GenericPlugin = "generic_plugin" VirtualPlugin = "virtual_plugin" McoPlugin = "mco_plugin" + K8sPlugin = "k8s_plugin" ) // loadPlugin loads a single plugin from a file path diff --git a/pkg/plugins/k8s/k8s_plugin.go b/pkg/plugins/k8s/k8s_plugin.go new file mode 100644 index 000000000..c26d82b42 --- /dev/null +++ b/pkg/plugins/k8s/k8s_plugin.go @@ -0,0 +1,395 @@ +package main + +import ( + "fmt" + "io/ioutil" + "os" + "path" + "strings" + + "github.com/coreos/go-systemd/v22/unit" + "github.com/golang/glog" + + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/service" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" +) + +type K8sPlugin struct { + PluginName string + SpecVersion string + serviceManager service.ServiceManager + switchdevRunScript *service.ScriptManifestFile + switchdevUdevScript *service.ScriptManifestFile + switchdevService *service.Service + openVSwitchService *service.Service + networkManagerService *service.Service + updateTarget *k8sUpdateTarget +} + +type k8sUpdateTarget struct { + switchdevService bool + switchdevRunScript bool + switchdevUdevScript bool + systemServices []*service.Service +} + +func (u *k8sUpdateTarget) needUpdate() bool { + return u.switchdevService || u.switchdevRunScript || u.switchdevUdevScript || len(u.systemServices) > 0 +} + +func (u *k8sUpdateTarget) reset() { + u.switchdevService = false + u.switchdevRunScript = false + u.systemServices = []*service.Service{} +} + +func (u *k8sUpdateTarget) String() string { + var updateList []string + if u.switchdevService { + updateList = append(updateList, "SwitchdevService") + } + if u.switchdevRunScript { + updateList = append(updateList, "SwitchdevRunScript") + } + if 
u.switchdevUdevScript { + updateList = append(updateList, "SwitchdevUdevScript") + } + for _, s := range u.systemServices { + updateList = append(updateList, s.Name) + } + + return strings.Join(updateList, ",") +} + +const ( + switchdevManifestPath = "bindata/manifests/switchdev-config/" + switchdevUnits = switchdevManifestPath + "switchdev-units/" + switchdevUnitFile = switchdevUnits + "switchdev-configuration.yaml" + networkManagerUnitFile = switchdevUnits + "NetworkManager.service.yaml" + ovsUnitFile = switchdevManifestPath + "ovs-units/ovs-vswitchd.service.yaml" + configuresSwitchdevScript = switchdevManifestPath + "files/configure-switchdev.sh.yaml" + switchdevRenamingUdevScript = switchdevManifestPath + "files/switchdev-vf-link-name.sh.yaml" + + chroot = "/host" +) + +var ( + Plugin K8sPlugin +) + +// Initialize our plugin and set up initial values +func init() { + Plugin = K8sPlugin{ + PluginName: "k8s_plugin", + SpecVersion: "1.0", + serviceManager: service.NewServiceManager(chroot), + updateTarget: &k8sUpdateTarget{}, + } + + // Read manifest files for plugin + if err := Plugin.readManifestFiles(); err != nil { + panic(err) + } +} + +// Name returns the name of the plugin +func (p *K8sPlugin) Name() string { + return p.PluginName +} + +// Spec returns the version of the spec expected by the plugin +func (p *K8sPlugin) Spec() string { + return p.SpecVersion +} + +// OnNodeStateAdd Invoked when SriovNetworkNodeState CR is created, return if need drain and/or reboot node +func (p *K8sPlugin) OnNodeStateAdd(state *sriovnetworkv1.SriovNetworkNodeState) (needDrain bool, needReboot bool, err error) { + glog.Info("k8s-plugin OnNodeStateAdd()") + return p.OnNodeStateChange(nil, state) +} + +// OnNodeStateChange Invoked when SriovNetworkNodeState CR is updated, return if need drain and/or reboot node +func (p *K8sPlugin) OnNodeStateChange(old, new *sriovnetworkv1.SriovNetworkNodeState) (needDrain bool, needReboot bool, err error) { + glog.Info("k8s-plugin 
OnNodeStateChange()") + needDrain = false + needReboot = false + + p.updateTarget.reset() + // TODO add check for enableOvsOffload in OperatorConfig later + // Check services only if switchdev is required + if utils.IsSwitchdevModeSpec(new.Spec) { + // Check services + err = p.servicesStateUpdate() + if err != nil { + glog.Errorf("k8s-plugin OnNodeStateChange(): failed : %v", err) + return + } + } + + // Check switchdev config + var update, remove bool + if update, remove, err = utils.WriteSwitchdevConfFile(new); err != nil { + glog.Errorf("k8s-plugin OnNodeStateChange():fail to update switchdev.conf file: %v", err) + return + } + if remove { + glog.Info("k8s-plugin OnNodeStateChange(): need reboot node to clean switchdev VFs") + needDrain = true + needReboot = true + return + } + if update { + glog.Info("k8s-plugin OnNodeStateChange(): need reboot node to use the up-to-date switchdev.conf") + needDrain = true + needReboot = true + return + } + if p.updateTarget.needUpdate() { + needDrain = true + if p.updateTarget.switchdevUdevScript { + needReboot = true + glog.Infof("k8s-plugin OnNodeStateChange(): needReboot to update %q", p.updateTarget) + } else { + glog.Infof("k8s-plugin OnNodeStateChange(): needDrain to update %q", p.updateTarget) + } + } + + return +} + +// Apply config change +func (p *K8sPlugin) Apply() error { + glog.Info("k8s-plugin Apply()") + if err := p.updateSwichdevService(); err != nil { + return err + } + + for _, systemService := range p.updateTarget.systemServices { + if err := p.updateSystemService(systemService); err != nil { + return err + } + } + + return nil +} + +func (p *K8sPlugin) readSwitchdevManifest() error { + // Read switchdev service + switchdevService, err := service.ReadServiceManifestFile(switchdevUnitFile) + if err != nil { + return err + } + + // Remove run condition from the service + conditionOpt := &unit.UnitOption{ + Section: "Unit", + Name: "ConditionPathExists", + Value: "!/etc/ignition-machine-config-encapsulated.json", 
+ } + switchdevService, err = service.RemoveFromService(switchdevService, conditionOpt) + if err != nil { + return err + } + p.switchdevService = switchdevService + + // Read switchdev run script + switchdevRunScript, err := service.ReadScriptManifestFile(configuresSwitchdevScript) + if err != nil { + return err + } + p.switchdevRunScript = switchdevRunScript + + // Read switchdev udev script + switchdevUdevScript, err := service.ReadScriptManifestFile(switchdevRenamingUdevScript) + if err != nil { + return err + } + p.switchdevUdevScript = switchdevUdevScript + + return nil +} + +func (p *K8sPlugin) readNetworkManagerManifest() error { + networkManagerService, err := service.ReadServiceInjectionManifestFile(networkManagerUnitFile) + if err != nil { + return err + } + + p.networkManagerService = networkManagerService + return nil +} + +func (p *K8sPlugin) readOpenVSwitchdManifest() error { + openVSwitchService, err := service.ReadServiceInjectionManifestFile(ovsUnitFile) + if err != nil { + return err + } + + p.openVSwitchService = openVSwitchService + return nil +} + +func (p *K8sPlugin) readManifestFiles() error { + if err := p.readSwitchdevManifest(); err != nil { + return err + } + + if err := p.readNetworkManagerManifest(); err != nil { + return err + } + + if err := p.readOpenVSwitchdManifest(); err != nil { + return err + } + + return nil +} + +func (p *K8sPlugin) switchdevServiceStateUpdate() error { + // Check switchdev service + swdService, err := p.serviceManager.ReadService(p.switchdevService.Path) + if err != nil { + if !os.IsNotExist(err) { + return err + } + // service not exists + p.updateTarget.switchdevService = true + } else { + needChange, err := service.CompareServices(swdService, p.switchdevService) + if err != nil { + return err + } + p.updateTarget.switchdevService = needChange + } + + // Check switchdev run script + data, err := ioutil.ReadFile(path.Join(chroot, p.switchdevRunScript.Path)) + if err != nil { + if !os.IsNotExist(err) { + 
return err + } + p.updateTarget.switchdevRunScript = true + } else if string(data) != p.switchdevRunScript.Contents.Inline { + p.updateTarget.switchdevRunScript = true + } + + // Check switchdev udev script + data, err = ioutil.ReadFile(path.Join(chroot, p.switchdevUdevScript.Path)) + if err != nil { + if !os.IsNotExist(err) { + return err + } + p.updateTarget.switchdevUdevScript = true + } else if string(data) != p.switchdevUdevScript.Contents.Inline { + p.updateTarget.switchdevUdevScript = true + } + + return nil +} + +func (p *K8sPlugin) getSystemServices() []*service.Service { + return []*service.Service{p.networkManagerService, p.openVSwitchService} +} + +func (p *K8sPlugin) isSystemServiceNeedUpdate(serviceObj *service.Service) bool { + systemService, err := p.serviceManager.ReadService(serviceObj.Path) + if err != nil { + glog.Warningf("k8s-plugin isSystemServiceNeedUpdate(): failed to read switchdev service file %q: %v", + serviceObj.Path, err) + return false + } + if systemService != nil { + needChange, err := service.CompareServices(systemService, serviceObj) + if err != nil { + glog.Warningf("k8s-plugin isSystemServiceNeedUpdate(): failed to compare switchdev service : %v", err) + return false + } + return needChange + } + + return false +} + +func (p *K8sPlugin) systemServicesStateUpdate() error { + var services []*service.Service + for _, systemService := range p.getSystemServices() { + exist, err := p.serviceManager.IsServiceExist(systemService.Path) + if err != nil { + return err + } + if !exist { + return fmt.Errorf("k8s-plugin systemServicesStateUpdate(): %q not found", systemService.Name) + } + if p.isSystemServiceNeedUpdate(systemService) { + services = append(services, systemService) + } + } + + p.updateTarget.systemServices = services + return nil +} + +func (p *K8sPlugin) servicesStateUpdate() error { + // Check switchdev + err := p.switchdevServiceStateUpdate() + if err != nil { + return err + } + + // Check system services + err = 
p.systemServicesStateUpdate() + if err != nil { + return err + } + + return nil +} + +func (p *K8sPlugin) updateSwichdevService() error { + if p.updateTarget.switchdevService { + err := p.serviceManager.EnableService(p.switchdevService) + if err != nil { + return err + } + } + + if p.updateTarget.switchdevRunScript { + err := ioutil.WriteFile(path.Join(chroot, p.switchdevRunScript.Path), + []byte(p.switchdevRunScript.Contents.Inline), 0755) + if err != nil { + return err + } + } + + if p.updateTarget.switchdevUdevScript { + err := ioutil.WriteFile(path.Join(chroot, p.switchdevUdevScript.Path), + []byte(p.switchdevUdevScript.Contents.Inline), 0755) + if err != nil { + return err + } + } + + return nil +} + +func (p *K8sPlugin) updateSystemService(serviceObj *service.Service) error { + systemService, err := p.serviceManager.ReadService(serviceObj.Path) + if err != nil { + return err + } + if systemService == nil { + // Invalid case to reach here + return fmt.Errorf("k8s-plugin Apply(): can't update non-existing service %q", serviceObj.Name) + } + serviceOptions, err := unit.Deserialize(strings.NewReader(serviceObj.Content)) + if err != nil { + return err + } + updatedService, err := service.AppendToService(systemService, serviceOptions...) 
+ if err != nil { + return err + } + + return p.serviceManager.EnableService(updatedService) +} diff --git a/pkg/plugins/mco/mco_plugin.go b/pkg/plugins/mco/mco_plugin.go index 215ae7fd8..b63c8dfe3 100644 --- a/pkg/plugins/mco/mco_plugin.go +++ b/pkg/plugins/mco/mco_plugin.go @@ -3,8 +3,6 @@ package main import ( "context" "encoding/json" - "fmt" - "io/ioutil" "os" "github.com/golang/glog" @@ -17,6 +15,7 @@ import ( sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" "github.com/k8snetworkplumbingwg/sriov-network-operator/controllers" + "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" ) type McoPlugin struct { @@ -28,7 +27,6 @@ type McoPlugin struct { const ( switchdevUnitPath = "/host/etc/systemd/system/switchdev-configuration.service" - switchDevConfPath = "/host/etc/switchdev.conf" nodeLabelPrefix = "node-role.kubernetes.io/" ) @@ -80,16 +78,10 @@ func (p *McoPlugin) OnNodeStateAdd(state *sriovnetworkv1.SriovNetworkNodeState) // OnNodeStateChange Invoked when SriovNetworkNodeState CR is updated, return if need dain and/or reboot node func (p *McoPlugin) OnNodeStateChange(old, new *sriovnetworkv1.SriovNetworkNodeState) (needDrain bool, needReboot bool, err error) { glog.Info("mco-plugin OnNodeStateChange()") - switchdevConfigured = false - for _, iface := range new.Spec.Interfaces { - if iface.EswitchMode == sriovnetworkv1.ESWITCHMODE_SWITCHDEV { - switchdevConfigured = true - break - } - } + switchdevConfigured = utils.IsSwitchdevModeSpec(new.Spec) var update, remove bool - if update, remove, err = writeSwitchdevConfFile(new); err != nil { + if update, remove, err = utils.WriteSwitchdevConfFile(new); err != nil { glog.Errorf("mco-plugin OnNodeStateChange():fail to update switchdev.conf file: %v", err) return } @@ -156,46 +148,3 @@ func (p *McoPlugin) Apply() error { glog.Infof("Node %s is not in HW offload MachineConfigPool", node.Name) return nil } - -func writeSwitchdevConfFile(newState 
*sriovnetworkv1.SriovNetworkNodeState) (update, remove bool, err error) { - _, err = os.Stat(switchDevConfPath) - if err != nil { - if os.IsNotExist(err) { - glog.V(2).Infof("writeSwitchdevConfFile(): file not existed, create it") - _, err = os.Create(switchDevConfPath) - if err != nil { - glog.Errorf("writeSwitchdevConfFile(): fail to create file: %v", err) - return - } - } else { - return - } - } - newContent := "" - for _, iface := range newState.Spec.Interfaces { - if iface.EswitchMode == sriovnetworkv1.ESWITCHMODE_SWITCHDEV { - newContent = newContent + fmt.Sprintln(iface.PciAddress, iface.NumVfs) - } - } - oldContent, err := ioutil.ReadFile(switchDevConfPath) - if err != nil { - glog.Errorf("writeSwitchdevConfFile(): fail to read file: %v", err) - return - } - if newContent == string(oldContent) { - glog.V(2).Info("writeSwitchdevConfFile(): no update") - return - } - if newContent == "" { - remove = true - glog.V(2).Info("writeSwitchdevConfFile(): remove content in switchdev.conf") - } - update = true - glog.V(2).Infof("writeSwitchdevConfFile(): write %s to switchdev.conf", newContent) - err = ioutil.WriteFile(switchDevConfPath, []byte(newContent), 0666) - if err != nil { - glog.Errorf("writeSwitchdevConfFile(): fail to write file: %v", err) - return - } - return -} diff --git a/pkg/utils/switchdev.go b/pkg/utils/switchdev.go new file mode 100644 index 000000000..26d7819a0 --- /dev/null +++ b/pkg/utils/switchdev.go @@ -0,0 +1,67 @@ +package utils + +import ( + "fmt" + "io/ioutil" + "os" + + "github.com/golang/glog" + + sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" +) + +const ( + switchDevConfPath = "/host/etc/switchdev.conf" +) + +func IsSwitchdevModeSpec(spec sriovnetworkv1.SriovNetworkNodeStateSpec) bool { + for _, iface := range spec.Interfaces { + if iface.EswitchMode == sriovnetworkv1.ESWITCHMODE_SWITCHDEV { + return true + } + } + return false +} + +func WriteSwitchdevConfFile(newState 
*sriovnetworkv1.SriovNetworkNodeState) (update, remove bool, err error) { + _, err = os.Stat(switchDevConfPath) + if err != nil { + if os.IsNotExist(err) { + glog.V(2).Infof("WriteSwitchdevConfFile(): file not existed, create it") + _, err = os.Create(switchDevConfPath) + if err != nil { + glog.Errorf("WriteSwitchdevConfFile(): fail to create file: %v", err) + return + } + } else { + return + } + } + newContent := "" + for _, iface := range newState.Spec.Interfaces { + if iface.EswitchMode == sriovnetworkv1.ESWITCHMODE_SWITCHDEV { + newContent = newContent + fmt.Sprintln(iface.PciAddress, iface.NumVfs) + } + } + oldContent, err := ioutil.ReadFile(switchDevConfPath) + if err != nil { + glog.Errorf("WriteSwitchdevConfFile(): fail to read file: %v", err) + return + } + if newContent == string(oldContent) { + glog.V(2).Info("WriteSwitchdevConfFile(): no update") + return + } + if newContent == "" { + remove = true + glog.V(2).Info("WriteSwitchdevConfFile(): remove content in switchdev.conf") + } + update = true + glog.V(2).Infof("WriteSwitchdevConfFile(): write %s to switchdev.conf", newContent) + err = ioutil.WriteFile(switchDevConfPath, []byte(newContent), 0644) + if err != nil { + glog.Errorf("WriteSwitchdevConfFile(): fail to write file: %v", err) + return + } + return +} From 5067405608a3aaef0bbf10f39a1fdfeab34166f7 Mon Sep 17 00:00:00 2001 From: Mamduh Alassi Date: Tue, 2 Mar 2021 09:34:08 +0200 Subject: [PATCH 4/4] Add documentation for hardware offload Signed-off-by: Mamduh Alassi --- doc/ovs-hw-offload.md | 135 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 doc/ovs-hw-offload.md diff --git a/doc/ovs-hw-offload.md b/doc/ovs-hw-offload.md new file mode 100644 index 000000000..b9be9d836 --- /dev/null +++ b/doc/ovs-hw-offload.md @@ -0,0 +1,135 @@ +# OVS Hardware Offload + +The OVS software based solution is CPU intensive, affecting system performance +and preventing fully utilizing available bandwidth. 
OVS 2.8 and above support +a feature called OVS Hardware Offload which improves performance significantly. +This feature allows offloading the OVS data-plane to the NIC while keeping the +OVS control-plane unmodified. It uses SR-IOV technology with a VF representor +host net-device. The VF representor plays the same role as TAP devices +in a Para-Virtual (PV) setup. A packet sent through the VF representor on the host +arrives at the VF, and a packet sent through the VF is received by its representor. + +## Supported Ethernet controllers + +The following manufacturers are known to work: + +- Mellanox ConnectX-5 and above + +## Instructions for Mellanox ConnectX-5 + +## Prerequisites + +- Open vSwitch installed +- NetworkManager installed + +### Deploy SriovNetworkNodePolicy + +```yaml +apiVersion: sriovnetwork.openshift.io/v1 +kind: SriovNetworkNodePolicy +metadata: + name: ovs-hw-offload + namespace: sriov-network-operator +spec: + deviceType: netdevice + nicSelector: + deviceID: "1017" + rootDevices: + - 0000:02:00.0 + - 0000:02:00.1 + vendor: "15b3" + nodeSelector: + feature.node.kubernetes.io/network-sriov.capable: "true" + numVfs: 8 + priority: 10 + resourceName: cx5_sriov_switchdev + isRdma: true + eSwitchMode: switchdev + linkType: eth +``` + +### Create NetworkAttachmentDefinition CRD with OVS CNI config + +```yaml +apiVersion: "k8s.cni.cncf.io/v1" +kind: NetworkAttachmentDefinition +metadata: + name: ovs-net + annotations: + k8s.v1.cni.cncf.io/resourceName: openshift.io/cx5_sriov_switchdev +spec: + config: '{ + "cniVersion": "0.3.1", + "type": "ovs", + "bridge": "br-sriov0", + "vlan": 10 + }' +``` + +### Deploy POD with OVS hardware-offload + +Create a POD spec that requests a VF + +```yaml +apiVersion: v1 +kind: Pod +metadata: + name: ovs-offload-pod1 + annotations: + k8s.v1.cni.cncf.io/networks: ovs-net +spec: + containers: + - name: ovs-offload + image: networkstatic/iperf3 + resources: + requests: + openshift.io/cx5_sriov_switchdev: '1' + limits: + 
openshift.io/cx5_sriov_switchdev: '1' + command: + - sh + - -c + - | + ls -l /dev/infiniband /sys/class/net + sleep 1000000 +``` + +## Verify Hardware Offload is Working + +Run iperf3 server on POD 1 + +```bash +kubectl exec -it ovs-offload-pod1 -- iperf3 -s +``` + +Run iperf3 client on POD 2 + +```bash +kubectl exec -it ovs-offload-pod2 -- iperf3 -c 192.168.1.17 -t 100 +``` + +Check traffic on the VF representor port. Verify that only the TCP connection establishment packets appear. + +```text +tcpdump -i enp3s0f0_3 tcp +listening on enp3s0f0_3, link-type EN10MB (Ethernet), capture size 262144 bytes +22:24:44.969516 IP 192.168.1.16.43558 > 192.168.1.17.targus-getdata1: Flags [S], seq 89800743, win 64860, options [mss 1410,sackOK,TS val 491087056 ecr 0,nop,wscale 7], length 0 +22:24:44.969773 IP 192.168.1.17.targus-getdata1 > 192.168.1.16.43558: Flags [S.], seq 1312764151, ack 89800744, win 64308, options [mss 1410,sackOK,TS val 4095895608 ecr 491087056,nop,wscale 7], length 0 +22:24:45.085558 IP 192.168.1.16.43558 > 192.168.1.17.targus-getdata1: Flags [.], ack 1, win 507, options [nop,nop,TS val 491087222 ecr 4095895608], length 0 +22:24:45.085592 IP 192.168.1.16.43558 > 192.168.1.17.targus-getdata1: Flags [P.], seq 1:38, ack 1, win 507, options [nop,nop,TS val 491087222 ecr 4095895608], length 37 +22:24:45.086311 IP 192.168.1.16.43560 > 192.168.1.17.targus-getdata1: Flags [S], seq 3802331506, win 64860, options [mss 1410,sackOK,TS val 491087279 ecr 0,nop,wscale 7], length 0 +22:24:45.086462 IP 192.168.1.17.targus-getdata1 > 192.168.1.16.43560: Flags [S.], seq 441940709, ack 3802331507, win 64308, options [mss 1410,sackOK,TS val 4095895725 ecr 491087279,nop,wscale 7], length 0 +22:24:45.086624 IP 192.168.1.16.43560 > 192.168.1.17.targus-getdata1: Flags [.], ack 1, win 507, options [nop,nop,TS val 491087279 ecr 4095895725], length 0 +22:24:45.086654 IP 192.168.1.16.43560 > 192.168.1.17.targus-getdata1: Flags [P.], seq 1:38, ack 1, win 507, options [nop,nop,TS val 491087279 ecr 
4095895725], length 37 +22:24:45.086715 IP 192.168.1.17.targus-getdata1 > 192.168.1.16.43560: Flags [.], ack 38, win 503, options [nop,nop,TS val 4095895725 ecr 491087279], length 0 +``` + +Check datapath rules are offloaded + +```text +ovs-appctl dpctl/dump-flows --names type=offloaded +recirc_id(0),in_port(eth0),eth(src=16:fd:c6:0b:60:52),eth_type(0x0800),ipv4(src=192.168.1.17,frag=no), packets:2235857, bytes:147599302, used:0.550s, actions:ct(zone=65520),recirc(0x18) +ct_state(+est+trk),ct_mark(0),recirc_id(0x18),in_port(eth0),eth(dst=42:66:d7:45:0d:7e),eth_type(0x0800),ipv4(dst=192.168.1.0/255.255.255.0,frag=no), packets:2235857, bytes:147599302, used:0.550s, actions:eth1 +recirc_id(0),in_port(eth1),eth(src=42:66:d7:45:0d:7e),eth_type(0x0800),ipv4(src=192.168.1.16,frag=no), packets:133410141, bytes:195255745684, used:0.550s, actions:ct(zone=65520),recirc(0x16) +ct_state(+est+trk),ct_mark(0),recirc_id(0x16),in_port(eth1),eth(dst=16:fd:c6:0b:60:52),eth_type(0x0800),ipv4(dst=192.168.1.0/255.255.255.0,frag=no), packets:133410138, bytes:195255745483, used:0.550s, actions:eth0 +```