OCPBUGS-39333: E2E: Add support for hypershift to ovs dynamic pinning and kubelet.experimental annotation tests #1150

Open · wants to merge 3 commits into base: master
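The core of this change is making the tests pool-agnostic: on a standalone cluster the pool backing a PerformanceProfile is a MachineConfigPool, while on HyperShift it is the hosted cluster's NodePool. Below is a minimal sketch of that resolution pattern using only helpers that appear in the diff; resolvePoolName is an illustrative name, not a function added by the PR, and the sketch assumes it lives alongside the test file.

import (
	"context"

	"sigs.k8s.io/controller-runtime/pkg/client"

	performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
	testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/hypershift"
	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/mcps"
	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodepools"
)

// resolvePoolName is an illustrative helper (not part of this PR) sketching how
// the pool name is derived: from the MachineConfigPool matching the profile on
// standalone clusters, and from the hosted cluster's NodePool on HyperShift.
func resolvePoolName(ctx context.Context, profile *performancev2.PerformanceProfile) (string, error) {
	if !hypershift.IsHypershiftCluster() {
		// Standalone OCP: the pool is the MCP generated for the profile.
		return mcps.GetByProfile(profile)
	}
	// HyperShift: ask the management (control-plane) cluster for the NodePool
	// that backs the hosted cluster and use its namespaced name as the pool id.
	hostedClusterName, err := hypershift.GetHostedClusterName()
	if err != nil {
		return "", err
	}
	np, err := nodepools.GetByClusterName(ctx, testclient.ControlPlaneClient, hostedClusterName)
	if err != nil {
		return "", err
	}
	return client.ObjectKeyFromObject(np).String(), nil
}
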
@@ -20,24 +20,25 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/utils/cpuset"
"sigs.k8s.io/controller-runtime/pkg/client"

machineconfigv1 "github.com/openshift/api/machineconfiguration/v1"
performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/cgroup"
testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/discovery"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/hypershift"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/images"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/label"
testlog "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/log"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/mcps"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodepools"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodes"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/pods"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profiles"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profilesupdate"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/systemd"
)

@@ -53,11 +54,12 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
workerRTNode *corev1.Node
workerRTNodes []corev1.Node
profile, initialProfile *performancev2.PerformanceProfile
performanceMCP string
poolName string
ovsSliceCgroup string
ctx context.Context = context.Background()
ovsSystemdServices []string
isCgroupV2 bool
err error
)

BeforeAll(func() {
@@ -74,18 +76,24 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
profile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
Expect(err).ToNot(HaveOccurred())

performanceMCP, err = mcps.GetByProfile(profile)
Expect(err).ToNot(HaveOccurred())
if !hypershift.IsHypershiftCluster() {
poolName, err = mcps.GetByProfile(profile)
Expect(err).ToNot(HaveOccurred())
} else {
hostedClusterName, err := hypershift.GetHostedClusterName()
Expect(err).ToNot(HaveOccurred())
np, err := nodepools.GetByClusterName(ctx, testclient.ControlPlaneClient, hostedClusterName)
Expect(err).ToNot(HaveOccurred())
poolName = client.ObjectKeyFromObject(np).String()
}

isCgroupV2, err = cgroup.IsVersion2(ctx, testclient.Client)
isCgroupV2, err = cgroup.IsVersion2(ctx, testclient.DataPlaneClient)
Expect(err).ToNot(HaveOccurred())

ovsSystemdServices = ovsSystemdServicesOnOvsSlice(ctx, workerRTNode)

})

BeforeEach(func() {
var err error
By(fmt.Sprintf("Checking the profile %s with cpus %s", profile.Name, cpuSpecToString(profile.Spec.CPU)))

Expect(profile.Spec.CPU.Isolated).NotTo(BeNil())
@@ -111,6 +119,7 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
ovnKubenodepod, err := ovnCnfNodePod(ctx, workerRTNode)
Expect(err).ToNot(HaveOccurred())
containerIds, err := ovnPodContainers(&ovnKubenodepod)
Expect(err).ToNot(HaveOccurred())
for _, ctn := range containerIds {
var containerCgroupPath string
pid, err := nodes.ContainerPid(ctx, workerRTNode, ctn)
@@ -119,16 +128,19 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
out, err := nodes.ExecCommand(context.TODO(), workerRTNode, cmd)
Expect(err).ToNot(HaveOccurred())
cgroupPathOfPid, err := cgroup.PidParser(out)
Expect(err).ToNot(HaveOccurred())
if isCgroupV2 {
containerCgroupPath = filepath.Join(cgroupRoot, cgroupPathOfPid)
} else {
controller := filepath.Join(cgroupRoot, "/cpuset")
containerCgroupPath = filepath.Join(controller, cgroupPathOfPid)
}
cmd = []string{"cat", fmt.Sprintf("%s", filepath.Join(containerCgroupPath, "/cpuset.cpus"))}
cmd = []string{"cat", filepath.Join(containerCgroupPath, "/cpuset.cpus")}
out, err = nodes.ExecCommand(ctx, workerRTNode, cmd)
Expect(err).ToNot(HaveOccurred())
cpus := testutils.ToString(out)
containerCpuset, err := cpuset.Parse(cpus)
Expect(err).ToNot(HaveOccurred())
Expect(containerCpuset).To(Equal(onlineCPUSet), "Burstable pod containers cpuset.cpus do not match total online cpus")
}

@@ -141,12 +153,10 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
initialProfile = profile.DeepCopy()
})
It("[test_id:64099] Activation file doesn't get deleted", func() {
performanceMCP, err := mcps.GetByProfile(profile)
Expect(err).ToNot(HaveOccurred())
policy := "best-effort"
// Need to make some changes to the performance profile, causing a system reboot,
// and then check whether the activation file is modified or deleted
profile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
profile, err := profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
Expect(err).ToNot(HaveOccurred(), "Unable to fetch latest performance profile")
currentPolicy := profile.Spec.NUMA.TopologyPolicy
if *currentPolicy == "best-effort" {
@@ -157,10 +167,13 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
}
By("Updating the performance profile")
profiles.UpdateWithRetry(profile)
By("Applying changes in performance profile and waiting until mcp will start updating")
mcps.WaitForCondition(performanceMCP, machineconfigv1.MachineConfigPoolUpdating, corev1.ConditionTrue)
By("Waiting for MCP being updated")
mcps.WaitForCondition(performanceMCP, machineconfigv1.MachineConfigPoolUpdated, corev1.ConditionTrue)

By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
profilesupdate.WaitForTuningUpdating(ctx, profile)

By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
profilesupdate.PostUpdateSync(ctx, profile)

By("Checking Activation file")
cmd := []string{"ls", activation_file}
for _, node := range workerRTNodes {
@@ -176,22 +189,14 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
Expect(err).ToNot(HaveOccurred())
currentSpec, _ := json.Marshal(profile.Spec)
spec, _ := json.Marshal(initialProfile.Spec)
performanceMCP, err := mcps.GetByProfile(profile)
Expect(err).ToNot(HaveOccurred())
// revert only if the profile changes.
if !bytes.Equal(currentSpec, spec) {
Expect(testclient.Client.Patch(context.TODO(), profile,
client.RawPatch(
types.JSONPatchType,
[]byte(fmt.Sprintf(`[{ "op": "replace", "path": "/spec", "value": %s }]`, spec)),
),
)).ToNot(HaveOccurred())

By("Applying changes in performance profile and waiting until mcp will start updating")
mcps.WaitForCondition(performanceMCP, machineconfigv1.MachineConfigPoolUpdating, corev1.ConditionTrue)

By("Waiting when mcp finishes updates")
mcps.WaitForCondition(performanceMCP, machineconfigv1.MachineConfigPoolUpdated, corev1.ConditionTrue)
profiles.UpdateWithRetry(initialProfile)

By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
profilesupdate.WaitForTuningUpdating(ctx, profile)

By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
profilesupdate.PostUpdateSync(ctx, profile)
}
})
})
@@ -311,14 +316,15 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
},
}
testpod.Spec.NodeSelector = map[string]string{testutils.LabelHostname: workerRTNode.Name}
err = testclient.Client.Create(ctx, testpod)
err = testclient.DataPlaneClient.Create(ctx, testpod)
Expect(err).ToNot(HaveOccurred())
testpod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testpod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
Expect(err).ToNot(HaveOccurred())
Expect(testpod.Status.QOSClass).To(Equal(corev1.PodQOSGuaranteed))

cmd := []string{"taskset", "-pc", "1"}
outputb, err := pods.ExecCommandOnPod(testclient.K8sClient, testpod, "", cmd)
Expect(err).ToNot(HaveOccurred())
testpodCpus := bytes.Split(outputb, []byte(":"))
testlog.Infof("%v pod is using cpus %v", testpod.Name, string(testpodCpus[1]))

@@ -333,6 +339,7 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
ovnContainers, err := ovnPodContainers(&ovnPod)
Expect(err).ToNot(HaveOccurred())
containerPid, err := nodes.ContainerPid(ctx, workerRTNode, ovnContainers[0])
Expect(err).ToNot(HaveOccurred())
// we need to wait as process affinity can change
time.Sleep(30 * time.Second)
ctnCpuset := taskSet(ctx, containerPid, workerRTNode)
@@ -367,7 +374,7 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
}

testpod1.Spec.NodeSelector = map[string]string{testutils.LabelHostname: workerRTNode.Name}
err = testclient.Client.Create(ctx, testpod1)
err = testclient.DataPlaneClient.Create(ctx, testpod1)
Expect(err).ToNot(HaveOccurred())
testpod1, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testpod1), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
Expect(err).ToNot(HaveOccurred())
@@ -387,7 +394,7 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
},
}
testpod2.Spec.NodeSelector = map[string]string{testutils.LabelHostname: workerRTNode.Name}
err = testclient.Client.Create(ctx, testpod2)
err = testclient.DataPlaneClient.Create(ctx, testpod2)
Expect(err).ToNot(HaveOccurred())
testpod2, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testpod2), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
Expect(err).ToNot(HaveOccurred())
@@ -446,13 +453,13 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
testNode := make(map[string]string)
testNode["kubernetes.io/hostname"] = workerRTNode.Name
dp.Spec.Template.Spec.NodeSelector = testNode
err := testclient.Client.Create(ctx, dp)
err := testclient.DataPlaneClient.Create(ctx, dp)
Expect(err).ToNot(HaveOccurred(), "Unable to create Deployment")

defer func() {
// delete deployment
testlog.Infof("Deleting Deployment %v", dp.Name)
err := testclient.Client.Delete(ctx, dp)
err := testclient.DataPlaneClient.Delete(ctx, dp)
Expect(err).ToNot(HaveOccurred())
}()
ovnPod, err := ovnCnfNodePod(ctx, workerRTNode)
@@ -494,10 +501,12 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab

By("Updating the performance profile")
profiles.UpdateWithRetry(profile)
By("Applying changes in performance profile and waiting until mcp will start updating")
mcps.WaitForCondition(performanceMCP, machineconfigv1.MachineConfigPoolUpdating, corev1.ConditionTrue)
By("Waiting for MCP being updated")
mcps.WaitForCondition(performanceMCP, machineconfigv1.MachineConfigPoolUpdated, corev1.ConditionTrue)

By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
profilesupdate.WaitForTuningUpdating(ctx, profile)

By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
profilesupdate.PostUpdateSync(ctx, profile)

// After reboot we want the deployment to be ready before moving forward
desiredStatus := appsv1.DeploymentStatus{
@@ -558,7 +567,7 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
defer func() {
By("Delete Deployment")
testlog.Infof("Deleting Deployment %v", dp.Name)
err := testclient.Client.Delete(ctx, dp)
err := testclient.DataPlaneClient.Delete(ctx, dp)
Expect(err).ToNot(HaveOccurred())
}()

@@ -677,22 +686,15 @@ var _ = Describe("[performance] Cgroups and affinity", Ordered, Label(string(lab
Expect(err).ToNot(HaveOccurred())
currentSpec, _ := json.Marshal(profile.Spec)
spec, _ := json.Marshal(initialProfile.Spec)
performanceMCP, err := mcps.GetByProfile(profile)
Expect(err).ToNot(HaveOccurred())
// revert only if the profile changes.
if !bytes.Equal(currentSpec, spec) {
Expect(testclient.Client.Patch(context.TODO(), profile,
client.RawPatch(
types.JSONPatchType,
[]byte(fmt.Sprintf(`[{ "op": "replace", "path": "/spec", "value": %s }]`, spec)),
),
)).ToNot(HaveOccurred())

By("Applying changes in performance profile and waiting until mcp will start updating")
mcps.WaitForCondition(performanceMCP, machineconfigv1.MachineConfigPoolUpdating, corev1.ConditionTrue)

By("Waiting when mcp finishes updates")
mcps.WaitForCondition(performanceMCP, machineconfigv1.MachineConfigPoolUpdated, corev1.ConditionTrue)
profiles.UpdateWithRetry(initialProfile)

By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
profilesupdate.WaitForTuningUpdating(ctx, initialProfile)

By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
profilesupdate.PostUpdateSync(ctx, initialProfile)
}
})
})
@@ -736,12 +738,12 @@ func checkCpuCount(ctx context.Context, workerNode *corev1.Node) {
// deleteTestPod removes the guaranteed pod
func deleteTestPod(ctx context.Context, testpod *corev1.Pod) {
// it is possible that the pod was already deleted as part of the test; in this case we want to skip the teardown
err := testclient.Client.Get(ctx, client.ObjectKeyFromObject(testpod), testpod)
err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), testpod)
if errors.IsNotFound(err) {
return
}

err = testclient.Client.Delete(ctx, testpod)
err = testclient.DataPlaneClient.Delete(ctx, testpod)
Expect(err).ToNot(HaveOccurred())

err = pods.WaitForDeletion(ctx, testpod, pods.DefaultDeletionTimeout*time.Second)
@@ -755,7 +757,7 @@ func ovnCnfNodePod(ctx context.Context, workerNode *corev1.Node) (corev1.Pod, er
options := &client.ListOptions{
Namespace: "openshift-ovn-kubernetes",
}
err := testclient.Client.List(ctx, ovnpods, options)
err := testclient.DataPlaneClient.List(ctx, ovnpods, options)
if err != nil {
return ovnKubeNodePod, err
}
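A second pattern that recurs throughout the diff is replacing direct MachineConfigPool condition waits (mcps.WaitForCondition) with the pool-agnostic helpers from the profilesupdate package, so the same test works whether the profile is rolled out through an MCP or a HyperShift NodePool. Below is a minimal sketch of that usage; applyAndWait is an illustrative wrapper, not a helper from the repository, and the sketch assumes it sits next to the test code.

import (
	"context"

	performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profiles"
	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profilesupdate"
)

// applyAndWait pushes a profile change and blocks until the backing pool
// (MCP on standalone OCP, NodePool on HyperShift) has started and then
// finished rolling it out.
func applyAndWait(ctx context.Context, profile *performancev2.PerformanceProfile) {
	// Apply the modified profile, retrying on update conflicts.
	profiles.UpdateWithRetry(profile)

	// Wait until the pool reports that the tuning update has started...
	profilesupdate.WaitForTuningUpdating(ctx, profile)

	// ...and then until the rollout completes and the nodes are back in sync.
	profilesupdate.PostUpdateSync(ctx, profile)
}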