From 6f54e23124301266432259ac07993622636dce12 Mon Sep 17 00:00:00 2001 From: Sebastian Sch Date: Thu, 19 Oct 2023 01:07:17 +0300 Subject: [PATCH] CI scripts improvements Updates to OCP: * add a retry to the policy creation to fix OCP CI flake * add more RAM to the OCP master single node * add a wait for registry to be available * update the OCP version to the latest 4.14 RC release Updates for K8s: * stop using Docker Hub for the registry image to avoid the pull rate limit * add an alias for golang pointing to the Docker Hub image GitHub Actions: * always remove the cluster Signed-off-by: Sebastian Sch --- .github/workflows/test.yml | 12 ++++++++++++ hack/run-e2e-conformance-virtual-cluster.sh | 5 ++++- hack/run-e2e-conformance-virtual-ocp.sh | 10 +++++++--- test/conformance/tests/test_sriov_operator.go | 15 +++++++++------ 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1ff07eed9..1b41ae30a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -119,6 +119,12 @@ jobs: name: k8s-artifact path: ./k8s-artifacts + - name: remove virtual cluster + if: always() + run: | + kcli delete cluster $CLUSTER_NAME -y + kcli delete network $CLUSTER_NAME -y + virtual-ocp: name: ocp needs: [ @@ -145,3 +151,9 @@ jobs: with: name: ocp-artifact path: ./ocp-artifacts + + - name: remove virtual cluster + if: always() + run: | + kcli delete cluster $CLUSTER_NAME -y + kcli delete network $CLUSTER_NAME -y diff --git a/hack/run-e2e-conformance-virtual-cluster.sh b/hack/run-e2e-conformance-virtual-cluster.sh index 8f9f2c440..8bba27e2f 100755 --- a/hack/run-e2e-conformance-virtual-cluster.sh +++ b/hack/run-e2e-conformance-virtual-cluster.sh @@ -120,6 +120,9 @@ controller_ip=`kubectl get node -o wide | grep ctlp | awk '{print $6}'` insecure_registry="[[registry]] location = \"$controller_ip:5000\" insecure = true + +[aliases] +\"golang\" = \"docker.io/library/golang\" " cat << EOF > 
/etc/containers/registries.conf.d/003-${cluster_name}.conf @@ -223,7 +226,7 @@ spec: - effect: NoSchedule key: node-role.kubernetes.io/control-plane containers: - - image: docker.io/registry:latest + - image: quay.io/libpod/registry:2.8.2 imagePullPolicy: Always name: registry volumeMounts: diff --git a/hack/run-e2e-conformance-virtual-ocp.sh b/hack/run-e2e-conformance-virtual-ocp.sh index a23410b04..4509af225 100755 --- a/hack/run-e2e-conformance-virtual-ocp.sh +++ b/hack/run-e2e-conformance-virtual-ocp.sh @@ -41,8 +41,8 @@ kcli create network -c 192.168.123.0/24 ocp kcli create network -c 192.168.${virtual_router_id}.0/24 --nodhcp -i $cluster_name cat < ./${cluster_name}-plan.yaml -tag: 4.14.0-rc.1 -ctlplane_memory: 24576 +tag: 4.14.0-rc.6 +ctlplane_memory: 32768 worker_memory: 8192 pool: default disk_size: 50 @@ -191,13 +191,17 @@ podman build -t "${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE}" -f "${root}/Dockerfile.sr echo "## build webhook image" podman build -t "${SRIOV_NETWORK_WEBHOOK_IMAGE}" -f "${root}/Dockerfile.webhook" "${root}" +echo "## wait for registry to be available" +kubectl wait configs.imageregistry.operator.openshift.io/cluster --for=condition=Available --timeout=120s + dockercgf=`kubectl -n ${NAMESPACE} get sa builder -oyaml | grep imagePullSecrets -A 1 | grep -o "builder-.*"` auth=`kubectl -n ${NAMESPACE} get secret ${dockercgf} -ojson | jq '.data.".dockercfg"'` auth="${auth:1:-1}" auth=`echo ${auth} | base64 -d` echo ${auth} > registry-login.conf -pass=$( jq .\"$registry\".password registry-login.conf ) +internal_registry="image-registry.openshift-image-registry.svc:5000" +pass=$( jq .\"$internal_registry\".password registry-login.conf ) podman login -u serviceaccount -p ${pass:1:-1} $registry --tls-verify=false podman push --tls-verify=false "${SRIOV_NETWORK_OPERATOR_IMAGE}" diff --git a/test/conformance/tests/test_sriov_operator.go b/test/conformance/tests/test_sriov_operator.go index ac120e525..a206bf4d9 100644 --- 
a/test/conformance/tests/test_sriov_operator.go +++ b/test/conformance/tests/test_sriov_operator.go @@ -143,8 +143,9 @@ var _ = Describe("[sriov] operator", func() { cfg.Spec.ConfigDaemonNodeSelector = map[string]string{ "sriovenabled": "true", } - err = clients.Update(context.TODO(), &cfg) - Expect(err).ToNot(HaveOccurred()) + Eventually(func() error { + return clients.Update(context.TODO(), &cfg) + }, 1*time.Minute, 5*time.Second).ShouldNot(HaveOccurred()) By("Checking that a daemon is scheduled only on selected node") Eventually(func() bool { @@ -159,8 +160,9 @@ var _ = Describe("[sriov] operator", func() { }, &cfg) Expect(err).ToNot(HaveOccurred()) cfg.Spec.ConfigDaemonNodeSelector = map[string]string{} - err = clients.Update(context.TODO(), &cfg) - Expect(err).ToNot(HaveOccurred()) + Eventually(func() error { + return clients.Update(context.TODO(), &cfg) + }, 1*time.Minute, 5*time.Second).ShouldNot(HaveOccurred()) By("Checking that a daemon is scheduled on each worker node") Eventually(func() bool { @@ -2316,8 +2318,9 @@ func createVanillaNetworkPolicy(node string, sriovInfos *cluster.EnabledNodes, n DeviceType: "netdevice", }, } - err = clients.Create(context.Background(), config) - Expect(err).ToNot(HaveOccurred()) + Eventually(func() error { + return clients.Create(context.Background(), config) + }, 1*time.Minute, 5*time.Second).ShouldNot(HaveOccurred()) Eventually(func() sriovv1.Interfaces { nodeState, err := clients.SriovNetworkNodeStates(operatorNamespace).Get(context.Background(), node, metav1.GetOptions{})