diff --git a/.github/workflows/virtual-cluster.yml b/.github/workflows/virtual-cluster.yml new file mode 100644 index 0000000000..81b7c17402 --- /dev/null +++ b/.github/workflows/virtual-cluster.yml @@ -0,0 +1,43 @@ +name: sriov-operator-test +on: [pull_request] + +jobs: + virtual-k8s-cluster: + name: k8s + runs-on: [sriov] + steps: + - name: Check out code into the Go module directory + uses: actions/checkout@v2 + + - name: Set up Go 1.20 + uses: actions/setup-go@v3 + with: + go-version: 1.20.x + + - name: run test + run: make test-e2e-conformance-virtual-k8s-cluster-ci + + - uses: actions/upload-artifact@v3 + with: + name: k8s-artifact  # unique per job so the two uploads do not overwrite each other + path: ./artifacts.tar.gz + + virtual-ocp: + name: ocp + runs-on: [ocp] + steps: + - name: Check out code into the Go module directory + uses: actions/checkout@v2 + + - name: Set up Go 1.20 + uses: actions/setup-go@v3 + with: + go-version: 1.20.x + + - name: run test + run: make test-e2e-conformance-virtual-ocp-cluster-ci + + - uses: actions/upload-artifact@v3 + with: + name: ocp-artifact  # unique per job so the two uploads do not overwrite each other + path: ./artifacts.tar.gz \ No newline at end of file diff --git a/Makefile b/Makefile index f48baa6813..79ca433eb1 100644 --- a/Makefile +++ b/Makefile @@ -181,6 +181,18 @@ deploy-setup-k8s: deploy-setup test-e2e-conformance: SUITE=./test/conformance ./hack/run-e2e-conformance.sh +test-e2e-conformance-virtual-k8s-cluster-ci: + ./hack/run-e2e-conformance-virtual-cluster.sh + +test-e2e-conformance-virtual-k8s-cluster: + SKIP_DELETE=TRUE ./hack/run-e2e-conformance-virtual-cluster.sh + +test-e2e-conformance-virtual-ocp-cluster-ci: + ./hack/run-e2e-conformance-virtual-ocp.sh + +test-e2e-conformance-virtual-ocp-cluster: + SKIP_DELETE=TRUE ./hack/run-e2e-conformance-virtual-ocp.sh + test-e2e-validation-only: SUITE=./test/validation ./hack/run-e2e-conformance.sh diff --git a/deploy/configmap.yaml b/deploy/configmap.yaml index 9d7cf8596e..41ce4c8095 100644 --- a/deploy/configmap.yaml +++ b/deploy/configmap.yaml @@ -7,6 +7,7 @@ data: Intel_i40e_25G_SFP28: 
"8086 158b 154c" Intel_i40e_10G_X710_SFP: "8086 1572 154c" Intel_ixgbe_10G_X550: "8086 1563 1565" + Intel_ixgbe_82576: "8086 10c9 10ca" Intel_i40e_X710_X557_AT_10G: "8086 1589 154c" Intel_i40e_10G_X710_BACKPLANE: "8086 1581 154c" Intel_i40e_10G_X710_BASE_T: "8086 15ff 154c" diff --git a/deployment/sriov-network-operator/templates/configmap.yaml b/deployment/sriov-network-operator/templates/configmap.yaml index 9d7cf8596e..41ce4c8095 100644 --- a/deployment/sriov-network-operator/templates/configmap.yaml +++ b/deployment/sriov-network-operator/templates/configmap.yaml @@ -7,6 +7,7 @@ data: Intel_i40e_25G_SFP28: "8086 158b 154c" Intel_i40e_10G_X710_SFP: "8086 1572 154c" Intel_ixgbe_10G_X550: "8086 1563 1565" + Intel_ixgbe_82576: "8086 10c9 10ca" Intel_i40e_X710_X557_AT_10G: "8086 1589 154c" Intel_i40e_10G_X710_BACKPLANE: "8086 1581 154c" Intel_i40e_10G_X710_BASE_T: "8086 15ff 154c" diff --git a/doc/testing-virtual-machine.md b/doc/testing-virtual-machine.md new file mode 100644 index 0000000000..31fce2457a --- /dev/null +++ b/doc/testing-virtual-machine.md @@ -0,0 +1,58 @@ +## E2E conformance test + +It's possible to use QEMU to test the SR-IOV operator on a virtual kubernetes/openshift cluster. +Using the IGB model network driver allow to create virtual functions on the virtual system + +## How to test + +First you will need to enable the `DEV_MODE` via the operator environment variable. +Second step is to add the intel virtual nic to the supported nics configmap. + +Another requirement is to load the vfio kernel module with no_iommu configuration. 
Example systemd unit: + +``` +[Unit] +Description=vfio no-iommu +Before=kubelet.service crio.service node-valid-hostname.service + +[Service] +# Need oneshot to delay kubelet +Type=oneshot +ExecStart=/usr/bin/bash -c "modprobe vfio enable_unsafe_noiommu_mode=1" +StandardOutput=journal+console +StandardError=journal+console + +[Install] +WantedBy=network-online.target +``` + +### Prerequisites +* kcli - deployment tool (https://github.com/karmab/kcli) +* virsh +* qemu > 8.1 +* libvirt > 9 +* podman +* make +* go + +## Deploy the cluster + +Use the deployment [script](../hack/run-e2e-conformance-virtual-cluster.sh); it will deploy a k8s cluster, +compile the operator images, and run the e2e tests. + +Example: +``` +SKIP_DELETE=TRUE make test-e2e-conformance-virtual-k8s-cluster +``` + +It's also possible to skip the tests and only deploy the cluster by running: + +``` +SKIP_TEST=TRUE SKIP_DELETE=TRUE make test-e2e-conformance-virtual-k8s-cluster +``` + +To use the cluster after the deployment, you need to export the kubeconfig: + +``` +export KUBECONFIG=$HOME/.kcli/clusters/virtual/auth/kubeconfig +``` \ No newline at end of file diff --git a/hack/run-e2e-conformance-virtual-cluster.sh b/hack/run-e2e-conformance-virtual-cluster.sh new file mode 100755 index 0000000000..7bd8b2ce9e --- /dev/null +++ b/hack/run-e2e-conformance-virtual-cluster.sh @@ -0,0 +1,351 @@ +#!/usr/bin/env bash +set -xeo pipefail + +cluster_name=${CLUSTER_NAME:-virtual} +domain_name=$cluster_name.lab + +api_ip=${API_IP:-192.168.122.250} +virtual_router_id=${VIRTUAL_ROUTER_ID:-250} +HOME="/root" + +here="$(dirname "$(readlink --canonicalize "${BASH_SOURCE[0]}")")" +root="$(readlink --canonicalize "$here/..")" + +check_requirements() { + for cmd in kcli virsh virt-edit podman make go; do + if ! 
command -v "$cmd" &> /dev/null; then + echo "$cmd is not available" + exit 1 + fi + done + return 0 +} + +echo "## checking requirements" +check_requirements +echo "## delete existing cluster name $cluster_name" +kcli delete cluster $cluster_name -y +kcli delete network $cluster_name -y + +function cleanup { + kcli delete cluster $cluster_name -y + kcli delete network $cluster_name -y +} + +if [ -z "$SKIP_DELETE" ]; then  # tear the cluster down on exit unless SKIP_DELETE is set + trap cleanup EXIT +fi + +kcli create network -c 192.168.${virtual_router_id}.0/24 --nodhcp -i $cluster_name + +cat <<EOF > ./${cluster_name}-plan.yaml +ctlplane_memory: 4096 +worker_memory: 4096 +pool: default +disk_size: 50 +network: default +api_ip: $api_ip +virtual_router_id: $virtual_router_id +domain: $domain_name +ctlplanes: 1 +workers: 2 +ingress: false +machine: q35 +engine: crio +sdn: flannel +autolabeller: false +vmrules: + - $cluster_name-worker-.*: + nets: + - name: default + type: igb + vfio: true + noconf: true + numa: 0 + - name: $cluster_name + type: igb + vfio: true + noconf: true + numa: 1 + numcpus: 6 + numa: + - id: 0 + vcpus: 0,2,4 + memory: 2048 + - id: 1 + vcpus: 1,3,5 + memory: 2048 + +EOF + +kcli create cluster generic --paramfile ./${cluster_name}-plan.yaml $cluster_name + +export KUBECONFIG=$HOME/.kcli/clusters/$cluster_name/auth/kubeconfig +export PATH=$PWD:$PATH + +ATTEMPTS=0 +MAX_ATTEMPTS=72 +ready=false +sleep_time=10 + +until $ready || [ $ATTEMPTS -eq $MAX_ATTEMPTS ] +do + echo "waiting for cluster to be ready" + if [ `kubectl get node | grep -w Ready | wc -l` == 3 ]; then  # -w: a plain "Ready" match would also count NotReady nodes + echo "cluster is ready" + ready=true + else + echo "cluster is not ready yet" + sleep $sleep_time + fi + ATTEMPTS=$((ATTEMPTS+1)) +done + +if ! 
$ready; then + echo "Timed out waiting for cluster to be ready" + kubectl get nodes + exit 1 +fi + +echo "## label cluster workers as sriov capable" +kubectl label node $cluster_name-worker-0.$domain_name feature.node.kubernetes.io/network-sriov.capable=true --overwrite +kubectl label node $cluster_name-worker-1.$domain_name feature.node.kubernetes.io/network-sriov.capable=true --overwrite + +echo "## label cluster worker as worker" +kubectl label node $cluster_name-worker-0.$domain_name node-role.kubernetes.io/worker= --overwrite +kubectl label node $cluster_name-worker-1.$domain_name node-role.kubernetes.io/worker= --overwrite + +controller_ip=`kubectl get node -o wide | grep ctlp | awk '{print $6}'` +insecure_registry="[[registry]] +location = \"$controller_ip:5000\" +insecure = true +" + +cat << EOF > /etc/containers/registries.conf.d/003-${cluster_name}.conf +$insecure_registry +EOF + +kcli ssh $cluster_name-ctlplane-0 << EOF +sudo su +echo '$insecure_registry' > /etc/containers/registries.conf.d/003-internal.conf +systemctl restart crio +EOF + +kcli ssh $cluster_name-worker-0 << EOF +sudo su +echo '$insecure_registry' > /etc/containers/registries.conf.d/003-internal.conf +systemctl restart crio +EOF + +kcli ssh $cluster_name-worker-1 << EOF +sudo su +echo '$insecure_registry' > /etc/containers/registries.conf.d/003-internal.conf +systemctl restart crio +EOF + +kubectl create namespace container-registry + +echo "## deploy internal registry" +cat < /dev/null; then + echo "$cmd is not available" + exit 1 + fi + done + return 0 +} + +echo "## checking requirements" +check_requirements +echo "## delete existing cluster name $cluster_name" +kcli delete cluster $cluster_name -y +kcli delete network $cluster_name -y + +function cleanup { + kcli delete cluster $cluster_name -y + kcli delete network $cluster_name -y + podman logout $registry +} + +if [ -z $SKIP_DELETE ]; then + trap cleanup EXIT +fi + +kcli create network -c 192.168.${virtual_router_id}.0/24 --nodhcp 
-i $cluster_name + +cat <<EOF > ./${cluster_name}-plan.yaml +tag: 4.14.0-rc.1 +ctlplane_memory: 24576 +worker_memory: 8192 +pool: default +disk_size: 50 +network: default +api_ip: $api_ip +virtual_router_id: $virtual_router_id +domain: $domain_name +ctlplanes: 1 +workers: 3 +machine: q35 +network_type: OVNKubernetes +pull_secret: /root/openshift_pull.json +vmrules: + - $cluster_name-worker-.*: + nets: + - name: default + numa: 0 + - name: $cluster_name + type: igb + vfio: true + noconf: true + numa: 0 + - name: $cluster_name + type: igb + vfio: true + noconf: true + numa: 1 + numcpus: 6 + numa: + - id: 0 + vcpus: 0,2,4 + memory: 4096 + - id: 1 + vcpus: 1,3,5 + memory: 4096 + +EOF + +kcli create cluster openshift --paramfile ./${cluster_name}-plan.yaml $cluster_name + +export KUBECONFIG=$HOME/.kcli/clusters/$cluster_name/auth/kubeconfig +export PATH=$PWD:$PATH + +# workaround for the registry pull: grant registry-viewer to the system:unauthenticated group +kubectl create clusterrolebinding authenticated-registry-viewer --clusterrole registry-viewer --group system:unauthenticated + +ATTEMPTS=0 +MAX_ATTEMPTS=72 +ready=false +sleep_time=10 + +until $ready || [ $ATTEMPTS -eq $MAX_ATTEMPTS ] +do + echo "waiting for cluster to be ready" + if [ `kubectl get node | grep -w Ready | wc -l` == 4 ]; then  # -w: a plain "Ready" match would also count NotReady nodes + echo "cluster is ready" + ready=true + else + echo "cluster is not ready yet" + sleep $sleep_time + fi + ATTEMPTS=$((ATTEMPTS+1)) +done + +if ! 
$ready; then + echo "Timed out waiting for cluster to be ready" + kubectl get nodes + exit 1 +fi + +echo "## label cluster workers as sriov capable" +kubectl label node $cluster_name-worker-0.$domain_name feature.node.kubernetes.io/network-sriov.capable=true --overwrite +kubectl label node $cluster_name-worker-1.$domain_name feature.node.kubernetes.io/network-sriov.capable=true --overwrite +kubectl label node $cluster_name-worker-2.$domain_name feature.node.kubernetes.io/network-sriov.capable=true --overwrite + +controller_ip=`kubectl get node -o wide | grep ctlp | awk '{print $6}'` + +if [ `cat /etc/hosts | grep ${api_ip} | grep "default-route-openshift-image-registry.apps.${cluster_name}.${domain_name}" | wc -l` == 0 ]; then + echo "adding registry to hosts" + sed -i "s/${api_ip}/${api_ip} default-route-openshift-image-registry.apps.${cluster_name}.${domain_name}/g" /etc/hosts +fi + + +cat < registry-login.conf + +pass=$( jq .\"$registry\".password registry-login.conf ) +podman login -u serviceaccount -p ${pass:1:-1} $registry --tls-verify=false + +podman push --tls-verify=false "${SRIOV_NETWORK_OPERATOR_IMAGE}" +podman push --tls-verify=false "${SRIOV_NETWORK_CONFIG_DAEMON_IMAGE}" +podman push --tls-verify=false "${SRIOV_NETWORK_WEBHOOK_IMAGE}" + +podman logout $registry + +echo "## apply CRDs" +kubectl apply -k $root/config/crd + + +cat <