
Test for cluster reset restore #156

Draft: wants to merge 23 commits into main
Changes from all commits (23 commits):
1b9694a
Add clusterreset test
endawkins Oct 17, 2023
c9f81a9
Cluster-Reset Update
endawkins Mar 11, 2024
393c5f9
Merge with Upstream
endawkins Mar 12, 2024
8b2304b
Cluster Reset Update 2
endawkins Mar 12, 2024
885dda7
Cluster Reset Update 3
endawkins Mar 12, 2024
5fb1b36
Cluster Reset Update functions to match newer changes and test for ve…
endawkins Mar 12, 2024
0a74dbf
Update makefile and go.mod and go.sum
endawkins Mar 19, 2024
3994cd3
fix lint errors in node.go and cluster.go
endawkins Mar 19, 2024
02757ba
Cluster Reset refactor
endawkins Apr 4, 2024
93c0cfa
creating suite and test for clusterresetrestore
endawkins Apr 10, 2024
4615b83
Merge remote-tracking branch 'upstream/main' into test_for_cluster_re…
endawkins May 6, 2024
bc171a7
clusterresetrestore changes for customflags
endawkins May 21, 2024
7990225
Merge remote-tracking branch 'upstream/main' into test_for_cluster_re…
endawkins May 21, 2024
8fdb992
Merge remote-tracking branch 'upstream/main' into test_for_cluster_re…
endawkins Aug 22, 2024
ed22810
Merge remote-tracking branch 'upstream/main' into test_for_cluster_re…
endawkins Sep 4, 2024
29fb6cf
adding clusterresetrestore to test_runner.sh and building out the com…
endawkins Sep 5, 2024
c9ff7cd
updating Jenkinsfile for s3 variables adding snapshot save command
endawkins Sep 6, 2024
55e52f3
fixing error to initialize cluster in suite
endawkins Sep 6, 2024
5ef9915
update to s3 variables
endawkins Sep 6, 2024
a1018c1
updating takeSnapshot function to pull the path of on-demand-path
endawkins Sep 7, 2024
abb967d
deleting unnecessary previously added but no longer used commands and…
endawkins Sep 17, 2024
ab70a67
Merge remote-tracking branch 'upstream/main' into test_for_cluster_re…
endawkins Sep 17, 2024
b98429b
adding function to create new Instance
endawkins Sep 23, 2024
5 changes: 3 additions & 2 deletions entrypoint/clusterreset/clusterreset_test.go
@@ -10,6 +10,7 @@ import (
)

var _ = Describe("Test:", func() {

It("Start Up with no issues", func() {
testcase.TestBuildCluster(cluster)
})
@@ -85,8 +86,8 @@ var _ = Describe("Test:", func() {

var _ = AfterEach(func() {
if CurrentSpecReport().Failed() {
fmt.Printf("\nFAILED! %s\n\n", CurrentSpecReport().FullText())
fmt.Printf("\nFAILED! %s\n", CurrentSpecReport().FullText())
} else {
fmt.Printf("\nPASSED! %s\n\n", CurrentSpecReport().FullText())
fmt.Printf("\nPASSED! %s\n", CurrentSpecReport().FullText())
}
})
58 changes: 58 additions & 0 deletions entrypoint/clusterresetrestore/clusterresetrestore_suite_test.go
@@ -0,0 +1,58 @@
package clusterresetrestore

import (
"flag"
"os"
"testing"

"github.com/rancher/distros-test-framework/config"
"github.com/rancher/distros-test-framework/pkg/customflag"
"github.com/rancher/distros-test-framework/shared"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

var (
cfg *config.Product
flags *customflag.FlagConfig
kubeconfig string
cluster *shared.Cluster
)

func TestMain(m *testing.M) {
var err error
flags = &customflag.ServiceFlag
flag.Var(&flags.Destroy, "destroy", "Destroy cluster after test")
flag.Parse()

_, err = config.AddEnv()
if err != nil {
shared.LogLevel("error", "error adding env vars: %w\n", err)
os.Exit(1)
}

kubeconfig = os.Getenv("KUBE_CONFIG")
if kubeconfig == "" {
// gets a cluster from terraform.
cluster = shared.ClusterConfig()
} else {
// gets a cluster from kubeconfig.
cluster = shared.KubeConfigCluster(kubeconfig)
}

os.Exit(m.Run())
}

func TestClusterResetRestoreSuite(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Cluster Reset Restore Test Suite")
}

var _ = AfterSuite(func() {
if customflag.ServiceFlag.Destroy {
status, err := shared.DestroyCluster()
Expect(err).NotTo(HaveOccurred())
Expect(status).To(Equal("cluster destroyed"))
}
})
93 changes: 93 additions & 0 deletions entrypoint/clusterresetrestore/clusterresetrestore_test.go
@@ -0,0 +1,93 @@
package clusterresetrestore

import (
"fmt"

"github.com/rancher/distros-test-framework/pkg/assert"
"github.com/rancher/distros-test-framework/pkg/testcase"

. "github.com/onsi/ginkgo/v2"
)

var _ = Describe("Test:", func() {
It("Start Up with no issues", func() {
testcase.TestBuildCluster(cluster)
})

It("Validate Nodes", func() {
testcase.TestNodeStatus(
cluster,
assert.NodeAssertReadyStatus(),
nil,
)
})

It("Validate Pods", func() {
testcase.TestPodStatus(
cluster,
assert.PodAssertRestart(),
assert.PodAssertReady())
})

It("Verifies ClusterIP Service Before Snapshot", func() {
testcase.TestServiceClusterIP(true, false)
})

It("Verifies NodePort Service Before Snapshot", func() {
testcase.TestServiceNodePort(true, false)
})

// deploy more workloads before and after snapshot -- do not delete the workloads
It("Verifies Cluster Reset Restore", func() {
testcase.TestClusterResetRestoreS3Snapshot(cluster, true, false)
})

// It("Verifies Ingress After Snapshot", func() {
// testcase.TestIngress(true, true)
// })

// It("Validate Nodes", func() {
// testcase.TestNodeStatus(
// cluster,
// assert.NodeAssertReadyStatus(),
// nil,
// )
// })

// It("Validate Pods", func() {
// testcase.TestPodStatus(
// cluster,
// assert.PodAssertRestart(),
// assert.PodAssertReady())
// })

// It("Verifies Daemonset", func() {
// testcase.TestDaemonset(true, true)
// })

// It("Verifies NodePort Service After Reset", func() {
// testcase.TestServiceNodePort(false, true)
// })

// It("Verifies dns access", func() {
// testcase.TestDNSAccess(true, true)
// })

// if cluster.Config.Product == "k3s" {
// It("Verifies Local Path Provisioner storage", func() {
// testcase.TestLocalPathProvisionerStorage(cluster, true, true)
// })

// It("Verifies LoadBalancer Service", func() {
// testcase.TestServiceLoadBalancer(true, true)
// })
// }
})

var _ = AfterEach(func() {
if CurrentSpecReport().Failed() {
fmt.Printf("\nFAILED! %s\n", CurrentSpecReport().FullText())
} else {
fmt.Printf("\nPASSED! %s\n", CurrentSpecReport().FullText())
}
})
4 changes: 4 additions & 0 deletions pkg/aws/aws.go
@@ -425,3 +425,7 @@ func extractID(reservation *ec2.Reservation) (string, error) {

return *reservation.Instances[0].InstanceId, nil
}

// func(c Client) deleteS3Folder(s3FolderName string) {
// c.
// }
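A possible implementation for this stub, sketched under the assumption that the package uses the aws-sdk-go v1 S3 API: list the objects under the folder prefix, then delete them in one batch. The s3svc field, the explicit bucket parameter, and the assumed imports ("fmt", aws, s3) are illustrative and not part of this PR or the existing Client type.

// deleteS3Folder removes every object stored under the given folder prefix.
// Sketch only: assumes Client carries an S3 handle (s3svc *s3.S3) built from
// the same AWS session used for EC2, and that the bucket name is passed in.
func (c Client) deleteS3Folder(bucket, s3FolderName string) error {
	list, err := c.s3svc.ListObjectsV2(&s3.ListObjectsV2Input{
		Bucket: aws.String(bucket),
		Prefix: aws.String(s3FolderName),
	})
	if err != nil {
		return fmt.Errorf("listing objects under %s: %w", s3FolderName, err)
	}
	if len(list.Contents) == 0 {
		return nil
	}

	objects := make([]*s3.ObjectIdentifier, 0, len(list.Contents))
	for _, obj := range list.Contents {
		objects = append(objects, &s3.ObjectIdentifier{Key: obj.Key})
	}

	_, err = c.s3svc.DeleteObjects(&s3.DeleteObjectsInput{
		Bucket: aws.String(bucket),
		Delete: &s3.Delete{Objects: objects},
	})

	return err
}

ListObjectsV2 returns at most 1000 keys per call, so a production version would paginate; for a test folder holding a handful of snapshots a single call is enough.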
7 changes: 7 additions & 0 deletions pkg/customflag/config.go
@@ -22,6 +22,7 @@ type FlagConfig struct {
External externalConfigFlag
RancherConfig rancherConfigFlag
HelmCharts helmChartsFlag
S3 s3ConfigFlag
}

// TestMapConfig is a type that wraps the test commands and expected values.
@@ -173,3 +174,9 @@ type rancherConfigFlag struct {
CertManagerVersion string
RancherVersion string
}

type s3ConfigFlag struct {
Bucket string
Folder string
Region string
}
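Nothing in this PR binds the new s3ConfigFlag fields yet; the test code still reads S3_BUCKET and S3_FOLDER straight from the environment. A minimal sketch of how a suite's TestMain could populate them is below; the flag names and the AWS_REGION fallback are assumptions, not part of the PR.

package clusterresetrestore

import (
	"flag"
	"os"

	"github.com/rancher/distros-test-framework/pkg/customflag"
)

// registerS3Flags binds the S3 config fields to command-line flags with
// environment-variable defaults. Call it from TestMain before flag.Parse().
func registerS3Flags() {
	flag.StringVar(&customflag.ServiceFlag.S3.Bucket, "s3Bucket",
		os.Getenv("S3_BUCKET"), "S3 bucket used for etcd snapshots")
	flag.StringVar(&customflag.ServiceFlag.S3.Folder, "s3Folder",
		os.Getenv("S3_FOLDER"), "S3 folder (prefix) inside the bucket")
	flag.StringVar(&customflag.ServiceFlag.S3.Region, "s3Region",
		os.Getenv("AWS_REGION"), "region of the S3 bucket")
}

With the fields populated, takeS3Snapshot and restoreS3Snapshot could read flags.S3.Bucket, flags.S3.Folder, and flags.S3.Region instead of calling os.Getenv inside the test case.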
8 changes: 4 additions & 4 deletions pkg/testcase/clusterreset.go
@@ -34,18 +34,18 @@ func TestClusterReset(cluster *shared.Cluster) {
Expect(resetCmdErr.Error()).To(ContainSubstring("Managed etcd cluster"))
Expect(resetCmdErr.Error()).To(ContainSubstring("has been reset"))
}
shared.LogLevel("info", "cluster reset successful. Waiting 60 seconds for cluster "+
shared.LogLevel("info", "cluster reset successful. Waiting 120 seconds for cluster "+
"to complete background processes after reset.")
- time.Sleep(60 * time.Second)
+ time.Sleep(120 * time.Second)

deleteDataDirectories(cluster)
shared.LogLevel("info", "data directories deleted")

startServer(cluster)
shared.LogLevel("info", "%s-service started. Waiting 60 seconds for nodes "+
shared.LogLevel("info", "%s-service started. Waiting 120 seconds for nodes "+
"and pods to sync after reset.", cluster.Config.Product)

- time.Sleep(60 * time.Second)
+ time.Sleep(120 * time.Second)
}

func killall(cluster *shared.Cluster) {
163 changes: 163 additions & 0 deletions pkg/testcase/clusterresetrestore.go
@@ -0,0 +1,163 @@
package testcase

import (
"fmt"
"os"
"strings"

"github.com/rancher/distros-test-framework/pkg/aws"
"github.com/rancher/distros-test-framework/shared"

. "github.com/onsi/gomega"
)

func TestClusterResetRestoreS3Snapshot(
cluster *shared.Cluster,
applyWorkload,
deleteWorkload bool,
) {
var workloadErr error
if applyWorkload {
workloadErr = shared.ManageWorkload("apply", cluster.Config.Product+"-extra-metadata.yaml")
Expect(workloadErr).NotTo(HaveOccurred(), "configmap failed to create")
shared.LogLevel("info", "%s-extra-metadata configmap successfully added", cluster.Config.Product)
}

s3Bucket := os.Getenv("S3_BUCKET")
s3Folder := os.Getenv("S3_FOLDER")
accessKeyID := os.Getenv("AWS_ACCESS_KEY_ID")
secretAccessKey := os.Getenv("AWS_SECRET_ACCESS_KEY")
s3Region := cluster.AwsEc2.Region

takeS3Snapshot(
cluster,
s3Bucket,
s3Folder,
s3Region,
accessKeyID,
secretAccessKey,
true,
false,
)

onDemandPathCmd := fmt.Sprintf("sudo ls /var/lib/rancher/%s/server/db/snapshots", cluster.Config.Product)
onDemandPath, _ := shared.RunCommandOnNode(onDemandPathCmd, cluster.ServerIPs[0])

fmt.Println("\non-demand-path: ", onDemandPath)

clusterTokenCmd := fmt.Sprintf("sudo cat /var/lib/rancher/%s/server/token", cluster.Config.Product)
clusterToken, _ := shared.RunCommandOnNode(clusterTokenCmd, cluster.ServerIPs[0])

fmt.Println("\ntoken: ", clusterToken)

stopInstances()
// create fresh new VM and install K3s/RKE2 using RunCommandOnNode
createNewServer(cluster)
// TODO: delete the old instances, bring up a new instance, and install K3s/RKE2 using the existing helpers.
shared.LogLevel("info", "running cluster reset on server %s\n", cluster.ServerIPs[0])
restoreS3Snapshot(
cluster,
s3Bucket,
s3Folder,
s3Region,
onDemandPath,
accessKeyID,
secretAccessKey,
clusterToken,
)

}

// perform snapshot and list snapshot commands -- deploy workloads after snapshot [apply workload]
func takeS3Snapshot(
cluster *shared.Cluster,
s3Bucket,
s3Folder,
s3Region,
accessKeyID,
secretAccessKey string,
applyWorkload,
deleteWorkload bool,
) {
productLocationCmd, findErr := shared.FindPath(cluster.Config.Product, cluster.ServerIPs[0])
Expect(findErr).NotTo(HaveOccurred())

takeSnapshotCmd := fmt.Sprintf("sudo %s etcd-snapshot save --s3 --s3-bucket=%s "+
"--s3-folder=%s --s3-region=%s --s3-access-key=%s --s3-secret-key=%s",
productLocationCmd, s3Bucket, s3Folder, s3Region, accessKeyID, secretAccessKey)

takeSnapshotRes, takeSnapshotErr := shared.RunCommandOnNode(takeSnapshotCmd, cluster.ServerIPs[0])
Expect(takeSnapshotRes).To(ContainSubstring("Snapshot on-demand"))
Expect(takeSnapshotErr).NotTo(HaveOccurred())

var workloadErr error
if applyWorkload {
workloadErr = shared.ManageWorkload("apply", "daemonset.yaml")
Expect(workloadErr).NotTo(HaveOccurred(), "Daemonset manifest not deployed")
}

// diff command -- comparison of outputs []

}
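For the "diff command -- comparison of outputs" note above, one option is to reuse the product binary's etcd-snapshot list subcommand with the same S3 flags and capture the output before and after the restore. A sketch, not part of this PR; the helper name is illustrative and the --s3 flags are the same etcd-snapshot options already used by takeS3Snapshot:

// listS3Snapshots returns the raw `etcd-snapshot list` output for the given
// S3 bucket and folder so callers can diff the listing taken before the
// restore against the one taken after.
func listS3Snapshots(
	cluster *shared.Cluster,
	s3Bucket, s3Folder, s3Region, accessKeyID, secretAccessKey string,
) (string, error) {
	productLocationCmd, findErr := shared.FindPath(cluster.Config.Product, cluster.ServerIPs[0])
	if findErr != nil {
		return "", findErr
	}

	listCmd := fmt.Sprintf("sudo %s etcd-snapshot list --s3 --s3-bucket=%s --s3-folder=%s "+
		"--s3-region=%s --s3-access-key=%s --s3-secret-key=%s",
		productLocationCmd, s3Bucket, s3Folder, s3Region, accessKeyID, secretAccessKey)

	return shared.RunCommandOnNode(listCmd, cluster.ServerIPs[0])
}

Calling this right after takeS3Snapshot and again after restoreS3Snapshot yields two strings that can be compared directly.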

func restoreS3Snapshot(
cluster *shared.Cluster,
s3Bucket,
s3Folder,
s3Region,
onDemandPath,
accessKeyID,
secretAccessKey,
token string,
) {
// var path string
productLocationCmd, findErr := shared.FindPath(cluster.Config.Product, cluster.ServerIPs[0])
Expect(findErr).NotTo(HaveOccurred())
resetCmd := fmt.Sprintf("sudo %s server --cluster-reset --etcd-s3 --cluster-reset-restore-path=%s "+
"--etcd-s3-bucket=%s --etcd-s3-folder=%s --etcd-s3-region=%s --etcd-s3-access-key=%s "+
"--etcd-s3-secret-key=%s --token=%s", productLocationCmd, onDemandPath, s3Bucket, s3Folder, s3Region, accessKeyID,
secretAccessKey, token)
resetRes, resetCmdErr := shared.RunCommandOnNode(resetCmd, cluster.ServerIPs[0])
Expect(resetCmdErr).NotTo(HaveOccurred())
Expect(resetRes).To(ContainSubstring("Managed etcd cluster"))
Expect(resetRes).To(ContainSubstring("has been reset"))
}

// stopInstances stops the original server instances before the restore runs on a fresh server.
// TODO: implement once an instance-stop helper is available in the aws package.
func stopInstances() {
}

func createNewServer(cluster *shared.Cluster) {

resourceName := os.Getenv("resource_name")
awsDependencies, err := aws.AddAWSClient(cluster)
Expect(err).NotTo(HaveOccurred(), "error adding aws nodes: %s", err)

// create server names.
var (
serverName []string
instanceServerId string
newExternalServerIp string
newPrivateServerIp string
)

serverName = append(serverName, fmt.Sprintf("%s-server-fresh", resourceName))

var createErr error

newExternalServerIp, newPrivateServerIp, instanceServerId, createErr =
awsDependencies.CreateInstances(serverName...)
Expect(createErr).NotTo(HaveOccurred(), "error creating new server instance: %v", createErr)

shared.LogLevel("info", "created new server %s with public ip %s and private ip %s",
instanceServerId, newExternalServerIp, newPrivateServerIp)
}

// make sure the workload you deployed after the snapshot isn't present after the restore snapshot
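One way to act on this note, sketched as an assumption rather than part of the PR: query the restored server for the daemonset that was applied after the snapshot and expect it to be absent. The kubeconfig path pattern and the daemonset name "test-daemonset" are placeholders; the real name comes from daemonset.yaml.

// verifyWorkloadAbsent checks that a workload applied after the snapshot does
// not survive the restore. Sketch only: assumes kubectl is on the node's PATH
// and that the daemonset in daemonset.yaml is named "test-daemonset".
func verifyWorkloadAbsent(cluster *shared.Cluster) {
	cmd := fmt.Sprintf(
		"sudo kubectl get daemonset -A --kubeconfig /etc/rancher/%s/%s.yaml",
		cluster.Config.Product, cluster.Config.Product)

	res, err := shared.RunCommandOnNode(cmd, cluster.ServerIPs[0])
	Expect(err).NotTo(HaveOccurred())
	Expect(res).NotTo(ContainSubstring("test-daemonset"))
}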

func installProduct(cluster *shared.Cluster) {
version := cluster.Config.Version
if cluster.Config.Product == "k3s" {

installCmd := fmt.Sprintf(
"curl -sfL https://get.k3s.io/ | sudo INSTALL_K3S_VERSION=%s INSTALL_K3S_SKIP_ENABLE=true sh -", version)
// TODO: run installCmd on the newly created server once its IP is wired through.
shared.LogLevel("info", "k3s install command: %s", installCmd)
}
}

// func deleteOldNodes() {

// }

2 changes: 1 addition & 1 deletion scripts/Jenkinsfile
@@ -66,7 +66,7 @@ node {
}
dir("./config") {
//update product info
def data = "ENV_PRODUCT=${env.PRODUCT_NAME}\nENV_TFVARS=${env.PRODUCT_NAME}.tfvars\nKUBE_CONFIG=${env.KUBE_CONFIG}\nBASTION_IP=${env.BASTION_IP}\nLOG_LEVEL=${env.LOG_LEVEL}\n"
def data = "ENV_PRODUCT=${env.PRODUCT_NAME}\nENV_TFVARS=${env.PRODUCT_NAME}.tfvars\nKUBE_CONFIG=${env.KUBE_CONFIG}\nBASTION_IP=${env.BASTION_IP}\nLOG_LEVEL=${env.LOG_LEVEL}\nS3_BUCKET=${env.S3_BUCKET}\nS3_FOLDER=${env.S3_FOLDER}\n"
writeFile(file: '.env', text: data)
//update tfvars file
def filename = "${env.PRODUCT_NAME}.tfvars"