From 3c2d77f4cfe2319217c4823a791f768fdd7bd25d Mon Sep 17 00:00:00 2001 From: Scott Seago Date: Wed, 8 May 2024 17:24:40 -0400 Subject: [PATCH] replaced BIAv3 with new ItemBlockAction plugin type Signed-off-by: Scott Seago --- design/backup-performance-improvements.md | 246 +++++++++++----------- 1 file changed, 124 insertions(+), 122 deletions(-) diff --git a/design/backup-performance-improvements.md b/design/backup-performance-improvements.md index 0257d58013..ebd075beec 100644 --- a/design/backup-performance-improvements.md +++ b/design/backup-performance-improvements.md @@ -6,13 +6,14 @@ The second goal is to enable Velero to eventually support VolumeGroupSnapshots. For both of these goals, Velero needs a way to determine which items should be backed up together. This design proposal will include two development phases: -- Phase 1 will refactor the backup workflow to identify blocks of items that should be backed up together, and then coordinate backup hooks among items in the block. +- Phase 1 will refactor the backup workflow to identify blocks of related items that should be backed up together, and then coordinate backup hooks among items in the block. - Phase 2 will add multiple worker threads for backing up item blocks, so instead of backing up each block as it identified, the velero backup workflow will instead add the block to a channel and one of the workers will pick it up. - Actual support for VolumeGroupSnapshots is out-of-scope here and will be handled in a future design proposal, but the item block refactor introduced in Phase 1 is a primary building block for this future proposal. ## Background Currently, during backup processing, the main Velero backup controller runs in a single thread, completely finishing the primary backup processing for one resource before moving on to the next one. We can improve the overall backup performance by backing up multiple items for a backup at the same time, but before we can do this we must first identify resources that need to be backed up together. +Generally speaking, resources that need to be backed up together are resources with interdependencies -- pods with their PVCs, PVCs with their PVs, groups of pods that form a single application, CRs, pods, and other resources that belong to the same operator, etc. As part of this initial refactoring, once these "Item Blocks" are identified, an additional change will be to move pod hook processing up to the ItemBlock level. If there are multiple pods in the ItemBlock, pre-hooks for all pods will be run before backing up the items, followed by post-hooks for all pods. This change to hook processing is another prerequisite for future VolumeGroupSnapshot support, since supporting this will require backing up the pods and volumes together for any volumes which belong to the same group. @@ -27,7 +28,7 @@ In case 1, the majority of the time spent on the backup is in the asynchronous p In case 2, the majority of time spent on the backup will likely be during the synchronous actions. Especially as regards CSI snapshot creation, the waiting for the VSC snaphandle to exist will result in significant passage of time with thousands of volumes. This is the sort of use case which will benefit the most from parallel item processing. ## Goals -- Identify groups of items to back up together (ItemBlocks). +- Identify groups of related items to back up together (ItemBlocks). - Manage backup hooks at the ItemBlock level rather than per-item. - Using worker threads, back up ItemBlocks at the same time. @@ -38,12 +39,25 @@ In case 2, the majority of time spent on the backup will likely be during the sy ## High-Level Design +### ItemBlock concept + +The updated design is based on a new struct/type called `ItemBlock`. +Essentially, an `ItemBlock` is a group of items that must be backed up together in order to guarantee backup integrity. +When we eventually split item backup across multiple worker threads, `ItemBlocks` will be kept together as the basic unit of backup. +To facilitate this, a new plugin type, `ItemBlockAction` will allow relationships between items to be identified by velero -- any resources that must be backed up with other resources will need IBA plugins defined for them. +Examples of `ItemBlocks` include: +1. A pod, its mounted PVCs, and the bound PVs for those PVCs. +2. A VolumeGroup (related PVCs and PVs) along with any pods mounting these volumes. +3. For a ReadWriteMany PVC, the PVC, its bound PV, and all pods mounting this PVC. + ### Phase 1: ItemBlock processing -- A new BIA method, `GetAdditionalItems`, will be needed for pre-processing ItemBlocks (this will require a new BIAv3 API). -- When processing the list of items returned from the item collector, instead of simply calling `BackupItem` on each in turn, we will use the `GetAdditionalItems` BIAv3 API call to determine other items to include with the current item in an ItemBlock. Repeat recursively on each item returned. +- A new plugin type, `ItemBlockAction`, will be created +- `ItemBlockAction` will contain the API method `GetRelatedItems`, which will be needed for determining which items to group together into `ItemBlocks`. +- When processing the list of items returned from the item collector, instead of simply calling `BackupItem` on each in turn, we will use the `GetRelatedItems` API call to determine other items to include with the current item in an ItemBlock. Repeat recursively on each item returned. - Don't include an item in more than one ItemBlock -- if the next item from the item collector is already in a block, skip it. - Once ItemBlock is determined, call new func `BackupItemBlock` instead of `BackupItem`. - New func `BackupItemBlock` will call pre hooks for any pods in the block, then back up the items in the block (`BackupItem` will no longer run hooks directly), then call post hooks for any pods in the block. +- The finalize phase will not be affected by the ItemBlock design, since this is just updating resources after async operations are completed on the items and there is no need to run these updates in parallel. ### Phase 2: Process ItemBlocks for a single backup in multiple threads - Concurrent `BackupItemBlock` operations will be executed by worker threads invoked by the backup controller, which will communicate with the backup controller operation via a shared channel. @@ -55,42 +69,50 @@ In case 2, the majority of time spent on the backup will likely be during the sy ### Phase 1: ItemBlock processing -#### BackupItemAction plugin changes +#### New ItemBlockAction plugin type -In order for Velero to identify groups of items to back up together in an ItemBlock, we need a way to identify items which need to be backed up along with the current item. While the current `Execute` BackupItemAction method does return a list of additional items which are required by the current item, we need to know this *before* we start the item backup. To support this, we need a new API method, `GetAdditionalItems` which Velero will call on each item as it processes it for an ItemBlock. The expectation is that this method will return the same items as currently returned as additional items by the current `Execute` method, with the exception that items which are not created until calling `Execute` should not be returned here, as they don't exist yet. +In order for Velero to identify groups of items to back up together in an ItemBlock, we need a way to identify items which need to be backed up along with the current item. While the current `Execute` BackupItemAction method does return a list of additional items which are required by the current item, we need to know this *before* we start the item backup. To support this, we need a new plugin type, `ItemBlockAction` (IBA) with an API method, `GetRelatedItems` which Velero will call on each item as it processes. The expectation is that the registered IBA plugins will return the same items as returned as additional items by the BIA `Execute` method, with the exception that items which are not created until calling `Execute` should not be returned here, as they don't exist yet. -#### Proto changes (compiled into golang by protoc) +#### Proto definition (compiled into golang by protoc) -The BackupItemAction service gets one new rpc method: +The ItemBlockAction plugin type is defined as follows: ``` -service BackupItemAction { - rpc GetAdditionalItems(BackupItemActionGetAdditionalItemsRequest) returns (BackupItemActionGetAdditionalItemsResponse); +service ItemBlockAction { + rpc AppliesTo(ItemBlockActionAppliesToRequest) returns (ItemBlockActionAppliesToResponse); + rpc GetRelatedItems(ItemBlockActionGetRelatedItemsRequest) returns (ItemBlockActionGetRelatedItemsResponse); } -``` -To support this new rpc method, we define new request/response message types: -``` -message BackupItemActionAdditionalItemsRequest { +message ItemBlockActionAppliesToRequest { + string plugin = 1; +} + +message ItemBlockActionAppliesToResponse { + ResourceSelector ResourceSelector = 1; +} + +message ItemBlockActionRelatedItemsRequest { string plugin = 1; bytes item = 2; bytes backup = 3; } -message BackupItemActionAdditionalItemsResponse { - repeated generated.ResourceIdentifier additionalItems = 1; +message ItemBlockActionRelatedItemsResponse { + repeated generated.ResourceIdentifier relatedItems = 1; } ``` -A new PluginKind, `BackupItemActionV3`, will be created, and the backup process will be modified to use this plugin kind. Existing v1/v2 plugins will be adapted to v3 with an empty `GetAdditionalItems` method, meaning that those plugins will not add anything to the ItemBlock for the item being backed up. +A new PluginKind, `ItemBlockActionV1`, will be created, and the backup process will be modified to use this plugin kind. -Any BIA plugins which return additional items from `Execute()` that need to be backed up at the same time or sequentially in the same worker thread as the current items need to be upgraded to v3. This mainly applies to plugins that operate on pods which reference resources which must be backed up along with the pod and are potentially affected by pod hooks or for plugins which connect multiple pods whose volumes should be backed up at the same time. +For any BIA plugins which return additional items from `Execute()` that need to be backed up at the same time or sequentially in the same worker thread as the current items should add a new IBA plugin to return these same items (minus any which won't exist before BIA `Execute()` is called). +This mainly applies to plugins that operate on pods which reference resources which must be backed up along with the pod and are potentially affected by pod hooks or for plugins which connect multiple pods whose volumes should be backed up at the same time. ### Changes to processing item list from the Item Collector #### New structs ItemBlock and ItemBlockItem -``` +```go type ItemBlock struct { log logrus.FieldLogger + // This is a reference to the shared itemBackupper for the backup itemBackupper *itemBackupper Items []ItemBlockItem } @@ -107,14 +129,16 @@ In the `BackupWithResolvers` func, the current Velero implementation iterates ov #### Modifications to the loop over ItemCollector results The `kubernetesResource` struct used by the item collector will be modified to add an `orderedResource` bool which will be set true for all of the resources moved to the beginning of the list as a result of being ordered resources. +While the item collector already puts ordered resources first, there is no indication in the list which of these initial items are from the ordered resources list and which are the remaining (unordered) items. +Velero needs to know which resources are ordered because when we process them later, these initial resources must be processed sequentially, one at a time, before processing the remaining resources in a parallel manner. The current workflow within each iteration of the ItemCollector.items loop will replaced with the following: -- (note that some of the below should be pulled out into a helper func to facilitate recursive call to it for items returned from `GetAdditonalItems`.) +- (note that some of the below should be pulled out into a helper func to facilitate recursive call to it for items returned from `GetRelatedItems`.) - Before loop iteration, create a new `itemsInBlock` map of type map[velero.ResourceIdentifier]bool which represents the set of items already included in a block. - If `item` is already in `itemsInBlock`, continue. This one has already been processed. - Add `item` to `itemsInBlock`. - Load item from ItemCollector file. Close/remove file after loading (on error return or not, possibly with similar anonymous func to current impl) -- Get matching BIA plugins for item, call `GetAdditionalItems` for each. For each item returned, get full item content from ItemCollector (if present in item list, pulling from file, removing file when done) or from cluster (if not present in item list), add item to the current block, add item to `itemsInBlock` map, and then recursively apply current step to each (i.e. call BIA method, add to block, etc.) +- Get matching IBA plugins for item, call `GetRelatedItems` for each. For each item returned, get full item content from ItemCollector (if present in item list, pulling from file, removing file when done) or from cluster (if not present in item list), add item to the current block, add item to `itemsInBlock` map, and then recursively apply current step to each (i.e. call IBA method, add to block, etc.) - Once full ItemBlock list is generated, call `backupItemBlock(block ItemBlock) - Add `backupItemBlock` return values to `backedUpGroupResources` map @@ -122,7 +146,7 @@ The current workflow within each iteration of the ItemCollector.items loop will #### New func `backupItemBlock` Method signature for new func `backupItemBlock` is as follows: -``` +```go func backupItemBlock(block ItemBlock) []schema.GroupResource ``` The return value is a slice of GRs for resources which were backed up. Velero tracks these to determine which CRDs need to be included in the backup. Note that we need to make sure we include in this not only those resources that were backed up directly, but also those backed up indirectly via additional items BIA execute returns. @@ -146,7 +170,7 @@ The `backupReconciler` struct will also have this new field added. #### Worker pool for item block processing A new type, `ItemBlockWorker` will be added which will manage a pool of worker goroutines which will process item blocks, a shared input channel for passing blocks to workers, and a WaitGroup to shut down cleanly when the reconciler exits. -``` +```go type ItemBlockWorkerPool struct { itemBlockChannel chan ItemBlockInput wg *sync.WaitGroup @@ -179,7 +203,7 @@ The `processItemBlocksWorker` func (run by the worker goroutines) will read from The ItemBlock processing loop implemented in Phase 1 will be modified to send each newly-created ItemBlock to the shared channel rather than calling `BackupItemBlock` inline, using a WaitGroup to manage in-process items. A separate goroutine will be created to process returns for this backup. After completion of the ItemBlock processing loop, velero will use the WaitGroup to wait for all ItemBlock processing to complete before moving forward. A simplified example of what this response goroutine might look like: -``` +```go // omitting cancel handling, context, etc ret := make(chan ItemBlockReturn) wg := &sync.WaitGroup{} @@ -215,26 +239,26 @@ A simplified example of what this response goroutine might look like: // responses from BackupItemBlock calls are in responses ``` -The ItemBlock processing loop described above will be split into two separate iterations. For the first iteration, velero will only process those items at the beginning of the loop identified as `orderedResources` -- when the groups generated from these resources are passed to the worker channel, velero will wait for the response before moving on to the next ItemBlock. This is to ensure that the ordered resources are processed in the required order. Once the last ordered resource is processed, the remaining ItemBlocks will be processed and sent to the worker channel without waiting for a response, in order to allow these ItemBlocks to be processed in parallel. +When processing the responses, the main thing is to set `backedUpGroupResources[item.groupResource]=true` for each GR returned, which will give the same result as the current implementation calling items one-by-one and setting that field as needed. + +The ItemBlock processing loop described above will be split into two separate iterations. For the first iteration, velero will only process those items at the beginning of the loop identified as `orderedResources` -- when the groups generated from these resources are passed to the worker channel, velero will wait for the response before moving on to the next ItemBlock. +This is to ensure that the ordered resources are processed in the required order. Once the last ordered resource is processed, the remaining ItemBlocks will be processed and sent to the worker channel without waiting for a response, in order to allow these ItemBlocks to be processed in parallel. +The reason we must execute `ItemBlocks` with ordered resources first (and one at a time) is that this is a list of resources identified by the user as resources which must be backed up first, and in a particular order. #### Synchronize access to the BackedUpItems map Velero uses a map of BackedUpItems to track which items have already been backed up. This prevents velero from attempting to back up an item more than once, as well as guarding against creating infinite loops due to circular dependencies in the additional items returns. Since velero will now be accessing this map from the parallel goroutines, access to the map must be synchronized with mutexes. -## Alternatives considered - -### New ItemBlockAction plugin type +### Backup Finalize phase -Instead of adding a new `GetAdditionalItems` method to BackupItemAction, another possibility would be to leave BIA alone and create a new plugin type, ItemBlockAction. -Rather than adding the `GetAdditionalItems` method to the existing BIA plugin type, velero would introduce a new ItemBlockAction plugin type which provides the single `GetAdditionalItems` method, described above as a new BIAv3 method. -For existing BIA implementations which return additional items in `Execute()`, instead of adding the new v3 method (and, if necessary, the missing v2 methods) to the existing BIA, they would create a new ItemBlockAction plugin to provide this method. +The finalize phase will not be affected by the ItemBlock design, since this is just updating resources after async operations are completed on the items and there is no need to run these updates in parallel. -The change to the backup workflow would be relatively simple: resolve registered ItemBlockAction (IBA) plugins as well as BIA plugins, determine for both plugin types which apply to the current resource type, and when calling the new API method, iterate over IBA plugins rather than BIA plugins. +## Alternatives considered -The change to the "BackupItemAction plugin changes" section would be to create a new plugin type rather than enhancing the existing plugin type. The API function needed in this new plugin type would be identical to the one described as a new BIAv3 method. The other work here would be to create the infrastructure/scaffolding to define, register, etc. the new plugin type. +### BackpuItemAction v3 API -Overall, the decision of extending BIA vs. creating a new plugin type won't change much in the design here. -The new plugin type will require more initial work, but ongoing maintenance should be similar between the options. +Instead of adding a new `ItemBlockAction` plugin type, we could add a `GetAdditionalItems` method to BackupItemAction. +This was rejected because the new plugin type provides a cleaner interface, and keeps the function of grouping related items separate from the function of modifying item content for the backup. ### Per-backup worker pool @@ -256,96 +280,74 @@ For the per-backup worker approach, the worker count represents the worker count ## Compatibility -### Example upgrade to BIAv3 +### Example IBA implementation for BIA plugins which return additional items -Included below is an example of what might be required to upgrade a v1 plugin which returns additional items to BIAv3. +Included below is an example of what might be required for a BIA plugin which returns additional items. The code is taken from the internal velero `pod_action.go` which identifies the items required for a given pod. -Basically, the required changes are as follows: -- (for v1 plugins) Implement `Name()`. This returns a string. The content isn't particularly important, since Velero doesn't actually make an RPC call for this method, but it must be defined on the type in order to implement the interface. -- (for v1 plugins) Implement `Progress(`). Since the plugin doesn't have any asynchronous operations (or it would already be a v2 plugin), this should just return the error `biav2.AsyncOperationsNotSupportedError()`. -- (for v1 plugins) Implement `Cancel(`). Since the plugin doesn't have any asynchronous operations (or it would already be a v2 plugin), this should just return nil. -- (for v1 or v2 plugins) Implement `GetAdditionalItems()` If the additionalItems return value from `Execute()` is nil (or only returns items newly-created in Execute()), it should return nil. Otherwise, it should return the same items as `Execute()` minus any items that don't exist yet. In the example below, this was done by putting the additional item list generation code into `GetAdditionalItems()` and refactoring `Execute()` to call the new func to get the list. For plugins which return a combination of already-existing and newly-created items, `GetAdditionalItems()` should generate the already-existing list, and `Execute()` should append the newly-created items to the list. -- When registering the BIA, replace `RegisterBackupItemAction` (for v1 plugins) or `RegisterBackupItemActionV2` (for v2 plugins) with `RegisterBackupItemActionV3` - -```diff --git a/pkg/backup/actions/pod_action.go b/pkg/backup/actions/pod_action.go -index ce6b1ade8..5625dcb5b 100644 ---- a/pkg/backup/actions/pod_action.go -+++ b/pkg/backup/actions/pod_action.go -@@ -25,6 +25,7 @@ import ( - v1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" - "github.com/vmware-tanzu/velero/pkg/kuberesource" - "github.com/vmware-tanzu/velero/pkg/plugin/velero" -+ biav2 "github.com/vmware-tanzu/velero/pkg/plugin/velero/backupitemaction/v2" - ) - - // PodAction implements ItemAction. -@@ -47,13 +48,19 @@ func (a *PodAction) AppliesTo() (velero.ResourceSelector, error) { - // Execute scans the pod's spec.volumes for persistentVolumeClaim volumes and returns a - // ResourceIdentifier list containing references to all of the persistentVolumeClaim volumes used by - // the pod. This ensures that when a pod is backed up, all referenced PVCs are backed up too. --func (a *PodAction) Execute(item runtime.Unstructured, backup *v1.Backup) (runtime.Unstructured, []velero.ResourceIdentifier, error) { -+func (a *PodAction) Execute(item runtime.Unstructured, backup *v1.Backup) (runtime.Unstructured, []velero.ResourceIdentifier, string, []velero.ResourceIdentifier, error) { - a.log.Info("Executing podAction") - defer a.log.Info("Done executing podAction") - -+ additionalItems, err := a.GetAdditionalItems(item, backup) -+ return item, additionalItems, "", nil, err -+} -+ -+// v3 API call, return nil if no additional items for this resource -+func (a *PodAction) GetAdditionalItems(item runtime.Unstructured, backup *v1.Backup) (runtime.Unstructured, []velero.ResourceIdentifier, error) { - pod := new(corev1api.Pod) - if err := runtime.DefaultUnstructuredConverter.FromUnstructured(item.UnstructuredContent(), pod); err != nil { -- return nil, nil, errors.WithStack(err) -+ return nil, errors.WithStack(err) - } - - var additionalItems []velero.ResourceIdentifier -@@ -67,7 +74,7 @@ func (a *PodAction) Execute(item runtime.Unstructured, backup *v1.Backup) (runti - - if len(pod.Spec.Volumes) == 0 { - a.log.Info("pod has no volumes") -- return item, additionalItems, nil -+ return additionalItems, nil - } - - for _, volume := range pod.Spec.Volumes { -@@ -82,5 +89,20 @@ func (a *PodAction) Execute(item runtime.Unstructured, backup *v1.Backup) (runti - } - } - -- return item, additionalItems, nil -+ return additionalItems, nil -+} -+ -+// v2 API call -+func (a *PodAction) Name() string { -+ return "PodAction" -+} -+ -+// v2 API call, just return this error for BIAs without async operations -+func (a *PodAction) Progress(operationID string, backup *velerov1api.Backup) (velero.OperationProgress, error) { -+ return velero.OperationProgress{}, biav2.AsyncOperationsNotSupportedError() -+} -+ -+// v2 API call, just return nil for BIAs without async operations -+func (a *PodAction) Cancel(operationID string, backup *velerov1api.Backup) error { -+ return nil - } -diff --git a/pkg/cmd/server/plugin/plugin.go b/pkg/cmd/server/plugin/plugin.go -index c375c5437..7717443dc 100644 ---- a/pkg/cmd/server/plugin/plugin.go -+++ b/pkg/cmd/server/plugin/plugin.go -@@ -42,7 +42,7 @@ func NewCommand(f client.Factory) *cobra.Command { - Run: func(c *cobra.Command, args []string) { - pluginServer = pluginServer. - RegisterBackupItemAction("velero.io/pv", newPVBackupItemAction). -- RegisterBackupItemAction("velero.io/pod", newPodBackupItemAction). -+ RegisterBackupItemActionV3("velero.io/pod", newPodBackupItemAction). - RegisterBackupItemAction("velero.io/service-account", newServiceAccountBackupItemAction(f)). - RegisterRestoreItemAction("velero.io/job", newJobRestoreItemAction). - RegisterRestoreItemAction("velero.io/pod", newPodRestoreItemAction). +In this particular case, the only function of pod_action is to return additional items, so we can really just convert this plugin to an IBA plugin. If there were other actions, such as modifying the pod content on backup, then we would still need the pod action, and the related items vs. content manipulation functions would need to be separated. + +```go +// PodAction implements ItemBlockAction. +type PodAction struct { + log logrus.FieldLogger +} + +// NewPodAction creates a new ItemAction for pods. +func NewPodAction(logger logrus.FieldLogger) *PodAction { + return &PodAction{log: logger} +} + +// AppliesTo returns a ResourceSelector that applies only to pods. +func (a *PodAction) AppliesTo() (velero.ResourceSelector, error) { + return velero.ResourceSelector{ + IncludedResources: []string{"pods"}, + }, nil +} + +// GetRelatedItems scans the pod's spec.volumes for persistentVolumeClaim volumes and returns a +// ResourceIdentifier list containing references to all of the persistentVolumeClaim volumes used by +// the pod. This ensures that when a pod is backed up, all referenced PVCs are backed up too. +func (a *PodAction) GetRelatedItems(item runtime.Unstructured, backup *v1.Backup) (runtime.Unstructured, []velero.ResourceIdentifier, error) { + pod := new(corev1api.Pod) + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(item.UnstructuredContent(), pod); err != nil { + return nil, errors.WithStack(err) + } + + var relatedItems []velero.ResourceIdentifier + if pod.Spec.PriorityClassName != "" { + a.log.Infof("Adding priorityclass %s to relatedItems", pod.Spec.PriorityClassName) + relatedItems = append(relatedItems, velero.ResourceIdentifier{ + GroupResource: kuberesource.PriorityClasses, + Name: pod.Spec.PriorityClassName, + }) + } + + if len(pod.Spec.Volumes) == 0 { + a.log.Info("pod has no volumes") + return relatedItems, nil + } + + for _, volume := range pod.Spec.Volumes { + if volume.PersistentVolumeClaim != nil && volume.PersistentVolumeClaim.ClaimName != "" { + a.log.Infof("Adding pvc %s to relatedItems", volume.PersistentVolumeClaim.ClaimName) + + relatedItems = append(relatedItems, velero.ResourceIdentifier{ + GroupResource: kuberesource.PersistentVolumeClaims, + Namespace: pod.Namespace, + Name: volume.PersistentVolumeClaim.ClaimName, + }) + } + } + + return relatedItems, nil +} + +// API call +func (a *PodAction) Name() string { + return "PodAction" +} + ```