diff --git a/pkg/cmd/cli/install/install.go b/pkg/cmd/cli/install/install.go
index 989041c2d1..8305ad1839 100644
--- a/pkg/cmd/cli/install/install.go
+++ b/pkg/cmd/cli/install/install.go
@@ -67,6 +67,7 @@ type Options struct {
     VolumeSnapshotConfig    flag.Map
     UseNodeAgent            bool
     PrivilegedNodeAgent     bool
+    PrivilegedDatamoverPods bool
     //TODO remove UseRestic when migration test out of using it
     UseRestic               bool
     Wait                    bool
@@ -120,6 +121,7 @@ func (o *Options) BindFlags(flags *pflag.FlagSet) {
     flags.BoolVar(&o.DryRun, "dry-run", o.DryRun, "Generate resources, but don't send them to the cluster. Use with -o. Optional.")
     flags.BoolVar(&o.UseNodeAgent, "use-node-agent", o.UseNodeAgent, "Create Velero node-agent daemonset. Optional. Velero node-agent hosts Velero modules that need to run in one or more nodes(i.e. Restic, Kopia).")
     flags.BoolVar(&o.PrivilegedNodeAgent, "privileged-node-agent", o.PrivilegedNodeAgent, "Use privileged mode for the node agent. Optional. Required to backup block devices.")
+    flags.BoolVar(&o.PrivilegedDatamoverPods, "privileged-datamover-pods", o.PrivilegedDatamoverPods, "Use privileged mode for the datamover pods. Optional.")
     flags.BoolVar(&o.Wait, "wait", o.Wait, "Wait for Velero deployment to be ready. Optional.")
     flags.DurationVar(&o.DefaultRepoMaintenanceFrequency, "default-repo-maintain-frequency", o.DefaultRepoMaintenanceFrequency, "How often 'maintain' is run for backup repositories by default. Optional.")
     flags.DurationVar(&o.GarbageCollectionFrequency, "garbage-collection-frequency", o.GarbageCollectionFrequency, "How often the garbage collection runs for expired backups.(default 1h)")
@@ -263,6 +265,7 @@ func (o *Options) AsVeleroOptions() (*install.VeleroOptions, error) {
         RestoreOnly:             o.RestoreOnly,
         UseNodeAgent:            o.UseNodeAgent,
         PrivilegedNodeAgent:     o.PrivilegedNodeAgent,
+        PrivilegedDatamoverPods: o.PrivilegedDatamoverPods,
         UseVolumeSnapshots:      o.UseVolumeSnapshots,
         BSLConfig:               o.BackupStorageConfig.Data(),
         VSLConfig:               o.VolumeSnapshotConfig.Data(),
diff --git a/pkg/cmd/cli/nodeagent/server.go b/pkg/cmd/cli/nodeagent/server.go
index ba7c1610fb..b233bc6d86 100644
--- a/pkg/cmd/cli/nodeagent/server.go
+++ b/pkg/cmd/cli/nodeagent/server.go
@@ -89,6 +89,7 @@ type nodeAgentServerConfig struct {
     resourceTimeout         time.Duration
     dataMoverPrepareTimeout time.Duration
     nodeAgentConfig         string
+    privilegedDatamoverPods bool
 }

 func NewServerCommand(f client.Factory) *cobra.Command {
@@ -126,6 +127,7 @@ func NewServerCommand(f client.Factory) *cobra.Command {
     command.Flags().DurationVar(&config.dataMoverPrepareTimeout, "data-mover-prepare-timeout", config.dataMoverPrepareTimeout, "How long to wait for preparing a DataUpload/DataDownload. Default is 30 minutes.")
     command.Flags().StringVar(&config.metricsAddress, "metrics-address", config.metricsAddress, "The address to expose prometheus metrics")
     command.Flags().StringVar(&config.nodeAgentConfig, "node-agent-configmap", config.nodeAgentConfig, "The name of ConfigMap containing node-agent configurations.")
+    command.Flags().BoolVar(&config.privilegedDatamoverPods, "privileged-datamover-pods", config.privilegedDatamoverPods, "Use privileged mode for the datamover pods. Optional.")

     return command
 }
@@ -330,12 +332,24 @@ func (s *nodeAgentServer) run() {
         s.config.dataMoverPrepareTimeout,
         s.logger,
         s.metrics,
+        s.config.privilegedDatamoverPods,
     )
     if err = dataUploadReconciler.SetupWithManager(s.mgr); err != nil {
         s.logger.WithError(err).Fatal("Unable to create the data upload controller")
     }

-    dataDownloadReconciler := controller.NewDataDownloadReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, podResources, s.nodeName, s.config.dataMoverPrepareTimeout, s.logger, s.metrics)
+    dataDownloadReconciler := controller.NewDataDownloadReconciler(
+        s.mgr.GetClient(),
+        s.mgr,
+        s.kubeClient,
+        s.dataPathMgr,
+        podResources,
+        s.nodeName,
+        s.config.dataMoverPrepareTimeout,
+        s.logger,
+        s.metrics,
+        s.config.privilegedDatamoverPods,
+)
     if err = dataDownloadReconciler.SetupWithManager(s.mgr); err != nil {
         s.logger.WithError(err).Fatal("Unable to create the data download controller")
     }
diff --git a/pkg/controller/data_download_controller.go b/pkg/controller/data_download_controller.go
index 701a033ee7..caf7299b04 100644
--- a/pkg/controller/data_download_controller.go
+++ b/pkg/controller/data_download_controller.go
@@ -53,33 +53,45 @@ import (

 // DataDownloadReconciler reconciles a DataDownload object
 type DataDownloadReconciler struct {
-    client           client.Client
-    kubeClient       kubernetes.Interface
-    mgr              manager.Manager
-    logger           logrus.FieldLogger
-    Clock            clock.WithTickerAndDelayedExecution
-    restoreExposer   exposer.GenericRestoreExposer
-    nodeName         string
-    dataPathMgr      *datapath.Manager
-    podResources     v1.ResourceRequirements
-    preparingTimeout time.Duration
-    metrics          *metrics.ServerMetrics
+    client                  client.Client
+    kubeClient              kubernetes.Interface
+    mgr                     manager.Manager
+    logger                  logrus.FieldLogger
+    Clock                   clock.WithTickerAndDelayedExecution
+    restoreExposer          exposer.GenericRestoreExposer
+    nodeName                string
+    dataPathMgr             *datapath.Manager
+    podResources            v1.ResourceRequirements
+    preparingTimeout        time.Duration
+    metrics                 *metrics.ServerMetrics
+    privilegedDatamoverPods bool
 }

-func NewDataDownloadReconciler(client client.Client, mgr manager.Manager, kubeClient kubernetes.Interface, dataPathMgr *datapath.Manager,
-    podResources v1.ResourceRequirements, nodeName string, preparingTimeout time.Duration, logger logrus.FieldLogger, metrics *metrics.ServerMetrics) *DataDownloadReconciler {
+func NewDataDownloadReconciler(
+    client client.Client,
+    mgr manager.Manager,
+    kubeClient kubernetes.Interface,
+    dataPathMgr *datapath.Manager,
+    podResources v1.ResourceRequirements,
+    nodeName string,
+    preparingTimeout time.Duration,
+    logger logrus.FieldLogger,
+    metrics *metrics.ServerMetrics,
+    privilegedDatamoverPods bool,
+) *DataDownloadReconciler {
     return &DataDownloadReconciler{
-        client:           client,
-        kubeClient:       kubeClient,
-        mgr:              mgr,
-        logger:           logger.WithField("controller", "DataDownload"),
-        Clock:            &clock.RealClock{},
-        nodeName:         nodeName,
-        restoreExposer:   exposer.NewGenericRestoreExposer(kubeClient, logger),
-        dataPathMgr:      dataPathMgr,
-        podResources:     podResources,
-        preparingTimeout: preparingTimeout,
-        metrics:          metrics,
+        client:                  client,
+        kubeClient:              kubeClient,
+        mgr:                     mgr,
+        logger:                  logger.WithField("controller", "DataDownload"),
+        Clock:                   &clock.RealClock{},
+        nodeName:                nodeName,
+        restoreExposer:          exposer.NewGenericRestoreExposer(kubeClient, logger),
+        dataPathMgr:             dataPathMgr,
+        podResources:            podResources,
+        preparingTimeout:        preparingTimeout,
+        metrics:                 metrics,
+        privilegedDatamoverPods: privilegedDatamoverPods,
     }
 }

@@ -182,7 +194,7 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
     // Expose() will trigger to create one pod whose volume is restored by a given volume snapshot,
     // but the pod maybe is not in the same node of the current controller, so we need to return it here.
     // And then only the controller who is in the same node could do the rest work.
-    err = r.restoreExposer.Expose(ctx, getDataDownloadOwnerObject(dd), dd.Spec.TargetVolume.PVC, dd.Spec.TargetVolume.Namespace, hostingPodLabels, r.podResources, dd.Spec.OperationTimeout.Duration)
+    err = r.restoreExposer.Expose(ctx, getDataDownloadOwnerObject(dd), dd.Spec.TargetVolume.PVC, dd.Spec.TargetVolume.Namespace, hostingPodLabels, r.podResources, dd.Spec.OperationTimeout.Duration, r.privilegedDatamoverPods)
     if err != nil {
         if err := r.client.Get(ctx, req.NamespacedName, dd); err != nil {
             if !apierrors.IsNotFound(err) {
diff --git a/pkg/controller/data_download_controller_test.go b/pkg/controller/data_download_controller_test.go
index a675b73cd5..2548884dd4 100644
--- a/pkg/controller/data_download_controller_test.go
+++ b/pkg/controller/data_download_controller_test.go
@@ -140,7 +140,7 @@ func initDataDownloadReconcilerWithError(objects []runtime.Object, needError ...
     dataPathMgr := datapath.NewManager(1)

-    return NewDataDownloadReconciler(fakeClient, nil, fakeKubeClient, dataPathMgr, corev1.ResourceRequirements{}, "test-node", time.Minute*5, velerotest.NewLogger(), metrics.NewServerMetrics()), nil
+    return NewDataDownloadReconciler(fakeClient, nil, fakeKubeClient, dataPathMgr, corev1.ResourceRequirements{}, "test-node", time.Minute*5, velerotest.NewLogger(), metrics.NewServerMetrics(), false), nil
 }

 func TestDataDownloadReconcile(t *testing.T) {
@@ -441,7 +441,7 @@ func TestDataDownloadReconcile(t *testing.T) {
     r.restoreExposer = func() exposer.GenericRestoreExposer {
         ep := exposermockes.NewGenericRestoreExposer(t)
         if test.isExposeErr {
-            ep.On("Expose", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(errors.New("Error to expose restore exposer"))
+            ep.On("Expose", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(errors.New("Error to expose restore exposer"))
         } else if test.notNilExpose {
             hostingPod := builder.ForPod("test-ns", "test-name").Volumes(&corev1.Volume{Name: "test-pvc"}).Result()
             hostingPod.ObjectMeta.SetUID("test-uid")
@@ -959,7 +959,7 @@ func (dt *ddResumeTestHelper) resumeCancellableDataPath(_ *DataUploadReconciler,
     return dt.resumeErr
 }

-func (dt *ddResumeTestHelper) Expose(context.Context, corev1.ObjectReference, string, string, map[string]string, corev1.ResourceRequirements, time.Duration) error {
+func (dt *ddResumeTestHelper) Expose(context.Context, corev1.ObjectReference, string, string, map[string]string, corev1.ResourceRequirements, time.Duration, bool) error {
     return nil
 }
diff --git a/pkg/controller/data_upload_controller.go b/pkg/controller/data_upload_controller.go
index f6bd9c947c..e78f3eedd7 100644
--- a/pkg/controller/data_upload_controller.go
+++ b/pkg/controller/data_upload_controller.go
@@ -62,20 +62,21 @@ const (

 // DataUploadReconciler reconciles a DataUpload object
 type DataUploadReconciler struct {
-    client              client.Client
-    kubeClient          kubernetes.Interface
-    csiSnapshotClient   snapshotter.SnapshotV1Interface
-    mgr                 manager.Manager
-    Clock               clocks.WithTickerAndDelayedExecution
-    nodeName            string
-    logger              logrus.FieldLogger
-    snapshotExposerList map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer
-    dataPathMgr         *datapath.Manager
-    loadAffinity        *kube.LoadAffinity
-    backupPVCConfig     map[string]nodeagent.BackupPVC
-    podResources        corev1.ResourceRequirements
-    preparingTimeout    time.Duration
-    metrics             *metrics.ServerMetrics
+    client                  client.Client
+    kubeClient              kubernetes.Interface
+    csiSnapshotClient       snapshotter.SnapshotV1Interface
+    mgr                     manager.Manager
+    Clock                   clocks.WithTickerAndDelayedExecution
+    nodeName                string
+    logger                  logrus.FieldLogger
+    snapshotExposerList     map[velerov2alpha1api.SnapshotType]exposer.SnapshotExposer
+    dataPathMgr             *datapath.Manager
+    loadAffinity            *kube.LoadAffinity
+    backupPVCConfig         map[string]nodeagent.BackupPVC
+    podResources            corev1.ResourceRequirements
+    preparingTimeout        time.Duration
+    metrics                 *metrics.ServerMetrics
+    privilegedDatamoverPods bool
 }

 func NewDataUploadReconciler(
@@ -92,6 +93,7 @@ func NewDataUploadReconciler(
     preparingTimeout time.Duration,
     log logrus.FieldLogger,
     metrics *metrics.ServerMetrics,
+    privilegedDatamoverPods bool,
 ) *DataUploadReconciler {
     return &DataUploadReconciler{
         client:     client,
@@ -108,12 +110,13 @@ func NewDataUploadReconciler(
                 log,
             ),
         },
-        dataPathMgr:      dataPathMgr,
-        loadAffinity:     loadAffinity,
-        backupPVCConfig:  backupPVCConfig,
-        podResources:     podResources,
-        preparingTimeout: preparingTimeout,
-        metrics:          metrics,
+        dataPathMgr:             dataPathMgr,
+        loadAffinity:            loadAffinity,
+        backupPVCConfig:         backupPVCConfig,
+        podResources:            podResources,
+        preparingTimeout:        preparingTimeout,
+        metrics:                 metrics,
+        privilegedDatamoverPods: privilegedDatamoverPods,
     }
 }

@@ -816,6 +819,7 @@ func (r *DataUploadReconciler) setupExposeParam(du *velerov2alpha1api.DataUpload
             Affinity:        r.loadAffinity,
             BackupPVCConfig: r.backupPVCConfig,
             Resources:       r.podResources,
+            Privileged:      r.privilegedDatamoverPods,
         }, nil
     }
     return nil, nil
diff --git a/pkg/controller/data_upload_controller_test.go b/pkg/controller/data_upload_controller_test.go
index ac61865552..d8010c87e3 100644
--- a/pkg/controller/data_upload_controller_test.go
+++ b/pkg/controller/data_upload_controller_test.go
@@ -245,6 +245,7 @@ func initDataUploaderReconcilerWithError(needError ...error) (*DataUploadReconci
         time.Minute*5,
         velerotest.NewLogger(),
         metrics.NewServerMetrics(),
+        false,
     ), nil
 }

diff --git a/pkg/exposer/csi_snapshot.go b/pkg/exposer/csi_snapshot.go
index 4dcc50d129..1944c1f1da 100644
--- a/pkg/exposer/csi_snapshot.go
+++ b/pkg/exposer/csi_snapshot.go
@@ -73,6 +73,9 @@ type CSISnapshotExposeParam struct {

     // Resources defines the resource requirements of the hosting pod
     Resources corev1.ResourceRequirements
+
+    // Privileged determines whether to create a privileged pod
+    Privileged bool
 }

 // CSISnapshotExposeWaitParam define the input param for WaitExposed of CSI snapshots
@@ -202,6 +205,7 @@ func (e *csiSnapshotExposer) Expose(ctx context.Context, ownerObject corev1.Obje
         csiExposeParam.HostingPodLabels,
         csiExposeParam.Affinity,
         csiExposeParam.Resources,
+        csiExposeParam.Privileged,
     )
     if err != nil {
         return errors.Wrap(err, "error to create backup pod")
@@ -441,6 +445,7 @@ func (e *csiSnapshotExposer) createBackupPod(
     label map[string]string,
     affinity *kube.LoadAffinity,
     resources corev1.ResourceRequirements,
+    privileged bool,
 ) (*corev1.Pod, error) {
     podName := ownerObject.Name

@@ -550,5 +555,11 @@ func (e *csiSnapshotExposer) createBackupPod(
         },
     }

+    if privileged {
+        pod.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{
+            Privileged: boolptr.True(),
+        }
+    }
+
     return e.kubeClient.CoreV1().Pods(ownerObject.Namespace).Create(ctx, pod, metav1.CreateOptions{})
 }
diff --git a/pkg/exposer/generic_restore.go b/pkg/exposer/generic_restore.go
index d498470a77..33a23b29f7 100644
--- a/pkg/exposer/generic_restore.go
+++ b/pkg/exposer/generic_restore.go
@@ -37,7 +37,7 @@ import (
 // GenericRestoreExposer is the interfaces for a generic restore exposer
 type GenericRestoreExposer interface {
     // Expose starts the process to a restore expose, the expose process may take long time
-    Expose(context.Context, corev1.ObjectReference, string, string, map[string]string, corev1.ResourceRequirements, time.Duration) error
+    Expose(context.Context, corev1.ObjectReference, string, string, map[string]string, corev1.ResourceRequirements, time.Duration, bool) error

     // GetExposed polls the status of the expose.
     // If the expose is accessible by the current caller, it waits the expose ready and returns the expose result.
@@ -69,7 +69,16 @@ type genericRestoreExposer struct {
     log logrus.FieldLogger
 }

-func (e *genericRestoreExposer) Expose(ctx context.Context, ownerObject corev1.ObjectReference, targetPVCName string, sourceNamespace string, hostingPodLabels map[string]string, resources corev1.ResourceRequirements, timeout time.Duration) error {
+func (e *genericRestoreExposer) Expose(
+    ctx context.Context,
+    ownerObject corev1.ObjectReference,
+    targetPVCName string,
+    sourceNamespace string,
+    hostingPodLabels map[string]string,
+    resources corev1.ResourceRequirements,
+    timeout time.Duration,
+    privileged bool,
+) error {
     curLog := e.log.WithFields(logrus.Fields{
         "owner":      ownerObject.Name,
         "target PVC": targetPVCName,
@@ -87,7 +96,7 @@ func (e *genericRestoreExposer) Expose(ctx context.Context, ownerObject corev1.O
         return errors.Errorf("Target PVC %s/%s has already been bound, abort", sourceNamespace, targetPVCName)
     }

-    restorePod, err := e.createRestorePod(ctx, ownerObject, targetPVC, timeout, hostingPodLabels, selectedNode, resources)
+    restorePod, err := e.createRestorePod(ctx, ownerObject, targetPVC, timeout, hostingPodLabels, selectedNode, resources, privileged)
     if err != nil {
         return errors.Wrapf(err, "error to create restore pod")
     }
@@ -296,8 +305,16 @@ func (e *genericRestoreExposer) RebindVolume(ctx context.Context, ownerObject co
     return nil
 }

-func (e *genericRestoreExposer) createRestorePod(ctx context.Context, ownerObject corev1.ObjectReference, targetPVC *corev1.PersistentVolumeClaim,
-    operationTimeout time.Duration, label map[string]string, selectedNode string, resources corev1.ResourceRequirements) (*corev1.Pod, error) {
+func (e *genericRestoreExposer) createRestorePod(
+    ctx context.Context,
+    ownerObject corev1.ObjectReference,
+    targetPVC *corev1.PersistentVolumeClaim,
+    operationTimeout time.Duration,
+    label map[string]string,
+    selectedNode string,
+    resources corev1.ResourceRequirements,
+    privileged bool,
+) (*corev1.Pod, error) {
     restorePodName := ownerObject.Name
     restorePVCName := ownerObject.Name

@@ -384,6 +401,12 @@ func (e *genericRestoreExposer) createRestorePod(ctx context.Context, ownerObjec
         },
     }

+    if privileged {
+        pod.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{
+            Privileged: boolptr.True(),
+        }
+    }
+
     return e.kubeClient.CoreV1().Pods(ownerObject.Namespace).Create(ctx, pod, metav1.CreateOptions{})
 }
diff --git a/pkg/exposer/generic_restore_test.go b/pkg/exposer/generic_restore_test.go
index 4c3221b5ca..1956d0414e 100644
--- a/pkg/exposer/generic_restore_test.go
+++ b/pkg/exposer/generic_restore_test.go
@@ -180,7 +180,7 @@ func TestRestoreExpose(t *testing.T) {
                 }
             }

-            err := exposer.Expose(context.Background(), ownerObject, test.targetPVCName, test.sourceNamespace, map[string]string{}, corev1.ResourceRequirements{}, time.Millisecond)
+            err := exposer.Expose(context.Background(), ownerObject, test.targetPVCName, test.sourceNamespace, map[string]string{}, corev1.ResourceRequirements{}, time.Millisecond, false)
             assert.EqualError(t, err, test.err)
         })
     }
diff --git a/pkg/exposer/mocks/generic_restore.go b/pkg/exposer/mocks/generic_restore.go
index e0b76d6e75..d98e85bc56 100644
--- a/pkg/exposer/mocks/generic_restore.go
+++ b/pkg/exposer/mocks/generic_restore.go
@@ -1,4 +1,4 @@
-// Code generated by mockery v2.39.1. DO NOT EDIT.
+// Code generated by mockery v2.43.2. DO NOT EDIT.

 package mocks

@@ -26,17 +26,17 @@ func (_m *GenericRestoreExposer) CleanUp(_a0 context.Context, _a1 v1.ObjectRefer
     _m.Called(_a0, _a1)
 }

-// Expose provides a mock function with given fields: _a0, _a1, _a2, _a3, _a4, _a5, _a6
-func (_m *GenericRestoreExposer) Expose(_a0 context.Context, _a1 v1.ObjectReference, _a2 string, _a3 string, _a4 map[string]string, _a5 v1.ResourceRequirements, _a6 time.Duration) error {
-    ret := _m.Called(_a0, _a1, _a2, _a3, _a4, _a5, _a6)
+// Expose provides a mock function with given fields: _a0, _a1, _a2, _a3, _a4, _a5, _a6, _a7
+func (_m *GenericRestoreExposer) Expose(_a0 context.Context, _a1 v1.ObjectReference, _a2 string, _a3 string, _a4 map[string]string, _a5 v1.ResourceRequirements, _a6 time.Duration, _a7 bool) error {
+    ret := _m.Called(_a0, _a1, _a2, _a3, _a4, _a5, _a6, _a7)

     if len(ret) == 0 {
         panic("no return value specified for Expose")
     }

     var r0 error
-    if rf, ok := ret.Get(0).(func(context.Context, v1.ObjectReference, string, string, map[string]string, v1.ResourceRequirements, time.Duration) error); ok {
-        r0 = rf(_a0, _a1, _a2, _a3, _a4, _a5, _a6)
+    if rf, ok := ret.Get(0).(func(context.Context, v1.ObjectReference, string, string, map[string]string, v1.ResourceRequirements, time.Duration, bool) error); ok {
+        r0 = rf(_a0, _a1, _a2, _a3, _a4, _a5, _a6, _a7)
     } else {
         r0 = ret.Error(0)
     }
diff --git a/pkg/install/daemonset.go b/pkg/install/daemonset.go
index 9cc3a814c1..17d1ef5599 100644
--- a/pkg/install/daemonset.go
+++ b/pkg/install/daemonset.go
@@ -53,6 +53,9 @@ func DaemonSet(namespace string, opts ...podTemplateOption) *appsv1.DaemonSet {
     if len(c.nodeAgentConfigMap) > 0 {
         daemonSetArgs = append(daemonSetArgs, fmt.Sprintf("--node-agent-configmap=%s", c.nodeAgentConfigMap))
     }
+    if c.privilegedDatamoverPods {
+        daemonSetArgs = append(daemonSetArgs, "--privileged-datamover-pods=true")
+    }

     userID := int64(0)
     mountPropagationMode := corev1.MountPropagationHostToContainer
diff --git a/pkg/install/deployment.go b/pkg/install/deployment.go
index aa0b8b7790..eb3645d741 100644
--- a/pkg/install/deployment.go
+++ b/pkg/install/deployment.go
@@ -50,6 +50,7 @@ type podTemplateConfig struct {
     uploaderType            string
     defaultSnapshotMoveData bool
     privilegedNodeAgent     bool
+    privilegedDatamoverPods bool
     disableInformerCache    bool
     scheduleSkipImmediately bool
     podResources            kube.PodResources
@@ -177,6 +178,12 @@ func WithPrivilegedNodeAgent(b bool) podTemplateOption {
     }
 }

+func WithPrivilegedDatamoverPods(b bool) podTemplateOption {
+    return func(c *podTemplateConfig) {
+        c.privilegedDatamoverPods = b
+    }
+}
+
 func WithNodeAgentConfigMap(nodeAgentConfigMap string) podTemplateOption {
     return func(c *podTemplateConfig) {
         c.nodeAgentConfigMap = nodeAgentConfigMap
diff --git a/pkg/install/resources.go b/pkg/install/resources.go
index f962dbe327..26c53277cd 100644
--- a/pkg/install/resources.go
+++ b/pkg/install/resources.go
@@ -247,6 +247,7 @@ type VeleroOptions struct {
     RestoreOnly             bool
     UseNodeAgent            bool
     PrivilegedNodeAgent     bool
+    PrivilegedDatamoverPods bool
     UseVolumeSnapshots      bool
     BSLConfig               map[string]string
     VSLConfig               map[string]string
@@ -408,6 +409,9 @@ func AllResources(o *VeleroOptions) *unstructured.UnstructuredList {
     if o.PrivilegedNodeAgent {
         dsOpts = append(dsOpts, WithPrivilegedNodeAgent(true))
     }
+    if o.PrivilegedDatamoverPods {
+        dsOpts = append(dsOpts, WithPrivilegedDatamoverPods(true))
+    }
    if len(o.NodeAgentConfigMap) > 0 {
        dsOpts = append(dsOpts, WithNodeAgentConfigMap(o.NodeAgentConfigMap))
    }
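
For reviewers, a minimal standalone sketch (not part of the change set above) of what --privileged-datamover-pods ultimately does: when the flag reaches the exposers, the single hosting container of the datamover pod gets a privileged SecurityContext, as in the createBackupPod/createRestorePod branches in this diff. The pod/container names and the local boolPtr helper are illustrative stand-ins only; the real code uses Velero's boolptr.True().

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// boolPtr is a local stand-in for Velero's boolptr.True() helper.
func boolPtr(b bool) *bool { return &b }

func main() {
	// A bare-bones datamover-style pod spec; the container name is illustrative.
	pod := &corev1.Pod{
		Spec: corev1.PodSpec{
			Containers: []corev1.Container{{Name: "datamover"}},
		},
	}

	// In the node-agent this value comes from the --privileged-datamover-pods flag,
	// which the install command wires into the daemonset args (pkg/install/daemonset.go above).
	privileged := true
	if privileged {
		pod.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{
			Privileged: boolPtr(true),
		}
	}

	fmt.Println(*pod.Spec.Containers[0].SecurityContext.Privileged) // true
}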