diff --git a/pkg/scheduler/api/job_info.go b/pkg/scheduler/api/job_info.go index ec9e5fe606..19084af263 100644 --- a/pkg/scheduler/api/job_info.go +++ b/pkg/scheduler/api/job_info.go @@ -33,6 +33,7 @@ import ( batch "volcano.sh/apis/pkg/apis/batch/v1alpha1" "volcano.sh/apis/pkg/apis/scheduling" "volcano.sh/apis/pkg/apis/scheduling/v1beta1" + volumescheduling "volcano.sh/volcano/pkg/scheduler/capabilities/volumebinding" ) @@ -130,6 +131,11 @@ type TaskInfo struct { NumaInfo *TopologyInfo PodVolumes *volumescheduling.PodVolumes Pod *v1.Pod + + // CustomBindErrHandler is a custom callback func called when binding the task fails. + CustomBindErrHandler func() error + // CustomBindErrHandlerSucceeded indicates whether CustomBindErrHandler has executed successfully. + CustomBindErrHandlerSucceeded bool } func getJobID(pod *v1.Pod) JobID { diff --git a/pkg/scheduler/cache/cache.go b/pkg/scheduler/cache/cache.go index 68c5f840f0..4c4107659c 100644 --- a/pkg/scheduler/cache/cache.go +++ b/pkg/scheduler/cache/cache.go @@ -961,9 +961,24 @@ func (sc *SchedulerCache) processResyncTask() { return } + reSynced := false if err := sc.syncTask(task); err != nil { klog.Errorf("Failed to sync pod <%v/%v>, retry it.", task.Namespace, task.Name) sc.resyncTask(task) + reSynced = true + } + + // Execute the custom bind error handler callback func if it exists and has not yet succeeded. + if task.CustomBindErrHandler != nil && !task.CustomBindErrHandlerSucceeded { + err := task.CustomBindErrHandler() + if err != nil { + klog.ErrorS(err, "Failed to execute custom bind err handler, retry it.") + } else { + task.CustomBindErrHandlerSucceeded = true + } + if !task.CustomBindErrHandlerSucceeded && !reSynced { + sc.resyncTask(task) + } } }