Skip to content

Commit

Permalink
Implemented controller's UnsuitableNodes() method
Browse files Browse the repository at this point in the history
Signed-off-by: Ivan Sim <ihcsim@gmail.com>
  • Loading branch information
ihcsim committed May 23, 2024
1 parent 73ede75 commit 7f8f7c6
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 17 deletions.
1 change: 1 addition & 0 deletions cmd/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ func NewK8sFlags() *pflag.FlagSet {

func NewControllerFlags() *pflag.FlagSet {
flags := pflag.NewFlagSet("controller", pflag.ExitOnError)
flags.String("namespace", "k8s-dra", "Namespace where the controller watches for DeviceAllocation CRDs")
flags.Int("workers", 3, "Number of workers the controller spawns")
flags.Int("metrics-port", 9001, "HTTP port to expose metrics")
flags.String("metrics-path", "metrics", "HTTP path to expose metrics")
Expand Down
2 changes: 1 addition & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func run(ctx context.Context) error {
pprofPort = viper.GetInt("pprof-port")
pprofPath = "/debug/pprof/"

driver = gpu.NewDriver()
driver = gpu.NewDriver(viper.GetString("namespace"))
)

go func() {
Expand Down
53 changes: 40 additions & 13 deletions pkg/drivers/gpu/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,16 @@ var _ dractrl.Driver = &driver{}
// allocation and deallocation operations of GPU resources.
type driver struct {
clientset clientset.Interface
gpu GPUPlugin
gpu gpuPlugin
namespace string
}

// NewDriver returns a new instance of the GPU driver.
func NewDriver() *driver {
return &driver{}
func NewDriver(namespace string) *driver {
return &driver{
gpu: gpuPlugin{},
namespace: namespace,
}
}

// GetName returns the name of the driver.
Expand Down Expand Up @@ -125,13 +129,10 @@ func (d *driver) Allocate(ctx context.Context, claimAllocations []*dractrl.Claim
}
}

func (d *driver) nodeDeviceAllocation(ctx context.Context, claim *resourcev1alpha2.ResourceClaim, selectedNode string) (*allocationv1alpha1.NodeDeviceAllocation, error) {
var (
namespace = claim.GetNamespace()
listOpts = metav1.ListOptions{
LabelSelector: "kubernetes.io/hostname=" + selectedNode,
}
)
func (d *driver) nodeDeviceAllocation(ctx context.Context, namespace, selectedNode string) (*allocationv1alpha1.NodeDeviceAllocation, error) {
listOpts := metav1.ListOptions{
LabelSelector: "kubernetes.io/hostname=" + selectedNode,
}

deviceAllocations, err := d.clientset.AllocationV1alpha1().NodeDeviceAllocations(namespace).List(ctx, listOpts)
if err != nil {
Expand Down Expand Up @@ -164,7 +165,7 @@ func (d *driver) allocateGPU(
claimNamespace = claim.GetNamespace()
)

deviceAllocation, err := d.nodeDeviceAllocation(ctx, claim, selectedNode)
deviceAllocation, err := d.nodeDeviceAllocation(ctx, d.namespace, selectedNode)
if err != nil {
return err
}
Expand Down Expand Up @@ -210,7 +211,7 @@ func (d *driver) Deallocate(ctx context.Context, claim *resourcev1alpha2.Resourc
return nil
}

deviceAllocation, err := d.nodeDeviceAllocation(ctx, claim, selectedNode)
deviceAllocation, err := d.nodeDeviceAllocation(ctx, d.namespace, selectedNode)
if err != nil {
return err
}
Expand Down Expand Up @@ -265,7 +266,33 @@ func (d *driver) deallocateGPU(ctx context.Context, claimUID string, gpu allocat

// see https://pkg.go.dev/k8s.io/dynamic-resource-allocation/controller#Driver
func (d *driver) UnsuitableNodes(ctx context.Context, pod *corev1.Pod, claims []*dractrl.ClaimAllocation, potentialNodes []string) error {
return nil
var (
errs error
gpuClaims = []*dractrl.ClaimAllocation{}
)
for _, potentialNode := range potentialNodes {
deviceAllocation, err := d.nodeDeviceAllocation(ctx, d.namespace, potentialNode)
if err != nil {
for _, claim := range claims {
claim.UnsuitableNodes = append(claim.UnsuitableNodes, potentialNode)
}
return nil
}

for _, claim := range claims {
if _, ok := claim.ClaimParameters.(*gpuv1alpha1.GPUClaimParameters); !ok {
errs = errors.Join(errs, fmt.Errorf("unsupported claim parameters kind: %T", claim.ClaimParameters))
continue
}
gpuClaims = append(gpuClaims, claim)
}

if err := d.gpu.unsuitableNode(deviceAllocation, pod, gpuClaims, claims, potentialNode); err != nil {
errs = errors.Join(errs, err)
}
}

return errs
}

func (d *driver) validateClaimParameters(claimParams *gpuv1alpha1.GPUClaimParametersSpec) error {
Expand Down
17 changes: 14 additions & 3 deletions pkg/drivers/gpu/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,28 @@ package gpu
import (
allocationapiv1alpha1 "github.com/ihcsim/k8s-dra/pkg/apis/allocation/v1alpha1"
gpuv1alpha1 "github.com/ihcsim/k8s-dra/pkg/apis/gpu/v1alpha1"
corev1 "k8s.io/api/core/v1"
dractrl "k8s.io/dynamic-resource-allocation/controller"
)

type GPUPlugin struct{}
type gpuPlugin struct{}

func (p *GPUPlugin) pendingAllocatedClaims(
func (p *gpuPlugin) pendingAllocatedClaims(
claimUID, selectedNode string,
claimParams *gpuv1alpha1.GPUClaimParameters,
classParams *gpuv1alpha1.GPUClassParameters) (allocationapiv1alpha1.AllocatedDevices, error) {
return allocationapiv1alpha1.AllocatedDevices{}, nil
}

func (p *GPUPlugin) removeAllocatedClaim(claimUID string) error {
func (p *gpuPlugin) removeAllocatedClaim(claimUID string) error {
return nil
}

func (p *gpuPlugin) unsuitableNode(
nodedeviceAllocation *allocationapiv1alpha1.NodeDeviceAllocation,
pod *corev1.Pod,
gpuClaims []*dractrl.ClaimAllocation,
allClaims []*dractrl.ClaimAllocation,
potentialNode string) error {
return nil
}

0 comments on commit 7f8f7c6

Please sign in to comment.