Skip to content

Commit

Permalink
Add an extra parameter ExitCode to RemoveFailedPod
Browse files Browse the repository at this point in the history
Update README.md

Fix README and test files

Update README

Address a7i's comments

Update README
  • Loading branch information
yuanchen8911 committed May 3, 2024
1 parent b953806 commit 25b9eda
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 22 deletions.
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ Balance Plugins: These plugins process all pods, or groups of pods, and determin
| [RemovePodsViolatingTopologySpreadConstraint](#removepodsviolatingtopologyspreadconstraint) |Balance|Evicts pods violating TopologySpreadConstraints|
| [RemovePodsHavingTooManyRestarts](#removepodshavingtoomanyrestarts) |Deschedule|Evicts pods having too many restarts|
| [PodLifeTime](#podlifetime) |Deschedule|Evicts pods that have exceeded a specified age limit|
| [RemoveFailedPods](#removefailedpods) |Deschedule|Evicts pods with certain failed reasons|
| [RemoveFailedPods](#removefailedpods) |Deschedule|Evicts pods with certain failed reasons and exit codes|


### RemoveDuplicates
Expand Down Expand Up @@ -698,10 +698,8 @@ profiles:
```

### RemoveFailedPods

This strategy evicts pods that are in failed status phase.
You can provide an optional parameter to filter by failed `reasons`.
`reasons` can be expanded to include reasons of InitContainers as well by setting the optional parameter `includingInitContainers` to `true`.
You can provide optional parameters to filter by failed pods' and containters' `reasons`. and `exitCodes`. `exitCodes` apply to failed pods' containers with `terminated` state only. `reasons` and `exitCodes` can be expanded to include those of InitContainers as well by setting the optional parameter `includingInitContainers` to `true`.
You can specify an optional parameter `minPodLifetimeSeconds` to evict pods that are older than specified seconds.
Lastly, you can specify the optional parameter `excludeOwnerKinds` and if a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction.
Expand All @@ -713,6 +711,7 @@ has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considere
|`minPodLifetimeSeconds`|uint|
|`excludeOwnerKinds`|list(string)|
|`reasons`|list(string)|
|`exitCodes`|list(int32)|
|`includingInitContainers`|bool|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
Expand All @@ -729,6 +728,8 @@ profiles:
args:
reasons:
- "NodeAffinity"
exitCodes:
- 1
includingInitContainers: true
excludeOwnerKinds:
- "Job"
Expand Down
23 changes: 23 additions & 0 deletions pkg/framework/plugins/removefailedpods/failedpods.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,17 @@ func validateCanEvict(pod *v1.Pod, failedPodArgs *RemoveFailedPodsArgs) error {
}
}

if len(failedPodArgs.ExitCodes) > 0 {
exitCodes := getFailedContainerStatusExitCodes(pod.Status.ContainerStatuses)
if failedPodArgs.IncludingInitContainers {
exitCodes = append(exitCodes, getFailedContainerStatusExitCodes(pod.Status.InitContainerStatuses)...)
}

if !sets.New(failedPodArgs.ExitCodes...).HasAny(exitCodes...) {
errs = append(errs, fmt.Errorf("pod does not match any of the exitCodes"))
}
}

return utilerrors.NewAggregate(errs)
}

Expand All @@ -165,3 +176,15 @@ func getFailedContainerStatusReasons(containerStatuses []v1.ContainerStatus) []s

return reasons
}

func getFailedContainerStatusExitCodes(containerStatuses []v1.ContainerStatus) []int32 {
exitCodes := make([]int32, 0)

for _, containerStatus := range containerStatuses {
if containerStatus.State.Terminated != nil {
exitCodes = append(exitCodes, containerStatus.State.Terminated.ExitCode)
}
}

return exitCodes
}
123 changes: 105 additions & 18 deletions pkg/framework/plugins/removefailedpods/failedpods_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,15 @@ var OneHourInSeconds uint = 3600
func TestRemoveFailedPods(t *testing.T) {
createRemoveFailedPodsArgs := func(
includingInitContainers bool,
reasons, excludeKinds []string,
reasons []string,
exitCodes []int32,
excludeKinds []string,
minAgeSeconds *uint,
) RemoveFailedPodsArgs {
return RemoveFailedPodsArgs{
IncludingInitContainers: includingInitContainers,
Reasons: reasons,
ExitCodes: exitCodes,
MinPodLifetimeSeconds: minAgeSeconds,
ExcludeOwnerKinds: excludeKinds,
}
Expand All @@ -69,14 +72,14 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "0 failures, 0 evictions",
args: createRemoveFailedPodsArgs(false, nil, nil, nil),
args: createRemoveFailedPodsArgs(false, nil, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{}, // no pods come back with field selector phase=Failed
},
{
description: "1 container terminated with reason NodeAffinity, 1 eviction",
args: createRemoveFailedPodsArgs(false, nil, nil, nil),
args: createRemoveFailedPodsArgs(false, nil, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
Expand All @@ -85,9 +88,70 @@ func TestRemoveFailedPods(t *testing.T) {
}), nil),
},
},
{
description: "1 container terminated with exitCode 1, 1 eviction",
args: createRemoveFailedPodsArgs(false, nil, []int32{1}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 1},
}), nil),
},
},
{
description: "1 container terminated with exitCode 1, reason NodeAffinity, 1 eviction",
args: createRemoveFailedPodsArgs(false, nil, []int32{1}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity", ExitCode: 1},
}), nil),
},
},
{
description: "1 container terminated with exitCode 1, expected exitCode 2, 0 eviction",
args: createRemoveFailedPodsArgs(false, nil, []int32{2}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 1},
}), nil),
},
},
{
description: "2 containers terminated with exitCode 1 and 2 respectively, 2 evictions",
args: createRemoveFailedPodsArgs(false, nil, []int32{1, 2, 3}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 2,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 1},
}), nil),
buildTestPod("p2", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 2},
}), nil),
},
},
{
description: "2 containers terminated with exitCode 1 and 2 respectively, expected exitCode 1, 1 eviction",
args: createRemoveFailedPodsArgs(false, nil, []int32{1}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 1},
}), nil),
buildTestPod("p2", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 2},
}), nil),
},
},
{
description: "1 init container terminated with reason NodeAffinity, 1 eviction",
args: createRemoveFailedPodsArgs(true, nil, nil, nil),
args: createRemoveFailedPodsArgs(true, nil, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
Expand All @@ -96,9 +160,31 @@ func TestRemoveFailedPods(t *testing.T) {
}, nil), nil),
},
},
{
description: "1 init container terminated with exitCode 1, 1 eviction",
args: createRemoveFailedPodsArgs(true, nil, []int32{1}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 1},
}), nil),
},
},
{
description: "1 init container terminated with exitCode 2, expected 1, 0 eviction",
args: createRemoveFailedPodsArgs(true, nil, []int32{1}, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{ExitCode: 2},
}), nil),
},
},
{
description: "1 init container waiting with reason CreateContainerConfigError, 1 eviction",
args: createRemoveFailedPodsArgs(true, nil, nil, nil),
args: createRemoveFailedPodsArgs(true, nil, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
Expand All @@ -109,7 +195,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "2 init container waiting with reason CreateContainerConfigError, 2 nodes, 2 evictions",
args: createRemoveFailedPodsArgs(true, nil, nil, nil),
args: createRemoveFailedPodsArgs(true, nil, nil, nil, nil),
nodes: []*v1.Node{
test.BuildTestNode("node1", 2000, 3000, 10, nil),
test.BuildTestNode("node2", 2000, 3000, 10, nil),
Expand All @@ -126,7 +212,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "include reason=CreateContainerConfigError, 1 container terminated with reason CreateContainerConfigError, 1 eviction",
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil),
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
Expand All @@ -137,7 +223,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "include reason=CreateContainerConfigError+NodeAffinity, 1 container terminated with reason CreateContainerConfigError, 1 eviction",
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError", "NodeAffinity"}, nil, nil),
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError", "NodeAffinity"}, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
Expand All @@ -148,7 +234,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "include reason=CreateContainerConfigError, 1 container terminated with reason NodeAffinity, 0 eviction",
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil),
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
Expand All @@ -159,7 +245,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "include init container=false, 1 init container waiting with reason CreateContainerConfigError, 0 eviction",
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil),
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
Expand All @@ -170,7 +256,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "lifetime 1 hour, 1 container terminated with reason NodeAffinity, 0 eviction",
args: createRemoveFailedPodsArgs(false, nil, nil, &OneHourInSeconds),
args: createRemoveFailedPodsArgs(false, nil, nil, nil, &OneHourInSeconds),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
Expand All @@ -181,7 +267,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "nodeFit=true, 1 unschedulable node, 1 container terminated with reason NodeAffinity, 0 eviction",
args: createRemoveFailedPodsArgs(false, nil, nil, nil),
args: createRemoveFailedPodsArgs(false, nil, nil, nil, nil),
nodes: []*v1.Node{
test.BuildTestNode("node1", 2000, 3000, 10, nil),
test.BuildTestNode("node2", 2000, 2000, 10, func(node *v1.Node) {
Expand All @@ -198,7 +284,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "nodeFit=true, only available node does not have enough resources, 1 container terminated with reason CreateContainerConfigError, 0 eviction",
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil),
args: createRemoveFailedPodsArgs(false, []string{"CreateContainerConfigError"}, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 1, 1, 10, nil), test.BuildTestNode("node2", 0, 0, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
Expand All @@ -210,7 +296,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "excluded owner kind=ReplicaSet, 1 init container terminated with owner kind=ReplicaSet, 0 eviction",
args: createRemoveFailedPodsArgs(true, nil, []string{"ReplicaSet"}, nil),
args: createRemoveFailedPodsArgs(true, nil, nil, []string{"ReplicaSet"}, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
Expand All @@ -221,7 +307,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "excluded owner kind=DaemonSet, 1 init container terminated with owner kind=ReplicaSet, 1 eviction",
args: createRemoveFailedPodsArgs(true, nil, []string{"DaemonSet"}, nil),
args: createRemoveFailedPodsArgs(true, nil, nil, []string{"DaemonSet"}, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
Expand All @@ -232,7 +318,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "excluded owner kind=DaemonSet, 1 init container terminated with owner kind=ReplicaSet, 1 pod in termination; nothing should be moved",
args: createRemoveFailedPodsArgs(true, nil, []string{"DaemonSet"}, nil),
args: createRemoveFailedPodsArgs(true, nil, nil, []string{"DaemonSet"}, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
Expand All @@ -243,7 +329,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "1 container terminated with reason ShutDown, 0 evictions",
args: createRemoveFailedPodsArgs(false, nil, nil, nil),
args: createRemoveFailedPodsArgs(false, nil, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
Expand All @@ -253,7 +339,7 @@ func TestRemoveFailedPods(t *testing.T) {
},
{
description: "include reason=Shutdown, 2 containers terminated with reason ShutDown, 2 evictions",
args: createRemoveFailedPodsArgs(false, []string{"Shutdown"}, nil, nil),
args: createRemoveFailedPodsArgs(false, []string{"Shutdown"}, nil, nil, nil),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 2,
pods: []*v1.Pod{
Expand Down Expand Up @@ -322,6 +408,7 @@ func TestRemoveFailedPods(t *testing.T) {

plugin, err := New(&RemoveFailedPodsArgs{
Reasons: tc.args.Reasons,
ExitCodes: tc.args.ExitCodes,
MinPodLifetimeSeconds: tc.args.MinPodLifetimeSeconds,
IncludingInitContainers: tc.args.IncludingInitContainers,
ExcludeOwnerKinds: tc.args.ExcludeOwnerKinds,
Expand Down
1 change: 1 addition & 0 deletions pkg/framework/plugins/removefailedpods/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,6 @@ type RemoveFailedPodsArgs struct {
ExcludeOwnerKinds []string `json:"excludeOwnerKinds"`
MinPodLifetimeSeconds *uint `json:"minPodLifetimeSeconds"`
Reasons []string `json:"reasons"`
ExitCodes []int32 `json:"exitCodes"`
IncludingInitContainers bool `json:"includingInitContainers"`
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 25b9eda

Please sign in to comment.