Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Graceful Shutdown #751

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions bindata/manifests/daemon/daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ spec:
preStop:
exec:
command: ["/bindata/scripts/clean-k8s-services.sh"]
terminationGracePeriodSeconds: 900
volumes:
- name: host
hostPath:
Expand Down
28 changes: 26 additions & 2 deletions bindata/scripts/clean-k8s-services.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,36 @@
#!/bin/bash

chroot_path="/proc/1/root"
delay_shutdown_path="$chroot_path/tmp/sriov-delay-shutdown"
kubelet_config_path="$chroot_path/etc/kubernetes/kubelet.conf"

# 10 minutes - this should be shorter than the time that is specified for the
# terminationGracePeriodSeconds in the daemonset's pod spec, so that everything
# else in the preStop hook has time to run and the Pod can be terminated properly.
wait_time=600

# If the kubelet is configured to shutdown gracefully (>0s shutdownGracePeriod), we need to wait for
# things to settle before shutting down the node.
# Delay step of the preStop hook: runs only when the marker file at
# $delay_shutdown_path exists when the script starts.
if [ -f "$delay_shutdown_path" ]; then
# Proceed only if the kubelet config has at least one line containing
# "shutdownGracePeriod" that does not also contain the quoted string "0s".
# NOTE(review): the first grep is a substring match, so lines such as
# shutdownGracePeriodCriticalPods are matched too, and an unquoted 0s value
# is not treated as zero - confirm against the kubelet config format in use.
if grep "$kubelet_config_path" -e shutdownGracePeriod | grep -qv \"0s\"; then
start=$(date +%s)
# Leave a timestamped marker recording when the wait began.
touch "$chroot_path/var/log/sriov-delay-start"
# Poll once per second until either the marker file disappears or
# $wait_time seconds have elapsed since $start.
while [ $(( $(date +%s) - $start )) -lt $wait_time ]; do
if [ ! -f "$delay_shutdown_path" ]; then # don't have to wait anymore
break
fi
sleep 1
done
# Remove the marker unconditionally (covers the timeout case) and record
# when the wait ended.
rm -f "$delay_shutdown_path"
touch "$chroot_path/var/log/sriov-delay-end"
fi
fi

if [ "$CLUSTER_TYPE" == "openshift" ]; then
echo "openshift cluster"
exit
fi

chroot_path="/host"

function clean_services() {
# Remove switchdev service files
rm -f $chroot_path/etc/systemd/system/switchdev-configuration-after-nm.service
Expand Down
39 changes: 31 additions & 8 deletions pkg/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ const (
// maxUpdateBackoff is the maximum time to react to a change as we back off
// in the face of errors.
maxUpdateBackoff = 60 * time.Second

// the presence of this file indicates that the sriov shutdown should be delayed
delayShutdownPath = "/host/tmp/sriov-delay-shutdown"
)

type Message struct {
Expand Down Expand Up @@ -604,6 +607,17 @@ func (dn *Daemon) completeDrain() error {
glog.Errorf("completeDrain(): failed to annotate node: %v", err)
return err
}

if _, err := os.Stat(delayShutdownPath); err == nil {
if err := os.Remove(delayShutdownPath); err != nil {
glog.Errorf("completeDrain(): failed to remove file %v: %v", delayShutdownPath, err)
return err
}
} else if !os.IsNotExist(err) { // error is not "not exist"
glog.Errorf("completeDrain(): error checking file status %v: %v", delayShutdownPath, err)
return err
}

return nil
}

Expand Down Expand Up @@ -671,15 +685,16 @@ func rebootNode() {
glog.Errorf("rebootNode(): %v", err)
}
defer exit()
// creates a new transient systemd unit to reboot the system.
// We explicitly try to stop kubelet.service first, before anything else; this
// way we ensure the rest of system stays running, because kubelet may need
// to do "graceful" shutdown by e.g. de-registering with a load balancer.
// However note we use `;` instead of `&&` so we keep rebooting even
// if kubelet failed to shutdown - that way the machine will still eventually reboot
// as systemd will time out the stop invocation.
// creates a new transient systemd unit that reboots the system using
// `systemctl reboot`.
// By shutting down the system this way instead of via `reboot`,
// when kubelet is configured with a shutdownGracePeriod, pods will
// be given some time to run their preStop scripts and respond to
// SIGTERM by terminating gracefully before being forcefully killed via
// SIGKILL. Stopping the kubelet service and then immediately running
// `reboot` just results in all pods being immediately killed.
cmd := exec.Command("systemd-run", "--unit", "sriov-network-config-daemon-reboot",
"--description", "sriov-network-config-daemon reboot node", "/bin/sh", "-c", "systemctl stop kubelet.service; reboot")
"--description", "sriov-network-config-daemon reboot node", "/bin/sh", "-c", "systemctl reboot")

if err := cmd.Run(); err != nil {
glog.Errorf("failed to reboot node: %v", err)
Expand Down Expand Up @@ -925,6 +940,14 @@ func (dn *Daemon) drainNode() error {
return err
}
glog.Info("drainNode(): drain complete")

file, err := os.Create(delayShutdownPath)
if err != nil {
glog.Errorf("drainNode(): failed to create file %v %v", delayShutdownPath, err)
return err
}
defer file.Close()

return nil
}

Expand Down