Add systemd cgroup controller support
Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
brandond committed Apr 29, 2022
1 parent 1caae63 commit 333311c
Showing 8 changed files with 52 additions and 44 deletions.
8 changes: 6 additions & 2 deletions pkg/agent/containerd/config_linux.go
@@ -45,18 +45,22 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
}

isRunningInUserNS := userns.RunningInUserNS()
_, _, hasCFS, hasPIDs := cgroups.CheckCgroups()
_, _, controllers := cgroups.CheckCgroups()
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
disableCgroup := isRunningInUserNS && (!hasCFS || !hasPIDs || !cgroupfsWritable)
disableCgroup := isRunningInUserNS && (!controllers["cpu"] || !controllers["pids"] || !cgroupfsWritable)
if disableCgroup {
logrus.Warn("cgroup v2 controllers are not delegated for rootless. Disabling cgroup.")
}

systemdCgroup := controllers["cpuset"] && os.Getenv("NOTIFY_SOCKET") != ""
cfg.AgentConfig.Systemd = systemdCgroup

var containerdTemplate string
containerdConfig := templates.ContainerdConfig{
NodeConfig: cfg,
DisableCgroup: disableCgroup,
SystemdCgroup: systemdCgroup,
IsRunningInUserNS: isRunningInUserNS,
PrivateRegistryConfig: privRegistries.Registry,
ExtraRuntimes: findNvidiaContainerRuntimes(os.DirFS(string(os.PathSeparator))),
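For context, a brief sketch of the detection the hunk above introduces; the rationale in the comments is an interpretation rather than text from the commit:

// systemd exports NOTIFY_SOCKET to services it supervises, so a non-empty
// value suggests k3s was started as a systemd unit; the cpuset check is read
// here as a proxy for having a usable, fully delegated set of controllers.
systemdCgroup := controllers["cpuset"] && os.Getenv("NOTIFY_SOCKET") != ""
cfg.AgentConfig.Systemd = systemdCgroup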
1 change: 1 addition & 0 deletions pkg/agent/containerd/config_windows.go
@@ -45,6 +45,7 @@ func setupContainerdConfig(ctx context.Context, cfg *config.Node) error {
containerdConfig := templates.ContainerdConfig{
NodeConfig: cfg,
DisableCgroup: true,
SystemdCgroup: false,
IsRunningInUserNS: false,
PrivateRegistryConfig: privRegistries.Registry,
}
1 change: 1 addition & 0 deletions pkg/agent/templates/templates.go
@@ -14,6 +14,7 @@ type ContainerdRuntimeConfig struct {
type ContainerdConfig struct {
NodeConfig *config.Node
DisableCgroup bool
SystemdCgroup bool
IsRunningInUserNS bool
PrivateRegistryConfig *registries.Registry
ExtraRuntimes map[string]ContainerdRuntimeConfig
3 changes: 3 additions & 0 deletions pkg/agent/templates/templates_linux.go
@@ -81,6 +81,9 @@ enable_keychain = true
[plugins.cri.containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
[plugins.cri.containerd.runtimes.runc.options]
SystemdCgroup = {{ .SystemdCgroup }}
{{ if .PrivateRegistryConfig }}
{{ if .PrivateRegistryConfig.Mirrors }}
[plugins.cri.registry.mirrors]{{end}}
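To make the new template field concrete, here is a standalone, simplified sketch (not the full k3s template) of how SystemdCgroup is substituted into the runc options section; with the field set to true it prints an options block containing "SystemdCgroup = true":

package main

import (
	"os"
	"text/template"
)

// Simplified stand-in for the runc section of the containerd config template.
const runcOptions = `[plugins.cri.containerd.runtimes.runc]
  runtime_type = "io.containerd.runc.v2"
  [plugins.cri.containerd.runtimes.runc.options]
    SystemdCgroup = {{ .SystemdCgroup }}
`

func main() {
	// Stands in for templates.ContainerdConfig with only the field used here.
	data := struct{ SystemdCgroup bool }{SystemdCgroup: true}
	tmpl := template.Must(template.New("runc").Parse(runcOptions))
	if err := tmpl.Execute(os.Stdout, data); err != nil {
		panic(err)
	}
}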
36 changes: 16 additions & 20 deletions pkg/cgroups/cgroups_linux.go
@@ -65,34 +65,30 @@ func validateCgroupsV2() error {
return nil
}

func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
func CheckCgroups() (kubeletRoot, runtimeRoot string, controllers map[string]bool) {
cgroupsModeV2 := cgroups.Mode() == cgroups.Unified
controllers = make(map[string]bool)

// For Unified (v2) cgroups we can directly check to see what controllers are mounted
// under the unified hierarchy.
if cgroupsModeV2 {
m, err := cgroupsv2.LoadManager("/sys/fs/cgroup", "/")
if err != nil {
return "", "", false, false
return
}
controllers, err := m.Controllers()
enabledControllers, err := m.Controllers()
if err != nil {
return "", "", false, false
return
}
// Intentionally using an expressionless switch to match the logic below
for _, controller := range controllers {
switch {
case controller == "cpu":
hasCFS = true
case controller == "pids":
hasPIDs = true
}
for _, controller := range enabledControllers {
controllers[controller] = true
}
}

f, err := os.Open("/proc/self/cgroup")
if err != nil {
return "", "", false, false
return
}
defer f.Close()

@@ -102,10 +98,10 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
if len(parts) < 3 {
continue
}
controllers := strings.Split(parts[1], ",")
enabledControllers := strings.Split(parts[1], ",")
// For v1 or hybrid, controller can be a single value {"blkio"}, or a comounted set {"cpu","cpuacct"}
// For v2, controllers = {""} (only contains a single empty string)
for _, controller := range controllers {
// For v2, controllers = {""} (only contains a single empty string) so this section is not used.
for _, controller := range enabledControllers {
switch {
case controller == "name=systemd" || cgroupsModeV2:
// If we detect that we are running under a `.scope` unit with systemd
@@ -128,10 +124,10 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
// can fail if we use the comma-separated name. Instead, we check for the controller using the symlink.
p := filepath.Join("/sys/fs/cgroup", controller, parts[2], "cpu.cfs_period_us")
if _, err := os.Stat(p); err == nil {
hasCFS = true
controllers[controller] = true
}
case controller == "pids":
hasPIDs = true
default:
controllers[controller] = true
}
}
}
@@ -146,7 +142,7 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
// a host PID scenario but we don't support this.
g, err := os.Open("/proc/1/cgroup")
if err != nil {
return "", "", false, false
return
}
defer g.Close()
scan = bufio.NewScanner(g)
@@ -170,5 +166,5 @@ func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
}
}
}
return kubeletRoot, runtimeRoot, hasCFS, hasPIDs
return
}
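A short caller sketch for the new CheckCgroups signature, mirroring how the kubelet-args code later in this commit consumes the result (the snippet itself is illustrative, not part of the diff):

// controllers is a set-like map keyed by controller name ("cpu", "pids",
// "cpuset", ...), replacing the old hasCFS/hasPIDs booleans.
kubeletRoot, runtimeRoot, controllers := cgroups.CheckCgroups()
if !controllers["cpu"] {
	// Without the cpu controller, CFS quotas cannot be enforced.
	argsMap["cpu-cfs-quota"] = "false"
}
if !controllers["pids"] {
	logrus.Fatal("pids cgroup controller not found")
}
_, _ = kubeletRoot, runtimeRoot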
5 changes: 3 additions & 2 deletions pkg/cgroups/cgroups_windows.go
@@ -1,3 +1,4 @@
//go:build windows
// +build windows

package cgroups
@@ -6,6 +7,6 @@ func Validate() error {
return nil
}

func CheckCgroups() (kubeletRoot, runtimeRoot string, hasCFS, hasPIDs bool) {
return "", "", false, false
func CheckCgroups() (kubeletRoot, runtimeRoot string, controllers map[string]bool) {
return
}
41 changes: 21 additions & 20 deletions pkg/daemons/agent/agent_linux.go
@@ -18,17 +18,15 @@ import (
"k8s.io/kubernetes/pkg/kubeapiserver/authorizer/modes"
)

func createRootlessConfig(argsMap map[string]string, hasCFS, hasPIDs bool) {
func createRootlessConfig(argsMap map[string]string, controllers map[string]bool) {
argsMap["feature-gates=KubeletInUserNamespace"] = "true"
// "/sys/fs/cgroup" is namespaced
cgroupfsWritable := unix.Access("/sys/fs/cgroup", unix.W_OK) == nil
if hasCFS && hasPIDs && cgroupfsWritable {
if controllers["cpu"] && controllers["pids"] && cgroupfsWritable {
logrus.Info("cgroup v2 controllers are delegated for rootless.")
// cgroupfs v2, delegated for rootless by systemd
argsMap["cgroup-driver"] = "cgroupfs"
} else {
logrus.Fatal("delegated cgroup v2 controllers are required for rootless.")
return
}
logrus.Fatal("delegated cgroup v2 controllers are required for rootless.")
}

func checkRuntimeEndpoint(cfg *config.Agent, argsMap map[string]string) {
@@ -67,14 +65,13 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
bindAddress = "::1"
}
argsMap := map[string]string{
"healthz-bind-address": bindAddress,
"read-only-port": "0",
"cluster-domain": cfg.ClusterDomain,
"kubeconfig": cfg.KubeConfigKubelet,
"eviction-hard": "imagefs.available<5%,nodefs.available<5%",
"eviction-minimum-reclaim": "imagefs.available=10%,nodefs.available=10%",
"fail-swap-on": "false",
//"cgroup-root": "/k3s",
"healthz-bind-address": bindAddress,
"read-only-port": "0",
"cluster-domain": cfg.ClusterDomain,
"kubeconfig": cfg.KubeConfigKubelet,
"eviction-hard": "imagefs.available<5%,nodefs.available<5%",
"eviction-minimum-reclaim": "imagefs.available=10%,nodefs.available=10%",
"fail-swap-on": "false",
"cgroup-driver": "cgroupfs",
"authentication-token-webhook": "true",
"anonymous-auth": "false",
@@ -138,13 +135,13 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
if err != nil || defaultIP.String() != cfg.NodeIP {
argsMap["node-ip"] = cfg.NodeIP
}
kubeletRoot, runtimeRoot, hasCFS, hasPIDs := cgroups.CheckCgroups()
if !hasCFS {
logrus.Warn("Disabling CPU quotas due to missing cpu.cfs_period_us")
kubeletRoot, runtimeRoot, controllers := cgroups.CheckCgroups()
if !controllers["cpu"] {
logrus.Warn("Disabling CPU quotas due to missing cpu controller or cpu.cfs_period_us")
argsMap["cpu-cfs-quota"] = "false"
}
if !hasPIDs {
logrus.Fatal("PIDS cgroup support not found")
if !controllers["pids"] {
logrus.Fatal("pids cgroup controller not found")
}
if kubeletRoot != "" {
argsMap["kubelet-cgroups"] = kubeletRoot
@@ -172,7 +169,11 @@ func kubeletArgs(cfg *config.Agent) map[string]string {
}

if cfg.Rootless {
createRootlessConfig(argsMap, hasCFS, hasCFS)
createRootlessConfig(argsMap, controllers)
}

if cfg.Systemd {
argsMap["cgroup-driver"] = "systemd"
}

if cfg.ProtectKernelDefaults {
1 change: 1 addition & 0 deletions pkg/daemons/config/types.go
@@ -90,6 +90,7 @@ type Agent struct {
ExtraKubeProxyArgs []string
PauseImage string
Snapshotter string
Systemd bool
CNIPlugin bool
NodeTaints []string
NodeLabels []string
