Skip to content

Commit

Permalink
L3mdev cgroup (#73)
Browse files Browse the repository at this point in the history
* add driver-l3mdev-cgroup patch

Signed-off-by: Guohan Lu <gulv@microsoft.com>

* update abiname to 8-1

Signed-off-by: Guohan Lu <gulv@microsoft.com>
  • Loading branch information
lguohan authored Jan 11, 2019
1 parent d631eeb commit 37734aa
Show file tree
Hide file tree
Showing 6 changed files with 386 additions and 1 deletion.
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
SHELL = /bin/bash
.SHELLFLAGS += -e

KVERSION_SHORT ?= 4.9.0-8
KERNEL_ABI_MINOR_VERSION = 1
KVERSION_SHORT ?= 4.9.0-8-$(KERNEL_ABI_MINOR_VERSION)
KVERSION ?= $(KVERSION_SHORT)-amd64
KERNEL_VERSION ?= 4.9.110
KERNEL_SUBVERSION ?= 3+deb9u6
Expand Down Expand Up @@ -79,6 +80,8 @@ $(addprefix $(DEST)/, $(MAIN_TARGET)): $(DEST)/% :
git add debian/build/build_amd64_none_amd64/.config -f
git add debian/config.defines.dump -f
git add debian/control -f
git add debian/rules.gen -f
git add debian/tests/control -f
git commit -m "unmodified debian source"

# Learning new git repo head (above commit) by calling stg repair.
Expand Down
21 changes: 21 additions & 0 deletions patch/config-l3mdev-cgroup.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
enable CONFIG_CGROUP_L3MDEV=y

From: Guohan Lu <gulv@microsoft.com>


---
debian/build/build_amd64_none_amd64/.config | 1 +
1 file changed, 1 insertion(+)

diff --git a/debian/build/build_amd64_none_amd64/.config b/debian/build/build_amd64_none_amd64/.config
index b7bcf15..a8d64a7 100644
--- a/debian/build/build_amd64_none_amd64/.config
+++ b/debian/build/build_amd64_none_amd64/.config
@@ -1493,6 +1492,7 @@ CONFIG_MPLS_IPTUNNEL=m
# CONFIG_HSR is not set
# CONFIG_NET_SWITCHDEV is not set
CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_CGROUP_L3MDEV=y
# CONFIG_NET_NCSI is not set
CONFIG_RPS=y
CONFIG_RFS_ACCEL=y
338 changes: 338 additions & 0 deletions patch/driver-l3mdev-cgroup.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,338 @@
Add cgroup to assoicate tasks with L3 networking domains. AF_INET{6}
sockets opened by tasks associated with an l3mdev cgroup are bound to
the associated master device when the socket is created. This allows a
user to run a command (and its children) within an L3 networking context.

The master-device for an l3mdev cgroup must be an L3 master device
(e.g., VRF), and it must be set before attaching tasks to the cgroup. Once
set the master-device can not change. Nested l3mdev cgroups are not
supported. The root (aka default) l3mdev cgroup can not be bound to a
master device.

Example:
ip link add vrf-red type vrf table vrf-red
ip link set dev vrf-red up
ip link set dev eth1 master vrf-red

cgcreate -g l3mdev:vrf-red
cgset -r l3mdev.master-device=vrf-red vrf-red
cgexec -g l3mdev:vrf-red bash

At this point the current shell and its child processes are attached to
the vrf-red L3 domain. Any AF_INET and AF_INET6 sockets opened by the
tasks are bound to the vrf-red device.

TO-DO:
- how to auto-create the cgroup when a VRF device is created and auto-deleted
when a VRF device is destroyed

Signed-off-by: David Ahern <dsa-qUQiAmfTcIp+XZJcv9eMoEEOCMrvLtNR@public.gmane.org>
---
include/linux/cgroup_subsys.h | 4 +
include/net/l3mdev_cgroup.h | 27 ++++++
net/core/sock.c | 2
net/l3mdev/Kconfig | 12 +++
net/l3mdev/Makefile | 1
net/l3mdev/l3mdev_cgroup.c | 195 +++++++++++++++++++++++++++++++++++++++++
6 files changed, 241 insertions(+)
create mode 100644 include/net/l3mdev_cgroup.h
create mode 100644 net/l3mdev/l3mdev_cgroup.c

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 0df0336..72c220a 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -48,6 +48,10 @@ SUBSYS(perf_event)
SUBSYS(net_prio)
#endif

+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+SUBSYS(l3mdev)
+#endif
+
#if IS_ENABLED(CONFIG_CGROUP_HUGETLB)
SUBSYS(hugetlb)
#endif
diff --git a/include/net/l3mdev_cgroup.h b/include/net/l3mdev_cgroup.h
new file mode 100644
index 0000000..7d2bef1
--- /dev/null
+++ b/include/net/l3mdev_cgroup.h
@@ -0,0 +1,27 @@
+/*
+ * l3mdev_cgroup.h Control Group for L3 Master Device
+ *
+ * Copyright (c) 2015 Cumulus Networks. All rights reserved.
+ * Copyright (c) 2015 David Ahern <dsa-qUQiAmfTcIp+XZJcv9eMoEEOCMrvLtNR@public.gmane.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _L3MDEV_CGROUP_H
+#define _L3MDEV_CGROUP_H
+
+#if IS_ENABLED(CONFIG_CGROUP_L3MDEV)
+
+void sock_update_l3mdev(struct sock *sk);
+
+#else /* !CONFIG_CGROUP_L3MDEV */
+
+static inline void sock_update_l3mdev(struct sock *sk)
+{
+}
+
+#endif /* CONFIG_CGROUP_L3MDEV */
+#endif /* _L3MDEV_CGROUP_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 1c4c434..ebb25ca 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -131,6 +131,7 @@
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>
+#include <net/l3mdev_cgroup.h>
#include <linux/sock_diag.h>

#include <linux/filter.h>
@@ -1402,6 +1403,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
cgroup_sk_alloc(&sk->sk_cgrp_data);
sock_update_classid(&sk->sk_cgrp_data);
sock_update_netprioidx(&sk->sk_cgrp_data);
+ sock_update_l3mdev(sk);
}

return sk;
diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig
index 5d47325..3142d81 100644
--- a/net/l3mdev/Kconfig
+++ b/net/l3mdev/Kconfig
@@ -8,3 +8,15 @@ config NET_L3_MASTER_DEV
---help---
This module provides glue between core networking code and device
drivers to support L3 master devices like VRF.
+
+config CGROUP_L3MDEV
+ bool "L3 Master Device cgroup"
+ depends on CGROUPS
+ depends on NET_L3_MASTER_DEV
+ ---help---
+ Cgroup subsystem for assigning processes to an L3 domain.
+ When a process is assigned to an l3mdev domain all AF_INET and
+ AF_INET6 sockets opened by the process are bound to the L3 master
+ device.
+
+
diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile
index 84a53a6..ae74eba 100644
--- a/net/l3mdev/Makefile
+++ b/net/l3mdev/Makefile
@@ -3,3 +3,4 @@
#

obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o
+obj-$(CONFIG_CGROUP_L3MDEV) += l3mdev_cgroup.o
diff --git a/net/l3mdev/l3mdev_cgroup.c b/net/l3mdev/l3mdev_cgroup.c
new file mode 100644
index 0000000..b5fea03
--- /dev/null
+++ b/net/l3mdev/l3mdev_cgroup.c
@@ -0,0 +1,195 @@
+/*
+ * net/l3mdev/l3mdev_cgroup.c Control Group for L3 Master Devices
+ *
+ * Copyright (c) 2015 Cumulus Networks. All rights reserved.
+ * Copyright (c) 2015 David Ahern <dsa-qUQiAmfTcIp+XZJcv9eMoEEOCMrvLtNR@public.gmane.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/cgroup.h>
+#include <net/sock.h>
+#include <net/l3mdev_cgroup.h>
+
+struct l3mdev_cgroup {
+ struct cgroup_subsys_state css;
+ struct net *net;
+ int dev_idx;
+};
+
+static inline struct l3mdev_cgroup *css_l3mdev(struct cgroup_subsys_state *css)
+{
+ return css ? container_of(css, struct l3mdev_cgroup, css) : NULL;
+}
+
+static void l3mdev_set_bound_dev(struct sock *sk)
+{
+ struct task_struct *tsk = current;
+ struct l3mdev_cgroup *l3mdev_cgrp;
+
+ rcu_read_lock();
+
+ l3mdev_cgrp = css_l3mdev(task_css(tsk, l3mdev_cgrp_id));
+ if (l3mdev_cgrp && l3mdev_cgrp->dev_idx)
+ sk->sk_bound_dev_if = l3mdev_cgrp->dev_idx;
+
+ rcu_read_unlock();
+}
+
+void sock_update_l3mdev(struct sock *sk)
+{
+ switch (sk->sk_family) {
+ case AF_INET:
+ case AF_INET6:
+ l3mdev_set_bound_dev(sk);
+ break;
+ }
+}
+
+static bool is_root_cgroup(struct cgroup_subsys_state *css)
+{
+ return !css || !css->parent;
+}
+
+static struct cgroup_subsys_state *
+l3mdev_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+ struct l3mdev_cgroup *l3mdev_cgrp;
+
+ /* nested l3mdev domains are not supportd */
+ if (!is_root_cgroup(parent_css))
+ return ERR_PTR(-EINVAL);
+
+ l3mdev_cgrp = kzalloc(sizeof(*l3mdev_cgrp), GFP_KERNEL);
+ if (!l3mdev_cgrp)
+ return ERR_PTR(-ENOMEM);
+
+ return &l3mdev_cgrp->css;
+}
+
+static int l3mdev_css_online(struct cgroup_subsys_state *css)
+{
+ return 0;
+}
+
+static void l3mdev_css_free(struct cgroup_subsys_state *css)
+{
+ kfree(css_l3mdev(css));
+}
+
+static int l3mdev_read(struct seq_file *sf, void *v)
+{
+ struct cgroup_subsys_state *css = seq_css(sf);
+ struct l3mdev_cgroup *l3mdev_cgrp = css_l3mdev(css);
+
+ if (!l3mdev_cgrp)
+ return -EINVAL;
+
+ if (l3mdev_cgrp->net) {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(l3mdev_cgrp->net, l3mdev_cgrp->dev_idx);
+
+ seq_printf(sf, "net[%u]: device index %d ==> %s\n",
+ l3mdev_cgrp->net->ns.inum, l3mdev_cgrp->dev_idx,
+ dev ? dev->name : "<none>");
+
+ if (dev)
+ dev_put(dev);
+ }
+ return 0;
+}
+
+static ssize_t l3mdev_write(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct cgroup_subsys_state *css = of_css(of);
+ struct l3mdev_cgroup *l3mdev_cgrp = css_l3mdev(css);
+ struct net *net = current->nsproxy->net_ns;
+ struct net_device *dev;
+ char name[IFNAMSIZ];
+ int rc = -EINVAL;
+
+ /* once master device is set can not undo. Must delete
+ * cgroup and reset
+ */
+ if (l3mdev_cgrp->dev_idx)
+ goto out;
+
+ /* root cgroup does not bind to an L3 domain */
+ if (is_root_cgroup(css))
+ goto out;
+
+ if (sscanf(buf, "%" __stringify(IFNAMSIZ) "s", name) != 1)
+ goto out;
+
+ dev = dev_get_by_name(net, name);
+ if (!dev) {
+ rc = -ENODEV;
+ goto out;
+ }
+
+ if (netif_is_l3_master(dev)) {
+ l3mdev_cgrp->net = net;
+ l3mdev_cgrp->dev_idx = dev->ifindex;
+ rc = 0;
+ }
+
+ dev_put(dev);
+out:
+ return rc ? : nbytes;
+}
+
+/* make master device is set for non-root cgroups before tasks can be added */
+static int l3mdev_can_attach(struct cgroup_taskset *tset)
+{
+ struct cgroup_subsys_state *dst_css;
+ struct task_struct *tsk;
+ int rc = 0;
+
+ cgroup_taskset_for_each(tsk, dst_css, tset) {
+ struct l3mdev_cgroup *l3mdev_cgrp;
+
+ l3mdev_cgrp = css_l3mdev(dst_css);
+ if (!is_root_cgroup(dst_css) && !l3mdev_cgrp->dev_idx) {
+ rc = -ENODEV;
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static struct cftype ss_files[] = {
+ {
+ .name = "master-device",
+ .seq_show = l3mdev_read,
+ .write = l3mdev_write,
+ },
+ { } /* terminate */
+};
+
+struct cgroup_subsys l3mdev_cgrp_subsys = {
+ .css_alloc = l3mdev_css_alloc,
+ .css_online = l3mdev_css_online,
+ .css_free = l3mdev_css_free,
+ .can_attach = l3mdev_can_attach,
+ .legacy_cftypes = ss_files,
+};
+
+static int __init init_cgroup_l3mdev(void)
+{
+ return 0;
+}
+
+subsys_initcall(init_cgroup_l3mdev);
+MODULE_AUTHOR("David Ahern");
+MODULE_DESCRIPTION("Control Group for L3 Networking Domains");
+MODULE_LICENSE("GPL");
Loading

0 comments on commit 37734aa

Please sign in to comment.