From b20e6f80b3afa90cd144725e0900f63962b1f8be Mon Sep 17 00:00:00 2001 From: hardslog Date: Thu, 20 Nov 2014 14:33:54 -0500 Subject: [PATCH] tf300t: initial kernel bringup for Android 5.0. Based on changes from Grouper Kernel Change-Id: Ic82ea9595db42be073576a2fff717dbaf1449611 --- Documentation/networking/ip-sysctl.txt | 14 ++ arch/arm/configs/tegra3_android_defconfig | 1 - arch/arm/mach-tegra/pm-irq.c | 3 +- arch/um/os-Linux/start_up.c | 2 + drivers/net/wireless/bcmdhd/wl_cfg80211.c | 3 + include/linux/fib_rules.h | 2 + include/linux/ipv6.h | 2 + include/linux/prctl.h | 6 + include/linux/rtnetlink.h | 2 + include/linux/wakeup_reason.h | 23 +++ include/net/addrconf.h | 2 + include/net/fib_rules.h | 6 +- include/net/flow.h | 8 +- include/net/inet_sock.h | 10 + include/net/ip.h | 4 + include/net/ipv6.h | 3 + include/net/netns/ipv4.h | 2 + include/net/netns/ipv6.h | 1 + include/net/route.h | 6 +- kernel/cgroup.c | 20 +- kernel/power/Makefile | 2 + kernel/power/wakeup_reason.c | 132 +++++++++++++ kernel/sys.c | 22 +++ net/core/fib_rules.c | 62 +++++- net/ipv4/fib_frontend.c | 1 + net/ipv4/icmp.c | 11 +- net/ipv4/inet_connection_sock.c | 12 +- net/ipv4/ip_output.c | 6 +- net/ipv4/ping.c | 18 +- net/ipv4/raw.c | 3 +- net/ipv4/route.c | 12 ++ net/ipv4/syncookies.c | 6 +- net/ipv4/sysctl_net_ipv4.c | 14 ++ net/ipv4/tcp_ipv4.c | 7 + net/ipv4/udp.c | 3 +- net/ipv4/xfrm4_policy.c | 1 + net/ipv6/addrconf.c | 37 +++- net/ipv6/af_inet6.c | 1 + net/ipv6/datagram.c | 1 + net/ipv6/icmp.c | 6 + net/ipv6/inet6_connection_sock.c | 4 +- net/ipv6/ping.c | 219 ++++++++++++++++++++++ net/ipv6/raw.c | 1 + net/ipv6/route.c | 75 ++++---- net/ipv6/syncookies.c | 5 +- net/ipv6/sysctl_net_ipv6.c | 7 + net/ipv6/tcp_ipv6.c | 23 ++- net/ipv6/udp.c | 1 + net/netfilter/xt_IDLETIMER.c | 198 ++++++++++++++++++- net/netlink/af_netlink.c | 4 +- security/selinux/hooks.c | 7 + 51 files changed, 932 insertions(+), 89 deletions(-) create mode 100644 include/linux/wakeup_reason.h create mode 100644 kernel/power/wakeup_reason.c create mode 100644 net/ipv6/ping.c diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index ca5cdcd0..35ff1797 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -22,6 +22,13 @@ ip_no_pmtu_disc - BOOLEAN min_pmtu - INTEGER default 562 - minimum discovered Path MTU +fwmark_reflect - BOOLEAN + Controls the fwmark of kernel-generated IPv4 reply packets that are not + associated with a socket for example, TCP RSTs or ICMP echo replies). + If unset, these packets have a fwmark of zero. If set, they have the + fwmark of the packet they are replying to. + Default: 0 + route/max_size - INTEGER Maximum number of routes allowed in the kernel. Increase this when using large numbers of interfaces and/or routes. @@ -1036,6 +1043,13 @@ conf/all/forwarding - BOOLEAN proxy_ndp - BOOLEAN Do proxy ndp. +fwmark_reflect - BOOLEAN + Controls the fwmark of kernel-generated IPv6 reply packets that are not + associated with a socket for example, TCP RSTs or ICMPv6 echo replies). + If unset, these packets have a fwmark of zero. If set, they have the + fwmark of the packet they are replying to. + Default: 0 + conf/interface/*: Change special settings per interface. diff --git a/arch/arm/configs/tegra3_android_defconfig b/arch/arm/configs/tegra3_android_defconfig index 9d7c9322..c712e8a4 100755 --- a/arch/arm/configs/tegra3_android_defconfig +++ b/arch/arm/configs/tegra3_android_defconfig @@ -487,7 +487,6 @@ CONFIG_RTC_DRV_RC5T583=y CONFIG_STAGING=y CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y -CONFIG_ANDROID_LOGGER=y CONFIG_ANDROID_RAM_CONSOLE=y CONFIG_ANDROID_RAM_CONSOLE_ERROR_CORRECTION=y CONFIG_ANDROID_TIMED_GPIO=y diff --git a/arch/arm/mach-tegra/pm-irq.c b/arch/arm/mach-tegra/pm-irq.c index a51b4555..138d7d03 100644 --- a/arch/arm/mach-tegra/pm-irq.c +++ b/arch/arm/mach-tegra/pm-irq.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -218,7 +219,7 @@ static void tegra_pm_irq_syscore_resume_helper( (wake + 32 * index)); continue; } - + log_wakeup_reason(irq); desc = irq_to_desc(irq); if (!desc || !desc->action || !desc->action->name) { pr_info("Resume caused by WAKE%d, irq %d\n", diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 02ee9adf..eac388e2 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include "init.h" #include "kern_constants.h" diff --git a/drivers/net/wireless/bcmdhd/wl_cfg80211.c b/drivers/net/wireless/bcmdhd/wl_cfg80211.c index 9f46c776..b16d0bb5 100644 --- a/drivers/net/wireless/bcmdhd/wl_cfg80211.c +++ b/drivers/net/wireless/bcmdhd/wl_cfg80211.c @@ -4753,6 +4753,9 @@ static s32 wl_setup_wiphy(struct wireless_dev *wdev, struct device *sdiofunc_dev #endif /* AP_SME flag can be advertised to remove patch from wpa_supplicant */ wdev->wiphy->flags |= WIPHY_FLAG_HAVE_AP_SME; +#if defined(CONFIG_PM) + wdev->wiphy->wowlan.flags = WIPHY_WOWLAN_ANY; +#endif WL_DBG(("Registering custom regulatory)\n")); wdev->wiphy->flags |= WIPHY_FLAG_CUSTOM_REGULATORY; wiphy_apply_custom_regulatory(wdev->wiphy, &brcm_regdom); diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 51da65b6..9dcdb625 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -49,6 +49,8 @@ enum { FRA_TABLE, /* Extended table id */ FRA_FWMASK, /* mask for netfilter mark */ FRA_OIFNAME, + FRA_UID_START, /* UID range */ + FRA_UID_END, __FRA_MAX }; diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 0c997767..15395001 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -161,6 +161,7 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif + __s32 accept_ra_rt_table; __s32 proxy_ndp; __s32 accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -213,6 +214,7 @@ enum { DEVCONF_DISABLE_IPV6, DEVCONF_ACCEPT_DAD, DEVCONF_FORCE_TLLAO, + DEVCONF_ACCEPT_RA_RT_TABLE, DEVCONF_MAX }; diff --git a/include/linux/prctl.h b/include/linux/prctl.h index a3baeb2c..1c0b14a7 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -102,4 +102,10 @@ #define PR_MCE_KILL_GET 34 +/* Sets the timerslack for arbitrary threads + * arg2 slack value, 0 means "use default" + * arg3 pid of the thread whose timer slack needs to be set + */ +#define PR_SET_TIMERSLACK_PID 41 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 8e872ead..97a95808 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -283,6 +283,8 @@ enum rtattr_type_t { RTA_MP_ALGO, /* no longer used */ RTA_TABLE, RTA_MARK, + RTA_MFC_STATS, /* not used - backported from the future */ + RTA_UID, __RTA_MAX }; diff --git a/include/linux/wakeup_reason.h b/include/linux/wakeup_reason.h new file mode 100644 index 00000000..7ce50f0d --- /dev/null +++ b/include/linux/wakeup_reason.h @@ -0,0 +1,23 @@ +/* + * include/linux/wakeup_reason.h + * + * Logs the reason which caused the kernel to resume + * from the suspend mode. + * + * Copyright (C) 2014 Google, Inc. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_WAKEUP_REASON_H +#define _LINUX_WAKEUP_REASON_H + +void log_wakeup_reason(int irq); + +#endif /* _LINUX_WAKEUP_REASON_H */ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index cbc6bb0a..f4883029 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -166,6 +166,8 @@ extern int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr extern int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr); +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table); + /* Device notifier */ extern int register_inet6addr_notifier(struct notifier_block *nb); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 075f1e3a..52e77a36 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -23,6 +23,8 @@ struct fib_rule { struct fib_rule __rcu *ctarget; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; + uid_t uid_start; + uid_t uid_end; struct rcu_head rcu; struct net * fr_net; }; @@ -79,7 +81,9 @@ struct fib_rules_ops { [FRA_FWMARK] = { .type = NLA_U32 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ - [FRA_GOTO] = { .type = NLA_U32 } + [FRA_GOTO] = { .type = NLA_U32 }, \ + [FRA_UID_START] = { .type = NLA_U32 }, \ + [FRA_UID_END] = { .type = NLA_U32 } static inline void fib_rule_get(struct fib_rule *rule) { diff --git a/include/net/flow.h b/include/net/flow.h index 57f15a7f..1beab06a 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -23,6 +23,7 @@ struct flowi_common { #define FLOWI_FLAG_PRECOW_METRICS 0x02 #define FLOWI_FLAG_CAN_SLEEP 0x04 __u32 flowic_secid; + uid_t flowic_uid; }; union flowi_uli { @@ -59,6 +60,7 @@ struct flowi4 { #define flowi4_proto __fl_common.flowic_proto #define flowi4_flags __fl_common.flowic_flags #define flowi4_secid __fl_common.flowic_secid +#define flowi4_uid __fl_common.flowic_uid __be32 daddr; __be32 saddr; union flowi_uli uli; @@ -75,7 +77,8 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, __u32 mark, __u8 tos, __u8 scope, __u8 proto, __u8 flags, __be32 daddr, __be32 saddr, - __be16 dport, __be32 sport) + __be16 dport, __be32 sport, + uid_t uid) { fl4->flowi4_oif = oif; fl4->flowi4_iif = 0; @@ -85,6 +88,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->flowi4_proto = proto; fl4->flowi4_flags = flags; fl4->flowi4_secid = 0; + fl4->flowi4_uid = uid; fl4->daddr = daddr; fl4->saddr = saddr; fl4->fl4_dport = dport; @@ -102,6 +106,7 @@ struct flowi6 { #define flowi6_proto __fl_common.flowic_proto #define flowi6_flags __fl_common.flowic_flags #define flowi6_secid __fl_common.flowic_secid +#define flowi6_uid __fl_common.flowic_uid struct in6_addr daddr; struct in6_addr saddr; __be32 flowlabel; @@ -145,6 +150,7 @@ struct flowi { #define flowi_proto u.__fl_common.flowic_proto #define flowi_flags u.__fl_common.flowic_flags #define flowi_secid u.__fl_common.flowic_secid +#define flowi_uid u.__fl_common.flowic_uid } __attribute__((__aligned__(BITS_PER_LONG/8))); static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index b897d6e6..da4d79fa 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -87,6 +87,7 @@ struct inet_request_sock { no_srccheck: 1; kmemcheck_bitfield_end(flags); struct ip_options_rcu *opt; + u32 ir_mark; }; static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) @@ -94,6 +95,15 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } +static inline u32 inet_request_mark(struct sock *sk, struct sk_buff *skb) +{ + if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept) { + return skb->mark; + } else { + return sk->sk_mark; + } +} + struct inet_cork { unsigned int flags; __be32 addr; diff --git a/include/net/ip.h b/include/net/ip.h index aa76c7a4..d7c988fe 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -165,6 +165,7 @@ struct ip_reply_arg { int csumoffset; /* u16 offset of csum in iov[0].iov_base */ /* -1 if not needed */ int bound_dev_if; + uid_t uid; }; #define IP_REPLY_ARG_NOSRCCHECK 1 @@ -236,6 +237,9 @@ extern void ipfrag_init(void); extern void ip_static_sysctl_init(void); +#define IP4_REPLY_MARK(net, mark) \ + ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0) + static inline bool ip_is_fragment(const struct iphdr *iph) { return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3b5ac1fb..f03a97ec 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -109,6 +109,9 @@ struct frag_hdr { #define IP6_MF 0x0001 +#define IP6_REPLY_MARK(net, mark) \ + ((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0) + #include /* sysctls */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d786b4fc..76ebd40d 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -55,6 +55,8 @@ struct netns_ipv4 { int current_rt_cache_rebuild_count; unsigned int sysctl_ping_group_range[2]; + int sysctl_fwmark_reflect; + int sysctl_tcp_fwmark_accept; atomic_t rt_genid; atomic_t dev_addr_genid; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 81abfcb2..20b76abc 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -25,6 +25,7 @@ struct netns_sysctl_ipv6 { int ip6_rt_mtu_expires; int ip6_rt_min_advmss; int icmpv6_time; + int fwmark_reflect; }; struct netns_ipv6 { diff --git a/include/net/route.h b/include/net/route.h index db7b3432..5e9519ed 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -60,6 +60,7 @@ struct rtable { int rt_iif; int rt_oif; __u32 rt_mark; + uid_t rt_uid; /* Info on neighbour */ __be32 rt_gateway; @@ -146,7 +147,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos, RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, - daddr, saddr, dport, sport); + daddr, saddr, dport, sport, sk ? sock_i_uid(sk) : 0); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); return ip_route_output_flow(net, fl4, sk); @@ -250,7 +251,8 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 flow_flags |= FLOWI_FLAG_CAN_SLEEP; flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, - protocol, flow_flags, dst, src, dport, sport); + protocol, flow_flags, dst, src, dport, sport, + sock_i_uid(sk)); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 54a36fe2..0e4298fc 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -272,7 +272,7 @@ static void check_for_release(struct cgroup *cgrp); /* * A queue for waiters to do rmdir() cgroup. A tasks will sleep when - * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some + * list_empty(&cgroup->children) && subsys has some * reference to css->refcnt. In general, this refcnt is expected to goes down * to zero, soon. * @@ -3935,6 +3935,10 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) struct cgroup_subsys *ss; unsigned long flags; bool failed = false; + + if (atomic_read(&cgrp->count) != 0) + return false; + local_irq_save(flags); for_each_subsys(cgrp->root, ss) { struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; @@ -3977,19 +3981,23 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) return !failed; } -/* checks if all of the css_sets attached to a cgroup have a refcount of 0. - * Must be called with css_set_lock held */ +/* Checks if all of the css_sets attached to a cgroup have a refcount of 0. */ static int cgroup_css_sets_empty(struct cgroup *cgrp) { struct cg_cgroup_link *link; + int retval = 1; + read_lock(&css_set_lock); list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { struct css_set *cg = link->cg; - if (atomic_read(&cg->refcount) > 0) - return 0; + if (atomic_read(&cg->refcount) > 0) { + retval = 0; + break; + } } + read_unlock(&css_set_lock); - return 1; + return retval; } static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 9b224e16..a6ef0bed 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -16,3 +16,5 @@ obj-$(CONFIG_FB_EARLYSUSPEND) += fbearlysuspend.o obj-$(CONFIG_SUSPEND_TIME) += suspend_time.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o + +obj-$(CONFIG_SUSPEND) += wakeup_reason.o diff --git a/kernel/power/wakeup_reason.c b/kernel/power/wakeup_reason.c new file mode 100644 index 00000000..9823d9cc --- /dev/null +++ b/kernel/power/wakeup_reason.c @@ -0,0 +1,132 @@ +/* + * kernel/power/wakeup_reason.c + * + * Logs the reasons which caused the kernel to resume from + * the suspend mode. + * + * Copyright (C) 2014 Google, Inc. + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define MAX_WAKEUP_REASON_IRQS 32 +static int irq_list[MAX_WAKEUP_REASON_IRQS]; +static int irq_count; +static struct kobject *wakeup_reason; +static spinlock_t resume_reason_lock; + +static ssize_t last_resume_reason_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + int irq_no, buf_offset = 0; + struct irq_desc *desc; + spin_lock(&resume_reason_lock); + for (irq_no = 0; irq_no < irq_count; irq_no++) { + desc = irq_to_desc(irq_list[irq_no]); + if (desc && desc->action && desc->action->name) + buf_offset += sprintf(buf + buf_offset, "%d %s\n", + irq_list[irq_no], desc->action->name); + else + buf_offset += sprintf(buf + buf_offset, "%d\n", + irq_list[irq_no]); + } + spin_unlock(&resume_reason_lock); + return buf_offset; +} + +static struct kobj_attribute resume_reason = __ATTR_RO(last_resume_reason); + +static struct attribute *attrs[] = { + &resume_reason.attr, + NULL, +}; +static struct attribute_group attr_group = { + .attrs = attrs, +}; + +/* + * logs all the wake up reasons to the kernel + * stores the irqs to expose them to the userspace via sysfs + */ +void log_wakeup_reason(int irq) +{ + struct irq_desc *desc; + desc = irq_to_desc(irq); + if (desc && desc->action && desc->action->name) + printk(KERN_INFO "Resume caused by IRQ %d, %s\n", irq, + desc->action->name); + else + printk(KERN_INFO "Resume caused by IRQ %d\n", irq); + + spin_lock(&resume_reason_lock); + irq_list[irq_count++] = irq; + spin_unlock(&resume_reason_lock); +} + +/* Detects a suspend and clears all the previous wake up reasons*/ +static int wakeup_reason_pm_event(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + switch (pm_event) { + case PM_SUSPEND_PREPARE: + spin_lock(&resume_reason_lock); + irq_count = 0; + spin_unlock(&resume_reason_lock); + break; + default: + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block wakeup_reason_pm_notifier_block = { + .notifier_call = wakeup_reason_pm_event, +}; + +/* Initializes the sysfs parameter + * registers the pm_event notifier + */ +int __init wakeup_reason_init(void) +{ + int retval; + spin_lock_init(&resume_reason_lock); + retval = register_pm_notifier(&wakeup_reason_pm_notifier_block); + if (retval) + printk(KERN_WARNING "[%s] failed to register PM notifier %d\n", + __func__, retval); + + wakeup_reason = kobject_create_and_add("wakeup_reasons", kernel_kobj); + if (!wakeup_reason) { + printk(KERN_WARNING "[%s] failed to create a sysfs kobject\n", + __func__); + return 1; + } + retval = sysfs_create_group(wakeup_reason, &attr_group); + if (retval) { + kobject_put(wakeup_reason); + printk(KERN_WARNING "[%s] failed to create a sysfs group %d\n", + __func__, retval); + } + return 0; +} + +late_initcall(wakeup_reason_init); diff --git a/kernel/sys.c b/kernel/sys.c index c54a3e61..596dbcb2 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -1695,6 +1696,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { struct task_struct *me = current; + struct task_struct *tsk; unsigned char comm[sizeof(me->comm)]; long error; @@ -1839,6 +1841,26 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, else error = PR_MCE_KILL_DEFAULT; break; + case PR_SET_TIMERSLACK_PID: + if (current->pid != (pid_t)arg3 && + !capable(CAP_SYS_NICE)) + return -EPERM; + rcu_read_lock(); + tsk = find_task_by_pid_ns((pid_t)arg3, &init_pid_ns); + if (tsk == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + get_task_struct(tsk); + rcu_read_unlock(); + if (arg2 <= 0) + tsk->timer_slack_ns = + tsk->default_timer_slack_ns; + else + tsk->timer_slack_ns = arg2; + put_task_struct(tsk); + error = 0; + break; default: error = -EINVAL; break; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 27071ee2..c9f46b01 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -16,6 +16,12 @@ #include #include +#define INVALID_UID ((uid_t) -1) +#define uid_valid(uid) ((uid) != -1) +#define uid_lte(a, b) ((a) <= (b)) +#define uid_eq(a, b) ((a) == (b)) +#define uid_gte(a, b) ((a) >= (b)) + int fib_default_rule_add(struct fib_rules_ops *ops, u32 pref, u32 table, u32 flags) { @@ -30,6 +36,8 @@ int fib_default_rule_add(struct fib_rules_ops *ops, r->pref = pref; r->table = table; r->flags = flags; + r->uid_start = INVALID_UID; + r->uid_end = INVALID_UID; r->fr_net = hold_net(ops->fro_net); /* The lock is not required here, the list in unreacheable @@ -176,6 +184,23 @@ void fib_rules_unregister(struct fib_rules_ops *ops) } EXPORT_SYMBOL_GPL(fib_rules_unregister); +static inline uid_t fib_nl_uid(struct nlattr *nla) +{ + return nla_get_u32(nla); +} + +static int nla_put_uid(struct sk_buff *skb, int idx, uid_t uid) +{ + return nla_put_u32(skb, idx, uid); +} + +static int fib_uid_range_match(struct flowi *fl, struct fib_rule *rule) +{ + return (!uid_valid(rule->uid_start) && !uid_valid(rule->uid_end)) || + (uid_gte(fl->flowi_uid, rule->uid_start) && + uid_lte(fl->flowi_uid, rule->uid_end)); +} + static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags) { @@ -190,6 +215,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) goto out; + if (!fib_uid_range_match(fl, rule)) + goto out; + ret = ops->match(rule, fl, flags); out: return (rule->flags & FIB_RULE_INVERT) ? !ret : ret; @@ -360,6 +388,19 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } else if (rule->action == FR_ACT_GOTO) goto errout_free; + /* UID start and end must either both be valid or both unspecified. */ + rule->uid_start = rule->uid_end = INVALID_UID; + if (tb[FRA_UID_START] || tb[FRA_UID_END]) { + if (tb[FRA_UID_START] && tb[FRA_UID_END]) { + rule->uid_start = fib_nl_uid(tb[FRA_UID_START]); + rule->uid_end = fib_nl_uid(tb[FRA_UID_END]); + } + if (!uid_valid(rule->uid_start) || + !uid_valid(rule->uid_end) || + !uid_lte(rule->uid_start, rule->uid_end)) + goto errout_free; + } + err = ops->configure(rule, skb, frh, tb); if (err < 0) goto errout_free; @@ -442,7 +483,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (frh->action && (frh->action != rule->action)) continue; - if (frh->table && (frh_get_table(frh, tb) != rule->table)) + if (frh_get_table(frh, tb) && + (frh_get_table(frh, tb) != rule->table)) continue; if (tb[FRA_PRIORITY] && @@ -465,6 +507,14 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) (rule->mark_mask != nla_get_u32(tb[FRA_FWMASK]))) continue; + if (tb[FRA_UID_START] && + !uid_eq(rule->uid_start, fib_nl_uid(tb[FRA_UID_START]))) + continue; + + if (tb[FRA_UID_END] && + !uid_eq(rule->uid_end, fib_nl_uid(tb[FRA_UID_END]))) + continue; + if (!ops->compare(rule, frh, tb)) continue; @@ -519,7 +569,9 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops, + nla_total_size(4) /* FRA_PRIORITY */ + nla_total_size(4) /* FRA_TABLE */ + nla_total_size(4) /* FRA_FWMARK */ - + nla_total_size(4); /* FRA_FWMASK */ + + nla_total_size(4) /* FRA_FWMASK */ + + nla_total_size(4) /* FRA_UID_START */ + + nla_total_size(4); /* FRA_UID_END */ if (ops->nlmsg_payload) payload += ops->nlmsg_payload(rule); @@ -577,6 +629,12 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->target) NLA_PUT_U32(skb, FRA_GOTO, rule->target); + if (uid_valid(rule->uid_start)) + nla_put_uid(skb, FRA_UID_START, rule->uid_start); + + if (uid_valid(rule->uid_end)) + nla_put_uid(skb, FRA_UID_END, rule->uid_end); + if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 92fc5f69..a54817ac 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -482,6 +482,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = { [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, [RTA_FLOW] = { .type = NLA_U32 }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 23ef31ba..6ea43e6f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -334,6 +334,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct sock *sk; struct inet_sock *inet; __be32 daddr; + u32 mark = IP4_REPLY_MARK(net, skb->mark); if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) return; @@ -346,6 +347,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) icmp_param->data.icmph.checksum = 0; inet->tos = ip_hdr(skb)->tos; + sk->sk_mark = mark; daddr = ipc.addr = ip_hdr(skb)->saddr; ipc.opt = NULL; ipc.tx_flags = 0; @@ -357,6 +359,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; fl4.saddr = rt->rt_spec_dst; + fl4.flowi4_mark = mark; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); fl4.flowi4_proto = IPPROTO_ICMP; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); @@ -375,7 +378,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, struct sk_buff *skb_in, const struct iphdr *iph, - __be32 saddr, u8 tos, + __be32 saddr, u8 tos, u32 mark, int type, int code, struct icmp_bxm *param) { @@ -387,6 +390,7 @@ static struct rtable *icmp_route_lookup(struct net *net, fl4->daddr = (param->replyopts.opt.opt.srr ? param->replyopts.opt.opt.faddr : iph->saddr); fl4->saddr = saddr; + fl4->flowi4_mark = mark; fl4->flowi4_tos = RT_TOS(tos); fl4->flowi4_proto = IPPROTO_ICMP; fl4->fl4_icmp_type = type; @@ -484,6 +488,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) struct flowi4 fl4; __be32 saddr; u8 tos; + u32 mark; struct net *net; struct sock *sk; @@ -580,6 +585,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : iph->tos; + mark = IP4_REPLY_MARK(net, skb_in->mark); if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) goto out_unlock; @@ -596,11 +602,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) icmp_param.skb = skb_in; icmp_param.offset = skb_network_offset(skb_in); inet_sk(sk)->tos = tos; + sk->sk_mark = mark; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts.opt; ipc.tx_flags = 0; - rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, + rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark, type, code, &icmp_param); if (IS_ERR(rt)) goto out_unlock; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c14d88ad..2b8e7d7d 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -358,11 +358,12 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct ip_options_rcu *opt = inet_rsk(req)->opt; struct net *net = sock_net(sk); - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -391,11 +392,12 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, struct rtable *rt; fl4 = &newinet->cork.fl.u.ip4; - flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark, + flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), (opt && opt->opt.srr) ? opt->opt.faddr : ireq->rmt_addr, - ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport); + ireq->loc_addr, ireq->rmt_port, inet_sk(sk)->inet_sport, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) @@ -604,6 +606,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, inet_sk(newsk)->inet_sport = inet_rsk(req)->loc_port; newsk->sk_write_space = sk_stream_write_space; + newsk->sk_mark = inet_rsk(req)->ir_mark; + newicsk->icsk_retransmits = 0; newicsk->icsk_backoff = 0; newicsk->icsk_probes_out = 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8c656336..16ac1635 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1487,12 +1487,14 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr, daddr = replyopts.opt.opt.faddr; } - flowi4_init_output(&fl4, arg->bound_dev_if, 0, + flowi4_init_output(&fl4, arg->bound_dev_if, + IP4_REPLY_MARK(sock_net(sk), skb->mark), RT_TOS(ip_hdr(skb)->tos), RT_SCOPE_UNIVERSE, sk->sk_protocol, ip_reply_arg_flowi_flags(arg), daddr, rt->rt_spec_dst, - tcp_hdr(skb)->source, tcp_hdr(skb)->dest); + tcp_hdr(skb)->source, tcp_hdr(skb)->dest, + arg->uid); security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 39b403f8..9694df41 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -202,26 +202,33 @@ static int ping_init_sock(struct sock *sk) struct net *net = sock_net(sk); gid_t group = current_egid(); gid_t range[2]; - struct group_info *group_info = get_current_groups(); - int i, j, count = group_info->ngroups; + struct group_info *group_info; + int i, j, count; + int ret = 0; inet_get_ping_group_range_net(net, range, range+1); if (range[0] <= group && group <= range[1]) return 0; + group_info = get_current_groups(); + count = group_info->ngroups; for (i = 0; i < group_info->nblocks; i++) { int cp_count = min_t(int, NGROUPS_PER_BLOCK, count); for (j = 0; j < cp_count; j++) { group = group_info->blocks[i][j]; if (range[0] <= group && group <= range[1]) - return 0; + goto out_release_group; } count -= cp_count; } - return -EACCES; + ret = -EACCES; + +out_release_group: + put_group_info(group_info); + return ret; } static void ping_close(struct sock *sk, long timeout) @@ -559,7 +566,8 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, - inet_sk_flowi_flags(sk), faddr, saddr, 0, 0); + inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, + sock_i_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 61714bd5..415b3a80 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -564,7 +564,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, - daddr, saddr, 0, 0); + daddr, saddr, 0, 0, + sock_i_uid(sk)); if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b5638545..6c58c923 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -741,6 +741,7 @@ static inline int compare_keys(struct rtable *rt1, struct rtable *rt2) (rt1->rt_mark ^ rt2->rt_mark) | (rt1->rt_key_tos ^ rt2->rt_key_tos) | (rt1->rt_route_iif ^ rt2->rt_route_iif) | + (rt1->rt_uid ^ rt2->rt_uid) | (rt1->rt_oif ^ rt2->rt_oif)) == 0; } @@ -1886,6 +1887,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) fl4.flowi4_oif = rt->dst.dev->ifindex; fl4.flowi4_iif = skb->dev->ifindex; fl4.flowi4_mark = skb->mark; + fl4.flowi4_uid = skb->sk ? sock_i_uid(skb->sk) : 0; rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0) @@ -2065,6 +2067,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_uid = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; @@ -2200,6 +2203,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = in_dev->dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_uid = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; @@ -2383,6 +2387,7 @@ out: return err; rth->rt_iif = dev->ifindex; rth->rt_oif = 0; rth->rt_mark = skb->mark; + rth->rt_uid = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_peer_genid = 0; @@ -2587,6 +2592,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : dev_out->ifindex; rth->rt_oif = orig_oif; rth->rt_mark = fl4->flowi4_mark; + rth->rt_uid = fl4->flowi4_uid; rth->rt_gateway = fl4->daddr; rth->rt_spec_dst= fl4->saddr; rth->rt_peer_genid = 0; @@ -2838,6 +2844,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp4) rt_is_output_route(rth) && rth->rt_oif == flp4->flowi4_oif && rth->rt_mark == flp4->flowi4_mark && + rth->rt_uid == flp4->flowi4_uid && !((rth->rt_key_tos ^ flp4->flowi4_tos) & (IPTOS_RT_MASK | RTO_ONLINK)) && net_eq(dev_net(rth->dst.dev), net) && @@ -2917,6 +2924,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_iif = ort->rt_iif; rt->rt_oif = ort->rt_oif; rt->rt_mark = ort->rt_mark; + rt->rt_uid = ort->rt_uid; rt->rt_genid = rt_genid(net); rt->rt_flags = ort->rt_flags; @@ -3012,6 +3020,9 @@ static int rt_fill_info(struct net *net, if (rt->rt_mark) NLA_PUT_BE32(skb, RTA_MARK, rt->rt_mark); + if (rt->rt_uid != (uid_t) -1) + NLA_PUT_BE32(skb, RTA_UID, rt->rt_uid); + error = rt->dst.error; if (peer) { inet_peer_refcheck(rt->peer); @@ -3127,6 +3138,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void .flowi4_tos = rtm->rtm_tos, .flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, .flowi4_mark = mark, + .flowi4_uid = tb[RTA_UID] ? nla_get_u32(tb[RTA_UID]) : current_uid(), }; rt = ip_route_output_key(net, &fl4); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3bc5c8f7..184a40f4 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -310,6 +310,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; + ireq->ir_mark = inet_request_mark(sk, skb); ireq->ecn_ok = ecn_ok; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -348,11 +349,12 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, { struct flowi4 fl4; - flowi4_init_output(&fl4, 0, sk->sk_mark, RT_CONN_FLAGS(sk), + flowi4_init_output(&fl4, 0, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP, inet_sk_flowi_flags(sk), (opt && opt->srr) ? opt->faddr : ireq->rmt_addr, - ireq->loc_addr, th->source, th->dest); + ireq->loc_addr, th->source, th->dest, + sock_i_uid(sk)); security_req_classify_flow(req, flowi4_to_flowi(&fl4)); rt = ip_route_output_key(sock_net(sk), &fl4); if (IS_ERR(rt)) { diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 69fd7201..0a56bca7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -721,6 +721,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = ipv4_ping_group_range, }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv4.sysctl_fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_fwmark_accept", + .data = &init_net.ipv4.sysctl_tcp_fwmark_accept, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6cdf6a28..0b4a35e0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -650,6 +650,12 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; + /* When socket is gone, all binding information is lost. + * routing might fail in this case. No choice here, if we choose to force + * input interface, we will misroute in case of asymmetric route. + */ + if (sk) + arg.bound_dev_if = sk->sk_bound_dev_if; net = dev_net(skb_dst(skb)->dev); ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr, @@ -1338,6 +1344,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq->rmt_addr = saddr; ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(sk, skb); + ireq->ir_mark = inet_request_mark(sk, skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1b5a1934..55feb883 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -928,7 +928,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP, - faddr, saddr, dport, inet->inet_sport); + faddr, saddr, dport, inet->inet_sport, + sock_i_uid(sk)); security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a0b4c5da..e8ee4279 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -86,6 +86,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, xdst->u.rt.rt_iif = fl4->flowi4_iif; xdst->u.rt.rt_oif = fl4->flowi4_oif; xdst->u.rt.rt_mark = fl4->flowi4_mark; + xdst->u.rt.rt_uid = fl4->flowi4_uid; xdst->u.dst.dev = dev; dev_hold(dev); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1587d0d9..b10720f2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -192,6 +192,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .accept_ra_rt_table = 0, .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, @@ -226,6 +227,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif + .accept_ra_rt_table = 0, .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, @@ -1680,6 +1682,31 @@ static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad } #endif +u32 addrconf_rt_table(const struct net_device *dev, u32 default_table) { + /* Determines into what table to put autoconf PIO/RIO/default routes + * learned on this device. + * + * - If 0, use the same table for every device. This puts routes into + * one of RT_TABLE_{PREFIX,INFO,DFLT} depending on the type of route + * (but note that these three are currently all equal to + * RT6_TABLE_MAIN). + * - If > 0, use the specified table. + * - If < 0, put routes into table dev->ifindex + (-rt_table). + */ + struct inet6_dev *idev = in6_dev_get(dev); + u32 table; + int sysctl = idev->cnf.accept_ra_rt_table; + if (sysctl == 0) { + table = default_table; + } else if (sysctl > 0) { + table = (u32) sysctl; + } else { + table = (unsigned) dev->ifindex + (-sysctl); + } + in6_dev_put(idev); + return table; +} + /* * Add prefix route. */ @@ -1689,7 +1716,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, u32 flags) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_PREFIX, + .fc_table = addrconf_rt_table(dev, RT6_TABLE_PREFIX), .fc_metric = IP6_RT_PRIO_ADDRCONF, .fc_ifindex = dev->ifindex, .fc_expires = expires, @@ -3860,6 +3887,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif + array[DEVCONF_ACCEPT_RA_RT_TABLE] = cnf->accept_ra_rt_table; array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD @@ -4470,6 +4498,13 @@ static struct addrconf_sysctl_table }, #endif #endif + { + .procname = "accept_ra_rt_table", + .data = &ipv6_devconf.accept_ra_rt_table, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { .procname = "proxy_ndp", .data = &ipv6_devconf.proxy_ndp, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4252b3cc..51fb75ae 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -701,6 +701,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index b46e9f88..c880af54 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -160,6 +160,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST)) fl6.flowi6_oif = np->mcast_oif; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 11900417..72e68520 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -382,6 +382,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int len; int hlimit; int err = 0; + u32 mark = IP6_REPLY_MARK(net, skb->mark); if ((u8 *)hdr < skb->head || (skb->network_header + sizeof(*hdr)) > skb->tail) @@ -447,6 +448,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) ipv6_addr_copy(&fl6.daddr, &hdr->saddr); if (saddr) ipv6_addr_copy(&fl6.saddr, saddr); + fl6.flowi6_mark = mark; fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; @@ -455,6 +457,7 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -529,6 +532,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct dst_entry *dst; int err = 0; int hlimit; + u32 mark = IP6_REPLY_MARK(net, skb->mark); saddr = &ipv6_hdr(skb)->daddr; @@ -545,11 +549,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipv6_addr_copy(&fl6.saddr, saddr); fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; + fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 8a58e8cf..219023f1 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -69,9 +69,10 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, final_p = fl6_update_dst(&fl6, np->opt, &final); ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_rsk(req)->loc_port; + fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); @@ -222,6 +223,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) fl6.flowi6_mark = sk->sk_mark; fl6.fl6_sport = inet->inet_sport; fl6.fl6_dport = inet->inet_dport; + fl6.flowi6_uid = sock_i_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); final_p = fl6_update_dst(&fl6, np->opt, &final); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c new file mode 100644 index 00000000..0082212c --- /dev/null +++ b/net/ipv6/ping.c @@ -0,0 +1,219 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * "Ping" sockets + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Based on ipv4/ping.c code. + * + * Authors: Lorenzo Colitti (IPv6 support) + * Vasiliy Kulikov / Openwall (IPv4 implementation, for Linux 2.6), + * Pavel Kankovsky (IPv4 implementation, for Linux 2.4.32) + * + */ + +#include +#include +#include +#include +#include +#include +#include + +struct proto pingv6_prot = { + .name = "PINGv6", + .owner = THIS_MODULE, + .init = ping_init_sock, + .close = ping_close, + .connect = ip6_datagram_connect, + .disconnect = udp_disconnect, + .setsockopt = ipv6_setsockopt, + .getsockopt = ipv6_getsockopt, + .sendmsg = ping_v6_sendmsg, + .recvmsg = ping_recvmsg, + .bind = ping_bind, + .backlog_rcv = ping_queue_rcv_skb, + .hash = ping_hash, + .unhash = ping_unhash, + .get_port = ping_get_port, + .obj_size = sizeof(struct raw6_sock), +}; +EXPORT_SYMBOL_GPL(pingv6_prot); + +static struct inet_protosw pingv6_protosw = { + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + .prot = &pingv6_prot, + .ops = &inet6_dgram_ops, + .no_check = UDP_CSUM_DEFAULT, + .flags = INET_PROTOSW_REUSE, +}; + + +/* Compatibility glue so we can support IPv6 when it's compiled as a module */ +int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +{ + return -EAFNOSUPPORT; +} +int dummy_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + return -EAFNOSUPPORT; +} +int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) +{ + return -EAFNOSUPPORT; +} +void dummy_ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload) {} +int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev, int strict) +{ + return 0; +} + +int __init pingv6_init(void) +{ + pingv6_ops.ipv6_recv_error = ipv6_recv_error; + pingv6_ops.datagram_recv_ctl = datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; + return inet6_register_protosw(&pingv6_protosw); +} + +/* This never gets called because it's not possible to unload the ipv6 module, + * but just in case. + */ +void pingv6_exit(void) +{ + pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; + pingv6_ops.datagram_recv_ctl = dummy_datagram_recv_ctl; + pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; + pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; + pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; + inet6_unregister_protosw(&pingv6_protosw); +} + +int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct icmp6hdr user_icmph; + int addr_type; + struct in6_addr *daddr; + int iif = 0; + struct flowi6 fl6; + int err; + int hlimit; + struct dst_entry *dst; + struct rt6_info *rt; + struct pingfakehdr pfh; + + pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); + + err = ping_common_sendmsg(AF_INET6, msg, len, &user_icmph, + sizeof(user_icmph)); + if (err) + return err; + + if (msg->msg_name) { + struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name; + if (msg->msg_namelen < sizeof(struct sockaddr_in6) || + u->sin6_family != AF_INET6) { + return -EINVAL; + } + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != u->sin6_scope_id) { + return -EINVAL; + } + daddr = &(u->sin6_addr); + iif = u->sin6_scope_id; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + daddr = &np->daddr; + } + + if (!iif) + iif = sk->sk_bound_dev_if; + + addr_type = ipv6_addr_type(daddr); + if (__ipv6_addr_needs_scope_id(addr_type) && !iif) + return -EINVAL; + if (addr_type & IPV6_ADDR_MAPPED) + return -EINVAL; + + /* TODO: use ip6_datagram_send_ctl to get options from cmsg */ + + memset(&fl6, 0, sizeof(fl6)); + + fl6.flowi6_proto = IPPROTO_ICMPV6; + fl6.saddr = np->saddr; + fl6.daddr = *daddr; + fl6.flowi6_mark = sk->sk_mark; + fl6.fl6_icmp_type = user_icmph.icmp6_type; + fl6.fl6_icmp_code = user_icmph.icmp6_code; + fl6.flowi6_uid = sock_i_uid(sk); + security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1); + if (IS_ERR(dst)) + return PTR_ERR(dst); + rt = (struct rt6_info *) dst; + + np = inet6_sk(sk); + if (!np) + return -EBADF; + + if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) + fl6.flowi6_oif = np->mcast_oif; + + pfh.icmph.type = user_icmph.icmp6_type; + pfh.icmph.code = user_icmph.icmp6_code; + pfh.icmph.checksum = 0; + pfh.icmph.un.echo.id = inet->inet_sport; + pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; + pfh.iov = msg->msg_iov; + pfh.wcheck = 0; + pfh.family = AF_INET6; + + if (ipv6_addr_is_multicast(&fl6.daddr)) + hlimit = np->mcast_hops; + else + hlimit = np->hop_limit; + if (hlimit < 0) + hlimit = ip6_dst_hoplimit(dst); + + lock_sock(sk); + err = ip6_append_data(sk, ping_getfrag, &pfh, len, + 0, hlimit, + np->tclass, NULL, &fl6, rt, + MSG_DONTWAIT, np->dontfrag); + + if (err) { + ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev, + ICMP6_MIB_OUTERRORS); + ip6_flush_pending_frames(sk); + } else { + err = icmpv6_push_pending_frames(sk, &fl6, + (struct icmp6hdr *) &pfh.icmph, + len); + } + release_sock(sk); + + if (err) + return err; + + return len; +} diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 343852e5..913830a4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -758,6 +758,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f02fe523..113a70ff 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -89,13 +89,13 @@ static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct rt6_info *rt6_add_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, unsigned pref); -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct rt6_info *rt6_get_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex); + const struct in6_addr *gwaddr); #endif static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) @@ -547,7 +547,6 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr) { - struct net *net = dev_net(dev); struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; @@ -589,8 +588,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, prefix = &prefix_buf; } - rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, - dev->ifindex); + rt = rt6_get_route_info(dev, prefix, rinfo->prefix_len, gwaddr); if (rt && !lifetime) { ip6_del_rt(rt); @@ -598,8 +596,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (!rt && lifetime) - rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, - pref); + rt = rt6_add_route_info(dev, prefix, rinfo->prefix_len, gwaddr, pref); else if (rt) rt->rt6i_flags = RTF_ROUTEINFO | (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); @@ -1791,15 +1788,16 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, } #ifdef CONFIG_IPV6_ROUTE_INFO -static struct rt6_info *rt6_get_route_info(struct net *net, +static struct rt6_info *rt6_get_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex) + const struct in6_addr *gwaddr) { struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - table = fib6_get_table(net, RT6_TABLE_INFO); + table = fib6_get_table(dev_net(dev), + addrconf_rt_table(dev, RT6_TABLE_INFO)); if (table == NULL) return NULL; @@ -1809,7 +1807,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, goto out; for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_dev->ifindex != ifindex) + if (rt->rt6i_dev->ifindex != dev->ifindex) continue; if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; @@ -1823,21 +1821,21 @@ static struct rt6_info *rt6_get_route_info(struct net *net, return rt; } -static struct rt6_info *rt6_add_route_info(struct net *net, +static struct rt6_info *rt6_add_route_info(struct net_device *dev, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, unsigned pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_INFO, + .fc_table = addrconf_rt_table(dev, RT6_TABLE_INFO), .fc_metric = IP6_RT_PRIO_USER, - .fc_ifindex = ifindex, + .fc_ifindex = dev->ifindex, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), .fc_nlinfo.pid = 0, .fc_nlinfo.nlh = NULL, - .fc_nlinfo.nl_net = net, + .fc_nlinfo.nl_net = dev_net(dev), }; ipv6_addr_copy(&cfg.fc_dst, prefix); @@ -1849,7 +1847,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, ip6_route_add(&cfg); - return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); + return rt6_get_route_info(dev, prefix, prefixlen, gwaddr); } #endif @@ -1858,7 +1856,8 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); + table = fib6_get_table(dev_net(dev), + addrconf_rt_table(dev, RT6_TABLE_DFLT)); if (table == NULL) return NULL; @@ -1880,7 +1879,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, unsigned int pref) { struct fib6_config cfg = { - .fc_table = RT6_TABLE_DFLT, + .fc_table = addrconf_rt_table(dev, RT6_TABLE_DFLT), .fc_metric = IP6_RT_PRIO_USER, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | @@ -1897,27 +1896,17 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, return rt6_get_dflt_router(gwaddr, dev); } -void rt6_purge_dflt_routers(struct net *net) -{ - struct rt6_info *rt; - struct fib6_table *table; - /* NOTE: Keep consistent with rt6_get_dflt_router */ - table = fib6_get_table(net, RT6_TABLE_DFLT); - if (table == NULL) - return; +int rt6_addrconf_purge(struct rt6_info *rt, void *arg) { + if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && + (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) + return -1; + return 0; +} -restart: - read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - ip6_del_rt(rt); - goto restart; - } - } - read_unlock_bh(&table->tb6_lock); +void rt6_purge_dflt_routers(struct net *net) +{ + fib6_clean_all(net, rt6_addrconf_purge, 0, NULL); } static void rtmsg_to_fib6_config(struct net *net, @@ -2220,6 +2209,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_IIF] = { .type = NLA_U32 }, [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_UID] = { .type = NLA_U32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2519,6 +2509,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (tb[RTA_OIF]) fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_UID]) + fl6.flowi6_uid = nla_get_u32(tb[RTA_UID]); + else + fl6.flowi6_uid = (iif ? (uid_t) -1 : current_uid()); + if (iif) { struct net_device *dev; dev = __dev_get_by_index(net, iif); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ac838965..a6a636d6 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -215,6 +215,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq6->iif = inet6_iif(skb); + ireq->ir_mark = inet_request_mark(sk, skb); + req->expires = 0UL; req->retrans = 0; ireq->ecn_ok = ecn_ok; @@ -241,9 +243,10 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) final_p = fl6_update_dst(&fl6, np->opt, &final); ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr); fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 6dcf5e7d..4c27009d 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -47,6 +47,13 @@ static ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv6.sysctl.fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cdbce216..0c08b0bb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -251,6 +251,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); final_p = fl6_update_dst(&fl6, np->opt, &final); @@ -404,6 +405,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; + fl6.flowi6_uid = sock_i_uid(sk); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false); @@ -493,9 +495,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, ipv6_addr_copy(&fl6.saddr, &treq->loc_addr); fl6.flowlabel = 0; fl6.flowi6_oif = treq->iif; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = inet_rsk(req)->ir_mark; fl6.fl6_dport = inet_rsk(req)->rmt_port; fl6.fl6_sport = inet_rsk(req)->loc_port; + fl6.flowi6_uid = sock_i_uid(sk); security_req_classify_flow(req, flowi6_to_flowi(&fl6)); opt = np->opt; @@ -1046,7 +1049,9 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); fl6.flowi6_proto = IPPROTO_TCP; - fl6.flowi6_oif = inet6_iif(skb); + if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = inet6_iif(skb); + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); @@ -1252,6 +1257,14 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!want_cookie || tmp_opt.tstamp_ok) TCP_ECN_create_request(req, tcp_hdr(skb)); + treq->iif = sk->sk_bound_dev_if; + inet_rsk(req)->ir_mark = inet_request_mark(sk, skb); + + /* So that link locals have meaning */ + if (!sk->sk_bound_dev_if && + ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) + treq->iif = inet6_iif(skb); + if (!isn) { struct inet_peer *peer = NULL; @@ -1261,12 +1274,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) atomic_inc(&skb->users); treq->pktopts = skb; } - treq->iif = sk->sk_bound_dev_if; - - /* So that link locals have meaning */ - if (!sk->sk_bound_dev_if && - ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = inet6_iif(skb); if (want_cookie) { isn = cookie_v6_init_sequence(sk, skb, &req->mss); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bb95e8e1..b9bc3ca4 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1084,6 +1084,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_uid = sock_i_uid(sk); if (msg->msg_controllen) { opt = &opt_space; diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 3bdd443a..eed2538f 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -40,6 +40,13 @@ #include #include #include +#include +#include +#include +#include +#include +#include + struct idletimer_tg_attr { struct attribute attr; @@ -55,14 +62,98 @@ struct idletimer_tg { struct kobject *kobj; struct idletimer_tg_attr attr; + struct timespec delayed_timer_trigger; + struct timespec last_modified_timer; + struct timespec last_suspend_time; + struct notifier_block pm_nb; + + int timeout; unsigned int refcnt; + bool work_pending; + bool send_nl_msg; + bool active; }; static LIST_HEAD(idletimer_tg_list); static DEFINE_MUTEX(list_mutex); +static DEFINE_SPINLOCK(timestamp_lock); static struct kobject *idletimer_tg_kobj; + +static bool check_for_delayed_trigger(struct idletimer_tg *timer, + struct timespec *ts) +{ + bool state; + struct timespec temp; + spin_lock_bh(×tamp_lock); + timer->work_pending = false; + if ((ts->tv_sec - timer->last_modified_timer.tv_sec) > timer->timeout || + timer->delayed_timer_trigger.tv_sec != 0) { + state = false; + temp.tv_sec = timer->timeout; + temp.tv_nsec = 0; + if (timer->delayed_timer_trigger.tv_sec != 0) { + temp = timespec_add(timer->delayed_timer_trigger, temp); + ts->tv_sec = temp.tv_sec; + ts->tv_nsec = temp.tv_nsec; + timer->delayed_timer_trigger.tv_sec = 0; + timer->work_pending = true; + schedule_work(&timer->work); + } else { + temp = timespec_add(timer->last_modified_timer, temp); + ts->tv_sec = temp.tv_sec; + ts->tv_nsec = temp.tv_nsec; + } + } else { + state = timer->active; + } + spin_unlock_bh(×tamp_lock); + return state; +} + +static void notify_netlink_uevent(const char *iface, struct idletimer_tg *timer) +{ + char iface_msg[NLMSG_MAX_SIZE]; + char state_msg[NLMSG_MAX_SIZE]; + char timestamp_msg[NLMSG_MAX_SIZE]; + char *envp[] = { iface_msg, state_msg, timestamp_msg, NULL }; + int res; + struct timespec ts; + uint64_t time_ns; + bool state; + + res = snprintf(iface_msg, NLMSG_MAX_SIZE, "INTERFACE=%s", + iface); + if (NLMSG_MAX_SIZE <= res) { + pr_err("message too long (%d)", res); + return; + } + + get_monotonic_boottime(&ts); + state = check_for_delayed_trigger(timer, &ts); + res = snprintf(state_msg, NLMSG_MAX_SIZE, "STATE=%s", + state ? "active" : "inactive"); + + if (NLMSG_MAX_SIZE <= res) { + pr_err("message too long (%d)", res); + return; + } + + time_ns = timespec_to_ns(&ts); + res = snprintf(timestamp_msg, NLMSG_MAX_SIZE, "TIME_NS=%llu", time_ns); + if (NLMSG_MAX_SIZE <= res) { + timestamp_msg[0] = '\0'; + pr_err("message too long (%d)", res); + } + + pr_debug("putting nlmsg: <%s> <%s>\n", iface_msg, state_msg); + kobject_uevent_env(idletimer_tg_kobj, KOBJ_CHANGE, envp); + return; + + +} + static struct idletimer_tg *__idletimer_tg_find_by_label(const char *label) { @@ -112,8 +203,55 @@ static void idletimer_tg_expired(unsigned long data) struct idletimer_tg *timer = (struct idletimer_tg *) data; pr_debug("timer %s expired\n", timer->attr.attr.name); - + spin_lock_bh(×tamp_lock); + timer->active = false; + timer->work_pending = true; schedule_work(&timer->work); + spin_unlock_bh(×tamp_lock); +} + +static int idletimer_resume(struct notifier_block *notifier, + unsigned long pm_event, void *unused) +{ + struct timespec ts; + unsigned long time_diff, now = jiffies; + struct idletimer_tg *timer = container_of(notifier, + struct idletimer_tg, pm_nb); + if (!timer) + return NOTIFY_DONE; + switch (pm_event) { + case PM_SUSPEND_PREPARE: + get_monotonic_boottime(&timer->last_suspend_time); + break; + case PM_POST_SUSPEND: + spin_lock_bh(×tamp_lock); + if (!timer->active) { + spin_unlock_bh(×tamp_lock); + break; + } + /* since jiffies are not updated when suspended now represents + * the time it would have suspended */ + if (time_after(timer->timer.expires, now)) { + get_monotonic_boottime(&ts); + ts = timespec_sub(ts, timer->last_suspend_time); + time_diff = timespec_to_jiffies(&ts); + if (timer->timer.expires > (time_diff + now)) { + mod_timer_pending(&timer->timer, + (timer->timer.expires - time_diff)); + } else { + del_timer(&timer->timer); + timer->timer.expires = 0; + timer->active = false; + timer->work_pending = true; + schedule_work(&timer->work); + } + } + spin_unlock_bh(×tamp_lock); + break; + default: + break; + } + return NOTIFY_DONE; } static int idletimer_tg_create(struct idletimer_tg_info *info) @@ -147,6 +285,20 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) setup_timer(&info->timer->timer, idletimer_tg_expired, (unsigned long) info->timer); info->timer->refcnt = 1; + info->timer->send_nl_msg = (info->send_nl_msg == 0) ? false : true; + info->timer->active = true; + info->timer->timeout = info->timeout; + + info->timer->delayed_timer_trigger.tv_sec = 0; + info->timer->delayed_timer_trigger.tv_nsec = 0; + info->timer->work_pending = false; + get_monotonic_boottime(&info->timer->last_modified_timer); + + info->timer->pm_nb.notifier_call = idletimer_resume; + ret = register_pm_notifier(&info->timer->pm_nb); + if (ret) + printk(KERN_WARNING "[%s] Failed to register pm notifier %d\n", + __func__, ret); mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); @@ -163,6 +315,34 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) return ret; } +static void reset_timer(const struct idletimer_tg_info *info) +{ + unsigned long now = jiffies; + struct idletimer_tg *timer = info->timer; + bool timer_prev; + + spin_lock_bh(×tamp_lock); + timer_prev = timer->active; + timer->active = true; + /* timer_prev is used to guard overflow problem in time_before*/ + if (!timer_prev || time_before(timer->timer.expires, now)) { + pr_debug("Starting Checkentry timer (Expired, Jiffies): %lu, %lu\n", + timer->timer.expires, now); + /* checks if there is a pending inactive notification*/ + if (timer->work_pending) + timer->delayed_timer_trigger = timer->last_modified_timer; + else { + timer->work_pending = true; + schedule_work(&timer->work); + } + } + + get_monotonic_boottime(&timer->last_modified_timer); + mod_timer(&timer->timer, + msecs_to_jiffies(info->timeout * 1000) + now); + spin_unlock_bh(×tamp_lock); +} + /* * The actual xt_tables plugin. */ @@ -176,9 +356,16 @@ static unsigned int idletimer_tg_target(struct sk_buff *skb, BUG_ON(!info->timer); - mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + jiffies); + info->timer->active = true; + if (time_before(info->timer->timer.expires, now)) { + schedule_work(&info->timer->work); + pr_debug("Starting timer %s (Expired, Jiffies): %lu, %lu\n", + info->label, info->timer->timer.expires, now); + } + + /* TODO: Avoid modifying timers on each packet */ + reset_timer(info); return XT_CONTINUE; } @@ -206,9 +393,7 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) info->timer = __idletimer_tg_find_by_label(info->label); if (info->timer) { info->timer->refcnt++; - mod_timer(&info->timer->timer, - msecs_to_jiffies(info->timeout * 1000) + jiffies); - + reset_timer(info); pr_debug("increased refcnt of timer %s to %u\n", info->label, info->timer->refcnt); } else { @@ -238,6 +423,7 @@ static void idletimer_tg_destroy(const struct xt_tgdtor_param *par) list_del(&info->timer->entry); del_timer_sync(&info->timer->timer); sysfs_remove_file(idletimer_tg_kobj, &info->timer->attr.attr); + unregister_pm_notifier(&info->timer->pm_nb); kfree(info->timer->attr.attr.name); kfree(info->timer); } else { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0a4db021..86d7a7af 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1339,7 +1339,8 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, dst_pid = addr->nl_pid; dst_group = ffs(addr->nl_groups); err = -EPERM; - if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) + if ((dst_group || dst_pid) && + !netlink_capable(sock, NL_NONROOT_SEND)) goto out; } else { dst_pid = nlk->dst_pid; @@ -2102,6 +2103,7 @@ static void __init netlink_add_usersock_entry(void) rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners); nl_table[NETLINK_USERSOCK].module = THIS_MODULE; nl_table[NETLINK_USERSOCK].registered = 1; + nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND; netlink_table_ungrab(); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 266a2292..9f2132ff 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -407,6 +407,13 @@ static int sb_finish_set_opts(struct super_block *sb) if (strncmp(sb->s_type->name, "sysfs", sizeof("sysfs")) == 0) sbsec->flags |= SE_SBLABELSUPP; + /* + * Special handling for rootfs. Is genfs but supports + * setting SELinux context on in-core inodes. + */ + if (strncmp(sb->s_type->name, "rootfs", sizeof("rootfs")) == 0) + sbsec->flags |= SE_SBLABELSUPP; + /* Initialize the root inode. */ rc = inode_doinit_with_dentry(root_inode, root);