ipmr, ip6mr: fix scheduling while atomic and a deadlock with ipmr_get_route
authorNikolay Aleksandrov <nikolay@cumulusnetworks.com>
Sun, 25 Sep 2016 21:08:31 +0000 (23:08 +0200)
committerWilly Tarreau <w@1wt.eu>
Fri, 10 Feb 2017 10:03:52 +0000 (11:03 +0100)
commit 2cf750704bb6d7ed8c7d732e071dd1bc890ea5e8 upstream.

Since the commit below the ipmr/ip6mr rtnl_unicast() code uses the portid
instead of the previous dst_pid which was copied from in_skb's portid.
Since the skb is new the portid is 0 at that point so the packets are sent
to the kernel and we get scheduling while atomic or a deadlock (depending
on where it happens) by trying to acquire rtnl two times.
Also since this is RTM_GETROUTE, it can be triggered by a normal user.

Here's the sleeping while atomic trace:
[ 7858.212557] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620
[ 7858.212748] in_atomic(): 1, irqs_disabled(): 0, pid: 0, name: swapper/0
[ 7858.212881] 2 locks held by swapper/0/0:
[ 7858.213013]  #0:  (((&mrt->ipmr_expire_timer))){+.-...}, at: [<ffffffff810fbbf5>] call_timer_fn+0x5/0x350
[ 7858.213422]  #1:  (mfc_unres_lock){+.....}, at: [<ffffffff8161e005>] ipmr_expire_process+0x25/0x130
[ 7858.213807] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.8.0-rc7+ #179
[ 7858.213934] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014
[ 7858.214108]  0000000000000000 ffff88005b403c50 ffffffff813a7804 0000000000000000
[ 7858.214412]  ffffffff81a1338e ffff88005b403c78 ffffffff810a4a72 ffffffff81a1338e
[ 7858.214716]  000000000000026c 0000000000000000 ffff88005b403ca8 ffffffff810a4b9f
[ 7858.215251] Call Trace:
[ 7858.215412]  <IRQ>  [<ffffffff813a7804>] dump_stack+0x85/0xc1
[ 7858.215662]  [<ffffffff810a4a72>] ___might_sleep+0x192/0x250
[ 7858.215868]  [<ffffffff810a4b9f>] __might_sleep+0x6f/0x100
[ 7858.216072]  [<ffffffff8165bea3>] mutex_lock_nested+0x33/0x4d0
[ 7858.216279]  [<ffffffff815a7a5f>] ? netlink_lookup+0x25f/0x460
[ 7858.216487]  [<ffffffff8157474b>] rtnetlink_rcv+0x1b/0x40
[ 7858.216687]  [<ffffffff815a9a0c>] netlink_unicast+0x19c/0x260
[ 7858.216900]  [<ffffffff81573c70>] rtnl_unicast+0x20/0x30
[ 7858.217128]  [<ffffffff8161cd39>] ipmr_destroy_unres+0xa9/0xf0
[ 7858.217351]  [<ffffffff8161e06f>] ipmr_expire_process+0x8f/0x130
[ 7858.217581]  [<ffffffff8161dfe0>] ? ipmr_net_init+0x180/0x180
[ 7858.217785]  [<ffffffff8161dfe0>] ? ipmr_net_init+0x180/0x180
[ 7858.217990]  [<ffffffff810fbc95>] call_timer_fn+0xa5/0x350
[ 7858.218192]  [<ffffffff810fbbf5>] ? call_timer_fn+0x5/0x350
[ 7858.218415]  [<ffffffff8161dfe0>] ? ipmr_net_init+0x180/0x180
[ 7858.218656]  [<ffffffff810fde10>] run_timer_softirq+0x260/0x640
[ 7858.218865]  [<ffffffff8166379b>] ? __do_softirq+0xbb/0x54f
[ 7858.219068]  [<ffffffff816637c8>] __do_softirq+0xe8/0x54f
[ 7858.219269]  [<ffffffff8107a948>] irq_exit+0xb8/0xc0
[ 7858.219463]  [<ffffffff81663452>] smp_apic_timer_interrupt+0x42/0x50
[ 7858.219678]  [<ffffffff816625bc>] apic_timer_interrupt+0x8c/0xa0
[ 7858.219897]  <EOI>  [<ffffffff81055f16>] ? native_safe_halt+0x6/0x10
[ 7858.220165]  [<ffffffff810d64dd>] ? trace_hardirqs_on+0xd/0x10
[ 7858.220373]  [<ffffffff810298e3>] default_idle+0x23/0x190
[ 7858.220574]  [<ffffffff8102a20f>] arch_cpu_idle+0xf/0x20
[ 7858.220790]  [<ffffffff810c9f8c>] default_idle_call+0x4c/0x60
[ 7858.221016]  [<ffffffff810ca33b>] cpu_startup_entry+0x39b/0x4d0
[ 7858.221257]  [<ffffffff8164f995>] rest_init+0x135/0x140
[ 7858.221469]  [<ffffffff81f83014>] start_kernel+0x50e/0x51b
[ 7858.221670]  [<ffffffff81f82120>] ? early_idt_handler_array+0x120/0x120
[ 7858.221894]  [<ffffffff81f8243f>] x86_64_start_reservations+0x2a/0x2c
[ 7858.222113]  [<ffffffff81f8257c>] x86_64_start_kernel+0x13b/0x14a

Fixes: 2942e9005056 ("[RTNETLINK]: Use rtnl_unicast() for rtnetlink unicasts")
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Willy Tarreau <w@1wt.eu>
include/linux/mroute.h
include/linux/mroute6.h
net/ipv4/ipmr.c
net/ipv4/route.c
net/ipv6/ip6mr.c
net/ipv6/route.c

index 79aaa9fc1a15d6ed7caac2b147ae030b18da060a..d5277fc3ce2eea2470891f360031f694d92b3996 100644 (file)
@@ -103,5 +103,5 @@ struct mfc_cache {
 struct rtmsg;
 extern int ipmr_get_route(struct net *net, struct sk_buff *skb,
                          __be32 saddr, __be32 daddr,
-                         struct rtmsg *rtm, int nowait);
+                         struct rtmsg *rtm, int nowait, u32 portid);
 #endif
index 66982e7640514389098c140a39665f95261071d8..f831155dc7d1201f5b2ef899a51a1e3a45f768e8 100644 (file)
@@ -115,7 +115,7 @@ struct mfc6_cache {
 
 struct rtmsg;
 extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
-                          struct rtmsg *rtm, int nowait);
+                          struct rtmsg *rtm, int nowait, u32 portid);
 
 #ifdef CONFIG_IPV6_MROUTE
 extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
index 89570f070e0e4f8d3bd168514e37f9a328f600e3..a429ac69af783454a74f15852992b380f83cff8f 100644 (file)
@@ -2190,7 +2190,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 
 int ipmr_get_route(struct net *net, struct sk_buff *skb,
                   __be32 saddr, __be32 daddr,
-                  struct rtmsg *rtm, int nowait)
+                  struct rtmsg *rtm, int nowait, u32 portid)
 {
        struct mfc_cache *cache;
        struct mr_table *mrt;
@@ -2235,6 +2235,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
                        return -ENOMEM;
                }
 
+               NETLINK_CB(skb2).portid = portid;
                skb_push(skb2, sizeof(struct iphdr));
                skb_reset_network_header(skb2);
                iph = ip_hdr(skb2);
index 624ca8ed350c443fba7143bd9146fe7a13224f71..93efb898bd047afd8469e92513747e2256cbd64f 100644 (file)
@@ -2325,7 +2325,8 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
                    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
                        int err = ipmr_get_route(net, skb,
                                                 fl4->saddr, fl4->daddr,
-                                                r, nowait);
+                                                r, nowait, portid);
+
                        if (err <= 0) {
                                if (!nowait) {
                                        if (err == 0)
index 107f75283b1bb5a19226a0683d83a0c173701315..8344f686335d4055c665998b75a97fa2420b9b9b 100644 (file)
@@ -2275,8 +2275,8 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
        return 1;
 }
 
-int ip6mr_get_route(struct net *net,
-                   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
+int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
+                   int nowait, u32 portid)
 {
        int err;
        struct mr6_table *mrt;
@@ -2321,6 +2321,7 @@ int ip6mr_get_route(struct net *net,
                        return -ENOMEM;
                }
 
+               NETLINK_CB(skb2).portid = portid;
                skb_reset_transport_header(skb2);
 
                skb_put(skb2, sizeof(struct ipv6hdr));
index 6ebefd46f7184931a60ac5e72ae8970c4c89a39a..fb5010c27a22ca60e3fc857c1379a08f38775546 100644 (file)
@@ -2536,7 +2536,9 @@ static int rt6_fill_node(struct net *net,
        if (iif) {
 #ifdef CONFIG_IPV6_MROUTE
                if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
-                       int err = ip6mr_get_route(net, skb, rtm, nowait);
+                       int err = ip6mr_get_route(net, skb, rtm, nowait,
+                                                 portid);
+
                        if (err <= 0) {
                                if (!nowait) {
                                        if (err == 0)