tipc: Revert "tipc: use existing sk_write_queue for outgoing packet chain"
author Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Tue, 1 Mar 2016 10:07:09 +0000 (11:07 +0100)
committer David S. Miller <davem@davemloft.net>
Thu, 3 Mar 2016 21:30:29 +0000 (16:30 -0500)
Reverts commit 94153e36e709e ("tipc: use existing sk_write_queue for
outgoing packet chain").

In commit 94153e36e709e, we assumed that we fill and empty the socket's
sk_write_queue within the same lock_sock() session.

This is not true if the link is congested. During congestion, the
socket lock is released while we wait for the congestion to cease.
This implementation causes a NULL pointer dereference if the user space
program has several threads accessing the same socket descriptor.

Consider two threads of the same program performing the following:
     Thread1                                  Thread2
--------------------                    ----------------------
Enter tipc_sendmsg()                    Enter tipc_sendmsg()
lock_sock()                             lock_sock()
Enter tipc_link_xmit(), ret=ELINKCONG   spin on socket lock..
sk_wait_event()                             :
release_sock()                          grab socket lock
    :                                   Enter tipc_link_xmit(), ret=0
    :                                   release_sock()
Wakeup after congestion
lock_sock()
skb = skb_peek(pktchain);
!! TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;

In this case, the second thread transmits the buffers belonging to
both thread1 and thread2 successfully. When the first thread wakes up
after the congestion, it assumes that the pktchain is intact and
operates on the skbs in it, which leads to the following exception:

[2102.439969] BUG: unable to handle kernel NULL pointer dereference at 00000000000000d0
[2102.440074] IP: [<ffffffffa005f330>] __tipc_link_xmit+0x2b0/0x4d0 [tipc]
[2102.440074] PGD 3fa3f067 PUD 3fa6b067 PMD 0
[2102.440074] Oops: 0000 [#1] SMP
[2102.440074] CPU: 2 PID: 244 Comm: sender Not tainted 3.12.28 #1
[2102.440074] RIP: 0010:[<ffffffffa005f330>]  [<ffffffffa005f330>] __tipc_link_xmit+0x2b0/0x4d0 [tipc]
[...]
[2102.440074] Call Trace:
[2102.440074]  [<ffffffff8163f0b9>] ? schedule+0x29/0x70
[2102.440074]  [<ffffffffa006a756>] ? tipc_node_unlock+0x46/0x170 [tipc]
[2102.440074]  [<ffffffffa005f761>] tipc_link_xmit+0x51/0xf0 [tipc]
[2102.440074]  [<ffffffffa006d8ae>] tipc_send_stream+0x11e/0x4f0 [tipc]
[2102.440074]  [<ffffffff8106b150>] ? __wake_up_sync+0x20/0x20
[2102.440074]  [<ffffffffa006dc9c>] tipc_send_packet+0x1c/0x20 [tipc]
[2102.440074]  [<ffffffff81502478>] sock_sendmsg+0xa8/0xd0
[2102.440074]  [<ffffffff81507895>] ? release_sock+0x145/0x170
[2102.440074]  [<ffffffff815030d8>] ___sys_sendmsg+0x3d8/0x3e0
[2102.440074]  [<ffffffff816426ae>] ? _raw_spin_unlock+0xe/0x10
[2102.440074]  [<ffffffff81115c2a>] ? handle_mm_fault+0x6ca/0x9d0
[2102.440074]  [<ffffffff8107dd65>] ? set_next_entity+0x85/0xa0
[2102.440074]  [<ffffffff816426de>] ? _raw_spin_unlock_irq+0xe/0x20
[2102.440074]  [<ffffffff8107463c>] ? finish_task_switch+0x5c/0xc0
[2102.440074]  [<ffffffff8163ea8c>] ? __schedule+0x34c/0x950
[2102.440074]  [<ffffffff81504e12>] __sys_sendmsg+0x42/0x80
[2102.440074]  [<ffffffff81504e62>] SyS_sendmsg+0x12/0x20
[2102.440074]  [<ffffffff8164aed2>] system_call_fastpath+0x16/0x1b

In this commit, we always maintain the skb list on the stack.

Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/tipc/socket.c

index 69c29050f14abe8c926db671dfa05c96f2ddc3bc..4d420bb273960cd6eac206753f25435970f2724d 100644 (file)
@@ -673,7 +673,7 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
        struct tipc_sock *tsk = tipc_sk(sk);
        struct net *net = sock_net(sk);
        struct tipc_msg *mhdr = &tsk->phdr;
-       struct sk_buff_head *pktchain = &sk->sk_write_queue;
+       struct sk_buff_head pktchain;
        struct iov_iter save = msg->msg_iter;
        uint mtu;
        int rc;
@@ -687,14 +687,16 @@ static int tipc_sendmcast(struct  socket *sock, struct tipc_name_seq *seq,
        msg_set_nameupper(mhdr, seq->upper);
        msg_set_hdr_sz(mhdr, MCAST_H_SIZE);
 
+       skb_queue_head_init(&pktchain);
+
 new_mtu:
        mtu = tipc_bcast_get_mtu(net);
-       rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain);
+       rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain);
        if (unlikely(rc < 0))
                return rc;
 
        do {
-               rc = tipc_bcast_xmit(net, pktchain);
+               rc = tipc_bcast_xmit(net, &pktchain);
                if (likely(!rc))
                        return dsz;
 
@@ -704,7 +706,7 @@ new_mtu:
                        if (!rc)
                                continue;
                }
-               __skb_queue_purge(pktchain);
+               __skb_queue_purge(&pktchain);
                if (rc == -EMSGSIZE) {
                        msg->msg_iter = save;
                        goto new_mtu;
@@ -863,7 +865,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
        struct net *net = sock_net(sk);
        struct tipc_msg *mhdr = &tsk->phdr;
        u32 dnode, dport;
-       struct sk_buff_head *pktchain = &sk->sk_write_queue;
+       struct sk_buff_head pktchain;
        struct sk_buff *skb;
        struct tipc_name_seq *seq;
        struct iov_iter save;
@@ -924,17 +926,18 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz)
                msg_set_hdr_sz(mhdr, BASIC_H_SIZE);
        }
 
+       skb_queue_head_init(&pktchain);
        save = m->msg_iter;
 new_mtu:
        mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
-       rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, pktchain);
+       rc = tipc_msg_build(mhdr, m, 0, dsz, mtu, &pktchain);
        if (rc < 0)
                return rc;
 
        do {
-               skb = skb_peek(pktchain);
+               skb = skb_peek(&pktchain);
                TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
-               rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid);
+               rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid);
                if (likely(!rc)) {
                        if (sock->state != SS_READY)
                                sock->state = SS_CONNECTING;
@@ -946,7 +949,7 @@ new_mtu:
                        if (!rc)
                                continue;
                }
-               __skb_queue_purge(pktchain);
+               __skb_queue_purge(&pktchain);
                if (rc == -EMSGSIZE) {
                        m->msg_iter = save;
                        goto new_mtu;
@@ -1016,7 +1019,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
        struct net *net = sock_net(sk);
        struct tipc_sock *tsk = tipc_sk(sk);
        struct tipc_msg *mhdr = &tsk->phdr;
-       struct sk_buff_head *pktchain = &sk->sk_write_queue;
+       struct sk_buff_head pktchain;
        DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
        u32 portid = tsk->portid;
        int rc = -EINVAL;
@@ -1044,17 +1047,19 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz)
 
        timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
        dnode = tsk_peer_node(tsk);
+       skb_queue_head_init(&pktchain);
 
 next:
        save = m->msg_iter;
        mtu = tsk->max_pkt;
        send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
-       rc = tipc_msg_build(mhdr, m, sent, send, mtu, pktchain);
+       rc = tipc_msg_build(mhdr, m, sent, send, mtu, &pktchain);
        if (unlikely(rc < 0))
                return rc;
+
        do {
                if (likely(!tsk_conn_cong(tsk))) {
-                       rc = tipc_node_xmit(net, pktchain, dnode, portid);
+                       rc = tipc_node_xmit(net, &pktchain, dnode, portid);
                        if (likely(!rc)) {
                                tsk->sent_unacked++;
                                sent += send;
@@ -1063,7 +1068,7 @@ next:
                                goto next;
                        }
                        if (rc == -EMSGSIZE) {
-                               __skb_queue_purge(pktchain);
+                               __skb_queue_purge(&pktchain);
                                tsk->max_pkt = tipc_node_get_mtu(net, dnode,
                                                                 portid);
                                m->msg_iter = save;
@@ -1077,7 +1082,7 @@ next:
                rc = tipc_wait_for_sndpkt(sock, &timeo);
        } while (!rc);
 
-       __skb_queue_purge(pktchain);
+       __skb_queue_purge(&pktchain);
        return sent ? sent : rc;
 }