dccp: Extend CCID packet dequeueing interface
authorGerrit Renker <gerrit@erg.abdn.ac.uk>
Thu, 4 Sep 2008 05:30:19 +0000 (07:30 +0200)
committerGerrit Renker <gerrit@erg.abdn.ac.uk>
Thu, 4 Sep 2008 05:45:38 +0000 (07:45 +0200)
This extends the packet dequeuing interface of dccp_write_xmit() to allow
 1. CCIDs to take care of timing when the next packet may be sent;
 2. delayed sending (as before, with an inter-packet gap up to 65.535 seconds).

The main purpose is to take CCID2 out of its polling mode (when it is network-
limited, it tries every millisecond to send, without interruption).
The interface can also be used to support other CCIDs.

The mode of operation for (2) is as follows:
 * new packet is enqueued via dccp_sendmsg() => dccp_write_xmit(),
 * ccid_hc_tx_send_packet() detects that it may not send (e.g. window full),
 * it signals this condition via `CCID_PACKET_WILL_DEQUEUE_LATER',
 * dccp_write_xmit() returns without further action;
 * after some time the wait-condition for CCID becomes true,
 * that CCID schedules the tasklet,
 * tasklet function calls ccid_hc_tx_send_packet() via dccp_write_xmit(),
 * since the wait-condition is now true, ccid_hc_tx_packet() returns "send now",
 * packet is sent, and possibly more (since dccp_write_xmit() loops).

Code reuse: the taskled function calls dccp_write_xmit(), the timer function
            reduces to a wrapper around the same code.

If the tasklet finds that the socket is locked, it re-schedules the tasklet
function (not the tasklet) after one jiffy.

Changed DCCP_BUG to dccp_pr_debug when transmit_skb returns an error (e.g. when a
local qdisc is used, NET_XMIT_DROP=1 can be returned for many packets).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
include/linux/dccp.h
net/dccp/output.c
net/dccp/timer.c

index 83197b601a4f95a00a02a40e8b524490f9739725..eed52bcd35d0d7e2e6e0e36e12ac773ada2dbb6d 100644 (file)
@@ -463,7 +463,8 @@ struct dccp_ackvec;
  * @dccps_hc_tx_insert_options - sender wants to add options when sending
  * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3)
  * @dccps_sync_scheduled - flag which signals "send out-of-band message soon"
- * @dccps_xmit_timer - timer for when CCID is not ready to send
+ * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets
+ * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing)
  * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
  */
 struct dccp_sock {
@@ -504,6 +505,7 @@ struct dccp_sock {
        __u8                            dccps_hc_tx_insert_options:1;
        __u8                            dccps_server_timewait:1;
        __u8                            dccps_sync_scheduled:1;
+       struct tasklet_struct           dccps_xmitlet;
        struct timer_list               dccps_xmit_timer;
 };
 
index bfda071559f4d2979c980dc6193d5536a91a9db5..9afd58e39e239136cbb926af965a90925e1f735a 100644 (file)
@@ -251,65 +251,98 @@ do_interrupted:
        goto out;
 }
 
+/**
+ * dccp_xmit_packet  -  Send data packet under control of CCID
+ * Transmits next-queued payload and informs CCID to account for the packet.
+ */
+static void dccp_xmit_packet(struct sock *sk)
+{
+       int err, len;
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct sk_buff *skb = skb_dequeue(&sk->sk_write_queue);
+
+       if (unlikely(skb == NULL))
+               return;
+       len = skb->len;
+
+       if (sk->sk_state == DCCP_PARTOPEN) {
+               const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
+               /*
+                * See 8.1.5 - Handshake Completion.
+                *
+                * For robustness we resend Confirm options until the client has
+                * entered OPEN. During the initial feature negotiation, the MPS
+                * is smaller than usual, reduced by the Change/Confirm options.
+                */
+               if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
+                       DCCP_WARN("Payload too large (%d) for featneg.\n", len);
+                       dccp_send_ack(sk);
+                       dccp_feat_list_purge(&dp->dccps_featneg);
+               }
+
+               inet_csk_schedule_ack(sk);
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
+                                             inet_csk(sk)->icsk_rto,
+                                             DCCP_RTO_MAX);
+               DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
+       } else if (dccp_ack_pending(sk)) {
+               DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
+       } else {
+               DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
+       }
+
+       err = dccp_transmit_skb(sk, skb);
+       if (err)
+               dccp_pr_debug("transmit_skb() returned err=%d\n", err);
+       /*
+        * Register this one as sent even if an error occurred. To the remote
+        * end a local packet drop is indistinguishable from network loss, i.e.
+        * any local drop will eventually be reported via receiver feedback.
+        */
+       ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
+
+       /*
+        * If the CCID needs to transfer additional header options out-of-band
+        * (e.g. Ack Vectors or feature-negotiation options), it activates this
+        * flag to schedule a Sync. The Sync will automatically incorporate all
+        * currently pending header options, thus clearing the backlog.
+        */
+       if (dp->dccps_sync_scheduled)
+               dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
+}
+
 void dccp_write_xmit(struct sock *sk, int block)
 {
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
 
        while ((skb = skb_peek(&sk->sk_write_queue))) {
-               int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
+               int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
 
-               if (err > 0) {
+               switch (ccid_packet_dequeue_eval(rc)) {
+               case CCID_PACKET_WILL_DEQUEUE_LATER:
+                       return;
+               case CCID_PACKET_DELAY:
                        if (!block) {
                                sk_reset_timer(sk, &dp->dccps_xmit_timer,
-                                               msecs_to_jiffies(err)+jiffies);
+                                               msecs_to_jiffies(rc)+jiffies);
+                               return;
+                       }
+                       rc = dccp_wait_for_ccid(sk, skb, rc);
+                       if (rc && rc != -EINTR) {
+                               DCCP_BUG("err=%d after dccp_wait_for_ccid", rc);
+                               skb_dequeue(&sk->sk_write_queue);
+                               kfree_skb(skb);
                                break;
-                       } else
-                               err = dccp_wait_for_ccid(sk, skb, err);
-                       if (err && err != -EINTR)
-                               DCCP_BUG("err=%d after dccp_wait_for_ccid", err);
-               }
-
-               skb_dequeue(&sk->sk_write_queue);
-               if (err == 0) {
-                       struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
-                       const int len = skb->len;
-
-                       if (sk->sk_state == DCCP_PARTOPEN) {
-                               const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
-                               /*
-                                * See 8.1.5 - Handshake Completion.
-                                *
-                                * For robustness we resend Confirm options until the client has
-                                * entered OPEN. During the initial feature negotiation, the MPS
-                                * is smaller than usual, reduced by the Change/Confirm options.
-                                */
-                               if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
-                                       DCCP_WARN("Payload too large (%d) for featneg.\n", len);
-                                       dccp_send_ack(sk);
-                                       dccp_feat_list_purge(&dp->dccps_featneg);
-                               }
-
-                               inet_csk_schedule_ack(sk);
-                               inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
-                                                 inet_csk(sk)->icsk_rto,
-                                                 DCCP_RTO_MAX);
-                               dcb->dccpd_type = DCCP_PKT_DATAACK;
-                       } else if (dccp_ack_pending(sk))
-                               dcb->dccpd_type = DCCP_PKT_DATAACK;
-                       else
-                               dcb->dccpd_type = DCCP_PKT_DATA;
-
-                       err = dccp_transmit_skb(sk, skb);
-                       ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
-                       if (err)
-                               DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
-                                        err);
-                       if (dp->dccps_sync_scheduled)
-                               dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
-               } else {
-                       dccp_pr_debug("packet discarded due to err=%d\n", err);
+                       }
+                       /* fall through */
+               case CCID_PACKET_SEND_AT_ONCE:
+                       dccp_xmit_packet(sk);
+                       break;
+               case CCID_PACKET_ERR:
+                       skb_dequeue(&sk->sk_write_queue);
                        kfree_skb(skb);
+                       dccp_pr_debug("packet discarded due to err=%d\n", rc);
                }
        }
 }
index 162d1e683c3908afc76966353f51e5dc9f769de6..9369aca4b0e9661181479ac10c205e7fac0bba6e 100644 (file)
@@ -237,32 +237,35 @@ out:
        sock_put(sk);
 }
 
-/* Transmit-delay timer: used by the CCIDs to delay actual send time */
-static void dccp_write_xmit_timer(unsigned long data)
+/**
+ * dccp_write_xmitlet  -  Workhorse for CCID packet dequeueing interface
+ * See the comments above %ccid_dequeueing_decision for supported modes.
+ */
+static void dccp_write_xmitlet(unsigned long data)
 {
        struct sock *sk = (struct sock *)data;
-       struct dccp_sock *dp = dccp_sk(sk);
 
        bh_lock_sock(sk);
        if (sock_owned_by_user(sk))
-               sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1);
+               sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
        else
                dccp_write_xmit(sk, 0);
        bh_unlock_sock(sk);
-       sock_put(sk);
 }
 
-static void dccp_init_write_xmit_timer(struct sock *sk)
+static void dccp_write_xmit_timer(unsigned long data)
 {
-       struct dccp_sock *dp = dccp_sk(sk);
-
-       setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
-                       (unsigned long)sk);
+       dccp_write_xmitlet(data);
+       sock_put((struct sock *)data);
 }
 
 void dccp_init_xmit_timers(struct sock *sk)
 {
-       dccp_init_write_xmit_timer(sk);
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk);
+       setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer,
+                                                            (unsigned long)sk);
        inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
                                  &dccp_keepalive_timer);
 }