[NET]: Speed up __alloc_skb()
author Benjamin LaHaise <bcrl@kvack.org>
Tue, 3 Jan 2006 22:06:50 +0000 (14:06 -0800)
committer David S. Miller <davem@davemloft.net>
Tue, 3 Jan 2006 22:06:50 +0000 (14:06 -0800)
From: Benjamin LaHaise <bcrl@kvack.org>

In __alloc_skb(), the use of skb_shinfo() which casts a u8 * to the
shared info structure results in gcc being forced to do a reload of the
pointer since it has no information on possible aliasing.  Fix this by
using a pointer to refer to skb_shared_info.

By initializing skb_shared_info sequentially, the write combining buffers
can reduce the number of memory transactions to a single write.  Reorder
the initialization in __alloc_skb() to match the structure definition.
There is also an alignment issue on 64-bit systems with skb_shared_info;
by converting nr_frags to a short, everything packs up nicely.

Also, pass the slab cache pointer according to the fclone flag instead
of using two almost identical function calls.

This raises bw_unix performance up to a peak of 707KB/s when combined
with the spinlock patch.  It should help other networking protocols, too.

Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/skbuff.h
net/core/skbuff.c

index 971677178e0ce38bfc83c6aa0cdade1e73ccad2c..483cfc47ec34203535f2fe0f0ed776c95da7c077 100644 (file)
@@ -133,7 +133,7 @@ struct skb_frag_struct {
  */
 struct skb_shared_info {
        atomic_t        dataref;
-       unsigned int    nr_frags;
+       unsigned short  nr_frags;
        unsigned short  tso_size;
        unsigned short  tso_segs;
        unsigned short  ufo_size;
index 83fee37de38ee7caa37f102eec58adc383e5a585..070f91cfde598cf20c20b1b291479a832922657b 100644 (file)
@@ -135,17 +135,13 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
                            int fclone)
 {
+       struct skb_shared_info *shinfo;
        struct sk_buff *skb;
        u8 *data;
 
        /* Get the HEAD */
-       if (fclone)
-               skb = kmem_cache_alloc(skbuff_fclone_cache,
-                                      gfp_mask & ~__GFP_DMA);
-       else
-               skb = kmem_cache_alloc(skbuff_head_cache,
-                                      gfp_mask & ~__GFP_DMA);
-
+       skb = kmem_cache_alloc(fclone ? skbuff_fclone_cache : skbuff_head_cache,
+                               gfp_mask & ~__GFP_DMA);
        if (!skb)
                goto out;
 
@@ -162,6 +158,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
        skb->data = data;
        skb->tail = data;
        skb->end  = data + size;
+       /* make sure we initialize shinfo sequentially */
+       shinfo = skb_shinfo(skb);
+       atomic_set(&shinfo->dataref, 1);
+       shinfo->nr_frags  = 0;
+       shinfo->tso_size = 0;
+       shinfo->tso_segs = 0;
+       shinfo->ufo_size = 0;
+       shinfo->ip6_frag_id = 0;
+       shinfo->frag_list = NULL;
+
        if (fclone) {
                struct sk_buff *child = skb + 1;
                atomic_t *fclone_ref = (atomic_t *) (child + 1);
@@ -171,13 +177,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
                child->fclone = SKB_FCLONE_UNAVAILABLE;
        }
-       atomic_set(&(skb_shinfo(skb)->dataref), 1);
-       skb_shinfo(skb)->nr_frags  = 0;
-       skb_shinfo(skb)->tso_size = 0;
-       skb_shinfo(skb)->tso_segs = 0;
-       skb_shinfo(skb)->frag_list = NULL;
-       skb_shinfo(skb)->ufo_size = 0;
-       skb_shinfo(skb)->ip6_frag_id = 0;
 out:
        return skb;
 nodata: