aoe: update copyright year in touched files
[GitHub/mt8127/android_kernel_alcatel_ttab.git] / drivers / block / aoe / aoecmd.c
index 887f68f6d79a9e615beba7525cb9d0e671917c4e..39dacdbda7f15d6abd12f51a3715ed52381dcc46 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2007 Coraid, Inc.  See COPYING for GPL terms. */
+/* Copyright (c) 2012 Coraid, Inc.  See COPYING for GPL terms. */
 /*
  * aoecmd.c
  * Filesystem request handling methods
 #include <linux/netdevice.h>
 #include <linux/genhd.h>
 #include <linux/moduleparam.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
 #include <net/net_namespace.h>
 #include <asm/unaligned.h>
+#include <linux/uio.h>
 #include "aoe.h"
 
+#define MAXIOC (8192)  /* default meant to avoid most soft lockups */
+
+static void ktcomplete(struct frame *, struct sk_buff *);
+
+static struct buf *nextbuf(struct aoedev *);
+
 static int aoe_deadsecs = 60 * 3;
 module_param(aoe_deadsecs, int, 0644);
 MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");
@@ -25,6 +34,15 @@ module_param(aoe_maxout, int, 0644);
 MODULE_PARM_DESC(aoe_maxout,
        "Only aoe_maxout outstanding packets for every MAC on eX.Y.");
 
+static wait_queue_head_t ktiowq;       /* NOTE(review): wait queue; its waiters and wakers are outside this chunk */
+static struct ktstate kts;     /* NOTE(review): struct ktstate is declared elsewhere - presumably kthread state, confirm */
+
+/* io completion queue */
+static struct {
+       struct list_head head;  /* the queue itself */
+       spinlock_t lock;        /* presumably serializes access to head - TODO confirm against users */
+} iocq;
+
 static struct sk_buff *
 new_skb(ulong len)
 {
@@ -41,15 +59,21 @@ new_skb(ulong len)
 }
 
 static struct frame *
-getframe(struct aoetgt *t, int tag)
+getframe(struct aoedev *d, u32 tag)    /* find the active frame carrying tag, unlink it and return it; NULL if absent */
 {
-       struct frame *f, *e;
+       struct frame *f;
+       struct list_head *head, *pos, *nx;
+       u32 n;
 
-       f = t->frames;
-       e = f + t->nframes;
-       for (; f<e; f++)
-               if (f->tag == tag)
+       n = tag % NFACTIVE;     /* the tag selects one of the NFACTIVE buckets in d->factive */
+       head = &d->factive[n];
+       list_for_each_safe(pos, nx, head) {     /* _safe variant because pos may be unlinked below */
+               f = list_entry(pos, struct frame, head);
+               if (f->tag == tag) {
+                       list_del(pos);  /* the frame leaves the active list before it is returned */
                        return f;
+               }
+       }
        return NULL;
 }
 
@@ -59,18 +83,18 @@ getframe(struct aoetgt *t, int tag)
  * This driver reserves tag -1 to mean "unused frame."
  */
 static int
-newtag(struct aoetgt *t)
+newtag(struct aoedev *d)       /* build a tag from jiffies and the per-device counter d->lasttag */
 {
        register ulong n;
 
        n = jiffies & 0xffff;
-       return n |= (++t->lasttag & 0x7fff) << 16;
+       return n |= (++d->lasttag & 0x7fff) << 16;     /* bits 0-15: jiffies; bits 16-30: pre-incremented lasttag */
 }
 
-static int
+static u32
 aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
 {
-       u32 host_tag = newtag(t);
+       u32 host_tag = newtag(d);
 
        memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
        memcpy(h->dst, t->addr, sizeof h->dst);
@@ -95,16 +119,18 @@ put_lba(struct aoe_atahdr *ah, sector_t lba)
        ah->lba5 = lba >>= 8;
 }
 
-static void
+static struct aoeif *  /* returns the interface rotated to, or NULL when the target has none */
 ifrotate(struct aoetgt *t)
 {
-       t->ifp++;
-       if (t->ifp >= &t->ifs[NAOEIFS] || t->ifp->nd == NULL)
-               t->ifp = t->ifs;
-       if (t->ifp->nd == NULL) {
-               printk(KERN_INFO "aoe: no interface to rotate to\n");
-               BUG();
-       }
+       struct aoeif *ifp;
+
+       ifp = t->ifp;
+       ifp++;  /* candidate: the slot after the current interface */
+       if (ifp >= &t->ifs[NAOEIFS] || ifp->nd == NULL)        /* past the array or at an empty slot: wrap to slot 0 */
+               ifp = t->ifs;
+       if (ifp->nd == NULL)    /* even the first slot has no net device */
+               return NULL;    /* t->ifp is left unchanged in this case */
+       return t->ifp = ifp;    /* commit the rotation and hand back the new interface */
 }
 
 static void
@@ -129,78 +155,128 @@ skb_pool_get(struct aoedev *d)
        return NULL;
 }
 
-/* freeframe is where we do our load balancing so it's a little hairy. */
+void
+aoe_freetframe(struct frame *f)        /* put frame f back on the free list of its target f->t */
+{
+       struct aoetgt *t;
+
+       t = f->t;
+       f->buf = NULL;  /* drop the per-I/O pointers; f->skb and f->t are kept */
+       f->bv = NULL;
+       f->r_skb = NULL;
+       list_add(&f->head, &t->ffree);  /* added at the head, which is where newtframe takes frames from */
+}
+
 static struct frame *
-freeframe(struct aoedev *d)
+newtframe(struct aoedev *d, struct aoetgt *t)  /* get a frame for target t with an empty, unshared skb; NULL on failure */
 {
-       struct frame *f, *e, *rf;
-       struct aoetgt **t;
+       struct frame *f;
        struct sk_buff *skb;
+       struct list_head *pos;
+
+       if (list_empty(&t->ffree)) {    /* no recycled frame available: allocate one */
+               if (t->falloc >= NSKBPOOLMAX*2) /* cap on frames allocated per target */
+                       return NULL;
+               f = kcalloc(1, sizeof(*f), GFP_ATOMIC); /* zeroed, so f->skb starts out NULL */
+               if (f == NULL)
+                       return NULL;
+               t->falloc++;
+               f->t = t;
+       } else {
+               pos = t->ffree.next;    /* reuse the first frame on the free list */
+               list_del(pos);
+               f = list_entry(pos, struct frame, head);
+       }
+
+       skb = f->skb;
+       if (skb == NULL) {      /* frame has no skb yet */
+               f->skb = skb = new_skb(ETH_ZLEN);
+               if (!skb) {
+bail:                  aoe_freetframe(f);      /* failure path: the frame goes back on t->ffree */
+                       return NULL;
+               }
+       }
+
+       if (atomic_read(&skb_shinfo(skb)->dataref) != 1) {      /* skb data is still referenced elsewhere */
+               skb = skb_pool_get(d);  /* try to take a replacement from the device pool */
+               if (skb == NULL)
+                       goto bail;
+               skb_pool_put(d, f->skb);        /* park the still-referenced skb in the pool */
+               f->skb = skb;
+       }
+
+       skb->truesize -= skb->data_len; /* undo the accounting for any previously attached page data */
+       skb_shinfo(skb)->nr_frags = skb->data_len = 0;  /* detach all page fragments */
+       skb_trim(skb, 0);       /* and reset the length to zero */
+       return f;
+}
+
+static struct frame *
+newframe(struct aoedev *d)     /* scan targets round-robin and return a frame from the first usable one; NULL if none */
+{
+       struct frame *f;
+       struct aoetgt *t, **tt;
+       int totout = 0; /* sum of nout over every target visited in this scan */
 
        if (d->targets[0] == NULL) {    /* shouldn't happen, but I'm paranoid */
                printk(KERN_ERR "aoe: NULL TARGETS!\n");
                return NULL;
        }
-       t = d->tgt;
-       t++;
-       if (t >= &d->targets[NTARGETS] || !*t)
-               t = d->targets;
+       tt = d->tgt;    /* last used target */
        for (;;) {
-               if ((*t)->nout < (*t)->maxout
+               tt++;   /* advance first, so the scan begins just after the last used target */
+               if (tt >= &d->targets[NTARGETS] || !*tt)        /* past the array or at an empty slot: wrap */
+                       tt = d->targets;
+               t = *tt;
+               totout += t->nout;
+               if (t->nout < t->maxout        /* target is below its outstanding-frame limit */
                && t != d->htgt
-               && (*t)->ifp->nd) {
-                       rf = NULL;
-                       f = (*t)->frames;
-                       e = f + (*t)->nframes;
-                       for (; f < e; f++) {
-                               if (f->tag != FREETAG)
-                                       continue;
-                               skb = f->skb;
-                               if (!skb
-                               && !(f->skb = skb = new_skb(ETH_ZLEN)))
-                                       continue;
-                               if (atomic_read(&skb_shinfo(skb)->dataref)
-                                       != 1) {
-                                       if (!rf)
-                                               rf = f;
-                                       continue;
-                               }
-gotone:                                skb_shinfo(skb)->nr_frags = skb->data_len = 0;
-                               skb_trim(skb, 0);
-                               d->tgt = t;
-                               ifrotate(*t);
+               && t->ifp->nd) {        /* and its current interface has a net device */
+                       f = newtframe(d, t);
+                       if (f) {
+                               ifrotate(t);    /* return value is not checked here */
+                               d->tgt = tt;    /* becomes the new "last used target" */
                                return f;
                        }
-                       /* Work can be done, but the network layer is
-                          holding our precious packets.  Try to grab
-                          one from the pool. */
-                       f = rf;
-                       if (f == NULL) {        /* more paranoia */
-                               printk(KERN_ERR
-                                       "aoe: freeframe: %s.\n",
-                                       "unexpected null rf");
-                               d->flags |= DEVFL_KICKME;
-                               return NULL;
-                       }
-                       skb = skb_pool_get(d);
-                       if (skb) {
-                               skb_pool_put(d, f->skb);
-                               f->skb = skb;
-                               goto gotone;
-                       }
-                       (*t)->dataref++;
-                       if ((*t)->nout == 0)
-                               d->flags |= DEVFL_KICKME;
                }
-               if (t == d->tgt)        /* we've looped and found nada */
+               if (tt == d->tgt)       /* we've looped and found nada */
                        break;
-               t++;
-               if (t >= &d->targets[NTARGETS] || !*t)
-                       t = d->targets;
+       }
+       if (totout == 0) {      /* no frame is outstanding on any target, yet none could be obtained */
+               d->kicked++;
+               d->flags |= DEVFL_KICKME;       /* rexmit_timer tests this flag and re-runs the request function */
        }
        return NULL;
 }
 
+static void
+skb_fillup(struct sk_buff *skb, struct bio_vec *bv, ulong off, ulong cnt)      /* attach cnt bytes, starting at page offset off in bv, to skb as page fragments */
+{
+       int frag = 0;   /* index of the next skb fragment slot; NOTE(review): no upper bound is checked here */
+       ulong fcnt;
+loop:
+       fcnt = bv->bv_len - (off - bv->bv_offset);      /* bytes left in this bio_vec from off onward */
+       if (fcnt > cnt)
+               fcnt = cnt;     /* never attach more than the caller asked for */
+       skb_fill_page_desc(skb, frag++, bv->bv_page, off, fcnt);
+       cnt -= fcnt;
+       if (cnt <= 0)   /* cnt is unsigned, so this is true only when cnt == 0 */
+               return;
+       bv++;   /* continue with the next bio_vec ... */
+       off = bv->bv_offset;    /* ... from the start of its data */
+       goto loop;
+}
+
+static void
+fhash(struct frame *f) /* put frame f on the device's active list bucket chosen by f->tag */
+{
+       struct aoedev *d = f->t->d;
+       u32 n;
+
+       n = f->tag % NFACTIVE;  /* same bucket computation that getframe uses for lookup */
+       list_add_tail(&f->head, &d->factive[n]);        /* appended, so each bucket stays in insertion order */
+}
+
 static int
 aoecmd_ata_rw(struct aoedev *d)
 {
@@ -208,26 +284,47 @@ aoecmd_ata_rw(struct aoedev *d)
        struct aoe_hdr *h;
        struct aoe_atahdr *ah;
        struct buf *buf;
-       struct bio_vec *bv;
        struct aoetgt *t;
        struct sk_buff *skb;
-       ulong bcnt;
+       struct sk_buff_head queue;
+       ulong bcnt, fbcnt;
        char writebit, extbit;
 
        writebit = 0x10;
        extbit = 0x4;
 
-       f = freeframe(d);
+       buf = nextbuf(d);
+       if (buf == NULL)
+               return 0;
+       f = newframe(d);
        if (f == NULL)
                return 0;
        t = *d->tgt;
-       buf = d->inprocess;
-       bv = buf->bv;
-       bcnt = t->ifp->maxbcnt;
+       bcnt = d->maxbcnt;
        if (bcnt == 0)
                bcnt = DEFAULTBCNT;
-       if (bcnt > buf->bv_resid)
-               bcnt = buf->bv_resid;
+       if (bcnt > buf->resid)
+               bcnt = buf->resid;
+       fbcnt = bcnt;
+       f->bv = buf->bv;
+       f->bv_off = f->bv->bv_offset + (f->bv->bv_len - buf->bv_resid);
+       do {
+               if (fbcnt < buf->bv_resid) {
+                       buf->bv_resid -= fbcnt;
+                       buf->resid -= fbcnt;
+                       break;
+               }
+               fbcnt -= buf->bv_resid;
+               buf->resid -= buf->bv_resid;
+               if (buf->resid == 0) {
+                       d->ip.buf = NULL;
+                       break;
+               }
+               buf->bv++;
+               buf->bv_resid = buf->bv->bv_len;
+               WARN_ON(buf->bv_resid == 0);
+       } while (fbcnt);
+
        /* initialize the headers & frame */
        skb = f->skb;
        h = (struct aoe_hdr *) skb_mac_header(skb);
@@ -235,10 +332,10 @@ aoecmd_ata_rw(struct aoedev *d)
        skb_put(skb, sizeof *h + sizeof *ah);
        memset(h, 0, skb->len);
        f->tag = aoehdr_atainit(d, t, h);
+       fhash(f);
        t->nout++;
        f->waited = 0;
        f->buf = buf;
-       f->bufaddr = page_address(bv->bv_page) + buf->bv_off;
        f->bcnt = bcnt;
        f->lba = buf->sector;
 
@@ -253,10 +350,11 @@ aoecmd_ata_rw(struct aoedev *d)
                ah->lba3 |= 0xe0;       /* LBA bit + obsolete 0xa0 */
        }
        if (bio_data_dir(buf->bio) == WRITE) {
-               skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
+               skb_fillup(skb, f->bv, f->bv_off, bcnt);
                ah->aflags |= AOEAFL_WRITE;
                skb->len += bcnt;
                skb->data_len = bcnt;
+               skb->truesize += bcnt;
                t->wpkts++;
        } else {
                t->rpkts++;
@@ -267,23 +365,15 @@ aoecmd_ata_rw(struct aoedev *d)
 
        /* mark all tracking fields and load out */
        buf->nframesout += 1;
-       buf->bv_off += bcnt;
-       buf->bv_resid -= bcnt;
-       buf->resid -= bcnt;
        buf->sector += bcnt >> 9;
-       if (buf->resid == 0) {
-               d->inprocess = NULL;
-       } else if (buf->bv_resid == 0) {
-               buf->bv = ++bv;
-               buf->bv_resid = bv->bv_len;
-               WARN_ON(buf->bv_resid == 0);
-               buf->bv_off = bv->bv_offset;
-       }
 
        skb->dev = t->ifp->nd;
        skb = skb_clone(skb, GFP_ATOMIC);
-       if (skb)
-               __skb_queue_tail(&d->sendq, skb);
+       if (skb) {
+               __skb_queue_head_init(&queue);
+               __skb_queue_tail(&queue, skb);
+               aoenet_xmit(&queue);
+       }
        return 1;
 }
 
@@ -330,17 +420,25 @@ cont:
 }
 
 static void
-resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
+resend(struct aoedev *d, struct frame *f)
 {
        struct sk_buff *skb;
+       struct sk_buff_head queue;
        struct aoe_hdr *h;
        struct aoe_atahdr *ah;
+       struct aoetgt *t;
        char buf[128];
        u32 n;
 
-       ifrotate(t);
-       n = newtag(t);
+       t = f->t;
+       n = newtag(d);
        skb = f->skb;
+       if (ifrotate(t) == NULL) {
+               /* probably can't happen, but set it up to fail anyway */
+               pr_info("aoe: resend: no interfaces to rotate to.\n");
+               ktcomplete(f, NULL);
+               return;
+       }
        h = (struct aoe_hdr *) skb_mac_header(skb);
        ah = (struct aoe_atahdr *) (h+1);
 
@@ -351,39 +449,22 @@ resend(struct aoedev *d, struct aoetgt *t, struct frame *f)
        aoechr_error(buf);
 
        f->tag = n;
+       fhash(f);
        h->tag = cpu_to_be32(n);
        memcpy(h->dst, t->addr, sizeof h->dst);
        memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
 
-       switch (ah->cmdstat) {
-       default:
-               break;
-       case ATA_CMD_PIO_READ:
-       case ATA_CMD_PIO_READ_EXT:
-       case ATA_CMD_PIO_WRITE:
-       case ATA_CMD_PIO_WRITE_EXT:
-               put_lba(ah, f->lba);
-
-               n = f->bcnt;
-               if (n > DEFAULTBCNT)
-                       n = DEFAULTBCNT;
-               ah->scnt = n >> 9;
-               if (ah->aflags & AOEAFL_WRITE) {
-                       skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
-                               offset_in_page(f->bufaddr), n);
-                       skb->len = sizeof *h + sizeof *ah + n;
-                       skb->data_len = n;
-               }
-       }
        skb->dev = t->ifp->nd;
        skb = skb_clone(skb, GFP_ATOMIC);
        if (skb == NULL)
                return;
-       __skb_queue_tail(&d->sendq, skb);
+       __skb_queue_head_init(&queue);
+       __skb_queue_tail(&queue, skb);
+       aoenet_xmit(&queue);
 }
 
 static int
-tsince(int tag)
+tsince(u32 tag)
 {
        int n;
 
@@ -407,58 +488,65 @@ getif(struct aoetgt *t, struct net_device *nd)
        return NULL;
 }
 
-static struct aoeif *
-addif(struct aoetgt *t, struct net_device *nd)
-{
-       struct aoeif *p;
-
-       p = getif(t, NULL);
-       if (!p)
-               return NULL;
-       p->nd = nd;
-       p->maxbcnt = DEFAULTBCNT;
-       p->lost = 0;
-       p->lostjumbo = 0;
-       return p;
-}
-
 static void
 ejectif(struct aoetgt *t, struct aoeif *ifp)
 {
        struct aoeif *e;
+       struct net_device *nd;  /* device of the interface being ejected */
        ulong n;
 
+       nd = ifp->nd;   /* saved now because the memmove below overwrites *ifp */
        e = t->ifs + NAOEIFS - 1;
        n = (e - ifp) * sizeof *ifp;
        memmove(ifp, ifp+1, n);
        e->nd = NULL;
+       dev_put(nd);    /* release the net_device reference; NOTE(review): the matching hold is not visible in this chunk */
 }
 
 static int
 sthtith(struct aoedev *d)
 {
-       struct frame *f, *e, *nf;
+       struct frame *f, *nf;
+       struct list_head *nx, *pos, *head;
        struct sk_buff *skb;
-       struct aoetgt *ht = *d->htgt;
-
-       f = ht->frames;
-       e = f + ht->nframes;
-       for (; f < e; f++) {
-               if (f->tag == FREETAG)
-                       continue;
-               nf = freeframe(d);
-               if (!nf)
-                       return 0;
-               skb = nf->skb;
-               *nf = *f;
-               f->skb = skb;
-               f->tag = FREETAG;
-               nf->waited = 0;
-               ht->nout--;
-               (*d->tgt)->nout++;
-               resend(d, *d->tgt, nf);
+       struct aoetgt *ht = d->htgt;
+       int i;
+
+       for (i = 0; i < NFACTIVE; i++) {
+               head = &d->factive[i];
+               list_for_each_safe(pos, nx, head) {
+                       f = list_entry(pos, struct frame, head);
+                       if (f->t != ht)
+                               continue;
+
+                       nf = newframe(d);
+                       if (!nf)
+                               return 0;
+
+                       /* remove frame from active list */
+                       list_del(pos);
+
+                       /* reassign all pertinent bits to new outbound frame */
+                       skb = nf->skb;
+                       nf->skb = f->skb;
+                       nf->buf = f->buf;
+                       nf->bcnt = f->bcnt;
+                       nf->lba = f->lba;
+                       nf->bv = f->bv;
+                       nf->bv_off = f->bv_off;
+                       nf->waited = 0;
+                       f->skb = skb;
+                       aoe_freetframe(f);
+                       ht->nout--;
+                       nf->t->nout++;
+                       resend(d, nf);
+               }
        }
-       /* he's clean, he's useless.  take away his interfaces */
+       /* We've cleaned up the outstanding so take away his
+        * interfaces so he won't be used.  We should remove him from
+        * the target array here, but cleaning up a target is
+        * involved.  PUNT!
+        */
        memset(ht->ifs, 0, sizeof ht->ifs);
        d->htgt = NULL;
        return 1;
@@ -477,13 +565,15 @@ ata_scnt(unsigned char *packet) {
 static void
 rexmit_timer(ulong vp)
 {
-       struct sk_buff_head queue;
        struct aoedev *d;
        struct aoetgt *t, **tt, **te;
        struct aoeif *ifp;
-       struct frame *f, *e;
+       struct frame *f;
+       struct list_head *head, *pos, *nx;
+       LIST_HEAD(flist);
        register long timeout;
        ulong flags, n;
+       int i;
 
        d = (struct aoedev *) vp;
 
@@ -497,58 +587,22 @@ rexmit_timer(ulong vp)
                spin_unlock_irqrestore(&d->lock, flags);
                return;
        }
-       tt = d->targets;
-       te = tt + NTARGETS;
-       for (; tt < te && *tt; tt++) {
-               t = *tt;
-               f = t->frames;
-               e = f + t->nframes;
-               for (; f < e; f++) {
-                       if (f->tag == FREETAG
-                       || tsince(f->tag) < timeout)
-                               continue;
-                       n = f->waited += timeout;
-                       n /= HZ;
-                       if (n > aoe_deadsecs) {
-                               /* waited too long.  device failure. */
-                               aoedev_downdev(d);
-                               break;
-                       }
-
-                       if (n > HELPWAIT /* see if another target can help */
-                       && (tt != d->targets || d->targets[1]))
-                               d->htgt = tt;
-
-                       if (t->nout == t->maxout) {
-                               if (t->maxout > 1)
-                                       t->maxout--;
-                               t->lastwadj = jiffies;
-                       }
-
-                       ifp = getif(t, f->skb->dev);
-                       if (ifp && ++ifp->lost > (t->nframes << 1)
-                       && (ifp != t->ifs || t->ifs[1].nd)) {
-                               ejectif(t, ifp);
-                               ifp = NULL;
-                       }
 
-                       if (ata_scnt(skb_mac_header(f->skb)) > DEFAULTBCNT / 512
-                       && ifp && ++ifp->lostjumbo > (t->nframes << 1)
-                       && ifp->maxbcnt != DEFAULTBCNT) {
-                               printk(KERN_INFO
-                                       "aoe: e%ld.%d: "
-                                       "too many lost jumbo on "
-                                       "%s:%pm - "
-                                       "falling back to %d frames.\n",
-                                       d->aoemajor, d->aoeminor,
-                                       ifp->nd->name, t->addr,
-                                       DEFAULTBCNT);
-                               ifp->maxbcnt = 0;
-                       }
-                       resend(d, t, f);
+       /* collect all frames to rexmit into flist */
+       for (i = 0; i < NFACTIVE; i++) {
+               head = &d->factive[i];
+               list_for_each_safe(pos, nx, head) {
+                       f = list_entry(pos, struct frame, head);
+                       if (tsince(f->tag) < timeout)
+                               break;  /* end of expired frames */
+                       /* move to flist for later processing */
+                       list_move_tail(pos, &flist);
                }
-
-               /* window check */
+       }
+       /* window check */
+       tt = d->targets;
+       te = tt + d->ntargets;
+       for (; tt < te && (t = *tt); tt++) {
                if (t->nout == t->maxout
                && t->maxout < t->nframes
                && (jiffies - t->lastwadj)/HZ > 10) {
@@ -557,45 +611,173 @@ rexmit_timer(ulong vp)
                }
        }
 
-       if (!skb_queue_empty(&d->sendq)) {
+       if (!list_empty(&flist)) {      /* retransmissions necessary */
                n = d->rttavg <<= 1;
                if (n > MAXTIMER)
                        d->rttavg = MAXTIMER;
        }
 
-       if (d->flags & DEVFL_KICKME || d->htgt) {
-               d->flags &= ~DEVFL_KICKME;
-               aoecmd_work(d);
+       /* process expired frames */
+       while (!list_empty(&flist)) {
+               pos = flist.next;
+               f = list_entry(pos, struct frame, head);
+               n = f->waited += timeout;
+               n /= HZ;
+               if (n > aoe_deadsecs) {
+                       /* Waited too long.  Device failure.
+                        * Hang all frames on first hash bucket for downdev
+                        * to clean up.
+                        */
+                       list_splice(&flist, &d->factive[0]);
+                       aoedev_downdev(d);
+                       break;
+               }
+               list_del(pos);
+
+               t = f->t;
+               if (n > aoe_deadsecs/2)
+                       d->htgt = t; /* see if another target can help */
+
+               if (t->nout == t->maxout) {
+                       if (t->maxout > 1)
+                               t->maxout--;
+                       t->lastwadj = jiffies;
+               }
+
+               ifp = getif(t, f->skb->dev);
+               if (ifp && ++ifp->lost > (t->nframes << 1)
+               && (ifp != t->ifs || t->ifs[1].nd)) {
+                       ejectif(t, ifp);
+                       ifp = NULL;
+               }
+               resend(d, f);
        }
 
-       __skb_queue_head_init(&queue);
-       skb_queue_splice_init(&d->sendq, &queue);
+       if ((d->flags & DEVFL_KICKME || d->htgt) && d->blkq) {
+               d->flags &= ~DEVFL_KICKME;
+               d->blkq->request_fn(d->blkq);
+       }
 
        d->timer.expires = jiffies + TIMERTICK;
        add_timer(&d->timer);
 
        spin_unlock_irqrestore(&d->lock, flags);
+}
 
-       aoenet_xmit(&queue);
+static unsigned long
+rqbiocnt(struct request *r)    /* count the bios chained on request r */
+{
+       struct bio *bio;
+       unsigned long n = 0;
+
+       __rq_for_each_bio(bio, r)       /* one iteration per bio in the request */
+               n++;
+       return n;
+}
+
+/* This can be removed if we are certain that no users of the block
+ * layer will ever use zero-count pages in bios.  Otherwise we have to
+ * protect against the put_page sometimes done by the network layer.
+ *
+ * See http://oss.sgi.com/archives/xfs/2007-01/msg00594.html for
+ * discussion.
+ *
+ * We cannot use get_page in the workaround, because it insists on a
+ * positive page count as a precondition.  So we use _count directly.
+ */
+static void
+bio_pageinc(struct bio *bio)   /* raise the count of every page referenced by bio's segments */
+{
+       struct bio_vec *bv;
+       struct page *page;
+       int i;
+
+       bio_for_each_segment(bv, bio, i) {      /* one bio_vec per iteration */
+               page = bv->bv_page;
+               /* Non-zero page count for non-head members of
+                * compound pages is no longer allowed by the kernel,
+                * but this has never been seen here.
+                */
+               if (unlikely(PageCompound(page)))
+                       if (compound_trans_head(page) != page) {        /* page is not the head of its compound page */
+                               pr_crit("page tail used for block I/O\n");
+                               BUG();
+                       }
+               atomic_inc(&page->_count);      /* direct increment; bio_pagedec performs the matching decrement */
+       }
+}
+
+static void
+bio_pagedec(struct bio *bio)   /* lower the count of every page referenced by bio's segments; inverse of bio_pageinc */
+{
+       struct bio_vec *bv;
+       int i;
+
+       bio_for_each_segment(bv, bio, i)        /* one bio_vec per iteration */
+               atomic_dec(&bv->bv_page->_count);       /* plain decrement: nothing here frees the page */
+}
+
+static void
+bufinit(struct buf *buf, struct request *rq, struct bio *bio)  /* initialize buf to track the I/O of one bio belonging to rq */
+{
+       struct bio_vec *bv;
+
+       memset(buf, 0, sizeof(*buf));   /* every field not set below starts at zero */
+       buf->rq = rq;
+       buf->bio = bio;
+       buf->resid = bio->bi_size;      /* bytes of the whole bio still to be issued */
+       buf->sector = bio->bi_sector;
+       bio_pageinc(bio);       /* balanced by the bio_pagedec call in aoe_end_buf */
+       buf->bv = bv = &bio->bi_io_vec[bio->bi_idx];    /* start at the bio's current segment */
+       buf->bv_resid = bv->bv_len;     /* bytes of that segment still to be issued */
+       WARN_ON(buf->bv_resid == 0);
+}
+
+static struct buf *
+nextbuf(struct aoedev *d)      /* return the buf to issue frames for, starting a new request or bio as needed; NULL if none */
+{
+       struct request *rq;
+       struct request_queue *q;
+       struct buf *buf;
+       struct bio *bio;
+
+       q = d->blkq;
+       if (q == NULL)
+               return NULL;    /* initializing */
+       if (d->ip.buf)  /* a buf is already in process: keep using it */
+               return d->ip.buf;
+       rq = d->ip.rq;
+       if (rq == NULL) {       /* no request in process: take the next one off the queue */
+               rq = blk_peek_request(q);
+               if (rq == NULL)
+                       return NULL;
+               blk_start_request(rq);
+               d->ip.rq = rq;
+               d->ip.nxbio = rq->bio;  /* next bio to wrap in a buf */
+               rq->special = (void *) rqbiocnt(rq);    /* bio count stored as an integer; aoe_end_buf decrements it */
+       }
+       buf = mempool_alloc(d->bufpool, GFP_ATOMIC);
+       if (buf == NULL) {      /* d->ip.rq and d->ip.nxbio stay set, so a later call resumes here */
+               pr_err("aoe: nextbuf: unable to mempool_alloc!\n");
+               return NULL;
+       }
+       bio = d->ip.nxbio;
+       bufinit(buf, rq, bio);
+       bio = bio->bi_next;     /* advance to the following bio of the request */
+       d->ip.nxbio = bio;
+       if (bio == NULL)        /* that was the last bio: the request is no longer "in process" for this function */
+               d->ip.rq = NULL;
+       return d->ip.buf = buf; /* record the buf as in process and return it */
+}
 
 /* enters with d->lock held */
 void
 aoecmd_work(struct aoedev *d)
 {
-       struct buf *buf;
-loop:
        if (d->htgt && !sthtith(d))
                return;
-       if (d->inprocess == NULL) {
-               if (list_empty(&d->bufq))
-                       return;
-               buf = container_of(d->bufq.next, struct buf, bufs);
-               list_del(d->bufq.next);
-               d->inprocess = buf;
-       }
-       if (aoecmd_ata_rw(d))
-               goto loop;
+       while (aoecmd_ata_rw(d))        /* issue frames until aoecmd_ata_rw returns 0 (no buf or no frame available) */
+               ;
 }
 
 /* this function performs work that has been deferred until sleeping is OK
@@ -604,28 +786,25 @@ void
 aoecmd_sleepwork(struct work_struct *work)
 {
        struct aoedev *d = container_of(work, struct aoedev, work);
+       struct block_device *bd;
+       u64 ssize;
 
        if (d->flags & DEVFL_GDALLOC)
                aoeblk_gdalloc(d);
 
        if (d->flags & DEVFL_NEWSIZE) {
-               struct block_device *bd;
-               unsigned long flags;
-               u64 ssize;
-
                ssize = get_capacity(d->gd);
                bd = bdget_disk(d->gd, 0);
-
                if (bd) {
                        mutex_lock(&bd->bd_inode->i_mutex);
                        i_size_write(bd->bd_inode, (loff_t)ssize<<9);
                        mutex_unlock(&bd->bd_inode->i_mutex);
                        bdput(bd);
                }
-               spin_lock_irqsave(&d->lock, flags);
+               spin_lock_irq(&d->lock);
                d->flags |= DEVFL_UP;
                d->flags &= ~DEVFL_NEWSIZE;
-               spin_unlock_irqrestore(&d->lock, flags);
+               spin_unlock_irq(&d->lock);
        }
 }
 
@@ -718,163 +897,299 @@ gettgt(struct aoedev *d, char *addr)
        return NULL;
 }
 
-static inline void
-diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector)
+/*
+ * Copy cnt bytes of payload from the start of skb into the pages
+ * described by consecutive bio_vecs.  Copying begins at byte offset
+ * off within bv's page and moves on to the following bio_vec (at its
+ * own bv_offset) whenever the current one is exhausted.
+ */
+static void
+bvcpy(struct bio_vec *bv, ulong off, struct sk_buff *skb, long cnt)
+{
+       int soff = 0;           /* read position within skb */
+
+       for (;;) {
+               ulong fcnt;
+               char *p;
+
+               /* room left in this bio_vec past off, capped at cnt */
+               fcnt = bv->bv_len - (off - bv->bv_offset);
+               if (fcnt > cnt)
+                       fcnt = cnt;
+               p = page_address(bv->bv_page) + off;
+               skb_copy_bits(skb, soff, p, fcnt);
+               soff += fcnt;
+               cnt -= fcnt;
+               if (cnt <= 0)
+                       return;
+               /* carry on at the start of the next vector */
+               bv++;
+               off = bv->bv_offset;
+       }
+}
+
+/*
+ * Complete every bio of rq, one bio's worth of bytes per
+ * __blk_end_request() call.  A bio is reported as -EIO when fastfail
+ * is set or its BIO_UPTODATE flag is clear, and as success otherwise.
+ * Unless fastfail is set, the queue's request_fn is invoked afterwards.
+ */
+void
+aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
+{
+       struct request_queue *q = d->blkq;
+       struct bio *bio;
+       int err;
+
+       /* rq is being ended; stop tracking it as the request in progress */
+       if (d->ip.rq == rq)
+               d->ip.rq = NULL;
+
+       for (;;) {
+               bio = rq->bio;
+               if (!fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags))
+                       err = 0;
+               else
+                       err = -EIO;
+               /* go around again while __blk_end_request() returns nonzero */
+               if (!__blk_end_request(rq, err, bio->bi_size))
+                       break;
+       }
+
+       /* cf. http://lkml.org/lkml/2006/10/31/28 */
+       if (fastfail)
+               return;
+       q->request_fn(q);
+}
+
+/*
+ * Release buf: stop tracking it as d->ip.buf if that is what it is,
+ * call bio_pagedec() on its bio and hand it back to d->bufpool.
+ * rq->special is used as a counter here: it is decremented once per
+ * call, and the request is ended when it reaches zero.
+ */
+static void
+aoe_end_buf(struct aoedev *d, struct buf *buf)
+{
+       struct request *rq = buf->rq;   /* fetched before buf is freed */
+       unsigned long left;
+
+       if (d->ip.buf == buf)
+               d->ip.buf = NULL;
+       bio_pagedec(buf->bio);
+       mempool_free(buf, d->bufpool);
+
+       left = (unsigned long) rq->special - 1;
+       rq->special = (void *) left;
+       if (left == 0)
+               aoe_end_request(d, rq, 0);
+}
+
+/*
+ * Finish processing one response frame.  The reply in f->r_skb is
+ * checked against the command recorded in f->skb, read data is copied
+ * into the bio, and then the frame, the buf (once its last outstanding
+ * frame is done and nothing remains to be sent), the device reference
+ * and the response skb are released.
+ *
+ * f->r_skb may be NULL, in which case the buf (if any) is just failed.
+ * A NULL f is ignored.  d->lock is taken here with spin_lock_irq().
+ */
+static void
+ktiocomplete(struct frame *f)
 {
-       unsigned long n_sect = bio->bi_size >> 9;
-       const int rw = bio_data_dir(bio);
-       struct hd_struct *part;
-       int cpu;
+       struct aoe_hdr *hin, *hout;
+       struct aoe_atahdr *ahin, *ahout;
+       struct buf *buf;
+       struct sk_buff *skb;
+       struct aoetgt *t;
+       struct aoeif *ifp;
+       struct aoedev *d;
+       long n;
+
+       if (f == NULL)
+               return;
+
+       t = f->t;
+       d = t->d;
+
+       /* hout/ahout: headers of the request we sent for this frame */
+       hout = (struct aoe_hdr *) skb_mac_header(f->skb);
+       ahout = (struct aoe_atahdr *) (hout+1);
+       buf = f->buf;
+       skb = f->r_skb;
+       if (skb == NULL)
+               goto noskb;     /* just fail the buf. */
+
+       /* hin/ahin: headers of the response; pull them so skb holds data only */
+       hin = (struct aoe_hdr *) skb->data;
+       skb_pull(skb, sizeof(*hin));
+       ahin = (struct aoe_atahdr *) skb->data;
+       skb_pull(skb, sizeof(*ahin));
+       if (ahin->cmdstat & 0xa9) {     /* these bits cleared on success */
+               pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
+                       ahout->cmdstat, ahin->cmdstat,
+                       d->aoemajor, d->aoeminor);
+               /* noskb: entered by goto above; shares the ATA-error failure path */
+noskb: if (buf)
+                       clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
+               goto badrsp;
+       }
+
+       /* expected payload in bytes: sector count of the request, times 512 */
+       n = ahout->scnt << 9;
+       switch (ahout->cmdstat) {
+       case ATA_CMD_PIO_READ:
+       case ATA_CMD_PIO_READ_EXT:
+               if (skb->len < n) {
+                       pr_err("aoe: runt data size in read.  skb->len=%d need=%ld\n",
+                               skb->len, n);
+                       clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
+                       break;
+               }
+               bvcpy(f->bv, f->bv_off, skb, n);
+               /* fall through: reads share the bookkeeping done for writes */
+       case ATA_CMD_PIO_WRITE:
+       case ATA_CMD_PIO_WRITE_EXT:
+               spin_lock_irq(&d->lock);
+               ifp = getif(t, skb->dev);
+               if (ifp)
+                       ifp->lost = 0;
+               if (d->htgt == t) /* I'll help myself, thank you. */
+                       d->htgt = NULL;
+               spin_unlock_irq(&d->lock);
+               break;
+       case ATA_CMD_ID_ATA:
+               if (skb->len < 512) {
+                       pr_info("aoe: runt data size in ataid.  skb->len=%d\n",
+                               skb->len);
+                       break;
+               }
+               /* ataid_complete() is handed skb->data as one flat buffer */
+               if (skb_linearize(skb))
+                       break;
+               spin_lock_irq(&d->lock);
+               ataid_complete(d, t, skb->data);
+               spin_unlock_irq(&d->lock);
+               break;
+       default:
+               pr_info("aoe: unrecognized ata command %2.2Xh for %d.%d\n",
+                       ahout->cmdstat,
+                       be16_to_cpu(get_unaligned(&hin->major)),
+                       hin->minor);
+       }
+badrsp:
+       spin_lock_irq(&d->lock);
+
+       aoe_freetframe(f);
+
+       /* end the buf once no frames are outstanding and nothing is left to send */
+       if (buf && --buf->nframesout == 0 && buf->resid == 0)
+               aoe_end_buf(d, buf);
+
+       aoecmd_work(d);
+
+       spin_unlock_irq(&d->lock);
+       aoedev_put(d);
+       dev_kfree_skb(skb);
+}
 
-       cpu = part_stat_lock();
-       part = disk_map_sector_rcu(disk, sector);
+/*
+ * Drain the I/O completion queue, handing each queued frame to
+ * ktiocomplete().  At most MAXIOC frames are processed per call so
+ * that the caller gets a chance to reschedule.
+ *
+ * Enters with iocq.lock held and returns with it held; the lock is
+ * dropped around each ktiocomplete() call.
+ * Returns true iff responses needing processing remain.
+ */
+static int
+ktio(void)
+{
+       struct frame *f;
+       struct list_head *pos;
+       int i;
 
-       part_stat_inc(cpu, part, ios[rw]);
-       part_stat_add(cpu, part, ticks[rw], duration);
-       part_stat_add(cpu, part, sectors[rw], n_sect);
-       part_stat_add(cpu, part, io_ticks, duration);
+       for (i = 0; ; ++i) {
+               /*
+                * Look at the queue before the batch limit, so that an
+                * empty queue is never reported as "more work remains".
+                */
+               if (list_empty(&iocq.head))
+                       return 0;
+               if (i == MAXIOC)
+                       return 1;
+               pos = iocq.head.next;
+               list_del(pos);
+               spin_unlock_irq(&iocq.lock);
+               f = list_entry(pos, struct frame, head);
+               ktiocomplete(f);
+               spin_lock_irq(&iocq.lock);
+       }
+}
 
-       part_stat_unlock();
+/*
+ * Worker loop run in its own kernel thread; vp is the struct ktstate
+ * describing it.  Each pass calls k->fn() with k->lock held and
+ * interrupts disabled.  When fn reports no more work the thread sleeps
+ * on k->waitq; otherwise it calls cond_resched() and goes around again.
+ * The loop ends once kthread_should_stop() is true.  k->rendez is
+ * completed once at startup and once more on the way out.
+ */
+static int
+kthread(void *vp)
+{
+       struct ktstate *k;
+       DECLARE_WAITQUEUE(wait, current);
+       int more;
+
+       k = vp;
+       current->flags |= PF_NOFREEZE;
+       set_user_nice(current, -10);
+       complete(&k->rendez);   /* tell spawner we're running */
+       do {
+               spin_lock_irq(k->lock);
+               more = k->fn();
+               if (!more) {
+                       /*
+                        * Get on the wait queue and mark ourselves
+                        * sleeping before the lock is dropped --
+                        * presumably so that a producer which queues
+                        * work under the same lock and then wakes
+                        * k->waitq cannot be missed.
+                        */
+                       add_wait_queue(k->waitq, &wait);
+                       __set_current_state(TASK_INTERRUPTIBLE);
+               }
+               spin_unlock_irq(k->lock);
+               if (!more) {
+                       schedule();
+                       remove_wait_queue(k->waitq, &wait);
+               } else
+                       cond_resched();
+       } while (!kthread_should_stop());
+       complete(&k->rendez);   /* tell spawner we're stopping */
+       return 0;
 }
 
+/*
+ * Stop the thread recorded in k->task: request the stop with
+ * kthread_stop(), then wait on k->rendez, which the thread function
+ * completes just before it returns.
+ */
 void
+aoe_ktstop(struct ktstate *k)
+{
+       kthread_stop(k->task);
+       wait_for_completion(&k->rendez);
+}
+
+/*
+ * Spawn the kernel thread described by k, running kthread() with k as
+ * its argument and k->name as its name, and wait until the new thread
+ * has signalled k->rendez.  On success the task is stored in k->task
+ * and k->rendez is re-armed for the exit handshake.
+ *
+ * Returns 0 on success, or -ENOMEM if the thread could not be created.
+ */
+int
+aoe_ktstart(struct ktstate *k)
+{
+       struct task_struct *task;
+
+       init_completion(&k->rendez);
+       /* k->name is data, not a format: never pass it as the name format */
+       task = kthread_run(kthread, k, "%s", k->name);
+       if (task == NULL || IS_ERR(task))
+               return -ENOMEM;
+       k->task = task;
+       wait_for_completion(&k->rendez); /* allow kthread to start */
+       init_completion(&k->rendez);    /* for waiting for exit later */
+       return 0;
+}
+
+/*
+ * Hand a response over to the completion thread: remember the response
+ * skb on the frame, append the frame to iocq under iocq.lock, and wake
+ * whoever is sleeping on ktiowq.
+ */
+static void
+ktcomplete(struct frame *f, struct sk_buff *skb)
+{
+       ulong irqflags;
+
+       f->r_skb = skb;
+
+       spin_lock_irqsave(&iocq.lock, irqflags);
+       list_add_tail(&f->head, &iocq.head);
+       spin_unlock_irqrestore(&iocq.lock, irqflags);
+
+       wake_up(&ktiowq);
+}
+
+/*
+ * Handle an incoming ATA response.  The device is looked up from the
+ * major/minor in the AoE header and the outstanding frame from the
+ * header's tag; the frame and skb are then queued for the completion
+ * thread with ktcomplete().
+ *
+ * Returns skb when the response was not consumed here (unknown device,
+ * or no frame matching the tag), and NULL once the skb has been handed
+ * to ktcomplete().
+ */
+struct sk_buff *
 aoecmd_ata_rsp(struct sk_buff *skb)
 {
-       struct sk_buff_head queue;
        struct aoedev *d;
-       struct aoe_hdr *hin, *hout;
-       struct aoe_atahdr *ahin, *ahout;
+       struct aoe_hdr *h;
        struct frame *f;
-       struct buf *buf;
        struct aoetgt *t;
-       struct aoeif *ifp;
-       register long n;
+       u32 n;
        ulong flags;
        char ebuf[128];
        u16 aoemajor;
 
-       hin = (struct aoe_hdr *) skb_mac_header(skb);
-       aoemajor = get_unaligned_be16(&hin->major);
-       d = aoedev_by_aoeaddr(aoemajor, hin->minor);
+       h = (struct aoe_hdr *) skb->data;
+       aoemajor = be16_to_cpu(get_unaligned(&h->major));
+       d = aoedev_by_aoeaddr(aoemajor, h->minor);
        if (d == NULL) {
                snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
                        "for unknown device %d.%d\n",
-                        aoemajor, hin->minor);
+                       aoemajor, h->minor);
                aoechr_error(ebuf);
-               return;
+               return skb;
        }
 
        spin_lock_irqsave(&d->lock, flags);
 
-       n = get_unaligned_be32(&hin->tag);
-       t = gettgt(d, hin->src);
-       if (t == NULL) {
-               printk(KERN_INFO "aoe: can't find target e%ld.%d:%pm\n",
-                       d->aoemajor, d->aoeminor, hin->src);
-               spin_unlock_irqrestore(&d->lock, flags);
-               return;
-       }
-       f = getframe(t, n);
+       /* n is the tag from the response header; it selects the frame */
+       n = be32_to_cpu(get_unaligned(&h->tag));
+       f = getframe(d, n);
        if (f == NULL) {
+               /* no frame carries this tag: report it and leave skb to the caller */
                calc_rttavg(d, -tsince(n));
                spin_unlock_irqrestore(&d->lock, flags);
+               aoedev_put(d);
                snprintf(ebuf, sizeof ebuf,
                        "%15s e%d.%d    tag=%08x@%08lx\n",
                        "unexpected rsp",
-                       get_unaligned_be16(&hin->major),
-                       hin->minor,
-                       get_unaligned_be32(&hin->tag),
+                       get_unaligned_be16(&h->major),
+                       h->minor,
+                       get_unaligned_be32(&h->tag),
                        jiffies);
                aoechr_error(ebuf);
-               return;
+               return skb;
        }
-
+       t = f->t;
        calc_rttavg(d, tsince(f->tag));
-
-       ahin = (struct aoe_atahdr *) (hin+1);
-       hout = (struct aoe_hdr *) skb_mac_header(f->skb);
-       ahout = (struct aoe_atahdr *) (hout+1);
-       buf = f->buf;
-
-       if (ahin->cmdstat & 0xa9) {     /* these bits cleared on success */
-               printk(KERN_ERR
-                       "aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
-                       ahout->cmdstat, ahin->cmdstat,
-                       d->aoemajor, d->aoeminor);
-               if (buf)
-                       buf->flags |= BUFFL_FAIL;
-       } else {
-               if (d->htgt && t == *d->htgt) /* I'll help myself, thank you. */
-                       d->htgt = NULL;
-               n = ahout->scnt << 9;
-               switch (ahout->cmdstat) {
-               case ATA_CMD_PIO_READ:
-               case ATA_CMD_PIO_READ_EXT:
-                       if (skb->len - sizeof *hin - sizeof *ahin < n) {
-                               printk(KERN_ERR
-                                       "aoe: %s.  skb->len=%d need=%ld\n",
-                                       "runt data size in read", skb->len, n);
-                               /* fail frame f?  just returning will rexmit. */
-                               spin_unlock_irqrestore(&d->lock, flags);
-                               return;
-                       }
-                       memcpy(f->bufaddr, ahin+1, n);
-               case ATA_CMD_PIO_WRITE:
-               case ATA_CMD_PIO_WRITE_EXT:
-                       ifp = getif(t, skb->dev);
-                       if (ifp) {
-                               ifp->lost = 0;
-                               if (n > DEFAULTBCNT)
-                                       ifp->lostjumbo = 0;
-                       }
-                       if (f->bcnt -= n) {
-                               f->lba += n >> 9;
-                               f->bufaddr += n;
-                               resend(d, t, f);
-                               goto xmit;
-                       }
-                       break;
-               case ATA_CMD_ID_ATA:
-                       if (skb->len - sizeof *hin - sizeof *ahin < 512) {
-                               printk(KERN_INFO
-                                       "aoe: runt data size in ataid.  skb->len=%d\n",
-                                       skb->len);
-                               spin_unlock_irqrestore(&d->lock, flags);
-                               return;
-                       }
-                       ataid_complete(d, t, (char *) (ahin+1));
-                       break;
-               default:
-                       printk(KERN_INFO
-                               "aoe: unrecognized ata command %2.2Xh for %d.%d\n",
-                               ahout->cmdstat,
-                               get_unaligned_be16(&hin->major),
-                               hin->minor);
-               }
-       }
-
-       if (buf && --buf->nframesout == 0 && buf->resid == 0) {
-               diskstats(d->gd, buf->bio, jiffies - buf->stime, buf->sector);
-               if (buf->flags & BUFFL_FAIL)
-                       bio_endio(buf->bio, -EIO);
-               else {
-                       bio_flush_dcache_pages(buf->bio);
-                       bio_endio(buf->bio, 0);
-               }
-               mempool_free(buf, d->bufpool);
-       }
-
-       f->buf = NULL;
-       f->tag = FREETAG;
+       /* one fewer frame outstanding on this target */
        t->nout--;
-
        aoecmd_work(d);
-xmit:
-       __skb_queue_head_init(&queue);
-       skb_queue_splice_init(&d->sendq, &queue);
 
        spin_unlock_irqrestore(&d->lock, flags);
-       aoenet_xmit(&queue);
+
+       /* the rest of the response handling is left to the completion thread */
+       ktcomplete(f, skb);
+
+       /*
+        * Note here that we do not perform an aoedev_put, as we are
+        * leaving this reference for the ktio to release.
+        */
+       return NULL;
 }
 
 void
@@ -896,7 +1211,7 @@ aoecmd_ata_id(struct aoedev *d)
        struct sk_buff *skb;
        struct aoetgt *t;
 
-       f = freeframe(d);
+       f = newframe(d);
        if (f == NULL)
                return NULL;
 
@@ -909,6 +1224,7 @@ aoecmd_ata_id(struct aoedev *d)
        skb_put(skb, sizeof *h + sizeof *ah);
        memset(h, 0, skb->len);
        f->tag = aoehdr_atainit(d, t, h);
+       fhash(f);
        t->nout++;
        f->waited = 0;
 
@@ -929,7 +1245,6 @@ static struct aoetgt *
 addtgt(struct aoedev *d, char *addr, ulong nframes)
 {
        struct aoetgt *t, **tt, **te;
-       struct frame *f, *e;
 
        tt = d->targets;
        te = tt + NTARGETS;
@@ -941,26 +1256,73 @@ addtgt(struct aoedev *d, char *addr, ulong nframes)
                        "aoe: device addtgt failure; too many targets\n");
                return NULL;
        }
-       t = kcalloc(1, sizeof *t, GFP_ATOMIC);
-       f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
-       if (!t || !f) {
-               kfree(f);
-               kfree(t);
+       t = kzalloc(sizeof(*t), GFP_ATOMIC);
+       if (!t) {
                printk(KERN_INFO "aoe: cannot allocate memory to add target\n");
                return NULL;
        }
 
+       d->ntargets++;
        t->nframes = nframes;
-       t->frames = f;
-       e = f + nframes;
-       for (; f < e; f++)
-               f->tag = FREETAG;
+       t->d = d;
        memcpy(t->addr, addr, sizeof t->addr);
        t->ifp = t->ifs;
        t->maxout = t->nframes;
+       INIT_LIST_HEAD(&t->ffree);
        return *tt = t;
 }
 
+/*
+ * Recompute d->maxbcnt from the device's targets.  The populated
+ * target slots are walked in order, keeping the smallest minbcnt seen
+ * (a running value of 0 counts as "not set yet").  When the result
+ * differs from d->maxbcnt it is stored and logged.
+ */
+static void
+setdbcnt(struct aoedev *d)
+{
+       struct aoetgt *t;
+       int i, bcnt = 0;
+
+       for (i = 0; i < NTARGETS; i++) {
+               t = d->targets[i];
+               if (t == NULL)
+                       break;          /* end of the populated slots */
+               if (bcnt == 0 || t->minbcnt < bcnt)
+                       bcnt = t->minbcnt;
+       }
+       if (bcnt == d->maxbcnt)
+               return;
+
+       d->maxbcnt = bcnt;
+       pr_info("aoe: e%ld.%d: setting %d byte data frames\n",
+               d->aoemajor, d->aoeminor, bcnt);
+}
+
+/*
+ * Record bcnt as the byte count for interface nd on target t.  An
+ * existing entry for nd is updated in place; otherwise nd is added in
+ * the first free slot (taking a reference with dev_hold), or an error
+ * is logged and nothing else changes when all NAOEIFS slots are in
+ * use.  t->minbcnt becomes the smallest byte count over bcnt and the
+ * target's other interfaces, and setdbcnt() is then run on the device.
+ */
+static void
+setifbcnt(struct aoetgt *t, struct net_device *nd, int bcnt)
+{
+       struct aoeif *ifp;
+       int minbcnt = bcnt;
+       int known = 0;          /* set once nd's entry has been updated */
+       int i;
+
+       for (i = 0; i < NAOEIFS; i++) {
+               ifp = &t->ifs[i];
+               if (ifp->nd == NULL)
+                       break;          /* end of the valid interfaces */
+               if (!known && ifp->nd == nd) {
+                       ifp->bcnt = bcnt;       /* we're updating */
+                       known = 1;
+               } else if (ifp->bcnt < minbcnt) {
+                       minbcnt = ifp->bcnt;    /* find the min interface */
+               }
+       }
+       if (!known && nd != NULL) {
+               if (i == NAOEIFS) {
+                       pr_err("aoe: device setifbcnt failure; too many interfaces.\n");
+                       return;
+               }
+               /* slot i is the first free one */
+               ifp = &t->ifs[i];
+               dev_hold(nd);
+               ifp->nd = nd;
+               ifp->bcnt = bcnt;
+       }
+       t->minbcnt = minbcnt;
+       setdbcnt(t->d);
+}
+
 void
 aoecmd_cfg_rsp(struct sk_buff *skb)
 {
@@ -968,11 +1330,12 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
        struct aoe_hdr *h;
        struct aoe_cfghdr *ch;
        struct aoetgt *t;
-       struct aoeif *ifp;
        ulong flags, sysminor, aoemajor;
        struct sk_buff *sl;
+       struct sk_buff_head queue;
        u16 n;
 
+       sl = NULL;
        h = (struct aoe_hdr *) skb_mac_header(skb);
        ch = (struct aoe_cfghdr *) (h+1);
 
@@ -986,6 +1349,13 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
                        "Check shelf dip switches.\n");
                return;
        }
+       if (h->minor >= NPERSHELF) {
+               pr_err("aoe: e%ld.%d %s, %d\n",
+                       aoemajor, h->minor,
+                       "slot number larger than the maximum",
+                       NPERSHELF-1);
+               return;
+       }
 
        sysminor = SYSMINOR(aoemajor, h->minor);
        if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
@@ -1009,52 +1379,26 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
        t = gettgt(d, h->src);
        if (!t) {
                t = addtgt(d, h->src, n);
-               if (!t) {
-                       spin_unlock_irqrestore(&d->lock, flags);
-                       return;
-               }
-       }
-       ifp = getif(t, skb->dev);
-       if (!ifp) {
-               ifp = addif(t, skb->dev);
-               if (!ifp) {
-                       printk(KERN_INFO
-                               "aoe: device addif failure; "
-                               "too many interfaces?\n");
-                       spin_unlock_irqrestore(&d->lock, flags);
-                       return;
-               }
-       }
-       if (ifp->maxbcnt) {
-               n = ifp->nd->mtu;
-               n -= sizeof (struct aoe_hdr) + sizeof (struct aoe_atahdr);
-               n /= 512;
-               if (n > ch->scnt)
-                       n = ch->scnt;
-               n = n ? n * 512 : DEFAULTBCNT;
-               if (n != ifp->maxbcnt) {
-                       printk(KERN_INFO
-                               "aoe: e%ld.%d: setting %d%s%s:%pm\n",
-                               d->aoemajor, d->aoeminor, n,
-                               " byte data frames on ", ifp->nd->name,
-                               t->addr);
-                       ifp->maxbcnt = n;
-               }
+               if (!t)
+                       goto bail;
        }
+       n = skb->dev->mtu;
+       n -= sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
+       n /= 512;
+       if (n > ch->scnt)
+               n = ch->scnt;
+       n = n ? n * 512 : DEFAULTBCNT;
+       setifbcnt(t, skb->dev, n);
 
        /* don't change users' perspective */
-       if (d->nopen) {
-               spin_unlock_irqrestore(&d->lock, flags);
-               return;
+       if (d->nopen == 0) {
+               d->fw_ver = be16_to_cpu(ch->fwver);
+               sl = aoecmd_ata_id(d);
        }
-       d->fw_ver = be16_to_cpu(ch->fwver);
-
-       sl = aoecmd_ata_id(d);
-
+bail:
        spin_unlock_irqrestore(&d->lock, flags);
-
+       aoedev_put(d);
        if (sl) {
-               struct sk_buff_head queue;
                __skb_queue_head_init(&queue);
                __skb_queue_tail(&queue, sl);
                aoenet_xmit(&queue);
@@ -1065,20 +1409,74 @@ void
 aoecmd_cleanslate(struct aoedev *d)
 {
        struct aoetgt **t, **te;
-       struct aoeif *p, *e;
 
        d->mintimer = MINTIMER;
+       d->maxbcnt = 0;
 
        t = d->targets;
        te = t + NTARGETS;
-       for (; t < te && *t; t++) {
+       for (; t < te && *t; t++)
                (*t)->maxout = (*t)->nframes;
-               p = (*t)->ifs;
-               e = p + NAOEIFS;
-               for (; p < e; p++) {
-                       p->lostjumbo = 0;
-                       p->lost = 0;
-                       p->maxbcnt = DEFAULTBCNT;
+}
+
+/*
+ * Mark buf as failed: zero its residual count and clear BIO_UPTODATE
+ * on its bio.  If no frames are outstanding for it the buf is ended
+ * right away.  A NULL buf is ignored.
+ */
+void
+aoe_failbuf(struct aoedev *d, struct buf *buf)
+{
+       if (!buf)
+               return;
+
+       buf->resid = 0;
+       clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
+       if (buf->nframesout != 0)
+               return;
+       aoe_end_buf(d, buf);
+}
+
+/*
+ * Discard every frame still queued on iocq without processing its
+ * response.  The queue is first moved onto a private list under
+ * iocq.lock.  Then, for each frame, the associated buf (if any) is
+ * failed, the frame is released with aoe_freetframe(), the response
+ * skb is freed and aoedev_put() is called on the device.
+ */
+void
+aoe_flush_iocq(void)
+{
+       struct frame *f;
+       struct aoedev *d;
+       LIST_HEAD(flist);
+       struct list_head *pos;
+       struct sk_buff *skb;
+       ulong flags;
+
+       /* take the whole queue in one go so iocq.lock is held only briefly */
+       spin_lock_irqsave(&iocq.lock, flags);
+       list_splice_init(&iocq.head, &flist);
+       spin_unlock_irqrestore(&iocq.lock, flags);
+       while (!list_empty(&flist)) {
+               pos = flist.next;
+               list_del(pos);
+               f = list_entry(pos, struct frame, head);
+               d = f->t->d;
+               skb = f->r_skb;
+               spin_lock_irqsave(&d->lock, flags);
+               if (f->buf) {
+                       /* this frame no longer counts as outstanding for its buf */
+                       f->buf->nframesout--;
+                       aoe_failbuf(d, f->buf);
                }
+               aoe_freetframe(f);
+               spin_unlock_irqrestore(&d->lock, flags);
+               dev_kfree_skb(skb);
+               aoedev_put(d);
        }
 }
+
+/*
+ * Set up the I/O completion machinery: the iocq list and its lock,
+ * the ktiowq wait queue, and the "aoe_ktio" thread that runs ktio().
+ * Returns whatever aoe_ktstart() returns.
+ */
+int __init
+aoecmd_init(void)
+{
+       /* completion queue and the wait queue its consumer sleeps on */
+       spin_lock_init(&iocq.lock);
+       INIT_LIST_HEAD(&iocq.head);
+       init_waitqueue_head(&ktiowq);
+
+       /* describe the thread that drains iocq */
+       kts.name = "aoe_ktio";
+       kts.fn = ktio;
+       kts.lock = &iocq.lock;
+       kts.waitq = &ktiowq;
+
+       return aoe_ktstart(&kts);
+}
+
+/*
+ * Tear-down counterpart of aoecmd_init(): stop the completion thread
+ * first, then flush whatever is still sitting on iocq.
+ */
+void
+aoecmd_exit(void)
+{
+       aoe_ktstop(&kts);
+       aoe_flush_iocq();
+}