[PATCH] uml: revert block driver use of host AIO
authorJeff Dike <jdike@addtoit.com>
Tue, 11 Oct 2005 03:10:32 +0000 (23:10 -0400)
committerLinus Torvalds <torvalds@g5.osdl.org>
Wed, 12 Oct 2005 15:22:26 +0000 (08:22 -0700)
The patch to use host AIO support that I submitted early after 2.6.13 exposed
some problems in the block driver.  I have fixes for these, but am not
comfortable putting them into 2.6.14 at this late date.  So, this patch reverts
the use of host AIO.

I will resubmit the original patch, plus fixes to the driver after 2.6.14
in order to get a reasonable amount of testing before they're exposed to
the general public.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
arch/um/drivers/Makefile
arch/um/drivers/ubd_kern.c
arch/um/drivers/ubd_user.c [new file with mode: 0644]
arch/um/include/aio.h
arch/um/os-Linux/aio.c

index 783e18cae090ec1d1c77e0cafc05cfd5fa703608..de17d4c6e02db13309752ca26a99c2536f5f0163 100644 (file)
@@ -13,7 +13,7 @@ mcast-objs := mcast_kern.o mcast_user.o
 net-objs := net_kern.o net_user.o
 mconsole-objs := mconsole_kern.o mconsole_user.o
 hostaudio-objs := hostaudio_kern.o
-ubd-objs := ubd_kern.o
+ubd-objs := ubd_kern.o ubd_user.o
 port-objs := port_kern.o port_user.o
 harddog-objs := harddog_kern.o harddog_user.o
 
index e77a38da4350d6b4abdccfd3cab1c1d5755c3864..f73134333f64ee55c9eb7f87521e89f577400297 100644 (file)
@@ -35,7 +35,6 @@
 #include "linux/blkpg.h"
 #include "linux/genhd.h"
 #include "linux/spinlock.h"
-#include "asm/atomic.h"
 #include "asm/segment.h"
 #include "asm/uaccess.h"
 #include "asm/irq.h"
 #include "mem.h"
 #include "mem_kern.h"
 #include "cow.h"
-#include "aio.h"
 
 enum ubd_req { UBD_READ, UBD_WRITE };
 
 struct io_thread_req {
-       enum aio_type op;
+       enum ubd_req op;
        int fds[2];
        unsigned long offsets[2];
        unsigned long long offset;
        unsigned long length;
        char *buffer;
        int sectorsize;
-       int bitmap_offset;
-       long bitmap_start;
-       long bitmap_end;
+       unsigned long sector_mask;
+       unsigned long long cow_offset;
+       unsigned long bitmap_words[2];
        int error;
 };
 
@@ -82,31 +80,28 @@ extern int create_cow_file(char *cow_file, char *backing_file,
                           unsigned long *bitmap_len_out,
                           int *data_offset_out);
 extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
-extern void do_io(struct io_thread_req *req, struct request *r,
-                 unsigned long *bitmap);
+extern void do_io(struct io_thread_req *req);
 
-static inline int ubd_test_bit(__u64 bit, void *data)
+static inline int ubd_test_bit(__u64 bit, unsigned char *data)
 {
-       unsigned char *buffer = data;
        __u64 n;
        int bits, off;
 
-       bits = sizeof(buffer[0]) * 8;
+       bits = sizeof(data[0]) * 8;
        n = bit / bits;
        off = bit % bits;
-       return((buffer[n] & (1 << off)) != 0);
+       return((data[n] & (1 << off)) != 0);
 }
 
-static inline void ubd_set_bit(__u64 bit, void *data)
+static inline void ubd_set_bit(__u64 bit, unsigned char *data)
 {
-       unsigned char *buffer = data;
        __u64 n;
        int bits, off;
 
-       bits = sizeof(buffer[0]) * 8;
+       bits = sizeof(data[0]) * 8;
        n = bit / bits;
        off = bit % bits;
-       buffer[n] |= (1 << off);
+       data[n] |= (1 << off);
 }
 /*End stuff from ubd_user.h*/
 
@@ -115,6 +110,8 @@ static inline void ubd_set_bit(__u64 bit, void *data)
 static DEFINE_SPINLOCK(ubd_io_lock);
 static DEFINE_SPINLOCK(ubd_lock);
 
+static void (*do_ubd)(void);
+
 static int ubd_open(struct inode * inode, struct file * filp);
 static int ubd_release(struct inode * inode, struct file * file);
 static int ubd_ioctl(struct inode * inode, struct file * file,
@@ -161,8 +158,6 @@ struct cow {
         int data_offset;
 };
 
-#define MAX_SG 64
-
 struct ubd {
        char *file;
        int count;
@@ -173,7 +168,6 @@ struct ubd {
        int no_cow;
        struct cow cow;
        struct platform_device pdev;
-        struct scatterlist sg[MAX_SG];
 };
 
 #define DEFAULT_COW { \
@@ -466,114 +460,81 @@ __uml_help(fakehd,
 );
 
 static void do_ubd_request(request_queue_t * q);
-static int in_ubd;
+
+/* Only changed by ubd_init, which is an initcall. */
+int thread_fd = -1;
 
 /* Changed by ubd_handler, which is serialized because interrupts only
  * happen on CPU 0.
  */
 int intr_count = 0;
 
-static void ubd_end_request(struct request *req, int bytes, int uptodate)
+/* call ubd_finish if you need to serialize */
+static void __ubd_finish(struct request *req, int error)
 {
-       if (!end_that_request_first(req, uptodate, bytes >> 9)) {
-               add_disk_randomness(req->rq_disk);
-               end_that_request_last(req);
+       int nsect;
+
+       if(error){
+               end_request(req, 0);
+               return;
        }
+       nsect = req->current_nr_sectors;
+       req->sector += nsect;
+       req->buffer += nsect << 9;
+       req->errors = 0;
+       req->nr_sectors -= nsect;
+       req->current_nr_sectors = 0;
+       end_request(req, 1);
 }
 
-/* call ubd_finish if you need to serialize */
-static void __ubd_finish(struct request *req, int bytes)
+static inline void ubd_finish(struct request *req, int error)
 {
-       if(bytes < 0){
-               ubd_end_request(req, 0, 0);
-               return;
-       }
-
-       ubd_end_request(req, bytes, 1);
+       spin_lock(&ubd_io_lock);
+       __ubd_finish(req, error);
+       spin_unlock(&ubd_io_lock);
 }
 
-static inline void ubd_finish(struct request *req, int bytes)
+/* Called without ubd_io_lock held */
+static void ubd_handler(void)
 {
-       spin_lock(&ubd_io_lock);
-       __ubd_finish(req, bytes);
-       spin_unlock(&ubd_io_lock);
+       struct io_thread_req req;
+       struct request *rq = elv_next_request(ubd_queue);
+       int n;
+
+       do_ubd = NULL;
+       intr_count++;
+       n = os_read_file(thread_fd, &req, sizeof(req));
+       if(n != sizeof(req)){
+               printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
+                      "err = %d\n", os_getpid(), -n);
+               spin_lock(&ubd_io_lock);
+               end_request(rq, 0);
+               spin_unlock(&ubd_io_lock);
+               return;
+       }
+        
+       ubd_finish(rq, req.error);
+       reactivate_fd(thread_fd, UBD_IRQ);      
+       do_ubd_request(ubd_queue);
 }
 
-struct bitmap_io {
-        atomic_t count;
-        struct aio_context aio;
-};
-
-struct ubd_aio {
-        struct aio_context aio;
-        struct request *req;
-        int len;
-        struct bitmap_io *bitmap;
-        void *bitmap_buf;
-};
-
-static int ubd_reply_fd = -1;
-
 static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
 {
-       struct aio_thread_reply reply;
-       struct ubd_aio *aio;
-       struct request *req;
-       int err, n, fd = (int) (long) dev;
-
-       while(1){
-               err = os_read_file(fd, &reply, sizeof(reply));
-               if(err == -EAGAIN)
-                       break;
-               if(err < 0){
-                       printk("ubd_aio_handler - read returned err %d\n",
-                              -err);
-                       break;
-               }
-
-                aio = container_of(reply.data, struct ubd_aio, aio);
-                n = reply.err;
-
-               if(n == 0){
-                       req = aio->req;
-                       req->nr_sectors -= aio->len >> 9;
-
-                       if((aio->bitmap != NULL) &&
-                          (atomic_dec_and_test(&aio->bitmap->count))){
-                                aio->aio = aio->bitmap->aio;
-                                aio->len = 0;
-                                kfree(aio->bitmap);
-                                aio->bitmap = NULL;
-                                submit_aio(&aio->aio);
-                       }
-                       else {
-                               if((req->nr_sectors == 0) &&
-                                   (aio->bitmap == NULL)){
-                                       int len = req->hard_nr_sectors << 9;
-                                       ubd_finish(req, len);
-                               }
-
-                                if(aio->bitmap_buf != NULL)
-                                        kfree(aio->bitmap_buf);
-                               kfree(aio);
-                       }
-               }
-                else if(n < 0){
-                        ubd_finish(aio->req, n);
-                        if(aio->bitmap != NULL)
-                                kfree(aio->bitmap);
-                        if(aio->bitmap_buf != NULL)
-                                kfree(aio->bitmap_buf);
-                        kfree(aio);
-                }
-       }
-       reactivate_fd(fd, UBD_IRQ);
+       ubd_handler();
+       return(IRQ_HANDLED);
+}
 
-        do_ubd_request(ubd_queue);
+/* Only changed by ubd_init, which is an initcall. */
+static int io_pid = -1;
 
-       return(IRQ_HANDLED);
+void kill_io_thread(void)
+{
+       if(io_pid != -1) 
+               os_kill_process(io_pid, 1);
 }
 
+__uml_exitcall(kill_io_thread);
+
 static int ubd_file_size(struct ubd *dev, __u64 *size_out)
 {
        char *file;
@@ -608,7 +569,7 @@ static int ubd_open_dev(struct ubd *dev)
                                &dev->cow.data_offset, create_ptr);
 
        if((dev->fd == -ENOENT) && create_cow){
-               dev->fd = create_cow_file(dev->file, dev->cow.file,
+               dev->fd = create_cow_file(dev->file, dev->cow.file, 
                                          dev->openflags, 1 << 9, PAGE_SIZE,
                                          &dev->cow.bitmap_offset, 
                                          &dev->cow.bitmap_len,
@@ -870,10 +831,6 @@ int ubd_init(void)
 {
         int i;
 
-       ubd_reply_fd = init_aio_irq(UBD_IRQ, "ubd", ubd_intr);
-       if(ubd_reply_fd < 0)
-               printk("Setting up ubd AIO failed, err = %d\n", ubd_reply_fd);
-
        devfs_mk_dir("ubd");
        if (register_blkdev(MAJOR_NR, "ubd"))
                return -1;
@@ -884,7 +841,6 @@ int ubd_init(void)
                return -1;
        }
                
-       blk_queue_max_hw_segments(ubd_queue, MAX_SG);
        if (fake_major != MAJOR_NR) {
                char name[sizeof("ubd_nnn\0")];
 
@@ -896,12 +852,40 @@ int ubd_init(void)
        driver_register(&ubd_driver);
        for (i = 0; i < MAX_DEV; i++) 
                ubd_add(i);
-
        return 0;
 }
 
 late_initcall(ubd_init);
 
+int ubd_driver_init(void){
+       unsigned long stack;
+       int err;
+
+       /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
+       if(global_openflags.s){
+               printk(KERN_INFO "ubd: Synchronous mode\n");
+               /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
+                * enough. So use anyway the io thread. */
+       }
+       stack = alloc_stack(0, 0);
+       io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), 
+                                &thread_fd);
+       if(io_pid < 0){
+               printk(KERN_ERR 
+                      "ubd : Failed to start I/O thread (errno = %d) - "
+                      "falling back to synchronous I/O\n", -io_pid);
+               io_pid = -1;
+               return(0);
+       }
+       err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, 
+                            SA_INTERRUPT, "ubd", ubd_dev);
+       if(err != 0)
+               printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
+       return(err);
+}
+
+device_initcall(ubd_driver_init);
+
 static int ubd_open(struct inode *inode, struct file *filp)
 {
        struct gendisk *disk = inode->i_bdev->bd_disk;
@@ -939,55 +923,105 @@ static int ubd_release(struct inode * inode, struct file * file)
        return(0);
 }
 
-static void cowify_bitmap(struct io_thread_req *req, unsigned long *bitmap)
+static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
+                         __u64 *cow_offset, unsigned long *bitmap,
+                         __u64 bitmap_offset, unsigned long *bitmap_words,
+                         __u64 bitmap_len)
 {
-        __u64 sector = req->offset / req->sectorsize;
-        int i;
+       __u64 sector = io_offset >> 9;
+       int i, update_bitmap = 0;
+
+       for(i = 0; i < length >> 9; i++){
+               if(cow_mask != NULL)
+                       ubd_set_bit(i, (unsigned char *) cow_mask);
+               if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
+                       continue;
 
-        for(i = 0; i < req->length / req->sectorsize; i++){
-                if(ubd_test_bit(sector + i, bitmap))
-                        continue;
+               update_bitmap = 1;
+               ubd_set_bit(sector + i, (unsigned char *) bitmap);
+       }
+
+       if(!update_bitmap)
+               return;
 
-                if(req->bitmap_start == -1)
-                        req->bitmap_start = sector + i;
-                req->bitmap_end = sector + i + 1;
+       *cow_offset = sector / (sizeof(unsigned long) * 8);
 
-                ubd_set_bit(sector + i, bitmap);
-        }
+       /* This takes care of the case where we're exactly at the end of the
+        * device, and *cow_offset + 1 is off the end.  So, just back it up
+        * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
+        * for the original diagnosis.
+        */
+       if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
+                          sizeof(unsigned long) - 1))
+               (*cow_offset)--;
+
+       bitmap_words[0] = bitmap[*cow_offset];
+       bitmap_words[1] = bitmap[*cow_offset + 1];
+
+       *cow_offset *= sizeof(unsigned long);
+       *cow_offset += bitmap_offset;
+}
+
+static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
+                      __u64 bitmap_offset, __u64 bitmap_len)
+{
+       __u64 sector = req->offset >> 9;
+       int i;
+
+       if(req->length > (sizeof(req->sector_mask) * 8) << 9)
+               panic("Operation too long");
+
+       if(req->op == UBD_READ) {
+               for(i = 0; i < req->length >> 9; i++){
+                       if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
+                               ubd_set_bit(i, (unsigned char *) 
+                                           &req->sector_mask);
+                }
+       }
+       else cowify_bitmap(req->offset, req->length, &req->sector_mask,
+                          &req->cow_offset, bitmap, bitmap_offset,
+                          req->bitmap_words, bitmap_len);
 }
 
 /* Called with ubd_io_lock held */
-static int prepare_request(struct request *req, struct io_thread_req *io_req,
-                           unsigned long long offset, int page_offset,
-                           int len, struct page *page)
+static int prepare_request(struct request *req, struct io_thread_req *io_req)
 {
        struct gendisk *disk = req->rq_disk;
        struct ubd *dev = disk->private_data;
+       __u64 offset;
+       int len;
+
+       if(req->rq_status == RQ_INACTIVE) return(1);
 
        /* This should be impossible now */
        if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
                printk("Write attempted on readonly ubd device %s\n", 
                       disk->disk_name);
-                ubd_end_request(req, 0, 0);
+               end_request(req, 0);
                return(1);
        }
 
+       offset = ((__u64) req->sector) << 9;
+       len = req->current_nr_sectors << 9;
+
        io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
        io_req->fds[1] = dev->fd;
+       io_req->cow_offset = -1;
        io_req->offset = offset;
        io_req->length = len;
        io_req->error = 0;
-       io_req->op = (rq_data_dir(req) == READ) ? AIO_READ : AIO_WRITE;
+       io_req->sector_mask = 0;
+
+       io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
        io_req->offsets[0] = 0;
        io_req->offsets[1] = dev->cow.data_offset;
-        io_req->buffer = page_address(page) + page_offset;
+       io_req->buffer = req->buffer;
        io_req->sectorsize = 1 << 9;
-        io_req->bitmap_offset = dev->cow.bitmap_offset;
-        io_req->bitmap_start = -1;
-        io_req->bitmap_end = -1;
 
-        if((dev->cow.file != NULL) && (io_req->op == UBD_WRITE))
-                cowify_bitmap(io_req, dev->cow.bitmap);
+       if(dev->cow.file != NULL)
+               cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
+                          dev->cow.bitmap_len);
+
        return(0);
 }
 
@@ -996,36 +1030,30 @@ static void do_ubd_request(request_queue_t *q)
 {
        struct io_thread_req io_req;
        struct request *req;
-       __u64 sector;
-       int err;
-
-       if(in_ubd)
-               return;
-       in_ubd = 1;
-       while((req = elv_next_request(q)) != NULL){
-               struct gendisk *disk = req->rq_disk;
-               struct ubd *dev = disk->private_data;
-               int n, i;
-
-               blkdev_dequeue_request(req);
-
-               sector = req->sector;
-               n = blk_rq_map_sg(q, req, dev->sg);
-
-               for(i = 0; i < n; i++){
-                       struct scatterlist *sg = &dev->sg[i];
-
-                       err = prepare_request(req, &io_req, sector << 9,
-                                             sg->offset, sg->length,
-                                             sg->page);
-                       if(err)
-                               continue;
-
-                       sector += sg->length >> 9;
-                       do_io(&io_req, req, dev->cow.bitmap);
+       int err, n;
+
+       if(thread_fd == -1){
+               while((req = elv_next_request(q)) != NULL){
+                       err = prepare_request(req, &io_req);
+                       if(!err){
+                               do_io(&io_req);
+                               __ubd_finish(req, io_req.error);
+                       }
+               }
+       }
+       else {
+               if(do_ubd || (req = elv_next_request(q)) == NULL)
+                       return;
+               err = prepare_request(req, &io_req);
+               if(!err){
+                       do_ubd = ubd_handler;
+                       n = os_write_file(thread_fd, (char *) &io_req,
+                                        sizeof(io_req));
+                       if(n != sizeof(io_req))
+                               printk("write to io thread failed, "
+                                      "errno = %d\n", -n);
                }
        }
-       in_ubd = 0;
 }
 
 static int ubd_ioctl(struct inode * inode, struct file * file,
@@ -1241,95 +1269,131 @@ int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
        return(err);
 }
 
-void do_io(struct io_thread_req *req, struct request *r, unsigned long *bitmap)
+static int update_bitmap(struct io_thread_req *req)
 {
-        struct ubd_aio *aio;
-        struct bitmap_io *bitmap_io = NULL;
-        char *buf;
-        void *bitmap_buf = NULL;
-        unsigned long len, sector;
-        int nsectors, start, end, bit, err;
-        __u64 off;
-
-        if(req->bitmap_start != -1){
-                /* Round up to the nearest word */
-                int round = sizeof(unsigned long);
-                len = (req->bitmap_end - req->bitmap_start +
-                       round * 8 - 1) / (round * 8);
-                len *= round;
-
-                off = req->bitmap_start / (8 * round);
-                off *= round;
-
-                bitmap_io = kmalloc(sizeof(*bitmap_io), GFP_KERNEL);
-                if(bitmap_io == NULL){
-                        printk("Failed to kmalloc bitmap IO\n");
-                        req->error = 1;
-                        return;
-                }
+       int n;
 
-                bitmap_buf = kmalloc(len, GFP_KERNEL);
-                if(bitmap_buf == NULL){
-                        printk("do_io : kmalloc of bitmap chunk "
-                               "failed\n");
-                        kfree(bitmap_io);
-                        req->error = 1;
-                        return;
-                }
-                memcpy(bitmap_buf, &bitmap[off / sizeof(bitmap[0])], len);
-
-                *bitmap_io = ((struct bitmap_io)
-                        { .count       = ATOMIC_INIT(0),
-                          .aio         = INIT_AIO(AIO_WRITE, req->fds[1],
-                                                   bitmap_buf, len,
-                                                   req->bitmap_offset + off,
-                                                   ubd_reply_fd) } );
-        }
+       if(req->cow_offset == -1)
+               return(0);
 
-        nsectors = req->length / req->sectorsize;
-        start = 0;
-        end = nsectors;
-        bit = 0;
-        do {
-                if(bitmap != NULL){
-                        sector = req->offset / req->sectorsize;
-                        bit = ubd_test_bit(sector + start, bitmap);
-                        end = start;
-                        while((end < nsectors) &&
-                              (ubd_test_bit(sector + end, bitmap) == bit))
-                                end++;
-                }
+       n = os_seek_file(req->fds[1], req->cow_offset);
+       if(n < 0){
+               printk("do_io - bitmap lseek failed : err = %d\n", -n);
+               return(1);
+       }
 
-                off = req->offsets[bit] + req->offset +
-                        start * req->sectorsize;
-                len = (end - start) * req->sectorsize;
-                buf = &req->buffer[start * req->sectorsize];
+       n = os_write_file(req->fds[1], &req->bitmap_words,
+                         sizeof(req->bitmap_words));
+       if(n != sizeof(req->bitmap_words)){
+               printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
+                      req->fds[1]);
+               return(1);
+       }
 
-                aio = kmalloc(sizeof(*aio), GFP_KERNEL);
-                if(aio == NULL){
-                        req->error = 1;
-                        return;
-                }
+       return(0);
+}
 
-                *aio = ((struct ubd_aio)
-                        { .aio         = INIT_AIO(req->op, req->fds[bit], buf,
-                                                   len, off, ubd_reply_fd),
-                          .len         = len,
-                          .req         = r,
-                          .bitmap      = bitmap_io,
-                          .bitmap_buf  = bitmap_buf });
-
-                if(aio->bitmap != NULL)
-                        atomic_inc(&aio->bitmap->count);
-
-                err = submit_aio(&aio->aio);
-                if(err){
-                        printk("do_io - submit_aio failed, "
-                               "err = %d\n", err);
-                        req->error = 1;
-                        return;
-                }
+void do_io(struct io_thread_req *req)
+{
+       char *buf;
+       unsigned long len;
+       int n, nsectors, start, end, bit;
+       int err;
+       __u64 off;
+
+       nsectors = req->length / req->sectorsize;
+       start = 0;
+       do {
+               bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
+               end = start;
+               while((end < nsectors) &&
+                     (ubd_test_bit(end, (unsigned char *)
+                                   &req->sector_mask) == bit))
+                       end++;
+
+               off = req->offset + req->offsets[bit] +
+                       start * req->sectorsize;
+               len = (end - start) * req->sectorsize;
+               buf = &req->buffer[start * req->sectorsize];
+
+               err = os_seek_file(req->fds[bit], off);
+               if(err < 0){
+                       printk("do_io - lseek failed : err = %d\n", -err);
+                       req->error = 1;
+                       return;
+               }
+               if(req->op == UBD_READ){
+                       n = 0;
+                       do {
+                               buf = &buf[n];
+                               len -= n;
+                               n = os_read_file(req->fds[bit], buf, len);
+                               if (n < 0) {
+                                       printk("do_io - read failed, err = %d "
+                                              "fd = %d\n", -n, req->fds[bit]);
+                                       req->error = 1;
+                                       return;
+                               }
+                       } while((n < len) && (n != 0));
+                       if (n < len) memset(&buf[n], 0, len - n);
+               } else {
+                       n = os_write_file(req->fds[bit], buf, len);
+                       if(n != len){
+                               printk("do_io - write failed err = %d "
+                                      "fd = %d\n", -n, req->fds[bit]);
+                               req->error = 1;
+                               return;
+                       }
+               }
+
+               start = end;
+       } while(start < nsectors);
 
-                start = end;
-        } while(start < nsectors);
+       req->error = update_bitmap(req);
 }
+
+/* Changed in start_io_thread, which is serialized by being called only
+ * from ubd_init, which is an initcall.
+ */
+int kernel_fd = -1;
+
+/* Only changed by the io thread */
+int io_count = 0;
+
+int io_thread(void *arg)
+{
+       struct io_thread_req req;
+       int n;
+
+       ignore_sigwinch_sig();
+       while(1){
+               n = os_read_file(kernel_fd, &req, sizeof(req));
+               if(n != sizeof(req)){
+                       if(n < 0)
+                               printk("io_thread - read failed, fd = %d, "
+                                      "err = %d\n", kernel_fd, -n);
+                       else {
+                               printk("io_thread - short read, fd = %d, "
+                                      "length = %d\n", kernel_fd, n);
+                       }
+                       continue;
+               }
+               io_count++;
+               do_io(&req);
+               n = os_write_file(kernel_fd, &req, sizeof(req));
+               if(n != sizeof(req))
+                       printk("io_thread - write failed, fd = %d, err = %d\n",
+                              kernel_fd, -n);
+       }
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
new file mode 100644 (file)
index 0000000..b94d2bc
--- /dev/null
@@ -0,0 +1,75 @@
+/* 
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com)
+ * Licensed under the GPL
+ */
+
+#include <stddef.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include "asm/types.h"
+#include "user_util.h"
+#include "kern_util.h"
+#include "user.h"
+#include "ubd_user.h"
+#include "os.h"
+#include "cow.h"
+
+#include <endian.h>
+#include <byteswap.h>
+
+void ignore_sigwinch_sig(void)
+{
+       signal(SIGWINCH, SIG_IGN);
+}
+
+int start_io_thread(unsigned long sp, int *fd_out)
+{
+       int pid, fds[2], err;
+
+       err = os_pipe(fds, 1, 1);
+       if(err < 0){
+               printk("start_io_thread - os_pipe failed, err = %d\n", -err);
+               goto out;
+       }
+
+       kernel_fd = fds[0];
+       *fd_out = fds[1];
+
+       pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD,
+                   NULL);
+       if(pid < 0){
+               printk("start_io_thread - clone failed : errno = %d\n", errno);
+               err = -errno;
+               goto out_close;
+       }
+
+       return(pid);
+
+ out_close:
+       os_close_file(fds[0]);
+       os_close_file(fds[1]);
+       kernel_fd = -1;
+       *fd_out = -1;
+ out:
+       return(err);
+}
+
+/*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+ * adjust the settings for this buffer only.  This must remain at the end
+ * of the file.
+ * ---------------------------------------------------------------------------
+ * Local variables:
+ * c-file-style: "linux"
+ * End:
+ */
index 83f16877ab0828baf413d5e0025fe2b1965c23db..423bae9153f8adf359330a94321cfc6ab89ee422 100644 (file)
@@ -14,27 +14,15 @@ struct aio_thread_reply {
 };
 
 struct aio_context {
-       enum aio_type type;
-       int fd;
-       void *data;
-       int len;
-       unsigned long long offset;
        int reply_fd;
        struct aio_context *next;
 };
 
-#define INIT_AIO(aio_type, aio_fd, aio_data, aio_len, aio_offset, \
-                aio_reply_fd) \
-       { .type         = aio_type, \
-         .fd           = aio_fd, \
-         .data         = aio_data, \
-         .len          = aio_len, \
-         .offset       = aio_offset, \
-         .reply_fd     = aio_reply_fd }
-
 #define INIT_AIO_CONTEXT { .reply_fd   = -1, \
                           .next        = NULL }
 
-extern int submit_aio(struct aio_context *aio);
+extern int submit_aio(enum aio_type type, int fd, char *buf, int len,
+                     unsigned long long offset, int reply_fd,
+                      struct aio_context *aio);
 
 #endif
index f6e64026f9952b90d1af98625b152ef394c454e9..41cfb09442013325f7511949f8d6171e2bbac487 100644 (file)
@@ -6,7 +6,6 @@
 #include <stdlib.h>
 #include <unistd.h>
 #include <signal.h>
-#include <string.h>
 #include <errno.h>
 #include <sched.h>
 #include <sys/syscall.h>
 #include "user.h"
 #include "mode.h"
 
+struct aio_thread_req {
+        enum aio_type type;
+        int io_fd;
+        unsigned long long offset;
+        char *buf;
+        int len;
+        struct aio_context *aio;
+};
+
 static int aio_req_fd_r = -1;
 static int aio_req_fd_w = -1;
 
-static int update_aio(struct aio_context *aio, int res)
-{
-        if(res < 0)
-                aio->len = res;
-        else if((res == 0) && (aio->type == AIO_READ)){
-                /* This is the EOF case - we have hit the end of the file
-                 * and it ends in a partial block, so we fill the end of
-                 * the block with zeros and claim success.
-                 */
-                memset(aio->data, 0, aio->len);
-                aio->len = 0;
-        }
-        else if(res > 0){
-                aio->len -= res;
-                aio->data += res;
-                aio->offset += res;
-                return aio->len;
-        }
-
-        return 0;
-}
-
 #if defined(HAVE_AIO_ABI)
 #include <linux/aio_abi.h>
 
@@ -80,7 +66,8 @@ static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
  * that it now backs the mmapped area.
  */
 
-static int do_aio(aio_context_t ctx, struct aio_context *aio)
+static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
+                  int len, unsigned long long offset, struct aio_context *aio)
 {
         struct iocb iocb, *iocbp = &iocb;
         char c;
@@ -88,39 +75,40 @@ static int do_aio(aio_context_t ctx, struct aio_context *aio)
 
         iocb = ((struct iocb) { .aio_data      = (unsigned long) aio,
                                 .aio_reqprio   = 0,
-                                .aio_fildes    = aio->fd,
-                                .aio_buf       = (unsigned long) aio->data,
-                                .aio_nbytes    = aio->len,
-                                .aio_offset    = aio->offset,
+                                .aio_fildes    = fd,
+                                .aio_buf       = (unsigned long) buf,
+                                .aio_nbytes    = len,
+                                .aio_offset    = offset,
                                 .aio_reserved1 = 0,
                                 .aio_reserved2 = 0,
                                 .aio_reserved3 = 0 });
 
-        switch(aio->type){
+        switch(type){
         case AIO_READ:
                 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
+                err = io_submit(ctx, 1, &iocbp);
                 break;
         case AIO_WRITE:
                 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
+                err = io_submit(ctx, 1, &iocbp);
                 break;
         case AIO_MMAP:
                 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
                 iocb.aio_buf = (unsigned long) &c;
                 iocb.aio_nbytes = sizeof(c);
+                err = io_submit(ctx, 1, &iocbp);
                 break;
         default:
-                printk("Bogus op in do_aio - %d\n", aio->type);
+                printk("Bogus op in do_aio - %d\n", type);
                 err = -EINVAL;
-                goto out;
+                break;
         }
 
-        err = io_submit(ctx, 1, &iocbp);
         if(err > 0)
                 err = 0;
        else
                err = -errno;
 
- out:
         return err;
 }
 
@@ -129,9 +117,8 @@ static aio_context_t ctx = 0;
 static int aio_thread(void *arg)
 {
         struct aio_thread_reply reply;
-        struct aio_context *aio;
         struct io_event event;
-        int err, n;
+        int err, n, reply_fd;
 
         signal(SIGWINCH, SIG_IGN);
 
@@ -144,22 +131,14 @@ static int aio_thread(void *arg)
                                "errno = %d\n", errno);
                 }
                 else {
-                       /* This is safe as we've just a pointer here. */
-                       aio = (struct aio_context *) (long) event.data;
-                       if(update_aio(aio, event.res)){
-                               do_aio(ctx, aio);
-                               continue;
-                       }
-
                         reply = ((struct aio_thread_reply)
-                               { .data = aio,
-                                 .err  = aio->len });
-                       err = os_write_file(aio->reply_fd, &reply,
-                                           sizeof(reply));
+                                { .data = (void *) (long) event.data,
+                                  .err = event.res });
+                       reply_fd = ((struct aio_context *) reply.data)->reply_fd;
+                       err = os_write_file(reply_fd, &reply, sizeof(reply));
                         if(err != sizeof(reply))
-                               printk("aio_thread - write failed, "
-                                      "fd = %d, err = %d\n", aio->reply_fd,
-                                      -err);
+                               printk("aio_thread - write failed, fd = %d, "
+                                       "err = %d\n", aio_req_fd_r, -err);
                 }
         }
         return 0;
@@ -167,35 +146,35 @@ static int aio_thread(void *arg)
 
 #endif
 
-static int do_not_aio(struct aio_context *aio)
+static int do_not_aio(struct aio_thread_req *req)
 {
         char c;
         int err;
 
-        switch(aio->type){
+        switch(req->type){
         case AIO_READ:
-                err = os_seek_file(aio->fd, aio->offset);
+                err = os_seek_file(req->io_fd, req->offset);
                 if(err)
                         goto out;
 
-                err = os_read_file(aio->fd, aio->data, aio->len);
+                err = os_read_file(req->io_fd, req->buf, req->len);
                 break;
         case AIO_WRITE:
-                err = os_seek_file(aio->fd, aio->offset);
+                err = os_seek_file(req->io_fd, req->offset);
                 if(err)
                         goto out;
 
-                err = os_write_file(aio->fd, aio->data, aio->len);
+                err = os_write_file(req->io_fd, req->buf, req->len);
                 break;
         case AIO_MMAP:
-                err = os_seek_file(aio->fd, aio->offset);
+                err = os_seek_file(req->io_fd, req->offset);
                 if(err)
                         goto out;
 
-                err = os_read_file(aio->fd, &c, sizeof(c));
+                err = os_read_file(req->io_fd, &c, sizeof(c));
                 break;
         default:
-                printk("do_not_aio - bad request type : %d\n", aio->type);
+                printk("do_not_aio - bad request type : %d\n", req->type);
                 err = -EINVAL;
                 break;
         }
@@ -206,14 +185,14 @@ static int do_not_aio(struct aio_context *aio)
 
 static int not_aio_thread(void *arg)
 {
-        struct aio_context *aio;
+        struct aio_thread_req req;
         struct aio_thread_reply reply;
         int err;
 
         signal(SIGWINCH, SIG_IGN);
         while(1){
-                err = os_read_file(aio_req_fd_r, &aio, sizeof(aio));
-                if(err != sizeof(aio)){
+                err = os_read_file(aio_req_fd_r, &req, sizeof(req));
+                if(err != sizeof(req)){
                         if(err < 0)
                                 printk("not_aio_thread - read failed, "
                                        "fd = %d, err = %d\n", aio_req_fd_r,
@@ -224,34 +203,17 @@ static int not_aio_thread(void *arg)
                         }
                         continue;
                 }
- again:
-                err = do_not_aio(aio);
-
-                if(update_aio(aio, err))
-                        goto again;
-
-                reply = ((struct aio_thread_reply) { .data     = aio,
-                                                     .err      = aio->len });
-                err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
+                err = do_not_aio(&req);
+                reply = ((struct aio_thread_reply) { .data     = req.aio,
+                                                     .err      = err });
+                err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply));
                 if(err != sizeof(reply))
                         printk("not_aio_thread - write failed, fd = %d, "
                                "err = %d\n", aio_req_fd_r, -err);
         }
 }
 
-static int submit_aio_24(struct aio_context *aio)
-{
-        int err;
-
-        err = os_write_file(aio_req_fd_w, &aio, sizeof(aio));
-        if(err == sizeof(aio))
-                err = 0;
-
-        return err;
-}
-
 static int aio_pid = -1;
-static int (*submit_proc)(struct aio_context *aio);
 
 static int init_aio_24(void)
 {
@@ -283,33 +245,11 @@ static int init_aio_24(void)
 #endif
        printk("2.6 host AIO support not used - falling back to I/O "
               "thread\n");
-
-       submit_proc = submit_aio_24;
-
         return 0;
 }
 
 #ifdef HAVE_AIO_ABI
 #define DEFAULT_24_AIO 0
-static int submit_aio_26(struct aio_context *aio)
-{
-       struct aio_thread_reply reply;
-       int err;
-
-       err = do_aio(ctx, aio);
-       if(err){
-               reply = ((struct aio_thread_reply) { .data = aio,
-                                                    .err  = err });
-               err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
-               if(err != sizeof(reply))
-                       printk("submit_aio_26 - write failed, "
-                              "fd = %d, err = %d\n", aio->reply_fd, -err);
-               else err = 0;
-       }
-
-       return err;
-}
-
 static int init_aio_26(void)
 {
         unsigned long stack;
@@ -330,22 +270,39 @@ static int init_aio_26(void)
         aio_pid = err;
 
        printk("Using 2.6 host AIO\n");
+        return 0;
+}
+
+static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
+                        unsigned long long offset, struct aio_context *aio)
+{
+        struct aio_thread_reply reply;
+        int err;
 
-       submit_proc = submit_aio_26;
+        err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
+        if(err){
+                reply = ((struct aio_thread_reply) { .data = aio,
+                                                     .err  = err });
+                err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
+                if(err != sizeof(reply))
+                        printk("submit_aio_26 - write failed, "
+                               "fd = %d, err = %d\n", aio->reply_fd, -err);
+                else err = 0;
+        }
 
-        return 0;
+        return err;
 }
 
 #else
 #define DEFAULT_24_AIO 1
-static int submit_aio_26(struct aio_context *aio)
+static int init_aio_26(void)
 {
         return -ENOSYS;
 }
 
-static int init_aio_26(void)
+static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
+                        unsigned long long offset, struct aio_context *aio)
 {
-       submit_proc = submit_aio_26;
         return -ENOSYS;
 }
 #endif
@@ -412,7 +369,33 @@ static void exit_aio(void)
 
 __uml_exitcall(exit_aio);
 
-int submit_aio(struct aio_context *aio)
+static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
+                        unsigned long long offset, struct aio_context *aio)
 {
-       return (*submit_proc)(aio);
+        struct aio_thread_req req = { .type            = type,
+                                      .io_fd           = io_fd,
+                                      .offset          = offset,
+                                      .buf             = buf,
+                                      .len             = len,
+                                      .aio             = aio,
+        };
+        int err;
+
+        err = os_write_file(aio_req_fd_w, &req, sizeof(req));
+        if(err == sizeof(req))
+                err = 0;
+
+        return err;
+}
+
+int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
+               unsigned long long offset, int reply_fd,
+               struct aio_context *aio)
+{
+        aio->reply_fd = reply_fd;
+        if(aio_24)
+                return submit_aio_24(type, io_fd, buf, len, offset, aio);
+        else {
+                return submit_aio_26(type, io_fd, buf, len, offset, aio);
+        }
 }