direct-io: Handle O_(D)SYNC AIO
authorChristoph Hellwig <hch@infradead.org>
Wed, 4 Sep 2013 13:04:40 +0000 (15:04 +0200)
committerAl Viro <viro@zeniv.linux.org.uk>
Wed, 4 Sep 2013 13:23:46 +0000 (09:23 -0400)
Call generic_write_sync() from the deferred I/O completion handler if
O_DSYNC is set for a write request.  Also make sure various callers
don't call generic_write_sync if the direct I/O code returns
-EIOCBQUEUED.

Based on an earlier patch from Jan Kara <jack@suse.cz> with updates from
Jeff Moyer <jmoyer@redhat.com> and Darrick J. Wong <darrick.wong@oracle.com>.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
fs/block_dev.c
fs/btrfs/file.c
fs/cifs/file.c
fs/direct-io.c
fs/ext4/file.c
mm/filemap.c

index c7bda5cd3da74daa998d1fda16c3e78240d7e872..1173a4ee0830a41fd39d1546921af239645d9507 100644 (file)
@@ -1519,7 +1519,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
        blk_start_plug(&plug);
        ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
-       if (ret > 0 || ret == -EIOCBQUEUED) {
+       if (ret > 0) {
                ssize_t err;
 
                err = generic_write_sync(file, pos, ret);
index 8e686a427ce2e8e5e824419321f1f8f2dfe19acb..4d2eb6417145964c8731bf5e56673a0e816d59aa 100644 (file)
@@ -1727,7 +1727,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
         */
        BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
        BTRFS_I(inode)->last_sub_trans = root->log_transid;
-       if (num_written > 0 || num_written == -EIOCBQUEUED) {
+       if (num_written > 0) {
                err = generic_write_sync(file, pos, num_written);
                if (err < 0 && num_written > 0)
                        num_written = err;
index 7e36ae34e9479b2614d61c919d393291593ef28f..9d0dd952ad7954ae3dcbfaeb5a953a0e0b13a99e 100644 (file)
@@ -2553,7 +2553,7 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
                mutex_unlock(&inode->i_mutex);
        }
 
-       if (rc > 0 || rc == -EIOCBQUEUED) {
+       if (rc > 0) {
                ssize_t err;
 
                err = generic_write_sync(file, pos, rc);
index 8b31b9f449f4603f0a68336f3f716aab751a6231..1782023bd68a6655d6def2258263d22080c06c33 100644 (file)
@@ -266,8 +266,18 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
                dio->end_io(dio->iocb, offset, transferred, dio->private);
 
        inode_dio_done(dio->inode);
-       if (is_async)
+       if (is_async) {
+               if (dio->rw & WRITE) {
+                       int err;
+
+                       err = generic_write_sync(dio->iocb->ki_filp, offset,
+                                                transferred);
+                       if (err < 0 && ret > 0)
+                               ret = err;
+               }
+
                aio_complete(dio->iocb, ret, 0);
+       }
 
        kmem_cache_free(dio_cache, dio);
        return ret;
@@ -1182,11 +1192,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
                }
        }
 
-       /*
-        * Will be decremented at I/O completion time.
-        */
-       atomic_inc(&inode->i_dio_count);
-
        /*
         * For file extending writes updating i_size before data
         * writeouts complete can expose uninitialized blocks. So
@@ -1195,11 +1200,33 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
         */
        dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
                (end > i_size_read(inode)));
-
-       retval = 0;
-
        dio->inode = inode;
        dio->rw = rw;
+
+       /*
+        * For AIO O_(D)SYNC writes we need to defer completions to a workqueue
+        * so that we can call ->fsync.
+        */
+       if (dio->is_async && (rw & WRITE) &&
+           ((iocb->ki_filp->f_flags & O_DSYNC) ||
+            IS_SYNC(iocb->ki_filp->f_mapping->host))) {
+               retval = dio_set_defer_completion(dio);
+               if (retval) {
+                       /*
+                        * We grab i_mutex only for reads so we don't have
+                        * to release it here
+                        */
+                       kmem_cache_free(dio_cache, dio);
+                       goto out;
+               }
+       }
+
+       /*
+        * Will be decremented at I/O completion time.
+        */
+       atomic_inc(&inode->i_dio_count);
+
+       retval = 0;
        sdio.blkbits = blkbits;
        sdio.blkfactor = i_blkbits - blkbits;
        sdio.block_in_file = offset >> blkbits;
index 319c9d26279a94fa7039dc2f8730747fe2404697..3da21945ff1fff3e16b8b70b332a0a6ee330c93e 100644 (file)
@@ -149,7 +149,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
        ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
        mutex_unlock(&inode->i_mutex);
 
-       if (ret > 0 || ret == -EIOCBQUEUED) {
+       if (ret > 0) {
                ssize_t err;
 
                err = generic_write_sync(file, pos, ret);
index 4b51ac1acae7c6ae5233afeb8018dd31cadf909b..731a2c24532df32ca532184e7c714911a338dd9c 100644 (file)
@@ -2550,7 +2550,7 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
        mutex_unlock(&inode->i_mutex);
 
-       if (ret > 0 || ret == -EIOCBQUEUED) {
+       if (ret > 0) {
                ssize_t err;
 
                err = generic_write_sync(file, pos, ret);