cifs: convert cifs_iovec_read to use async reads
authorJeff Layton <jlayton@redhat.com>
Wed, 16 May 2012 11:13:17 +0000 (07:13 -0400)
committerSteve French <sfrench@us.ibm.com>
Thu, 17 May 2012 01:13:31 +0000 (20:13 -0500)
Convert cifs_iovec_read to use async I/O. This also raises the limit on
the rsize for uncached reads. We first allocate a set of pages to hold
the replies, issue the reads in parallel, and finally collect the
replies and copy the results into the iovec.

A possible future optimization would be to kmap and inline the iovec
buffers and read the data from the socket directly into them. That would
require some rather complex conversion of the iovec into a kvec, however.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
fs/cifs/cifsproto.h
fs/cifs/file.c

index 63e91c79564dacbf302e868ab27a07b331598acb..eeb789d8e85500699df3493b00091b7f4e7b6a36 100644 (file)
@@ -465,6 +465,8 @@ extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
 /* asynchronous read support */
 struct cifs_readdata {
        struct kref                     refcount;
+       struct list_head                list;
+       struct completion               done;
        struct cifsFileInfo             *cfile;
        struct address_space            *mapping;
        __u64                           offset;
index d2a4259408e6b37c09bf161574542282a1845b26..4b5fe398cbf6de524d1be3b12170ef113093c479 100644 (file)
@@ -2348,6 +2348,8 @@ cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete)
                        sizeof(struct kvec) * nr_vecs, GFP_KERNEL);
        if (rdata != NULL) {
                kref_init(&rdata->refcount);
+               INIT_LIST_HEAD(&rdata->list);
+               init_completion(&rdata->done);
                INIT_WORK(&rdata->work, complete);
                INIT_LIST_HEAD(&rdata->pages);
        }
@@ -2366,6 +2368,45 @@ cifs_readdata_release(struct kref *refcount)
        kfree(rdata);
 }
 
+static int
+cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
+{
+       int rc = 0;
+       struct page *page, *tpage;
+       unsigned int i;
+
+       for (i = 0; i < npages; i++) {
+               page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
+               if (!page) {
+                       rc = -ENOMEM;
+                       break;
+               }
+               list_add(&page->lru, list);
+       }
+
+       if (rc) {
+               list_for_each_entry_safe(page, tpage, list, lru) {
+                       list_del(&page->lru);
+                       put_page(page);
+               }
+       }
+       return rc;
+}
+
+static void
+cifs_uncached_readdata_release(struct kref *refcount)
+{
+       struct page *page, *tpage;
+       struct cifs_readdata *rdata = container_of(refcount,
+                                       struct cifs_readdata, refcount);
+
+       list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
+               list_del(&page->lru);
+               put_page(page);
+       }
+       cifs_readdata_release(refcount);
+}
+
 static int
 cifs_retry_async_readv(struct cifs_readdata *rdata)
 {
@@ -2383,24 +2424,139 @@ cifs_retry_async_readv(struct cifs_readdata *rdata)
        return rc;
 }
 
+/**
+ * cifs_readdata_to_iov - copy data from pages in response to an iovec
+ * @rdata:     the readdata response with list of pages holding data
+ * @iov:       vector in which we should copy the data
+ * @nr_segs:   number of segments in vector
+ * @offset:    offset into file of the first iovec
+ * @copied:    used to return the amount of data copied to the iov
+ *
+ * This function copies data from a list of pages in a readdata response into
+ * an array of iovecs. It will first calculate where the data should go
+ * based on the info in the readdata and then copy the data into that spot.
+ */
+static ssize_t
+cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
+                       unsigned long nr_segs, loff_t offset, ssize_t *copied)
+{
+       int rc = 0;
+       struct iov_iter ii;
+       size_t pos = rdata->offset - offset;
+       struct page *page, *tpage;
+       ssize_t remaining = rdata->bytes;
+       unsigned char *pdata;
+
+       /* set up iov_iter and advance to the correct offset */
+       iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
+       iov_iter_advance(&ii, pos);
+
+       *copied = 0;
+       list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
+               ssize_t copy;
+
+               /* copy a whole page or whatever's left */
+               copy = min_t(ssize_t, remaining, PAGE_SIZE);
+
+               /* ...but limit it to whatever space is left in the iov */
+               copy = min_t(ssize_t, copy, iov_iter_count(&ii));
+
+               /* go while there's data to be copied and no errors */
+               if (copy && !rc) {
+                       pdata = kmap(page);
+                       rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
+                                               (int)copy);
+                       kunmap(page);
+                       if (!rc) {
+                               *copied += copy;
+                               remaining -= copy;
+                               iov_iter_advance(&ii, copy);
+                       }
+               }
+
+               list_del(&page->lru);
+               put_page(page);
+       }
+
+       return rc;
+}
+
+static void
+cifs_uncached_readv_complete(struct work_struct *work)
+{
+       struct cifs_readdata *rdata = container_of(work,
+                                               struct cifs_readdata, work);
+
+       /* if the result is non-zero then the pages weren't kmapped */
+       if (rdata->result == 0) {
+               struct page *page;
+
+               list_for_each_entry(page, &rdata->pages, lru)
+                       kunmap(page);
+       }
+
+       complete(&rdata->done);
+       kref_put(&rdata->refcount, cifs_uncached_readdata_release);
+}
+
+static int
+cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
+                               unsigned int remaining)
+{
+       int len = 0;
+       struct page *page, *tpage;
+
+       rdata->nr_iov = 1;
+       list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
+               if (remaining >= PAGE_SIZE) {
+                       /* enough data to fill the page */
+                       rdata->iov[rdata->nr_iov].iov_base = kmap(page);
+                       rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE;
+                       cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
+                               rdata->nr_iov, page->index,
+                               rdata->iov[rdata->nr_iov].iov_base,
+                               rdata->iov[rdata->nr_iov].iov_len);
+                       ++rdata->nr_iov;
+                       len += PAGE_SIZE;
+                       remaining -= PAGE_SIZE;
+               } else if (remaining > 0) {
+                       /* enough for partial page, fill and zero the rest */
+                       rdata->iov[rdata->nr_iov].iov_base = kmap(page);
+                       rdata->iov[rdata->nr_iov].iov_len = remaining;
+                       cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
+                               rdata->nr_iov, page->index,
+                               rdata->iov[rdata->nr_iov].iov_base,
+                               rdata->iov[rdata->nr_iov].iov_len);
+                       memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
+                               '\0', PAGE_SIZE - remaining);
+                       ++rdata->nr_iov;
+                       len += remaining;
+                       remaining = 0;
+               } else {
+                       /* no need to hold page hostage */
+                       list_del(&page->lru);
+                       put_page(page);
+               }
+       }
+
+       return len;
+}
+
 static ssize_t
 cifs_iovec_read(struct file *file, const struct iovec *iov,
                 unsigned long nr_segs, loff_t *poffset)
 {
-       int rc;
-       int xid;
-       ssize_t total_read;
-       unsigned int bytes_read = 0;
+       ssize_t rc;
        size_t len, cur_len;
-       int iov_offset = 0;
+       ssize_t total_read = 0;
+       loff_t offset = *poffset;
+       unsigned int npages;
        struct cifs_sb_info *cifs_sb;
-       struct cifs_tcon *pTcon;
+       struct cifs_tcon *tcon;
        struct cifsFileInfo *open_file;
-       struct smb_com_read_rsp *pSMBr;
-       struct cifs_io_parms io_parms;
-       char *read_data;
-       unsigned int rsize;
-       __u32 pid;
+       struct cifs_readdata *rdata, *tmp;
+       struct list_head rdata_list;
+       pid_t pid;
 
        if (!nr_segs)
                return 0;
@@ -2409,14 +2565,10 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
        if (!len)
                return 0;
 
-       xid = GetXid();
+       INIT_LIST_HEAD(&rdata_list);
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-
-       /* FIXME: set up handlers for larger reads and/or convert to async */
-       rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
-
        open_file = file->private_data;
-       pTcon = tlink_tcon(open_file->tlink);
+       tcon = tlink_tcon(open_file->tlink);
 
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
@@ -2426,56 +2578,78 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
        if ((file->f_flags & O_ACCMODE) == O_WRONLY)
                cFYI(1, "attempting read on write only file instance");
 
-       for (total_read = 0; total_read < len; total_read += bytes_read) {
-               cur_len = min_t(const size_t, len - total_read, rsize);
-               rc = -EAGAIN;
-               read_data = NULL;
+       do {
+               cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
+               npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
 
-               while (rc == -EAGAIN) {
-                       int buf_type = CIFS_NO_BUFFER;
-                       if (open_file->invalidHandle) {
-                               rc = cifs_reopen_file(open_file, true);
-                               if (rc != 0)
-                                       break;
-                       }
-                       io_parms.netfid = open_file->netfid;
-                       io_parms.pid = pid;
-                       io_parms.tcon = pTcon;
-                       io_parms.offset = *poffset;
-                       io_parms.length = cur_len;
-                       rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
-                                        &read_data, &buf_type);
-                       pSMBr = (struct smb_com_read_rsp *)read_data;
-                       if (read_data) {
-                               char *data_offset = read_data + 4 +
-                                               le16_to_cpu(pSMBr->DataOffset);
-                               if (memcpy_toiovecend(iov, data_offset,
-                                                     iov_offset, bytes_read))
-                                       rc = -EFAULT;
-                               if (buf_type == CIFS_SMALL_BUFFER)
-                                       cifs_small_buf_release(read_data);
-                               else if (buf_type == CIFS_LARGE_BUFFER)
-                                       cifs_buf_release(read_data);
-                               read_data = NULL;
-                               iov_offset += bytes_read;
-                       }
+               /* allocate a readdata struct */
+               rdata = cifs_readdata_alloc(npages,
+                                           cifs_uncached_readv_complete);
+               if (!rdata) {
+                       rc = -ENOMEM;
+                       goto error;
                }
 
-               if (rc || (bytes_read == 0)) {
-                       if (total_read) {
-                               break;
-                       } else {
-                               FreeXid(xid);
-                               return rc;
+               rc = cifs_read_allocate_pages(&rdata->pages, npages);
+               if (rc)
+                       goto error;
+
+               rdata->cfile = cifsFileInfo_get(open_file);
+               rdata->offset = offset;
+               rdata->bytes = cur_len;
+               rdata->pid = pid;
+               rdata->marshal_iov = cifs_uncached_read_marshal_iov;
+
+               rc = cifs_retry_async_readv(rdata);
+error:
+               if (rc) {
+                       kref_put(&rdata->refcount,
+                                cifs_uncached_readdata_release);
+                       break;
+               }
+
+               list_add_tail(&rdata->list, &rdata_list);
+               offset += cur_len;
+               len -= cur_len;
+       } while (len > 0);
+
+       /* if at least one read request send succeeded, then reset rc */
+       if (!list_empty(&rdata_list))
+               rc = 0;
+
+       /* the loop below should proceed in the order of increasing offsets */
+restart_loop:
+       list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
+               if (!rc) {
+                       ssize_t copied;
+
+                       /* FIXME: freezable sleep too? */
+                       rc = wait_for_completion_killable(&rdata->done);
+                       if (rc)
+                               rc = -EINTR;
+                       else if (rdata->result)
+                               rc = rdata->result;
+                       else {
+                               rc = cifs_readdata_to_iov(rdata, iov,
+                                                       nr_segs, *poffset,
+                                                       &copied);
+                               total_read += copied;
+                       }
+
+                       /* resend call if it's a retryable error */
+                       if (rc == -EAGAIN) {
+                               rc = cifs_retry_async_readv(rdata);
+                               goto restart_loop;
                        }
-               } else {
-                       cifs_stats_bytes_read(pTcon, bytes_read);
-                       *poffset += bytes_read;
                }
+               list_del_init(&rdata->list);
+               kref_put(&rdata->refcount, cifs_uncached_readdata_release);
        }
 
-       FreeXid(xid);
-       return total_read;
+       cifs_stats_bytes_read(tcon, total_read);
+       *poffset += total_read;
+
+       return total_read ? total_read : rc;
 }
 
 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,