orangefs: don't bother with splitting iovecs
author Al Viro <viro@zeniv.linux.org.uk>
Thu, 8 Oct 2015 22:17:26 +0000 (18:17 -0400)
committer Mike Marshall <hubcap@omnibond.com>
Fri, 13 Nov 2015 16:23:02 +0000 (11:23 -0500)
copy_page_{to,from}_iter() advances the iov_iter just fine *and* it has no
problem with partially consumed segments.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
fs/orangefs/file.c

index 9a439b2e8bde32db429c945d915b9ac9fb1df690..ff7fe37f5a220eca29b757d7e9d4fbb2d5c76870 100644 (file)
@@ -256,168 +256,6 @@ out:
        return ret;
 }
 
-/*
- * The reason we need to do this is to be able to support readv and writev
- * that are larger than (pvfs_bufmap_size_query()) Default is
- * PVFS2_BUFMAP_DEFAULT_DESC_SIZE MB. What that means is that we will
- * create a new io vec descriptor for those memory addresses that
- * go beyond the limit. Return value for this routine is negative in case
- * of errors and 0 in case of success.
- *
- * Further, the new_nr_segs pointer is updated to hold the new value
- * of number of iovecs, the new_vec pointer is updated to hold the pointer
- * to the new split iovec, and the size array is an array of integers holding
- * the number of iovecs that straddle pvfs_bufmap_size_query().
- * The max_new_nr_segs value is computed by the caller and returned.
- * (It will be (count of all iov_len/ block_size) + 1).
- */
-static int split_iovecs(unsigned long max_new_nr_segs,         /* IN */
-                       unsigned long nr_segs,                  /* IN */
-                       const struct iovec *original_iovec,     /* IN */
-                       unsigned long *new_nr_segs,             /* OUT */
-                       struct iovec **new_vec,                 /* OUT */
-                       unsigned long *seg_count,               /* OUT */
-                       unsigned long **seg_array)              /* OUT */
-{
-       unsigned long seg;
-       unsigned long count = 0;
-       unsigned long begin_seg;
-       unsigned long tmpnew_nr_segs = 0;
-       struct iovec *new_iovec = NULL;
-       struct iovec *orig_iovec;
-       unsigned long *sizes = NULL;
-       unsigned long sizes_count = 0;
-
-       if (nr_segs <= 0 ||
-           original_iovec == NULL ||
-           new_nr_segs == NULL ||
-           new_vec == NULL ||
-           seg_count == NULL ||
-           seg_array == NULL ||
-           max_new_nr_segs <= 0) {
-               gossip_err("Invalid parameters to split_iovecs\n");
-               return -EINVAL;
-       }
-       *new_nr_segs = 0;
-       *new_vec = NULL;
-       *seg_count = 0;
-       *seg_array = NULL;
-       /* copy the passed in iovec descriptor to a temp structure */
-       orig_iovec = kmalloc_array(nr_segs,
-                                  sizeof(*orig_iovec),
-                                  PVFS2_BUFMAP_GFP_FLAGS);
-       if (orig_iovec == NULL) {
-               gossip_err(
-                   "split_iovecs: Could not allocate memory for %lu bytes!\n",
-                   (unsigned long)(nr_segs * sizeof(*orig_iovec)));
-               return -ENOMEM;
-       }
-       new_iovec = kcalloc(max_new_nr_segs,
-                           sizeof(*new_iovec),
-                           PVFS2_BUFMAP_GFP_FLAGS);
-       if (new_iovec == NULL) {
-               kfree(orig_iovec);
-               gossip_err(
-                   "split_iovecs: Could not allocate memory for %lu bytes!\n",
-                   (unsigned long)(max_new_nr_segs * sizeof(*new_iovec)));
-               return -ENOMEM;
-       }
-       sizes = kcalloc(max_new_nr_segs,
-                       sizeof(*sizes),
-                       PVFS2_BUFMAP_GFP_FLAGS);
-       if (sizes == NULL) {
-               kfree(new_iovec);
-               kfree(orig_iovec);
-               gossip_err(
-                   "split_iovecs: Could not allocate memory for %lu bytes!\n",
-                   (unsigned long)(max_new_nr_segs * sizeof(*sizes)));
-               return -ENOMEM;
-       }
-       /* copy the passed in iovec to a temp structure */
-       memcpy(orig_iovec, original_iovec, nr_segs * sizeof(*orig_iovec));
-       begin_seg = 0;
-repeat:
-       for (seg = begin_seg; seg < nr_segs; seg++) {
-               if (tmpnew_nr_segs >= max_new_nr_segs ||
-                   sizes_count >= max_new_nr_segs) {
-                       kfree(sizes);
-                       kfree(orig_iovec);
-                       kfree(new_iovec);
-                       gossip_err
-                           ("split_iovecs: exceeded the index limit (%lu)\n",
-                           tmpnew_nr_segs);
-                       return -EINVAL;
-               }
-               if (count + orig_iovec[seg].iov_len <
-                   pvfs_bufmap_size_query()) {
-                       count += orig_iovec[seg].iov_len;
-                       memcpy(&new_iovec[tmpnew_nr_segs],
-                              &orig_iovec[seg],
-                              sizeof(*new_iovec));
-                       tmpnew_nr_segs++;
-                       sizes[sizes_count]++;
-               } else {
-                       new_iovec[tmpnew_nr_segs].iov_base =
-                           orig_iovec[seg].iov_base;
-                       new_iovec[tmpnew_nr_segs].iov_len =
-                           (pvfs_bufmap_size_query() - count);
-                       tmpnew_nr_segs++;
-                       sizes[sizes_count]++;
-                       sizes_count++;
-                       begin_seg = seg;
-                       orig_iovec[seg].iov_base +=
-                           (pvfs_bufmap_size_query() - count);
-                       orig_iovec[seg].iov_len -=
-                           (pvfs_bufmap_size_query() - count);
-                       count = 0;
-                       break;
-               }
-       }
-       if (seg != nr_segs)
-               goto repeat;
-       else
-               sizes_count++;
-
-       *new_nr_segs = tmpnew_nr_segs;
-       /* new_iovec is freed by the caller */
-       *new_vec = new_iovec;
-       *seg_count = sizes_count;
-       /* seg_array is also freed by the caller */
-       *seg_array = sizes;
-       kfree(orig_iovec);
-       return 0;
-}
-
-static long bound_max_iovecs(const struct iovec *curr, unsigned long nr_segs,
-                            ssize_t *total_count)
-{
-       unsigned long i;
-       long max_nr_iovecs;
-       ssize_t total;
-       ssize_t count;
-
-       total = 0;
-       count = 0;
-       max_nr_iovecs = 0;
-       for (i = 0; i < nr_segs; i++) {
-               const struct iovec *iv = &curr[i];
-
-               count += iv->iov_len;
-               if (unlikely((ssize_t) (count | iv->iov_len) < 0))
-                       return -EINVAL;
-               if (total + iv->iov_len < pvfs_bufmap_size_query()) {
-                       total += iv->iov_len;
-                       max_nr_iovecs++;
-               } else {
-                       total =
-                           (total + iv->iov_len - pvfs_bufmap_size_query());
-                       max_nr_iovecs += (total / pvfs_bufmap_size_query() + 2);
-               }
-       }
-       *total_count = count;
-       return max_nr_iovecs;
-}
-
 /*
  * Common entry point for read/write/readv/writev
  * This function will dispatch it to either the direct I/O
@@ -431,25 +269,10 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file,
        struct inode *inode = file->f_mapping->host;
        struct pvfs2_inode_s *pvfs2_inode = PVFS2_I(inode);
        struct pvfs2_khandle *handle = &pvfs2_inode->refn.khandle;
-       ssize_t ret;
-       ssize_t total_count;
-       unsigned int to_free;
-       size_t count;
-       unsigned long seg;
-       unsigned long new_nr_segs;
-       unsigned long max_new_nr_segs;
-       unsigned long seg_count;
-       unsigned long *seg_array;
-       struct iovec *iovecptr;
-       struct iovec *ptr;
-
-       total_count = 0;
-       ret = -EINVAL;
-       count = 0;
-       to_free = 0;
-
-       /* Compute total and max number of segments after split */
-       max_new_nr_segs = bound_max_iovecs(iov, nr_segs, &count);
+       struct iov_iter iter;
+       size_t count = iov_length(iov, nr_segs);
+       ssize_t total_count = 0;
+       ssize_t ret = -EINVAL;
 
        gossip_debug(GOSSIP_FILE_DEBUG,
                "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
@@ -472,93 +295,10 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file,
                goto out;
        }
 
-       /*
-        * if the total size of data transfer requested is greater than
-        * the kernel-set blocksize of PVFS2, then we split the iovecs
-        * such that no iovec description straddles a block size limit
-        */
-
-       gossip_debug(GOSSIP_FILE_DEBUG,
-                    "%s: pvfs_bufmap_size:%d\n",
-                    __func__,
-                    pvfs_bufmap_size_query());
-
-       if (count > pvfs_bufmap_size_query()) {
-               /*
-                * Split up the given iovec description such that
-                * no iovec descriptor straddles over the block-size limitation.
-                * This makes us our job easier to stage the I/O.
-                * In addition, this function will also compute an array
-                * with seg_count entries that will store the number of
-                * segments that straddle the block-size boundaries.
-                */
-               ret = split_iovecs(max_new_nr_segs,     /* IN */
-                                  nr_segs,             /* IN */
-                                  iov,                 /* IN */
-                                  &new_nr_segs,        /* OUT */
-                                  &iovecptr,           /* OUT */
-                                  &seg_count,          /* OUT */
-                                  &seg_array);         /* OUT */
-               if (ret < 0) {
-                       gossip_err("%s: Failed to split iovecs to satisfy larger than blocksize readv/writev request %zd\n",
-                               __func__,
-                               ret);
-                       goto out;
-               }
-               gossip_debug(GOSSIP_FILE_DEBUG,
-                            "%s: Splitting iovecs from %lu to %lu"
-                            " [max_new %lu]\n",
-                            __func__,
-                            nr_segs,
-                            new_nr_segs,
-                            max_new_nr_segs);
-               /* We must free seg_array and iovecptr */
-               to_free = 1;
-       } else {
-               new_nr_segs = nr_segs;
-               /* use the given iovec description */
-               iovecptr = (struct iovec *)iov;
-               /* There is only 1 element in the seg_array */
-               seg_count = 1;
-               /* and its value is the number of segments passed in */
-               seg_array = &nr_segs;
-               /* We dont have to free up anything */
-               to_free = 0;
-       }
-       ptr = iovecptr;
+       iov_iter_init(&iter, type == PVFS_IO_READ ? READ : WRITE,
+                             iov, nr_segs, count);
 
-       gossip_debug(GOSSIP_FILE_DEBUG,
-                    "%s(%pU) %zd@%llu\n",
-                    __func__,
-                    handle,
-                    count,
-                    llu(*offset));
-       gossip_debug(GOSSIP_FILE_DEBUG,
-                    "%s(%pU): new_nr_segs: %lu, seg_count: %lu\n",
-                    __func__,
-                    handle,
-                    new_nr_segs, seg_count);
-
-/* PVFS2_KERNEL_DEBUG is a CFLAGS define. */
-#ifdef PVFS2_KERNEL_DEBUG
-       for (seg = 0; seg < new_nr_segs; seg++)
-               gossip_debug(GOSSIP_FILE_DEBUG,
-                            "%s: %d) %p to %p [%d bytes]\n",
-                            __func__,
-                            (int)seg + 1,
-                            iovecptr[seg].iov_base,
-                            iovecptr[seg].iov_base + iovecptr[seg].iov_len,
-                            (int)iovecptr[seg].iov_len);
-       for (seg = 0; seg < seg_count; seg++)
-               gossip_debug(GOSSIP_FILE_DEBUG,
-                            "%s: %zd) %lu\n",
-                            __func__,
-                            seg + 1,
-                            seg_array[seg]);
-#endif
-       seg = 0;
        while (total_count < count) {
-               struct iov_iter iter;
                size_t each_count;
                size_t amt_complete;
 
@@ -579,9 +319,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file,
                             handle,
                             (int)*offset);
 
-               iov_iter_init(&iter, type == PVFS_IO_READ ? READ : WRITE,
-                             ptr, seg_array[seg], each_count);
-
                ret = wait_for_direct_io(type, inode, offset, &iter,
                                each_count, 0);
                gossip_debug(GOSSIP_FILE_DEBUG,
@@ -593,9 +330,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file,
                if (ret < 0)
                        goto out;
 
-               /* advance the iovec pointer */
-               ptr += seg_array[seg];
-               seg++;
                *offset += ret;
                total_count += ret;
                amt_complete = ret;
@@ -617,10 +351,6 @@ static ssize_t do_readv_writev(enum PVFS_io_type type, struct file *file,
        if (total_count > 0)
                ret = total_count;
 out:
-       if (to_free) {
-               kfree(iovecptr);
-               kfree(seg_array);
-       }
        if (ret > 0) {
                if (type == PVFS_IO_READ) {
                        file_accessed(file);