Orangefs: Clean up pvfs2_devreq_read.
authorMartin Brandenburg <martin@omnibond.com>
Fri, 13 Nov 2015 19:26:10 +0000 (14:26 -0500)
committerMike Marshall <hubcap@omnibond.com>
Fri, 13 Nov 2015 19:50:20 +0000 (14:50 -0500)
* Kick invalid arguments out early, so handling them does not clutter
  the code.
* Avoid possibility of race by not releasing lock until completely done.
* Do not leak ops (memory) in certain error condition.
* Check for more error conditions.
* Put module name in all error and debug logs.
* Document behavior.

Signed-off-by: Martin Brandenburg <martin@omnibond.com>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
fs/orangefs/devpvfs2-req.c

index 34e2240f1d2952cb2e50236d82a800fc6424459f..e37b6479a6a19116c8ddd842018e117b3a2ed467 100644 (file)
@@ -104,110 +104,137 @@ static ssize_t pvfs2_devreq_read(struct file *file,
                                 char __user *buf,
                                 size_t count, loff_t *offset)
 {
-       int ret = 0;
-       ssize_t len = 0;
-       struct pvfs2_kernel_op_s *cur_op = NULL;
-       static __s32 magic = PVFS2_DEVREQ_MAGIC;
+       struct pvfs2_kernel_op_s *op, *temp;
        __s32 proto_ver = PVFS_KERNEL_PROTO_VERSION;
+       static __s32 magic = PVFS2_DEVREQ_MAGIC;
+       struct pvfs2_kernel_op_s *cur_op = NULL;
+       unsigned long ret;
 
+       /* We do not support blocking IO. */
        if (!(file->f_flags & O_NONBLOCK)) {
-               /* We do not support blocking reads/opens any more */
-               gossip_err("pvfs2: blocking reads are not supported! (pvfs2-client-core bug)\n");
+               gossip_err("orangefs: blocking reads are not supported! (pvfs2-client-core bug)\n");
                return -EINVAL;
-       } else {
-               struct pvfs2_kernel_op_s *op = NULL, *temp = NULL;
-               /* get next op (if any) from top of list */
-               spin_lock(&pvfs2_request_list_lock);
-               list_for_each_entry_safe(op, temp, &pvfs2_request_list, list) {
-                       __s32 fsid = fsid_of_op(op);
-                       /*
-                        * Check if this op's fsid is known and needs
-                        * remounting
-                        */
-                       if (fsid != PVFS_FS_ID_NULL &&
-                           fs_mount_pending(fsid) == 1) {
+       }
+
+       /*
+        * The client will do an ioctl to find MAX_ALIGNED_DEV_REQ_UPSIZE, then
+        * always read with that size buffer.
+        */
+       if (count != MAX_ALIGNED_DEV_REQ_UPSIZE) {
+               gossip_err("orangefs: client-core tried to read wrong size\n");
+               return -EINVAL;
+       }
+
+       /* Get next op (if any) from top of list. */
+       spin_lock(&pvfs2_request_list_lock);
+       list_for_each_entry_safe(op, temp, &pvfs2_request_list, list) {
+               __s32 fsid;
+               /* This lock is held past the end of the loop when we break. */
+               spin_lock(&op->lock);
+
+               fsid = fsid_of_op(op);
+               if (fsid != PVFS_FS_ID_NULL) {
+                       int ret;
+                       /* Skip ops whose filesystem needs to be mounted. */
+                       ret = fs_mount_pending(fsid);
+                       if (ret == 1) {
                                gossip_debug(GOSSIP_DEV_DEBUG,
-                                            "Skipping op tag %llu %s\n",
-                                            llu(op->tag),
-                                            get_opname_string(op));
+                                   "orangefs: skipping op tag %llu %s\n",
+                                   llu(op->tag), get_opname_string(op));
+                               spin_unlock(&op->lock);
+                               continue;
+                       /* Skip ops whose filesystem we don't know about unless
+                        * it is being mounted. */
+                       /* XXX: is there a better way to detect this? */
+                       } else if (ret == -1 &&
+                                  !(op->upcall.type == PVFS2_VFS_OP_FS_MOUNT ||
+                                    op->upcall.type == PVFS2_VFS_OP_GETATTR)) {
+                               gossip_debug(GOSSIP_DEV_DEBUG,
+                                   "orangefs: skipping op tag %llu %s\n",
+                                   llu(op->tag), get_opname_string(op));
+                               gossip_err(
+                                   "orangefs: ERROR: fs_mount_pending %d\n",
+                                   fsid);
+                               spin_unlock(&op->lock);
                                continue;
-                       } else {
-                               /*
-                                * op does not belong to any particular fsid
-                                * or already mounted.. let it through
-                                */
-                               cur_op = op;
-                               spin_lock(&cur_op->lock);
-                               list_del(&cur_op->list);
-                               spin_unlock(&cur_op->lock);
-                               break;
                        }
                }
-               spin_unlock(&pvfs2_request_list_lock);
+               /*
+                * Either this op does not pertain to a filesystem, is mounting
+                * a filesystem, or pertains to a mounted filesystem. Let it
+                * through.
+                */
+               cur_op = op;
+               break;
        }
 
-       if (cur_op) {
-               spin_lock(&cur_op->lock);
+       /*
+        * At this point we either have a valid op and can continue or have not
+        * found an op and must ask the client to try again later.
+        */
+       if (!cur_op) {
+               spin_unlock(&pvfs2_request_list_lock);
+               return -EAGAIN;
+       }
 
-               gossip_debug(GOSSIP_DEV_DEBUG,
-                            "client-core: reading op tag %llu %s\n",
-                            llu(cur_op->tag), get_opname_string(cur_op));
-               if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) {
-                       gossip_err("WARNING: Current op already queued...skipping\n");
-               } else  {
-                       /*
-                        * atomically move the operation to the
-                        * htable_ops_in_progress
-                        */
-                       set_op_state_inprogress(cur_op);
-                       pvfs2_devreq_add_op(cur_op);
-               }
+       gossip_debug(GOSSIP_DEV_DEBUG, "orangefs: reading op tag %llu %s\n",
+                    llu(cur_op->tag), get_opname_string(cur_op));
 
+       /*
+        * Such an op should never be on the list in the first place. If so, we
+        * will abort.
+        */
+       if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) {
+               gossip_err("orangefs: ERROR: Current op already queued.\n");
+               list_del(&cur_op->list);
                spin_unlock(&cur_op->lock);
-
-               /* Push the upcall out */
-               len = MAX_ALIGNED_DEV_REQ_UPSIZE;
-               if ((size_t) len <= count) {
-                   ret = copy_to_user(buf,
-                                      &proto_ver,
-                              sizeof(__s32));
-                   if (ret == 0) {
-                       ret = copy_to_user(buf + sizeof(__s32),
-                                          &magic,
-                                          sizeof(__s32));
-                       if (ret == 0) {
-                           ret = copy_to_user(buf+2 * sizeof(__s32),
-                                              &cur_op->tag,
-                                              sizeof(__u64));
-                           if (ret == 0) {
-                               ret = copy_to_user(
-                                       buf +
-                                         2 *
-                                         sizeof(__s32) +
-                                         sizeof(__u64),
-                                       &cur_op->upcall,
-                                       sizeof(struct pvfs2_upcall_s));
-                           }
-                       }
-                   }
-
-                   if (ret) {
-                       gossip_err("Failed to copy data to user space\n");
-                       len = -EFAULT;
-                   }
-               } else {
-                       gossip_err
-                           ("Failed to copy data to user space\n");
-                       len = -EIO;
-               }
-       } else if (file->f_flags & O_NONBLOCK) {
-               /*
-                * if in non-blocking mode, return EAGAIN since no requests are
-                * ready yet
-                */
-               len = -EAGAIN;
+               spin_unlock(&pvfs2_request_list_lock);
+               return -EAGAIN;
        }
-       return len;
+
+       /*
+        * Set the operation to be in progress and move it between lists since
+        * it has been sent to the client.
+        */
+       set_op_state_inprogress(cur_op);
+
+       list_del(&cur_op->list);
+       spin_unlock(&pvfs2_request_list_lock);
+       pvfs2_devreq_add_op(cur_op);
+       spin_unlock(&cur_op->lock);
+
+       /* Push the upcall out. */
+       ret = copy_to_user(buf, &proto_ver, sizeof(__s32));
+       if (ret != 0)
+               goto error;
+       ret = copy_to_user(buf+sizeof(__s32), &magic, sizeof(__s32));
+       if (ret != 0)
+               goto error;
+       ret = copy_to_user(buf+2 * sizeof(__s32), &cur_op->tag, sizeof(__u64));
+       if (ret != 0)
+               goto error;
+       ret = copy_to_user(buf+2*sizeof(__s32)+sizeof(__u64), &cur_op->upcall,
+                          sizeof(struct pvfs2_upcall_s));
+       if (ret != 0)
+               goto error;
+
+       /* The client only asks to read one size buffer. */
+       return MAX_ALIGNED_DEV_REQ_UPSIZE;
+error:
+       /*
+        * We were unable to copy the op data to the client. Put the op back in
+        * list. If client has crashed, the op will be purged later when the
+        * device is released.
+        */
+       gossip_err("orangefs: Failed to copy data to user space\n");
+       spin_lock(&pvfs2_request_list_lock);
+       spin_lock(&cur_op->lock);
+       set_op_state_waiting(cur_op);
+       pvfs2_devreq_remove_op(cur_op->tag);
+       list_add(&cur_op->list, &pvfs2_request_list);
+       spin_unlock(&cur_op->lock);
+       spin_unlock(&pvfs2_request_list_lock);
+       return -EFAULT;
 }
 
 /* Function for writev() callers into the device */