From 78699e29fd784a4613d254a22627f336c55c4a76 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 11 Feb 2016 23:07:19 -0500 Subject: [PATCH] orangefs: delay freeing slot until cancel completes Make cancels reuse the aborted read/write op, to make sure they do not fail on lack of memory. Don't issue a cancel unless the daemon has seen our read/write, has not replied and isn't being shut down. If cancel *is* issued, don't wait for it to complete; stash the slot in there and just have it freed when cancel is finally replied to or purged (and delay dropping the reference until then, obviously). Signed-off-by: Al Viro Signed-off-by: Mike Marshall --- fs/orangefs/devorangefs-req.c | 7 ++ fs/orangefs/file.c | 16 +--- fs/orangefs/orangefs-cache.c | 16 ++-- fs/orangefs/orangefs-kernel.h | 40 ++++++++-- fs/orangefs/orangefs-mod.c | 2 - fs/orangefs/orangefs-utils.c | 32 -------- fs/orangefs/waitqueue.c | 133 ++++++++++------------------------ 7 files changed, 93 insertions(+), 153 deletions(-) diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c index 37278f5878b3..6a7df1204bfc 100644 --- a/fs/orangefs/devorangefs-req.c +++ b/fs/orangefs/devorangefs-req.c @@ -438,6 +438,8 @@ wakeup: } } out: + if (unlikely(op_is_cancel(op))) + put_cancel(op); op_release(op); return ret; @@ -546,6 +548,11 @@ int is_daemon_in_service(void) return in_service; } +bool __is_daemon_in_service(void) +{ + return open_access_count == 1; +} + static inline long check_ioctl_command(unsigned int command) { /* Check for valid ioctl codes */ diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 193671c137c3..3b1e9e83eb91 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -181,17 +181,6 @@ populate_shared_memory: } if (ret < 0) { - /* - * XXX: needs to be optimized - we only need to cancel if it - * had been seen by daemon and not completed - */ - if (!op_state_serviced(new_op)) { - orangefs_cancel_op_in_progress(new_op->tag); - } else { - complete(&new_op->done); - } - orangefs_bufmap_put(buffer_index); - buffer_index = -1; /* * don't write an error to syslog on signaled operation * termination unless we've got debugging turned on, as @@ -207,7 +196,10 @@ populate_shared_memory: type == ORANGEFS_IO_READ ? "read from" : "write to", handle, ret); - goto out; + if (orangefs_cancel_op_in_progress(new_op)) + return ret; + + goto done_copying; } /* diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c index 3b3de91406ca..59ab0c207e90 100644 --- a/fs/orangefs/orangefs-cache.c +++ b/fs/orangefs/orangefs-cache.c @@ -101,6 +101,15 @@ char *get_opname_string(struct orangefs_kernel_op_s *new_op) return "OP_UNKNOWN?"; } +void orangefs_new_tag(struct orangefs_kernel_op_s *op) +{ + spin_lock(&next_tag_value_lock); + op->tag = next_tag_value++; + if (next_tag_value == 0) + next_tag_value = 100; + spin_unlock(&next_tag_value_lock); +} + struct orangefs_kernel_op_s *op_alloc(__s32 type) { struct orangefs_kernel_op_s *new_op = NULL; @@ -120,14 +129,9 @@ struct orangefs_kernel_op_s *op_alloc(__s32 type) new_op->downcall.status = -1; new_op->op_state = OP_VFS_STATE_UNKNOWN; - new_op->tag = 0; /* initialize the op specific tag and upcall credentials */ - spin_lock(&next_tag_value_lock); - new_op->tag = next_tag_value++; - if (next_tag_value == 0) - next_tag_value = 100; - spin_unlock(&next_tag_value_lock); + orangefs_new_tag(new_op); new_op->upcall.type = type; new_op->attempts = 0; gossip_debug(GOSSIP_CACHE_DEBUG, diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index a8cde9019efe..3ceeeaed4143 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -190,9 +190,14 @@ struct orangefs_kernel_op_s { /* * Set uses_shared_memory to 1 if this operation uses shared memory. * If true, then a retry on the op must also get a new shared memory - * buffer and re-populate it. + * buffer and re-populate it. Cancels don't care - it only matters + * for service_operation() retry logics and cancels don't go through + * it anymore. */ - int uses_shared_memory; + union { + int uses_shared_memory; + int slot_to_free; + }; struct orangefs_upcall_s upcall; struct orangefs_downcall_s downcall; @@ -219,17 +224,13 @@ static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op) op->op_state = OP_VFS_STATE_SERVICED; wake_up_interruptible(&op->waitq); } -static inline void set_op_state_purged(struct orangefs_kernel_op_s *op) -{ - op->op_state |= OP_VFS_STATE_PURGED; - wake_up_interruptible(&op->waitq); -} #define op_state_waiting(op) ((op)->op_state & OP_VFS_STATE_WAITING) #define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR) #define op_state_serviced(op) ((op)->op_state & OP_VFS_STATE_SERVICED) #define op_state_purged(op) ((op)->op_state & OP_VFS_STATE_PURGED) #define op_state_given_up(op) ((op)->op_state & OP_VFS_STATE_GIVEN_UP) +#define op_is_cancel(op) ((op)->upcall.type == ORANGEFS_VFS_OP_CANCEL) static inline void get_op(struct orangefs_kernel_op_s *op) { @@ -249,6 +250,27 @@ static inline void op_release(struct orangefs_kernel_op_s *op) } } +extern void orangefs_bufmap_put(int); +static inline void put_cancel(struct orangefs_kernel_op_s *op) +{ + orangefs_bufmap_put(op->slot_to_free); + op_release(op); +} + +static inline void set_op_state_purged(struct orangefs_kernel_op_s *op) +{ + spin_lock(&op->lock); + if (unlikely(op_is_cancel(op))) { + list_del(&op->list); + spin_unlock(&op->lock); + put_cancel(op); + } else { + op->op_state |= OP_VFS_STATE_PURGED; + wake_up_interruptible(&op->waitq); + spin_unlock(&op->lock); + } +} + /* per inode private orangefs info */ struct orangefs_inode_s { struct orangefs_object_kref refn; @@ -448,6 +470,7 @@ static inline int match_handle(struct orangefs_khandle resp_handle, int op_cache_initialize(void); int op_cache_finalize(void); struct orangefs_kernel_op_s *op_alloc(__s32 type); +void orangefs_new_tag(struct orangefs_kernel_op_s *op); char *get_opname_string(struct orangefs_kernel_op_s *new_op); int orangefs_inode_cache_initialize(void); @@ -528,6 +551,7 @@ ssize_t orangefs_inode_read(struct inode *inode, int orangefs_dev_init(void); void orangefs_dev_cleanup(void); int is_daemon_in_service(void); +bool __is_daemon_in_service(void); int fs_mount_pending(__s32 fsid); /* @@ -562,7 +586,7 @@ void orangefs_set_signals(sigset_t *); int orangefs_unmount_sb(struct super_block *sb); -int orangefs_cancel_op_in_progress(__u64 tag); +bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op); static inline __u64 orangefs_convert_time_field(const struct timespec *ts) { diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index 7639ab2df711..965959cb11d1 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -260,14 +260,12 @@ void purge_inprogress_ops(void) next, &htable_ops_in_progress[i], list) { - spin_lock(&op->lock); gossip_debug(GOSSIP_INIT_DEBUG, "pvfs2-client-core: purging in-progress op tag " "%llu %s\n", llu(op->tag), get_opname_string(op)); set_op_state_purged(op); - spin_unlock(&op->lock); } spin_unlock(&htable_ops_in_progress_lock); } diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c index fa3ed8ad35be..08f9c2dab0fe 100644 --- a/fs/orangefs/orangefs-utils.c +++ b/fs/orangefs/orangefs-utils.c @@ -688,38 +688,6 @@ int orangefs_unmount_sb(struct super_block *sb) return ret; } -/* - * NOTE: on successful cancellation, be sure to return -EINTR, as - * that's the return value the caller expects - */ -int orangefs_cancel_op_in_progress(__u64 tag) -{ - int ret = -EINVAL; - struct orangefs_kernel_op_s *new_op = NULL; - - gossip_debug(GOSSIP_UTILS_DEBUG, - "orangefs_cancel_op_in_progress called on tag %llu\n", - llu(tag)); - - new_op = op_alloc(ORANGEFS_VFS_OP_CANCEL); - if (!new_op) - return -ENOMEM; - new_op->upcall.req.cancel.op_tag = tag; - - gossip_debug(GOSSIP_UTILS_DEBUG, - "Attempting ORANGEFS operation cancellation of tag %llu\n", - llu(new_op->upcall.req.cancel.op_tag)); - - ret = service_operation(new_op, "orangefs_cancel", ORANGEFS_OP_CANCELLATION); - - gossip_debug(GOSSIP_UTILS_DEBUG, - "orangefs_cancel_op_in_progress: got return value of %d\n", - ret); - - op_release(new_op); - return ret; -} - void orangefs_make_bad_inode(struct inode *inode) { if (is_root_handle(inode)) { diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index 191d886ccc57..3ea1665efdf0 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -16,7 +16,6 @@ #include "orangefs-kernel.h" #include "orangefs-bufmap.h" -static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *); static int wait_for_matching_downcall(struct orangefs_kernel_op_s *); /* @@ -36,25 +35,29 @@ void purge_waiting_ops(void) "pvfs2-client-core: purging op tag %llu %s\n", llu(op->tag), get_opname_string(op)); - spin_lock(&op->lock); set_op_state_purged(op); - spin_unlock(&op->lock); } spin_unlock(&orangefs_request_list_lock); } static inline void -add_op_to_request_list(struct orangefs_kernel_op_s *op) +__add_op_to_request_list(struct orangefs_kernel_op_s *op) { - spin_lock(&orangefs_request_list_lock); spin_lock(&op->lock); set_op_state_waiting(op); list_add_tail(&op->list, &orangefs_request_list); - spin_unlock(&orangefs_request_list_lock); spin_unlock(&op->lock); wake_up_interruptible(&orangefs_request_list_waitq); } +static inline void +add_op_to_request_list(struct orangefs_kernel_op_s *op) +{ + spin_lock(&orangefs_request_list_lock); + __add_op_to_request_list(op); + spin_unlock(&orangefs_request_list_lock); +} + static inline void add_priority_op_to_request_list(struct orangefs_kernel_op_s *op) { @@ -159,15 +162,7 @@ retry_servicing: if (flags & ORANGEFS_OP_ASYNC) return 0; - if (flags & ORANGEFS_OP_CANCELLATION) { - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:" - "About to call wait_for_cancellation_downcall.\n", - __func__); - ret = wait_for_cancellation_downcall(op); - } else { - ret = wait_for_matching_downcall(op); - } + ret = wait_for_matching_downcall(op); if (ret < 0) { /* failed to get matching downcall */ @@ -273,6 +268,36 @@ retry_servicing: return ret; } +bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op) +{ + u64 tag = op->tag; + if (!op_state_in_progress(op)) + return false; + + op->slot_to_free = op->upcall.req.io.buf_index; + memset(&op->upcall, 0, sizeof(op->upcall)); + memset(&op->downcall, 0, sizeof(op->downcall)); + op->upcall.type = ORANGEFS_VFS_OP_CANCEL; + op->upcall.req.cancel.op_tag = tag; + op->downcall.type = ORANGEFS_VFS_OP_INVALID; + op->downcall.status = -1; + orangefs_new_tag(op); + + spin_lock(&orangefs_request_list_lock); + /* orangefs_request_list_lock is enough of a barrier here */ + if (!__is_daemon_in_service()) { + spin_unlock(&orangefs_request_list_lock); + return false; + } + __add_op_to_request_list(op); + spin_unlock(&orangefs_request_list_lock); + + gossip_debug(GOSSIP_UTILS_DEBUG, + "Attempting ORANGEFS operation cancellation of tag %llu\n", + llu(tag)); + return true; +} + static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op) { /* @@ -426,81 +451,3 @@ static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op) return ret; } - -/* - * similar to wait_for_matching_downcall(), but used in the special case - * of I/O cancellations. - * - * Note we need a special wait function because if this is called we already - * know that a signal is pending in current and need to service the - * cancellation upcall anyway. the only way to exit this is to either - * timeout or have the cancellation be serviced properly. - */ -static int wait_for_cancellation_downcall(struct orangefs_kernel_op_s *op) -{ - int ret = -EINVAL; - DEFINE_WAIT(wait_entry); - - while (1) { - spin_lock(&op->lock); - prepare_to_wait(&op->waitq, &wait_entry, TASK_INTERRUPTIBLE); - if (op_state_serviced(op)) { - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:op-state is SERVICED.\n", - __func__); - spin_unlock(&op->lock); - ret = 0; - break; - } - - if (signal_pending(current)) { - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:operation interrupted by a signal (tag" - " %llu, op %p)\n", - __func__, - llu(op->tag), - op); - orangefs_clean_up_interrupted_operation(op); - ret = -EINTR; - break; - } - - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:About to call schedule_timeout.\n", - __func__); - spin_unlock(&op->lock); - ret = schedule_timeout(op_timeout_secs * HZ); - - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:Value returned from schedule_timeout(%d).\n", - __func__, - ret); - if (!ret) { - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:*** operation timed out: %p\n", - __func__, - op); - spin_lock(&op->lock); - orangefs_clean_up_interrupted_operation(op); - ret = -ETIMEDOUT; - break; - } - - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:Breaking out of loop, regardless of value returned by schedule_timeout.\n", - __func__); - ret = -ETIMEDOUT; - break; - } - - spin_lock(&op->lock); - finish_wait(&op->waitq, &wait_entry); - spin_unlock(&op->lock); - - gossip_debug(GOSSIP_WAIT_DEBUG, - "%s:returning ret(%d)\n", - __func__, - ret); - - return ret; -} -- 2.20.1