From 81ea39ecf963fe0c44b8772a19863ecb154782f6 Mon Sep 17 00:00:00 2001 From: Vitaly Fertman Date: Mon, 20 Jun 2016 16:55:34 -0400 Subject: [PATCH] staging/lustre/ptlrpc: lost bulk leads to a hang The reverse order of request_out_callback() and reply_in_callback() puts the RPC into UNREGISTERING state, which is waiting for RPC & bulk md unlink, whereas only RPC md unlink has been called so far. If bulk is lost, even expired_set does not check for UNREGISTERING state. The same for write if server returns an error. This phase is ambiguous, split to UNREG_RPC and UNREG_BULK. Signed-off-by: Vitaly Fertman Seagate-bug-id: MRP-2953, MRP-3206 Reviewed-by: Andriy Skulysh Reviewed-by: Alexey Leonidovich Lyashkov Tested-by: Elena V. Gryaznova Reviewed-on: http://review.whamcloud.com/19953 Reviewed-by: Chris Horn Reviewed-by: Ann Koehler Reviewed-by: Andreas Dilger Signed-off-by: Oleg Drokin Signed-off-by: Greg Kroah-Hartman --- .../lustre/lustre/include/lustre_net.h | 48 ++++++++------ .../lustre/lustre/include/obd_support.h | 3 + drivers/staging/lustre/lustre/ptlrpc/client.c | 64 ++++++++++++++++--- drivers/staging/lustre/lustre/ptlrpc/import.c | 3 +- drivers/staging/lustre/lustre/ptlrpc/niobuf.c | 4 +- 5 files changed, 91 insertions(+), 31 deletions(-) diff --git a/drivers/staging/lustre/lustre/include/lustre_net.h b/drivers/staging/lustre/lustre/include/lustre_net.h index 0fedef97b80b..d5debd615fdf 100644 --- a/drivers/staging/lustre/lustre/include/lustre_net.h +++ b/drivers/staging/lustre/lustre/include/lustre_net.h @@ -480,8 +480,9 @@ enum rq_phase { RQ_PHASE_BULK = 0xebc0de02, RQ_PHASE_INTERPRET = 0xebc0de03, RQ_PHASE_COMPLETE = 0xebc0de04, - RQ_PHASE_UNREGISTERING = 0xebc0de05, - RQ_PHASE_UNDEFINED = 0xebc0de06 + RQ_PHASE_UNREG_RPC = 0xebc0de05, + RQ_PHASE_UNREG_BULK = 0xebc0de06, + RQ_PHASE_UNDEFINED = 0xebc0de07 }; /** Type of request interpreter call-back */ @@ -1263,6 +1264,8 @@ struct ptlrpc_cli_req { time_t cr_reply_deadline; /** when req bulk unlink must finish. */ time_t cr_bulk_deadline; + /** when req unlink must finish. */ + time_t cr_req_deadline; /** Portal to which this request would be sent */ short cr_req_ptl; /** Portal where to wait for reply and where reply would be sent */ @@ -1318,6 +1321,7 @@ struct ptlrpc_cli_req { #define rq_real_sent rq_cli.cr_sent_out #define rq_reply_deadline rq_cli.cr_reply_deadline #define rq_bulk_deadline rq_cli.cr_bulk_deadline +#define rq_req_deadline rq_cli.cr_req_deadline #define rq_nr_resend rq_cli.cr_resend_nr #define rq_request_portal rq_cli.cr_req_ptl #define rq_reply_portal rq_cli.cr_rep_ptl @@ -1692,8 +1696,10 @@ ptlrpc_phase2str(enum rq_phase phase) return "Interpret"; case RQ_PHASE_COMPLETE: return "Complete"; - case RQ_PHASE_UNREGISTERING: - return "Unregistering"; + case RQ_PHASE_UNREG_RPC: + return "UnregRPC"; + case RQ_PHASE_UNREG_BULK: + return "UnregBULK"; default: return "?Phase?"; } @@ -1720,7 +1726,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req) #define DEBUG_REQ_FLAGS(req) \ ptlrpc_rqphase2str(req), \ FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \ - FLAG(req->rq_err, "E"), \ + FLAG(req->rq_err, "E"), FLAG(req->rq_net_err, "e"), \ FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \ FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \ FLAG(req->rq_no_resend, "N"), \ @@ -1728,7 +1734,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req) FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"), \ FLAG(req->rq_committed, "M") -#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s" +#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s%s" void _debug_req(struct ptlrpc_request *req, struct libcfs_debug_msg_data *data, const char *fmt, ...) @@ -2379,8 +2385,7 @@ static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req) desc = req->rq_bulk; - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) && - req->rq_bulk_deadline > ktime_get_real_seconds()) + if (req->rq_bulk_deadline > ktime_get_real_seconds()) return 1; if (!desc) @@ -2727,13 +2732,20 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase) if (req->rq_phase == new_phase) return; - if (new_phase == RQ_PHASE_UNREGISTERING) { + if (new_phase == RQ_PHASE_UNREG_RPC || + new_phase == RQ_PHASE_UNREG_BULK) { + /* No embedded unregistering phases */ + if (req->rq_phase == RQ_PHASE_UNREG_RPC || + req->rq_phase == RQ_PHASE_UNREG_BULK) + return; + req->rq_next_phase = req->rq_phase; if (req->rq_import) atomic_inc(&req->rq_import->imp_unregistering); } - if (req->rq_phase == RQ_PHASE_UNREGISTERING) { + if (req->rq_phase == RQ_PHASE_UNREG_RPC || + req->rq_phase == RQ_PHASE_UNREG_BULK) { if (req->rq_import) atomic_dec(&req->rq_import->imp_unregistering); } @@ -2750,9 +2762,6 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase) static inline int ptlrpc_client_early(struct ptlrpc_request *req) { - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - req->rq_reply_deadline > ktime_get_real_seconds()) - return 0; return req->rq_early; } @@ -2762,8 +2771,7 @@ ptlrpc_client_early(struct ptlrpc_request *req) static inline int ptlrpc_client_replied(struct ptlrpc_request *req) { - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - req->rq_reply_deadline > ktime_get_real_seconds()) + if (req->rq_reply_deadline > ktime_get_real_seconds()) return 0; return req->rq_replied; } @@ -2772,8 +2780,7 @@ ptlrpc_client_replied(struct ptlrpc_request *req) static inline int ptlrpc_client_recv(struct ptlrpc_request *req) { - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - req->rq_reply_deadline > ktime_get_real_seconds()) + if (req->rq_reply_deadline > ktime_get_real_seconds()) return 1; return req->rq_receiving_reply; } @@ -2784,8 +2791,11 @@ ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req) int rc; spin_lock(&req->rq_lock); - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - req->rq_reply_deadline > ktime_get_real_seconds()) { + if (req->rq_reply_deadline > ktime_get_real_seconds()) { + spin_unlock(&req->rq_lock); + return 1; + } + if (req->rq_req_deadline > ktime_get_real_seconds()) { spin_unlock(&req->rq_lock); return 1; } diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h index cdf20d6470d7..845e64a56c21 100644 --- a/drivers/staging/lustre/lustre/include/obd_support.h +++ b/drivers/staging/lustre/lustre/include/obd_support.h @@ -364,6 +364,9 @@ extern char obd_jobid_var[]; #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2 0x515 #define OBD_FAIL_PTLRPC_DELAY_IMP_FULL 0x516 #define OBD_FAIL_PTLRPC_CANCEL_RESEND 0x517 +#define OBD_FAIL_PTLRPC_DROP_BULK 0x51a +#define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK 0x51b +#define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c #define OBD_FAIL_OBD_PING_NET 0x600 #define OBD_FAIL_OBD_LOG_CANCEL_NET 0x601 diff --git a/drivers/staging/lustre/lustre/ptlrpc/client.c b/drivers/staging/lustre/lustre/ptlrpc/client.c index 5d832eb540d3..d4463d7c81d2 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/client.c +++ b/drivers/staging/lustre/lustre/ptlrpc/client.c @@ -621,6 +621,8 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, request->rq_reply_cbid.cbid_arg = request; request->rq_reply_deadline = 0; + request->rq_bulk_deadline = 0; + request->rq_req_deadline = 0; request->rq_phase = RQ_PHASE_NEW; request->rq_next_phase = RQ_PHASE_UNDEFINED; @@ -632,6 +634,37 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request, request->rq_xid = ptlrpc_next_xid(); lustre_msg_set_opc(request->rq_reqmsg, opcode); + /* Let's setup deadline for req/reply/bulk unlink for opcode. */ + if (cfs_fail_val == opcode) { + time_t *fail_t = NULL, *fail2_t = NULL; + + if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) { + fail_t = &request->rq_bulk_deadline; + } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) { + fail_t = &request->rq_reply_deadline; + } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK)) { + fail_t = &request->rq_req_deadline; + } else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK)) { + fail_t = &request->rq_reply_deadline; + fail2_t = &request->rq_bulk_deadline; + } + + if (fail_t) { + *fail_t = ktime_get_real_seconds() + LONG_UNLINK; + + if (fail2_t) + *fail2_t = ktime_get_real_seconds() + + LONG_UNLINK; + + /* The RPC is infected, let the test change the + * fail_loc + */ + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(2)); + set_current_state(TASK_RUNNING); + } + } + return 0; out_ctx: @@ -1481,16 +1514,28 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) if (!(req->rq_phase == RQ_PHASE_RPC || req->rq_phase == RQ_PHASE_BULK || req->rq_phase == RQ_PHASE_INTERPRET || - req->rq_phase == RQ_PHASE_UNREGISTERING || + req->rq_phase == RQ_PHASE_UNREG_RPC || + req->rq_phase == RQ_PHASE_UNREG_BULK || req->rq_phase == RQ_PHASE_COMPLETE)) { DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase); LBUG(); } - if (req->rq_phase == RQ_PHASE_UNREGISTERING) { + if (req->rq_phase == RQ_PHASE_UNREG_RPC || + req->rq_phase == RQ_PHASE_UNREG_BULK) { LASSERT(req->rq_next_phase != req->rq_phase); LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED); + if (req->rq_req_deadline && + !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK)) + req->rq_req_deadline = 0; + if (req->rq_reply_deadline && + !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) + req->rq_reply_deadline = 0; + if (req->rq_bulk_deadline && + !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) + req->rq_bulk_deadline = 0; + /* * Skip processing until reply is unlinked. We * can't return to pool before that and we can't @@ -1498,7 +1543,10 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) * sure that all rdma transfers finished and will * not corrupt any data. */ - if (ptlrpc_client_recv_or_unlink(req) || + if (req->rq_phase == RQ_PHASE_UNREG_RPC && + ptlrpc_client_recv_or_unlink(req)) + continue; + if (req->rq_phase == RQ_PHASE_UNREG_BULK && ptlrpc_client_bulk_active(req)) continue; @@ -1976,7 +2024,7 @@ void ptlrpc_interrupted_set(void *data) list_entry(tmp, struct ptlrpc_request, rq_set_chain); if (req->rq_phase != RQ_PHASE_RPC && - req->rq_phase != RQ_PHASE_UNREGISTERING) + req->rq_phase != RQ_PHASE_UNREG_RPC) continue; ptlrpc_mark_interrupted(req); @@ -2288,8 +2336,9 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) /* Let's setup deadline for reply unlink. */ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) && - async && request->rq_reply_deadline == 0) - request->rq_reply_deadline = ktime_get_real_seconds()+LONG_UNLINK; + async && request->rq_reply_deadline == 0 && cfs_fail_val == 0) + request->rq_reply_deadline = + ktime_get_real_seconds() + LONG_UNLINK; /* Nothing left to do. */ if (!ptlrpc_client_recv_or_unlink(request)) @@ -2302,7 +2351,7 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) return 1; /* Move to "Unregistering" phase as reply was not unlinked yet. */ - ptlrpc_rqphase_move(request, RQ_PHASE_UNREGISTERING); + ptlrpc_rqphase_move(request, RQ_PHASE_UNREG_RPC); /* Do not wait for unlink to finish. */ if (async) @@ -2932,7 +2981,6 @@ static void ptlrpcd_add_work_req(struct ptlrpc_request *req) req->rq_timeout = obd_timeout; req->rq_sent = ktime_get_real_seconds(); req->rq_deadline = req->rq_sent + req->rq_timeout; - req->rq_reply_deadline = req->rq_deadline; req->rq_phase = RQ_PHASE_INTERPRET; req->rq_next_phase = RQ_PHASE_COMPLETE; req->rq_xid = ptlrpc_next_xid(); diff --git a/drivers/staging/lustre/lustre/ptlrpc/import.c b/drivers/staging/lustre/lustre/ptlrpc/import.c index 914bbd2956fb..3292e6ea0102 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/import.c +++ b/drivers/staging/lustre/lustre/ptlrpc/import.c @@ -356,9 +356,8 @@ void ptlrpc_invalidate_import(struct obd_import *imp) "still on delayed list"); } - CERROR("%s: RPCs in \"%s\" phase found (%d). Network is sluggish? Waiting them to error out.\n", + CERROR("%s: Unregistering RPCs found (%d). Network is sluggish? Waiting them to error out.\n", cli_tgt, - ptlrpc_phase2str(RQ_PHASE_UNREGISTERING), atomic_read(&imp-> imp_unregistering)); } diff --git a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c index 8c49f5ed2fca..11ec82545347 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/niobuf.c +++ b/drivers/staging/lustre/lustre/ptlrpc/niobuf.c @@ -247,7 +247,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async) /* Let's setup deadline for reply unlink. */ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) && - async && req->rq_bulk_deadline == 0) + async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0) req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK; if (ptlrpc_client_bulk_active(req) == 0) /* completed or */ @@ -266,7 +266,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async) return 1; /* never registered */ /* Move to "Unregistering" phase as bulk was not unlinked yet. */ - ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING); + ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK); /* Do not wait for unlink to finish. */ if (async) -- 2.20.1