Drivers: hv: vmbus: Increase wait time for VMbus unload

author Michael Kelley <mikelley@microsoft.com>

Tue, 20 Apr 2021 04:48:09 +0000 (21:48 -0700)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sat, 22 May 2021 08:40:24 +0000 (10:40 +0200)
author Michael Kelley <mikelley@microsoft.com>
Tue, 20 Apr 2021 04:48:09 +0000 (21:48 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 22 May 2021 08:40:24 +0000 (10:40 +0200)
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c

index 7bf5e2fe17516bd9537b13b3d70c47fa2a3adea9..60c122b355ea5b2c47e46bcbc8664634282326c8 100644 (file)
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -675,6 +675,12 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
         channel->target_vp = hv_context.vp_index[cur_cpu];
  }
  
+#define UNLOAD_DELAY_UNIT_MS   10              /* 10 milliseconds */
+#define UNLOAD_WAIT_MS         (100*1000)      /* 100 seconds */
+#define UNLOAD_WAIT_LOOPS      (UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS)
+#define UNLOAD_MSG_MS          (5*1000)        /* Every 5 seconds */
+#define UNLOAD_MSG_LOOPS       (UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS)
+
  static void vmbus_wait_for_unload(void)
  {
         int cpu;
@@ -692,12 +698,17 @@ static void vmbus_wait_for_unload(void)
          * vmbus_connection.unload_event. If not, the last thing we can do is
          * read message pages for all CPUs directly.
          *
-        * Wait no more than 10 seconds so that the panic path can't get
-        * hung forever in case the response message isn't seen.
+        * Wait up to 100 seconds since an Azure host must write back any dirty
+        * data in its disk cache before the VMbus UNLOAD request will
+        * complete. This flushing has been empirically observed to take up
+        * to 50 seconds in cases with a lot of dirty data, so allow additional
+        * leeway, including for inaccuracies in mdelay(). But eventually time
+        * out so that the panic path can't get hung forever in case the
+        * response message isn't seen.
          */
-       for (i = 0; i < 1000; i++) {
+       for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) {
                 if (completion_done(&vmbus_connection.unload_event))
-                       break;
+                       goto completed;
  
                 for_each_online_cpu(cpu) {
                         page_addr = hv_context.synic_message_page[cpu];
@@ -717,9 +728,18 @@ static void vmbus_wait_for_unload(void)
                         vmbus_signal_eom(msg, message_type);
                 }
  
-               mdelay(10);
+               /*
+                * Give a notice periodically so someone watching the
+                * serial output won't think it is completely hung.
+                */
+               if (!(i % UNLOAD_MSG_LOOPS))
+                       pr_notice("Waiting for VMBus UNLOAD to complete\n");
+
+               mdelay(UNLOAD_DELAY_UNIT_MS);
         }
+       pr_err("Continuing even though VMBus UNLOAD did not complete\n");
  
+completed:
         /*
          * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
          * maybe-pending messages on all CPUs to be able to receive new
author	Michael Kelley <mikelley@microsoft.com>
	Tue, 20 Apr 2021 04:48:09 +0000 (21:48 -0700)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 22 May 2021 08:40:24 +0000 (10:40 +0200)