drbd: Do not sleep long in drbd_start_resync
author Philipp Reisner <philipp.reisner@linbit.com>
Sat, 5 Feb 2011 16:34:11 +0000 (17:34 +0100)
committer Philipp Reisner <philipp.reisner@linbit.com>
Wed, 28 Sep 2011 08:26:47 +0000 (10:26 +0200)
Work items that sleep too long can cause requests to take as
long as the longest sleeping work item.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_worker.c

index 9a351a2cab7c1edb536f73e12df55210633b4168..eec36af567444937256836743bfaec6a942dcb99 100644 (file)
@@ -787,6 +787,7 @@ enum {
        NEW_CUR_UUID,           /* Create new current UUID when thawing IO */
        AL_SUSPENDED,           /* Activity logging is currently suspended. */
        AHEAD_TO_SYNC_SOURCE,   /* Ahead -> SyncSource queued */
+       B_RS_H_DONE,            /* Before resync handler done (already executed) */
 };
 
 struct drbd_bitmap; /* opaque for drbd_conf */
index 28925d3d1a2fcd0dbbbf2cedc4fc63db7850e976..a705979c71f89cc4e3fd322842a2c42995e0ac33 100644 (file)
@@ -1487,35 +1487,49 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
                   Ahead/Behind and SyncSource/SyncTarget */
        }
 
-       if (side == C_SYNC_TARGET) {
-               /* Since application IO was locked out during C_WF_BITMAP_T and
-                  C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
-                  we check that we might make the data inconsistent. */
-               r = drbd_khelper(mdev, "before-resync-target");
-               r = (r >> 8) & 0xff;
-               if (r > 0) {
-                       dev_info(DEV, "before-resync-target handler returned %d, "
-                            "dropping connection.\n", r);
-                       drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
-                       return;
-               }
-       } else /* C_SYNC_SOURCE */ {
-               r = drbd_khelper(mdev, "before-resync-source");
-               r = (r >> 8) & 0xff;
-               if (r > 0) {
-                       if (r == 3) {
-                               dev_info(DEV, "before-resync-source handler returned %d, "
-                                        "ignoring. Old userland tools?", r);
-                       } else {
-                               dev_info(DEV, "before-resync-source handler returned %d, "
+       if (!test_bit(B_RS_H_DONE, &mdev->flags)) {
+               if (side == C_SYNC_TARGET) {
+                       /* Since application IO was locked out during C_WF_BITMAP_T and
+                          C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
+                          we check that we might make the data inconsistent. */
+                       r = drbd_khelper(mdev, "before-resync-target");
+                       r = (r >> 8) & 0xff;
+                       if (r > 0) {
+                               dev_info(DEV, "before-resync-target handler returned %d, "
                                         "dropping connection.\n", r);
                                drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
                                return;
                        }
+               } else /* C_SYNC_SOURCE */ {
+                       r = drbd_khelper(mdev, "before-resync-source");
+                       r = (r >> 8) & 0xff;
+                       if (r > 0) {
+                               if (r == 3) {
+                                       dev_info(DEV, "before-resync-source handler returned %d, "
+                                                "ignoring. Old userland tools?", r);
+                               } else {
+                                       dev_info(DEV, "before-resync-source handler returned %d, "
+                                                "dropping connection.\n", r);
+                                       drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+                                       return;
+                               }
+                       }
                }
        }
 
-       drbd_state_lock(mdev);
+       if (current == mdev->tconn->worker.task) {
+               /* The worker should not sleep waiting for drbd_state_lock(),
+                  that can take long */
+               if (test_and_set_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
+                       set_bit(B_RS_H_DONE, &mdev->flags);
+                       mdev->start_resync_timer.expires = jiffies + HZ/5;
+                       add_timer(&mdev->start_resync_timer);
+                       return;
+               }
+       } else {
+               drbd_state_lock(mdev);
+       }
+       clear_bit(B_RS_H_DONE, &mdev->flags);
 
        if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
                drbd_state_unlock(mdev);