drbd: Create new current UUID as late as possible
authorPhilipp Reisner <philipp.reisner@linbit.com>
Mon, 17 May 2010 14:10:43 +0000 (16:10 +0200)
committerPhilipp Reisner <philipp.reisner@linbit.com>
Tue, 18 May 2010 00:03:49 +0000 (02:03 +0200)
The choice was to either delay creation of the new UUID until
IO got thawed or to delay it until the first IO request.

Both are correct, the later is more friendly to users of
dual-primary setups, that actually only write on one side.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_receiver.c

index c194348a46ed0833f58ffb3b0d40ab9cfd8be361..e9654c8d5b6265f5f125c80185bd54b8034b697a 100644 (file)
@@ -943,7 +943,8 @@ struct drbd_conf {
        struct drbd_work  resync_work,
                          unplug_work,
                          md_sync_work,
-                         delay_probe_work;
+                         delay_probe_work,
+                         uuid_work;
        struct timer_list resync_timer;
        struct timer_list md_sync_timer;
        struct timer_list delay_probe_timer;
@@ -1068,6 +1069,7 @@ struct drbd_conf {
        struct timeval dps_time; /* delay-probes-start-time */
        unsigned int dp_volume_last;  /* send_cnt of last delay probe */
        int c_sync_rate; /* current resync rate after delay_probe magic */
+       atomic_t new_c_uuid;
 };
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)
@@ -2217,6 +2219,8 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
                return 0;
        if (test_bit(BITMAP_IO, &mdev->flags))
                return 0;
+       if (atomic_read(&mdev->new_c_uuid))
+               return 0;
        return 1;
 }
 
@@ -2237,6 +2241,9 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
         * to avoid races with the reconnect code,
         * we need to atomic_inc within the spinlock. */
 
+       if (atomic_read(&mdev->new_c_uuid) && atomic_add_unless(&mdev->new_c_uuid, -1, 1))
+               drbd_queue_work_front(&mdev->data.work, &mdev->uuid_work);
+
        spin_lock_irq(&mdev->req_lock);
        while (!__inc_ap_bio_cond(mdev)) {
                prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
index d0fabace14528506aaa808d9c06d34ea2a1b3084..c144509011b851005ccf3fff7f517d3f5464c718 100644 (file)
@@ -1217,17 +1217,16 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
                mdev->p_uuid = NULL;
                if (get_ldev(mdev)) {
                        if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
-                           mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
-                               drbd_uuid_new_current(mdev);
-                               drbd_send_uuids(mdev);
-                       }
+                           mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE)
+                               atomic_set(&mdev->new_c_uuid, 2);
                        put_ldev(mdev);
                }
        }
 
        if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
+               /* Diskless peer becomes primary or got connected do diskless, primary peer. */
                if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0)
-                       drbd_uuid_new_current(mdev);
+                       atomic_set(&mdev->new_c_uuid, 2);
 
                /* D_DISKLESS Peer becomes secondary */
                if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
@@ -1351,6 +1350,19 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
        drbd_md_sync(mdev);
 }
 
+static int w_new_current_uuid(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+       if (get_ldev(mdev)) {
+               drbd_uuid_new_current(mdev);
+               drbd_send_uuids(mdev);
+               drbd_md_sync(mdev);
+               put_ldev(mdev);
+       }
+       atomic_dec(&mdev->new_c_uuid);
+       wake_up(&mdev->misc_wait);
+
+       return 1;
+}
 
 static int drbd_thread_setup(void *arg)
 {
@@ -2691,6 +2703,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
        atomic_set(&mdev->net_cnt, 0);
        atomic_set(&mdev->packet_seq, 0);
        atomic_set(&mdev->pp_in_use, 0);
+       atomic_set(&mdev->new_c_uuid, 0);
 
        mutex_init(&mdev->md_io_mutex);
        mutex_init(&mdev->data.mutex);
@@ -2721,12 +2734,14 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
        INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
        INIT_LIST_HEAD(&mdev->delay_probes);
        INIT_LIST_HEAD(&mdev->delay_probe_work.list);
+       INIT_LIST_HEAD(&mdev->uuid_work.list);
 
        mdev->resync_work.cb  = w_resync_inactive;
        mdev->unplug_work.cb  = w_send_write_hint;
        mdev->md_sync_work.cb = w_md_sync;
        mdev->bm_io_work.w.cb = w_bitmap_io;
        mdev->delay_probe_work.cb = w_delay_probes;
+       mdev->uuid_work.cb = w_new_current_uuid;
        init_timer(&mdev->resync_timer);
        init_timer(&mdev->md_sync_timer);
        init_timer(&mdev->delay_probe_timer);
index a04ec01ab3ce0f1b11b7b1dc05e75971577e6399..461d9872d4d395a92e4475fc76cdb16c058fb252 100644 (file)
@@ -1150,6 +1150,17 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
        unsigned n_bios = 0;
        unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
 
+       if (atomic_read(&mdev->new_c_uuid)) {
+               if (atomic_add_unless(&mdev->new_c_uuid, -1, 1)) {
+                       drbd_uuid_new_current(mdev);
+                       drbd_md_sync(mdev);
+
+                       atomic_dec(&mdev->new_c_uuid);
+                       wake_up(&mdev->misc_wait);
+               }
+               wait_event(mdev->misc_wait, !atomic_read(&mdev->new_c_uuid));
+       }
+
        /* In most cases, we will only need one bio.  But in case the lower
         * level restrictions happen to be different at this offset on this
         * side than those of the sending peer, we may need to submit the