dm snapshot: trigger exceptions in remaining snapshots during merge
author Mikulas Patocka <mpatocka@redhat.com>
Thu, 10 Dec 2009 23:52:34 +0000 (23:52 +0000)
committer Alasdair G Kergon <agk@redhat.com>
Thu, 10 Dec 2009 23:52:34 +0000 (23:52 +0000)
When there is one merging snapshot and other non-merging snapshots,
snapshot_merge_next_chunks() must make exceptions in the non-merging
snapshots.

Use a sequence count to resolve the race between I/O to chunks that are
about to be merged.  The count increases each time an exception
reallocation finishes.  Use wait_event() to wait until the count
changes.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
drivers/md/dm-snap.c

index bc52776c69ccaddb9151547e2b886c2031953152..1498704467a7e12dcf33da9dd10ecd020e35efe5 100644 (file)
@@ -270,6 +270,10 @@ struct origin {
 static struct list_head *_origins;
 static struct rw_semaphore _origins_lock;
 
+static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done); /* woken per bump */
+static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock); /* guards count */
+static uint64_t _pending_exceptions_done_count; /* completion sequence count */
+
 static int init_origin_hash(void)
 {
        int i;
@@ -847,14 +851,38 @@ out:
        return r;
 }
 
+static int origin_write_extent(struct dm_snapshot *merging_snap,
+                              sector_t sector, unsigned chunk_size);
+/* NOTE(review): origin_write_extent()'s definition names the last arg "size". */
 static void merge_callback(int read_err, unsigned long write_err,
                            void *context);
 
+static uint64_t read_pending_exceptions_done_count(void)
+{
+       uint64_t pending_exceptions_done;
+       /* Return a snapshot of the count, read under its spinlock. */
+       spin_lock(&_pending_exceptions_done_spinlock);
+       pending_exceptions_done = _pending_exceptions_done_count;
+       spin_unlock(&_pending_exceptions_done_spinlock);
+
+       return pending_exceptions_done;
+}
+
+static void increment_pending_exceptions_done_count(void)
+{      /* Bump the count under its spinlock, then notify waiters. */
+       spin_lock(&_pending_exceptions_done_spinlock);
+       _pending_exceptions_done_count++;
+       spin_unlock(&_pending_exceptions_done_spinlock);
+       /* Wake every waiter so each re-reads the count and re-checks. */
+       wake_up_all(&_pending_exceptions_done);
+}
+
 static void snapshot_merge_next_chunks(struct dm_snapshot *s)
 {
        int r;
        chunk_t old_chunk, new_chunk;
        struct dm_io_region src, dest;
+       uint64_t previous_count;
 
        BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits));
        if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits)))
@@ -887,6 +915,24 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s)
        src.sector = chunk_to_sector(s->store, new_chunk);
        src.count = dest.count;
 
+       /*
+        * Reallocate any exceptions needed in other snapshots then
+        * wait for the pending exceptions to complete.
+        * Each time any pending exception (globally on the system)
+        * completes we are woken and repeat the process to find out
+        * if we can proceed.  While this may not seem a particularly
+        * efficient algorithm, it is not expected to have any
+        * significant impact on performance.
+        */
+       previous_count = read_pending_exceptions_done_count();
+       while (origin_write_extent(s, dest.sector, s->store->chunk_size)) {
+               wait_event(_pending_exceptions_done,
+                          (read_pending_exceptions_done_count() !=
+                           previous_count));
+               /* Retry after the wait, until all exceptions are done. */
+               previous_count = read_pending_exceptions_done_count();
+       }
+
        down_write(&s->lock);
        s->first_merging_chunk = old_chunk;
        s->num_merging_chunks = 1;
@@ -1372,6 +1418,8 @@ static void pending_complete(struct dm_snap_pending_exception *pe, int success)
        origin_bios = bio_list_get(&pe->origin_bios);
        free_pending_exception(pe);
 
+       increment_pending_exceptions_done_count();
+
        up_write(&s->lock);
 
        /* Submit any pending write bios */
@@ -1962,6 +2010,41 @@ static int do_origin(struct dm_dev *origin, struct bio *bio)
        return r;
 }
 
+/*
+ * Trigger exceptions in all non-merging snapshots of merging_snap's
+ * origin for the extent starting at sector and spanning size sectors.
+ *
+ * The chunk size of the merging snapshot may be larger than the chunk
+ * size of some other snapshot, so the extent is walked in split_io
+ * steps and may reallocate multiple chunks in other snapshots.
+ *
+ * Returns 1 if any __origin_write() call returned DM_MAPIO_SUBMITTED,
+ * i.e. something was reallocated and must be waited for; otherwise 0.
+ *
+ * size must be a multiple of merging_snap's chunk_size.
+ */
+static int origin_write_extent(struct dm_snapshot *merging_snap,
+                              sector_t sector, unsigned size)
+{
+       int must_wait = 0;
+       sector_t n;
+       struct origin *o;
+
+       /*
+        * The origin's __minimum_chunk_size() got stored in split_io
+        * by snapshot_merge_resume().
+        */
+       down_read(&_origins_lock);
+       o = __lookup_origin(merging_snap->origin->bdev);
+       for (n = 0; n < size; n += merging_snap->ti->split_io)
+               if (__origin_write(&o->snapshots, sector + n, NULL) ==
+                   DM_MAPIO_SUBMITTED)
+                       must_wait = 1;
+       up_read(&_origins_lock);
+
+       return must_wait;
+}
+
 /*
  * Origin: maps a linear range of a device, with hooks for snapshotting.
  */