dm crypt: fix async split
author Milan Broz <mbroz@redhat.com>
Tue, 21 Oct 2008 16:45:02 +0000 (17:45 +0100)
committer Alasdair G Kergon <agk@redhat.com>
Tue, 21 Oct 2008 16:45:02 +0000 (17:45 +0100)
When writing io, dm-crypt has to allocate a new cloned bio
and encrypt the data into newly-allocated pages attached to this bio.
In rare cases, because of hw restrictions (e.g. physical segment limit)
or memory pressure, sometimes more than one cloned bio has to be used,
each processing a different fragment of the original.

Currently there is one waitqueue on which the writer waits for one
fragment to finish before it continues processing the next fragment.

But when using asynchronous crypto this doesn't work, because several
fragments may be processed asynchronously or in parallel and there is
only one crypt context that cannot be shared between the bio fragments.
The result may be corruption of the data contained in the encrypted bio.

The patch fixes this by allocating new dm_crypt_io structs (with new
crypto contexts) and running them independently.

The fragments contain a pointer to the base dm_crypt_io struct to
handle reference counting, so the base one is properly deallocated
after all the fragments are finished.

In a low memory situation, this only uses one additional object from the
mempool.  If the mempool is empty, the next allocation simply waits for
previous fragments to complete.

Signed-off-by: Milan Broz <mbroz@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
drivers/md/dm-crypt.c

index ad98ded3008cd9367927af8974b181f249c60b8e..046ee516074b6a2d12dd977cb2635198e0913100 100644 (file)
@@ -56,6 +56,7 @@ struct dm_crypt_io {
        atomic_t pending;
        int error;
        sector_t sector;
+       struct dm_crypt_io *base_io;
 };
 
 struct dm_crypt_request {
@@ -534,6 +535,7 @@ static struct dm_crypt_io *crypt_io_alloc(struct dm_target *ti,
        io->base_bio = bio;
        io->sector = sector;
        io->error = 0;
+       io->base_io = NULL;
        atomic_set(&io->pending, 0);
 
        return io;
@@ -547,6 +549,7 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
 /*
  * One of the bios was finished. Check for completion of
  * the whole request and correctly clean up the buffer.
+ * If base_io is set, wait for the last fragment to complete.
  */
 static void crypt_dec_pending(struct dm_crypt_io *io)
 {
@@ -555,7 +558,14 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
        if (!atomic_dec_and_test(&io->pending))
                return;
 
-       bio_endio(io->base_bio, io->error);
+       if (likely(!io->base_io))
+               bio_endio(io->base_bio, io->error);
+       else {
+               if (io->error && !io->base_io->error)
+                       io->base_io->error = io->error;
+               crypt_dec_pending(io->base_io);
+       }
+
        mempool_free(io, cc->io_pool);
 }
 
@@ -699,6 +709,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
 {
        struct crypt_config *cc = io->target->private;
        struct bio *clone;
+       struct dm_crypt_io *new_io;
        int crypt_finished;
        unsigned out_of_pages = 0;
        unsigned remaining = io->base_bio->bi_size;
@@ -753,6 +764,34 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
                if (unlikely(out_of_pages))
                        congestion_wait(WRITE, HZ/100);
 
+               /*
+                * With async crypto it is unsafe to share the crypto context
+                * between fragments, so switch to a new dm_crypt_io structure.
+                */
+               if (unlikely(!crypt_finished && remaining)) {
+                       new_io = crypt_io_alloc(io->target, io->base_bio,
+                                               sector);
+                       crypt_inc_pending(new_io);
+                       crypt_convert_init(cc, &new_io->ctx, NULL,
+                                          io->base_bio, sector);
+                       new_io->ctx.idx_in = io->ctx.idx_in;
+                       new_io->ctx.offset_in = io->ctx.offset_in;
+
+                       /*
+                        * Fragments after the first use the base_io
+                        * pending count.
+                        */
+                       if (!io->base_io)
+                               new_io->base_io = io;
+                       else {
+                               new_io->base_io = io->base_io;
+                               crypt_inc_pending(io->base_io);
+                               crypt_dec_pending(io);
+                       }
+
+                       io = new_io;
+               }
+
                if (unlikely(remaining))
                        wait_event(cc->writeq, !atomic_read(&io->ctx.pending));
        }