We need to check if the exception was completed after dropping the lock.
After regaining the lock, __find_pending_exception checks if the exception
was already placed into &s->pending hash.
But we don't check if the exception was already completed and placed into
&s->complete hash. If the process waiting in alloc_pending_exception was
delayed at this point because of a scheduling latency and the exception
was meanwhile completed, we'd miss that and allocate another pending
exception for already completed chunk.
It would lead to a situation where two records for the same chunk exist
and potential data corruption because multiple snapshot I/Os to the
affected chunk could be redirected to different locations in the
snapshot.
Cc: stable@kernel.org
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Alasdair G Kergon <agk@redhat.com>
goto out_unlock;
}
+ e = lookup_exception(&s->complete, chunk);
+ if (e) {
+ free_pending_exception(pe);
+ remap_exception(s, e, bio, chunk);
+ goto out_unlock;
+ }
+
pe = __find_pending_exception(s, pe, chunk);
if (!pe) {
__invalidate_snapshot(s, -ENOMEM);
goto next_snapshot;
}
+ e = lookup_exception(&snap->complete, chunk);
+ if (e) {
+ free_pending_exception(pe);
+ goto next_snapshot;
+ }
+
pe = __find_pending_exception(snap, pe, chunk);
if (!pe) {
__invalidate_snapshot(snap, -ENOMEM);