block copy: use a timer to fix a theoretical deadlock The block layer creates two bios for each copy operation. The bios travel independently through the storage stack and they are paired at the block device. There is a theoretical problem with this - the block device stack only guarantees forward progress for a single bio. When two bios are sent, it is possible (though very unlikely) that the first bio exhausts some mempool and the second bio waits until there is free space in the mempool (and thus it waits until the first bio finishes). To avoid this deadlock, we introduce a timer. If the two bios are not paired at the physical block device within 10 seconds, the copy operation is aborted and the bio that waits to be paired is released with an error. Note that there is no guarantee that any XCOPY operation succeed, so aborting an operation with an error shouldn't cause any problems - the caller is supposed to perform the copy manually if XCOPY fails. Signed-off-by: Mikulas Patocka --- block/blk-lib.c | 31 +++++++++++++++++++++++++++++++ include/linux/blk_types.h | 2 ++ 2 files changed, 33 insertions(+) Index: linux-4.11-rc2/block/blk-lib.c =================================================================== --- linux-4.11-rc2.orig/block/blk-lib.c +++ linux-4.11-rc2/block/blk-lib.c @@ -387,6 +387,34 @@ int blkdev_issue_zeroout(struct block_de } EXPORT_SYMBOL(blkdev_issue_zeroout); +#define BLK_COPY_TIMEOUT (10 * HZ) + +static void blk_copy_timeout(unsigned long bc_) +{ + struct bio_copy *bc = (struct bio_copy *)bc_; + struct bio *bio0 = NULL, *bio1 = NULL; + + WARN_ON(!irqs_disabled()); + + spin_lock(&bc->spinlock); /* the timer is IRQSAFE */ + if (bc->error == 1) { + bc->error = -ETIMEDOUT; + bio0 = bc->pair[0]; + bio1 = bc->pair[1]; + bc->pair[0] = bc->pair[1] = NULL; + } + spin_unlock(&bc->spinlock); + + if (bio0) { + bio0->bi_error = -ETIMEDOUT; + bio_endio(bio0); + } + if (bio1) { + bio1->bi_error = -ETIMEDOUT; + bio_endio(bio1); + } +} + static void bio_copy_end_io(struct bio *bio) { struct bio_copy *bc = bio->bi_copy; @@ -422,6 +450,7 @@ static void bio_copy_end_io(struct bio * } while (unlikely(atomic64_cmpxchg(bc->first_error, first_error, bc->offset) != first_error)); } + del_timer_sync(&bc->timer); kfree(bc); if (atomic_dec_and_test(&bb->done)) complete(bb->wait); @@ -512,6 +541,8 @@ int blkdev_issue_copy(struct block_devic bc->first_error = &first_error; bc->offset = offset; spin_lock_init(&bc->spinlock); + __setup_timer(&bc->timer, blk_copy_timeout, (unsigned long)bc, TIMER_IRQSAFE); + mod_timer(&bc->timer, jiffies + BLK_COPY_TIMEOUT); bio_set_op_attrs(read_bio, REQ_OP_COPY_READ, 0); read_bio->bi_iter.bi_sector = src_sector; Index: linux-4.11-rc2/include/linux/blk_types.h =================================================================== --- linux-4.11-rc2.orig/include/linux/blk_types.h +++ linux-4.11-rc2/include/linux/blk_types.h @@ -8,6 +8,7 @@ #include #include #include +#include struct bio_set; struct bio; @@ -104,6 +105,7 @@ struct bio_copy { atomic64_t *first_error; sector_t offset; spinlock_t spinlock; + struct timer_list timer; }; /*