dm optimize: take io_lock when changing map

This patch makes every piece of code that takes map_lock for write also
hold io_lock for write. With this rule enforced, we can avoid taking
map_lock for read and incrementing the table reference count in the
common I/O path in __split_and_process_bio. io_lock is already held for
read on the I/O path, so we can be sure that the table doesn't change
under us.

Signed-off-by: Mikulas Patocka

---
 drivers/md/dm.c |   24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

Index: linux-3.3-devel/drivers/md/dm.c
===================================================================
--- linux-3.3-devel.orig/drivers/md/dm.c	2012-04-18 05:07:51.000000000 +0200
+++ linux-3.3-devel/drivers/md/dm.c	2012-04-18 05:07:54.000000000 +0200
@@ -676,7 +676,7 @@ struct dm_table *dm_get_live_table(struc
 }
 
 /*
- * A fast alternative of dm_get_live_table.
+ * A fast alternative to dm_get_live_table.
  *
  * Use dm_put_live_table_fast to release the table. dm_put_live_table_fast must
  * be called in all cases, regardless if this function returns NULL or not.
@@ -1442,7 +1442,12 @@ static void __split_and_process_bio(stru
 	struct clone_info ci;
 	int error = 0;
 
-	ci.map = dm_get_live_table(md);
+	/*
+	 * Note. We hold io_lock for read here, so we can access md->map
+	 * without using dm_get_live_table. When we drop io_lock, this pointer
+	 * becomes invalid.
+	 */
+	ci.map = md->map;
 	if (unlikely(!ci.map)) {
 		bio_io_error(bio);
 		return;
@@ -1473,7 +1478,6 @@ static void __split_and_process_bio(stru
 
 	/* drop the extra reference count */
 	dec_pending(ci.io, error);
-	dm_table_put(ci.map);
 }
 /*-----------------------------------------------------------------
  * CRUD END
@@ -2241,6 +2245,7 @@ static struct dm_table *__bind(struct ma
 
 	merge_is_optional = dm_table_merge_is_optional(t);
 
+	down_write_percpu_rw_semaphore(&md->io_lock);
 	down_write_percpu_rw_spinlock(&md->map_lock);
 	old_map = md->map;
 	md->map = t;
@@ -2252,6 +2257,7 @@ static struct dm_table *__bind(struct ma
 	else
 		clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
 	up_write_percpu_rw_spinlock(&md->map_lock);
+	up_write_percpu_rw_semaphore(&md->io_lock);
 
 	return old_map;
 }
@@ -2267,9 +2273,11 @@ static struct dm_table *__unbind(struct
 		return NULL;
 
 	dm_table_event_callback(map, NULL, NULL);
+	down_write_percpu_rw_semaphore(&md->io_lock);
 	down_write_percpu_rw_spinlock(&md->map_lock);
 	md->map = NULL;
 	up_write_percpu_rw_spinlock(&md->map_lock);
+	up_write_percpu_rw_semaphore(&md->io_lock);
 
 	return map;
 }
@@ -2518,14 +2526,12 @@ static void dm_wq_work(struct work_struc
 		if (!c)
 			break;
 
-		up_read_percpu_rw_semaphore(&md->io_lock, lock_cpu);
-
-		if (dm_request_based(md))
+		if (dm_request_based(md)) {
+			up_read_percpu_rw_semaphore(&md->io_lock, lock_cpu);
 			generic_make_request(c);
-		else
+			lock_cpu = down_read_percpu_rw_semaphore(&md->io_lock);
+		} else
 			__split_and_process_bio(md, c);
-
-		lock_cpu = down_read_percpu_rw_semaphore(&md->io_lock);
 	}
 
 	up_read_percpu_rw_semaphore(&md->io_lock, lock_cpu);
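
For reference, the locking rule above can be illustrated outside the kernel.
The sketch below is not part of the patch: it uses plain pthread rwlocks in
place of the percpu rw-semaphore and percpu rw-spinlock, and the names
(fake_mapped_device, fake_table, swap_table, process_bio) are made up for
illustration. It only models the discipline: writers replace ->map while
holding both io_lock and map_lock for write, and the I/O path dereferences
->map directly under io_lock held for read, with no map_lock and no table
reference count.

/*
 * Minimal userspace model of the io_lock/map_lock rule, assuming pthread
 * rwlocks stand in for the kernel's percpu locks.
 *
 * Build: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>

struct fake_table {
	int id;
};

struct fake_mapped_device {
	pthread_rwlock_t io_lock;	/* held for read around every bio */
	pthread_rwlock_t map_lock;	/* still guards ->map for other readers */
	struct fake_table *map;
};

/* __bind()/__unbind() analogue: both locks for write while ->map changes */
static struct fake_table *swap_table(struct fake_mapped_device *md,
				     struct fake_table *new)
{
	struct fake_table *old;

	pthread_rwlock_wrlock(&md->io_lock);
	pthread_rwlock_wrlock(&md->map_lock);
	old = md->map;
	md->map = new;
	pthread_rwlock_unlock(&md->map_lock);
	pthread_rwlock_unlock(&md->io_lock);

	return old;
}

/* __split_and_process_bio() analogue: io_lock held for read pins ->map */
static void process_bio(struct fake_mapped_device *md)
{
	pthread_rwlock_rdlock(&md->io_lock);
	if (md->map)
		printf("processing bio against table %d\n", md->map->id);
	pthread_rwlock_unlock(&md->io_lock);
	/* ->map must not be used here: io_lock has been dropped */
}

int main(void)
{
	struct fake_mapped_device md;
	struct fake_table t1 = { .id = 1 }, t2 = { .id = 2 };

	pthread_rwlock_init(&md.io_lock, NULL);
	pthread_rwlock_init(&md.map_lock, NULL);
	md.map = &t1;

	process_bio(&md);
	swap_table(&md, &t2);	/* returns &t1; a real caller would free it */
	process_bio(&md);

	return 0;
}

As in the patch, the table pointer obtained this way is only valid while
io_lock is held for read, which is exactly the note added above
__split_and_process_bio.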