dm optimize: take io_lock when changing map

This patch ensures that all pieces of code that take map_lock for write
also hold io_lock for write. If we enforce this rule, we can avoid taking
map_lock for read and incrementing the table reference count on the common
i/o path in __split_and_process_bio. io_lock is held for read on the i/o
path, so we can be sure that the table doesn't change under us.

Signed-off-by: Mikulas Patocka

---
 drivers/md/dm.c |   27 ++++++++++++++++++---------
 1 file changed, 18 insertions(+), 9 deletions(-)

Index: linux-3.3-fast/drivers/md/dm.c
===================================================================
--- linux-3.3-fast.orig/drivers/md/dm.c	2012-04-21 03:25:36.000000000 +0200
+++ linux-3.3-fast/drivers/md/dm.c	2012-04-21 03:30:36.000000000 +0200
@@ -679,7 +679,7 @@ struct dm_table *dm_get_live_table(struc
 }
 
 /*
- * A fast alternative of dm_get_live_table.
+ * A fast alternative to dm_get_live_table.
  *
  * Use dm_put_live_table_fast to release the table. dm_put_live_table_fast must
  * be called in all cases, regardless if this function returns NULL or not.
@@ -1443,7 +1443,12 @@ static void __split_and_process_bio(stru
 	struct clone_info ci;
 	int error = 0;
-	ci.map = dm_get_live_table(md);
+	/*
+	 * Note. We hold io_lock for read here, so we can access md->map
+	 * without using dm_get_live_table. When we drop io_lock, this pointer
+	 * becomes invalid.
+	 */
+	ci.map = md->map;
 	if (unlikely(!ci.map)) {
 		bio_io_error(bio);
 		return;
 	}
@@ -1474,7 +1479,6 @@ static void __split_and_process_bio(stru
 
 	/* drop the extra reference count */
 	dec_pending(ci.io, error);
-	dm_table_put(ci.map);
 }
 /*-----------------------------------------------------------------
  * CRUD END
@@ -2232,6 +2236,8 @@ static struct dm_table *__bind(struct ma
 
 	merge_is_optional = dm_table_merge_is_optional(t);
 
+	down_write_percpu_rw_semaphore(&md->io_lock);
+
 	old_map = md->map;
 	rcu_assign_pointer(md->map, t);
 	md->immutable_target_type = dm_table_get_immutable_target_type(t);
@@ -2242,6 +2248,7 @@ static struct dm_table *__bind(struct ma
 	else
 		clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
+	up_write_percpu_rw_semaphore(&md->io_lock);
 	synchronize_rcu_expedited();
 
 	return old_map;
 }
@@ -2257,11 +2264,15 @@ static struct dm_table *__unbind(struct
 	if (!map)
 		return NULL;
 
+	down_write_percpu_rw_semaphore(&md->io_lock);
+
 	dm_table_event_callback(map, NULL, NULL);
 	rcu_assign_pointer(md->map, NULL);
 	synchronize_rcu_expedited();
 
+	up_write_percpu_rw_semaphore(&md->io_lock);
+
 	return map;
 }
@@ -2509,14 +2520,12 @@ static void dm_wq_work(struct work_struc
 		if (!c)
 			break;
 
-		up_read_percpu_rw_semaphore(&md->io_lock, lock_cpu);
-
-		if (dm_request_based(md))
+		if (dm_request_based(md)) {
+			up_read_percpu_rw_semaphore(&md->io_lock, lock_cpu);
 			generic_make_request(c);
-		else
+			lock_cpu = down_read_percpu_rw_semaphore(&md->io_lock);
+		} else
 			__split_and_process_bio(md, c);
-
-		lock_cpu = down_read_percpu_rw_semaphore(&md->io_lock);
 	}
 
 	up_read_percpu_rw_semaphore(&md->io_lock, lock_cpu);
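
As a side note for readers unfamiliar with the scheme, the rule the patch
enforces (every writer of md->map holds io_lock for write, every reader on
the i/o path holds io_lock for read) can be modelled in plain userspace C.
The sketch below is only an illustration, not device mapper code: an
ordinary pthread rwlock stands in for the per-CPU rw-semaphore, and
struct table, swap_table and do_io are invented names for this example.

/*
 * Userspace sketch of the locking rule, not kernel code. A pthread rwlock
 * stands in for the per-CPU rw-semaphore io_lock; the names here are
 * made up for illustration.
 *
 * Readers dereference the live pointer while holding the lock for read;
 * the writer takes the lock for write around the pointer swap, so a
 * reader never sees the pointer change under it and no per-i/o reference
 * count on the table is needed.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct table {
	int generation;
};

static pthread_rwlock_t io_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct table *live_table;	/* analogous to md->map */

/* i/o path: runs with io_lock held for read, like __split_and_process_bio */
static void do_io(void)
{
	pthread_rwlock_rdlock(&io_lock);
	struct table *t = live_table;	/* safe: writers are excluded */
	if (t)
		printf("submitting i/o against table generation %d\n",
		       t->generation);
	pthread_rwlock_unlock(&io_lock);
}

/* table swap: runs with io_lock held for write, like __bind/__unbind */
static struct table *swap_table(struct table *new_table)
{
	pthread_rwlock_wrlock(&io_lock);
	struct table *old = live_table;
	live_table = new_table;
	pthread_rwlock_unlock(&io_lock);
	return old;	/* caller frees it once no reader can see it */
}

int main(void)
{
	struct table *t = malloc(sizeof(*t));
	if (!t)
		return 1;
	t->generation = 1;
	free(swap_table(t));	/* previous table is NULL here */
	do_io();
	free(swap_table(NULL));
	return 0;
}

The trade-off illustrated here is the same one the patch relies on: the
read side of the per-CPU rw-semaphore is cheap and taken on every i/o,
while the write side is expensive but only taken on the rare table swap,
which is what lets the common path skip map_lock and the table refcount.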