dm optimize: change map lock to rcu

The rw-spinlock map_lock is removed and RCU is used instead.

If we need to get the current table, we take the RCU read lock, dereference
the pointer to the current table, increase the table's atomic reference
count and finally drop the RCU read lock.

If we change the map, we change the pointer first, then we call
synchronize_rcu_expedited() (no RCU reader can obtain a pointer to the old
table past this point). Then we wait for the table's reference count to
drop to zero. Finally, we destroy the table.

Signed-off-by: Mikulas Patocka

---
 drivers/md/dm.c |   30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

Index: linux-3.3-fast/drivers/md/dm.c
===================================================================
--- linux-3.3-fast.orig/drivers/md/dm.c	2012-04-21 03:06:21.000000000 +0200
+++ linux-3.3-fast/drivers/md/dm.c	2012-04-21 03:19:50.000000000 +0200
@@ -123,6 +123,14 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_MERGE_IS_OPTIONAL 6
 
 /*
+ * A dummy definition to make RCU happy.
+ * struct dm_table should never be dereferenced in this file.
+ */
+struct dm_table {
+	int undefined__;
+};
+
+/*
  * Work processed by per-device workqueue.
  */
 struct mapped_device {
@@ -546,13 +554,12 @@ static void queue_io(struct mapped_devic
 struct dm_table *dm_get_live_table(struct mapped_device *md)
 {
 	struct dm_table *t;
-	unsigned long flags;
 
-	read_lock_irqsave(&md->map_lock, flags);
-	t = md->map;
+	rcu_read_lock();
+	t = rcu_dereference(md->map);
 	if (t)
 		dm_table_get(t);
-	read_unlock_irqrestore(&md->map_lock, flags);
+	rcu_read_unlock();
 
 	return t;
 }
@@ -1843,7 +1850,6 @@ static struct mapped_device *alloc_dev(i
 	mutex_init(&md->suspend_lock);
 	mutex_init(&md->type_lock);
 	spin_lock_init(&md->deferred_lock);
-	rwlock_init(&md->map_lock);
 	atomic_set(&md->holders, 1);
 	atomic_set(&md->open_count, 0);
 	atomic_set(&md->event_nr, 0);
@@ -2057,7 +2063,6 @@ static struct dm_table *__bind(struct ma
 	struct dm_table *old_map;
 	struct request_queue *q = md->queue;
 	sector_t size;
-	unsigned long flags;
 	int merge_is_optional;
 
 	size = dm_table_get_size(t);
@@ -2086,9 +2091,8 @@ static struct dm_table *__bind(struct ma
 
 	merge_is_optional = dm_table_merge_is_optional(t);
 
-	write_lock_irqsave(&md->map_lock, flags);
 	old_map = md->map;
-	md->map = t;
+	rcu_assign_pointer(md->map, t);
 	md->immutable_target_type = dm_table_get_immutable_target_type(t);
 
 	dm_table_set_restrictions(t, q, limits);
@@ -2096,7 +2100,8 @@ static struct dm_table *__bind(struct ma
 		set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
 	else
 		clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
-	write_unlock_irqrestore(&md->map_lock, flags);
+
+	synchronize_rcu_expedited();
 
 	return old_map;
 }
@@ -2107,15 +2112,14 @@ static struct dm_table *__bind(struct ma
 static struct dm_table *__unbind(struct mapped_device *md)
 {
 	struct dm_table *map = md->map;
-	unsigned long flags;
 
 	if (!map)
 		return NULL;
 
 	dm_table_event_callback(map, NULL, NULL);
-	write_lock_irqsave(&md->map_lock, flags);
-	md->map = NULL;
-	write_unlock_irqrestore(&md->map_lock, flags);
+	rcu_assign_pointer(md->map, NULL);
+
+	synchronize_rcu_expedited();
 
 	return map;
 }
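
For readers less familiar with this RCU pattern, the sketch below restates
the lookup/swap protocol from the changelog as a self-contained example. It
is illustrative only, not dm code: the names (sketch_table, live_table,
get_live_table, swap_table) are made up, holders stands in for dm_table's
reference count, and the msleep() poll loop stands in for dm's existing
mechanism that waits for the table reference count to drop to zero.

/*
 * Illustrative sketch only -- not part of the patch. Names are
 * hypothetical; error handling and dm's real refcounting are omitted.
 */
#include <linux/rcupdate.h>
#include <linux/atomic.h>
#include <linux/delay.h>

struct sketch_table {
	atomic_t holders;		/* like dm_table_get()/dm_table_put() */
	/* ... table payload ... */
};

static struct sketch_table __rcu *live_table;

/* Reader: pin the current table (mirrors dm_get_live_table()). */
static struct sketch_table *get_live_table(void)
{
	struct sketch_table *t;

	rcu_read_lock();
	t = rcu_dereference(live_table);
	if (t)
		atomic_inc(&t->holders);	/* ref taken under rcu_read_lock */
	rcu_read_unlock();

	return t;
}

static void put_table(struct sketch_table *t)
{
	atomic_dec(&t->holders);
}

/* Writer: publish a new table, then retire the old one. */
static struct sketch_table *swap_table(struct sketch_table *new)
{
	struct sketch_table *old = rcu_dereference_protected(live_table, 1);

	rcu_assign_pointer(live_table, new);

	/* After this, no reader can still see (or take a ref on) 'old'. */
	synchronize_rcu_expedited();

	/* Drain references taken before the grace period ended. */
	while (old && atomic_read(&old->holders))
		msleep(1);

	return old;			/* now safe to destroy */
}

The ordering is the point: rcu_assign_pointer() publishes the new table
before the grace period starts, so once synchronize_rcu_expedited() returns
no reader can still obtain a reference on the old table and its holders
count can only fall; the writer's wait therefore cannot race with a new
reader.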