dm optimize: introduce dm_get_live_table_fast and dm_put_live_table_fast

If code needs to access the current dm_table, it uses dm_get_live_table
and dm_table_put.  dm_get_live_table takes the rw-spinlock map_lock
(made per-cpu by the previous patch), increases the table reference
count and then drops the rw-spinlock.  dm_table_put decreases the table
reference count.  The table reference count is changed by every process
doing I/O, so it is subject to cache line bouncing between multiple
CPUs.

This patch introduces the functions dm_get_live_table_fast and
dm_put_live_table_fast.  dm_get_live_table_fast takes the percpu
rw-spinlock and returns the live dm table (without changing the table
reference count).  dm_put_live_table_fast drops the percpu rw-spinlock.
Consequently, there is no cache line bouncing.

These fast functions may only be used if the caller does not sleep
between dm_get_live_table_fast and dm_put_live_table_fast.  If the
caller may sleep, the old functions dm_get_live_table and dm_table_put
must be used instead.

Signed-off-by: Mikulas Patocka

---
 drivers/md/dm.c |   41 +++++++++++++++++++++++++++++++++--------
 1 file changed, 33 insertions(+), 8 deletions(-)

Index: linux-3.3-devel/drivers/md/dm.c
===================================================================
--- linux-3.3-devel.orig/drivers/md/dm.c	2012-04-18 05:07:46.000000000 +0200
+++ linux-3.3-devel/drivers/md/dm.c	2012-04-18 05:07:51.000000000 +0200
@@ -676,6 +676,28 @@ struct dm_table *dm_get_live_table(struc
 }
 
 /*
+ * A fast alternative to dm_get_live_table.
+ *
+ * Use dm_put_live_table_fast to release the table. dm_put_live_table_fast must
+ * be called in all cases, regardless of whether this function returns NULL.
+ *
+ * This function doesn't increase the table reference count; it holds a table
+ * spinlock instead.
+ * The caller must not sleep until dm_put_live_table_fast is called.
+ */
+struct dm_table *dm_get_live_table_fast(struct mapped_device *md, int *lock_cpu)
+{
+	BUG_ON(in_interrupt());
+	*lock_cpu = down_read_percpu_rw_spinlock(&md->map_lock);
+	return md->map;
+}
+
+void dm_put_live_table_fast(struct mapped_device *md, int lock_cpu)
+{
+	up_read_percpu_rw_spinlock(&md->map_lock, lock_cpu);
+}
+
+/*
  * Get the geometry associated with a dm device
  */
 int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
@@ -1461,8 +1483,9 @@ static int dm_merge_bvec(struct request_
 			 struct bvec_merge_data *bvm,
 			 struct bio_vec *biovec)
 {
+	int lock_cpu;
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_live_table(md);
+	struct dm_table *map = dm_get_live_table_fast(md, &lock_cpu);
 	struct dm_target *ti;
 	sector_t max_sectors;
 	int max_size = 0;
@@ -1472,7 +1495,7 @@ static int dm_merge_bvec(struct request_
 
 	ti = dm_table_find_target(map, bvm->bi_sector);
 	if (!dm_target_is_valid(ti))
-		goto out_table;
+		goto out;
 
 	/*
 	 * Find maximum amount of I/O that won't need splitting
@@ -1501,10 +1524,10 @@ static int dm_merge_bvec(struct request_
 
 		max_size = 0;
 
-out_table:
-	dm_table_put(map);
 
 out:
+	dm_put_live_table_fast(md, lock_cpu);
+
 	/*
 	 * Always allow an entire first page
 	 */
@@ -1795,14 +1818,15 @@ static int dm_lld_busy(struct request_qu
 {
 	int r;
 	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_live_table(md);
+	int lock_cpu;
+	struct dm_table *map = dm_get_live_table_fast(md, &lock_cpu);
 
 	if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
 		r = 1;
 	else
 		r = dm_table_any_busy_target(map);
 
-	dm_table_put(map);
+	dm_put_live_table_fast(md, lock_cpu);
 
 	return r;
 }
@@ -1814,7 +1838,8 @@ static int dm_any_congested(void *conges
 	struct dm_table *map;
 
 	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
-		map = dm_get_live_table(md);
+		int lock_cpu;
+		map = dm_get_live_table_fast(md, &lock_cpu);
 		if (map) {
 			/*
 			 * Request-based dm cares about only own queue for
@@ -1826,8 +1851,8 @@ static int dm_any_congested(void *conges
 			else
 				r = dm_table_any_congested(map, bdi_bits);
 
-			dm_table_put(map);
 		}
+		dm_put_live_table_fast(md, lock_cpu);
 	}
 
 	return r;
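
For reference, a minimal caller sketch (hypothetical helper, not part of
this patch) showing the intended usage pattern, mirroring the dm_lld_busy
conversion above.  It assumes the percpu rw-spinlock primitives introduced
by the previous patch in this series:

	/*
	 * Hypothetical helper in drivers/md/dm.c: test whether md currently
	 * has a live table without touching the table reference count.
	 * The caller must not sleep between the get and the put, and
	 * dm_put_live_table_fast must be called even if no table is present.
	 */
	static bool dm_example_has_live_table(struct mapped_device *md)
	{
		int lock_cpu;
		bool r;
		struct dm_table *map = dm_get_live_table_fast(md, &lock_cpu);

		r = (map != NULL);

		dm_put_live_table_fast(md, lock_cpu);

		return r;
	}

If the code between the get and the put may sleep (for example because it
waits for I/O), the reference-counted dm_get_live_table/dm_table_put pair
must still be used.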