From: Mike Snitzer Use dedicated caches prefixed with a "dm_" name rather than relying on kmalloc mempools backed by generic slab caches so the memory usage of thin provisioning (and any leaks) can be accounted for independently. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-thin.c | 161 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 99 insertions(+), 62 deletions(-) Index: linux/drivers/md/dm-thin.c =================================================================== --- linux.orig/drivers/md/dm-thin.c +++ linux/drivers/md/dm-thin.c @@ -111,7 +111,7 @@ struct cell_key { dm_block_t block; }; -struct cell { +struct dm_bio_prison_cell { struct hlist_node list; struct bio_prison *prison; struct cell_key key; @@ -141,6 +141,8 @@ static uint32_t calc_nr_buckets(unsigned return n; } +static struct kmem_cache *_cell_cache; + /* * @nr_cells should be the number of cells you want in use _concurrently_. * Don't confuse it with the number of distinct keys. @@ -157,8 +159,7 @@ static struct bio_prison *prison_create( return NULL; spin_lock_init(&prison->lock); - prison->cell_pool = mempool_create_kmalloc_pool(nr_cells, - sizeof(struct cell)); + prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache); if (!prison->cell_pool) { kfree(prison); return NULL; @@ -194,10 +195,10 @@ static int keys_equal(struct cell_key *l (lhs->block == rhs->block); } -static struct cell *__search_bucket(struct hlist_head *bucket, - struct cell_key *key) +static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, + struct cell_key *key) { - struct cell *cell; + struct dm_bio_prison_cell *cell; struct hlist_node *tmp; hlist_for_each_entry(cell, tmp, bucket, list) @@ -214,12 +215,12 @@ static struct cell *__search_bucket(stru * Returns 1 if the cell was already held, 0 if @inmate is the new holder. */ static int bio_detain(struct bio_prison *prison, struct cell_key *key, - struct bio *inmate, struct cell **ref) + struct bio *inmate, struct dm_bio_prison_cell **ref) { int r = 1; unsigned long flags; uint32_t hash = hash_key(prison, key); - struct cell *cell, *cell2; + struct dm_bio_prison_cell *cell, *cell2; BUG_ON(hash > prison->nr_buckets); @@ -273,7 +274,7 @@ out: /* * @inmates must have been initialised prior to this call */ -static void __cell_release(struct cell *cell, struct bio_list *inmates) +static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates) { struct bio_prison *prison = cell->prison; @@ -287,7 +288,7 @@ static void __cell_release(struct cell * mempool_free(cell, prison->cell_pool); } -static void cell_release(struct cell *cell, struct bio_list *bios) +static void cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) { unsigned long flags; struct bio_prison *prison = cell->prison; @@ -303,7 +304,7 @@ static void cell_release(struct cell *ce * bio may be in the cell. This function releases the cell, and also does * a sanity check. */ -static void __cell_release_singleton(struct cell *cell, struct bio *bio) +static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) { BUG_ON(cell->holder != bio); BUG_ON(!bio_list_empty(&cell->bios)); @@ -311,7 +312,7 @@ static void __cell_release_singleton(str __cell_release(cell, NULL); } -static void cell_release_singleton(struct cell *cell, struct bio *bio) +static void cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) { unsigned long flags; struct bio_prison *prison = cell->prison; @@ -324,7 +325,8 @@ static void cell_release_singleton(struc /* * Sometimes we don't want the holder, just the additional bios. */ -static void __cell_release_no_holder(struct cell *cell, struct bio_list *inmates) +static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, + struct bio_list *inmates) { struct bio_prison *prison = cell->prison; @@ -334,7 +336,8 @@ static void __cell_release_no_holder(str mempool_free(cell, prison->cell_pool); } -static void cell_release_no_holder(struct cell *cell, struct bio_list *inmates) +static void cell_release_no_holder(struct dm_bio_prison_cell *cell, + struct bio_list *inmates) { unsigned long flags; struct bio_prison *prison = cell->prison; @@ -344,7 +347,7 @@ static void cell_release_no_holder(struc spin_unlock_irqrestore(&prison->lock, flags); } -static void cell_error(struct cell *cell) +static void cell_error(struct dm_bio_prison_cell *cell) { struct bio_prison *prison = cell->prison; struct bio_list bios; @@ -491,7 +494,7 @@ static void build_virtual_key(struct dm_ * also provides the interface for creating and destroying internal * devices. */ -struct new_mapping; +struct dm_thin_new_mapping; struct pool_features { unsigned zero_new_blocks:1; @@ -537,7 +540,7 @@ struct pool { struct deferred_set shared_read_ds; struct deferred_set all_io_ds; - struct new_mapping *next_mapping; + struct dm_thin_new_mapping *next_mapping; mempool_t *mapping_pool; mempool_t *endio_hook_pool; }; @@ -630,11 +633,11 @@ static struct pool *__pool_table_lookup_ /*----------------------------------------------------------------*/ -struct endio_hook { +struct dm_thin_endio_hook { struct thin_c *tc; struct deferred_entry *shared_read_entry; struct deferred_entry *all_io_entry; - struct new_mapping *overwrite_mapping; + struct dm_thin_new_mapping *overwrite_mapping; }; static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) @@ -647,7 +650,8 @@ static void __requeue_bio_list(struct th bio_list_init(master); while ((bio = bio_list_pop(&bios))) { - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + if (h->tc == tc) bio_endio(bio, DM_ENDIO_REQUEUE); else @@ -736,7 +740,7 @@ static void wake_worker(struct pool *poo /* * Bio endio functions. */ -struct new_mapping { +struct dm_thin_new_mapping { struct list_head list; unsigned quiesced:1; @@ -746,7 +750,7 @@ struct new_mapping { struct thin_c *tc; dm_block_t virt_block; dm_block_t data_block; - struct cell *cell, *cell2; + struct dm_bio_prison_cell *cell, *cell2; int err; /* @@ -759,7 +763,7 @@ struct new_mapping { bio_end_io_t *saved_bi_end_io; }; -static void __maybe_add_mapping(struct new_mapping *m) +static void __maybe_add_mapping(struct dm_thin_new_mapping *m) { struct pool *pool = m->tc->pool; @@ -772,7 +776,7 @@ static void __maybe_add_mapping(struct n static void copy_complete(int read_err, unsigned long write_err, void *context) { unsigned long flags; - struct new_mapping *m = context; + struct dm_thin_new_mapping *m = context; struct pool *pool = m->tc->pool; m->err = read_err || write_err ? -EIO : 0; @@ -786,8 +790,8 @@ static void copy_complete(int read_err, static void overwrite_endio(struct bio *bio, int err) { unsigned long flags; - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; - struct new_mapping *m = h->overwrite_mapping; + struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_new_mapping *m = h->overwrite_mapping; struct pool *pool = m->tc->pool; m->err = err; @@ -811,7 +815,7 @@ static void overwrite_endio(struct bio * /* * This sends the bios in the cell back to the deferred_bios list. */ -static void cell_defer(struct thin_c *tc, struct cell *cell, +static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, dm_block_t data_block) { struct pool *pool = tc->pool; @@ -828,7 +832,7 @@ static void cell_defer(struct thin_c *tc * Same as cell_defer above, except it omits one particular detainee, * a write bio that covers the block and has already been processed. */ -static void cell_defer_except(struct thin_c *tc, struct cell *cell) +static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell) { struct bio_list bios; struct pool *pool = tc->pool; @@ -843,7 +847,7 @@ static void cell_defer_except(struct thi wake_worker(pool); } -static void process_prepared_mapping(struct new_mapping *m) +static void process_prepared_mapping(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; struct bio *bio; @@ -886,7 +890,7 @@ static void process_prepared_mapping(str mempool_free(m, tc->pool->mapping_pool); } -static void process_prepared_discard(struct new_mapping *m) +static void process_prepared_discard(struct dm_thin_new_mapping *m) { int r; struct thin_c *tc = m->tc; @@ -909,11 +913,11 @@ static void process_prepared_discard(str } static void process_prepared(struct pool *pool, struct list_head *head, - void (*fn)(struct new_mapping *)) + void (*fn)(struct dm_thin_new_mapping *)) { unsigned long flags; struct list_head maps; - struct new_mapping *m, *tmp; + struct dm_thin_new_mapping *m, *tmp; INIT_LIST_HEAD(&maps); spin_lock_irqsave(&pool->lock, flags); @@ -957,9 +961,9 @@ static int ensure_next_mapping(struct po return pool->next_mapping ? 0 : -ENOMEM; } -static struct new_mapping *get_next_mapping(struct pool *pool) +static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool) { - struct new_mapping *r = pool->next_mapping; + struct dm_thin_new_mapping *r = pool->next_mapping; BUG_ON(!pool->next_mapping); @@ -971,11 +975,11 @@ static struct new_mapping *get_next_mapp static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, struct dm_dev *origin, dm_block_t data_origin, dm_block_t data_dest, - struct cell *cell, struct bio *bio) + struct dm_bio_prison_cell *cell, struct bio *bio) { int r; struct pool *pool = tc->pool; - struct new_mapping *m = get_next_mapping(pool); + struct dm_thin_new_mapping *m = get_next_mapping(pool); INIT_LIST_HEAD(&m->list); m->quiesced = 0; @@ -997,7 +1001,8 @@ static void schedule_copy(struct thin_c * bio immediately. Otherwise we use kcopyd to clone the data first. */ if (io_overwrites_block(pool, bio)) { - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + h->overwrite_mapping = m; m->bio = bio; save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); @@ -1025,7 +1030,7 @@ static void schedule_copy(struct thin_c static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block, dm_block_t data_origin, dm_block_t data_dest, - struct cell *cell, struct bio *bio) + struct dm_bio_prison_cell *cell, struct bio *bio) { schedule_copy(tc, virt_block, tc->pool_dev, data_origin, data_dest, cell, bio); @@ -1033,18 +1038,18 @@ static void schedule_internal_copy(struc static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, dm_block_t data_dest, - struct cell *cell, struct bio *bio) + struct dm_bio_prison_cell *cell, struct bio *bio) { schedule_copy(tc, virt_block, tc->origin_dev, virt_block, data_dest, cell, bio); } static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, - dm_block_t data_block, struct cell *cell, + dm_block_t data_block, struct dm_bio_prison_cell *cell, struct bio *bio) { struct pool *pool = tc->pool; - struct new_mapping *m = get_next_mapping(pool); + struct dm_thin_new_mapping *m = get_next_mapping(pool); INIT_LIST_HEAD(&m->list); m->quiesced = 1; @@ -1065,12 +1070,12 @@ static void schedule_zero(struct thin_c process_prepared_mapping(m); else if (io_overwrites_block(pool, bio)) { - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + h->overwrite_mapping = m; m->bio = bio; save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); remap_and_issue(tc, bio, data_block); - } else { int r; struct dm_io_region to; @@ -1155,7 +1160,7 @@ static int alloc_data_block(struct thin_ */ static void retry_on_resume(struct bio *bio) { - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; struct thin_c *tc = h->tc; struct pool *pool = tc->pool; unsigned long flags; @@ -1165,7 +1170,7 @@ static void retry_on_resume(struct bio * spin_unlock_irqrestore(&pool->lock, flags); } -static void no_space(struct cell *cell) +static void no_space(struct dm_bio_prison_cell *cell) { struct bio *bio; struct bio_list bios; @@ -1182,11 +1187,11 @@ static void process_discard(struct thin_ int r; unsigned long flags; struct pool *pool = tc->pool; - struct cell *cell, *cell2; + struct dm_bio_prison_cell *cell, *cell2; struct cell_key key, key2; dm_block_t block = get_bio_block(tc, bio); struct dm_thin_lookup_result lookup_result; - struct new_mapping *m; + struct dm_thin_new_mapping *m; build_virtual_key(tc->td, block, &key); if (bio_detain(tc->pool->prison, &key, bio, &cell)) @@ -1263,7 +1268,7 @@ static void process_discard(struct thin_ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, struct cell_key *key, struct dm_thin_lookup_result *lookup_result, - struct cell *cell) + struct dm_bio_prison_cell *cell) { int r; dm_block_t data_block; @@ -1290,7 +1295,7 @@ static void process_shared_bio(struct th dm_block_t block, struct dm_thin_lookup_result *lookup_result) { - struct cell *cell; + struct dm_bio_prison_cell *cell; struct pool *pool = tc->pool; struct cell_key key; @@ -1305,7 +1310,7 @@ static void process_shared_bio(struct th if (bio_data_dir(bio) == WRITE) break_sharing(tc, bio, block, &key, lookup_result, cell); else { - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; h->shared_read_entry = ds_inc(&pool->shared_read_ds); @@ -1315,7 +1320,7 @@ static void process_shared_bio(struct th } static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block, - struct cell *cell) + struct dm_bio_prison_cell *cell) { int r; dm_block_t data_block; @@ -1363,7 +1368,7 @@ static void process_bio(struct thin_c *t { int r; dm_block_t block = get_bio_block(tc, bio); - struct cell *cell; + struct dm_bio_prison_cell *cell; struct cell_key key; struct dm_thin_lookup_result lookup_result; @@ -1432,7 +1437,7 @@ static void process_deferred_bios(struct spin_unlock_irqrestore(&pool->lock, flags); while ((bio = bio_list_pop(&bios))) { - struct endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; struct thin_c *tc = h->tc; /* @@ -1522,10 +1527,10 @@ static void thin_defer_bio(struct thin_c wake_worker(pool); } -static struct endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio) +static struct dm_thin_endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio) { struct pool *pool = tc->pool; - struct endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO); + struct dm_thin_endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO); h->tc = tc; h->shared_read_entry = NULL; @@ -1687,6 +1692,9 @@ static void __pool_destroy(struct pool * kfree(pool); } +static struct kmem_cache *_new_mapping_cache; +static struct kmem_cache *_endio_hook_cache; + static struct pool *pool_create(struct mapped_device *pool_md, struct block_device *metadata_dev, unsigned long block_size, char **error) @@ -1755,16 +1763,16 @@ static struct pool *pool_create(struct m ds_init(&pool->all_io_ds); pool->next_mapping = NULL; - pool->mapping_pool = - mempool_create_kmalloc_pool(MAPPING_POOL_SIZE, sizeof(struct new_mapping)); + pool->mapping_pool = mempool_create_slab_pool(MAPPING_POOL_SIZE, + _new_mapping_cache); if (!pool->mapping_pool) { *error = "Error creating pool's mapping mempool"; err_p = ERR_PTR(-ENOMEM); goto bad_mapping_pool; } - pool->endio_hook_pool = - mempool_create_kmalloc_pool(ENDIO_HOOK_POOL_SIZE, sizeof(struct endio_hook)); + pool->endio_hook_pool = mempool_create_slab_pool(ENDIO_HOOK_POOL_SIZE, + _endio_hook_cache); if (!pool->endio_hook_pool) { *error = "Error creating pool's endio_hook mempool"; err_p = ERR_PTR(-ENOMEM); @@ -2613,9 +2621,9 @@ static int thin_endio(struct dm_target * union map_info *map_context) { unsigned long flags; - struct endio_hook *h = map_context->ptr; + struct dm_thin_endio_hook *h = map_context->ptr; struct list_head work; - struct new_mapping *m, *tmp; + struct dm_thin_new_mapping *m, *tmp; struct pool *pool = h->tc->pool; if (h->shared_read_entry) { @@ -2755,7 +2763,32 @@ static int __init dm_thin_init(void) r = dm_register_target(&pool_target); if (r) - dm_unregister_target(&thin_target); + goto bad_pool_target; + + r = -ENOMEM; + + _cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0); + if (!_cell_cache) + goto bad_cell_cache; + + _new_mapping_cache = KMEM_CACHE(dm_thin_new_mapping, 0); + if (!_new_mapping_cache) + goto bad_new_mapping_cache; + + _endio_hook_cache = KMEM_CACHE(dm_thin_endio_hook, 0); + if (!_endio_hook_cache) + goto bad_endio_hook_cache; + + return 0; + +bad_endio_hook_cache: + kmem_cache_destroy(_new_mapping_cache); +bad_new_mapping_cache: + kmem_cache_destroy(_cell_cache); +bad_cell_cache: + dm_unregister_target(&pool_target); +bad_pool_target: + dm_unregister_target(&thin_target); return r; } @@ -2764,6 +2797,10 @@ static void dm_thin_exit(void) { dm_unregister_target(&thin_target); dm_unregister_target(&pool_target); + + kmem_cache_destroy(_cell_cache); + kmem_cache_destroy(_new_mapping_cache); + kmem_cache_destroy(_endio_hook_cache); } module_init(dm_thin_init);