From 743875e36d63994737fffc0eaeec320f64ab2368 Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Wed, 24 Aug 2011 18:13:27 -0400
Subject: [CRAZY RFC PATCH 1/4] dm: share mempools for all request-based DM devices

Sharing mempools across all request-based DM devices eliminates a huge
amount of slab use, but in doing so it may leave the system susceptible
to live-locks.

The one thing that enabled me to implement this is the belief that
forward progress will be made (eventually) because:

When a request is cloned, blk_rq_prep_clone will allocate one bio at a
time for all bios in the request being cloned:

  dm_prep_fn -> clone_rq -> setup_clone -> blk_rq_prep_clone ->
    __rq_for_each_bio { bio = bio_alloc_bioset }

If a bio cannot be allocated from the bioset:
- blk_rq_prep_clone's error path calls blk_rq_unprep_clone to free all
  bios previously allocated from the bioset
- ENOMEM is returned, which results in dm_prep_fn returning
  BLKPREP_DEFER

So, long story short, progress _should_ be made.

But sharing mempools for request-based DM is a wild (likely broken?)
idea unless more sophisticated per-device proportional slab allocation
guarantees are put in place (a la per-bdi dirty page accounting).

Not-Signed-off-by: Mike Snitzer
---
 drivers/md/dm.c |  110 ++++++++++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 92 insertions(+), 18 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 490a7a1..d04fe17 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -195,15 +195,21 @@ struct dm_md_mempools {
         mempool_t *io_pool;
         mempool_t *tio_pool;
         struct bio_set *bs;
+        unsigned type;
+        atomic_t refcount;
 };
 
 #define RESERVED_BIO_BASED_IOS          16
 #define RESERVED_REQUEST_BASED_IOS      256
+
 static struct kmem_cache *_io_cache;
 static struct kmem_cache *_tio_cache;
 static struct kmem_cache *_rq_tio_cache;
 static struct kmem_cache *_rq_bio_info_cache;
 
+static struct dm_md_mempools *_rq_pools;
+static DEFINE_MUTEX(_rq_pools_mutex);
+
 static int __init local_init(void)
 {
         int r = -ENOMEM;
@@ -238,6 +244,8 @@ static int __init local_init(void)
         if (!_major)
                 _major = r;
 
+        _rq_pools = NULL;
+
         return 0;
 
 out_uevent_exit:
@@ -254,8 +262,15 @@ out_free_io_cache:
         return r;
 }
 
+static void __free_md_mempools(struct dm_md_mempools *pools);
+
 static void local_exit(void)
 {
+        /*
+         * Request-based table may have never been made live.
+         */
+        __free_md_mempools(_rq_pools);
+
         kmem_cache_destroy(_rq_bio_info_cache);
         kmem_cache_destroy(_rq_tio_cache);
         kmem_cache_destroy(_tio_cache);
@@ -1915,12 +1930,22 @@ static void free_dev(struct mapped_device *md)
         unlock_fs(md);
         bdput(md->bdev);
         destroy_workqueue(md->wq);
-        if (md->tio_pool)
-                mempool_destroy(md->tio_pool);
-        if (md->io_pool)
-                mempool_destroy(md->io_pool);
-        if (md->bs)
-                bioset_free(md->bs);
+        if (dm_request_based(md)) {
+                if (atomic_dec_and_test(&_rq_pools->refcount)) {
+                        mutex_lock(&_rq_pools_mutex);
+                        __free_md_mempools(_rq_pools);
+                        _rq_pools = NULL;
+                        mutex_unlock(&_rq_pools_mutex);
+                }
+        } else {
+                if (md->tio_pool)
+                        mempool_destroy(md->tio_pool);
+                if (md->io_pool)
+                        mempool_destroy(md->io_pool);
+                if (md->bs)
+                        bioset_free(md->bs);
+        }
+
         blk_integrity_unregister(md->disk);
         del_gendisk(md->disk);
         free_minor(minor);
@@ -1947,10 +1972,14 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
         BUG_ON(!p || md->io_pool || md->tio_pool || md->bs);
 
         md->io_pool = p->io_pool;
-        p->io_pool = NULL;
         md->tio_pool = p->tio_pool;
-        p->tio_pool = NULL;
         md->bs = p->bs;
+
+        if (p->type == DM_TYPE_REQUEST_BASED)
+                return; /* avoid shared mempools cleanup */
+
+        p->io_pool = NULL;
+        p->tio_pool = NULL;
         p->bs = NULL;
 
 out:
@@ -2684,24 +2713,38 @@ int dm_noflush_suspending(struct dm_target *ti)
 }
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
+static struct dm_md_mempools * __alloc_md_pools(unsigned type, unsigned integrity)
 {
         struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL);
-        unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ?
-                                 RESERVED_BIO_BASED_IOS : RESERVED_REQUEST_BASED_IOS;
+        unsigned int uninitialized_var(pool_size);
+        struct kmem_cache *uninitialized_var(io_cache);
+        struct kmem_cache *uninitialized_var(tio_cache);
 
         if (!pools)
                 return NULL;
 
-        pools->io_pool = (type == DM_TYPE_BIO_BASED) ?
-                         mempool_create_slab_pool(pool_size, _io_cache) :
-                         mempool_create_slab_pool(pool_size, _rq_bio_info_cache);
+        switch (type) {
+        case DM_TYPE_BIO_BASED:
+                pool_size = RESERVED_BIO_BASED_IOS;
+                io_cache = _io_cache;
+                tio_cache = _tio_cache;
+                break;
+        case DM_TYPE_REQUEST_BASED:
+                pool_size = RESERVED_REQUEST_BASED_IOS;
+                io_cache = _rq_bio_info_cache;
+                tio_cache = _rq_tio_cache;
+                _rq_pools = pools;
+                break;
+        }
+
+        pools->type = type;
+        atomic_set(&pools->refcount, 0);
+
+        pools->io_pool = mempool_create_slab_pool(pool_size, io_cache);
         if (!pools->io_pool)
                 goto free_pools_and_out;
 
-        pools->tio_pool = (type == DM_TYPE_BIO_BASED) ?
-                          mempool_create_slab_pool(pool_size, _tio_cache) :
-                          mempool_create_slab_pool(pool_size, _rq_tio_cache);
+        pools->tio_pool = mempool_create_slab_pool(pool_size, tio_cache);
         if (!pools->tio_pool)
                 goto free_io_pool_and_out;
 
@@ -2729,7 +2772,30 @@ free_pools_and_out:
         return NULL;
 }
 
-void dm_free_md_mempools(struct dm_md_mempools *pools)
+struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
+{
+        if (type == DM_TYPE_REQUEST_BASED) {
+                struct dm_md_mempools *pools = NULL;
+
+                mutex_lock(&_rq_pools_mutex);
+                if (likely(_rq_pools)) {
+                        pools = _rq_pools;
+                } else {
+                        /*
+                         * Allocate shared request-based mempools.
+                         * Must always allocate integrity pool too.
+                         */
+                        pools = __alloc_md_pools(type, 1);
+                }
+                atomic_inc(&_rq_pools->refcount);
+                mutex_unlock(&_rq_pools_mutex);
+                return pools;
+        }
+
+        return __alloc_md_pools(type, integrity);
+}
+
+static void __free_md_mempools(struct dm_md_mempools *pools)
 {
         if (!pools)
                 return;
@@ -2746,6 +2812,14 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
         kfree(pools);
 }
 
+void dm_free_md_mempools(struct dm_md_mempools *pools)
+{
+        if (!pools || pools->type == DM_TYPE_REQUEST_BASED)
+                return;
+
+        __free_md_mempools(pools);
+}
+
 static const struct block_device_operations dm_blk_dops = {
         .open = dm_blk_open,
         .release = dm_blk_close,
-- 
1.7.1