 block/elevator.c      |    2 +-
 drivers/md/dm-ioctl.c |  148 +++++++++++++++++++++++++++------
 drivers/md/dm.c       |  216 +++++++++++++++++++++++++++++++++++++++++-------
 drivers/md/dm.h       |   12 +++
 4 files changed, 318 insertions(+), 60 deletions(-)

diff --git a/block/elevator.c b/block/elevator.c
index 6ec9137..168112e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1088,7 +1088,7 @@ ssize_t elv_iosched_show(struct request_queue *q, char *name)
 	struct elevator_type *__e;
 	int len = 0;
 
-	if (!q->elevator)
+	if (!q->elevator || !blk_queue_stackable(q))
 		return sprintf(name, "none\n");
 
 	elv = e->elevator_type;
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index d7500e1..1c8718c 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -174,6 +174,19 @@ static void free_cell(struct hash_cell *hc)
 	}
 }
 
+static struct hash_cell *dm_get_verified_mdptr(struct mapped_device *md)
+{
+	struct hash_cell *hc;
+
+	hc = dm_get_mdptr(md);
+	if (!hc || hc->md != md) {
+		DMWARN("device has been removed from the dev hash table.");
+		return NULL;
+	}
+
+	return hc;
+}
+
 /*
  * The kdev_t and uuid of a device can never change once it is
  * initially inserted.
@@ -546,11 +559,9 @@ static struct dm_table *dm_get_inactive_table(struct mapped_device *md)
 	struct dm_table *table = NULL;
 
 	down_read(&_hash_lock);
-	hc = dm_get_mdptr(md);
-	if (!hc || hc->md != md) {
-		DMWARN("device has been removed from the dev hash table.");
+	hc = dm_get_verified_mdptr(md);
+	if (!hc)
 		goto out;
-	}
 
 	table = hc->new_map;
 	if (table)
@@ -707,23 +718,40 @@ static struct mapped_device *find_device(struct dm_ioctl *param)
 	return md;
 }
 
+static struct mapped_device *find_device_noinit(struct dm_ioctl *param)
+{
+	struct hash_cell *hc;
+	struct mapped_device *md = NULL;
+
+	down_write(&_hash_lock);
+	hc = __find_device_hash_cell(param);
+	if (hc)
+		md = hc->md;
+	else
+		DMWARN("device doesn't appear to be in the dev hash table.");
+	up_write(&_hash_lock);
+
+	return md;
+}
+
 static int dev_remove(struct dm_ioctl *param, size_t param_size)
 {
 	struct hash_cell *hc;
 	struct mapped_device *md;
 	int r;
 
-	down_write(&_hash_lock);
-	hc = __find_device_hash_cell(param);
+	md = find_device_noinit(param);
+	if (!md)
+		return -ENXIO;
 
+	down_write(&_hash_lock);
+	hc = dm_get_verified_mdptr(md);
 	if (!hc) {
-		DMWARN("device doesn't appear to be in the dev hash table.");
 		up_write(&_hash_lock);
+		dm_put(md);
 		return -ENXIO;
 	}
 
-	md = hc->md;
-
 	/*
 	 * Ensure the device is not open and nothing further can open it.
 	 */
@@ -861,17 +889,20 @@ static int do_resume(struct dm_ioctl *param)
 	struct mapped_device *md;
 	struct dm_table *new_map, *old_map = NULL;
 
-	down_write(&_hash_lock);
+	md = find_device_noinit(param);
+	if (!md)
+		return -ENXIO;
 
-	hc = __find_device_hash_cell(param);
+	dm_lock_resume(md);
+
+	down_write(&_hash_lock);
+	hc = dm_get_verified_mdptr(md);
 	if (!hc) {
-		DMWARN("device doesn't appear to be in the dev hash table.");
 		up_write(&_hash_lock);
-		return -ENXIO;
+		r = -ENXIO;
+		goto out;
 	}
 
-	md = hc->md;
-
 	new_map = hc->new_map;
 	hc->new_map = NULL;
 	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
@@ -891,8 +922,8 @@ static int do_resume(struct dm_ioctl *param)
 		old_map = dm_swap_table(md, new_map);
 		if (IS_ERR(old_map)) {
 			dm_table_destroy(new_map);
-			dm_put(md);
-			return PTR_ERR(old_map);
+			r = PTR_ERR(old_map);
+			goto out;
 		}
 
 		if (dm_table_get_mode(new_map) & FMODE_WRITE)
@@ -913,6 +944,8 @@ static int do_resume(struct dm_ioctl *param)
 	if (!r)
 		r = __dev_status(md, param);
 
+out:
+	dm_unlock_resume(md);
 	dm_put(md);
 	return r;
 }
@@ -1140,6 +1173,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 	struct hash_cell *hc;
 	struct dm_table *t;
 	struct mapped_device *md;
+	int initial_table_load = 0;
 
 	md = find_device(param);
 	if (!md)
@@ -1170,12 +1204,55 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 		goto out;
 	}
 
+	/*
+	 * Protect md->type and md->queue against concurrent table loads.
+	 * Locking strategy:
+	 * + Leverage fact that md's type cannot change after initial table load.
+	 *   - Only protect type in table_load() -- not in do_resume().
+	 *
+	 * + Protect type and queue while working to stage an inactive table:
+	 *   - check if table's type conflicts with md->type
+	 *     (holding: md->type_lock)
+	 *   - setup md->queue based on md->type
+	 *     (holding: md->type_lock)
+	 *   - stage inactive table (hc->new_map)
+	 *     (holding: md->type_lock + _hash_lock)
+	 */
+	dm_lock_md_type(md);
+
+	if (dm_unknown_md_type(md)) {
+		/* set md's type based on table's type */
+		dm_set_md_type(md, t);
+		/* note initial_table_load to clear md's type on error */
+		initial_table_load = 1;
+	} else if (!dm_md_type_matches_table(md, t)) {
+		DMWARN("can't change device type after initial table load.");
+		dm_table_destroy(t);
+		dm_unlock_md_type(md);
+		r = -EINVAL;
+		goto out;
+	}
+
+	/* setup md->queue to reflect md's and table's type (may block) */
+	r = dm_setup_md_queue(md);
+	if (r) {
+		DMWARN("unable to setup device queue for this table.");
+		dm_table_destroy(t);
+		if (initial_table_load)
+			dm_clear_md_type(md);
+		dm_unlock_md_type(md);
+		goto out;
+	}
+
+	/* stage inactive table */
 	down_write(&_hash_lock);
-	hc = dm_get_mdptr(md);
-	if (!hc || hc->md != md) {
-		DMWARN("device has been removed from the dev hash table.");
+	hc = dm_get_verified_mdptr(md);
+	if (!hc) {
 		dm_table_destroy(t);
 		up_write(&_hash_lock);
+		if (initial_table_load)
+			dm_clear_md_type(md);
+		dm_unlock_md_type(md);
 		r = -ENXIO;
 		goto out;
 	}
@@ -1185,6 +1262,8 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
 	hc->new_map = t;
 	up_write(&_hash_lock);
 
+	dm_unlock_md_type(md);
+
 	param->flags |= DM_INACTIVE_PRESENT_FLAG;
 	r = __dev_status(md, param);
 
@@ -1199,16 +1278,29 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
 	int r;
 	struct hash_cell *hc;
 	struct mapped_device *md;
+	struct dm_table *live_table;
 
-	down_write(&_hash_lock);
+	md = find_device_noinit(param);
+	if (!md)
+		return -ENXIO;
 
-	hc = __find_device_hash_cell(param);
+	dm_lock_resume(md);
+	dm_lock_md_type(md);	/* May need to clear md's type */
+
+	down_write(&_hash_lock);
+	hc = dm_get_verified_mdptr(md);
 	if (!hc) {
-		DMWARN("device doesn't appear to be in the dev hash table.");
 		up_write(&_hash_lock);
-		return -ENXIO;
+		r = -ENXIO;
+		goto out;
 	}
 
+	/* Clear md's type if there is no live table */
+	live_table = dm_get_live_table(md);
+	if (!live_table)
+		dm_clear_md_type(md);
+	dm_table_put(live_table);
+
 	if (hc->new_map) {
 		dm_table_destroy(hc->new_map);
 		hc->new_map = NULL;
@@ -1217,8 +1309,10 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
 	param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
 
 	r = __dev_status(hc->md, param);
-	md = hc->md;
 	up_write(&_hash_lock);
+out:
+	dm_unlock_md_type(md);
+	dm_unlock_resume(md);
 	dm_put(md);
 	return r;
 }
@@ -1652,8 +1746,8 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid)
 		return -ENXIO;
 
 	mutex_lock(&dm_hash_cells_mutex);
-	hc = dm_get_mdptr(md);
-	if (!hc || hc->md != md) {
+	hc = dm_get_verified_mdptr(md);
+	if (!hc) {
 		r = -ENXIO;
 		goto out;
 	}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d21e128..40b8b53 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -111,11 +111,26 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_QUEUE_IO_TO_THREAD 6
 
 /*
+ * Type for md->type field.
+ */
+enum mapped_device_type {
+	UNKNOWN_MD_TYPE,
+	BIO_BASED_MD_TYPE,
+	REQUEST_BASED_MD_TYPE,
+};
+
+/*
  * Work processed by per-device workqueue.
  */
 struct mapped_device {
 	struct rw_semaphore io_lock;
 	struct mutex suspend_lock;
+	/*
+	 * Resuming inactive table lacks testable state during its
+	 * transition to being live. Interlock allows other operations
+	 * (e.g. table_clear) to _know_ there isn't a live table yet.
+	 */
+	struct mutex resume_lock;
 	rwlock_t map_lock;
 	atomic_t holders;
 	atomic_t open_count;
@@ -123,6 +138,12 @@ struct mapped_device {
 	unsigned long flags;
 
 	struct request_queue *queue;
+	enum mapped_device_type type;
+	/*
+	 * Protect queue and type from concurrent access.
+	 */
+	struct mutex type_lock;
+
 	struct gendisk *disk;
 	char name[16];
 
@@ -1849,6 +1870,28 @@ static const struct block_device_operations dm_blk_dops;
 static void dm_wq_work(struct work_struct *work);
 static void dm_rq_barrier_work(struct work_struct *work);
 
+static void dm_init_md_queue(struct mapped_device *md)
+{
+	/*
+	 * Request-based dm devices cannot be stacked on top of bio-based dm
+	 * devices. The type of this dm device has not been decided yet.
+	 * The type is decided at the first table loading time.
+	 * To prevent problematic device stacking, clear the queue flag
+	 * for request stacking support until then.
+	 *
+	 * This queue is new, so no concurrency on the queue_flags.
+	 */
+	queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
+
+	md->queue->queuedata = md;
+	md->queue->backing_dev_info.congested_fn = dm_any_congested;
+	md->queue->backing_dev_info.congested_data = md;
+	blk_queue_make_request(md->queue, dm_request);
+	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
+	md->queue->unplug_fn = dm_unplug_all;
+	blk_queue_merge_bvec(md->queue, dm_merge_bvec);
+}
+
 /*
  * Allocate and initialise a blank device with a given minor.
  */
@@ -1874,8 +1917,11 @@ static struct mapped_device *alloc_dev(int minor)
 	if (r < 0)
 		goto bad_minor;
 
+	md->type = UNKNOWN_MD_TYPE;
 	init_rwsem(&md->io_lock);
 	mutex_init(&md->suspend_lock);
+	mutex_init(&md->resume_lock);
+	mutex_init(&md->type_lock);
 	spin_lock_init(&md->deferred_lock);
 	spin_lock_init(&md->barrier_error_lock);
 	rwlock_init(&md->map_lock);
@@ -1886,34 +1932,11 @@ static struct mapped_device *alloc_dev(int minor)
 	INIT_LIST_HEAD(&md->uevent_list);
 	spin_lock_init(&md->uevent_lock);
 
-	md->queue = blk_init_queue(dm_request_fn, NULL);
+	md->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!md->queue)
 		goto bad_queue;
 
-	/*
-	 * Request-based dm devices cannot be stacked on top of bio-based dm
-	 * devices. The type of this dm device has not been decided yet,
-	 * although we initialized the queue using blk_init_queue().
-	 * The type is decided at the first table loading time.
-	 * To prevent problematic device stacking, clear the queue flag
-	 * for request stacking support until then.
-	 *
-	 * This queue is new, so no concurrency on the queue_flags.
-	 */
-	queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
-	md->saved_make_request_fn = md->queue->make_request_fn;
-	md->queue->queuedata = md;
-	md->queue->backing_dev_info.congested_fn = dm_any_congested;
-	md->queue->backing_dev_info.congested_data = md;
-	blk_queue_make_request(md->queue, dm_request);
-	blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
-	md->queue->unplug_fn = dm_unplug_all;
-	blk_queue_merge_bvec(md->queue, dm_merge_bvec);
-	blk_queue_softirq_done(md->queue, dm_softirq_done);
-	blk_queue_prep_rq(md->queue, dm_prep_fn);
-	blk_queue_lld_busy(md->queue, dm_lld_busy);
-	blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH,
-			  dm_rq_prepare_flush);
+	dm_init_md_queue(md);
 
 	md->disk = alloc_disk(1);
 	if (!md->disk)
@@ -1997,6 +2020,16 @@ static void free_dev(struct mapped_device *md)
 	kfree(md);
 }
 
+void dm_lock_resume(struct mapped_device *md)
+{
+	mutex_lock(&md->resume_lock);
+}
+
+void dm_unlock_resume(struct mapped_device *md)
+{
+	mutex_unlock(&md->resume_lock);
+}
+
 static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
 {
 	struct dm_md_mempools *p;
@@ -2128,6 +2161,132 @@ int dm_create(int minor, struct mapped_device **result)
 	return 0;
 }
 
+/*
+ * Functions to manage md->type.
+ * All are required to hold md->type_lock.
+ */
+void dm_lock_md_type(struct mapped_device *md)
+{
+	mutex_lock(&md->type_lock);
+}
+
+void dm_unlock_md_type(struct mapped_device *md)
+{
+	mutex_unlock(&md->type_lock);
+}
+
+void dm_set_md_type(struct mapped_device *md, struct dm_table* t)
+{
+	if (dm_table_request_based(t))
+		md->type = REQUEST_BASED_MD_TYPE;
+	else
+		md->type = BIO_BASED_MD_TYPE;
+}
+
+void dm_clear_md_type(struct mapped_device *md)
+{
+	md->type = UNKNOWN_MD_TYPE;
+}
+
+bool dm_unknown_md_type(struct mapped_device *md)
+{
+	return md->type == UNKNOWN_MD_TYPE;
+}
+
+static bool dm_bio_based_md_type(struct mapped_device *md)
+{
+	return md->type == BIO_BASED_MD_TYPE;
+}
+
+static bool dm_request_based_md_type(struct mapped_device *md)
+{
+	return md->type == REQUEST_BASED_MD_TYPE;
+}
+
+bool dm_md_type_matches_table(struct mapped_device *md, struct dm_table* t)
+{
+	if (dm_request_based_md_type(md))
+		return dm_table_request_based(t);
+	else if (dm_bio_based_md_type(md))
+		return !dm_table_request_based(t);
+
+	return 0;
+}
+
+/*
+ * Functions to manage md->queue.
+ * All are required to hold md->type_lock.
+ */
+static bool dm_bio_based_md_queue(struct mapped_device *md)
+{
+	return (md->queue->request_fn) ? 0 : 1;
+}
+
+/*
+ * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
+ */
+static int dm_init_request_based_queue(struct mapped_device *md)
+{
+	struct request_queue *q = NULL;
+
+	/* Avoid re-initializing the queue if already fully initialized */
+	if (!md->queue->elevator) {
+		/* Fully initialize the queue */
+		q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
+		if (!q)
+			return 0;
+		md->queue = q;
+		md->saved_make_request_fn = md->queue->make_request_fn;
+		dm_init_md_queue(md);
+		blk_queue_softirq_done(md->queue, dm_softirq_done);
+		blk_queue_prep_rq(md->queue, dm_prep_fn);
+		blk_queue_lld_busy(md->queue, dm_lld_busy);
+		blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH,
+				  dm_rq_prepare_flush);
+	} else if (dm_bio_based_md_queue(md)) {
+		/*
+		 * Queue was fully initialized on behalf of a previous
+		 * request-based table load. Table is now switching from
+		 * bio-based back to request-based, e.g.: rq -> bio -> rq
+		 */
+		md->queue->request_fn = dm_request_fn;
+	} else
+		return 1; /* queue already request-based */
+
+	elv_register_queue(md->queue);
+
+	return 1;
+}
+
+static void dm_clear_request_based_queue(struct mapped_device *md)
+{
+	if (dm_bio_based_md_queue(md))
+		return; /* queue already bio-based */
+
+	/* Unregister elevator from sysfs and clear ->request_fn */
+	elv_unregister_queue(md->queue);
+	md->queue->request_fn = NULL;
+}
+
+/*
+ * Setup the DM device's queue based on md's type
+ */
+int dm_setup_md_queue(struct mapped_device *md)
+{
+	BUG_ON(!mutex_is_locked(&md->type_lock));
+	BUG_ON(dm_unknown_md_type(md));
+
+	if (dm_request_based_md_type(md)) {
+		if (!dm_init_request_based_queue(md)) {
+			DMWARN("Cannot initialize queue for Request-based dm");
+			return -EINVAL;
+		}
+	} else if (dm_bio_based_md_type(md))
+		dm_clear_request_based_queue(md);
+
+	return 0;
+}
+
 static struct mapped_device *dm_find_md(dev_t dev)
 {
 	struct mapped_device *md;
@@ -2403,13 +2562,6 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table)
 		goto out;
 	}
 
-	/* cannot change the device type, once a table is bound */
-	if (md->map &&
-	    (dm_table_get_type(md->map) != dm_table_get_type(table))) {
-		DMWARN("can't change the device type after a table is bound");
-		goto out;
-	}
-
 	map = __bind(md, table, &limits);
 
 out:
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index bad1724..d5b244f 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -66,6 +66,18 @@ int dm_table_alloc_md_mempools(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
 
+void dm_lock_md_type(struct mapped_device *md);
+void dm_unlock_md_type(struct mapped_device *md);
+void dm_set_md_type(struct mapped_device *md, struct dm_table* t);
+void dm_clear_md_type(struct mapped_device *md);
+bool dm_unknown_md_type(struct mapped_device *md);
+bool dm_md_type_matches_table(struct mapped_device *md, struct dm_table* t);
+
+int dm_setup_md_queue(struct mapped_device *md);
+
+void dm_lock_resume(struct mapped_device *md);
+void dm_unlock_resume(struct mapped_device *md);
+
 /*
  * To check the return value from dm_table_find_target().
  */