Reorder functions, trying to group together functions that only run in particular contexts (e.g. workqueue worker thread). --- drivers/md/dm-thin.c | 538 ++++++++++++++++++++++++++++----------------------- 1 file changed, 300 insertions(+), 238 deletions(-) Index: linux-3.1-rc9/drivers/md/dm-thin.c =================================================================== --- linux-3.1-rc9.orig/drivers/md/dm-thin.c +++ linux-3.1-rc9/drivers/md/dm-thin.c @@ -164,6 +164,7 @@ static struct bio_prison *prison_create( kfree(prison); return NULL; } + prison->nr_buckets = nr_buckets; prison->hash_mask = nr_buckets - 1; prison->cells = (struct hlist_head *) (prison + 1); @@ -526,48 +527,10 @@ struct thin_c { struct dm_thin_device *td; }; -/* FIXME: Can cells and new_mappings be combined? */ - -struct endio_hook { - struct thin_c *tc; - bio_end_io_t *saved_bi_end_io; - struct deferred_entry *entry; -}; - -struct new_mapping { - struct list_head list; - - int prepared; - - struct thin_c *tc; - dm_block_t virt_block; - dm_block_t data_block; - struct cell *cell; - int err; - - /* - * If the bio covers the whole area of a block then we can avoid - * zeroing or copying. Instead this bio is hooked. The bio will - * still be in the cell, so care has to be taken to avoid issuing - * the bio twice. - */ - struct bio *bio; - bio_end_io_t *saved_bi_end_io; -}; - -/*----------------------------------------------------------------*/ - -static void save_and_set_endio(struct bio *bio, bio_end_io_t **save, - bio_end_io_t *fn) -{ - *save = bio->bi_end_io; - bio->bi_end_io = fn; -} - /*----------------------------------------------------------------*/ /* - * A global list that uses a struct mapped_device as a key. + * A global list of pools that uses a struct mapped_device as a key. */ static struct dm_thin_pool_table { struct mutex mutex; @@ -626,42 +589,12 @@ static struct pool *__pool_table_lookup_ /*----------------------------------------------------------------*/ -static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) -{ - struct bio *bio; - struct bio_list bios; - - bio_list_init(&bios); - bio_list_merge(&bios, master); - bio_list_init(master); - - while ((bio = bio_list_pop(&bios))) { - if (dm_get_mapinfo(bio)->ptr == tc) - bio_endio(bio, DM_ENDIO_REQUEUE); - else - bio_list_add(master, bio); - } -} - -static void requeue_io(struct thin_c *tc) -{ - struct pool *pool = tc->pool; - unsigned long flags; - - spin_lock_irqsave(&pool->lock, flags); - __requeue_bio_list(tc, &pool->deferred_bios); - __requeue_bio_list(tc, &pool->retry_on_resume_list); - spin_unlock_irqrestore(&pool->lock, flags); -} - /* - * This section of code contains the logic for processing a thin devices' IO. + * This section of code contains the logic for processing a thin device's IO. * Much of the code depends on pool object resources (lists, workqueues, etc) * but most is exclusively called from the thin target rather than the thin-pool - * target. wake_worker() being the most notable exception (which is also used - * by thin-pool to continue deferred IO processing after pool resume). + * target. */ -static void process_prepared_mapping(struct new_mapping *m); static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) { @@ -693,11 +626,48 @@ static void remap_and_issue(struct thin_ generic_make_request(bio); } +/* + * wake_worker() is used when new work is queued and when + * pool_resume is ready to continue deferred IO + * processing. 
+ */ static void wake_worker(struct pool *pool) { queue_work(pool->wq, &pool->worker); } +/*----------------------------------------------------------------*/ + +/* + * Bio endio functions. + */ +struct endio_hook { + struct thin_c *tc; + bio_end_io_t *saved_bi_end_io; + struct deferred_entry *entry; +}; + +struct new_mapping { + struct list_head list; + + int prepared; + + struct thin_c *tc; + dm_block_t virt_block; + dm_block_t data_block; + struct cell *cell; + int err; + + /* + * If the bio covers the whole area of a block then we can avoid + * zeroing or copying. Instead this bio is hooked. The bio will + * still be in the cell, so care has to be taken to avoid issuing + * the bio twice. + */ + struct bio *bio; + bio_end_io_t *saved_bi_end_io; +}; + static void __maybe_add_mapping(struct new_mapping *m) { struct pool *pool = m->tc->pool; @@ -761,6 +731,117 @@ static void shared_read_endio(struct bio mempool_free(h, pool->endio_hook_pool); } +/*----------------------------------------------------------------*/ + +/* + * Workqueue. + */ + +/* + * Prepared mapping jobs. + */ + +/* + * This sends the bios in the cell back to the deferred_bios list. + */ +static void cell_defer(struct thin_c *tc, struct cell *cell, + dm_block_t data_block) +{ + struct pool *pool = tc->pool; + unsigned long flags; + + spin_lock_irqsave(&pool->lock, flags); + cell_release(cell, &pool->deferred_bios); + spin_unlock_irqrestore(&tc->pool->lock, flags); + + wake_worker(pool); +} + +/* + * Same as cell_defer above, except it omits one particular detainee, + * a write bio that covers the block and has already been processed. + */ +static void cell_defer_except(struct thin_c *tc, struct cell *cell, + struct bio *exception) +{ + struct bio_list bios; + struct bio *bio; + struct pool *pool = tc->pool; + unsigned long flags; + + bio_list_init(&bios); + cell_release(cell, &bios); + + spin_lock_irqsave(&pool->lock, flags); + while ((bio = bio_list_pop(&bios))) + if (bio != exception) + bio_list_add(&pool->deferred_bios, bio); + spin_unlock_irqrestore(&pool->lock, flags); + + wake_worker(pool); +} + +static void process_prepared_mapping(struct new_mapping *m) +{ + struct thin_c *tc = m->tc; + struct bio *bio; + int r; + + bio = m->bio; + if (bio) + bio->bi_end_io = m->saved_bi_end_io; + + if (m->err) { + cell_error(m->cell); + return; + } + + /* + * Commit the prepared block into the mapping btree. + * Any I/O for this block arriving after this point will get + * remapped to it directly. + */ + r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); + if (r) { + DMERR("dm_thin_insert_block() failed"); + cell_error(m->cell); + return; + } + + /* + * Release any bios held while the block was being provisioned. + * If we are processing a write bio that completely covers the block, + * we already processed it so can ignore it now when processing + * the bios in the cell. + */ + if (bio) { + cell_defer_except(tc, m->cell, bio); + bio_endio(bio, 0); + } else + cell_defer(tc, m->cell, m->data_block); + + list_del(&m->list); /* FIXME: unnecc.? */ + mempool_free(m, tc->pool->mapping_pool); +} + +static void process_prepared_mappings(struct pool *pool) +{ + unsigned long flags; + struct list_head maps; + struct new_mapping *m, *tmp; + + INIT_LIST_HEAD(&maps); + spin_lock_irqsave(&pool->lock, flags); + list_splice_init(&pool->prepared_mappings, &maps); + spin_unlock_irqrestore(&pool->lock, flags); + + list_for_each_entry_safe(m, tmp, &maps, list) + process_prepared_mapping(m); +} + +/* + * Deferred bio jobs. 
+ */ static int io_overwrites_block(struct pool *pool, struct bio *bio) { return ((bio_data_dir(bio) == WRITE) && @@ -768,20 +849,31 @@ static int io_overwrites_block(struct po (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT)); } +static void save_and_set_endio(struct bio *bio, bio_end_io_t **save, + bio_end_io_t *fn) +{ + *save = bio->bi_end_io; + bio->bi_end_io = fn; +} + static int ensure_next_mapping(struct pool *pool) { if (pool->next_mapping) return 0; pool->next_mapping = mempool_alloc(pool->mapping_pool, GFP_ATOMIC); + return pool->next_mapping ? 0 : -ENOMEM; } static struct new_mapping *get_next_mapping(struct pool *pool) { struct new_mapping *r = pool->next_mapping; + BUG_ON(!pool->next_mapping); + pool->next_mapping = NULL; + return r; } @@ -805,6 +897,8 @@ static void schedule_copy(struct thin_c ds_add_work(&pool->ds, &m->list); /* + * IO to pool_dev remaps to the pool target's data_dev. + * * If the whole block of data is being overwritten, we can issue the * bio immediately. Otherwise we use kcopyd to clone the data first. */ @@ -816,7 +910,6 @@ static void schedule_copy(struct thin_c } else { struct dm_io_region from, to; - /* IO to pool_dev remaps to the pool target's data_dev */ from.bdev = tc->pool_dev->bdev; from.sector = data_origin * pool->sectors_per_block; from.count = pool->sectors_per_block; @@ -882,56 +975,6 @@ static void schedule_zero(struct thin_c } } -/* - * This sends the bios in the cell back to the deferred_bios list. - */ -static void cell_defer(struct thin_c *tc, struct cell *cell, - dm_block_t data_block) -{ - struct pool *pool = tc->pool; - unsigned long flags; - - spin_lock_irqsave(&pool->lock, flags); - cell_release(cell, &pool->deferred_bios); - spin_unlock_irqrestore(&tc->pool->lock, flags); - - wake_worker(pool); -} - -/* - * Same as cell_defer, except it omits one particular detainee. - */ -static void cell_defer_except(struct thin_c *tc, struct cell *cell, - struct bio *exception) -{ - struct bio_list bios; - struct bio *bio; - struct pool *pool = tc->pool; - unsigned long flags; - - bio_list_init(&bios); - cell_release(cell, &bios); - - spin_lock_irqsave(&pool->lock, flags); - while ((bio = bio_list_pop(&bios))) - if (bio != exception) - bio_list_add(&pool->deferred_bios, bio); - spin_unlock_irqrestore(&pool->lock, flags); - - wake_worker(pool); -} - -static void retry_on_resume(struct bio *bio) -{ - struct thin_c *tc = dm_get_mapinfo(bio)->ptr; - struct pool *pool = tc->pool; - unsigned long flags; - - spin_lock_irqsave(&pool->lock, flags); - bio_list_add(&pool->retry_on_resume_list, bio); - spin_unlock_irqrestore(&pool->lock, flags); -} - static int alloc_data_block(struct thin_c *tc, dm_block_t *result) { int r; @@ -952,12 +995,12 @@ static int alloc_data_block(struct thin_ dm_table_event(pool->ti->table); } - if (free_blocks == 0) { + if (!free_blocks) { if (pool->no_free_space) return -ENOSPC; else { /* - * Try and commit to see if that will free up some + * Try to commit to see if that will free up some * more space. */ r = dm_pool_commit_metadata(pool->pmd); @@ -975,7 +1018,7 @@ static int alloc_data_block(struct thin_ * If we still have no space we set a flag to avoid * doing all this checking and return -ENOSPC. 
*/ - if (free_blocks == 0) { + if (!free_blocks) { DMWARN("%s: no free space available.", dm_device_name(pool->pool_md)); spin_lock_irqsave(&pool->lock, flags); @@ -993,6 +1036,21 @@ static int alloc_data_block(struct thin_ return 0; } +/* + * If we have run out of space, queue bios until the device is + * resumed, presumably after having been reloaded with more space. + */ +static void retry_on_resume(struct bio *bio) +{ + struct thin_c *tc = dm_get_mapinfo(bio)->ptr; + struct pool *pool = tc->pool; + unsigned long flags; + + spin_lock_irqsave(&pool->lock, flags); + bio_list_add(&pool->retry_on_resume_list, bio); + spin_unlock_irqrestore(&pool->lock, flags); +} + static void no_space(struct cell *cell) { struct bio *bio; @@ -1040,8 +1098,8 @@ static void process_shared_bio(struct th struct cell_key key; /* - * If data_cell is already occupied, then sharing is already in the - * process of being broken so we have nothing further to do here. + * If cell is already occupied, then sharing is already in the process + * of being broken so we have nothing further to do here. */ build_data_key(tc->td, lookup_result->block, &key); if (bio_detain(pool->prison, &key, bio, &cell)) @@ -1069,16 +1127,18 @@ static void provision_block(struct thin_ int r; dm_block_t data_block; - if (bio->bi_size == 0) { - /* - * Remap it anywhere. This is probably a flush, so we want - * it to go through the remap_and_issue path. - */ + /* + * Remap empty bios (flushes) immediately, without provisioning. + */ + if (!bio->bi_size) { cell_release_singleton(cell, bio); remap_and_issue(tc, bio, 0); return; } + /* + * Fill read bios with zeroes and complete them immediately. + */ if (bio_data_dir(bio) == READ) { zero_fill_bio(bio); cell_release_singleton(cell, bio); @@ -1125,7 +1185,7 @@ static void process_bio(struct thin_c *t /* * We can release this cell now. This thread is the only * one that puts bios into a cell, and we know there were - * no preceeding bios. + * no preceding bios. */ /* * TODO: this will probably have to change when discard goes @@ -1165,13 +1225,12 @@ static void process_deferred_bios(struct while ((bio = bio_list_pop(&bios))) { struct thin_c *tc = dm_get_mapinfo(bio)->ptr; + /* + * If we've got no free new_mapping structs, and processing + * this bio might require one, we pause until there are some + * prepared mappings to process. + */ if (ensure_next_mapping(pool)) { - /* - * We've got no free new_mapping structs, and - * processing this bio might require one. So we - * pause until there are some prepared mappings to - * process. - */ spin_lock_irqsave(&pool->lock, flags); bio_list_merge(&pool->deferred_bios, &bios); spin_unlock_irqrestore(&pool->lock, flags); @@ -1202,53 +1261,6 @@ static void process_deferred_bios(struct } } -static void process_prepared_mapping(struct new_mapping *m) -{ - struct thin_c *tc = m->tc; - struct bio *bio; - int r; - - bio = m->bio; - if (bio) - bio->bi_end_io = m->saved_bi_end_io; - - if (m->err) { - cell_error(m->cell); - return; - } - - r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); - if (r) { - DMERR("dm_thin_insert_block() failed"); - cell_error(m->cell); - return; - } - - if (bio) { - cell_defer_except(tc, m->cell, bio); - bio_endio(bio, 0); - } else - cell_defer(tc, m->cell, m->data_block); - - list_del(&m->list); /* FIXME: unnecc.? 
*/ - mempool_free(m, tc->pool->mapping_pool); -} - -static void process_prepared_mappings(struct pool *pool) -{ - unsigned long flags; - struct list_head maps; - struct new_mapping *m, *tmp; - - INIT_LIST_HEAD(&maps); - spin_lock_irqsave(&pool->lock, flags); - list_splice_init(&pool->prepared_mappings, &maps); - spin_unlock_irqrestore(&pool->lock, flags); - - list_for_each_entry_safe(m, tmp, &maps, list) - process_prepared_mapping(m); -} - static void do_worker(struct work_struct *ws) { struct pool *pool = container_of(ws, struct pool, worker); @@ -1257,6 +1269,15 @@ static void do_worker(struct work_struct process_deferred_bios(pool); } +/*----------------------------------------------------------------*/ + +/* + * Mapping functions. + */ + +/* + * Called only while mapping a thin bio to hand it over to the workqueue. + */ static void thin_defer_bio(struct thin_c *tc, struct bio *bio) { unsigned long flags; @@ -1270,8 +1291,7 @@ static void thin_defer_bio(struct thin_c } /* - * Non-blocking function designed to be called from the target's map - * function. + * Non-blocking function called from the thin target's map function. */ static int thin_bio_map(struct dm_target *ti, struct bio *bio, union map_info *map_context) @@ -1311,7 +1331,7 @@ static int thin_bio_map(struct dm_target * ensure a consistent application view * (i.e. lockfs). * - * More distant ancestors are irrelevant, the + * More distant ancestors are irrelevant. The * shared flag will be set in their case. */ thin_defer_bio(tc, bio); @@ -1323,6 +1343,10 @@ static int thin_bio_map(struct dm_target break; case -ENODATA: + /* + * In future, the failed dm_thin_find_block above could + * provide the hint to load the metadata into cache. + */ case -EWOULDBLOCK: thin_defer_bio(tc, bio); r = DM_MAPIO_SUBMITTED; @@ -1332,30 +1356,25 @@ static int thin_bio_map(struct dm_target return r; } -static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) +static int pool_map(struct dm_target *ti, struct bio *bio, + union map_info *map_context) { int r; + struct pool_c *pt = ti->private; + struct pool *pool = pt->pool; unsigned long flags; - struct pool_c *pt = container_of(cb, struct pool_c, callbacks); - - spin_lock_irqsave(&pt->pool->lock, flags); - r = !bio_list_empty(&pt->pool->retry_on_resume_list); - spin_unlock_irqrestore(&pt->pool->lock, flags); - if (!r) { - struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); - r = bdi_congested(&q->backing_dev_info, bdi_bits); - } + /* + * As this is a singleton target, ti->begin is always zero. + */ + spin_lock_irqsave(&pool->lock, flags); + bio->bi_bdev = pt->data_dev->bdev; + r = DM_MAPIO_REMAPPED; + spin_unlock_irqrestore(&pool->lock, flags); return r; } -static void __requeue_bios(struct pool *pool) -{ - bio_list_merge(&pool->deferred_bios, &pool->retry_on_resume_list); - bio_list_init(&pool->retry_on_resume_list); -} - /*---------------------------------------------------------------- * Binding of control targets to a pool object *--------------------------------------------------------------*/ @@ -1446,11 +1465,10 @@ static struct pool *pool_create(struct m } /* - * Create singlethreaded workqueues that will service all devices + * Create singlethreaded workqueue that will service all devices * that use this metadata. 
*/ - pool->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, - WQ_MEM_RECLAIM); + pool->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); if (!pool->wq) { *error = "Error creating pool's workqueue"; err_p = ERR_PTR(-ENOMEM); @@ -1546,21 +1564,6 @@ static struct pool *__pool_find(struct m /*---------------------------------------------------------------- * Pool target methods *--------------------------------------------------------------*/ -static void pool_dtr(struct dm_target *ti) -{ - struct pool_c *pt = ti->private; - - mutex_lock(&dm_thin_pool_table.mutex); - - unbind_control_target(pt->pool, ti); - __pool_dec(pt->pool); - dm_put_device(ti, pt->metadata_dev); - dm_put_device(ti, pt->data_dev); - kfree(pt); - - mutex_unlock(&dm_thin_pool_table.mutex); -} - struct pool_features { unsigned zero_new_blocks:1; }; @@ -1576,7 +1579,9 @@ static int parse_pool_features(struct dm {0, 1, "Invalid number of pool feature arguments"}, }; - /* No feature arguments supplied. */ + /* + * No feature arguments supplied. + */ if (!as->argc) return 0; @@ -1600,6 +1605,24 @@ static int parse_pool_features(struct dm return r; } +static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) +{ + int r; + unsigned long flags; + struct pool_c *pt = container_of(cb, struct pool_c, callbacks); + + spin_lock_irqsave(&pt->pool->lock, flags); + r = !bio_list_empty(&pt->pool->retry_on_resume_list); + spin_unlock_irqrestore(&pt->pool->lock, flags); + + if (!r) { + struct request_queue *q = bdev_get_queue(pt->data_dev->bdev); + r = bdi_congested(&q->backing_dev_info, bdi_bits); + } + + return r; +} + /* * thin-pool * @@ -1719,20 +1742,25 @@ out_unlock: return r; } -static int pool_map(struct dm_target *ti, struct bio *bio, - union map_info *map_context) +static void pool_dtr(struct dm_target *ti) { - int r; struct pool_c *pt = ti->private; - struct pool *pool = pt->pool; - unsigned long flags; - spin_lock_irqsave(&pool->lock, flags); - bio->bi_bdev = pt->data_dev->bdev; - r = DM_MAPIO_REMAPPED; - spin_unlock_irqrestore(&pool->lock, flags); + mutex_lock(&dm_thin_pool_table.mutex); - return r; + unbind_control_target(pt->pool, ti); + __pool_dec(pt->pool); + dm_put_device(ti, pt->metadata_dev); + dm_put_device(ti, pt->data_dev); + kfree(pt); + + mutex_unlock(&dm_thin_pool_table.mutex); +} + +static void __requeue_bios(struct pool *pool) +{ + bio_list_merge(&pool->deferred_bios, &pool->retry_on_resume_list); + bio_list_init(&pool->retry_on_resume_list); } /* @@ -1746,6 +1774,9 @@ static int pool_map(struct dm_target *ti * calling the resume method individually after userspace has * grown the data device in reaction to a table event. */ +/* + * FIXME Part of this ought to be moved to pool_resume. 
+ */ static int pool_preresume(struct dm_target *ti) { int r; @@ -2124,8 +2155,9 @@ static struct target_type pool_target = .io_hints = pool_io_hints, }; -/*----------------------------------------------------------------*/ - +/*---------------------------------------------------------------- + * Thin target methods + *--------------------------------------------------------------*/ static void thin_dtr(struct dm_target *ti) { struct thin_c *tc = ti->private; @@ -2237,12 +2269,43 @@ static int thin_map(struct dm_target *ti return thin_bio_map(ti, bio, map_context); } +static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) +{ + struct bio *bio; + struct bio_list bios; + + bio_list_init(&bios); + bio_list_merge(&bios, master); + bio_list_init(master); + + while ((bio = bio_list_pop(&bios))) { + if (dm_get_mapinfo(bio)->ptr == tc) + bio_endio(bio, DM_ENDIO_REQUEUE); + else + bio_list_add(master, bio); + } +} + +static void requeue_io(struct thin_c *tc) +{ + struct pool *pool = tc->pool; + unsigned long flags; + + spin_lock_irqsave(&pool->lock, flags); + __requeue_bio_list(tc, &pool->deferred_bios); + __requeue_bio_list(tc, &pool->retry_on_resume_list); + spin_unlock_irqrestore(&pool->lock, flags); +} + static void thin_postsuspend(struct dm_target *ti) { if (dm_noflush_suspending(ti)) requeue_io((struct thin_c *)ti->private); } +/* + * + */ static int thin_status(struct dm_target *ti, status_type_t type, char *result, unsigned maxlen) { @@ -2277,9 +2340,8 @@ static int thin_status(struct dm_target (unsigned long) tc->dev_id); break; } - } else { + } else DMEMIT("-"); - } return 0; }
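
For readers unfamiliar with the deferred-IO pattern that the reordering above groups around: thin_defer_bio() queues a bio on the pool's deferred_bios list under the pool lock, wake_worker() kicks the pool's single-threaded workqueue, and do_worker() drains the list via process_deferred_bios() in worker context. The userspace sketch below is an illustrative analogue of that single-consumer pattern only; it is not kernel code, is not part of the patch, and every name in it (defer_item, worker, etc.) is invented for illustration. It uses pthreads in place of a workqueue and a condition variable in place of queue_work().

/* deferred_work_sketch.c - illustrative userspace analogue, not part of the patch. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct item {
	struct item *next;
	int id;
};

/* Analogue of the pool's deferred_bios list and pool->lock. */
static struct item *deferred;
static int stop;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t kick = PTHREAD_COND_INITIALIZER;

/* Analogue of thin_defer_bio() + wake_worker(): queue an item, kick the worker. */
static void defer_item(int id)
{
	struct item *it = malloc(sizeof(*it));

	if (!it)
		return;
	it->id = id;
	pthread_mutex_lock(&lock);
	it->next = deferred;	/* LIFO for brevity; the kernel uses a FIFO bio_list. */
	deferred = it;
	pthread_mutex_unlock(&lock);
	pthread_cond_signal(&kick);
}

/* Analogue of do_worker()/process_deferred_bios(): the only consumer. */
static void *worker(void *arg)
{
	(void) arg;
	for (;;) {
		struct item *list;

		pthread_mutex_lock(&lock);
		while (!deferred && !stop)
			pthread_cond_wait(&kick, &lock);
		list = deferred;
		deferred = NULL;
		if (!list && stop) {
			pthread_mutex_unlock(&lock);
			break;
		}
		pthread_mutex_unlock(&lock);

		/* The detached list is processed outside the lock, in worker context only. */
		while (list) {
			struct item *next = list->next;

			printf("processed item %d\n", list->id);
			free(list);
			list = next;
		}
	}
	return NULL;
}

int main(void)
{
	pthread_t t;
	int i;

	pthread_create(&t, NULL, worker, NULL);
	for (i = 0; i < 8; i++)
		defer_item(i);

	pthread_mutex_lock(&lock);
	stop = 1;
	pthread_mutex_unlock(&lock);
	pthread_cond_signal(&kick);
	pthread_join(t, NULL);
	return 0;
}

Build with cc -pthread. Because a single worker drains the detached list, no further locking is needed while items are processed, which mirrors why dm-thin's process_prepared_mappings() and process_deferred_bios() can splice work out under the spinlock and then operate on it lock-free inside do_worker().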