This optimization saves two allocations per dm request. struct dm_target_io and
struct bio had a one-to-one relationship, so I moved struct bio into struct
dm_target_io. The bio_vec is shared rather than cloned: because no part of a
bio_vec is ever passed to more than one target, sharing is safe. Drivers are
allowed to modify the bio_vec.

If the targets and the rest of the kernel follow the proper assumptions about
bio_vec manipulation (the bio_vec may be altered by the driver, and its contents
are undefined while the request is being processed and after it completes), this
patch should not introduce regressions. If some other kernel code breaks these
assumptions, regressions are possible and will need to be analyzed.

---
 drivers/md/dm.c |  135 ++++++++++++++++++++++++--------------------------------
 1 file changed, 59 insertions(+), 76 deletions(-)

Index: linux-2.6.26-rc4/drivers/md/dm.c
===================================================================
--- linux-2.6.26-rc4.orig/drivers/md/dm.c	2008-06-11 19:22:32.000000000 +0200
+++ linux-2.6.26-rc4/drivers/md/dm.c	2008-06-11 19:22:57.000000000 +0200
@@ -49,14 +49,15 @@ struct dm_io {
 struct dm_target_io {
 	struct dm_io *io;
 	struct dm_target *ti;
+	struct bio bio;
+	struct bio_vec bio_vec;
 	union map_info info;
 };
 
 union map_info *dm_get_mapinfo(struct bio *bio)
 {
-	if (bio && bio->bi_private)
-		return &((struct dm_target_io *)bio->bi_private)->info;
-	return NULL;
+	struct dm_target_io *tio = container_of(bio, struct dm_target_io, bio);
+	return &tio->info;
 }
 
 #define MINOR_ALLOCED ((void *)-1)
@@ -124,8 +125,6 @@ struct mapped_device {
 	mempool_t *io_pool;
 	mempool_t *tio_pool;
 
-	struct bio_set *bs;
-
 	/*
 	 * Event handling.
 	 */
@@ -371,6 +370,13 @@ static struct dm_target_io *alloc_tio(st
 
 static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
 {
+	/*
+	 * Drivers are not supposed to hold extra references on bios after
+	 * completing them. If they do, we'd better fail and not corrupt freed
+	 * memory.
+	 */
+	BUG_ON(atomic_read(&tio->bio.bi_cnt) != 1);
+
 	mempool_free(tio, md->tio_pool);
 }
 
@@ -527,7 +533,7 @@ static void dec_pending(struct dm_io *io
 
 static void clone_endio(struct bio *bio, int error)
 {
 	int r = 0;
-	struct dm_target_io *tio = bio->bi_private;
+	struct dm_target_io *tio = container_of(bio, struct dm_target_io, bio);
 	struct mapped_device *md = tio->io->md;
 	dm_endio_fn endio = tio->ti->type->end_io;
@@ -553,12 +559,6 @@ static void clone_endio(struct bio *bio,
 
 	dec_pending(tio->io, error);
 
-	/*
-	 * Store md for cleanup instead of tio which is about to get freed.
-	 */
-	bio->bi_private = md->bs;
-
-	bio_put(bio);
 	free_tio(md, tio);
 }
 
@@ -582,8 +582,7 @@ static sector_t max_io_len(struct mapped
 	return len;
 }
 
-static void __map_bio(struct dm_target *ti, struct bio *clone,
-		      struct dm_target_io *tio)
+static void __map_bio(struct dm_target *ti, struct dm_target_io *tio)
 {
 	int r;
 	sector_t sector;
@@ -592,10 +591,9 @@ static void __map_bio(struct dm_target *
 	/*
 	 * Sanity checks.
 	 */
-	BUG_ON(!clone->bi_size);
+	BUG_ON(!tio->bio.bi_size);
 
-	clone->bi_end_io = clone_endio;
-	clone->bi_private = tio;
+	tio->bio.bi_end_io = clone_endio;
 
 	/*
 	 * Map the clone. If r == 0 we don't need to do
@@ -603,16 +601,16 @@ static void __map_bio(struct dm_target *
 	 * this io.
 	 */
 	atomic_inc(&tio->io->io_count);
-	sector = clone->bi_sector;
-	r = ti->type->map(ti, clone, &tio->info);
+	sector = tio->bio.bi_sector;
+	r = ti->type->map(ti, &tio->bio, &tio->info);
 	if (r == DM_MAPIO_REMAPPED) {
 		/* the bio has been remapped so dispatch it */
 
-		blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone,
+		blk_add_trace_remap(bdev_get_queue(tio->bio.bi_bdev), &tio->bio,
 				    tio->io->bio->bi_bdev->bd_dev,
-				    clone->bi_sector, sector);
+				    tio->bio.bi_sector, sector);
 
-		generic_make_request(clone);
+		generic_make_request(&tio->bio);
 	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
 		/* error the io and bail out, or requeue it if needed */
 		md = tio->io->md;
@@ -620,8 +618,6 @@ static void __map_bio(struct dm_target *
 		/*
 		 * Store bio_set for cleanup.
 		 */
-		clone->bi_private = md->bs;
-		bio_put(clone);
 		free_tio(md, tio);
 	} else if (r) {
 		DMWARN("unimplemented target map return value: %d", r);
@@ -639,62 +635,48 @@ struct clone_info {
 	unsigned short idx;
 };
 
-static void dm_bio_destructor(struct bio *bio)
-{
-	struct bio_set *bs = bio->bi_private;
-
-	bio_free(bio, bs);
-}
-
 /*
  * Creates a little bio that is just does part of a bvec.
  */
-static struct bio *split_bvec(struct bio *bio, sector_t sector,
-			      unsigned short idx, unsigned int offset,
-			      unsigned int len, struct bio_set *bs)
+static void split_bvec(struct bio *clone, struct bio_vec *clone_vec,
+		       struct bio *bio, sector_t sector, unsigned short idx,
+		       unsigned int offset, unsigned int len)
 {
-	struct bio *clone;
 	struct bio_vec *bv = bio->bi_io_vec + idx;
 
-	clone = bio_alloc_bioset(GFP_NOIO, 1, bs);
-	clone->bi_destructor = dm_bio_destructor;
-	*clone->bi_io_vec = *bv;
+	bio_init(clone);
 
-	clone->bi_sector = sector;
 	clone->bi_bdev = bio->bi_bdev;
+	clone->bi_sector = sector;
 	clone->bi_rw = bio->bi_rw;
+	clone->bi_io_vec = clone_vec;
 	clone->bi_vcnt = 1;
 	clone->bi_size = to_bytes(len);
-	clone->bi_io_vec->bv_offset = offset;
-	clone->bi_io_vec->bv_len = clone->bi_size;
-
-	return clone;
+	clone_vec->bv_page = bv->bv_page;
+	clone_vec->bv_offset = offset;
+	clone_vec->bv_len = clone->bi_size;
 }
 
 /*
  * Creates a bio that consists of range of complete bvecs.
  */
-static struct bio *clone_bio(struct bio *bio, sector_t sector,
-			     unsigned short idx, unsigned short bv_count,
-			     unsigned int len, struct bio_set *bs)
-{
-	struct bio *clone;
-
-	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
-	__bio_clone(clone, bio);
-	clone->bi_destructor = dm_bio_destructor;
+static void clone_bio(struct bio *clone, struct bio *bio, sector_t sector,
+		      unsigned short idx, unsigned short bv_count,
+		      unsigned int len)
+{
+	bio_init(clone);
+
+	clone->bi_bdev = bio->bi_bdev;
 	clone->bi_sector = sector;
-	clone->bi_idx = idx;
-	clone->bi_vcnt = idx + bv_count;
+	clone->bi_rw = bio->bi_rw;
+	clone->bi_io_vec = bio->bi_io_vec + idx;
+	clone->bi_vcnt = clone->bi_max_vecs = bv_count;
 	clone->bi_size = to_bytes(len);
-	clone->bi_flags &= ~(1 << BIO_SEG_VALID);
-
-	return clone;
 }
 
 static int __clone_and_map(struct clone_info *ci)
 {
-	struct bio *clone, *bio = ci->bio;
+	struct bio *bio = ci->bio;
 	struct dm_target *ti;
 	sector_t len = 0, max;
 	struct dm_target_io *tio;
@@ -713,15 +695,24 @@ static int __clone_and_map(struct clone_
 	tio->ti = ti;
 	memset(&tio->info, 0, sizeof(tio->info));
 
+	/*
+	 * Warning: it is essential that this routine never maps the same
+	 * bvec entry to more than one target.
+	 *
+	 * If a bvec entry were processed by more than one target, it would
+	 * have to be copied to tio->bio_vec.
+	 *
+	 * The current implementation satisfies this, but keep an eye on it
+	 * if you change this code.
+	 */
 	if (ci->sector_count <= max) {
 		/*
 		 * Optimise for the simple case where we can do all of
 		 * the remaining io with a single clone.
 		 */
-		clone = clone_bio(bio, ci->sector, ci->idx,
-				  bio->bi_vcnt - ci->idx, ci->sector_count,
-				  ci->md->bs);
-		__map_bio(ti, clone, tio);
+		clone_bio(&tio->bio, bio, ci->sector, ci->idx,
+			  bio->bi_vcnt - ci->idx, ci->sector_count);
+		__map_bio(ti, tio);
 		ci->sector_count = 0;
 
 	} else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
@@ -743,9 +734,9 @@ static int __clone_and_map(struct clone_
 			len += bv_len;
 		}
 
-		clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len,
-				  ci->md->bs);
-		__map_bio(ti, clone, tio);
+		clone_bio(&tio->bio, bio, ci->sector, ci->idx, i - ci->idx,
+			  len);
+		__map_bio(ti, tio);
 
 		ci->sector += len;
 		ci->sector_count -= len;
@@ -775,11 +766,10 @@ static int __clone_and_map(struct clone_
 
 			len = min(remaining, max);
 
-			clone = split_bvec(bio, ci->sector, ci->idx,
-					   bv->bv_offset + offset, len,
-					   ci->md->bs);
+			split_bvec(&tio->bio, &tio->bio_vec, bio, ci->sector,
+				   ci->idx, bv->bv_offset + offset, len);
 
-			__map_bio(ti, clone, tio);
+			__map_bio(ti, tio);
 
 			ci->sector += len;
 			ci->sector_count -= len;
@@ -1041,10 +1031,6 @@ static struct mapped_device *alloc_dev(i
 	if (!md->tio_pool)
 		goto bad_tio_pool;
 
-	md->bs = bioset_create(16, 16);
-	if (!md->bs)
-		goto bad_no_bioset;
-
 	md->disk = alloc_disk(1);
 	if (!md->disk)
 		goto bad_disk;
@@ -1084,8 +1070,6 @@ bad_bdev:
 bad_thread:
 	put_disk(md->disk);
 bad_disk:
-	bioset_free(md->bs);
-bad_no_bioset:
 	mempool_destroy(md->tio_pool);
 bad_tio_pool:
 	mempool_destroy(md->io_pool);
@@ -1111,7 +1095,6 @@ static void free_dev(struct mapped_devic
 	destroy_workqueue(md->wq);
 	mempool_destroy(md->tio_pool);
 	mempool_destroy(md->io_pool);
-	bioset_free(md->bs);
 	del_gendisk(md->disk);
 	free_minor(minor);
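
For reviewers who want the core idea in isolation: the patch replaces the
bi_private back-pointer with the usual container_of() pattern, which works
because the clone bio is now embedded inside struct dm_target_io. Below is a
minimal user-space sketch of that pattern, not dm code: the structure and
function names only loosely mirror dm.c, the fields are placeholders, and
container_of() is redefined locally so the sketch builds outside the kernel.

#include <stddef.h>
#include <stdio.h>

/* User-space stand-in for the kernel's container_of() macro. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

/* Illustrative structures only; the real ones live in drivers/md/dm.c. */
struct bio {
	unsigned long bi_sector;
	unsigned int bi_size;
};

struct dm_target_io {
	int some_state;		/* placeholder for the io/ti/info fields */
	struct bio bio;		/* embedded clone, as in the patch */
};

/*
 * A completion callback that receives only the bio, yet can reach the
 * enclosing dm_target_io because the bio is embedded in it. After the
 * patch, clone_endio() recovers the tio this way instead of reading
 * bio->bi_private.
 */
static void example_endio(struct bio *bio)
{
	struct dm_target_io *tio = container_of(bio, struct dm_target_io, bio);

	printf("state %d, size %u\n", tio->some_state, bio->bi_size);
}

int main(void)
{
	struct dm_target_io tio = {
		.some_state = 42,
		.bio = { .bi_sector = 0, .bi_size = 4096 },
	};

	/* Only the embedded bio is handed to the callback. */
	example_endio(&tio.bio);
	return 0;
}

Because the clone bio is never allocated on its own, there is no bio
destructor and no per-device bio_set any more; free_tio() releases the
embedded bio together with the tio, which is why it now checks bi_cnt before
returning the tio to the mempool.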