From: Mike Snitzer WRITE SAME is a SCSI command that can be leveraged for more efficient zeroing of a specified logical extent of a device which supports it. Only a single zeroed logical block is transferred to the target for each WRITE SAME and the target then writes that same block across the specified extent. Add WRITE SAME support to dm-io and make it accessible to dm_kcopyd_zero(). dm_kcopyd_zero() provides an asynchronous interface whereas the blkdev_issue_write_same() interface is synchronous. Signed-off-by: Mike Snitzer FIXME - depends on some patch that isn't yet upstream - find out those details --- drivers/md/dm-io.c | 23 ++++++++++++++++++----- drivers/md/dm-kcopyd.c | 18 +++++++++++++++--- 2 files changed, 33 insertions(+), 8 deletions(-) Index: linux-3.4/drivers/md/dm-io.c =================================================================== --- linux-3.4.orig/drivers/md/dm-io.c +++ linux-3.4/drivers/md/dm-io.c @@ -297,7 +297,8 @@ static void do_region(int rw, unsigned r unsigned num_bvecs; sector_t remaining = where->count; struct request_queue *q = bdev_get_queue(where->bdev); - sector_t discard_sectors; + unsigned short logical_block_size = queue_logical_block_size(q); + sector_t num_sectors; /* * where->count may be zero if rw holds a flush and we need to @@ -307,7 +308,7 @@ static void do_region(int rw, unsigned r /* * Allocate a suitably sized-bio. 
*/ - if (rw & REQ_DISCARD) + if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME)) num_bvecs = 1; else num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), @@ -321,9 +322,21 @@ static void do_region(int rw, unsigned r store_io_and_region_in_bio(bio, io, region); if (rw & REQ_DISCARD) { - discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); - bio->bi_size = discard_sectors << SECTOR_SHIFT; - remaining -= discard_sectors; + num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); + bio->bi_size = num_sectors << SECTOR_SHIFT; + remaining -= num_sectors; + } else if (rw & REQ_WRITE_SAME) { + /* + * WRITE SAME only uses a single page. + */ + dp->get_page(dp, &page, &len, &offset); + bio_add_page(bio, page, logical_block_size, offset); + num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining); + bio->bi_size = num_sectors << SECTOR_SHIFT; + + offset = 0; + remaining -= num_sectors; + dp->next_page(dp); } else while (remaining) { /* * Try and add as many pages as possible. 
Index: linux-3.4/drivers/md/dm-kcopyd.c =================================================================== --- linux-3.4.orig/drivers/md/dm-kcopyd.c +++ linux-3.4/drivers/md/dm-kcopyd.c @@ -349,7 +349,7 @@ static void complete_io(unsigned long er struct dm_kcopyd_client *kc = job->kc; if (error) { - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err |= error; else job->read_err = 1; @@ -361,7 +361,7 @@ static void complete_io(unsigned long er } } - if (job->rw == WRITE) + if (job->rw & WRITE) push(&kc->complete_jobs, job); else { @@ -432,7 +432,7 @@ static int process_jobs(struct list_head if (r < 0) { /* error this rogue job */ - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err = (unsigned long) -1L; else job->read_err = 1; @@ -608,10 +608,22 @@ int dm_kcopyd_copy(struct dm_kcopyd_clie job->pages = NULL; job->rw = READ; } else { + int i; + memset(&job->source, 0, sizeof job->source); job->source.count = job->dests[0].count; job->pages = &zero_page_list; job->rw = WRITE; + /* + * Optimize zeroing via WRITE SAME if all dests support it. + */ + job->rw |= REQ_WRITE_SAME; + for (i = 0; i < job->num_dests; i++) { + if (!bdev_write_same(job->dests[i].bdev)) { + job->rw &= ~REQ_WRITE_SAME; + break; + } + } } job->fn = fn;