Merge a linear region of chunks using one large IO

s->store->type->prepare_merge returns the number of chunks that can be
linearly copied, counting backward from the returned chunk numbers.
(The caller is allowed to copy fewer chunks; it passes the number of
chunks actually copied to s->store->type->commit_merge.)

I.e. if the returned chunk numbers are old_chunk == 10 and
new_chunk == 20 and the returned value is 3, then chunk 20 can be
copied to 10, chunk 19 to 9 and chunk 18 to 8.

There is a variable, s->merge_write_interlock_n, that has so far always
been one, but it can hold a larger number --- the number of chunks that
are being copied.  This patch sets it to the size of the linear region.

Also, snapshot_merge_process() should delay the merging of _all_ chunks
that have in-progress writes, not just the first chunk in the region
that is to be merged.


Here are performance results from some mkfs-based testing:

# lvcreate -n testlv -L 32G test
# lvcreate -n testlv_snap -s -L 7G test/testlv

# time mkfs.ext3 /dev/test/testlv
...
real    1m7.827s
user    0m0.116s
sys     0m11.017s

# lvs
 LV          VG   Attr   LSize  Origin Snap%  Move Log Copy%  Convert
 testlv      test owi-a- 32.00G
 testlv_snap test swi-a-  7.00G testlv   9.05

before:
-------
# time lvconvert --merge test/testlv_snap
 Merging of volume testlv_snap started.
 ...
 Merge into logical volume testlv finished.
 Logical volume "snapshot1" successfully removed

real    22m33.100s
user    0m0.045s
sys     0m0.711s


after:
------
# time lvconvert --merge test/testlv_snap
 Merging of volume testlv_snap started.
 testlv: Merged: 6.4%
 testlv: Merged: 3.5%
 testlv: Merged: 0.9%
 testlv: Merged: 0.0%
 Merge into logical volume testlv finished.
 Logical volume "snapshot1" successfully removed

real    1m0.881s
user    0m0.015s
sys     0m0.560s


So we're now seeing _very_ respectable snapshot-merge performance.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: Mikulas Patocka <mpatocka@redhat.com>
---
 drivers/md/dm-snap.c |   35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

Index: linux-2.6.31-fast-new-2/drivers/md/dm-snap.c
===================================================================
--- linux-2.6.31-fast-new-2.orig/drivers/md/dm-snap.c	2009-10-16 21:48:48.000000000 +0200
+++ linux-2.6.31-fast-new-2/drivers/md/dm-snap.c	2009-10-16 21:48:51.000000000 +0200
@@ -681,12 +681,13 @@ static void merge_callback(int read_err,
 
 static void snapshot_merge_process(struct dm_snapshot *s)
 {
-	int r;
+	int r, i, linear_chunks;
 	chunk_t old_chunk, new_chunk;
 	struct origin *o;
 	chunk_t min_chunksize;
 	int must_wait;
 	struct dm_io_region src, dest;
+	sector_t io_size;
 
 	BUG_ON(!s->merge_running);
 	if (s->merge_shutdown)
@@ -697,34 +698,41 @@ static void snapshot_merge_process(struc
 		goto shut;
 	}
 
-	r = s->store->type->prepare_merge(s->store, &old_chunk, &new_chunk);
-	if (r <= 0) {
-		if (r < 0)
+	linear_chunks = s->store->type->prepare_merge(s->store,
+						      &old_chunk, &new_chunk);
+	if (linear_chunks <= 0) {
+		if (linear_chunks < 0)
 			DMERR("Read error in exception store, "
 			      "shutting down merge");
 		goto shut;
 	}
+	/* Adjust old_chunk and new_chunk to reflect start of linear region */
+	old_chunk = old_chunk + 1 - linear_chunks;
+	new_chunk = new_chunk + 1 - linear_chunks;
 
-	/* TODO: use larger I/O size once we verify that kcopyd handles it */
+	/*
+	 * Use one (potentially large) I/O to copy all 'linear_chunks'
+	 * from the exception store to the origin
+	 */
+	io_size = linear_chunks * s->store->chunk_size;
 
 	dest.bdev = s->origin->bdev;
 	dest.sector = chunk_to_sector(s->store, old_chunk);
-	dest.count = min((sector_t)s->store->chunk_size,
-			 get_dev_size(dest.bdev) - dest.sector);
+	dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector);
 
 	src.bdev = s->cow->bdev;
 	src.sector = chunk_to_sector(s->store, new_chunk);
 	src.count = dest.count;
 
 test_again:
-	/* Reallocate other snapshots */
+	/* Reallocate other snapshots; must account for all 'linear_chunks' */
 	down_read(&_origins_lock);
 	o = __lookup_origin(s->origin->bdev);
 	must_wait = 0;
 	min_chunksize = __minimum_chunk_size(o);
 	if (min_chunksize) {
 		chunk_t n;
-		for (n = 0; n < s->store->chunk_size; n += min_chunksize) {
+		for (n = 0; n < io_size; n += min_chunksize) {
 			r = __origin_write(&o->snapshots, dest.sector + n,
 					   NULL);
 			if (r == DM_MAPIO_SUBMITTED)
@@ -739,11 +747,14 @@ test_again:
 
 	down_write(&s->lock);
 	s->merge_write_interlock = old_chunk;
-	s->merge_write_interlock_n = 1;
+	s->merge_write_interlock_n = linear_chunks;
 	up_write(&s->lock);
 
-	while (__chunk_is_tracked(s, old_chunk))
-		msleep(1);
+	/* Wait until writes to all 'linear_chunks' drain */
+	for (i = 0; i < linear_chunks; i++) {
+		while (__chunk_is_tracked(s, old_chunk + i))
+			msleep(1);
+	}
 
 	dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
 	return;