thin-dev must first be merged with v3.5 (e.g. "git merge v3.5"); after resolving the conflict in dm-thin.c and committing the merge, this patch may be applied. --- Documentation/device-mapper/striped.txt | 7 +- Documentation/device-mapper/thin-provisioning.txt | 24 ++- drivers/md/dm-crypt.c | 9 +- drivers/md/dm-delay.c | 2 +- drivers/md/dm-exception-store.c | 13 +- drivers/md/dm-flakey.c | 2 +- drivers/md/dm-ioctl.c | 5 +- drivers/md/dm-linear.c | 2 +- drivers/md/dm-mpath.c | 49 +++- drivers/md/dm-raid.c | 13 +- drivers/md/dm-raid1.c | 10 +- drivers/md/dm-snap.c | 34 +- drivers/md/dm-stripe.c | 87 +++--- drivers/md/dm-thin-metadata.c | 344 ++++++++++---------- drivers/md/dm-thin-metadata.h | 11 +- drivers/md/dm-thin.c | 241 ++++++++------ drivers/md/dm-verity.c | 2 +- drivers/md/dm.c | 35 ++- drivers/md/dm.h | 5 + drivers/md/persistent-data/dm-block-manager.c | 36 +- drivers/md/persistent-data/dm-block-manager.h | 9 +- drivers/md/persistent-data/dm-space-map-common.c | 6 +- drivers/md/persistent-data/dm-space-map-common.h | 2 +- drivers/md/persistent-data/dm-space-map-disk.c | 49 +-- .../md/persistent-data/dm-transaction-manager.c | 8 +- drivers/scsi/device_handler/scsi_dh.c | 38 ++- include/linux/device-mapper.h | 19 +- include/linux/dm-ioctl.h | 6 +- include/scsi/scsi_dh.h | 6 + 29 files changed, 619 insertions(+), 455 deletions(-) diff --git a/Documentation/device-mapper/striped.txt b/Documentation/device-mapper/striped.txt index f34d323..45f3b91 100644 --- a/Documentation/device-mapper/striped.txt +++ b/Documentation/device-mapper/striped.txt @@ -9,15 +9,14 @@ devices in parallel. Parameters: <num devs> <chunk size> [<dev path> <offset>]+ <num devs>: Number of underlying devices. - <chunk size>: Size of each chunk of data. Must be a power-of-2 and at - least as large as the system's PAGE_SIZE. + <chunk size>: Size of each chunk of data. Must be at least as + large as the system's PAGE_SIZE. <dev path>: Full pathname to the underlying block-device, or a "major:minor" device-number. <offset>: Starting sector within the device. One or more underlying devices can be specified. The striped device size must -be a multiple of the chunk size and a multiple of the number of underlying -devices. +be a multiple of the chunk size multiplied by the number of underlying devices. Example scripts diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt index f5cfc62..652975b 100644 --- a/Documentation/device-mapper/thin-provisioning.txt +++ b/Documentation/device-mapper/thin-provisioning.txt @@ -231,6 +231,9 @@ i) Constructor no_discard_passdown: Don't pass discards down to the underlying data device, but just remove the mapping. + read_only: Don't allow any changes to be made to the pool + metadata. + Data block size must be between 64KB (128 sectors) and 1GB (2097152 sectors) inclusive. @@ -239,7 +242,7 @@ ii) Status <transaction id> <used metadata blocks>/<total metadata blocks> <used data blocks>/<total data blocks> <held metadata root> - + [no_]discard_passdown read_[only|write] transaction id: A 64-bit number used by userspace to help synchronise with metadata @@ -257,6 +260,21 @@ ii) Status held root. This feature is not yet implemented so '-' is always returned. + discard_passdown|no_discard_passdown + Whether or not discards are actually being passed down to the + underlying device. When this is enabled when loading the table, + it can get disabled if the underlying device doesn't support it. + + read_only|read_write + If the pool encounters certain types of device failures it will + drop into a read-only metadata mode in which no changes to + the pool metadata (like allocating new blocks) are permitted. 
+ + In serious cases where even a read-only mode is deemed unsafe + no further I/O will be permitted and the status will just + contain the string 'Fail'. The userspace recovery tools + should then be used. + iii) Messages create_thin @@ -329,3 +347,7 @@ regain some space then send the 'trim' message to the pool. ii) Status + + If the pool has encountered device errors and failed, the status + will just contain the string 'Fail'. The userspace recovery + tools should then be used. diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 3f06df5..ca4f8ad 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1241,7 +1241,6 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) static int crypt_decode_key(u8 *key, char *hex, unsigned int size) { char buffer[3]; - char *endp; unsigned int i; buffer[2] = '\0'; @@ -1250,9 +1249,7 @@ static int crypt_decode_key(u8 *key, char *hex, unsigned int size) buffer[0] = *hex++; buffer[1] = *hex++; - key[i] = (u8)simple_strtoul(buffer, &endp, 16); - - if (endp != &buffer[2]) + if (kstrtou8(buffer, 16, &key[i])) return -EINVAL; } @@ -1702,7 +1699,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->num_flush_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; return 0; @@ -1742,7 +1739,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, } static int crypt_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct crypt_config *cc = ti->private; unsigned int sz = 0; diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 2dc22dd..f53846f 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -295,7 +295,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio, } static int delay_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct delay_c *dc = ti->private; int sz = 0; diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index aa70f7d..ebaa4f8 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -142,24 +142,19 @@ EXPORT_SYMBOL(dm_exception_store_type_unregister); static int set_chunk_size(struct dm_exception_store *store, const char *chunk_size_arg, char **error) { - unsigned long chunk_size_ulong; - char *value; + unsigned chunk_size; - chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10); - if (*chunk_size_arg == '\0' || *value != '\0' || - chunk_size_ulong > UINT_MAX) { + if (kstrtouint(chunk_size_arg, 10, &chunk_size)) { *error = "Invalid chunk size"; return -EINVAL; } - if (!chunk_size_ulong) { + if (!chunk_size) { store->chunk_size = store->chunk_mask = store->chunk_shift = 0; return 0; } - return dm_exception_store_set_chunk_size(store, - (unsigned) chunk_size_ulong, - error); + return dm_exception_store_set_chunk_size(store, chunk_size, error); } int dm_exception_store_set_chunk_size(struct dm_exception_store *store, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index ac49c01..cc15543 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -333,7 +333,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, } static int flakey_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct flakey_c *fc = 
ti->private; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index a1a3e6d..afd9598 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1054,6 +1054,7 @@ static void retrieve_status(struct dm_table *table, char *outbuf, *outptr; status_type_t type; size_t remaining, len, used = 0; + unsigned status_flags = 0; outptr = outbuf = get_result_buffer(param, param_size, &len); @@ -1090,7 +1091,9 @@ static void retrieve_status(struct dm_table *table, /* Get the status/table string from the target driver */ if (ti->type->status) { - if (ti->type->status(ti, type, outptr, remaining)) { + if (param->flags & DM_NOFLUSH_FLAG) + status_flags |= DM_STATUS_NOFLUSH_FLAG; + if (ti->type->status(ti, type, status_flags, outptr, remaining)) { param->flags |= DM_BUFFER_FULL_FLAG; break; } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 3639eea..1bf19a9 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -96,7 +96,7 @@ static int linear_map(struct dm_target *ti, struct bio *bio, } static int linear_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct linear_c *lc = (struct linear_c *) ti->private; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 638dae0..d8abb90 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -85,6 +85,7 @@ struct multipath { unsigned queue_io:1; /* Must we queue all I/O? */ unsigned queue_if_no_path:1; /* Queue I/O if last path fails? */ unsigned saved_queue_if_no_path:1; /* Saved state during suspension */ + unsigned retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */ unsigned pg_init_retries; /* Number of times to retry pg_init */ unsigned pg_init_count; /* Number of times pg_init called */ @@ -568,6 +569,8 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps int r; struct pgpath *p; struct multipath *m = ti->private; + struct request_queue *q = NULL; + const char *attached_handler_name; /* we need at least a path arg */ if (as->argc < 1) { @@ -586,13 +589,37 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps goto bad; } - if (m->hw_handler_name) { - struct request_queue *q = bdev_get_queue(p->path.dev->bdev); + if (m->retain_attached_hw_handler || m->hw_handler_name) + q = bdev_get_queue(p->path.dev->bdev); + + if (m->retain_attached_hw_handler) { + attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); + if (attached_handler_name) { + /* + * Reset hw_handler_name to match the attached handler + * and clear any hw_handler_params associated with the + * ignored handler. + * + * NB. This modifies the table line to show the actual + * handler instead of the original table passed in. + */ + kfree(m->hw_handler_name); + m->hw_handler_name = attached_handler_name; + + kfree(m->hw_handler_params); + m->hw_handler_params = NULL; + } + } + if (m->hw_handler_name) { + /* + * Increments scsi_dh reference, even when using an + * already-attached handler. + */ r = scsi_dh_attach(q, m->hw_handler_name); if (r == -EBUSY) { /* - * Already attached to different hw_handler, + * Already attached to different hw_handler: * try to reattach with correct one. 
*/ scsi_dh_detach(q); @@ -760,7 +787,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) const char *arg_name; static struct dm_arg _args[] = { - {0, 5, "invalid number of feature args"}, + {0, 6, "invalid number of feature args"}, {1, 50, "pg_init_retries must be between 1 and 50"}, {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, }; @@ -781,6 +808,11 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) continue; } + if (!strcasecmp(arg_name, "retain_attached_hw_handler")) { + m->retain_attached_hw_handler = 1; + continue; + } + if (!strcasecmp(arg_name, "pg_init_retries") && (argc >= 1)) { r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); @@ -1346,7 +1378,7 @@ static void multipath_resume(struct dm_target *ti) * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ */ static int multipath_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { int sz = 0; unsigned long flags; @@ -1364,13 +1396,16 @@ static int multipath_status(struct dm_target *ti, status_type_t type, else { DMEMIT("%u ", m->queue_if_no_path + (m->pg_init_retries > 0) * 2 + - (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2); + (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 + + m->retain_attached_hw_handler); if (m->queue_if_no_path) DMEMIT("queue_if_no_path "); if (m->pg_init_retries) DMEMIT("pg_init_retries %u ", m->pg_init_retries); if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs); + if (m->retain_attached_hw_handler) + DMEMIT("retain_attached_hw_handler "); } if (!m->hw_handler_name || type == STATUSTYPE_INFO) @@ -1656,7 +1691,7 @@ out: *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr, diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 017c34d..ca9a246 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -353,6 +353,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv, { unsigned i, rebuild_cnt = 0; unsigned long value, region_size = 0; + sector_t max_io_len; char *key; /* @@ -522,14 +523,12 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; + max_io_len = rs->md.chunk_sectors; else - rs->ti->split_io = region_size; + max_io_len = region_size; - if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; - else - rs->ti->split_io = region_size; + if (dm_set_target_max_io_len(rs->ti, max_io_len)) + return -EINVAL; /* Assume there are no metadata devices until the drives are parsed */ rs->md.persistent = 0; @@ -1067,7 +1066,7 @@ static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_c } static int raid_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct raid_set *rs = ti->private; unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index b58b7a3..bc5ddba 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1081,10 +1081,14 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->private = ms; - ti->split_io = 
dm_rh_get_region_size(ms->rh); + + r = dm_set_target_max_io_len(ti, dm_rh_get_region_size(ms->rh)); + if (r) + goto err_free_context; + ti->num_flush_requests = 1; ti->num_discard_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); @@ -1363,7 +1367,7 @@ static char device_status_char(struct mirror *m) static int mirror_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned int m, sz = 0; struct mirror_set *ms = (struct mirror_set *) ti->private; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6f75887..a143921 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -691,7 +691,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) * Return a minimum chunk size of all snapshots that have the specified origin. * Return zero if the origin has no snapshots. */ -static sector_t __minimum_chunk_size(struct origin *o) +static uint32_t __minimum_chunk_size(struct origin *o) { struct dm_snapshot *snap; unsigned chunk_size = 0; @@ -701,7 +701,7 @@ static sector_t __minimum_chunk_size(struct origin *o) chunk_size = min_not_zero(chunk_size, snap->store->chunk_size); - return chunk_size; + return (uint32_t) chunk_size; } /* @@ -1172,7 +1172,10 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->error = "Chunk size not set"; goto bad_read_metadata; } - ti->split_io = s->store->chunk_size; + + r = dm_set_target_max_io_len(ti, s->store->chunk_size); + if (r) + goto bad_read_metadata; return 0; @@ -1239,7 +1242,7 @@ static void __handover_exceptions(struct dm_snapshot *snap_src, snap_dest->store->snap = snap_dest; snap_src->store->snap = snap_src; - snap_dest->ti->split_io = snap_dest->store->chunk_size; + snap_dest->ti->max_io_len = snap_dest->store->chunk_size; snap_dest->valid = snap_src->valid; /* @@ -1817,9 +1820,9 @@ static void snapshot_resume(struct dm_target *ti) up_write(&s->lock); } -static sector_t get_origin_minimum_chunksize(struct block_device *bdev) +static uint32_t get_origin_minimum_chunksize(struct block_device *bdev) { - sector_t min_chunksize; + uint32_t min_chunksize; down_read(&_origins_lock); min_chunksize = __minimum_chunk_size(__lookup_origin(bdev)); @@ -1838,15 +1841,15 @@ static void snapshot_merge_resume(struct dm_target *ti) snapshot_resume(ti); /* - * snapshot-merge acts as an origin, so set ti->split_io + * snapshot-merge acts as an origin, so set ti->max_io_len */ - ti->split_io = get_origin_minimum_chunksize(s->origin->bdev); + ti->max_io_len = get_origin_minimum_chunksize(s->origin->bdev); start_merge(s); } static int snapshot_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct dm_snapshot *snap = ti->private; @@ -2073,12 +2076,12 @@ static int origin_write_extent(struct dm_snapshot *merging_snap, struct origin *o; /* - * The origin's __minimum_chunk_size() got stored in split_io + * The origin's __minimum_chunk_size() got stored in max_io_len * by snapshot_merge_resume(). 
*/ down_read(&_origins_lock); o = __lookup_origin(merging_snap->origin->bdev); - for (n = 0; n < size; n += merging_snap->ti->split_io) + for (n = 0; n < size; n += merging_snap->ti->max_io_len) if (__origin_write(&o->snapshots, sector + n, NULL) == DM_MAPIO_SUBMITTED) must_wait = 1; @@ -2138,18 +2141,18 @@ static int origin_map(struct dm_target *ti, struct bio *bio, } /* - * Set the target "split_io" field to the minimum of all the snapshots' + * Set the target "max_io_len" field to the minimum of all the snapshots' * chunk sizes. */ static void origin_resume(struct dm_target *ti) { struct dm_dev *dev = ti->private; - ti->split_io = get_origin_minimum_chunksize(dev->bdev); + ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); } -static int origin_status(struct dm_target *ti, status_type_t type, char *result, - unsigned int maxlen) +static int origin_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct dm_dev *dev = ti->private; @@ -2176,7 +2179,6 @@ static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return max_size; bvm->bi_bdev = dev->bdev; - bvm->bi_sector = bvm->bi_sector; return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 35c94ff..a087bf2 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -26,14 +26,12 @@ struct stripe { struct stripe_c { uint32_t stripes; int stripes_shift; - sector_t stripes_mask; /* The size of this target / num. stripes */ sector_t stripe_width; - /* stripe chunk size */ - uint32_t chunk_shift; - sector_t chunk_mask; + uint32_t chunk_size; + int chunk_size_shift; /* Needed for handling events */ struct dm_target *ti; @@ -91,7 +89,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc, /* * Construct a striped mapping. 
- * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+ + * <number of stripes> <chunk size> [<dev_path> <offset>]+ */ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) { @@ -99,7 +97,6 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) struct stripe_c *sc; sector_t width; uint32_t stripes; uint32_t chunk_size; - char *end; int r; unsigned int i; @@ -108,34 +105,23 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - stripes = simple_strtoul(argv[0], &end, 10); - if (!stripes || *end) { + if (kstrtouint(argv[0], 10, &stripes) || !stripes) { ti->error = "Invalid stripe count"; return -EINVAL; } - chunk_size = simple_strtoul(argv[1], &end, 10); - if (*end) { + if (kstrtouint(argv[1], 10, &chunk_size) || !chunk_size) { ti->error = "Invalid chunk_size"; return -EINVAL; } - /* - * chunk_size is a power of two - */ - if (!is_power_of_2(chunk_size) || - (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) { - ti->error = "Invalid chunk size"; - return -EINVAL; - } - - if (ti->len & (chunk_size - 1)) { + width = ti->len; + if (sector_div(width, chunk_size)) { ti->error = "Target length not divisible by " "chunk size"; return -EINVAL; } - width = ti->len; if (sector_div(width, stripes)) { ti->error = "Target length not divisible by " "number of stripes"; @@ -167,17 +153,21 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (stripes & (stripes - 1)) sc->stripes_shift = -1; - else { - sc->stripes_shift = ffs(stripes) - 1; - sc->stripes_mask = ((sector_t) stripes) - 1; - } + else + sc->stripes_shift = __ffs(stripes); + + r = dm_set_target_max_io_len(ti, chunk_size); + if (r) + return r; - ti->split_io = chunk_size; ti->num_flush_requests = stripes; ti->num_discard_requests = stripes; - sc->chunk_shift = ffs(chunk_size) - 1; - sc->chunk_mask = ((sector_t) chunk_size) - 1; + sc->chunk_size = chunk_size; + if (chunk_size & (chunk_size - 1)) + sc->chunk_size_shift = -1; + else + sc->chunk_size_shift = __ffs(chunk_size); /* * Get the stripe destinations. 
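
A worked illustration of the new chunk addressing may help here. The sketch below is a userspace approximation, not part of the patch: plain '/' and '%' stand in for the kernel's sector_div(), and the compiler builtin __builtin_ctz() stands in for __ffs(). It shows the scheme the new chunk_size/chunk_size_shift fields feed: a power-of-2 chunk size takes a shift/mask fast path, any other size falls back to division.

    /*
     * Userspace sketch of the stripe chunk addressing (illustrative only).
     * Build with: cc -o stripe-sketch stripe-sketch.c
     */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    struct stripe_map {
            uint32_t stripes;
            uint32_t chunk_size;      /* in sectors */
            int chunk_size_shift;     /* -1 if chunk_size is not a power of 2 */
    };

    static void stripe_map_init(struct stripe_map *m, uint32_t stripes,
                                uint32_t chunk_size)
    {
            m->stripes = stripes;
            m->chunk_size = chunk_size;
            /* x & (x - 1) clears the lowest set bit: zero iff x is a power of 2 */
            if (chunk_size & (chunk_size - 1))
                    m->chunk_size_shift = -1;
            else
                    m->chunk_size_shift = __builtin_ctz(chunk_size);
    }

    static void map_sector(const struct stripe_map *m, uint64_t sector,
                           uint32_t *stripe, uint64_t *result)
    {
            uint64_t chunk = sector, chunk_offset;

            if (m->chunk_size_shift < 0) {
                    chunk_offset = chunk % m->chunk_size;  /* sector_div() in the kernel */
                    chunk /= m->chunk_size;
            } else {
                    chunk_offset = chunk & (m->chunk_size - 1);
                    chunk >>= m->chunk_size_shift;
            }

            /* which stripe owns this chunk, and the chunk index within it */
            *stripe = chunk % m->stripes;
            chunk /= m->stripes;

            *result = (m->chunk_size_shift < 0 ? chunk * m->chunk_size
                                               : chunk << m->chunk_size_shift)
                      + chunk_offset;
    }

    int main(void)
    {
            struct stripe_map m;
            uint32_t stripe;
            uint64_t dev_sector;

            stripe_map_init(&m, 3, 24);  /* 24 sectors: not a power of 2 */
            map_sector(&m, 100, &stripe, &dev_sector);
            /* sector 100 = chunk 4, offset 4; chunk 4 -> stripe 1, chunk 1 there */
            printf("stripe %u, sector %llu\n", stripe,
                   (unsigned long long)dev_sector);
            assert(stripe == 1 && dev_sector == 28);
            return 0;
    }

The same pattern (shift when the count is a power of 2, sector_div otherwise) is applied independently to the stripe count, which is why stripes_shift and chunk_size_shift are tracked separately above.
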
@@ -216,17 +206,29 @@ static void stripe_dtr(struct dm_target *ti) static void stripe_map_sector(struct stripe_c *sc, sector_t sector, uint32_t *stripe, sector_t *result) { - sector_t offset = dm_target_offset(sc->ti, sector); - sector_t chunk = offset >> sc->chunk_shift; + sector_t chunk = dm_target_offset(sc->ti, sector); + sector_t chunk_offset; + + if (sc->chunk_size_shift < 0) + chunk_offset = sector_div(chunk, sc->chunk_size); + else { + chunk_offset = chunk & (sc->chunk_size - 1); + chunk >>= sc->chunk_size_shift; + } if (sc->stripes_shift < 0) *stripe = sector_div(chunk, sc->stripes); else { - *stripe = chunk & sc->stripes_mask; + *stripe = chunk & (sc->stripes - 1); chunk >>= sc->stripes_shift; } - *result = (chunk << sc->chunk_shift) | (offset & sc->chunk_mask); + if (sc->chunk_size_shift < 0) + chunk *= sc->chunk_size; + else + chunk <<= sc->chunk_size_shift; + + *result = chunk + chunk_offset; } static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, @@ -237,9 +239,16 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, stripe_map_sector(sc, sector, &stripe, result); if (stripe == target_stripe) return; - *result &= ~sc->chunk_mask; /* round down */ + + /* round down */ + sector = *result; + if (sc->chunk_size_shift < 0) + *result -= sector_div(sector, sc->chunk_size); + else + *result = sector & ~(sector_t)(sc->chunk_size - 1); + if (target_stripe < stripe) - *result += sc->chunk_mask + 1; /* next chunk */ + *result += sc->chunk_size; /* next chunk */ } static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, @@ -302,8 +311,8 @@ static int stripe_map(struct dm_target *ti, struct bio *bio, * */ -static int stripe_status(struct dm_target *ti, - status_type_t type, char *result, unsigned int maxlen) +static int stripe_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct stripe_c *sc = (struct stripe_c *) ti->private; char buffer[sc->stripes + 1]; @@ -324,7 +333,7 @@ static int stripe_status(struct dm_target *ti, case STATUSTYPE_TABLE: DMEMIT("%d %llu", sc->stripes, - (unsigned long long)sc->chunk_mask + 1); + (unsigned long long)sc->chunk_size); for (i = 0; i < sc->stripes; i++) DMEMIT(" %s %llu", sc->stripe[i].dev->name, (unsigned long long)sc->stripe[i].physical_start); @@ -391,7 +400,7 @@ static void stripe_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct stripe_c *sc = ti->private; - unsigned chunk_size = (sc->chunk_mask + 1) << 9; + unsigned chunk_size = sc->chunk_size << SECTOR_SHIFT; blk_limits_io_min(limits, chunk_size); blk_limits_io_opt(limits, chunk_size * sc->stripes); @@ -419,7 +428,7 @@ static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm, static struct target_type stripe_target = { .name = "striped", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = stripe_ctr, .dtr = stripe_dtr, diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 2c80364..f78f9d3 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -185,6 +185,12 @@ struct dm_pool_metadata { unsigned long flags; sector_t data_block_size; bool read_only:1; + + /* + * Set if a transaction has to be aborted but the attempt to roll back + * to the previous (good) transaction failed. The only pool metadata + * operation possible in this state is the closing of the device. 
+ */ bool fail_io:1; }; @@ -194,10 +200,8 @@ struct dm_thin_device { dm_thin_id id; int open_count; - bool changed:1; bool aborted_with_changes:1; - uint64_t mapped_blocks; uint64_t transaction_id; uint32_t creation_time; @@ -348,17 +352,21 @@ static int subtree_equal(void *context, void *value1_le, void *value2_le) /*----------------------------------------------------------------*/ -static int superblock_lock_zero(struct dm_pool_metadata *pmd, struct dm_block **sblock) +static int superblock_lock_zero(struct dm_pool_metadata *pmd, + struct dm_block **sblock) { - return dm_bm_write_lock_zero(pmd->bm, THIN_SUPERBLOCK_LOCATION, &sb_validator, sblock); + return dm_bm_write_lock_zero(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, sblock); } -static int superblock_lock(struct dm_pool_metadata *pmd, struct dm_block **sblock) +static int superblock_lock(struct dm_pool_metadata *pmd, + struct dm_block **sblock) { - return dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, &sb_validator, sblock); + return dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, sblock); } -static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) +static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result) { int r; unsigned i; @@ -427,8 +435,8 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) { int r; struct dm_block *sblock; - struct thin_disk_superblock *disk_super; size_t metadata_len, data_len; + struct thin_disk_superblock *disk_super; sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; if (bdev_size > THIN_METADATA_MAX_SECTORS) @@ -463,26 +471,25 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) disk_super->trans_id = 0; disk_super->held_root = 0; - r = dm_sm_copy_root(pmd->metadata_sm, - &disk_super->metadata_space_map_root, + r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, metadata_len); if (r < 0) - goto out_locked; + goto bad_locked; - r = dm_sm_copy_root(pmd->data_sm, - &disk_super->data_space_map_root, + r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, data_len); if (r < 0) - goto out_locked; + goto bad_locked; disk_super->data_mapping_root = cpu_to_le64(pmd->root); disk_super->device_details_root = cpu_to_le64(pmd->details_root); + disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); return dm_tm_commit(pmd->tm, sblock); -out_locked: +bad_locked: dm_bm_unlock(sblock); return r; } @@ -493,7 +500,7 @@ static int __format_metadata(struct dm_pool_metadata *pmd) r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, &pmd->tm, &pmd->metadata_sm); - if (r) { + if (r < 0) { DMERR("tm_create_with_sm failed"); return r; } @@ -502,39 +509,41 @@ static int __format_metadata(struct dm_pool_metadata *pmd) if (IS_ERR(pmd->data_sm)) { DMERR("sm_disk_create failed"); r = PTR_ERR(pmd->data_sm); - goto cleanup_tm; + goto bad_cleanup_tm; } pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); if (!pmd->nb_tm) { DMERR("could not create non-blocking clone tm"); r = -ENOMEM; - goto cleanup_data_sm; + goto bad_cleanup_data_sm; } __setup_btree_details(pmd); r = dm_btree_empty(&pmd->info, &pmd->root); if (r < 0) - goto cleanup_data_sm; + goto bad_cleanup_nb_tm; r = dm_btree_empty(&pmd->details_info, &pmd->details_root); if (r < 0) { DMERR("couldn't create devices root"); - goto 
cleanup_data_sm; + goto bad_cleanup_nb_tm; } r = __write_initial_superblock(pmd); if (r) - goto cleanup_data_sm; + goto bad_cleanup_nb_tm; return 0; -cleanup_data_sm: +bad_cleanup_nb_tm: + dm_tm_destroy(pmd->nb_tm); +bad_cleanup_data_sm: dm_sm_destroy(pmd->data_sm); -cleanup_tm: - dm_sm_destroy(pmd->metadata_sm); +bad_cleanup_tm: dm_tm_destroy(pmd->tm); + dm_sm_destroy(pmd->metadata_sm); return r; } @@ -542,12 +551,11 @@ cleanup_tm: static int __check_incompat_features(struct thin_disk_superblock *disk_super, struct dm_pool_metadata *pmd) { - u32 features; + uint32_t features; features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; if (features) { - DMERR("could not access metadata due to " - "unsupported optional features (%lx).", + DMERR("could not access metadata due to unsupported optional features (%lx).", (unsigned long)features); return -EINVAL; } @@ -560,8 +568,7 @@ static int __check_incompat_features(struct thin_disk_superblock *disk_super, features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; if (features) { - DMERR("could not access metadata RDWR due to " - "unsupported optional features (%lx).", + DMERR("could not access metadata RDWR due to unsupported optional features (%lx).", (unsigned long)features); return -EINVAL; } @@ -575,7 +582,8 @@ static int __open_metadata(struct dm_pool_metadata *pmd) struct dm_block *sblock; struct thin_disk_superblock *disk_super; - r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, &sb_validator, &sblock); + r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, &sblock); if (r < 0) { DMERR("couldn't read superblock"); return r; @@ -585,7 +593,7 @@ static int __open_metadata(struct dm_pool_metadata *pmd) r = __check_incompat_features(disk_super, pmd); if (r < 0) - goto out_unlock_sblock; + goto bad_unlock_sblock; r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, disk_super->metadata_space_map_root, @@ -593,7 +601,7 @@ static int __open_metadata(struct dm_pool_metadata *pmd) &pmd->tm, &pmd->metadata_sm); if (r < 0) { DMERR("tm_open_with_sm failed"); - goto out_unlock_sblock; + goto bad_unlock_sblock; } pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root, @@ -601,64 +609,72 @@ static int __open_metadata(struct dm_pool_metadata *pmd) if (IS_ERR(pmd->data_sm)) { DMERR("sm_disk_open failed"); r = PTR_ERR(pmd->data_sm); - goto out_cleanup_tm; + goto bad_cleanup_tm; } pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); if (!pmd->nb_tm) { DMERR("could not create non-blocking clone tm"); r = -ENOMEM; - goto out_cleanup_data_sm; + goto bad_cleanup_data_sm; } __setup_btree_details(pmd); return dm_bm_unlock(sblock); -out_cleanup_data_sm: +bad_cleanup_data_sm: dm_sm_destroy(pmd->data_sm); -out_cleanup_tm: - dm_sm_destroy(pmd->metadata_sm); +bad_cleanup_tm: dm_tm_destroy(pmd->tm); -out_unlock_sblock: + dm_sm_destroy(pmd->metadata_sm); +bad_unlock_sblock: dm_bm_unlock(sblock); return r; } -static int __open_or_format_metadata(struct dm_pool_metadata *pmd, enum dm_thin_metadata_mode mode) +static int __open_or_format_metadata(struct dm_pool_metadata *pmd, bool format_device) { int r, unformatted; - r = superblock_all_zeroes(pmd->bm, &unformatted); + r = __superblock_all_zeroes(pmd->bm, &unformatted); if (r) return r; if (unformatted) - return (mode & DM_THIN_FORMAT) ? __format_metadata(pmd) : -EPERM; - else - return (mode & DM_THIN_OPEN) ? __open_metadata(pmd) : -EPERM; + return format_device ? 
__format_metadata(pmd) : -EPERM; + + return __open_metadata(pmd); } -static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, - enum dm_thin_metadata_mode mode) +static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool format_device) { int r; pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE, THIN_METADATA_CACHE_SIZE, THIN_MAX_CONCURRENT_LOCKS); - if (!pmd->bm) { + if (IS_ERR(pmd->bm)) { DMERR("could not create block manager"); - return -ENOMEM; + return PTR_ERR(pmd->bm); } - r = __open_or_format_metadata(pmd, mode); + r = __open_or_format_metadata(pmd, format_device); if (r) dm_block_manager_destroy(pmd->bm); return r; } +static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) +{ + dm_sm_destroy(pmd->data_sm); + dm_sm_destroy(pmd->metadata_sm); + dm_tm_destroy(pmd->nb_tm); + dm_tm_destroy(pmd->tm); + dm_block_manager_destroy(pmd->bm); +} + static int __begin_transaction(struct dm_pool_metadata *pmd) { int r; @@ -683,7 +699,7 @@ static int __begin_transaction(struct dm_pool_metadata *pmd) pmd->data_block_size = le32_to_cpu(disk_super->data_block_size); dm_bm_unlock(sblock); - return r; + return 0; } static int __write_changed_details(struct dm_pool_metadata *pmd) @@ -759,19 +775,17 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) disk_super = dm_block_data(sblock); disk_super->time = cpu_to_le32(pmd->time); - disk_super->trans_id = cpu_to_le64(pmd->trans_id); disk_super->data_mapping_root = cpu_to_le64(pmd->root); disk_super->device_details_root = cpu_to_le64(pmd->details_root); + disk_super->trans_id = cpu_to_le64(pmd->trans_id); disk_super->flags = cpu_to_le32(pmd->flags); - r = dm_sm_copy_root(pmd->metadata_sm, - &disk_super->metadata_space_map_root, + r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, metadata_len); if (r < 0) goto out_locked; - r = dm_sm_copy_root(pmd->data_sm, - &disk_super->data_space_map_root, + r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, data_len); if (r < 0) goto out_locked; @@ -785,7 +799,7 @@ out_locked: struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, sector_t data_block_size, - enum dm_thin_metadata_mode mode) + bool format_device) { int r; struct dm_pool_metadata *pmd; @@ -799,12 +813,12 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, init_rwsem(&pmd->root_lock); pmd->time = 0; INIT_LIST_HEAD(&pmd->thin_devices); - pmd->read_only = 0; - pmd->fail_io = 0; + pmd->read_only = false; + pmd->fail_io = false; pmd->bdev = bdev; pmd->data_block_size = data_block_size; - r = __create_persistent_data_objects(pmd, mode); + r = __create_persistent_data_objects(pmd, format_device); if (r) { kfree(pmd); return ERR_PTR(r); @@ -820,15 +834,6 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return pmd; } -static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) -{ - dm_sm_destroy(pmd->data_sm); - dm_sm_destroy(pmd->metadata_sm); - dm_tm_destroy(pmd->tm); - dm_tm_destroy(pmd->nb_tm); - dm_block_manager_destroy(pmd->bm); -} - int dm_pool_metadata_close(struct dm_pool_metadata *pmd) { int r; @@ -862,6 +867,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) if (!pmd->fail_io) __destroy_persistent_data_objects(pmd); + kfree(pmd); return 0; } @@ -922,7 +928,7 @@ static int __open_device(struct dm_pool_metadata *pmd, (*td)->id = dev; (*td)->open_count = 1; (*td)->changed = changed; - (*td)->aborted_with_changes = 0; + 
(*td)->aborted_with_changes = false; (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks); (*td)->transaction_id = le64_to_cpu(details_le.transaction_id); (*td)->creation_time = le32_to_cpu(details_le.creation_time); @@ -984,10 +990,11 @@ static int __create_thin(struct dm_pool_metadata *pmd, int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __create_thin(pmd, dev); + if (!pmd->fail_io) + r = __create_thin(pmd, dev); up_write(&pmd->root_lock); return r; @@ -1007,9 +1014,6 @@ static int __set_snapshot_details(struct dm_pool_metadata *pmd, td->changed = 1; td->snapshotted_time = time; - /* - * snap's changed flag already set when the device was created. - */ snap->mapped_blocks = td->mapped_blocks; snap->snapshotted_time = time; __close_device(td); @@ -1077,10 +1081,11 @@ int dm_pool_create_snap(struct dm_pool_metadata *pmd, dm_thin_id dev, dm_thin_id origin) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __create_snap(pmd, dev, origin); + if (!pmd->fail_io) + r = __create_snap(pmd, dev, origin); up_write(&pmd->root_lock); return r; @@ -1119,10 +1124,11 @@ static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev) int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __delete_device(pmd, dev); + if (!pmd->fail_io) + r = __delete_device(pmd, dev); up_write(&pmd->root_lock); return r; @@ -1132,20 +1138,22 @@ int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, uint64_t current_id, uint64_t new_id) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); + if (pmd->fail_io) - r = -EINVAL; - else { - if (pmd->trans_id != current_id) { - DMERR("mismatched transaction id"); - r = -EINVAL; - } else { - pmd->trans_id = new_id; - r = 0; - } + goto out; + + if (pmd->trans_id != current_id) { + DMERR("mismatched transaction id"); + goto out; } + + pmd->trans_id = new_id; + r = 0; + +out: up_write(&pmd->root_lock); return r; @@ -1154,12 +1162,10 @@ int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, uint64_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - if (pmd->fail_io) - r = -EINVAL; - else { + if (!pmd->fail_io) { *result = pmd->trans_id; r = 0; } @@ -1188,8 +1194,10 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) held_root = dm_block_location(copy); disk_super = dm_block_data(copy); + if (le64_to_cpu(disk_super->held_root)) { DMWARN("Pool metadata snapshot already exists: release this before taking another."); + dm_tm_dec(pmd->tm, held_root); dm_tm_unlock(pmd->tm, copy); return -EBUSY; @@ -1221,17 +1229,17 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) disk_super = dm_block_data(sblock); disk_super->held_root = cpu_to_le64(held_root); - dm_tm_unlock(pmd->tm, sblock); - + dm_bm_unlock(sblock); return 0; } int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? 
-EINVAL : __reserve_metadata_snap(pmd); + if (!pmd->fail_io) + r = __reserve_metadata_snap(pmd); up_write(&pmd->root_lock); return r; @@ -1252,10 +1260,10 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) held_root = le64_to_cpu(disk_super->held_root); disk_super->held_root = cpu_to_le64(0); - dm_tm_unlock(pmd->tm, sblock); + dm_bm_unlock(sblock); if (!held_root) { - DMWARN("pool has no metadata snap"); + DMWARN("No pool metadata snapshot found: nothing to release."); return -EINVAL; } @@ -1267,15 +1275,17 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root)); dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root)); dm_sm_dec_block(pmd->metadata_sm, held_root); + return dm_tm_unlock(pmd->tm, copy); } int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __release_metadata_snap(pmd); + if (!pmd->fail_io) + r = __release_metadata_snap(pmd); up_write(&pmd->root_lock); return r; @@ -1302,10 +1312,11 @@ static int __get_metadata_snap(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __get_metadata_snap(pmd, result); + if (!pmd->fail_io) + r = __get_metadata_snap(pmd, result); up_read(&pmd->root_lock); return r; @@ -1314,10 +1325,11 @@ int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev, struct dm_thin_device **td) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __open_device(pmd, dev, 0, td); + if (!pmd->fail_io) + r = __open_device(pmd, dev, 0, td); up_write(&pmd->root_lock); return r; @@ -1337,7 +1349,7 @@ dm_thin_id dm_thin_dev_id(struct dm_thin_device *td) return td->id; } -static int __snapshotted_since(struct dm_thin_device *td, uint32_t time) +static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) { return td->snapshotted_time > time; } @@ -1345,31 +1357,31 @@ static int __snapshotted_since(struct dm_thin_device *td, uint32_t time) int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, int can_block, struct dm_thin_lookup_result *result) { - int r; + int r = -EINVAL; uint64_t block_time = 0; __le64 value; struct dm_pool_metadata *pmd = td->pmd; dm_block_t keys[2] = { td->id, block }; - - if (pmd->fail_io) - return -EINVAL; + struct dm_btree_info *info; if (can_block) { down_read(&pmd->root_lock); - r = dm_btree_lookup(&pmd->info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else if (down_read_trylock(&pmd->root_lock)) { - r = dm_btree_lookup(&pmd->nb_info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else + info = &pmd->info; + } else if (down_read_trylock(&pmd->root_lock)) + info = &pmd->nb_info; + else return -EWOULDBLOCK; + if (pmd->fail_io) + goto out; + + r = dm_btree_lookup(info, pmd->root, keys, &value); + if (!r) + block_time = le64_to_cpu(value); + +out: + up_read(&pmd->root_lock); + if (!r) { dm_block_t exception_block; uint32_t exception_time; @@ -1408,10 +1420,11 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, dm_block_t data_block) { - int 
r; + int r = -EINVAL; down_write(&td->pmd->root_lock); - r = td->pmd->fail_io ? -EINVAL : __insert(td, block, data_block); + if (!td->pmd->fail_io) + r = __insert(td, block, data_block); up_write(&td->pmd->root_lock); return r; @@ -1433,45 +1446,47 @@ static int __remove(struct dm_thin_device *td, dm_block_t block) return 0; } -bool dm_thin_changed_this_transaction(struct dm_thin_device *td) +int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) { - int r; + int r = -EINVAL; - down_read(&td->pmd->root_lock); - r = td->changed; - up_read(&td->pmd->root_lock); + down_write(&td->pmd->root_lock); + if (!td->pmd->fail_io) + r = __remove(td, block); + up_write(&td->pmd->root_lock); return r; } -bool dm_thin_aborted_changes(struct dm_thin_device *td) +bool dm_thin_changed_this_transaction(struct dm_thin_device *td) { int r; down_read(&td->pmd->root_lock); - r = td->aborted_with_changes; + r = td->changed; up_read(&td->pmd->root_lock); return r; } -int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) +bool dm_thin_aborted_changes(struct dm_thin_device *td) { - int r; + bool r; - down_write(&td->pmd->root_lock); - r = td->pmd->fail_io ? -EINVAL : __remove(td, block); - up_write(&td->pmd->root_lock); + down_read(&td->pmd->root_lock); + r = td->aborted_with_changes; + up_read(&td->pmd->root_lock); return r; } int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : dm_sm_new_block(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_new_block(pmd->data_sm, result); up_write(&pmd->root_lock); return r; @@ -1479,13 +1494,11 @@ int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - if (pmd->fail_io) { - r = -EINVAL; + if (pmd->fail_io) goto out; - } r = __commit_transaction(pmd); if (r <= 0) @@ -1510,31 +1523,31 @@ static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - if (pmd->fail_io) { - r = -EINVAL; + if (pmd->fail_io) goto out; - } __set_abort_with_changes_flags(pmd); __destroy_persistent_data_objects(pmd); - r = __create_persistent_data_objects(pmd, DM_THIN_OPEN); + r = __create_persistent_data_objects(pmd, false); if (r) - pmd->fail_io = 1; + pmd->fail_io = true; out: up_write(&pmd->root_lock); + return r; } int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : dm_sm_get_nr_free(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_free(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1543,10 +1556,11 @@ int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *resul int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = pmd->fail_io ? 
-EINVAL : dm_sm_get_nr_free(pmd->metadata_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_free(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1555,10 +1569,11 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : dm_sm_get_nr_blocks(pmd->metadata_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1575,10 +1590,11 @@ int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result) int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : dm_sm_get_nr_blocks(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_blocks(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1586,13 +1602,11 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result) { - int r; + int r = -EINVAL; struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - if (pmd->fail_io) - r = -EINVAL; - else { + if (!pmd->fail_io) { *result = td->mapped_blocks; r = 0; } @@ -1620,11 +1634,12 @@ static int __highest_block(struct dm_thin_device *td, dm_block_t *result) int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, dm_block_t *result) { - int r; + int r = -EINVAL; struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __highest_block(td, result); + if (!pmd->fail_io) + r = __highest_block(td, result); up_read(&pmd->root_lock); return r; @@ -1652,10 +1667,11 @@ static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __resize_data_dev(pmd, new_count); + if (!pmd->fail_io) + r = __resize_data_dev(pmd, new_count); up_write(&pmd->root_lock); return r; @@ -1664,7 +1680,7 @@ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd) { down_write(&pmd->root_lock); - pmd->read_only = 1; - dm_bm_read_only(pmd->bm); + pmd->read_only = true; + dm_bm_set_read_only(pmd->bm); up_write(&pmd->root_lock); } diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index b7d0946..0cecc37 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -37,14 +37,9 @@ typedef uint64_t dm_thin_id; /* * Reopens or creates a new, empty metadata volume. */ -enum dm_thin_metadata_mode { - DM_THIN_OPEN = 1, - DM_THIN_FORMAT = 2 -}; - struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, sector_t data_block_size, - enum dm_thin_metadata_mode mode); + bool format_device); int dm_pool_metadata_close(struct dm_pool_metadata *pmd); @@ -86,8 +81,8 @@ int dm_pool_commit_metadata(struct dm_pool_metadata *pmd); /* * Discards all uncommitted changes. Rereads the superblock, rolling back - * to the last good transaction. Thin devices remain open, if they had - * uncommitted changes dm_thin_aborted_changes() will tell you. + * to the last good transaction. Thin devices remain open. 
+ dm_thin_aborted_changes() tells you if they had uncommitted changes. * * If this call fails it's only useful to call dm_pool_metadata_close(). * All other methods will fail with -EINVAL. diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 6a1e76e..a7f92ff 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -5,6 +5,7 @@ */ #include "dm-thin-metadata.h" +#include "dm.h" #include <linux/device-mapper.h> #include <linux/dm-io.h> @@ -267,6 +268,7 @@ out: spin_unlock_irqrestore(&prison->lock, flags); *ref = cell; + return r; } @@ -501,7 +503,7 @@ struct dm_thin_new_mapping; enum pool_mode { PM_WRITE, /* metadata may be changed */ PM_READ_ONLY, /* metadata may not be changed */ - PM_FAIL /* all io fails */ + PM_FAIL, /* all I/O fails */ }; struct pool_features { @@ -526,6 +528,7 @@ struct pool { dm_block_t low_water_blocks; uint32_t sectors_per_block; + int sectors_per_block_shift; struct pool_features pf; unsigned low_water_triggered:1; /* A dm event has been sent */ @@ -563,6 +566,9 @@ struct pool { process_mapping_fn process_prepared_discard; }; +static enum pool_mode get_pool_mode(struct pool *pool); +static void set_pool_mode(struct pool *pool, enum pool_mode mode); + /* * Target context for a pool. */ @@ -695,32 +701,30 @@ static void requeue_io(struct thin_c *tc) * target. */ -/* - * do_div wrappers that don't modify the dividend - */ -static sector_t dm_thin_do_div(sector_t a, __u32 b) +static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) { - do_div(a, b); - return a; -} + sector_t block_nr = bio->bi_sector; -static sector_t dm_thin_do_mod(sector_t a, __u32 b) -{ - return do_div(a, b); -} + if (tc->pool->sectors_per_block_shift < 0) + (void) sector_div(block_nr, tc->pool->sectors_per_block); + else + block_nr >>= tc->pool->sectors_per_block_shift; -static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) -{ - return dm_thin_do_div(bio->bi_sector, tc->pool->sectors_per_block); + return block_nr; } static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) { struct pool *pool = tc->pool; + sector_t bi_sector = bio->bi_sector; bio->bi_bdev = tc->pool_dev->bdev; - bio->bi_sector = (block * pool->sectors_per_block) + - dm_thin_do_mod(bio->bi_sector, pool->sectors_per_block); + if (tc->pool->sectors_per_block_shift < 0) + bio->bi_sector = (block * pool->sectors_per_block) + + sector_div(bi_sector, pool->sectors_per_block); + else + bio->bi_sector = (block << pool->sectors_per_block_shift) | + (bi_sector & (pool->sectors_per_block - 1)); } static void remap_to_origin(struct thin_c *tc, struct bio *bio) @@ -739,25 +743,28 @@ static void issue(struct thin_c *tc, struct bio *bio) struct pool *pool = tc->pool; unsigned long flags; + if (!bio_triggers_commit(tc, bio)) { + generic_make_request(bio); + return; + } + + /* + * Complete bio with an error if earlier I/O caused changes to + * the metadata that can't be committed e.g, due to I/O errors + * on the metadata device. + */ + if (dm_thin_aborted_changes(tc->td)) { + bio_io_error(bio); + return; + } + /* * Batch together any bios that trigger commits and then issue a * single commit for them in process_deferred_bios(). */ - if (bio_triggers_commit(tc, bio)) { - if (dm_thin_aborted_changes(tc->td)) { - /* - * Earlier io caused changes to the metadata that - * can't be committed (eg, due to io errors on the - * metadata device. 
- */ - bio_io_error(bio); - } else { - spin_lock_irqsave(&pool->lock, flags); - bio_list_add(&pool->deferred_flush_bios, bio); - spin_unlock_irqrestore(&pool->lock, flags); - } - } else - generic_make_request(bio); + spin_lock_irqsave(&pool->lock, flags); + bio_list_add(&pool->deferred_flush_bios, bio); + spin_unlock_irqrestore(&pool->lock, flags); } static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio) @@ -851,8 +858,13 @@ static void overwrite_endio(struct bio *bio, int err) /*----------------------------------------------------------------*/ -static enum pool_mode get_pool_mode(struct pool *pool); -static void set_pool_mode(struct pool *pool, enum pool_mode mode); +/* + * Workqueue. + */ + +/* + * Prepared mapping jobs. + */ /* * This sends the bios in the cell back to the deferred_bios list. @@ -998,8 +1010,7 @@ static void process_prepared(struct pool *pool, struct list_head *head, */ static int io_overlaps_block(struct pool *pool, struct bio *bio) { - return !dm_thin_do_mod(bio->bi_sector, pool->sectors_per_block) && - (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT)); + return bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT); } static int io_overwrites_block(struct pool *pool, struct bio *bio) @@ -1030,6 +1041,7 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool) struct dm_thin_new_mapping *r = pool->next_mapping; BUG_ON(!pool->next_mapping); + pool->next_mapping = NULL; return r; @@ -1160,8 +1172,6 @@ static int commit(struct pool *pool) { int r; - BUG_ON(!get_pool_mode(pool) == PM_WRITE); - r = dm_pool_commit_metadata(pool->pmd); if (r) DMERR("commit failed, error = %d", r); @@ -1170,15 +1180,15 @@ static int commit(struct pool *pool) } /* - * Returns a boolean to indicate whether we're in a fallback mode after this - * call. Many callers don't care about the return value. + * A non-zero return indicates read_only or fail_io mode. + * Many callers don't care about the return value. */ static int commit_or_fallback(struct pool *pool) { int r; if (get_pool_mode(pool) != PM_WRITE) - return 1; + return -EINVAL; r = commit(pool); if (r) @@ -1215,7 +1225,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) * Try to commit to see if that will free up some * more space. */ - commit_or_fallback(pool); + (void) commit_or_fallback(pool); r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); if (r) @@ -1323,15 +1333,10 @@ static void process_discard(struct thin_c *tc, struct bio *bio) } } else { /* - * This path is hit if people are ignoring - * limits->discard_granularity. It ignores any - * part of the discard that is in a subsequent - * block. + * The DM core makes sure that the discard doesn't span + * a block boundary. So we submit the discard of a + * partial block appropriately. 
*/ - sector_t offset = bio->bi_sector - (block * pool->sectors_per_block); - unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT; - bio->bi_size = min(bio->bi_size, remaining); - cell_release_singleton(cell, bio); cell_release_singleton(cell2, bio); if ((!lookup_result.shared) && pool->pf.discard_passdown) @@ -1511,29 +1516,33 @@ static void process_bio(struct thin_c *tc, struct bio *bio) static void process_bio_read_only(struct thin_c *tc, struct bio *bio) { - int r, dir = bio_data_dir(bio); + int r; + int rw = bio_data_dir(bio); dm_block_t block = get_bio_block(tc, bio); struct dm_thin_lookup_result lookup_result; r = dm_thin_find_block(tc->td, block, 1, &lookup_result); switch (r) { case 0: - if (lookup_result.shared && (dir == WRITE) && bio->bi_size) + if (lookup_result.shared && (rw == WRITE) && bio->bi_size) bio_io_error(bio); else remap_and_issue(tc, bio, lookup_result.block); break; case -ENODATA: - if (dir == READ && tc->origin_dev) - remap_to_origin_and_issue(tc, bio); + if (rw != READ) { + bio_io_error(bio); + break; + } - else if (dir == READ) { - zero_fill_bio(bio); - bio_endio(bio, 0); + if (tc->origin_dev) { + remap_to_origin_and_issue(tc, bio); + break; + } - } else - bio_io_error(bio); + zero_fill_bio(bio); + bio_endio(bio, 0); break; default: @@ -1647,7 +1656,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode) pool->pf.mode = mode; - switch(mode) { + switch (mode) { case PM_FAIL: DMERR("switching pool to failure mode"); pool->process_bio = process_bio_fail; @@ -1790,12 +1799,16 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, break; default: + /* + * Must always call bio_io_error on failure. + * dm_thin_find_block can fail with -EINVAL if the + * pool is switched to fail-io mode. + */ bio_io_error(bio); r = DM_MAPIO_SUBMITTED; break; } - BUG_ON(r < 0); return r; } @@ -1913,9 +1926,9 @@ static struct pool *pool_create(struct mapped_device *pool_md, void *err_p; struct pool *pool; struct dm_pool_metadata *pmd; - enum dm_thin_metadata_mode mode = DM_THIN_OPEN | (read_only ? 0 : DM_THIN_FORMAT); + bool format_device = read_only ? false : true; - pmd = dm_pool_metadata_open(metadata_dev, block_size, mode); + pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device); if (IS_ERR(pmd)) { *error = "Error creating metadata object"; return (struct pool *)pmd; @@ -1930,6 +1943,10 @@ static struct pool *pool_create(struct mapped_device *pool_md, pool->pmd = pmd; pool->sectors_per_block = block_size; + if (block_size & (block_size - 1)) + pool->sectors_per_block_shift = -1; + else + pool->sectors_per_block_shift = __ffs(block_size); pool->low_water_blocks = 0; pool_features_init(&pool->pf); pool->prison = prison_create(PRISON_CELLS); @@ -2181,7 +2198,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) if (kstrtoul(argv[2], 10, &block_size) || !block_size || block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS || block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS || - dm_thin_do_mod(block_size, DATA_DEV_BLOCK_SIZE_MIN_SECTORS)) { + block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) { ti->error = "Invalid block size"; r = -EINVAL; goto out; @@ -2228,6 +2245,15 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) goto out_flags_changed; } + /* + * The block layer requires discard_granularity to be a power of 2. 
+ */ + if (pf.discard_enabled && !is_power_of_2(block_size)) { + ti->error = "Discard support must be disabled when the block size is not a power of 2"; + r = -EINVAL; + goto out_flags_changed; + } + pt->pool = pool; pt->ti = ti; pt->metadata_dev = metadata_dev; @@ -2235,7 +2261,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->low_water_blocks = low_water_blocks; pt->pf = pf; ti->num_flush_requests = 1; - /* * Only need to enable discards if the pool should pass * them down to the data device. The thin device's discard @@ -2248,7 +2273,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * stacking of discard limits (this keeps the pool and * thin devices' discard limits consistent). */ - ti->discards_supported = 1; + ti->discards_supported = true; } ti->private = pt; @@ -2308,7 +2333,8 @@ static int pool_preresume(struct dm_target *ti) int r; struct pool_c *pt = ti->private; struct pool *pool = pt->pool; - dm_block_t data_size, sb_data_size; + sector_t data_size = ti->len; + dm_block_t sb_data_size; /* * Take control of the pool object. @@ -2317,7 +2343,8 @@ static int pool_preresume(struct dm_target *ti) if (r) return r; - data_size = dm_thin_do_div(ti->len, pool->sectors_per_block); + (void) sector_div(data_size, pool->sectors_per_block); + r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size); if (r) { DMERR("failed to retrieve data device size"); @@ -2326,19 +2353,19 @@ static int pool_preresume(struct dm_target *ti) if (data_size < sb_data_size) { DMERR("pool target too small, is %llu blocks (expected %llu)", - data_size, sb_data_size); + (unsigned long long)data_size, sb_data_size); return -EINVAL; } else if (data_size > sb_data_size) { r = dm_pool_resize_data_dev(pool->pmd, data_size); if (r) { DMERR("failed to resize data device"); + /* FIXME Stricter than necessary: Rollback transaction instead here */ + set_pool_mode(pool, PM_READ_ONLY); return r; } - r = commit(pool); - if (r) - return r; + (void) commit_or_fallback(pool); } return 0; @@ -2366,7 +2393,7 @@ static void pool_postsuspend(struct dm_target *ti) cancel_delayed_work(&pool->waker); flush_workqueue(pool->wq); - commit_or_fallback(pool); + (void) commit_or_fallback(pool); } static int check_arg_count(unsigned argc, unsigned args_required) @@ -2500,7 +2527,7 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct if (r) return r; - commit_or_fallback(pool); + (void) commit_or_fallback(pool); r = dm_pool_reserve_metadata_snap(pool->pmd); if (r) @@ -2562,7 +2589,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) DMWARN("Unrecognised thin pool target message received: %s", argv[0]); if (!r) - commit_or_fallback(pool); + (void) commit_or_fallback(pool); return r; } @@ -2572,7 +2599,7 @@ static void emit_flags(struct pool_features *pf, char *result, { unsigned count = !pf->zero_new_blocks + !pf->discard_enabled + !pf->discard_passdown + (pf->mode == PM_READ_ONLY); - DMEMIT(" %u ", count); + DMEMIT("%u ", count); if (!pf->zero_new_blocks) DMEMIT("skip_block_zeroing "); @@ -2584,7 +2611,7 @@ static void emit_flags(struct pool_features *pf, char *result, DMEMIT("no_discard_passdown "); if (pf->mode == PM_READ_ONLY) - DMEMIT("read_only"); + DMEMIT("read_only "); } /* @@ -2593,7 +2620,7 @@ static void emit_flags(struct pool_features *pf, char *result, * / */ static int pool_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { int r; 
unsigned sz = 0; @@ -2611,22 +2638,19 @@ static int pool_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: if (get_pool_mode(pool) == PM_FAIL) { - DMEMIT("fail"); + DMEMIT("Fail"); break; } + /* Commit to ensure statistics aren't out-of-date */ + if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) + (void) commit_or_fallback(pool); + r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id); if (r) return r; - /* - * If we're in the middle of a transaction the free block - * counts can be quite out of date, so we do a quick - * commit. - */ - commit_or_fallback(pool); - r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata); if (r) @@ -2657,15 +2681,23 @@ static int pool_status(struct dm_target *ti, status_type_t type, (unsigned long long)nr_blocks_data); if (held_root) - DMEMIT("%llu", held_root); + DMEMIT("%llu ", held_root); + else + DMEMIT("- "); + + if (pool->pf.discard_enabled && pool->pf.discard_passdown) + DMEMIT("discard_passdown "); else - DMEMIT("-"); + DMEMIT("no_discard_passdown "); - emit_flags(&pool->pf, result, sz, maxlen); + if (pool->pf.mode == PM_READ_ONLY) + DMEMIT("read_only"); + else + DMEMIT("read_write"); break; case STATUSTYPE_TABLE: - DMEMIT("%s %s %lu %llu", + DMEMIT("%s %s %lu %llu ", format_dev_t(buf, pt->metadata_dev->bdev->bd_dev), format_dev_t(buf2, pt->data_dev->bdev->bd_dev), (unsigned long)pool->sectors_per_block, @@ -2708,7 +2740,8 @@ static void set_discard_limits(struct pool *pool, struct queue_limits *limits) /* * This is just a hint, and not enforced. We have to cope with - * bios that overlap 2 blocks. + * bios that cover a block partially. A discard that spans a block + * boundary is not sent to this target. */ limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; limits->discard_zeroes_data = pool->pf.zero_new_blocks; @@ -2729,7 +2762,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2845,15 +2878,20 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad_thin_open; } - ti->split_io = tc->pool->sectors_per_block; + r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block); + if (r) + goto bad_thin_open; + ti->num_flush_requests = 1; - ti->flush_supported = 1; + ti->flush_supported = true; /* In case the pool supports discards, pass them on. 
*/ if (tc->pool->pf.discard_enabled) { - ti->discards_supported = 1; + ti->discards_supported = true; ti->num_discard_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; + /* Discard requests must be split on a block boundary */ + ti->split_discard_requests = true; } dm_put(pool_md); @@ -2934,7 +2972,7 @@ static void thin_postsuspend(struct dm_target *ti) * */ static int thin_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { int r; ssize_t sz = 0; @@ -2943,7 +2981,7 @@ static int thin_status(struct dm_target *ti, status_type_t type, struct thin_c *tc = ti->private; if (get_pool_mode(tc->pool) == PM_FAIL) { - DMEMIT("fail"); + DMEMIT("Fail"); return 0; } @@ -2984,7 +3022,7 @@ static int thin_status(struct dm_target *ti, status_type_t type, static int thin_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { - dm_block_t blocks; + sector_t blocks; struct thin_c *tc = ti->private; struct pool *pool = tc->pool; @@ -2995,7 +3033,8 @@ static int thin_iterate_devices(struct dm_target *ti, if (!pool->ti) return 0; /* nothing is bound */ - blocks = dm_thin_do_div(pool->ti->len, pool->sectors_per_block); + blocks = pool->ti->len; + (void) sector_div(blocks, pool->sectors_per_block); if (blocks) return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data); @@ -3014,7 +3053,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 1, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index fa365d3..254d192 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -515,7 +515,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio, * Status: V (valid) or C (corruption found) */ static int verity_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct dm_verity *v = ti->private; unsigned sz = 0; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ec5d5d9..4e09b6f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -968,22 +968,41 @@ static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti static sector_t max_io_len(sector_t sector, struct dm_target *ti) { sector_t len = max_io_len_target_boundary(sector, ti); + sector_t offset, max_len; /* - * Does the target need to split even further ? + * Does the target need to split even further? 
*/ - if (ti->split_io) { - sector_t offset = dm_target_offset(ti, sector); - sector_t boundary, tmp = offset + ti->split_io; + if (ti->max_io_len) { + offset = dm_target_offset(ti, sector); + if (unlikely(ti->max_io_len & (ti->max_io_len - 1))) + max_len = sector_div(offset, ti->max_io_len); + else + max_len = offset & (ti->max_io_len - 1); + max_len = ti->max_io_len - max_len; - boundary = ti->split_io - do_div(tmp, ti->split_io); - if (len > boundary) - len = boundary; + if (len > max_len) + len = max_len; } return len; } +int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) +{ + if (len > UINT_MAX) { + DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)", + (unsigned long long)len, UINT_MAX); + ti->error = "Maximum size of target IO is too large"; + return -EINVAL; + } + + ti->max_io_len = (uint32_t) len; + + return 0; +} +EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); + static void __map_bio(struct dm_target *ti, struct bio *clone, struct dm_target_io *tio) { diff --git a/drivers/md/dm.h b/drivers/md/dm.h index b7dacd5..52eef49 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -23,6 +23,11 @@ #define DM_SUSPEND_NOFLUSH_FLAG (1 << 1) /* + * Status feature flags + */ +#define DM_STATUS_NOFLUSH_FLAG (1 << 0) + +/* * Type of table and mapped_device's mempool */ #define DM_TYPE_NONE 0 diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index df9220d..5ba2777 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -364,7 +364,7 @@ static void dm_block_manager_write_callback(struct dm_buffer *buf) *--------------------------------------------------------------*/ struct dm_block_manager { struct dm_bufio_client *bufio; - int read_only; + bool read_only:1; }; struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, @@ -372,34 +372,36 @@ struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, unsigned cache_size, unsigned max_held_per_thread) { - struct dm_block_manager *bm = kmalloc(sizeof(*bm), GFP_KERNEL); + int r; + struct dm_block_manager *bm; - if (!bm) - return NULL; + bm = kmalloc(sizeof(*bm), GFP_KERNEL); + if (!bm) { + r = -ENOMEM; + goto bad; + } bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread, sizeof(struct buffer_aux), dm_block_manager_alloc_callback, dm_block_manager_write_callback); - if (!bm->bufio) { + if (IS_ERR(bm->bufio)) { + r = PTR_ERR(bm->bufio); kfree(bm); - return NULL; + goto bad; } - bm->read_only = 0; + bm->read_only = false; + return bm; + +bad: + return ERR_PTR(r); } EXPORT_SYMBOL_GPL(dm_block_manager_create); void dm_block_manager_destroy(struct dm_block_manager *bm) { - /* - * This should only happen if there's an error while we're creating - * a new pool metadata. At which point work has been done that - * incurs changes on disk, but we've not got enough pieces together - * to do a tm commit. 
- */ - WARN_ON(dm_bufio_has_dirty_buffers(bm->bufio)); dm_bufio_client_destroy(bm->bufio); kfree(bm); } @@ -610,11 +612,11 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, return dm_bufio_write_dirty_buffers(bm->bufio); } -void dm_bm_read_only(struct dm_block_manager *bm) +void dm_bm_set_read_only(struct dm_block_manager *bm) { - bm->read_only = 1; + bm->read_only = true; } -EXPORT_SYMBOL_GPL(dm_bm_read_only); +EXPORT_SYMBOL_GPL(dm_bm_set_read_only); u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) { diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index 175db19..be5bff6 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h @@ -100,11 +100,10 @@ int dm_bm_unlock(struct dm_block *b); * It's a common idiom to have a superblock that should be committed last. * * @superblock should be write-locked on entry. It will be unlocked during - * this function (even if an IO error occurs and the data doesn't get - * written to disk). All dirty blocks are guaranteed to be written and - * flushed before the superblock. + * this function. All dirty blocks are guaranteed to be written and flushed + * before the superblock. * - * This method may blocks. + * This method always blocks. */ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, struct dm_block *superblock); @@ -120,7 +119,7 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, * Additionally you should not use dm_bm_unlock_move, however no error will * be returned if you do. */ -void dm_bm_read_only(struct dm_block_manager *bm); +void dm_bm_set_read_only(struct dm_block_manager *bm); u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor); diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 71453ed..d77602d 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -224,7 +224,7 @@ static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm) ll->nr_blocks = 0; ll->bitmap_root = 0; ll->ref_count_root = 0; - ll->bitmap_index_changed = 0; + ll->bitmap_index_changed = false; return 0; } @@ -482,7 +482,7 @@ int sm_ll_commit(struct ll_disk *ll) if (ll->bitmap_index_changed) { r = ll->commit(ll); if (!r) - ll->bitmap_index_changed = 0; + ll->bitmap_index_changed = false; } return r; @@ -500,7 +500,7 @@ static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index, static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index, struct disk_index_entry *ie) { - ll->bitmap_index_changed = 1; + ll->bitmap_index_changed = true; memcpy(ll->mi_le.index + index, ie, sizeof(*ie)); return 0; } diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index cee388f..b3078d5 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -78,7 +78,7 @@ struct ll_disk { open_index_fn open_index; max_index_entries_fn max_entries; commit_fn commit; - int bitmap_index_changed; + bool bitmap_index_changed:1; }; struct disk_sm_root { diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index 9f2288e..f6d29e6 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -28,15 +28,7 @@ struct sm_disk { struct ll_disk old_ll; dm_block_t begin; - - /* - 
* The allocator should not wrap past this block within a - * transaction. - */ - dm_block_t end; dm_block_t nr_allocated_this_transaction; - - unsigned first_alloc:1; }; static void sm_disk_destroy(struct dm_space_map *sm) @@ -172,35 +164,20 @@ static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) { - int r = -ENOSPC; + int r; enum allocation_event ev; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - dm_block_t end; - - while (smd->first_alloc || smd->begin != smd->end) { - smd->first_alloc = 0; - - if (smd->begin >= smd->old_ll.nr_blocks) - smd->begin = 0; - - end = (smd->begin < smd->end) ? smd->end : smd->old_ll.nr_blocks; - - r = sm_ll_find_free_block(&smd->old_ll, smd->begin, end, b); - if (r && r != -ENOSPC) - return r; - - else if (!r) { - smd->begin = *b + 1; - r = sm_ll_inc(&smd->ll, *b, &ev); - if (!r) { - BUG_ON(ev != SM_ALLOC); - smd->nr_allocated_this_transaction++; - } - return r; - } + /* FIXME: we should loop round a couple of times */ + r = sm_ll_find_free_block(&smd->old_ll, smd->begin, smd->old_ll.nr_blocks, b); + if (r) + return r; - smd->begin = end; + smd->begin = *b + 1; + r = sm_ll_inc(&smd->ll, *b, &ev); + if (!r) { + BUG_ON(ev != SM_ALLOC); + smd->nr_allocated_this_transaction++; } return r; @@ -221,8 +198,7 @@ static int sm_disk_commit(struct dm_space_map *sm) return r; memcpy(&smd->old_ll, &smd->ll, sizeof(smd->old_ll)); - smd->end = smd->begin; - smd->first_alloc = 1; + smd->begin = 0; smd->nr_allocated_this_transaction = 0; r = sm_disk_get_nr_free(sm, &nr_free); @@ -285,8 +261,7 @@ struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, if (!smd) return ERR_PTR(-ENOMEM); - smd->begin = smd->end = 0; - smd->first_alloc = 1; + smd->begin = 0; smd->nr_allocated_this_transaction = 0; memcpy(&smd->sm, &ops, sizeof(smd->sm)); diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 50f3be7..d247a35 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -220,9 +220,9 @@ static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, return r; /* - * It would be tempting to use dm_bm_unlock_move here. But some - * code, such as the space maps, keep using the old data structures - * - secure in the knowledge they wont be changed until the next + * It would be tempting to use dm_bm_unlock_move here, but some + * code, such as the space maps, keeps using the old data structures + * secure in the knowledge they won't be changed until the next * transaction. Using unlock_move would force a synchronous read * since the old block would no longer be in the cache. 
*/ @@ -345,6 +345,7 @@ static int dm_tm_create_internal(struct dm_block_manager *bm, DMERR("couldn't create metadata space map"); goto bad; } + } else { r = dm_sm_metadata_open(*sm, *tm, sm_root, sm_len); if (r) { @@ -357,6 +358,7 @@ static int dm_tm_create_internal(struct dm_block_manager *bm, bad: dm_tm_destroy(*tm); + dm_sm_destroy(*sm); return r; } diff --git a/drivers/scsi/device_handler/scsi_dh.c b/drivers/scsi/device_handler/scsi_dh.c index 48e46f5..33e422e 100644 --- a/drivers/scsi/device_handler/scsi_dh.c +++ b/drivers/scsi/device_handler/scsi_dh.c @@ -468,7 +468,8 @@ EXPORT_SYMBOL_GPL(scsi_dh_handler_exist); /* * scsi_dh_attach - Attach device handler - * @sdev - sdev the handler should be attached to + * @q - Request queue that is associated with the scsi_device + * the handler should be attached to * @name - name of the handler to attach */ int scsi_dh_attach(struct request_queue *q, const char *name) @@ -498,7 +499,8 @@ EXPORT_SYMBOL_GPL(scsi_dh_attach); /* * scsi_dh_detach - Detach device handler - * @sdev - sdev the handler should be detached from + * @q - Request queue that is associated with the scsi_device + * the handler should be detached from * * This function will detach the device handler only * if the sdev is not part of the internal list, ie @@ -527,6 +529,38 @@ void scsi_dh_detach(struct request_queue *q) } EXPORT_SYMBOL_GPL(scsi_dh_detach); +/* + * scsi_dh_attached_handler_name - Get attached device handler's name + * @q - Request queue that is associated with the scsi_device + * that may have a device handler attached + * @gfp - the GFP mask used in the kmalloc() call when allocating memory + * + * Returns name of attached handler, NULL if no handler is attached. + * Caller must take care to free the returned string. + */ +const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp) +{ + unsigned long flags; + struct scsi_device *sdev; + const char *handler_name = NULL; + + spin_lock_irqsave(q->queue_lock, flags); + sdev = q->queuedata; + if (!sdev || !get_device(&sdev->sdev_gendev)) + sdev = NULL; + spin_unlock_irqrestore(q->queue_lock, flags); + + if (!sdev) + return NULL; + + if (sdev->scsi_dh_data) + handler_name = kstrdup(sdev->scsi_dh_data->scsi_dh->name, gfp); + + put_device(&sdev->sdev_gendev); + return handler_name; +} +EXPORT_SYMBOL_GPL(scsi_dh_attached_handler_name); + static struct notifier_block scsi_dh_nb = { .notifier_call = scsi_dh_notifier }; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index e869312..38d27a1 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -72,7 +72,7 @@ typedef int (*dm_preresume_fn) (struct dm_target *ti); typedef void (*dm_resume_fn) (struct dm_target *ti); typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, - char *result, unsigned int maxlen); + unsigned status_flags, char *result, unsigned maxlen); typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv); @@ -186,8 +186,8 @@ struct dm_target { sector_t begin; sector_t len; - /* Always a power of 2 */ - sector_t split_io; + /* If non-zero, maximum size of I/O submitted to a target. */ + uint32_t max_io_len; /* * A number of zero-length barrier requests that will be submitted @@ -215,24 +215,24 @@ struct dm_target { * Set if this target needs to receive flushes regardless of * whether or not its underlying devices have support. 
*/ - unsigned flush_supported:1; + bool flush_supported:1; /* * Set if this target needs to receive discards regardless of * whether or not its underlying devices have support. */ - unsigned discards_supported:1; + bool discards_supported:1; /* * Set if the target required discard request to be split * on max_io_len boundary. */ - unsigned split_discard_requests:1; + bool split_discard_requests:1; /* * Set if this target does not return zeroes on discarded blocks. */ - unsigned discard_zeroes_data_unsupported:1; + bool discard_zeroes_data_unsupported:1; }; /* Each target can link one of these into the table */ @@ -370,6 +370,11 @@ void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callback int dm_table_complete(struct dm_table *t); /* + * Target may require that it is never sent I/O larger than len. + */ +int __must_check dm_set_target_max_io_len(struct dm_target *ti, sector_t len); + +/* * Table reference counting. */ struct dm_table *dm_get_live_table(struct mapped_device *md); diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index 75fd557..2d45fd3 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -268,8 +268,8 @@ enum { #define DM_VERSION_MAJOR 4 #define DM_VERSION_MINOR 22 -#define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2011-10-19)" +#define DM_VERSION_PATCHLEVEL 1 +#define DM_VERSION_EXTRA "-ioctl (2012-06-01)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ @@ -307,6 +307,8 @@ enum { /* * Set this to suspend without flushing queued ios. + * Also disables flushing uncommitted changes in the thin target before + * generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT. */ #define DM_NOFLUSH_FLAG (1 << 11) /* In */ diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h index e3f2db2..620c723 100644 --- a/include/scsi/scsi_dh.h +++ b/include/scsi/scsi_dh.h @@ -60,6 +60,7 @@ extern int scsi_dh_activate(struct request_queue *, activate_complete, void *); extern int scsi_dh_handler_exist(const char *); extern int scsi_dh_attach(struct request_queue *, const char *); extern void scsi_dh_detach(struct request_queue *); +extern const char *scsi_dh_attached_handler_name(struct request_queue *, gfp_t); extern int scsi_dh_set_params(struct request_queue *, const char *); #else static inline int scsi_dh_activate(struct request_queue *req, @@ -80,6 +81,11 @@ static inline void scsi_dh_detach(struct request_queue *q) { return; } +static inline const char *scsi_dh_attached_handler_name(struct request_queue *q, + gfp_t gfp) +{ + return NULL; +} static inline int scsi_dh_set_params(struct request_queue *req, const char *params) { return -SCSI_DH_NOSYS;
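
For reference, the split_io to max_io_len conversion changes how a target declares its I/O boundary. A target used to assign ti->split_io directly; it must now call dm_set_target_max_io_len(), which rejects lengths that do not fit in 32 bits, so the return value has to be checked (the helper is __must_check). A minimal sketch of a hypothetical target constructor, not part of the diff above:

static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	sector_t chunk_sectors = 128;	/* hypothetical chunk size */
	int r;

	/* Previously: ti->split_io = chunk_sectors; */
	r = dm_set_target_max_io_len(ti, chunk_sectors);
	if (r)
		return r;	/* len exceeded UINT_MAX */

	return 0;
}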
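
Because the pool block size no longer has to be a power of 2, pool_create() above caches sectors_per_block_shift, using -1 as a sentinel for non-power-of-2 sizes. Sector-to-block arithmetic then chooses between sector_div() and a cheap shift. An illustrative helper, assuming only the fields introduced above:

static dm_block_t sector_to_block(struct pool *pool, sector_t s)
{
	if (pool->sectors_per_block_shift < 0)
		/* non-power-of-2 block size: full 64-bit division */
		(void) sector_div(s, pool->sectors_per_block);
	else
		/* power-of-2 block size: a shift is enough */
		s >>= pool->sectors_per_block_shift;

	return (dm_block_t) s;
}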
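
DM_NOFLUSH_FLAG now also suppresses the commit that pool_status() otherwise performs before reporting statistics (see the DM_STATUS_NOFLUSH_FLAG check above). A userspace sketch of requesting status that way, with buffer sizing and parsing of the returned status line elided:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/dm-ioctl.h>

static int pool_status_noflush(int ctl_fd, const char *name,
			       char *buf, size_t len)
{
	struct dm_ioctl *io = (struct dm_ioctl *) buf;

	memset(buf, 0, len);
	io->version[0] = DM_VERSION_MAJOR;
	io->version[1] = DM_VERSION_MINOR;
	io->version[2] = DM_VERSION_PATCHLEVEL;
	io->data_size  = len;
	io->data_start = sizeof(*io);
	io->flags = DM_NOFLUSH_FLAG;	/* skip the implicit commit */
	strncpy(io->name, name, sizeof(io->name) - 1);

	return ioctl(ctl_fd, DM_TABLE_STATUS, io);
}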
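
scsi_dh_attached_handler_name() returns a kstrdup()ed copy of the attached handler's name, or NULL if none is attached, so the caller owns the string and must kfree() it. dm-mpath is the intended consumer; the fragment below is only an illustration:

static void log_attached_handler(struct request_queue *q)
{
	const char *name = scsi_dh_attached_handler_name(q, GFP_KERNEL);

	if (!name)
		return;	/* no handler attached, or no scsi_device */

	printk(KERN_INFO "attached device handler: %s\n", name);
	kfree(name);
}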