From: Mike Snitzer

The discard limits that were established for a thin-pool or thin device
may have been incompatible with the pool's data device.  Fix this by
checking the discard limits of the pool's data device.  If an
incompatibility is found then the pool's 'discard passdown' feature is
disabled.

Also allow discards even if the pool's block size is not a power of 2.
The block layer assumes discard_granularity is a power of 2, so changes
are needed to allow discards when a pool is using a block size that is
not a power of 2.

This patch depends on commit c6e666345e1b79c62b ("block: split discard
into aligned requests") to ensure properly aligned discard requests are
issued to thinp.

Signed-off-by: Mike Snitzer
Signed-off-by: Joe Thornber
---
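
(The following standalone sketch is illustrative only and is not part
of the patch: it condenses the checks performed by the new
discard_limits_are_compatible() below into a userspace test.  The
function name and the example values are invented for the sketch.)

/* Sketch of the passdown compatibility checks: the data device must
 * accept a whole pool block per discard, and its discard granularity
 * must evenly divide the pool's block size. */
#include <assert.h>
#include <stdbool.h>

static bool passdown_compatible(unsigned max_discard_sectors,
				unsigned long long granularity_bytes,
				unsigned sectors_per_block)
{
	unsigned long long block_size =
		(unsigned long long)sectors_per_block << 9;

	/* max discard sectors smaller than a block */
	if (max_discard_sectors < sectors_per_block)
		return false;

	/* discard granularity larger than a block */
	if (granularity_bytes > block_size)
		return false;

	/* for a power-of-2 g (which the block layer guarantees for
	 * discard_granularity), (n & (g - 1)) == 0 iff g divides n */
	return !(block_size & (granularity_bytes - 1));
}

int main(void)
{
	/* 24-sector (12KiB) pool blocks, 4KiB device granularity: ok */
	assert(passdown_compatible(8192, 4096, 24));
	/* 8KiB granularity is not a factor of a 12KiB block */
	assert(!passdown_compatible(8192, 8192, 24));
	return 0;
}

The bitmask in the final check is sufficient only because the block
layer guarantees discard_granularity is a power of 2.
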
 drivers/md/dm-thin.c | 103 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 80 insertions(+), 23 deletions(-)

Index: linux/drivers/md/dm-thin.c
===================================================================
--- linux.orig/drivers/md/dm-thin.c
+++ linux/drivers/md/dm-thin.c
@@ -2257,15 +2257,6 @@ static int pool_ctr(struct dm_target *ti
 		goto out_flags_changed;
 	}
 
-	/*
-	 * The block layer requires discard_granularity to be a power of 2.
-	 */
-	if (pf.discard_enabled && !is_power_of_2(block_size)) {
-		ti->error = "Discard support must be disabled when the block size is not a power of 2";
-		r = -EINVAL;
-		goto out_flags_changed;
-	}
-
 	pt->pool = pool;
 	pt->ti = ti;
 	pt->metadata_dev = metadata_dev;
@@ -2288,6 +2279,7 @@ static int pool_ctr(struct dm_target *ti
 		 * thin devices' discard limits consistent).
 		 */
 		ti->discards_supported = true;
+		ti->discard_zeroes_data_unsupported = true;
 	}
 
 	ti->private = pt;
@@ -2746,31 +2738,99 @@ static int pool_merge(struct dm_target *
 	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
 }
 
-static void set_discard_limits(struct pool *pool, struct queue_limits *limits)
+static bool discard_limits_are_compatible(struct pool *pool,
+					  struct queue_limits *data_limits,
+					  const char **reason)
 {
+	sector_t block_size = pool->sectors_per_block << SECTOR_SHIFT;
+
 	/*
-	 * FIXME: these limits may be incompatible with the pool's data device
+	 * All reasons should be relative to the data device,
+	 * e.g.: Data device <reason>
 	 */
-	limits->max_discard_sectors = pool->sectors_per_block;
+	if (data_limits->max_discard_sectors < pool->sectors_per_block) {
+		*reason = "max discard sectors smaller than a block";
+		return false;
+	}
+
+	if (data_limits->discard_granularity > block_size) {
+		*reason = "discard granularity larger than a block";
+		return false;
+	}
+
+	if (block_size & (data_limits->discard_granularity - 1)) {
+		*reason = "discard granularity not a factor of block size";
+		return false;
+	}
+
+	return true;
+}
+
+static bool block_size_is_power_of_2(struct pool *pool)
+{
+	return pool->sectors_per_block_shift >= 0;
+}
+
+static void set_discard_granularity_no_passdown(struct pool *pool,
+						struct queue_limits *limits)
+{
+	unsigned dg_sectors;
 
 	/*
-	 * This is just a hint, and not enforced.  We have to cope with
-	 * bios that cover a block partially.  A discard that spans a block
-	 * boundary is not sent to this target.
+	 * Use largest power of 2 that is a factor of sectors_per_block
+	 * and at least DATA_DEV_BLOCK_SIZE_MIN_SECTORS.
 	 */
-	limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
-	limits->discard_zeroes_data = pool->pf.zero_new_blocks;
+	if (!block_size_is_power_of_2(pool))
+		dg_sectors = max(1 << (ffs(pool->sectors_per_block) - 1),
+				 DATA_DEV_BLOCK_SIZE_MIN_SECTORS);
+	else
+		dg_sectors = pool->sectors_per_block;
+
+	limits->discard_granularity = dg_sectors << SECTOR_SHIFT;
+}
+
+static void set_discard_limits(struct pool *pool,
+			       struct pool_features *pf,
+			       struct queue_limits *data_limits,
+			       struct queue_limits *limits)
+{
+	limits->max_discard_sectors = pool->sectors_per_block;
+
+	if (pf->discard_passdown)
+		limits->discard_granularity = data_limits->discard_granularity;
+	else
+		set_discard_granularity_no_passdown(pool, limits);
 }
 
 static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct pool_c *pt = ti->private;
 	struct pool *pool = pt->pool;
+	const char *reason;
+	struct block_device *data_bdev = pt->data_dev->bdev;
+	struct queue_limits *data_limits = &bdev_get_queue(data_bdev)->limits;
 
 	blk_limits_io_min(limits, 0);
 	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
-	if (pool->pf.discard_enabled)
-		set_discard_limits(pool, limits);
+
+	/*
+	 * pt->pf is used here because it reflects the features configured but
+	 * not yet transferred to the live pool (see: bind_control_target).
+	 */
+	if (!pt->pf.discard_enabled)
+		return;
+
+	disable_passdown_if_not_supported(pt, &pt->pf);
+
+	if (pt->pf.discard_passdown &&
+	    !discard_limits_are_compatible(pool, data_limits, &reason)) {
+		char buf[BDEVNAME_SIZE];
+		DMWARN("Data device (%s) %s: Disabling discard passdown.",
+		       bdevname(data_bdev, buf), reason);
+		pt->pf.discard_passdown = false;
+	}
+
+	set_discard_limits(pt->pool, &pt->pf, data_limits, limits);
 }
 
 static struct target_type pool_target = {
@@ -3059,11 +3119,8 @@ static int thin_iterate_devices(struct d
 static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
 {
 	struct thin_c *tc = ti->private;
-	struct pool *pool = tc->pool;
 
-	blk_limits_io_min(limits, 0);
-	blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
-	set_discard_limits(pool, limits);
+	*limits = bdev_get_queue(tc->pool_dev->bdev)->limits;
 }
 
 static struct target_type thin_target = {
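
(Likewise illustrative and not part of the patch: a userspace sketch of
the granularity selection done by set_discard_granularity_no_passdown()
when passdown is disabled and the pool's block size is not a power of
2.  The function name is invented and the minimum below merely stands
in for DATA_DEV_BLOCK_SIZE_MIN_SECTORS; it is not dm-thin's real
value.)

#include <stdio.h>
#include <strings.h>	/* ffs() */

#define SECTOR_SHIFT 9
#define MIN_GRANULARITY_SECTORS 8	/* stand-in, illustrative only */

static unsigned no_passdown_granularity_bytes(unsigned sectors_per_block)
{
	/* 1 << (ffs(n) - 1) isolates the lowest set bit of n, i.e. the
	 * largest power of 2 that is a factor of n (assumes n != 0) */
	unsigned dg_sectors = 1u << (ffs((int)sectors_per_block) - 1);

	if (dg_sectors < MIN_GRANULARITY_SECTORS)
		dg_sectors = MIN_GRANULARITY_SECTORS;

	return dg_sectors << SECTOR_SHIFT;
}

int main(void)
{
	/* 24 = 8 * 3, so granularity becomes 8 sectors: prints 4096 */
	printf("%u\n", no_passdown_granularity_bytes(24));
	return 0;
}

Since 1 << (ffs(n) - 1) (equivalently n & -n) is the largest power of 2
dividing n, the chosen granularity remains a power of 2, as the block
layer assumes, while still evenly dividing the pool's block size.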