thin-dev must first be merged with v3.5 (e.g. "git merge v3.5"); after resolving the conflict in dm-thin.c and committing the merge, this patch may be applied. --- Documentation/device-mapper/striped.txt | 7 +- Documentation/device-mapper/thin-provisioning.txt | 24 ++- drivers/md/dm-crypt.c | 9 +- drivers/md/dm-delay.c | 2 +- drivers/md/dm-exception-store.c | 13 +- drivers/md/dm-flakey.c | 2 +- drivers/md/dm-ioctl.c | 5 +- drivers/md/dm-linear.c | 2 +- drivers/md/dm-mpath.c | 49 +++- drivers/md/dm-raid.c | 13 +- drivers/md/dm-raid1.c | 10 +- drivers/md/dm-snap.c | 34 +- drivers/md/dm-stripe.c | 87 +++--- drivers/md/dm-thin-metadata.c | 344 ++++++++++---------- drivers/md/dm-thin-metadata.h | 11 +- drivers/md/dm-thin.c | 241 ++++++++------ drivers/md/dm-verity.c | 2 +- drivers/md/dm.c | 35 ++- drivers/md/dm.h | 5 + drivers/md/persistent-data/dm-block-manager.c | 36 +- drivers/md/persistent-data/dm-block-manager.h | 9 +- drivers/md/persistent-data/dm-space-map-common.c | 6 +- drivers/md/persistent-data/dm-space-map-common.h | 2 +- drivers/md/persistent-data/dm-space-map-disk.c | 49 +-- .../md/persistent-data/dm-transaction-manager.c | 8 +- drivers/scsi/device_handler/scsi_dh.c | 38 ++- include/linux/device-mapper.h | 19 +- include/linux/dm-ioctl.h | 6 +- include/scsi/scsi_dh.h | 6 + 29 files changed, 619 insertions(+), 455 deletions(-) diff --git a/Documentation/device-mapper/striped.txt b/Documentation/device-mapper/striped.txt index f34d323..45f3b91 100644 --- a/Documentation/device-mapper/striped.txt +++ b/Documentation/device-mapper/striped.txt @@ -9,15 +9,14 @@ devices in parallel. Parameters: <num devs> <chunk size> [<dev path> <offset>]+ <num devs>: Number of underlying devices. - <chunk size>: Size of each chunk of data. Must be a power-of-2 and at - least as large as the system's PAGE_SIZE. + <chunk size>: Size of each chunk of data. Must be at least as + large as the system's PAGE_SIZE. <dev path>: Full pathname to the underlying block-device, or a "major:minor" device-number. <offset>: Starting sector within the device. One or more underlying devices can be specified. The striped device size must -be a multiple of the chunk size and a multiple of the number of underlying -devices. +be a multiple of the chunk size multiplied by the number of underlying devices. Example scripts diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt index f5cfc62..652975b 100644 --- a/Documentation/device-mapper/thin-provisioning.txt +++ b/Documentation/device-mapper/thin-provisioning.txt @@ -231,6 +231,9 @@ i) Constructor no_discard_passdown: Don't pass discards down to the underlying data device, but just remove the mapping. + read_only: Don't allow any changes to be made to the pool + metadata. + Data block size must be between 64KB (128 sectors) and 1GB (2097152 sectors) inclusive. @@ -239,7 +242,7 @@ ii) Status <transaction id> <used metadata blocks>/<total metadata blocks> <used data blocks>/<total data blocks> <held metadata root> - + [no_]discard_passdown read_[only|write] transaction id: A 64-bit number used by userspace to help synchronise with metadata @@ -257,6 +260,21 @@ ii) Status held root. This feature is not yet implemented so '-' is always returned. + discard_passdown|no_discard_passdown + Whether or not discards are actually being passed down to the + underlying device. When this is enabled when loading the table, + it can get disabled if the underlying device doesn't support it. + + read_only|read_write + If the pool encounters certain types of device failures it will + drop into a read-only metadata mode in which no changes to + the pool metadata (like allocating new blocks) are permitted. 
+ + In serious cases where even a read-only mode is deemed unsafe + no further I/O will be permitted and the status will just + contain the string 'Fail'. The userspace recovery tools + should then be used. + iii) Messages create_thin @@ -329,3 +347,7 @@ regain some space then send the 'trim' message to the pool. ii) Status + + If the pool has encountered device errors and failed, the status + will just contain the string 'Fail'. The userspace recovery + tools should then be used. diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 3f06df5..ca4f8ad 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1241,7 +1241,6 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io) static int crypt_decode_key(u8 *key, char *hex, unsigned int size) { char buffer[3]; - char *endp; unsigned int i; buffer[2] = '\0'; @@ -1250,9 +1249,7 @@ static int crypt_decode_key(u8 *key, char *hex, unsigned int size) buffer[0] = *hex++; buffer[1] = *hex++; - key[i] = (u8)simple_strtoul(buffer, &endp, 16); - - if (endp != &buffer[2]) + if (kstrtou8(buffer, 16, &key[i])) return -EINVAL; } @@ -1702,7 +1699,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->num_flush_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; return 0; @@ -1742,7 +1739,7 @@ static int crypt_map(struct dm_target *ti, struct bio *bio, } static int crypt_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct crypt_config *cc = ti->private; unsigned int sz = 0; diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 2dc22dd..f53846f 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -295,7 +295,7 @@ static int delay_map(struct dm_target *ti, struct bio *bio, } static int delay_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct delay_c *dc = ti->private; int sz = 0; diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index aa70f7d..ebaa4f8 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -142,24 +142,19 @@ EXPORT_SYMBOL(dm_exception_store_type_unregister); static int set_chunk_size(struct dm_exception_store *store, const char *chunk_size_arg, char **error) { - unsigned long chunk_size_ulong; - char *value; + unsigned chunk_size; - chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10); - if (*chunk_size_arg == '\0' || *value != '\0' || - chunk_size_ulong > UINT_MAX) { + if (kstrtouint(chunk_size_arg, 10, &chunk_size)) { *error = "Invalid chunk size"; return -EINVAL; } - if (!chunk_size_ulong) { + if (!chunk_size) { store->chunk_size = store->chunk_mask = store->chunk_shift = 0; return 0; } - return dm_exception_store_set_chunk_size(store, - (unsigned) chunk_size_ulong, - error); + return dm_exception_store_set_chunk_size(store, chunk_size, error); } int dm_exception_store_set_chunk_size(struct dm_exception_store *store, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index ac49c01..cc15543 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -333,7 +333,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, } static int flakey_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct flakey_c *fc = 
ti->private; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index a1a3e6d..afd9598 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1054,6 +1054,7 @@ static void retrieve_status(struct dm_table *table, char *outbuf, *outptr; status_type_t type; size_t remaining, len, used = 0; + unsigned status_flags = 0; outptr = outbuf = get_result_buffer(param, param_size, &len); @@ -1090,7 +1091,9 @@ static void retrieve_status(struct dm_table *table, /* Get the status/table string from the target driver */ if (ti->type->status) { - if (ti->type->status(ti, type, outptr, remaining)) { + if (param->flags & DM_NOFLUSH_FLAG) + status_flags |= DM_STATUS_NOFLUSH_FLAG; + if (ti->type->status(ti, type, status_flags, outptr, remaining)) { param->flags |= DM_BUFFER_FULL_FLAG; break; } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 3639eea..1bf19a9 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -96,7 +96,7 @@ static int linear_map(struct dm_target *ti, struct bio *bio, } static int linear_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct linear_c *lc = (struct linear_c *) ti->private; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 638dae0..d8abb90 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -85,6 +85,7 @@ struct multipath { unsigned queue_io:1; /* Must we queue all I/O? */ unsigned queue_if_no_path:1; /* Queue I/O if last path fails? */ unsigned saved_queue_if_no_path:1; /* Saved state during suspension */ + unsigned retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */ unsigned pg_init_retries; /* Number of times to retry pg_init */ unsigned pg_init_count; /* Number of times pg_init called */ @@ -568,6 +569,8 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps int r; struct pgpath *p; struct multipath *m = ti->private; + struct request_queue *q = NULL; + const char *attached_handler_name; /* we need at least a path arg */ if (as->argc < 1) { @@ -586,13 +589,37 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps goto bad; } - if (m->hw_handler_name) { - struct request_queue *q = bdev_get_queue(p->path.dev->bdev); + if (m->retain_attached_hw_handler || m->hw_handler_name) + q = bdev_get_queue(p->path.dev->bdev); + + if (m->retain_attached_hw_handler) { + attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); + if (attached_handler_name) { + /* + * Reset hw_handler_name to match the attached handler + * and clear any hw_handler_params associated with the + * ignored handler. + * + * NB. This modifies the table line to show the actual + * handler instead of the original table passed in. + */ + kfree(m->hw_handler_name); + m->hw_handler_name = attached_handler_name; + + kfree(m->hw_handler_params); + m->hw_handler_params = NULL; + } + } + if (m->hw_handler_name) { + /* + * Increments scsi_dh reference, even when using an + * already-attached handler. + */ r = scsi_dh_attach(q, m->hw_handler_name); if (r == -EBUSY) { /* - * Already attached to different hw_handler, + * Already attached to different hw_handler: * try to reattach with correct one. 
*/ scsi_dh_detach(q); @@ -760,7 +787,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) const char *arg_name; static struct dm_arg _args[] = { - {0, 5, "invalid number of feature args"}, + {0, 6, "invalid number of feature args"}, {1, 50, "pg_init_retries must be between 1 and 50"}, {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, }; @@ -781,6 +808,11 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) continue; } + if (!strcasecmp(arg_name, "retain_attached_hw_handler")) { + m->retain_attached_hw_handler = 1; + continue; + } + if (!strcasecmp(arg_name, "pg_init_retries") && (argc >= 1)) { r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); @@ -1346,7 +1378,7 @@ static void multipath_resume(struct dm_target *ti) * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ */ static int multipath_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { int sz = 0; unsigned long flags; @@ -1364,13 +1396,16 @@ static int multipath_status(struct dm_target *ti, status_type_t type, else { DMEMIT("%u ", m->queue_if_no_path + (m->pg_init_retries > 0) * 2 + - (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2); + (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 + + m->retain_attached_hw_handler); if (m->queue_if_no_path) DMEMIT("queue_if_no_path "); if (m->pg_init_retries) DMEMIT("pg_init_retries %u ", m->pg_init_retries); if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs); + if (m->retain_attached_hw_handler) + DMEMIT("retain_attached_hw_handler "); } if (!m->hw_handler_name || type == STATUSTYPE_INFO) @@ -1656,7 +1691,7 @@ out: *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr, diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 017c34d..ca9a246 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -353,6 +353,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv, { unsigned i, rebuild_cnt = 0; unsigned long value, region_size = 0; + sector_t max_io_len; char *key; /* @@ -522,14 +523,12 @@ static int parse_raid_params(struct raid_set *rs, char **argv, return -EINVAL; if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; + max_io_len = rs->md.chunk_sectors; else - rs->ti->split_io = region_size; + max_io_len = region_size; - if (rs->md.chunk_sectors) - rs->ti->split_io = rs->md.chunk_sectors; - else - rs->ti->split_io = region_size; + if (dm_set_target_max_io_len(rs->ti, max_io_len)) + return -EINVAL; /* Assume there are no metadata devices until the drives are parsed */ rs->md.persistent = 0; @@ -1067,7 +1066,7 @@ static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_c } static int raid_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct raid_set *rs = ti->private; unsigned raid_param_cnt = 1; /* at least 1 for chunksize */ diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index b58b7a3..bc5ddba 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1081,10 +1081,14 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->private = ms; - ti->split_io = 
dm_rh_get_region_size(ms->rh); + + r = dm_set_target_max_io_len(ti, dm_rh_get_region_size(ms->rh)); + if (r) + goto err_free_context; + ti->num_flush_requests = 1; ti->num_discard_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); @@ -1363,7 +1367,7 @@ static char device_status_char(struct mirror *m) static int mirror_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned int m, sz = 0; struct mirror_set *ms = (struct mirror_set *) ti->private; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6f75887..a143921 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -691,7 +691,7 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) * Return a minimum chunk size of all snapshots that have the specified origin. * Return zero if the origin has no snapshots. */ -static sector_t __minimum_chunk_size(struct origin *o) +static uint32_t __minimum_chunk_size(struct origin *o) { struct dm_snapshot *snap; unsigned chunk_size = 0; @@ -701,7 +701,7 @@ static sector_t __minimum_chunk_size(struct origin *o) chunk_size = min_not_zero(chunk_size, snap->store->chunk_size); - return chunk_size; + return (uint32_t) chunk_size; } /* @@ -1172,7 +1172,10 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->error = "Chunk size not set"; goto bad_read_metadata; } - ti->split_io = s->store->chunk_size; + + r = dm_set_target_max_io_len(ti, s->store->chunk_size); + if (r) + goto bad_read_metadata; return 0; @@ -1239,7 +1242,7 @@ static void __handover_exceptions(struct dm_snapshot *snap_src, snap_dest->store->snap = snap_dest; snap_src->store->snap = snap_src; - snap_dest->ti->split_io = snap_dest->store->chunk_size; + snap_dest->ti->max_io_len = snap_dest->store->chunk_size; snap_dest->valid = snap_src->valid; /* @@ -1817,9 +1820,9 @@ static void snapshot_resume(struct dm_target *ti) up_write(&s->lock); } -static sector_t get_origin_minimum_chunksize(struct block_device *bdev) +static uint32_t get_origin_minimum_chunksize(struct block_device *bdev) { - sector_t min_chunksize; + uint32_t min_chunksize; down_read(&_origins_lock); min_chunksize = __minimum_chunk_size(__lookup_origin(bdev)); @@ -1838,15 +1841,15 @@ static void snapshot_merge_resume(struct dm_target *ti) snapshot_resume(ti); /* - * snapshot-merge acts as an origin, so set ti->split_io + * snapshot-merge acts as an origin, so set ti->max_io_len */ - ti->split_io = get_origin_minimum_chunksize(s->origin->bdev); + ti->max_io_len = get_origin_minimum_chunksize(s->origin->bdev); start_merge(s); } static int snapshot_status(struct dm_target *ti, status_type_t type, - char *result, unsigned int maxlen) + unsigned status_flags, char *result, unsigned maxlen) { unsigned sz = 0; struct dm_snapshot *snap = ti->private; @@ -2073,12 +2076,12 @@ static int origin_write_extent(struct dm_snapshot *merging_snap, struct origin *o; /* - * The origin's __minimum_chunk_size() got stored in split_io + * The origin's __minimum_chunk_size() got stored in max_io_len * by snapshot_merge_resume(). 
*/ down_read(&_origins_lock); o = __lookup_origin(merging_snap->origin->bdev); - for (n = 0; n < size; n += merging_snap->ti->split_io) + for (n = 0; n < size; n += merging_snap->ti->max_io_len) if (__origin_write(&o->snapshots, sector + n, NULL) == DM_MAPIO_SUBMITTED) must_wait = 1; @@ -2138,18 +2141,18 @@ static int origin_map(struct dm_target *ti, struct bio *bio, } /* - * Set the target "split_io" field to the minimum of all the snapshots' + * Set the target "max_io_len" field to the minimum of all the snapshots' * chunk sizes. */ static void origin_resume(struct dm_target *ti) { struct dm_dev *dev = ti->private; - ti->split_io = get_origin_minimum_chunksize(dev->bdev); + ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); } -static int origin_status(struct dm_target *ti, status_type_t type, char *result, - unsigned int maxlen) +static int origin_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct dm_dev *dev = ti->private; @@ -2176,7 +2179,6 @@ static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, return max_size; bvm->bi_bdev = dev->bdev; - bvm->bi_sector = bvm->bi_sector; return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 35c94ff..a087bf2 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -26,14 +26,12 @@ struct stripe { struct stripe_c { uint32_t stripes; int stripes_shift; - sector_t stripes_mask; /* The size of this target / num. stripes */ sector_t stripe_width; - /* stripe chunk size */ - uint32_t chunk_shift; - sector_t chunk_mask; + uint32_t chunk_size; + int chunk_size_shift; /* Needed for handling events */ struct dm_target *ti; @@ -91,7 +89,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc, /* * Construct a striped mapping. 
- * <number of stripes> <chunk size (2^^n)> [<dev_path> <offset>]+ + * <number of stripes> <chunk size> [<dev_path> <offset>]+ */ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) { @@ -99,7 +97,6 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) struct stripe_c *sc; sector_t width; uint32_t stripes; uint32_t chunk_size; - char *end; int r; unsigned int i; @@ -108,34 +105,23 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - stripes = simple_strtoul(argv[0], &end, 10); - if (!stripes || *end) { + if (kstrtouint(argv[0], 10, &stripes) || !stripes) { ti->error = "Invalid stripe count"; return -EINVAL; } - chunk_size = simple_strtoul(argv[1], &end, 10); - if (*end) { + if (kstrtouint(argv[1], 10, &chunk_size) || !chunk_size) { ti->error = "Invalid chunk_size"; return -EINVAL; } - /* - * chunk_size is a power of two - */ - if (!is_power_of_2(chunk_size) || - (chunk_size < (PAGE_SIZE >> SECTOR_SHIFT))) { - ti->error = "Invalid chunk size"; - return -EINVAL; - } - - if (ti->len & (chunk_size - 1)) { + width = ti->len; + if (sector_div(width, chunk_size)) { ti->error = "Target length not divisible by " "chunk size"; return -EINVAL; } - width = ti->len; if (sector_div(width, stripes)) { ti->error = "Target length not divisible by " "number of stripes"; @@ -167,17 +153,21 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (stripes & (stripes - 1)) sc->stripes_shift = -1; - else { - sc->stripes_shift = ffs(stripes) - 1; - sc->stripes_mask = ((sector_t) stripes) - 1; - } + else + sc->stripes_shift = __ffs(stripes); + + r = dm_set_target_max_io_len(ti, chunk_size); + if (r) + return r; - ti->split_io = chunk_size; ti->num_flush_requests = stripes; ti->num_discard_requests = stripes; - sc->chunk_shift = ffs(chunk_size) - 1; - sc->chunk_mask = ((sector_t) chunk_size) - 1; + sc->chunk_size = chunk_size; + if (chunk_size & (chunk_size - 1)) + sc->chunk_size_shift = -1; + else + sc->chunk_size_shift = __ffs(chunk_size); /* * Get the stripe destinations. 
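
A worked illustration of the new chunk addressing may help here. The sketch below is a userspace approximation, not part of the patch: plain '/' and '%' stand in for the kernel's sector_div(), and the compiler builtin __builtin_ctz() stands in for __ffs(). It shows the scheme the new chunk_size/chunk_size_shift fields feed: a power-of-2 chunk size takes a shift/mask fast path, any other size falls back to division.

    /*
     * Userspace sketch of the stripe chunk addressing (illustrative only).
     * Build with: cc -o stripe-sketch stripe-sketch.c
     */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    struct stripe_map {
            uint32_t stripes;
            uint32_t chunk_size;      /* in sectors */
            int chunk_size_shift;     /* -1 if chunk_size is not a power of 2 */
    };

    static void stripe_map_init(struct stripe_map *m, uint32_t stripes,
                                uint32_t chunk_size)
    {
            m->stripes = stripes;
            m->chunk_size = chunk_size;
            /* x & (x - 1) clears the lowest set bit: zero iff x is a power of 2 */
            if (chunk_size & (chunk_size - 1))
                    m->chunk_size_shift = -1;
            else
                    m->chunk_size_shift = __builtin_ctz(chunk_size);
    }

    static void map_sector(const struct stripe_map *m, uint64_t sector,
                           uint32_t *stripe, uint64_t *result)
    {
            uint64_t chunk = sector, chunk_offset;

            if (m->chunk_size_shift < 0) {
                    chunk_offset = chunk % m->chunk_size;  /* sector_div() in the kernel */
                    chunk /= m->chunk_size;
            } else {
                    chunk_offset = chunk & (m->chunk_size - 1);
                    chunk >>= m->chunk_size_shift;
            }

            /* which stripe owns this chunk, and the chunk index within it */
            *stripe = chunk % m->stripes;
            chunk /= m->stripes;

            *result = (m->chunk_size_shift < 0 ? chunk * m->chunk_size
                                               : chunk << m->chunk_size_shift)
                      + chunk_offset;
    }

    int main(void)
    {
            struct stripe_map m;
            uint32_t stripe;
            uint64_t dev_sector;

            stripe_map_init(&m, 3, 24);  /* 24 sectors: not a power of 2 */
            map_sector(&m, 100, &stripe, &dev_sector);
            /* sector 100 = chunk 4, offset 4; chunk 4 -> stripe 1, chunk 1 there */
            printf("stripe %u, sector %llu\n", stripe,
                   (unsigned long long)dev_sector);
            assert(stripe == 1 && dev_sector == 28);
            return 0;
    }

The same pattern (shift when the count is a power of 2, sector_div otherwise) is applied independently to the stripe count, which is why stripes_shift and chunk_size_shift are tracked separately above.
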
@@ -216,17 +206,29 @@ static void stripe_dtr(struct dm_target *ti) static void stripe_map_sector(struct stripe_c *sc, sector_t sector, uint32_t *stripe, sector_t *result) { - sector_t offset = dm_target_offset(sc->ti, sector); - sector_t chunk = offset >> sc->chunk_shift; + sector_t chunk = dm_target_offset(sc->ti, sector); + sector_t chunk_offset; + + if (sc->chunk_size_shift < 0) + chunk_offset = sector_div(chunk, sc->chunk_size); + else { + chunk_offset = chunk & (sc->chunk_size - 1); + chunk >>= sc->chunk_size_shift; + } if (sc->stripes_shift < 0) *stripe = sector_div(chunk, sc->stripes); else { - *stripe = chunk & sc->stripes_mask; + *stripe = chunk & (sc->stripes - 1); chunk >>= sc->stripes_shift; } - *result = (chunk << sc->chunk_shift) | (offset & sc->chunk_mask); + if (sc->chunk_size_shift < 0) + chunk *= sc->chunk_size; + else + chunk <<= sc->chunk_size_shift; + + *result = chunk + chunk_offset; } static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, @@ -237,9 +239,16 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector, stripe_map_sector(sc, sector, &stripe, result); if (stripe == target_stripe) return; - *result &= ~sc->chunk_mask; /* round down */ + + /* round down */ + sector = *result; + if (sc->chunk_size_shift < 0) + *result -= sector_div(sector, sc->chunk_size); + else + *result = sector & ~(sector_t)(sc->chunk_size - 1); + if (target_stripe < stripe) - *result += sc->chunk_mask + 1; /* next chunk */ + *result += sc->chunk_size; /* next chunk */ } static int stripe_map_discard(struct stripe_c *sc, struct bio *bio, @@ -302,8 +311,8 @@ static int stripe_map(struct dm_target *ti, struct bio *bio, * */ -static int stripe_status(struct dm_target *ti, - status_type_t type, char *result, unsigned int maxlen) +static int stripe_status(struct dm_target *ti, status_type_t type, + unsigned status_flags, char *result, unsigned maxlen) { struct stripe_c *sc = (struct stripe_c *) ti->private; char buffer[sc->stripes + 1]; @@ -324,7 +333,7 @@ static int stripe_status(struct dm_target *ti, case STATUSTYPE_TABLE: DMEMIT("%d %llu", sc->stripes, - (unsigned long long)sc->chunk_mask + 1); + (unsigned long long)sc->chunk_size); for (i = 0; i < sc->stripes; i++) DMEMIT(" %s %llu", sc->stripe[i].dev->name, (unsigned long long)sc->stripe[i].physical_start); @@ -391,7 +400,7 @@ static void stripe_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct stripe_c *sc = ti->private; - unsigned chunk_size = (sc->chunk_mask + 1) << 9; + unsigned chunk_size = sc->chunk_size << SECTOR_SHIFT; blk_limits_io_min(limits, chunk_size); blk_limits_io_opt(limits, chunk_size * sc->stripes); @@ -419,7 +428,7 @@ static int stripe_merge(struct dm_target *ti, struct bvec_merge_data *bvm, static struct target_type stripe_target = { .name = "striped", - .version = {1, 4, 0}, + .version = {1, 5, 0}, .module = THIS_MODULE, .ctr = stripe_ctr, .dtr = stripe_dtr, diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 2c80364..f78f9d3 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -185,6 +185,12 @@ struct dm_pool_metadata { unsigned long flags; sector_t data_block_size; bool read_only:1; + + /* + * Set if a transaction has to be aborted but the attempt to roll back + * to the previous (good) transaction failed. The only pool metadata + * operation possible in this state is the closing of the device. 
+ */ bool fail_io:1; }; @@ -194,10 +200,8 @@ struct dm_thin_device { dm_thin_id id; int open_count; - bool changed:1; bool aborted_with_changes:1; - uint64_t mapped_blocks; uint64_t transaction_id; uint32_t creation_time; @@ -348,17 +352,21 @@ static int subtree_equal(void *context, void *value1_le, void *value2_le) /*----------------------------------------------------------------*/ -static int superblock_lock_zero(struct dm_pool_metadata *pmd, struct dm_block **sblock) +static int superblock_lock_zero(struct dm_pool_metadata *pmd, + struct dm_block **sblock) { - return dm_bm_write_lock_zero(pmd->bm, THIN_SUPERBLOCK_LOCATION, &sb_validator, sblock); + return dm_bm_write_lock_zero(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, sblock); } -static int superblock_lock(struct dm_pool_metadata *pmd, struct dm_block **sblock) +static int superblock_lock(struct dm_pool_metadata *pmd, + struct dm_block **sblock) { - return dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, &sb_validator, sblock); + return dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, sblock); } -static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) +static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result) { int r; unsigned i; @@ -427,8 +435,8 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) { int r; struct dm_block *sblock; - struct thin_disk_superblock *disk_super; size_t metadata_len, data_len; + struct thin_disk_superblock *disk_super; sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; if (bdev_size > THIN_METADATA_MAX_SECTORS) @@ -463,26 +471,25 @@ static int __write_initial_superblock(struct dm_pool_metadata *pmd) disk_super->trans_id = 0; disk_super->held_root = 0; - r = dm_sm_copy_root(pmd->metadata_sm, - &disk_super->metadata_space_map_root, + r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, metadata_len); if (r < 0) - goto out_locked; + goto bad_locked; - r = dm_sm_copy_root(pmd->data_sm, - &disk_super->data_space_map_root, + r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, data_len); if (r < 0) - goto out_locked; + goto bad_locked; disk_super->data_mapping_root = cpu_to_le64(pmd->root); disk_super->device_details_root = cpu_to_le64(pmd->details_root); + disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); return dm_tm_commit(pmd->tm, sblock); -out_locked: +bad_locked: dm_bm_unlock(sblock); return r; } @@ -493,7 +500,7 @@ static int __format_metadata(struct dm_pool_metadata *pmd) r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, &pmd->tm, &pmd->metadata_sm); - if (r) { + if (r < 0) { DMERR("tm_create_with_sm failed"); return r; } @@ -502,39 +509,41 @@ static int __format_metadata(struct dm_pool_metadata *pmd) if (IS_ERR(pmd->data_sm)) { DMERR("sm_disk_create failed"); r = PTR_ERR(pmd->data_sm); - goto cleanup_tm; + goto bad_cleanup_tm; } pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); if (!pmd->nb_tm) { DMERR("could not create non-blocking clone tm"); r = -ENOMEM; - goto cleanup_data_sm; + goto bad_cleanup_data_sm; } __setup_btree_details(pmd); r = dm_btree_empty(&pmd->info, &pmd->root); if (r < 0) - goto cleanup_data_sm; + goto bad_cleanup_nb_tm; r = dm_btree_empty(&pmd->details_info, &pmd->details_root); if (r < 0) { DMERR("couldn't create devices root"); - goto 
cleanup_data_sm; + goto bad_cleanup_nb_tm; } r = __write_initial_superblock(pmd); if (r) - goto cleanup_data_sm; + goto bad_cleanup_nb_tm; return 0; -cleanup_data_sm: +bad_cleanup_nb_tm: + dm_tm_destroy(pmd->nb_tm); +bad_cleanup_data_sm: dm_sm_destroy(pmd->data_sm); -cleanup_tm: - dm_sm_destroy(pmd->metadata_sm); +bad_cleanup_tm: dm_tm_destroy(pmd->tm); + dm_sm_destroy(pmd->metadata_sm); return r; } @@ -542,12 +551,11 @@ cleanup_tm: static int __check_incompat_features(struct thin_disk_superblock *disk_super, struct dm_pool_metadata *pmd) { - u32 features; + uint32_t features; features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; if (features) { - DMERR("could not access metadata due to " - "unsupported optional features (%lx).", + DMERR("could not access metadata due to unsupported optional features (%lx).", (unsigned long)features); return -EINVAL; } @@ -560,8 +568,7 @@ static int __check_incompat_features(struct thin_disk_superblock *disk_super, features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; if (features) { - DMERR("could not access metadata RDWR due to " - "unsupported optional features (%lx).", + DMERR("could not access metadata RDWR due to unsupported optional features (%lx).", (unsigned long)features); return -EINVAL; } @@ -575,7 +582,8 @@ static int __open_metadata(struct dm_pool_metadata *pmd) struct dm_block *sblock; struct thin_disk_superblock *disk_super; - r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, &sb_validator, &sblock); + r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, &sblock); if (r < 0) { DMERR("couldn't read superblock"); return r; @@ -585,7 +593,7 @@ static int __open_metadata(struct dm_pool_metadata *pmd) r = __check_incompat_features(disk_super, pmd); if (r < 0) - goto out_unlock_sblock; + goto bad_unlock_sblock; r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, disk_super->metadata_space_map_root, @@ -593,7 +601,7 @@ static int __open_metadata(struct dm_pool_metadata *pmd) &pmd->tm, &pmd->metadata_sm); if (r < 0) { DMERR("tm_open_with_sm failed"); - goto out_unlock_sblock; + goto bad_unlock_sblock; } pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root, @@ -601,64 +609,72 @@ static int __open_metadata(struct dm_pool_metadata *pmd) if (IS_ERR(pmd->data_sm)) { DMERR("sm_disk_open failed"); r = PTR_ERR(pmd->data_sm); - goto out_cleanup_tm; + goto bad_cleanup_tm; } pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); if (!pmd->nb_tm) { DMERR("could not create non-blocking clone tm"); r = -ENOMEM; - goto out_cleanup_data_sm; + goto bad_cleanup_data_sm; } __setup_btree_details(pmd); return dm_bm_unlock(sblock); -out_cleanup_data_sm: +bad_cleanup_data_sm: dm_sm_destroy(pmd->data_sm); -out_cleanup_tm: - dm_sm_destroy(pmd->metadata_sm); +bad_cleanup_tm: dm_tm_destroy(pmd->tm); -out_unlock_sblock: + dm_sm_destroy(pmd->metadata_sm); +bad_unlock_sblock: dm_bm_unlock(sblock); return r; } -static int __open_or_format_metadata(struct dm_pool_metadata *pmd, enum dm_thin_metadata_mode mode) +static int __open_or_format_metadata(struct dm_pool_metadata *pmd, bool format_device) { int r, unformatted; - r = superblock_all_zeroes(pmd->bm, &unformatted); + r = __superblock_all_zeroes(pmd->bm, &unformatted); if (r) return r; if (unformatted) - return (mode & DM_THIN_FORMAT) ? __format_metadata(pmd) : -EPERM; - else - return (mode & DM_THIN_OPEN) ? __open_metadata(pmd) : -EPERM; + return format_device ? 
__format_metadata(pmd) : -EPERM; + + return __open_metadata(pmd); } -static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, - enum dm_thin_metadata_mode mode) +static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool format_device) { int r; pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE, THIN_METADATA_CACHE_SIZE, THIN_MAX_CONCURRENT_LOCKS); - if (!pmd->bm) { + if (IS_ERR(pmd->bm)) { DMERR("could not create block manager"); - return -ENOMEM; + return PTR_ERR(pmd->bm); } - r = __open_or_format_metadata(pmd, mode); + r = __open_or_format_metadata(pmd, format_device); if (r) dm_block_manager_destroy(pmd->bm); return r; } +static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) +{ + dm_sm_destroy(pmd->data_sm); + dm_sm_destroy(pmd->metadata_sm); + dm_tm_destroy(pmd->nb_tm); + dm_tm_destroy(pmd->tm); + dm_block_manager_destroy(pmd->bm); +} + static int __begin_transaction(struct dm_pool_metadata *pmd) { int r; @@ -683,7 +699,7 @@ static int __begin_transaction(struct dm_pool_metadata *pmd) pmd->data_block_size = le32_to_cpu(disk_super->data_block_size); dm_bm_unlock(sblock); - return r; + return 0; } static int __write_changed_details(struct dm_pool_metadata *pmd) @@ -759,19 +775,17 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) disk_super = dm_block_data(sblock); disk_super->time = cpu_to_le32(pmd->time); - disk_super->trans_id = cpu_to_le64(pmd->trans_id); disk_super->data_mapping_root = cpu_to_le64(pmd->root); disk_super->device_details_root = cpu_to_le64(pmd->details_root); + disk_super->trans_id = cpu_to_le64(pmd->trans_id); disk_super->flags = cpu_to_le32(pmd->flags); - r = dm_sm_copy_root(pmd->metadata_sm, - &disk_super->metadata_space_map_root, + r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, metadata_len); if (r < 0) goto out_locked; - r = dm_sm_copy_root(pmd->data_sm, - &disk_super->data_space_map_root, + r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, data_len); if (r < 0) goto out_locked; @@ -785,7 +799,7 @@ out_locked: struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, sector_t data_block_size, - enum dm_thin_metadata_mode mode) + bool format_device) { int r; struct dm_pool_metadata *pmd; @@ -799,12 +813,12 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, init_rwsem(&pmd->root_lock); pmd->time = 0; INIT_LIST_HEAD(&pmd->thin_devices); - pmd->read_only = 0; - pmd->fail_io = 0; + pmd->read_only = false; + pmd->fail_io = false; pmd->bdev = bdev; pmd->data_block_size = data_block_size; - r = __create_persistent_data_objects(pmd, mode); + r = __create_persistent_data_objects(pmd, format_device); if (r) { kfree(pmd); return ERR_PTR(r); @@ -820,15 +834,6 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return pmd; } -static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) -{ - dm_sm_destroy(pmd->data_sm); - dm_sm_destroy(pmd->metadata_sm); - dm_tm_destroy(pmd->tm); - dm_tm_destroy(pmd->nb_tm); - dm_block_manager_destroy(pmd->bm); -} - int dm_pool_metadata_close(struct dm_pool_metadata *pmd) { int r; @@ -862,6 +867,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) if (!pmd->fail_io) __destroy_persistent_data_objects(pmd); + kfree(pmd); return 0; } @@ -922,7 +928,7 @@ static int __open_device(struct dm_pool_metadata *pmd, (*td)->id = dev; (*td)->open_count = 1; (*td)->changed = changed; - (*td)->aborted_with_changes = 0; + 
(*td)->aborted_with_changes = false; (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks); (*td)->transaction_id = le64_to_cpu(details_le.transaction_id); (*td)->creation_time = le32_to_cpu(details_le.creation_time); @@ -984,10 +990,11 @@ static int __create_thin(struct dm_pool_metadata *pmd, int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __create_thin(pmd, dev); + if (!pmd->fail_io) + r = __create_thin(pmd, dev); up_write(&pmd->root_lock); return r; @@ -1007,9 +1014,6 @@ static int __set_snapshot_details(struct dm_pool_metadata *pmd, td->changed = 1; td->snapshotted_time = time; - /* - * snap's changed flag already set when the device was created. - */ snap->mapped_blocks = td->mapped_blocks; snap->snapshotted_time = time; __close_device(td); @@ -1077,10 +1081,11 @@ int dm_pool_create_snap(struct dm_pool_metadata *pmd, dm_thin_id dev, dm_thin_id origin) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __create_snap(pmd, dev, origin); + if (!pmd->fail_io) + r = __create_snap(pmd, dev, origin); up_write(&pmd->root_lock); return r; @@ -1119,10 +1124,11 @@ static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev) int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __delete_device(pmd, dev); + if (!pmd->fail_io) + r = __delete_device(pmd, dev); up_write(&pmd->root_lock); return r; @@ -1132,20 +1138,22 @@ int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, uint64_t current_id, uint64_t new_id) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); + if (pmd->fail_io) - r = -EINVAL; - else { - if (pmd->trans_id != current_id) { - DMERR("mismatched transaction id"); - r = -EINVAL; - } else { - pmd->trans_id = new_id; - r = 0; - } + goto out; + + if (pmd->trans_id != current_id) { + DMERR("mismatched transaction id"); + goto out; } + + pmd->trans_id = new_id; + r = 0; + +out: up_write(&pmd->root_lock); return r; @@ -1154,12 +1162,10 @@ int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, uint64_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - if (pmd->fail_io) - r = -EINVAL; - else { + if (!pmd->fail_io) { *result = pmd->trans_id; r = 0; } @@ -1188,8 +1194,10 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) held_root = dm_block_location(copy); disk_super = dm_block_data(copy); + if (le64_to_cpu(disk_super->held_root)) { DMWARN("Pool metadata snapshot already exists: release this before taking another."); + dm_tm_dec(pmd->tm, held_root); dm_tm_unlock(pmd->tm, copy); return -EBUSY; @@ -1221,17 +1229,17 @@ static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) disk_super = dm_block_data(sblock); disk_super->held_root = cpu_to_le64(held_root); - dm_tm_unlock(pmd->tm, sblock); - + dm_bm_unlock(sblock); return 0; } int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? 
-EINVAL : __reserve_metadata_snap(pmd); + if (!pmd->fail_io) + r = __reserve_metadata_snap(pmd); up_write(&pmd->root_lock); return r; @@ -1252,10 +1260,10 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) held_root = le64_to_cpu(disk_super->held_root); disk_super->held_root = cpu_to_le64(0); - dm_tm_unlock(pmd->tm, sblock); + dm_bm_unlock(sblock); if (!held_root) { - DMWARN("pool has no metadata snap"); + DMWARN("No pool metadata snapshot found: nothing to release."); return -EINVAL; } @@ -1267,15 +1275,17 @@ static int __release_metadata_snap(struct dm_pool_metadata *pmd) dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root)); dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root)); dm_sm_dec_block(pmd->metadata_sm, held_root); + return dm_tm_unlock(pmd->tm, copy); } int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __release_metadata_snap(pmd); + if (!pmd->fail_io) + r = __release_metadata_snap(pmd); up_write(&pmd->root_lock); return r; @@ -1302,10 +1312,11 @@ static int __get_metadata_snap(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __get_metadata_snap(pmd, result); + if (!pmd->fail_io) + r = __get_metadata_snap(pmd, result); up_read(&pmd->root_lock); return r; @@ -1314,10 +1325,11 @@ int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev, struct dm_thin_device **td) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __open_device(pmd, dev, 0, td); + if (!pmd->fail_io) + r = __open_device(pmd, dev, 0, td); up_write(&pmd->root_lock); return r; @@ -1337,7 +1349,7 @@ dm_thin_id dm_thin_dev_id(struct dm_thin_device *td) return td->id; } -static int __snapshotted_since(struct dm_thin_device *td, uint32_t time) +static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) { return td->snapshotted_time > time; } @@ -1345,31 +1357,31 @@ static int __snapshotted_since(struct dm_thin_device *td, uint32_t time) int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, int can_block, struct dm_thin_lookup_result *result) { - int r; + int r = -EINVAL; uint64_t block_time = 0; __le64 value; struct dm_pool_metadata *pmd = td->pmd; dm_block_t keys[2] = { td->id, block }; - - if (pmd->fail_io) - return -EINVAL; + struct dm_btree_info *info; if (can_block) { down_read(&pmd->root_lock); - r = dm_btree_lookup(&pmd->info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else if (down_read_trylock(&pmd->root_lock)) { - r = dm_btree_lookup(&pmd->nb_info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else + info = &pmd->info; + } else if (down_read_trylock(&pmd->root_lock)) + info = &pmd->nb_info; + else return -EWOULDBLOCK; + if (pmd->fail_io) + goto out; + + r = dm_btree_lookup(info, pmd->root, keys, &value); + if (!r) + block_time = le64_to_cpu(value); + +out: + up_read(&pmd->root_lock); + if (!r) { dm_block_t exception_block; uint32_t exception_time; @@ -1408,10 +1420,11 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, dm_block_t data_block) { - int 
r; + int r = -EINVAL; down_write(&td->pmd->root_lock); - r = td->pmd->fail_io ? -EINVAL : __insert(td, block, data_block); + if (!td->pmd->fail_io) + r = __insert(td, block, data_block); up_write(&td->pmd->root_lock); return r; @@ -1433,45 +1446,47 @@ static int __remove(struct dm_thin_device *td, dm_block_t block) return 0; } -bool dm_thin_changed_this_transaction(struct dm_thin_device *td) +int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) { - int r; + int r = -EINVAL; - down_read(&td->pmd->root_lock); - r = td->changed; - up_read(&td->pmd->root_lock); + down_write(&td->pmd->root_lock); + if (!td->pmd->fail_io) + r = __remove(td, block); + up_write(&td->pmd->root_lock); return r; } -bool dm_thin_aborted_changes(struct dm_thin_device *td) +bool dm_thin_changed_this_transaction(struct dm_thin_device *td) { int r; down_read(&td->pmd->root_lock); - r = td->aborted_with_changes; + r = td->changed; up_read(&td->pmd->root_lock); return r; } -int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) +bool dm_thin_aborted_changes(struct dm_thin_device *td) { - int r; + bool r; - down_write(&td->pmd->root_lock); - r = td->pmd->fail_io ? -EINVAL : __remove(td, block); - up_write(&td->pmd->root_lock); + down_read(&td->pmd->root_lock); + r = td->aborted_with_changes; + up_read(&td->pmd->root_lock); return r; } int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : dm_sm_new_block(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_new_block(pmd->data_sm, result); up_write(&pmd->root_lock); return r; @@ -1479,13 +1494,11 @@ int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - if (pmd->fail_io) { - r = -EINVAL; + if (pmd->fail_io) goto out; - } r = __commit_transaction(pmd); if (r <= 0) @@ -1510,31 +1523,31 @@ static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - if (pmd->fail_io) { - r = -EINVAL; + if (pmd->fail_io) goto out; - } __set_abort_with_changes_flags(pmd); __destroy_persistent_data_objects(pmd); - r = __create_persistent_data_objects(pmd, DM_THIN_OPEN); + r = __create_persistent_data_objects(pmd, false); if (r) - pmd->fail_io = 1; + pmd->fail_io = true; out: up_write(&pmd->root_lock); + return r; } int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : dm_sm_get_nr_free(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_free(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1543,10 +1556,11 @@ int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *resul int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = pmd->fail_io ? 
-EINVAL : dm_sm_get_nr_free(pmd->metadata_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_free(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1555,10 +1569,11 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : dm_sm_get_nr_blocks(pmd->metadata_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1575,10 +1590,11 @@ int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result) int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : dm_sm_get_nr_blocks(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_blocks(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1586,13 +1602,11 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result) { - int r; + int r = -EINVAL; struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - if (pmd->fail_io) - r = -EINVAL; - else { + if (!pmd->fail_io) { *result = td->mapped_blocks; r = 0; } @@ -1620,11 +1634,12 @@ static int __highest_block(struct dm_thin_device *td, dm_block_t *result) int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, dm_block_t *result) { - int r; + int r = -EINVAL; struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __highest_block(td, result); + if (!pmd->fail_io) + r = __highest_block(td, result); up_read(&pmd->root_lock); return r; @@ -1652,10 +1667,11 @@ static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = pmd->fail_io ? -EINVAL : __resize_data_dev(pmd, new_count); + if (!pmd->fail_io) + r = __resize_data_dev(pmd, new_count); up_write(&pmd->root_lock); return r; @@ -1664,7 +1680,7 @@ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd) { down_write(&pmd->root_lock); - pmd->read_only = 1; - dm_bm_read_only(pmd->bm); + pmd->read_only = true; + dm_bm_set_read_only(pmd->bm); up_write(&pmd->root_lock); } diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index b7d0946..0cecc37 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -37,14 +37,9 @@ typedef uint64_t dm_thin_id; /* * Reopens or creates a new, empty metadata volume. */ -enum dm_thin_metadata_mode { - DM_THIN_OPEN = 1, - DM_THIN_FORMAT = 2 -}; - struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, sector_t data_block_size, - enum dm_thin_metadata_mode mode); + bool format_device); int dm_pool_metadata_close(struct dm_pool_metadata *pmd); @@ -86,8 +81,8 @@ int dm_pool_commit_metadata(struct dm_pool_metadata *pmd); /* * Discards all uncommitted changes. Rereads the superblock, rolling back - * to the last good transaction. Thin devices remain open, if they had - * uncommitted changes dm_thin_aborted_changes() will tell you. + * to the last good transaction. Thin devices remain open. 
+ dm_thin_aborted_changes() tells you if they had uncommitted changes. * * If this call fails it's only useful to call dm_pool_metadata_close(). * All other methods will fail with -EINVAL. diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 6a1e76e..a7f92ff 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -5,6 +5,7 @@ */ #include "dm-thin-metadata.h" +#include "dm.h" #include <linux/device-mapper.h> #include <linux/dm-io.h> @@ -267,6 +268,7 @@ out: spin_unlock_irqrestore(&prison->lock, flags); *ref = cell; + return r; } @@ -501,7 +503,7 @@ struct dm_thin_new_mapping; enum pool_mode { PM_WRITE, /* metadata may be changed */ PM_READ_ONLY, /* metadata may not be changed */ - PM_FAIL /* all io fails */ + PM_FAIL, /* all I/O fails */ }; struct pool_features { @@ -526,6 +528,7 @@ struct pool { dm_block_t low_water_blocks; uint32_t sectors_per_block; + int sectors_per_block_shift; struct pool_features pf; unsigned low_water_triggered:1; /* A dm event has been sent */ @@ -563,6 +566,9 @@ struct pool { process_mapping_fn process_prepared_discard; }; +static enum pool_mode get_pool_mode(struct pool *pool); +static void set_pool_mode(struct pool *pool, enum pool_mode mode); + /* * Target context for a pool. */ @@ -695,32 +701,30 @@ static void requeue_io(struct thin_c *tc) * target. */ -/* - * do_div wrappers that don't modify the dividend - */ -static sector_t dm_thin_do_div(sector_t a, __u32 b) +static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) { - do_div(a, b); - return a; -} + sector_t block_nr = bio->bi_sector; -static sector_t dm_thin_do_mod(sector_t a, __u32 b) -{ - return do_div(a, b); -} + if (tc->pool->sectors_per_block_shift < 0) + (void) sector_div(block_nr, tc->pool->sectors_per_block); + else + block_nr >>= tc->pool->sectors_per_block_shift; -static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio) -{ - return dm_thin_do_div(bio->bi_sector, tc->pool->sectors_per_block); + return block_nr; } static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block) { struct pool *pool = tc->pool; + sector_t bi_sector = bio->bi_sector; bio->bi_bdev = tc->pool_dev->bdev; - bio->bi_sector = (block * pool->sectors_per_block) + - dm_thin_do_mod(bio->bi_sector, pool->sectors_per_block); + if (tc->pool->sectors_per_block_shift < 0) + bio->bi_sector = (block * pool->sectors_per_block) + + sector_div(bi_sector, pool->sectors_per_block); + else + bio->bi_sector = (block << pool->sectors_per_block_shift) | + (bi_sector & (pool->sectors_per_block - 1)); } static void remap_to_origin(struct thin_c *tc, struct bio *bio) @@ -739,25 +743,28 @@ static void issue(struct thin_c *tc, struct bio *bio) struct pool *pool = tc->pool; unsigned long flags; + if (!bio_triggers_commit(tc, bio)) { + generic_make_request(bio); + return; + } + + /* + * Complete bio with an error if earlier I/O caused changes to + * the metadata that can't be committed e.g, due to I/O errors + * on the metadata device. + */ + if (dm_thin_aborted_changes(tc->td)) { + bio_io_error(bio); + return; + } + /* * Batch together any bios that trigger commits and then issue a * single commit for them in process_deferred_bios(). */ - if (bio_triggers_commit(tc, bio)) { - if (dm_thin_aborted_changes(tc->td)) { - /* - * Earlier io caused changes to the metadata that - * can't be committed (eg, due to io errors on the - * metadata device. 
- */ - bio_io_error(bio); - } else { - spin_lock_irqsave(&pool->lock, flags); - bio_list_add(&pool->deferred_flush_bios, bio); - spin_unlock_irqrestore(&pool->lock, flags); - } - } else - generic_make_request(bio); + spin_lock_irqsave(&pool->lock, flags); + bio_list_add(&pool->deferred_flush_bios, bio); + spin_unlock_irqrestore(&pool->lock, flags); } static void remap_to_origin_and_issue(struct thin_c *tc, struct bio *bio) @@ -851,8 +858,13 @@ static void overwrite_endio(struct bio *bio, int err) /*----------------------------------------------------------------*/ -static enum pool_mode get_pool_mode(struct pool *pool); -static void set_pool_mode(struct pool *pool, enum pool_mode mode); +/* + * Workqueue. + */ + +/* + * Prepared mapping jobs. + */ /* * This sends the bios in the cell back to the deferred_bios list. @@ -998,8 +1010,7 @@ static void process_prepared(struct pool *pool, struct list_head *head, */ static int io_overlaps_block(struct pool *pool, struct bio *bio) { - return !dm_thin_do_mod(bio->bi_sector, pool->sectors_per_block) && - (bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT)); + return bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT); } static int io_overwrites_block(struct pool *pool, struct bio *bio) @@ -1030,6 +1041,7 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool) struct dm_thin_new_mapping *r = pool->next_mapping; BUG_ON(!pool->next_mapping); + pool->next_mapping = NULL; return r; @@ -1160,8 +1172,6 @@ static int commit(struct pool *pool) { int r; - BUG_ON(!get_pool_mode(pool) == PM_WRITE); - r = dm_pool_commit_metadata(pool->pmd); if (r) DMERR("commit failed, error = %d", r); @@ -1170,15 +1180,15 @@ static int commit(struct pool *pool) } /* - * Returns a boolean to indicate whether we're in a fallback mode after this - * call. Many callers don't care about the return value. + * A non-zero return indicates read_only or fail_io mode. + * Many callers don't care about the return value. */ static int commit_or_fallback(struct pool *pool) { int r; if (get_pool_mode(pool) != PM_WRITE) - return 1; + return -EINVAL; r = commit(pool); if (r) @@ -1215,7 +1225,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) * Try to commit to see if that will free up some * more space. */ - commit_or_fallback(pool); + (void) commit_or_fallback(pool); r = dm_pool_get_free_block_count(pool->pmd, &free_blocks); if (r) @@ -1323,15 +1333,10 @@ static void process_discard(struct thin_c *tc, struct bio *bio) } } else { /* - * This path is hit if people are ignoring - * limits->discard_granularity. It ignores any - * part of the discard that is in a subsequent - * block. + * The DM core makes sure that the discard doesn't span + * a block boundary. So we submit the discard of a + * partial block appropriately. 
*/ - sector_t offset = bio->bi_sector - (block * pool->sectors_per_block); - unsigned remaining = (pool->sectors_per_block - offset) << SECTOR_SHIFT; - bio->bi_size = min(bio->bi_size, remaining); - cell_release_singleton(cell, bio); cell_release_singleton(cell2, bio); if ((!lookup_result.shared) && pool->pf.discard_passdown) @@ -1511,29 +1516,33 @@ static void process_bio(struct thin_c *tc, struct bio *bio) static void process_bio_read_only(struct thin_c *tc, struct bio *bio) { - int r, dir = bio_data_dir(bio); + int r; + int rw = bio_data_dir(bio); dm_block_t block = get_bio_block(tc, bio); struct dm_thin_lookup_result lookup_result; r = dm_thin_find_block(tc->td, block, 1, &lookup_result); switch (r) { case 0: - if (lookup_result.shared && (dir == WRITE) && bio->bi_size) + if (lookup_result.shared && (rw == WRITE) && bio->bi_size) bio_io_error(bio); else remap_and_issue(tc, bio, lookup_result.block); break; case -ENODATA: - if (dir == READ && tc->origin_dev) - remap_to_origin_and_issue(tc, bio); + if (rw != READ) { + bio_io_error(bio); + break; + } - else if (dir == READ) { - zero_fill_bio(bio); - bio_endio(bio, 0); + if (tc->origin_dev) { + remap_to_origin_and_issue(tc, bio); + break; + } - } else - bio_io_error(bio); + zero_fill_bio(bio); + bio_endio(bio, 0); break; default: @@ -1647,7 +1656,7 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode) pool->pf.mode = mode; - switch(mode) { + switch (mode) { case PM_FAIL: DMERR("switching pool to failure mode"); pool->process_bio = process_bio_fail; @@ -1790,12 +1799,16 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, break; default: + /* + * Must always call bio_io_error on failure. + * dm_thin_find_block can fail with -EINVAL if the + * pool is switched to fail-io mode. + */ bio_io_error(bio); r = DM_MAPIO_SUBMITTED; break; } - BUG_ON(r < 0); return r; } @@ -1913,9 +1926,9 @@ static struct pool *pool_create(struct mapped_device *pool_md, void *err_p; struct pool *pool; struct dm_pool_metadata *pmd; - enum dm_thin_metadata_mode mode = DM_THIN_OPEN | (read_only ? 0 : DM_THIN_FORMAT); + bool format_device = read_only ? false : true; - pmd = dm_pool_metadata_open(metadata_dev, block_size, mode); + pmd = dm_pool_metadata_open(metadata_dev, block_size, format_device); if (IS_ERR(pmd)) { *error = "Error creating metadata object"; return (struct pool *)pmd; @@ -1930,6 +1943,10 @@ static struct pool *pool_create(struct mapped_device *pool_md, pool->pmd = pmd; pool->sectors_per_block = block_size; + if (block_size & (block_size - 1)) + pool->sectors_per_block_shift = -1; + else + pool->sectors_per_block_shift = __ffs(block_size); pool->low_water_blocks = 0; pool_features_init(&pool->pf); pool->prison = prison_create(PRISON_CELLS); @@ -2181,7 +2198,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) if (kstrtoul(argv[2], 10, &block_size) || !block_size || block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS || block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS || - dm_thin_do_mod(block_size, DATA_DEV_BLOCK_SIZE_MIN_SECTORS)) { + block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) { ti->error = "Invalid block size"; r = -EINVAL; goto out; @@ -2228,6 +2245,15 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) goto out_flags_changed; } + /* + * The block layer requires discard_granularity to be a power of 2. 
+ */ + if (pf.discard_enabled && !is_power_of_2(block_size)) { + ti->error = "Discard support must be disabled when the block size is not a power of 2"; + r = -EINVAL; + goto out_flags_changed; + } + pt->pool = pool; pt->ti = ti; pt->metadata_dev = metadata_dev; @@ -2235,7 +2261,6 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->low_water_blocks = low_water_blocks; pt->pf = pf; ti->num_flush_requests = 1; - /* * Only need to enable discards if the pool should pass * them down to the data device. The thin device's discard @@ -2248,7 +2273,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * stacking of discard limits (this keeps the pool and * thin devices' discard limits consistent). */ - ti->discards_supported = 1; + ti->discards_supported = true; } ti->private = pt; @@ -2308,7 +2333,8 @@ static int pool_preresume(struct dm_target *ti) int r; struct pool_c *pt = ti->private; struct pool *pool = pt->pool; - dm_block_t data_size, sb_data_size; + sector_t data_size = ti->len; + dm_block_t sb_data_size; /* * Take control of the pool object. @@ -2317,7 +2343,8 @@ static int pool_preresume(struct dm_target *ti) if (r) return r; - data_size = dm_thin_do_div(ti->len, pool->sectors_per_block); + (void) sector_div(data_size, pool->sectors_per_block); + r = dm_pool_get_data_dev_size(pool->pmd, &sb_data_size); if (r) { DMERR("failed to retrieve data device size"); @@ -2326,19 +2353,19 @@ static int pool_preresume(struct dm_target *ti) if (data_size < sb_data_size) { DMERR("pool target too small, is %llu blocks (expected %llu)", - data_size, sb_data_size); + (unsigned long long)data_size, sb_data_size); return -EINVAL; } else if (data_size > sb_data_size) { r = dm_pool_resize_data_dev(pool->pmd, data_size); if (r) { DMERR("failed to resize data device"); + /* FIXME Stricter than necessary: Rollback transaction instead here */ + set_pool_mode(pool, PM_READ_ONLY); return r; } - r = commit(pool); - if (r) - return r; + (void) commit_or_fallback(pool); } return 0; @@ -2366,7 +2393,7 @@ static void pool_postsuspend(struct dm_target *ti) cancel_delayed_work(&pool->waker); flush_workqueue(pool->wq); - commit_or_fallback(pool); + (void) commit_or_fallback(pool); } static int check_arg_count(unsigned argc, unsigned args_required) @@ -2500,7 +2527,7 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct if (r) return r; - commit_or_fallback(pool); + (void) commit_or_fallback(pool); r = dm_pool_reserve_metadata_snap(pool->pmd); if (r) @@ -2562,7 +2589,7 @@ static int pool_message(struct dm_target *ti, unsigned argc, char **argv) DMWARN("Unrecognised thin pool target message received: %s", argv[0]); if (!r) - commit_or_fallback(pool); + (void) commit_or_fallback(pool); return r; } @@ -2572,7 +2599,7 @@ static void emit_flags(struct pool_features *pf, char *result, { unsigned count = !pf->zero_new_blocks + !pf->discard_enabled + !pf->discard_passdown + (pf->mode == PM_READ_ONLY); - DMEMIT(" %u ", count); + DMEMIT("%u ", count); if (!pf->zero_new_blocks) DMEMIT("skip_block_zeroing "); @@ -2584,7 +2611,7 @@ static void emit_flags(struct pool_features *pf, char *result, DMEMIT("no_discard_passdown "); if (pf->mode == PM_READ_ONLY) - DMEMIT("read_only"); + DMEMIT("read_only "); } /* @@ -2593,7 +2620,7 @@ static void emit_flags(struct pool_features *pf, char *result, * / */ static int pool_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { int r; 
unsigned sz = 0; @@ -2611,22 +2638,19 @@ static int pool_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: if (get_pool_mode(pool) == PM_FAIL) { - DMEMIT("fail"); + DMEMIT("Fail"); break; } + /* Commit to ensure statistics aren't out-of-date */ + if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) + (void) commit_or_fallback(pool); + r = dm_pool_get_metadata_transaction_id(pool->pmd, &transaction_id); if (r) return r; - /* - * If we're in the middle of a transaction the free block - * counts can be quite out of date, so we do a quick - * commit. - */ - commit_or_fallback(pool); - r = dm_pool_get_free_metadata_block_count(pool->pmd, &nr_free_blocks_metadata); if (r) @@ -2657,15 +2681,23 @@ static int pool_status(struct dm_target *ti, status_type_t type, (unsigned long long)nr_blocks_data); if (held_root) - DMEMIT("%llu", held_root); + DMEMIT("%llu ", held_root); + else + DMEMIT("- "); + + if (pool->pf.discard_enabled && pool->pf.discard_passdown) + DMEMIT("discard_passdown "); else - DMEMIT("-"); + DMEMIT("no_discard_passdown "); - emit_flags(&pool->pf, result, sz, maxlen); + if (pool->pf.mode == PM_READ_ONLY) + DMEMIT("read_only"); + else + DMEMIT("read_write"); break; case STATUSTYPE_TABLE: - DMEMIT("%s %s %lu %llu", + DMEMIT("%s %s %lu %llu ", format_dev_t(buf, pt->metadata_dev->bdev->bd_dev), format_dev_t(buf2, pt->data_dev->bdev->bd_dev), (unsigned long)pool->sectors_per_block, @@ -2708,7 +2740,8 @@ static void set_discard_limits(struct pool *pool, struct queue_limits *limits) /* * This is just a hint, and not enforced. We have to cope with - * bios that overlap 2 blocks. + * bios that cover a block partially. A discard that spans a block + * boundary is not sent to this target. */ limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; limits->discard_zeroes_data = pool->pf.zero_new_blocks; @@ -2729,7 +2762,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, @@ -2845,15 +2878,20 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad_thin_open; } - ti->split_io = tc->pool->sectors_per_block; + r = dm_set_target_max_io_len(ti, tc->pool->sectors_per_block); + if (r) + goto bad_thin_open; + ti->num_flush_requests = 1; - ti->flush_supported = 1; + ti->flush_supported = true; /* In case the pool supports discards, pass them on. 
*/ if (tc->pool->pf.discard_enabled) { - ti->discards_supported = 1; + ti->discards_supported = true; ti->num_discard_requests = 1; - ti->discard_zeroes_data_unsupported = 1; + ti->discard_zeroes_data_unsupported = true; + /* Discard requests must be split on a block boundary */ + ti->split_discard_requests = true; } dm_put(pool_md); @@ -2934,7 +2972,7 @@ static void thin_postsuspend(struct dm_target *ti) * */ static int thin_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { int r; ssize_t sz = 0; @@ -2943,7 +2981,7 @@ static int thin_status(struct dm_target *ti, status_type_t type, struct thin_c *tc = ti->private; if (get_pool_mode(tc->pool) == PM_FAIL) { - DMEMIT("fail"); + DMEMIT("Fail"); return 0; } @@ -2984,7 +3022,7 @@ static int thin_status(struct dm_target *ti, status_type_t type, static int thin_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { - dm_block_t blocks; + sector_t blocks; struct thin_c *tc = ti->private; struct pool *pool = tc->pool; @@ -2995,7 +3033,8 @@ static int thin_iterate_devices(struct dm_target *ti, if (!pool->ti) return 0; /* nothing is bound */ - blocks = dm_thin_do_div(pool->ti->len, pool->sectors_per_block); + blocks = pool->ti->len; + (void) sector_div(blocks, pool->sectors_per_block); if (blocks) return fn(ti, tc->pool_dev, 0, pool->sectors_per_block * blocks, data); @@ -3014,7 +3053,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 1, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index fa365d3..254d192 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -515,7 +515,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio, * Status: V (valid) or C (corruption found) */ static int verity_status(struct dm_target *ti, status_type_t type, - char *result, unsigned maxlen) + unsigned status_flags, char *result, unsigned maxlen) { struct dm_verity *v = ti->private; unsigned sz = 0; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ec5d5d9..4e09b6f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -968,22 +968,41 @@ static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti static sector_t max_io_len(sector_t sector, struct dm_target *ti) { sector_t len = max_io_len_target_boundary(sector, ti); + sector_t offset, max_len; /* - * Does the target need to split even further ? + * Does the target need to split even further? 
*/ - if (ti->split_io) { - sector_t offset = dm_target_offset(ti, sector); - sector_t boundary, tmp = offset + ti->split_io; + if (ti->max_io_len) { + offset = dm_target_offset(ti, sector); + if (unlikely(ti->max_io_len & (ti->max_io_len - 1))) + max_len = sector_div(offset, ti->max_io_len); + else + max_len = offset & (ti->max_io_len - 1); + max_len = ti->max_io_len - max_len; - boundary = ti->split_io - do_div(tmp, ti->split_io); - if (len > boundary) - len = boundary; + if (len > max_len) + len = max_len; } return len; } +int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) +{ + if (len > UINT_MAX) { + DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)", + (unsigned long long)len, UINT_MAX); + ti->error = "Maximum size of target IO is too large"; + return -EINVAL; + } + + ti->max_io_len = (uint32_t) len; + + return 0; +} +EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); + static void __map_bio(struct dm_target *ti, struct bio *clone, struct dm_target_io *tio) { diff --git a/drivers/md/dm.h b/drivers/md/dm.h index b7dacd5..52eef49 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -23,6 +23,11 @@ #define DM_SUSPEND_NOFLUSH_FLAG (1 << 1) /* + * Status feature flags + */ +#define DM_STATUS_NOFLUSH_FLAG (1 << 0) + +/* * Type of table and mapped_device's mempool */ #define DM_TYPE_NONE 0 diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index df9220d..5ba2777 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -364,7 +364,7 @@ static void dm_block_manager_write_callback(struct dm_buffer *buf) *--------------------------------------------------------------*/ struct dm_block_manager { struct dm_bufio_client *bufio; - int read_only; + bool read_only:1; }; struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, @@ -372,34 +372,36 @@ struct dm_block_manager *dm_block_manager_create(struct block_device *bdev, unsigned cache_size, unsigned max_held_per_thread) { - struct dm_block_manager *bm = kmalloc(sizeof(*bm), GFP_KERNEL); + int r; + struct dm_block_manager *bm; - if (!bm) - return NULL; + bm = kmalloc(sizeof(*bm), GFP_KERNEL); + if (!bm) { + r = -ENOMEM; + goto bad; + } bm->bufio = dm_bufio_client_create(bdev, block_size, max_held_per_thread, sizeof(struct buffer_aux), dm_block_manager_alloc_callback, dm_block_manager_write_callback); - if (!bm->bufio) { + if (IS_ERR(bm->bufio)) { + r = PTR_ERR(bm->bufio); kfree(bm); - return NULL; + goto bad; } - bm->read_only = 0; + bm->read_only = false; + return bm; + +bad: + return ERR_PTR(r); } EXPORT_SYMBOL_GPL(dm_block_manager_create); void dm_block_manager_destroy(struct dm_block_manager *bm) { - /* - * This should only happen if there's an error while we're creating - * a new pool metadata. At which point work has been done that - * incurs changes on disk, but we've not got enough pieces together - * to do a tm commit. 
- */ - WARN_ON(dm_bufio_has_dirty_buffers(bm->bufio)); dm_bufio_client_destroy(bm->bufio); kfree(bm); } @@ -610,11 +612,11 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, return dm_bufio_write_dirty_buffers(bm->bufio); } -void dm_bm_read_only(struct dm_block_manager *bm) +void dm_bm_set_read_only(struct dm_block_manager *bm) { - bm->read_only = 1; + bm->read_only = true; } -EXPORT_SYMBOL_GPL(dm_bm_read_only); +EXPORT_SYMBOL_GPL(dm_bm_set_read_only); u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) { diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index 175db19..be5bff6 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h @@ -100,11 +100,10 @@ int dm_bm_unlock(struct dm_block *b); * It's a common idiom to have a superblock that should be committed last. * * @superblock should be write-locked on entry. It will be unlocked during - * this function (even if an IO error occurs and the data doesn't get - * written to disk). All dirty blocks are guaranteed to be written and - * flushed before the superblock. + * this function. All dirty blocks are guaranteed to be written and flushed + * before the superblock. * - * This method may blocks. + * This method always blocks. */ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, struct dm_block *superblock); @@ -120,7 +119,7 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, * Additionally you should not use dm_bm_unlock_move, however no error will * be returned if you do. */ -void dm_bm_read_only(struct dm_block_manager *bm); +void dm_bm_set_read_only(struct dm_block_manager *bm); u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor); diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 71453ed..d77602d 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -224,7 +224,7 @@ static int sm_ll_init(struct ll_disk *ll, struct dm_transaction_manager *tm) ll->nr_blocks = 0; ll->bitmap_root = 0; ll->ref_count_root = 0; - ll->bitmap_index_changed = 0; + ll->bitmap_index_changed = false; return 0; } @@ -482,7 +482,7 @@ int sm_ll_commit(struct ll_disk *ll) if (ll->bitmap_index_changed) { r = ll->commit(ll); if (!r) - ll->bitmap_index_changed = 0; + ll->bitmap_index_changed = false; } return r; @@ -500,7 +500,7 @@ static int metadata_ll_load_ie(struct ll_disk *ll, dm_block_t index, static int metadata_ll_save_ie(struct ll_disk *ll, dm_block_t index, struct disk_index_entry *ie) { - ll->bitmap_index_changed = 1; + ll->bitmap_index_changed = true; memcpy(ll->mi_le.index + index, ie, sizeof(*ie)); return 0; } diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index cee388f..b3078d5 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -78,7 +78,7 @@ struct ll_disk { open_index_fn open_index; max_index_entries_fn max_entries; commit_fn commit; - int bitmap_index_changed; + bool bitmap_index_changed:1; }; struct disk_sm_root { diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index 9f2288e..f6d29e6 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -28,15 +28,7 @@ struct sm_disk { struct ll_disk old_ll; dm_block_t begin; - - /* - 
* The allocator should not wrap past this block within a - * transaction. - */ - dm_block_t end; dm_block_t nr_allocated_this_transaction; - - unsigned first_alloc:1; }; static void sm_disk_destroy(struct dm_space_map *sm) @@ -172,35 +164,20 @@ static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) { - int r = -ENOSPC; + int r; enum allocation_event ev; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - dm_block_t end; - - while (smd->first_alloc || smd->begin != smd->end) { - smd->first_alloc = 0; - - if (smd->begin >= smd->old_ll.nr_blocks) - smd->begin = 0; - - end = (smd->begin < smd->end) ? smd->end : smd->old_ll.nr_blocks; - - r = sm_ll_find_free_block(&smd->old_ll, smd->begin, end, b); - if (r && r != -ENOSPC) - return r; - - else if (!r) { - smd->begin = *b + 1; - r = sm_ll_inc(&smd->ll, *b, &ev); - if (!r) { - BUG_ON(ev != SM_ALLOC); - smd->nr_allocated_this_transaction++; - } - return r; - } + /* FIXME: we should loop round a couple of times */ + r = sm_ll_find_free_block(&smd->old_ll, smd->begin, smd->old_ll.nr_blocks, b); + if (r) + return r; - smd->begin = end; + smd->begin = *b + 1; + r = sm_ll_inc(&smd->ll, *b, &ev); + if (!r) { + BUG_ON(ev != SM_ALLOC); + smd->nr_allocated_this_transaction++; } return r; @@ -221,8 +198,7 @@ static int sm_disk_commit(struct dm_space_map *sm) return r; memcpy(&smd->old_ll, &smd->ll, sizeof(smd->old_ll)); - smd->end = smd->begin; - smd->first_alloc = 1; + smd->begin = 0; smd->nr_allocated_this_transaction = 0; r = sm_disk_get_nr_free(sm, &nr_free); @@ -285,8 +261,7 @@ struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, if (!smd) return ERR_PTR(-ENOMEM); - smd->begin = smd->end = 0; - smd->first_alloc = 1; + smd->begin = 0; smd->nr_allocated_this_transaction = 0; memcpy(&smd->sm, &ops, sizeof(smd->sm)); diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 50f3be7..d247a35 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -220,9 +220,9 @@ static int __shadow_block(struct dm_transaction_manager *tm, dm_block_t orig, return r; /* - * It would be tempting to use dm_bm_unlock_move here. But some - * code, such as the space maps, keep using the old data structures - * - secure in the knowledge they wont be changed until the next + * It would be tempting to use dm_bm_unlock_move here, but some + * code, such as the space maps, keeps using the old data structures + * secure in the knowledge they won't be changed until the next * transaction. Using unlock_move would force a synchronous read * since the old block would no longer be in the cache. 
*/ @@ -345,6 +345,7 @@ static int dm_tm_create_internal(struct dm_block_manager *bm, DMERR("couldn't create metadata space map"); goto bad; } + } else { r = dm_sm_metadata_open(*sm, *tm, sm_root, sm_len); if (r) { @@ -357,6 +358,7 @@ static int dm_tm_create_internal(struct dm_block_manager *bm, bad: dm_tm_destroy(*tm); + dm_sm_destroy(*sm); return r; } diff --git a/drivers/scsi/device_handler/scsi_dh.c b/drivers/scsi/device_handler/scsi_dh.c index 48e46f5..33e422e 100644 --- a/drivers/scsi/device_handler/scsi_dh.c +++ b/drivers/scsi/device_handler/scsi_dh.c @@ -468,7 +468,8 @@ EXPORT_SYMBOL_GPL(scsi_dh_handler_exist); /* * scsi_dh_attach - Attach device handler - * @sdev - sdev the handler should be attached to + * @q - Request queue that is associated with the scsi_device + * the handler should be attached to * @name - name of the handler to attach */ int scsi_dh_attach(struct request_queue *q, const char *name) @@ -498,7 +499,8 @@ EXPORT_SYMBOL_GPL(scsi_dh_attach); /* * scsi_dh_detach - Detach device handler - * @sdev - sdev the handler should be detached from + * @q - Request queue that is associated with the scsi_device + * the handler should be detached from * * This function will detach the device handler only * if the sdev is not part of the internal list, ie @@ -527,6 +529,38 @@ void scsi_dh_detach(struct request_queue *q) } EXPORT_SYMBOL_GPL(scsi_dh_detach); +/* + * scsi_dh_attached_handler_name - Get attached device handler's name + * @q - Request queue that is associated with the scsi_device + * that may have a device handler attached + * @gfp - the GFP mask used in the kmalloc() call when allocating memory + * + * Returns name of attached handler, NULL if no handler is attached. + * Caller must take care to free the returned string. + */ +const char *scsi_dh_attached_handler_name(struct request_queue *q, gfp_t gfp) +{ + unsigned long flags; + struct scsi_device *sdev; + const char *handler_name = NULL; + + spin_lock_irqsave(q->queue_lock, flags); + sdev = q->queuedata; + if (!sdev || !get_device(&sdev->sdev_gendev)) + sdev = NULL; + spin_unlock_irqrestore(q->queue_lock, flags); + + if (!sdev) + return NULL; + + if (sdev->scsi_dh_data) + handler_name = kstrdup(sdev->scsi_dh_data->scsi_dh->name, gfp); + + put_device(&sdev->sdev_gendev); + return handler_name; +} +EXPORT_SYMBOL_GPL(scsi_dh_attached_handler_name); + static struct notifier_block scsi_dh_nb = { .notifier_call = scsi_dh_notifier }; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index e869312..38d27a1 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -72,7 +72,7 @@ typedef int (*dm_preresume_fn) (struct dm_target *ti); typedef void (*dm_resume_fn) (struct dm_target *ti); typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, - char *result, unsigned int maxlen); + unsigned status_flags, char *result, unsigned maxlen); typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv); @@ -186,8 +186,8 @@ struct dm_target { sector_t begin; sector_t len; - /* Always a power of 2 */ - sector_t split_io; + /* If non-zero, maximum size of I/O submitted to a target. */ + uint32_t max_io_len; /* * A number of zero-length barrier requests that will be submitted @@ -215,24 +215,24 @@ struct dm_target { * Set if this target needs to receive flushes regardless of * whether or not its underlying devices have support. 
*/ - unsigned flush_supported:1; + bool flush_supported:1; /* * Set if this target needs to receive discards regardless of * whether or not its underlying devices have support. */ - unsigned discards_supported:1; + bool discards_supported:1; /* * Set if the target required discard request to be split * on max_io_len boundary. */ - unsigned split_discard_requests:1; + bool split_discard_requests:1; /* * Set if this target does not return zeroes on discarded blocks. */ - unsigned discard_zeroes_data_unsupported:1; + bool discard_zeroes_data_unsupported:1; }; /* Each target can link one of these into the table */ @@ -370,6 +370,11 @@ void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callback int dm_table_complete(struct dm_table *t); /* + * Target may require that it is never sent I/O larger than len. + */ +int __must_check dm_set_target_max_io_len(struct dm_target *ti, sector_t len); + +/* * Table reference counting. */ struct dm_table *dm_get_live_table(struct mapped_device *md); diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index 75fd557..2d45fd3 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -268,8 +268,8 @@ enum { #define DM_VERSION_MAJOR 4 #define DM_VERSION_MINOR 22 -#define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2011-10-19)" +#define DM_VERSION_PATCHLEVEL 1 +#define DM_VERSION_EXTRA "-ioctl (2012-06-01)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ @@ -307,6 +307,8 @@ enum { /* * Set this to suspend without flushing queued ios. + * Also disables flushing uncommitted changes in the thin target before + * generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT. */ #define DM_NOFLUSH_FLAG (1 << 11) /* In */ diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h index e3f2db2..620c723 100644 --- a/include/scsi/scsi_dh.h +++ b/include/scsi/scsi_dh.h @@ -60,6 +60,7 @@ extern int scsi_dh_activate(struct request_queue *, activate_complete, void *); extern int scsi_dh_handler_exist(const char *); extern int scsi_dh_attach(struct request_queue *, const char *); extern void scsi_dh_detach(struct request_queue *); +extern const char *scsi_dh_attached_handler_name(struct request_queue *, gfp_t); extern int scsi_dh_set_params(struct request_queue *, const char *); #else static inline int scsi_dh_activate(struct request_queue *req, @@ -80,6 +81,11 @@ static inline void scsi_dh_detach(struct request_queue *q) { return; } +static inline const char *scsi_dh_attached_handler_name(struct request_queue *q, + gfp_t gfp) +{ + return NULL; +} static inline int scsi_dh_set_params(struct request_queue *req, const char *params) { return -SCSI_DH_NOSYS;
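
For reference, the split_io to max_io_len conversion changes how a target declares its I/O boundary. A target used to assign ti->split_io directly; it must now call dm_set_target_max_io_len(), which rejects lengths that do not fit in 32 bits, so the return value has to be checked (the helper is __must_check). A minimal sketch of a hypothetical target constructor, not part of the diff above:

static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
{
	sector_t chunk_sectors = 128;	/* hypothetical chunk size */
	int r;

	/* Previously: ti->split_io = chunk_sectors; */
	r = dm_set_target_max_io_len(ti, chunk_sectors);
	if (r)
		return r;	/* len exceeded UINT_MAX */

	return 0;
}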
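
Because the pool block size no longer has to be a power of 2, pool_create() above caches sectors_per_block_shift, using -1 as a sentinel for non-power-of-2 sizes. Sector-to-block arithmetic then chooses between sector_div() and a cheap shift. An illustrative helper, assuming only the fields introduced above:

static dm_block_t sector_to_block(struct pool *pool, sector_t s)
{
	if (pool->sectors_per_block_shift < 0)
		/* non-power-of-2 block size: full 64-bit division */
		(void) sector_div(s, pool->sectors_per_block);
	else
		/* power-of-2 block size: a shift is enough */
		s >>= pool->sectors_per_block_shift;

	return (dm_block_t) s;
}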
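
DM_NOFLUSH_FLAG now also suppresses the commit that pool_status() otherwise performs before reporting statistics (see the DM_STATUS_NOFLUSH_FLAG check above). A userspace sketch of requesting status that way, with buffer sizing and parsing of the returned status line elided:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/dm-ioctl.h>

static int pool_status_noflush(int ctl_fd, const char *name,
			       char *buf, size_t len)
{
	struct dm_ioctl *io = (struct dm_ioctl *) buf;

	memset(buf, 0, len);
	io->version[0] = DM_VERSION_MAJOR;
	io->version[1] = DM_VERSION_MINOR;
	io->version[2] = DM_VERSION_PATCHLEVEL;
	io->data_size  = len;
	io->data_start = sizeof(*io);
	io->flags = DM_NOFLUSH_FLAG;	/* skip the implicit commit */
	strncpy(io->name, name, sizeof(io->name) - 1);

	return ioctl(ctl_fd, DM_TABLE_STATUS, io);
}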
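
scsi_dh_attached_handler_name() returns a kstrdup()ed copy of the attached handler's name, or NULL if none is attached, so the caller owns the string and must kfree() it. dm-mpath is the intended consumer; the fragment below is only an illustration:

static void log_attached_handler(struct request_queue *q)
{
	const char *name = scsi_dh_attached_handler_name(q, GFP_KERNEL);

	if (!name)
		return;	/* no handler attached, or no scsi_device */

	printk(KERN_INFO "attached device handler: %s\n", name);
	kfree(name);
}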