From: Joe Thornber Signed-off-by: Joe Thornber FIXME Needs a patch header --- drivers/md/dm-thin-metadata.c | 25 ++++ drivers/md/dm-thin-metadata.h | 7 + drivers/md/dm-thin.c | 69 +++++++++++- drivers/md/persistent-data/dm-space-map-disk.c | 3 drivers/md/persistent-data/dm-space-map-metadata.c | 118 +++++++++++++++++++-- drivers/md/persistent-data/dm-space-map.h | 22 +++ 6 files changed, 232 insertions(+), 12 deletions(-) Index: linux/drivers/md/dm-thin-metadata.c =================================================================== --- linux.orig/drivers/md/dm-thin-metadata.c +++ linux/drivers/md/dm-thin-metadata.c @@ -1676,6 +1676,17 @@ int dm_pool_resize_data_dev(struct dm_po return r; } +int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) +{ + int r; + + down_write(&pmd->root_lock); + r = pmd->fail_io ? -EINVAL : __resize_space_map(pmd->metadata_sm, new_count); + up_write(&pmd->root_lock); + + return r; +} + void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd) { down_write(&pmd->root_lock); @@ -1683,3 +1694,17 @@ void dm_pool_metadata_read_only(struct d dm_bm_set_read_only(pmd->bm); up_write(&pmd->root_lock); } + +int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context) +{ + int r; + + down_write(&pmd->root_lock); + r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context); + up_write(&pmd->root_lock); + + return r; +} Index: linux/drivers/md/dm-thin-metadata.h =================================================================== --- linux.orig/drivers/md/dm-thin-metadata.h +++ linux/drivers/md/dm-thin-metadata.h @@ -8,6 +8,7 @@ #define DM_THIN_METADATA_H #include "persistent-data/dm-block-manager.h" +#include "persistent-data/dm-space-map.h" #define THIN_METADATA_BLOCK_SIZE 4096 @@ -185,6 +186,7 @@ int dm_pool_get_data_dev_size(struct dm_ * blocks would be lost. */ int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_size); +int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_size); /* * Flicks the underlying block manager into read only mode, so you know @@ -192,6 +194,11 @@ int dm_pool_resize_data_dev(struct dm_po */ void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd); +int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context); + /*----------------------------------------------------------------*/ #endif Index: linux/drivers/md/dm-thin.c =================================================================== --- linux.orig/drivers/md/dm-thin.c +++ linux/drivers/md/dm-thin.c @@ -42,6 +42,14 @@ DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_P #define MAX_DEV_ID ((1 << 24) - 1) /* + * Metadata low water mark. + * + * If the free blocks in the metadata device pass this threshold an event + * will be generated. + */ +#define METADATA_LOW_WATER_MARK 64 + +/* * How do we handle breaking sharing of data blocks? * ================================================= * @@ -1909,6 +1917,16 @@ static int parse_pool_features(struct dm return r; } +static void metadata_low_callback(void *context) +{ + struct pool *pool = context; + + DMWARN("%s: reached low water mark for metadata device: sending event.", + dm_device_name(pool->pool_md)); + + dm_table_event(pool->ti->table); +} + static dm_block_t get_metadata_dev_size(struct block_device *bdev) { sector_t metadata_dev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; @@ -1964,7 +1982,6 @@ static int pool_ctr(struct dm_target *ti ti->error = "Error opening metadata block device"; goto out_unlock; } - (void) get_metadata_dev_size(metadata_dev->bdev); r = dm_get_device(ti, argv[1], FMODE_READ | FMODE_WRITE, &data_dev); @@ -2049,6 +2066,13 @@ static int pool_ctr(struct dm_target *ti } ti->private = pt; + r = dm_pool_register_metadata_threshold(pt->pool->pmd, + METADATA_LOW_WATER_MARK, + metadata_low_callback, + pool); + if (r) + goto out_free_pt; + pt->callbacks.congested_fn = pool_is_congested; dm_table_add_target_callbacks(ti->table, &pt->callbacks); @@ -2124,6 +2148,41 @@ static int maybe_resize_data_dev(struct return 0; } +static int maybe_resize_metadata_dev(struct dm_target *ti, int *need_commit) +{ + int r; + struct pool_c *pt = ti->private; + struct pool *pool = pt->pool; + dm_block_t metadata_dev_size, sb_metadata_dev_size; + + metadata_dev_size = get_metadata_dev_size(pool->md_dev); + + r = dm_pool_get_metadata_dev_size(pool->pmd, &sb_metadata_dev_size); + if (r) { + DMERR("failed to retrieve data device size"); + return r; + } + + if (metadata_dev_size < sb_metadata_dev_size) { + DMERR("metadata device too small, is %llu blocks (expected %llu)", + metadata_dev_size, sb_metadata_dev_size); + return -EINVAL; + + } else if (metadata_dev_size > sb_metadata_dev_size) { + r = dm_pool_resize_metadata_dev(pool->pmd, metadata_dev_size); + if (r) { + DMERR("failed to resize metadata device"); + /* FIXME Stricter than necessary: Rollback transaction instead here */ + set_pool_mode(pool, PM_READ_ONLY); + return r; + } + + *need_commit = 1; + } + + return 0; +} + /* * Retrieves the number of blocks of the data device from * the superblock and compares it to the actual device size, @@ -2137,7 +2196,7 @@ static int maybe_resize_data_dev(struct */ static int pool_preresume(struct dm_target *ti) { - int r, need_commit1 = 0; + int r, need_commit1 = 0, need_commit2 = 0; struct pool_c *pt = ti->private; struct pool *pool = pt->pool; @@ -2152,7 +2211,11 @@ static int pool_preresume(struct dm_targ if (r) return r; - if (need_commit1) + r = maybe_resize_metadata_dev(ti, &need_commit2); + if (r) + return r; + + if (need_commit1 || need_commit2) (void) commit_or_fallback(pool); return 0; Index: linux/drivers/md/persistent-data/dm-space-map-disk.c =================================================================== --- linux.orig/drivers/md/persistent-data/dm-space-map-disk.c +++ linux/drivers/md/persistent-data/dm-space-map-disk.c @@ -248,7 +248,8 @@ static struct dm_space_map ops = { .new_block = sm_disk_new_block, .commit = sm_disk_commit, .root_size = sm_disk_root_size, - .copy_root = sm_disk_copy_root + .copy_root = sm_disk_copy_root, + .register_threshold_callback = NULL }; struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, Index: linux/drivers/md/persistent-data/dm-space-map-metadata.c =================================================================== --- linux.orig/drivers/md/persistent-data/dm-space-map-metadata.c +++ linux/drivers/md/persistent-data/dm-space-map-metadata.c @@ -17,6 +17,56 @@ /*----------------------------------------------------------------*/ /* + * An edge triggered threshold. + */ +struct threshold { + bool threshold_set; + bool value_set; + dm_block_t threshold; + dm_block_t current_value; + dm_sm_threshold_fn fn; + void *context; +}; + +static void threshold_init(struct threshold *t) +{ + t->threshold_set = false; + t->value_set = false; +} + +static void set_threshold(struct threshold *t, dm_block_t value, + dm_sm_threshold_fn fn, void *context) +{ + t->threshold_set = true; + t->threshold = value; + t->fn = fn; + t->context = context; +} + +static bool below_threshold(struct threshold *t, + dm_block_t value) +{ + return t->threshold_set && value <= t->threshold; +} + +static bool threshold_already_triggered(struct threshold *t) +{ + return t->value_set && below_threshold(t, t->current_value); +} + +static void check_threshold(struct threshold *t, dm_block_t value) +{ + if (below_threshold(t, value) && + !threshold_already_triggered(t)) + t->fn(t->context); + + t->value_set = true; + t->current_value = value; +} + +/*----------------------------------------------------------------*/ + +/* * Space map interface. * * The low level disk format is written using the standard btree and @@ -54,6 +104,8 @@ struct sm_metadata { unsigned allocated_this_transaction; unsigned nr_uncommitted; struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS]; + + struct threshold threshold; }; static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) @@ -137,6 +189,8 @@ static int recursing(struct sm_metadata return smm->recursion_count; } +/*----------------------------------------------------------------*/ + static void sm_metadata_destroy(struct dm_space_map *sm) { struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); @@ -144,12 +198,6 @@ static void sm_metadata_destroy(struct d kfree(smm); } -static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) -{ - DMERR("doesn't support extend"); - return -EINVAL; -} - static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) { struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); @@ -335,9 +383,19 @@ static int sm_metadata_new_block_(struct static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) { + dm_block_t count; + struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); + int r = sm_metadata_new_block_(sm, b); if (r) DMERR("unable to allocate new metadata block"); + + r = sm_metadata_get_nr_free(sm, &count); + if (r) + DMERR("couldn't get free block count"); + + check_threshold(&smm->threshold, count); + return r; } @@ -357,6 +415,17 @@ static int sm_metadata_commit(struct dm_ return 0; } +static int sm_metadata_register_threshold_callback(struct dm_space_map *sm, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context) +{ + struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); + + set_threshold(&smm->threshold, threshold, fn, context); + return 0; +} + static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result) { *result = sizeof(struct disk_sm_root); @@ -382,6 +451,8 @@ static int sm_metadata_copy_root(struct return 0; } +static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks); + static struct dm_space_map ops = { .destroy = sm_metadata_destroy, .extend = sm_metadata_extend, @@ -395,7 +466,8 @@ static struct dm_space_map ops = { .new_block = sm_metadata_new_block, .commit = sm_metadata_commit, .root_size = sm_metadata_root_size, - .copy_root = sm_metadata_copy_root + .copy_root = sm_metadata_copy_root, + .register_threshold_callback = sm_metadata_register_threshold_callback }; /*----------------------------------------------------------------*/ @@ -517,11 +589,39 @@ static struct dm_space_map bootstrap_ops .new_block = sm_bootstrap_new_block, .commit = sm_bootstrap_commit, .root_size = sm_bootstrap_root_size, - .copy_root = sm_bootstrap_copy_root + .copy_root = sm_bootstrap_copy_root, + .register_threshold_callback = NULL }; /*----------------------------------------------------------------*/ +static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) +{ + int r, i; + enum allocation_event ev; + struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); + dm_block_t old_len = smm->ll.nr_blocks; + + /* + * We flick into a mode where all blocks get allocated in the new + * area, ... + */ + smm->begin = old_len; + memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); + + /* ... extend, ... */ + r = sm_ll_extend(&smm->ll, extra_blocks); + + /* ... then switch back to normal behaviour. */ + memcpy(&smm->sm, &ops, sizeof(smm->sm)); + for (i = old_len; !r && i < smm->begin; i++) + r = sm_ll_inc(&smm->ll, i, &ev); + + return r; +} + +/*----------------------------------------------------------------*/ + struct dm_space_map *dm_sm_metadata_init(void) { struct sm_metadata *smm; @@ -549,6 +649,7 @@ int dm_sm_metadata_create(struct dm_spac smm->recursion_count = 0; smm->allocated_this_transaction = 0; smm->nr_uncommitted = 0; + threshold_init(&smm->threshold); memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); @@ -590,6 +691,7 @@ int dm_sm_metadata_open(struct dm_space_ smm->recursion_count = 0; smm->allocated_this_transaction = 0; smm->nr_uncommitted = 0; + threshold_init(&smm->threshold); memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); return 0; Index: linux/drivers/md/persistent-data/dm-space-map.h =================================================================== --- linux.orig/drivers/md/persistent-data/dm-space-map.h +++ linux/drivers/md/persistent-data/dm-space-map.h @@ -9,6 +9,8 @@ #include "dm-block-manager.h" +typedef void (*dm_sm_threshold_fn)(void *context); + /* * struct dm_space_map keeps a record of how many times each block in a device * is referenced. It needs to be fixed on disk as part of the transaction. @@ -53,6 +55,15 @@ struct dm_space_map { int (*new_block)(struct dm_space_map *sm, dm_block_t *b); /* + * You can register 1 threshold callback. This is edge triggered + * when the free space in the space map drops below the threshold. + */ + int (*register_threshold_callback)(struct dm_space_map *sm, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context); + + /* * The root contains all the information needed to fix the space map. * Generally this info is small, so squirrel it away in a disk block * along with other info. @@ -121,6 +132,17 @@ static inline int dm_sm_new_block(struct return sm->new_block(sm, b); } +static inline int dm_sm_register_threshold_callback(struct dm_space_map *sm, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context) +{ + if (sm->register_threshold_callback) + return sm->register_threshold_callback(sm, threshold, fn, context); + + return -EINVAL; +} + static inline int dm_sm_root_size(struct dm_space_map *sm, size_t *result) { return sm->root_size(sm, result);