From: Joe Thornber dm thin metadata: introduce dm_pool_abort_metadata Aborts the current metadata transaction. Generally this will only be called when bad things are happening and dm-thin is trying to roll back to a good state for read-only mode. This is complicated by the fact that the metadata device may have failed completely, causing the abort to be unable to read the old transaction. In this case the metadata object is placed in a 'fail' mode and everything fails apart from destroying it. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer --- drivers/md/dm-thin-metadata.c | 143 ++++++++++++++++++++++++++++++++---------- drivers/md/dm-thin-metadata.h | 12 +++ 2 files changed, 121 insertions(+), 34 deletions(-) Index: linux-3.5/drivers/md/dm-thin-metadata.c =================================================================== --- linux-3.5.orig/drivers/md/dm-thin-metadata.c +++ linux-3.5/drivers/md/dm-thin-metadata.c @@ -185,6 +185,7 @@ struct dm_pool_metadata { unsigned long flags; sector_t data_block_size; bool read_only:1; + bool fail_io:1; }; struct dm_thin_device { @@ -193,7 +194,8 @@ struct dm_thin_device { dm_thin_id id; int open_count; - int changed; + bool changed:1; + bool aborted_with_changes:1; uint64_t mapped_blocks; uint64_t transaction_id; uint32_t creation_time; @@ -731,9 +733,6 @@ static int __write_changed_details(struc static int __commit_transaction(struct dm_pool_metadata *pmd) { - /* - * FIXME: Associated pool should be made read-only on failure. 
- */ int r; size_t metadata_len, data_len; struct thin_disk_superblock *disk_super; @@ -809,6 +808,7 @@ struct dm_pool_metadata *dm_pool_metadat pmd->time = 0; INIT_LIST_HEAD(&pmd->thin_devices); pmd->read_only = false; + pmd->fail_io = false; pmd->bdev = bdev; pmd->data_block_size = data_block_size; @@ -851,16 +851,17 @@ int dm_pool_metadata_close(struct dm_poo return -EBUSY; } - if (!pmd->read_only) { + if (!pmd->read_only && !pmd->fail_io) { r = __commit_transaction(pmd); if (r < 0) DMWARN("%s: __commit_transaction() failed, error = %d", __func__, r); } - __destroy_persistent_data_objects(pmd); - kfree(pmd); + if (!pmd->fail_io) + __destroy_persistent_data_objects(pmd); + kfree(pmd); return 0; } @@ -921,6 +922,7 @@ static int __open_device(struct dm_pool_ (*td)->id = dev; (*td)->open_count = 1; (*td)->changed = changed; + (*td)->aborted_with_changes = false; (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks); (*td)->transaction_id = le64_to_cpu(details_le.transaction_id); (*td)->creation_time = le32_to_cpu(details_le.creation_time); @@ -985,7 +987,7 @@ int dm_pool_create_thin(struct dm_pool_m int r; down_write(&pmd->root_lock); - r = __create_thin(pmd, dev); + r = pmd->fail_io ? -EINVAL : __create_thin(pmd, dev); up_write(&pmd->root_lock); return r; @@ -1075,7 +1077,7 @@ int dm_pool_create_snap(struct dm_pool_m int r; down_write(&pmd->root_lock); - r = __create_snap(pmd, dev, origin); + r = pmd->fail_io ? -EINVAL : __create_snap(pmd, dev, origin); up_write(&pmd->root_lock); return r; @@ -1117,7 +1119,7 @@ int dm_pool_delete_thin_device(struct dm int r; down_write(&pmd->root_lock); - r = __delete_device(pmd, dev); + r = pmd->fail_io ? 
-EINVAL : __delete_device(pmd, dev); up_write(&pmd->root_lock); return r; @@ -1127,27 +1129,40 @@ int dm_pool_set_metadata_transaction_id( uint64_t current_id, uint64_t new_id) { + int r; + down_write(&pmd->root_lock); - if (pmd->trans_id != current_id) { - up_write(&pmd->root_lock); - DMERR("mismatched transaction id"); - return -EINVAL; + if (pmd->fail_io) + r = -EINVAL; + else { + if (pmd->trans_id != current_id) { + DMERR("mismatched transaction id"); + r = -EINVAL; + } else { + pmd->trans_id = new_id; + r = 0; + } } - - pmd->trans_id = new_id; up_write(&pmd->root_lock); - return 0; + return r; } int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, uint64_t *result) { + int r; + down_read(&pmd->root_lock); - *result = pmd->trans_id; + if (pmd->fail_io) + r = -EINVAL; + else { + *result = pmd->trans_id; + r = 0; + } up_read(&pmd->root_lock); - return 0; + return r; } static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) @@ -1214,7 +1229,7 @@ int dm_pool_reserve_metadata_snap(struct int r; down_write(&pmd->root_lock); - r = __reserve_metadata_snap(pmd); + r = pmd->fail_io ? -EINVAL : __reserve_metadata_snap(pmd); up_write(&pmd->root_lock); return r; @@ -1259,7 +1274,7 @@ int dm_pool_release_metadata_snap(struct int r; down_write(&pmd->root_lock); - r = __release_metadata_snap(pmd); + r = pmd->fail_io ? -EINVAL : __release_metadata_snap(pmd); up_write(&pmd->root_lock); return r; @@ -1289,7 +1304,7 @@ int dm_pool_get_metadata_snap(struct dm_ int r; down_read(&pmd->root_lock); - r = __get_metadata_snap(pmd, result); + r = pmd->fail_io ? -EINVAL : __get_metadata_snap(pmd, result); up_read(&pmd->root_lock); return r; @@ -1301,7 +1316,7 @@ int dm_pool_open_thin_device(struct dm_p int r; down_write(&pmd->root_lock); - r = __open_device(pmd, dev, 0, td); + r = pmd->fail_io ? 
-EINVAL : __open_device(pmd, dev, 0, td); up_write(&pmd->root_lock); return r; @@ -1337,12 +1352,22 @@ int dm_thin_find_block(struct dm_thin_de if (can_block) { down_read(&pmd->root_lock); + if (pmd->fail_io) { + up_read(&pmd->root_lock); + return -EINVAL; + } + r = dm_btree_lookup(&pmd->info, pmd->root, keys, &value); if (!r) block_time = le64_to_cpu(value); up_read(&pmd->root_lock); } else if (down_read_trylock(&pmd->root_lock)) { + if (pmd->fail_io) { + up_read(&pmd->root_lock); + return -EINVAL; + } + r = dm_btree_lookup(&pmd->nb_info, pmd->root, keys, &value); if (!r) block_time = le64_to_cpu(value); @@ -1392,7 +1417,7 @@ int dm_thin_insert_block(struct dm_thin_ int r; down_write(&td->pmd->root_lock); - r = __insert(td, block, data_block); + r = td->pmd->fail_io ? -EINVAL : __insert(td, block, data_block); up_write(&td->pmd->root_lock); return r; @@ -1419,7 +1444,7 @@ int dm_thin_remove_block(struct dm_thin_ int r; down_write(&td->pmd->root_lock); - r = __remove(td, block); + r = td->pmd->fail_io ? -EINVAL : __remove(td, block); up_write(&td->pmd->root_lock); return r; @@ -1436,12 +1461,23 @@ bool dm_thin_changed_this_transaction(st return r; } +bool dm_thin_aborted_changes(struct dm_thin_device *td) +{ + int r; + + down_read(&td->pmd->root_lock); + r = td->aborted_with_changes; + up_read(&td->pmd->root_lock); + + return r; +} + int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) { int r; down_write(&pmd->root_lock); - r = dm_sm_new_block(pmd->data_sm, result); + r = pmd->fail_io ? 
-EINVAL : dm_sm_new_block(pmd->data_sm, result); up_write(&pmd->root_lock); return r; @@ -1452,6 +1488,10 @@ int dm_pool_commit_metadata(struct dm_po int r; down_write(&pmd->root_lock); + if (pmd->fail_io) { + r = -EINVAL; + goto out; + } r = __commit_transaction(pmd); if (r <= 0) @@ -1466,12 +1506,41 @@ out: return r; } +static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) +{ + struct dm_thin_device *td; + + list_for_each_entry(td, &pmd->thin_devices, list) + td->aborted_with_changes = td->changed; +} + +int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) +{ + int r; + + down_write(&pmd->root_lock); + if (pmd->fail_io) { + r = -EINVAL; + goto out; + } + + __set_abort_with_changes_flags(pmd); + __destroy_persistent_data_objects(pmd); + r = __create_persistent_data_objects(pmd, false); + if (r) + pmd->fail_io = true; + +out: + up_write(&pmd->root_lock); + return r; +} + int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) { int r; down_read(&pmd->root_lock); - r = dm_sm_get_nr_free(pmd->data_sm, result); + r = pmd->fail_io ? -EINVAL : dm_sm_get_nr_free(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1483,7 +1552,7 @@ int dm_pool_get_free_metadata_block_coun int r; down_read(&pmd->root_lock); - r = dm_sm_get_nr_free(pmd->metadata_sm, result); + r = pmd->fail_io ? -EINVAL : dm_sm_get_nr_free(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1495,7 +1564,7 @@ int dm_pool_get_metadata_dev_size(struct int r; down_read(&pmd->root_lock); - r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); + r = pmd->fail_io ? -EINVAL : dm_sm_get_nr_blocks(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1515,7 +1584,7 @@ int dm_pool_get_data_dev_size(struct dm_ int r; down_read(&pmd->root_lock); - r = dm_sm_get_nr_blocks(pmd->data_sm, result); + r = pmd->fail_io ? 
-EINVAL : dm_sm_get_nr_blocks(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1523,13 +1592,19 @@ int dm_pool_get_data_dev_size(struct dm_ int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result) { + int r; struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - *result = td->mapped_blocks; + if (pmd->fail_io) + r = -EINVAL; + else { + *result = td->mapped_blocks; + r = 0; + } up_read(&pmd->root_lock); - return 0; + return r; } static int __highest_block(struct dm_thin_device *td, dm_block_t *result) @@ -1555,7 +1630,7 @@ int dm_thin_get_highest_mapped_block(str struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - r = __highest_block(td, result); + r = pmd->fail_io ? -EINVAL : __highest_block(td, result); up_read(&pmd->root_lock); return r; @@ -1586,7 +1661,7 @@ int dm_pool_resize_data_dev(struct dm_po int r; down_write(&pmd->root_lock); - r = __resize_data_dev(pmd, new_count); + r = pmd->fail_io ? -EINVAL : __resize_data_dev(pmd, new_count); up_write(&pmd->root_lock); return r; Index: linux-3.5/drivers/md/dm-thin-metadata.h =================================================================== --- linux-3.5.orig/drivers/md/dm-thin-metadata.h +++ linux-3.5/drivers/md/dm-thin-metadata.h @@ -80,6 +80,16 @@ int dm_pool_delete_thin_device(struct dm int dm_pool_commit_metadata(struct dm_pool_metadata *pmd); /* + * Discards all uncommitted changes. Rereads the superblock, rolling back + * to the last good transaction. Thin devices remain open, if they had + * uncommitted changes dm_thin_aborted_changes() will tell you. + * + * If this call fails it's only useful to call dm_pool_metadata_close(). + * All other methods will fail with -EINVAL. + */ +int dm_pool_abort_metadata(struct dm_pool_metadata *pmd); + +/* * Set/get userspace transaction id. 
*/ int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, @@ -150,6 +160,8 @@ int dm_thin_remove_block(struct dm_thin_ */ bool dm_thin_changed_this_transaction(struct dm_thin_device *td); +bool dm_thin_aborted_changes(struct dm_thin_device *td); + int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, dm_block_t *highest_mapped);