drivers/md/Kconfig | 25 drivers/md/Makefile | 4 drivers/md/dm-cache-metadata.c | 146 ++++ drivers/md/dm-cache-metadata.h | 21 drivers/md/dm-cache-policy-cleaner.c | 2 drivers/md/dm-cache-policy-hints.c | 772 ++++++++++++++++++++++++++ drivers/md/dm-cache-policy-internal.h | 13 drivers/md/dm-cache-policy-mq-era.c | 546 ++++++++++++++++++ drivers/md/dm-cache-policy-mq.c | 44 - drivers/md/dm-cache-policy.c | 17 drivers/md/dm-cache-policy.h | 20 drivers/md/dm-cache-target.c | 281 ++++++++- drivers/md/persistent-data/dm-block-manager.h | 5 13 files changed, 1796 insertions(+), 100 deletions(-) Index: linux/drivers/md/Kconfig =================================================================== --- linux.orig/drivers/md/Kconfig +++ linux/drivers/md/Kconfig @@ -282,6 +282,24 @@ config DM_CACHE_MQ This is meant to be a general purpose policy. It prioritises reads over writes. +config DM_CACHE_MQ_ERA + tristate "MQ-ERA Cache Policy (EXPERIMENTAL)" + depends on DM_CACHE_MQ + default y + ---help--- + A wrapper for the MQ policy that adds an "era" property to + the per-cache-block metadata, to facilitate the implementation + of cache coherency validation and recovery tools. This mechanism + works as follows. There is a monotonically increasing 32-bit era + counter associated with each cache instance. Each cache block is + tagged with the era during which it was last written. A device + mapper message interface is provided to obtain the current era, + advance to the next era, and invalidate blocks from before or + after a given era. NOTE: this policy will provide the same + performance benefits as MQ but requires significantly more memory + to support the era mechanism. If you do not need era support, + use MQ rather than MQ-ERA. + config DM_CACHE_CLEANER tristate "Cleaner Cache Policy (EXPERIMENTAL)" depends on DM_CACHE @@ -290,6 +308,13 @@ config DM_CACHE_CLEANER A simple cache policy that writes back all data to the origin. Used when decommissioning a dm-cache. 
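For illustration only: the "device mapper message interface" mentioned in the MQ-ERA help text is the ordinary dm message path, which the cache target hands through to the policy's set_config_value as a key/value pair. A minimal userspace sketch using libdevmapper follows; the device name and era values are made-up examples, while the message keys are the ones mq-era registers later in this patch (increment_era_counter, unmap_blocks_from_earlier_eras, ...). The equivalent command line would be "dmsetup message <cache-dev> 0 <key> <value>".

#include <libdevmapper.h>

/*
 * Illustrative sketch: send one two-word policy message to a dm-cache
 * device.  Device name and era value below are hypothetical.
 */
static int send_cache_policy_message(const char *dm_name, const char *msg)
{
	struct dm_task *dmt = dm_task_create(DM_DEVICE_TARGET_MSG);
	int r = -1;

	if (!dmt)
		return r;

	if (dm_task_set_name(dmt, dm_name) &&
	    dm_task_set_sector(dmt, 0) &&
	    dm_task_set_message(dmt, msg) &&
	    dm_task_run(dmt))
		r = 0;

	dm_task_destroy(dmt);
	return r;
}

/*
 * e.g. advance from era 5 to era 6, then drop blocks written before era 6:
 *   send_cache_policy_message("my-cache", "increment_era_counter 5");
 *   send_cache_policy_message("my-cache", "unmap_blocks_from_earlier_eras 6");
 */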
+config DM_CACHE_HINTS + tristate "Hint Size Test Cache Policy (EXPERIMENTAL)" + depends on DM_CACHE + default y + ---help--- + A dumb cache policy just for the purpose to test variable hint size + config DM_MIRROR tristate "Mirror target" depends on BLK_DEV_DM Index: linux/drivers/md/Makefile =================================================================== --- linux.orig/drivers/md/Makefile +++ linux/drivers/md/Makefile @@ -13,7 +13,9 @@ dm-log-userspace-y \ dm-thin-pool-y += dm-thin.o dm-thin-metadata.o dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o dm-cache-mq-y += dm-cache-policy-mq.o +dm-cache-mq-era-y += dm-cache-policy-mq-era.o dm-cache-cleaner-y += dm-cache-policy-cleaner.o +dm-cache-hints-y += dm-cache-policy-hints.o md-mod-y += md.o bitmap.o raid456-y += raid5.o @@ -51,6 +53,8 @@ obj-$(CONFIG_DM_THIN_PROVISIONING) += dm obj-$(CONFIG_DM_VERITY) += dm-verity.o obj-$(CONFIG_DM_CACHE) += dm-cache.o obj-$(CONFIG_DM_CACHE_MQ) += dm-cache-mq.o +obj-$(CONFIG_DM_CACHE_MQ_ERA) += dm-cache-mq-era.o +obj-$(CONFIG_DM_CACHE_HINTS) += dm-cache-hints.o obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o ifeq ($(CONFIG_DM_UEVENT),y) Index: linux/drivers/md/dm-cache-metadata.c =================================================================== --- linux.orig/drivers/md/dm-cache-metadata.c +++ linux/drivers/md/dm-cache-metadata.c @@ -113,6 +113,7 @@ struct dm_cache_metadata { char policy_name[CACHE_POLICY_NAME_SIZE]; unsigned policy_version[CACHE_POLICY_VERSION_SIZE]; size_t policy_hint_size; + void *policy_hint_value_buffer; struct dm_cache_statistics stats; }; @@ -198,7 +199,7 @@ static int superblock_lock(struct dm_cac /*----------------------------------------------------------------*/ -static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result) +static int __superblock_all_zeroes(struct dm_block_manager *bm, bool *result) { int r; unsigned i; @@ -214,10 +215,10 @@ static int __superblock_all_zeroes(struc return r; data_le = dm_block_data(b); - *result = 1; + *result = true; for (i = 0; i < sb_block_size; i++) { if (data_le[i] != zero) { - *result = 0; + *result = false; break; } } @@ -225,7 +226,7 @@ static int __superblock_all_zeroes(struc return dm_bm_unlock(b); } -static void __setup_mapping_info(struct dm_cache_metadata *cmd) +static int __setup_mapping_info(struct dm_cache_metadata *cmd) { struct dm_btree_value_type vt; @@ -237,11 +238,34 @@ static void __setup_mapping_info(struct dm_array_info_init(&cmd->info, cmd->tm, &vt); if (cmd->policy_hint_size) { - vt.size = sizeof(__le32); + if (cmd->policy_hint_size > DM_CACHE_POLICY_MAX_HINT_SIZE) { + DMERR("hint size is too large %d > %d", + (int) cmd->policy_hint_size, + (int) DM_CACHE_POLICY_MAX_HINT_SIZE); + return -EPERM; + } + + vt.size = cmd->policy_hint_size; dm_array_info_init(&cmd->hint_info, cmd->tm, &vt); - } + + cmd->policy_hint_value_buffer = kmalloc(cmd->policy_hint_size, GFP_KERNEL); + if (!cmd->policy_hint_value_buffer) { + DMERR("unable to allocate hint value buffer"); + return -ENOMEM; + } + } else + cmd->policy_hint_value_buffer = NULL; + + return 0; } +static void __teardown_mapping_info(struct dm_cache_metadata *cmd) +{ + if (cmd->policy_hint_value_buffer) + kfree(cmd->policy_hint_value_buffer); +} + + static int __write_initial_superblock(struct dm_cache_metadata *cmd) { int r; @@ -312,7 +336,9 @@ static int __format_metadata(struct dm_c return r; } - __setup_mapping_info(cmd); + r = __setup_mapping_info(cmd); + if (r < 0) + goto bad_mapping_info; r = dm_array_empty(&cmd->info, 
&cmd->root); if (r < 0) @@ -335,6 +361,8 @@ static int __format_metadata(struct dm_c return 0; bad: + __teardown_mapping_info(cmd); +bad_mapping_info: dm_tm_destroy(cmd->tm); dm_sm_destroy(cmd->metadata_sm); @@ -397,7 +425,10 @@ static int __open_metadata(struct dm_cac goto bad; } - __setup_mapping_info(cmd); + r = __setup_mapping_info(cmd); + if (r < 0) + goto bad; + dm_disk_bitset_init(cmd->tm, &cmd->discard_info); sb_flags = le32_to_cpu(disk_super->flags); cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags); @@ -411,7 +442,8 @@ bad: static int __open_or_format_metadata(struct dm_cache_metadata *cmd, bool format_device) { - int r, unformatted; + int r; + bool unformatted = false; r = __superblock_all_zeroes(cmd->bm, &unformatted); if (r) @@ -581,6 +613,7 @@ static int __commit_transaction(struct d disk_super->policy_version[0] = cpu_to_le32(cmd->policy_version[0]); disk_super->policy_version[1] = cpu_to_le32(cmd->policy_version[1]); disk_super->policy_version[2] = cpu_to_le32(cmd->policy_version[2]); + disk_super->policy_hint_size = cpu_to_le32(cmd->policy_hint_size); disk_super->read_hits = cpu_to_le32(cmd->stats.read_hits); disk_super->read_misses = cpu_to_le32(cmd->stats.read_misses); @@ -647,6 +680,7 @@ struct dm_cache_metadata *dm_cache_metad r = __create_persistent_data_objects(cmd, may_format_device); if (r) { + __teardown_mapping_info(cmd); kfree(cmd); return ERR_PTR(r); } @@ -663,22 +697,86 @@ struct dm_cache_metadata *dm_cache_metad void dm_cache_metadata_close(struct dm_cache_metadata *cmd) { __destroy_persistent_data_objects(cmd); + __teardown_mapping_info(cmd); kfree(cmd); } +/* + * Checks that the given cache block is either unmapped, or clean. + */ +static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b, + bool *result) +{ + int r; + __le64 value; + dm_oblock_t ob; + unsigned flags; + + r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value); + if (r) { + DMERR("block_unmapped_or_clean failed"); + return r; + } + + unpack_value(value, &ob, &flags); + *result = !((flags & (1 << M_VALID)) && (flags & (1 << M_DIRTY))); + + return 0; +} + +static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd, + dm_cblock_t begin, + dm_cblock_t end, + bool *result) +{ + int r; + + while (begin != end) { + r = block_unmapped_or_clean(cmd, begin, result); + if (r) + return r; + + if (!*result) { + DMERR("cache block %llu is dirty", + (unsigned long long) from_cblock(begin)); + return 0; + } + + begin++; + } + + return 0; +} + int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size) { int r; + bool clean; __le64 null_mapping = pack_value(0, 0); down_write(&cmd->root_lock); __dm_bless_for_disk(&null_mapping); + + if (new_cache_size < cmd->cache_blocks) { + r = blocks_are_unmapped_or_clean(cmd, new_cache_size, cmd->cache_blocks, &clean); + if (r) + goto out; + + if (!clean) { + DMERR("unable to shrink cache due to dirty blocks"); + r = -EINVAL; + goto out; + } + } + r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks), from_cblock(new_cache_size), &null_mapping, &cmd->root); if (!r) cmd->cache_blocks = new_cache_size; cmd->changed = true; + +out: up_write(&cmd->root_lock); return r; @@ -908,7 +1006,6 @@ static int __load_mapping(void *context, int r = 0; bool dirty; __le64 value; - __le32 hint_value = 0; dm_oblock_t oblock; unsigned flags; struct thunk *thunk = context; @@ -920,14 +1017,14 @@ static int __load_mapping(void *context, if (flags & M_VALID) { if (thunk->hints_valid) { r = 
dm_array_get_value(&cmd->hint_info, cmd->hint_root, - cblock, &hint_value); + cblock, cmd->policy_hint_value_buffer); if (r && r != -ENODATA) return r; } dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true; r = thunk->fn(thunk->context, oblock, to_cblock(cblock), - dirty, le32_to_cpu(hint_value), thunk->hints_valid); + dirty, cmd->policy_hint_value_buffer, thunk->hints_valid); } return r; @@ -1103,8 +1200,6 @@ int dm_cache_get_metadata_dev_size(struc static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy) { int r; - __le32 value; - size_t hint_size; const char *policy_name = dm_cache_policy_get_name(policy); const unsigned *policy_version = dm_cache_policy_get_version(policy); @@ -1113,6 +1208,8 @@ static int begin_hints(struct dm_cache_m return -EINVAL; if (!policy_unchanged(cmd, policy)) { + size_t hint_size; + strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name)); memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version)); @@ -1131,11 +1228,11 @@ static int begin_hints(struct dm_cache_m if (r) return r; - value = cpu_to_le32(0); + memset(cmd->policy_hint_value_buffer, 0, hint_size); __dm_bless_for_disk(&value); r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0, from_cblock(cmd->cache_blocks), - &value, &cmd->hint_root); + cmd->policy_hint_value_buffer, &cmd->hint_root); if (r) return r; } @@ -1154,22 +1251,20 @@ int dm_cache_begin_hints(struct dm_cache return r; } -static int save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, - uint32_t hint) +static int save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, void *hint) + __dm_written_to_disk(hint) { int r; - __le32 value = cpu_to_le32(hint); - __dm_bless_for_disk(&value); r = dm_array_set_value(&cmd->hint_info, cmd->hint_root, - from_cblock(cblock), &value, &cmd->hint_root); + from_cblock(cblock), hint, &cmd->hint_root); cmd->changed = true; return r; } -int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, - uint32_t hint) +int dm_cache_save_hint(struct dm_cache_metadata *cmd, dm_cblock_t cblock, void *hint) + __dm_written_to_disk(hint) { int r; @@ -1182,3 +1277,8 @@ int dm_cache_save_hint(struct dm_cache_m return r; } + +int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result) +{ + return blocks_are_unmapped_or_clean(cmd, 0, cmd->cache_blocks, result); +} Index: linux/drivers/md/dm-cache-metadata.h =================================================================== --- linux.orig/drivers/md/dm-cache-metadata.h +++ linux/drivers/md/dm-cache-metadata.h @@ -87,7 +87,7 @@ int dm_cache_changed_this_transaction(st typedef int (*load_mapping_fn)(void *context, dm_oblock_t oblock, dm_cblock_t cblock, bool dirty, - uint32_t hint, bool hint_valid); + void *hint, bool hint_valid); int dm_cache_load_mappings(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy, load_mapping_fn fn, @@ -118,9 +118,10 @@ int dm_cache_get_metadata_dev_size(struc void dm_cache_dump(struct dm_cache_metadata *cmd); /* - * The policy is invited to save a 32bit hint value for every cblock (eg, - * for a hit count). These are stored against the policy name. If - * policies are changed, then hints will be lost. If the machine crashes, + * The policy is invited to save a hint (void* sequence of bytes) for every + * cblock (eg, for a hit count) and is reponsible to do endianess conversions. + * These are stored against the policy name. + * If policies are changed, then hints will be lost. If the machine crashes, * hints will be lost. 
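As a concrete illustration of the contract described in this comment: the struct layout and helper below are invented for the example; only policy_walk_fn, __dm_bless_for_disk() and cpu_to_le32() come from the code in this patch (mq_save_hints does the same for a single __le32). A policy with an 8-byte hint might hand it to the walk_mappings callback like this:

struct example_hint {
	__le32 hit_count;
	__le32 last_era;
};

/*
 * Sketch: convert an in-core hint to its on-disk (little-endian) form,
 * bless it for disk and pass it to the walk_mappings callback.  The
 * policy's registered hint_size would have to be
 * sizeof(struct example_hint).
 */
static int example_emit_hint(policy_walk_fn fn, void *context,
			     dm_cblock_t cblock, dm_oblock_t oblock,
			     uint32_t hit_count, uint32_t last_era)
{
	struct example_hint h = {
		.hit_count = cpu_to_le32(hit_count),
		.last_era  = cpu_to_le32(last_era),
	};

	__dm_bless_for_disk(&h);
	return fn(context, cblock, oblock, &h);
}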
* * The hints are indexed by the cblock, but many policies will not @@ -132,10 +133,18 @@ void dm_cache_dump(struct dm_cache_metad int dm_cache_begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *p); /* - * requests hints for every cblock and stores in the metadata device. + * Saves the hint for a given cblock in the metadata device. Policy + * modules must perform any endian conversions needed and bless the hints + * for disk. */ int dm_cache_save_hint(struct dm_cache_metadata *cmd, - dm_cblock_t cblock, uint32_t hint); + dm_cblock_t cblock, void *hint) + __dm_written_to_disk(hint); + +/* + * Query method. Are all the blocks in the cache clean? + */ +int dm_cache_metadata_all_clean(struct dm_cache_metadata *cmd, bool *result); /*----------------------------------------------------------------*/ Index: linux/drivers/md/dm-cache-policy-cleaner.c =================================================================== --- linux.orig/drivers/md/dm-cache-policy-cleaner.c +++ linux/drivers/md/dm-cache-policy-cleaner.c @@ -274,7 +274,7 @@ static void add_cache_entry(struct polic static int wb_load_mapping(struct dm_cache_policy *pe, dm_oblock_t oblock, dm_cblock_t cblock, - uint32_t hint, bool hint_valid) + void *hint, bool hint_valid) { int r; struct policy *p = to_policy(pe); Index: linux/drivers/md/dm-cache-policy-hints.c =================================================================== --- /dev/null +++ linux/drivers/md/dm-cache-policy-hints.c @@ -0,0 +1,772 @@ +/* + * Copyright (C) 2013 Red Hat. All rights reserved. + * + * This file is released under the GPL. + * + * TESTING! NOT FOR PRODUCTION USE! + * + * "hints" policy to test variable hint size. + */ + +#include "dm.h" +#include "dm-cache-policy.h" +#include "dm-cache-policy-internal.h" + +#include +#include +#include + +#define DM_MSG_PREFIX "cache-policy-hints" + +/*----------------------------------------------------------------*/ + +static struct kmem_cache *hints_entry_cache; + +/*----------------------------------------------------------------*/ + +static unsigned next_power(unsigned n, unsigned min) +{ + return roundup_pow_of_two(max(n, min)); +} + +struct hash { + struct hlist_head *table; + dm_block_t hash_bits; + unsigned nr_buckets; +}; + +struct entry { + struct hlist_node hlist; + struct list_head list; + dm_oblock_t oblock; + dm_cblock_t cblock; +}; + +#define DEFAULT_HINT_SIZE DM_CACHE_POLICY_MAX_HINT_SIZE +struct policy { + struct dm_cache_policy policy; + struct mutex lock; + + sector_t origin_size, block_size; + + /* To optimize search in the allocation bitset */ + unsigned find_free_nr_words, find_free_last_word; + unsigned long *allocation_bitset; + + dm_cblock_t nr_cblocks_allocated; + dm_cblock_t cache_size; + + struct { + struct list_head free; /* Free cache entry list */ + struct list_head used; /* Used cache entry list */ + } queues; + + /* The cache hash */ + struct hash chash; + + void *hints_buffer; + unsigned hint_counter[4]; + + /* Flag to block (re)setting hint_size via the message interface */ + bool hint_size_set; +}; + +/*----------------------------------------------------------------------------*/ +/* Low-level queue function. */ +static struct entry *queue_pop(struct list_head *q) +{ + if (!list_empty(q)) { + struct list_head *elt = q->next; + + list_del(elt); + return list_entry(elt, struct entry, list); + } + + return NULL; +} +/*----------------------------------------------------------------------------*/ + +/* Allocate/free various resources. 
*/ +static int alloc_hash(struct hash *hash, unsigned elts) +{ + hash->nr_buckets = next_power(elts >> 4, 16); + hash->hash_bits = ffs(hash->nr_buckets) - 1; + hash->table = vzalloc(sizeof(*hash->table) * hash->nr_buckets); + + return hash->table ? 0 : -ENOMEM; +} + +static void free_hash(struct hash *hash) +{ + vfree(hash->table); +} + +/* Free/alloc basic cache entry structures. */ +static void __free_cache_entries(struct list_head *q) { + struct entry *e; + + while ((e = queue_pop(q))) + kmem_cache_free(hints_entry_cache, e); +} + +static void free_cache_entries(struct policy *p) +{ + __free_cache_entries(&p->queues.free); + __free_cache_entries(&p->queues.used); +} + +static int alloc_cache_blocks_with_hash(struct policy *p, unsigned cache_size) +{ + int r = -ENOMEM; + unsigned u = cache_size; + + p->nr_cblocks_allocated = to_cblock(0); + + while (u--) { + struct entry *e = kmem_cache_zalloc(hints_entry_cache, GFP_KERNEL); + + if (!e) + goto bad_cache_alloc; + + list_add(&e->list, &p->queues.free); + } + + /* Cache entries hash. */ + r = alloc_hash(&p->chash, cache_size); + if (r) + goto bad_cache_alloc; + + return 0; + +bad_cache_alloc: + free_cache_entries(p); + + return r; +} + +static void free_cache_blocks_and_hash(struct policy *p) +{ + free_hash(&p->chash); + free_cache_entries(p); +} + +static void alloc_cblock(struct policy *p, dm_cblock_t cblock) +{ + BUG_ON(from_cblock(cblock) >= from_cblock(p->cache_size)); + BUG_ON(test_bit(from_cblock(cblock), p->allocation_bitset)); + set_bit(from_cblock(cblock), p->allocation_bitset); +} + +static void free_cblock(struct policy *p, dm_cblock_t cblock) +{ + BUG_ON(from_cblock(cblock) >= from_cblock(p->cache_size)); + BUG_ON(!test_bit(from_cblock(cblock), p->allocation_bitset)); + clear_bit(from_cblock(cblock), p->allocation_bitset); +} + +/*----------------------------------------------------------------------------*/ +/* Low-level functions. */ +static struct policy *to_policy(struct dm_cache_policy *p) +{ + return container_of(p, struct policy, policy); +} + +/*----------------------------------------------------------------*/ + +static unsigned bit_set_nr_words(unsigned long nr_cblocks) +{ + return dm_div_up(nr_cblocks, BITS_PER_LONG); +} + +static unsigned long *alloc_bitset(unsigned nr_cblocks) +{ + return vzalloc(sizeof(unsigned long) * bit_set_nr_words(nr_cblocks)); +} + +static void free_bitset(unsigned long *bits) +{ + vfree(bits); +} +/*----------------------------------------------------------------------------*/ + +/* Hash functions (lookup, insert, remove). */ +static struct entry *lookup_cache_entry(struct policy *p, dm_oblock_t oblock) +{ + struct hash *hash = &p->chash; + unsigned h = hash_64(from_oblock(oblock), hash->hash_bits); + struct entry *cur; + struct hlist_head *bucket = &hash->table[h]; + + hlist_for_each_entry(cur, bucket, hlist) { + if (cur->oblock == oblock) { + /* Move upfront bucket for faster access. */ + hlist_del(&cur->hlist); + hlist_add_head(&cur->hlist, bucket); + return cur; + } + } + + return NULL; +} + +static void insert_cache_hash_entry(struct policy *p, struct entry *e) +{ + unsigned h = hash_64(from_oblock(e->oblock), p->chash.hash_bits); + + hlist_add_head(&e->hlist, &p->chash.table[h]); +} + +static void remove_cache_hash_entry(struct policy *p, struct entry *e) +{ + hlist_del(&e->hlist); +} + + +/*----------------------------------------------------------------------------*/ +/* + * This doesn't allocate the block. 
+ */ +static int __find_free_cblock(struct policy *p, unsigned begin, unsigned end, + dm_cblock_t *result, unsigned *last_word) +{ + int r = -ENOSPC; + unsigned w; + + for (w = begin; w < end; w++) { + /* + * ffz is undefined if no zero exists + */ + if (p->allocation_bitset[w] != ULONG_MAX) { + *last_word = w; + *result = to_cblock((w * BITS_PER_LONG) + ffz(p->allocation_bitset[w])); + if (from_cblock(*result) < from_cblock(p->cache_size)) + r = 0; + + break; + } + } + + return r; +} + +static int find_free_cblock(struct policy *p, dm_cblock_t *result) +{ + int r = __find_free_cblock(p, p->find_free_last_word, p->find_free_nr_words, result, &p->find_free_last_word); + + if (r == -ENOSPC && p->find_free_last_word) + r = __find_free_cblock(p, 0, p->find_free_last_word, result, &p->find_free_last_word); + + return r; +} + +static struct entry *alloc_cache_entry(struct policy *p) +{ + struct entry *e = queue_pop(&p->queues.free); + + if (e) { + BUG_ON(from_cblock(p->nr_cblocks_allocated) >= from_cblock(p->cache_size)); + p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) + 1); + } + + return e; +} + +static void alloc_cblock_and_insert_cache(struct policy *p, struct entry *e) +{ + alloc_cblock(p, e->cblock); + insert_cache_hash_entry(p, e); +} + +static void add_cache_entry(struct policy *p, struct entry *e) +{ + list_add_tail(&e->list, &p->queues.used); + alloc_cblock_and_insert_cache(p, e); +} + +static void remove_cache_entry(struct policy *p, struct entry *e) +{ + remove_cache_hash_entry(p, e); + free_cblock(p, e->cblock); +} + +static struct entry *evict_cache_entry(struct policy *p) +{ + struct entry *e = queue_pop(&p->queues.used); + + BUG_ON(!e); + remove_cache_entry(p, e); + + return e; +} + +static void get_cache_block(struct policy *p, dm_oblock_t oblock, struct bio *bio, + struct policy_result *result) +{ + struct entry *e = alloc_cache_entry(p); + + if (e) { + int r = find_free_cblock(p, &e->cblock); + + BUG_ON(r); + result->op = POLICY_NEW; + + } else { + e = evict_cache_entry(p); + result->old_oblock = e->oblock; + result->op = POLICY_REPLACE; + } + + result->cblock = e->cblock; + e->oblock = oblock; + add_cache_entry(p, e); +} + +static bool in_cache(struct policy *p, dm_oblock_t oblock, dm_cblock_t *cblock) +{ + struct entry *e = lookup_cache_entry(p, oblock); + + if (!e) + return false; + + *cblock = e->cblock; + return true; +} + +/*----------------------------------------------------------------------------*/ + +/* Public interface (see dm-cache-policy.h */ +static int hints_map(struct dm_cache_policy *pe, dm_oblock_t oblock, + bool can_block, bool can_migrate, bool discarded_oblock, + struct bio *bio, struct policy_result *result) +{ + int r = 0; + struct policy *p = to_policy(pe); + + result->op = POLICY_MISS; + + if (can_block) + mutex_lock(&p->lock); + + else if (!mutex_trylock(&p->lock)) + return -EWOULDBLOCK; + + + if (in_cache(p, oblock, &result->cblock)) + result->op = POLICY_HIT; + + else if (!can_migrate) + r = -EWOULDBLOCK; + + else + get_cache_block(p, oblock, bio, result); + + mutex_unlock(&p->lock); + + return r; +} + +static int hints_lookup(struct dm_cache_policy *pe, dm_oblock_t oblock, dm_cblock_t *cblock) +{ + int r; + struct policy *p = to_policy(pe); + + if (!mutex_trylock(&p->lock)) + return -EWOULDBLOCK; + + if (!in_cache(p, oblock, cblock)) + r = -ENOENT; + + mutex_unlock(&p->lock); + + return r; +} + +static void hints_destroy(struct dm_cache_policy *pe) +{ + struct policy *p = to_policy(pe); + + 
free_bitset(p->allocation_bitset); + free_cache_blocks_and_hash(p); + kfree(p->hints_buffer); + kfree(p); +} + +/*----------------------------------------------------------------------------*/ + +/* Hints endianess conversions */ +#define __le8 uint8_t +struct hints_ptrs { + __le64 *le64_hints; + __le32 *le32_hints; + __le16 *le16_hints; + __le8 *le8_hints; + + uint64_t *u64_hints; + uint32_t *u32_hints; + uint16_t *u16_hints; + uint8_t *u8_hints; +}; + +typedef int (*hints_xfer_fn_t) (struct hints_ptrs*, unsigned, unsigned, bool); + +#define cpu_to_le8(x) (x) +#define le8_to_cpu(x) (x) + +#define HINTS_XFER(width) \ +static int hints_ ## width ## _xfer(struct hints_ptrs *p, unsigned idx, unsigned val, bool to_disk) \ +{ \ + if (to_disk) \ + p->le ## width ## _hints[idx] = cpu_to_le ## width(val); \ +\ + else { \ + p->u ## width ## _hints[idx] = le ## width ## _to_cpu(p->le ## width ## _hints[idx]); \ + if (p->u ## width ## _hints[idx] != val) { \ + DMERR_LIMIT("%s -- hint value %llu != %u", __func__, \ + (long long unsigned) p->u ## width ## _hints[idx], val); \ + return -EINVAL; \ + } \ + } \ +\ + return 0; \ +} + +HINTS_XFER(64) +HINTS_XFER(32) +HINTS_XFER(16) +HINTS_XFER(8) + +static void calc_hint_value_counters(struct policy *p) +{ + unsigned div, rest = dm_cache_policy_get_hint_size(&p->policy), u; + + for (u = 3, div = sizeof(uint64_t); rest; u--, div >>= 1) { + p->hint_counter[u] = rest / div; + rest -= p->hint_counter[u] * div; + } +} + +/* Macro to set hint ptr for width on LHS based on RHS width<<1 */ +#define PTR_INC(lhs, rhs, c) \ + inc = 2 * p->hint_counter[c]; \ + ptrs->le ## lhs ## _hints = (__le ## lhs *) ptrs->le ## rhs ## _hints + inc; \ + ptrs->u ## lhs ## _hints = (uint ## lhs ## _t *) ptrs->u ## rhs ## _hints + inc; + +static void set_hints_ptrs(struct policy *p, struct hints_ptrs *ptrs) +{ + unsigned inc; + + ptrs->le64_hints = p->hints_buffer; + ptrs->u64_hints = p->hints_buffer; + + PTR_INC(32, 64, 3) + PTR_INC(16, 32, 2) + PTR_INC( 8, 16, 1) +} + +static void __hints_xfer_disk(struct policy *p, bool to_disk) +{ + unsigned idx, u, val; + hints_xfer_fn_t hints_xfer_fns[] = { + hints_8_xfer, + hints_16_xfer, + hints_32_xfer, + hints_64_xfer + }; + + struct hints_ptrs hints_ptrs; + + if (!p->hint_size_set) { + calc_hint_value_counters(p); + p->hint_size_set = true; + } + + /* Must happen after calc_hint_value_counters()! */ + set_hints_ptrs(p, &hints_ptrs); + + val = 1; + u = ARRAY_SIZE(hints_xfer_fns); + while (u--) { + for (idx = 0; idx < p->hint_counter[u]; idx++) { + /* + * val only suitable because of 256 hint value limitation. + * + * An uint8_t maxes at 255, so we could theoretically + * test hint sizes up to 2023 bytes with this limitation. 
+ */ + if (hints_xfer_fns[u](&hints_ptrs, idx, val, to_disk)) + return; + + val++; + } + } + + return; +} + +static void hints_preset_and_to_disk(struct policy *p) +{ + __hints_xfer_disk(p, true); +} + +static void hints_from_disk_and_check(struct policy *p) +{ + __hints_xfer_disk(p, false); +} + +static int hints_load_mapping(struct dm_cache_policy *pe, + dm_oblock_t oblock, dm_cblock_t cblock, + void *hint, bool hint_valid) +{ + struct policy *p = to_policy(pe); + struct entry *e; + + e = alloc_cache_entry(p); + if (!e) + return -ENOMEM; + + e->cblock = cblock; + e->oblock = oblock; + + if (hint_valid) { + void *tmp = p->hints_buffer; + + p->hints_buffer = hint; + hints_from_disk_and_check(p); + p->hints_buffer = tmp; + } + + alloc_cblock_and_insert_cache(p, e); + + return 0; +} + +/* Walk mappings */ +static int hints_walk_mappings(struct dm_cache_policy *pe, policy_walk_fn fn, void *context) +{ + int r = 0; + struct policy *p = to_policy(pe); + struct entry *e; + + hints_preset_and_to_disk(p); + + mutex_lock(&p->lock); + + list_for_each_entry(e, &p->queues.used, list) { + r = fn(context, e->cblock, e->oblock, (void*) p->hints_buffer); + if (r) + break; + } + + mutex_unlock(&p->lock); + + return r; +} + +static struct entry *__hints_force_remove_mapping(struct policy *p, + dm_oblock_t oblock) +{ + struct entry *e = lookup_cache_entry(p, oblock); + + BUG_ON(!e); + + list_del(&e->list); + remove_cache_entry(p, e); + + return e; +} + +static void hints_remove_mapping(struct dm_cache_policy *pe, dm_oblock_t oblock) +{ + struct policy *p = to_policy(pe); + struct entry *e; + + mutex_lock(&p->lock); + e = __hints_force_remove_mapping(p, oblock); + list_add_tail(&e->list, &p->queues.free); + + BUG_ON(!from_cblock(p->nr_cblocks_allocated)); + p->nr_cblocks_allocated = to_cblock(from_cblock(p->nr_cblocks_allocated) - 1); + mutex_unlock(&p->lock); +} + +static void hints_force_mapping(struct dm_cache_policy *pe, + dm_oblock_t current_oblock, dm_oblock_t oblock) +{ + struct policy *p = to_policy(pe); + struct entry *e; + + mutex_lock(&p->lock); + + e = __hints_force_remove_mapping(p, current_oblock); + e->oblock = oblock; + add_cache_entry(p, e); + + mutex_unlock(&p->lock); +} + +static int hints_next_dirty_block(struct dm_cache_policy *pe, dm_oblock_t *oblock, dm_cblock_t *cblock) +{ + return -ENOENT; +} + +static dm_cblock_t hints_residency(struct dm_cache_policy *pe) +{ + /* FIXME: lock mutex, not sure we can block here. */ + return to_policy(pe)->nr_cblocks_allocated; +} + +static int hints_set_config_value(struct dm_cache_policy *pe, + const char *key, const char *value) +{ + if (!strcasecmp(key, "hint_size")) { + struct policy *p = to_policy(pe); + + if (p->hint_size_set) + return -EPERM; + + else { + unsigned tmp; + + if (kstrtou32(value, 10, &tmp)) + return -EINVAL; + + else { + int r = dm_cache_policy_set_hint_size(pe, tmp); + + if (!r) { + calc_hint_value_counters(p); + p->hint_size_set = true; + } + + return r; + } + } + } + + return -EINVAL; +} + +static int hints_emit_config_values(struct dm_cache_policy *pe, char *result, unsigned maxlen) +{ + ssize_t sz = 0; + + DMEMIT("hint_size %llu", (long long unsigned) dm_cache_policy_get_hint_size(pe)); + return 0; +} + +/* Init the policy plugin interface function pointers. 
*/ +static void init_policy_functions(struct policy *p) +{ + p->policy.destroy = hints_destroy; + p->policy.map = hints_map; + p->policy.lookup = hints_lookup; +#if 0 + p->policy.set_dirty = NULL; + p->policy.clear_dirty = NULL; +#endif + p->policy.load_mapping = hints_load_mapping; + p->policy.walk_mappings = hints_walk_mappings; + p->policy.remove_mapping = hints_remove_mapping; + p->policy.writeback_work = NULL; + p->policy.next_dirty_block = hints_next_dirty_block; + p->policy.force_mapping = hints_force_mapping; + p->policy.residency = hints_residency; + p->policy.tick = NULL; + p->policy.emit_config_values = hints_emit_config_values; + p->policy.set_config_value = hints_set_config_value; +} + +static struct dm_cache_policy *hints_policy_create(dm_cblock_t cache_size, + sector_t origin_size, + sector_t block_size) +{ + int r; + struct policy *p = kzalloc(sizeof(*p), GFP_KERNEL); + + if (!p) + return NULL; + + init_policy_functions(p); + + p->cache_size = cache_size; + p->find_free_nr_words = bit_set_nr_words(from_cblock(cache_size)); + p->find_free_last_word = 0; + p->block_size = block_size; + p->origin_size = origin_size; + mutex_init(&p->lock); + INIT_LIST_HEAD(&p->queues.free); + INIT_LIST_HEAD(&p->queues.used); + + /* Allocate cache entry structs and add them to free list. */ + r = alloc_cache_blocks_with_hash(p, from_cblock(cache_size)); + if (r) + goto bad_free_policy; + + /* Cache allocation bitset. */ + p->allocation_bitset = alloc_bitset(from_cblock(cache_size)); + if (!p->allocation_bitset) + goto bad_free_cache_blocks_and_hash; + + p->hints_buffer = kzalloc(DM_CACHE_POLICY_MAX_HINT_SIZE, GFP_KERNEL); + if (!p->hints_buffer) + goto bad_free_allocation_bitset; + + p->hint_size_set = false; + + return &p->policy; + +bad_free_allocation_bitset: + free_bitset(p->allocation_bitset); +bad_free_cache_blocks_and_hash: + free_cache_blocks_and_hash(p); +bad_free_policy: + kfree(p); + + return NULL; +} + +/*----------------------------------------------------------------------------*/ +static struct dm_cache_policy_type hints_policy_type = { + .name = "hints", + .version = {1, 0, 0}, + .hint_size = DEFAULT_HINT_SIZE, + .owner = THIS_MODULE, + .create = hints_policy_create +}; + +static int __init hints_init(void) +{ + int r = -ENOMEM; + + hints_entry_cache = kmem_cache_create("dm_hints_policy_cache_entry", + sizeof(struct entry), + __alignof__(struct entry), + 0, NULL); + if (hints_entry_cache) { + r = dm_cache_policy_register(&hints_policy_type); + if (r) + kmem_cache_destroy(hints_entry_cache); + + else { + DMINFO("version %u.%u.%u loaded", + hints_policy_type.version[0], + hints_policy_type.version[1], + hints_policy_type.version[2]); + } + } + + return r; +} + +static void __exit hints_exit(void) +{ + dm_cache_policy_unregister(&hints_policy_type); + kmem_cache_destroy(hints_entry_cache); +} + +module_init(hints_init); +module_exit(hints_exit); + +MODULE_AUTHOR("Heinz Mauelshagen "); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("hint size test cache policy"); Index: linux/drivers/md/dm-cache-policy-internal.h =================================================================== --- linux.orig/drivers/md/dm-cache-policy-internal.h +++ linux/drivers/md/dm-cache-policy-internal.h @@ -41,7 +41,7 @@ static inline void policy_clear_dirty(st static inline int policy_load_mapping(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t cblock, - uint32_t hint, bool hint_valid) + void *hint, bool hint_valid) { return p->load_mapping(p, oblock, cblock, hint, hint_valid); } @@ -59,6 +59,13 @@ 
static inline int policy_writeback_work( return p->writeback_work ? p->writeback_work(p, oblock, cblock) : -ENOENT; } +static inline int policy_next_dirty_block(struct dm_cache_policy *p, + dm_oblock_t *oblock, + dm_cblock_t *cblock) +{ + return p->next_dirty_block ? p->next_dirty_block(p, oblock, cblock) : -ENOENT; +} + static inline void policy_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) { return p->remove_mapping(p, oblock); @@ -87,7 +94,7 @@ static inline int policy_emit_config_val if (p->emit_config_values) return p->emit_config_values(p, result, maxlen); - DMEMIT("0"); + DMEMIT(" 0"); return 0; } @@ -119,6 +126,8 @@ const char *dm_cache_policy_get_name(str const unsigned *dm_cache_policy_get_version(struct dm_cache_policy *p); +#define DM_CACHE_POLICY_MAX_HINT_SIZE 256 /* Max 2023 for the policy hints test module to work */ +int dm_cache_policy_set_hint_size(struct dm_cache_policy *p, unsigned hint_size); size_t dm_cache_policy_get_hint_size(struct dm_cache_policy *p); /*----------------------------------------------------------------*/ Index: linux/drivers/md/dm-cache-policy-mq-era.c =================================================================== --- /dev/null +++ linux/drivers/md/dm-cache-policy-mq-era.c @@ -0,0 +1,546 @@ +/* + * Copyright 2013 NetApp, Inc. All Rights Reserved, contribution by + * Morgan Mears. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details + * + */ + +#include "dm-cache-policy.h" +#include "dm-cache-policy-internal.h" +#include "dm.h" + +#include +#include +#include +#include +#include + +#define DM_MSG_PREFIX "cache-policy-mq-era" + +typedef uint32_t era_t; +#define MQ_ERA_MAX_ERA UINT_MAX + +struct mq_era_policy { + struct dm_cache_policy policy; + struct mutex lock; /* FIXME: spinlock? */ + struct dm_cache_policy *mq; + dm_cblock_t cache_size; + era_t *cb_to_era; + era_t era_counter; +}; + +/*----------------------------------------------------------------*/ + +static struct mq_era_policy *to_mq_era_policy(struct dm_cache_policy *p) +{ + return container_of(p, struct mq_era_policy, policy); +} + +static int incr_era_counter(struct mq_era_policy *mq_era, const char *curr_era_counter_str) +{ + era_t curr_era_counter; + int r; + + /* + * If the era counter value provided by the user matches the current + * counter value while under lock, increment the counter (intention + * is to prevent races). Rollover problems are avoided by locking + * the counter at a maximum value (the application must take + * appropriate action on this error to preserve correction, but + * a properly behaved set of applications will never trigger it; + * the era counter is meant to increment less than once a second + * and is 32 bits. 
+ */ + + if (kstrtou32(curr_era_counter_str, 10, &curr_era_counter)) + return -EINVAL; + + mutex_lock(&mq_era->lock); + + if (mq_era->era_counter != curr_era_counter) + r = -ECANCELED; + else if (mq_era->era_counter >= MQ_ERA_MAX_ERA) + r = -EOVERFLOW; + else { + mq_era->era_counter++; + r = 0; + } + + mutex_unlock(&mq_era->lock); + + return r; +} + +struct nested_walk_ctx { + policy_walk_fn parent_fn; + void *parent_ctx; + struct mq_era_policy *mq_era; +}; + +static int nested_walk(void *context, dm_cblock_t cblock, dm_oblock_t oblock, uint32_t hint) +{ + struct nested_walk_ctx *ctx = (struct nested_walk_ctx *)context; + + /* + * Inserted as a filter into walk_mappings so we can take additional + * actions in the shim. + */ + + DMDEBUG("calling parent walk_mappings function for cblock %u, " + "oblock %llu (era %u)", from_cblock(cblock), oblock, + ctx->mq_era->cb_to_era[from_cblock(cblock)]); + + /* + * XXX need to consolidate the hint being provided by our caller (mq) + * with the hint we want to preserve (era) once the hint size + * restriction goes away. + */ + + return (*ctx->parent_fn)(ctx->parent_ctx, cblock, oblock, + ctx->mq_era->cb_to_era[from_cblock(cblock)]); +} + +static int era_is_gt_value(era_t era, era_t value) +{ + return era > value; +} + +static int era_is_gte_value(era_t era, era_t value) +{ + return era >= value; +} + +static int era_is_lte_value(era_t era, era_t value) +{ + return era <= value; +} + +static int era_is_lt_value(era_t era, era_t value) +{ + return era < value; +} + +typedef int (*era_match_fn_t)(era_t, era_t); + +struct find_oblocks_ctx { + struct mq_era_policy *mq_era; + era_match_fn_t era_match_fn; + era_t test_era; + uint32_t matches; + uint32_t next_ob_idx; + dm_oblock_t *oblocks; +}; + +static int find_oblocks(void *context, dm_cblock_t cblock, + dm_oblock_t oblock, uint32_t hint) +{ + struct find_oblocks_ctx *ctx = (struct find_oblocks_ctx *)context; + era_t era; + + /* + * Assembles a list of oblocks that are currently in the cache and + * whose cblocks have eras that satisfy the given matching function + * (currently >, >=, <=, or <) + */ + + if (ctx->next_ob_idx >= ctx->matches) + return -EOVERFLOW; + + era = ctx->mq_era->cb_to_era[from_cblock(cblock)]; + if (ctx->era_match_fn(era, ctx->test_era)) { + DMDEBUG("cblock %u has era %u matching test_era %u; " + "recording oblock %llu at oblocks %u.", + from_cblock(cblock), era, ctx->test_era, + oblock, ctx->next_ob_idx); + ctx->oblocks[ctx->next_ob_idx++] = oblock; + ctx->mq_era->cb_to_era[from_cblock(cblock)] = 0; + } + + return 0; +} + +static int cond_unmap_by_era(struct mq_era_policy *mq_era, + const char *test_era_str, + era_match_fn_t era_match_fn) +{ + struct find_oblocks_ctx fo_ctx; + uint32_t cb_idx, matches, ob_idx, max_cb_idx; + era_t test_era; + int r; + + /* + * Unmap blocks with eras matching the given era, according to the + * given matching function. + */ + + if (kstrtou32(test_era_str, 10, &test_era)) + return -EINVAL; + + /* + * This is a little convoluted, but is not expected to be a common + * operation. 
+ */ + + mutex_lock(&mq_era->lock); + + /* While locked, count matches */ + max_cb_idx = from_cblock(mq_era->cache_size); + for (matches = 0, cb_idx = 0; cb_idx < max_cb_idx; cb_idx++) + if (era_match_fn(mq_era->cb_to_era[cb_idx], test_era)) + matches++; + + /* If there aren't any, we're done */ + if (matches == 0) { + r = 0; + goto out; + } + + /* Set up to find the origin block for each matching cache block */ + fo_ctx.mq_era = mq_era; + fo_ctx.era_match_fn = era_match_fn; + fo_ctx.test_era = test_era; + fo_ctx.matches = matches; + fo_ctx.next_ob_idx = 0; + fo_ctx.oblocks = kzalloc(sizeof(*fo_ctx.oblocks) * matches, GFP_KERNEL); + if (!fo_ctx.oblocks) { + r = -ENOMEM; + goto out; + } + + /* Go ahead and find the origins */ + r = mq_era->mq->walk_mappings(mq_era->mq, find_oblocks, &fo_ctx); + if (r) + goto free_and_out; + + /* Unmap each matching origin */ + for (ob_idx = 0; ob_idx < fo_ctx.next_ob_idx; ob_idx++) { + DMDEBUG("removing mapping for oblock %llu.", fo_ctx.oblocks[ob_idx]); + mq_era->mq->remove_mapping(mq_era->mq, fo_ctx.oblocks[ob_idx]); + } + +free_and_out: + kfree(fo_ctx.oblocks); +out: + mutex_unlock(&mq_era->lock); + return r; +} + +/* + * Public interface, via the policy struct. See dm-cache-policy.h for a + * description of these. + */ + +static void mq_era_destroy(struct dm_cache_policy *p) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + DMDEBUG("destroyed mq_era %p, mq %p.", mq_era, mq_era->mq); + mq_era->mq->destroy(mq_era->mq); + kfree(mq_era->cb_to_era); + kfree(mq_era); +} + +static int mq_era_map(struct dm_cache_policy *p, dm_oblock_t oblock, + bool can_block, bool can_migrate, bool discarded_oblock, + struct bio *bio, struct policy_result *result) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + uint32_t cb_idx; + int r; + + result->op = POLICY_MISS; + + if (can_block) + mutex_lock(&mq_era->lock); + else if (!mutex_trylock(&mq_era->lock)) + return -EWOULDBLOCK; + + /* Check for a mapping */ + r = mq_era->mq->map(mq_era->mq, oblock, can_block, can_migrate, + discarded_oblock, bio, result); + + /* If we got a hit and this is a write, update the era for the block */ + if (!r && (bio_data_dir(bio) == WRITE) && (result->op == POLICY_HIT)) { + cb_idx = from_cblock(result->cblock); + BUG_ON(cb_idx >= from_cblock(mq_era->cache_size)); + /* XXX remove this */ + DMDEBUG("assigning era %u to cblock %u, oblock %llu due to write hit.", + mq_era->era_counter, result->cblock, oblock); + mq_era->cb_to_era[cb_idx] = mq_era->era_counter; + } + + mutex_unlock(&mq_era->lock); + + return r; +} + +static int mq_era_lookup(struct dm_cache_policy *p, dm_oblock_t oblock, + dm_cblock_t *cblock) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + return mq_era->mq->lookup(mq_era->mq, oblock, cblock); +} + +static void mq_era_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + mq_era->mq->set_dirty(mq_era->mq, oblock); +} + +static void mq_era_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + mq_era->mq->clear_dirty(mq_era->mq, oblock); +} + +static int mq_era_load_mapping(struct dm_cache_policy *p, + dm_oblock_t oblock, dm_cblock_t cblock, + uint32_t hint, bool hint_valid) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + int r; + + /* + * XXX need to consolidate the hint being provided by our caller (mq) + * with the hint we want to preserve (era) once the hint size + * restriction goes away. 
+ */ + + r = mq_era->mq->load_mapping(mq_era->mq, oblock, cblock, 0, 0); + if (!r && hint_valid && + (from_cblock(cblock) < from_cblock(mq_era->cache_size))) { + DMDEBUG("recovered era %u for cblock %u.", hint, cblock); + mq_era->cb_to_era[from_cblock(cblock)] = hint; + /* + * Make sure the era counter starts higher than the highest + * persisted era. + */ + if (hint >= mq_era->era_counter) { + mq_era->era_counter = hint; + if (mq_era->era_counter < MQ_ERA_MAX_ERA) + mq_era->era_counter++; + DMDEBUG("set era_counter to %u.", mq_era->era_counter); + } + } + + return r; +} + +static int mq_era_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn, + void *context) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + struct nested_walk_ctx nested_walk_ctx = { + .parent_fn = fn, + .parent_ctx = context, + .mq_era = mq_era + }; + int r; + + /* XXX remove this */ + DMDEBUG("call to mq_era_walk_mappings"); + + mutex_lock(&mq_era->lock); + + r = mq_era->mq->walk_mappings(mq_era->mq, nested_walk, &nested_walk_ctx); + + mutex_unlock(&mq_era->lock); + + return r; +} + +static void mq_era_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + dm_cblock_t cblock; + + mutex_lock(&mq_era->lock); + + if (!mq_era->mq->lookup(mq_era->mq, oblock, &cblock)) { + DMDEBUG("zeroed era for cblock %u (oblock %llu) due to a call " + "to remove_mapping.", cblock, oblock); + mq_era->cb_to_era[from_cblock(cblock)] = 0; + } + + mq_era->mq->remove_mapping(mq_era->mq, oblock); + + mutex_unlock(&mq_era->lock); +} + +static int mq_era_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock, + dm_cblock_t *cblock) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + return mq_era->mq->writeback_work(mq_era->mq, oblock, cblock); +} + +static void mq_era_force_mapping(struct dm_cache_policy *p, + dm_oblock_t current_oblock, + dm_oblock_t new_oblock) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + dm_cblock_t cblock; + + mutex_lock(&mq_era->lock); + + if (!mq_era->mq->lookup(mq_era->mq, current_oblock, &cblock)) { + DMDEBUG("assigning era %u to cblock %u, oblock %llu " + "(old_oblock %llu) due to force_mapping.", + mq_era->era_counter, cblock, new_oblock, + current_oblock); + mq_era->cb_to_era[from_cblock(cblock)] = mq_era->era_counter; + } + + mq_era->mq->force_mapping(mq_era->mq, current_oblock, new_oblock); + + mutex_unlock(&mq_era->lock); +} + +static dm_cblock_t mq_era_residency(struct dm_cache_policy *p) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + return mq_era->mq->residency(mq_era->mq); +} + +static void mq_era_tick(struct dm_cache_policy *p) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + mq_era->mq->tick(mq_era->mq); +} + +static int mq_era_set_config_value(struct dm_cache_policy *p, + const char *key, + const char *value) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + int r; + + if (!strcasecmp(key, "increment_era_counter")) + r = incr_era_counter(mq_era, value); + else if (!strcasecmp(key, "unmap_blocks_from_later_eras")) + r = cond_unmap_by_era(mq_era, value, era_is_gt_value); + else if (!strcasecmp(key, "unmap_blocks_from_this_era_and_later")) + r = cond_unmap_by_era(mq_era, value, era_is_gte_value); + else if (!strcasecmp(key, "unmap_blocks_from_this_era_and_earlier")) + r = cond_unmap_by_era(mq_era, value, era_is_lte_value); + else if (!strcasecmp(key, "unmap_blocks_from_earlier_eras")) + r = cond_unmap_by_era(mq_era, value, era_is_lt_value); + else + r = 
mq_era->mq->set_config_value(mq_era->mq, key, value); + + return r; +} + +static int mq_era_emit_config_values(struct dm_cache_policy *p, char *result, + unsigned maxlen) +{ + struct mq_era_policy *mq_era = to_mq_era_policy(p); + ssize_t sz = 0; + DMEMIT("era_counter %u ", mq_era->era_counter); + return mq_era->mq->emit_config_values(mq_era->mq, result + sz, maxlen - sz); +} + +/* Init the policy plugin interface function pointers. */ +static void init_policy_functions(struct mq_era_policy *mq_era) +{ + mq_era->policy.destroy = mq_era_destroy; + mq_era->policy.map = mq_era_map; + mq_era->policy.lookup = mq_era_lookup; + mq_era->policy.set_dirty = mq_era_set_dirty; + mq_era->policy.clear_dirty = mq_era_clear_dirty; + mq_era->policy.load_mapping = mq_era_load_mapping; + mq_era->policy.walk_mappings = mq_era_walk_mappings; + mq_era->policy.remove_mapping = mq_era_remove_mapping; + mq_era->policy.writeback_work = mq_era_writeback_work; + mq_era->policy.force_mapping = mq_era_force_mapping; + mq_era->policy.residency = mq_era_residency; + mq_era->policy.tick = mq_era_tick; + mq_era->policy.emit_config_values = mq_era_emit_config_values; + mq_era->policy.set_config_value = mq_era_set_config_value; +} + +static struct dm_cache_policy *mq_era_create(dm_cblock_t cache_size, + sector_t origin_size, + sector_t cache_block_size) +{ + struct mq_era_policy *mq_era = kzalloc(sizeof(*mq_era), GFP_KERNEL); + + if (!mq_era) + return NULL; + + init_policy_functions(mq_era); + mq_era->cache_size = cache_size; + mutex_init(&mq_era->lock); + + mq_era->cb_to_era = kzalloc(from_cblock(mq_era->cache_size) * + sizeof(*(mq_era->cb_to_era)), + GFP_KERNEL); + if (!mq_era->cb_to_era) + goto bad_alloc_cb_to_era; + mq_era->era_counter = 1; + + mq_era->mq = dm_cache_policy_create("mq", cache_size, origin_size, + cache_block_size); + if (!mq_era->mq) + goto bad_policy_create; + + DMDEBUG("created mq_era %p, mq %p.", mq_era, mq_era->mq); + + return &mq_era->policy; + +bad_policy_create: + kfree(mq_era->cb_to_era); +bad_alloc_cb_to_era: + kfree(mq_era); + + return NULL; +} + +/*----------------------------------------------------------------*/ + +static struct dm_cache_policy_type mq_era_policy_type = { + .name = "mq-era", + .version = {1, 0, 0}, + .hint_size = 4, + .owner = THIS_MODULE, + .create = mq_era_create +}; + +static int __init mq_era_init(void) +{ + int r; + + r = dm_cache_policy_register(&mq_era_policy_type); + if (!r) { + DMINFO("version %u.%u.%u loaded", + mq_era_policy_type.version[0], + mq_era_policy_type.version[1], + mq_era_policy_type.version[2]); + return 0; + } + + DMERR("register failed %d", r); + + dm_cache_policy_unregister(&mq_era_policy_type); + return -ENOMEM; +} + +static void __exit mq_era_exit(void) +{ + dm_cache_policy_unregister(&mq_era_policy_type); +} + +module_init(mq_era_init); +module_exit(mq_era_exit); + +MODULE_AUTHOR("Morgan Mears "); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("mq-era cache policy"); Index: linux/drivers/md/dm-cache-policy-mq.c =================================================================== --- linux.orig/drivers/md/dm-cache-policy-mq.c +++ linux/drivers/md/dm-cache-policy-mq.c @@ -6,6 +6,7 @@ #include "dm-cache-policy.h" #include "dm.h" +#include "persistent-data/dm-btree.h" #include #include @@ -1030,7 +1031,7 @@ static void mq_clear_dirty(struct dm_cac static int mq_load_mapping(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t cblock, - uint32_t hint, bool hint_valid) + void *hint, bool hint_valid) { struct mq_policy *mq = to_mq_policy(p); struct 
entry *e; @@ -1043,38 +1044,45 @@ static int mq_load_mapping(struct dm_cac e->oblock = oblock; e->in_cache = true; e->dirty = true; /* this gets corrected in a minute */ - e->hit_count = hint_valid ? hint : 1; + e->hit_count = hint_valid ? le32_to_cpu(*((__le32 *) hint)) : 1; e->generation = mq->generation; push(mq, e); return 0; } +static int mq_save_hints(struct mq_policy *mq, struct queue *q, + policy_walk_fn fn, void *context) +{ + int r; + unsigned level; + struct entry *e; + + for (level = 0; level < NR_QUEUE_LEVELS; level++) + list_for_each_entry(e, q->qs + level, list) { + __le32 value = cpu_to_le32(e->hit_count); + __dm_bless_for_disk(&value); + + r = fn(context, e->cblock, e->oblock, &value); + if (r) + return r; + } + + return 0; +} + static int mq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn, void *context) { struct mq_policy *mq = to_mq_policy(p); int r = 0; - struct entry *e; - unsigned level; mutex_lock(&mq->lock); - for (level = 0; level < NR_QUEUE_LEVELS; level++) - list_for_each_entry(e, &mq->cache_clean.qs[level], list) { - r = fn(context, e->cblock, e->oblock, e->hit_count); - if (r) - goto out; - } - - for (level = 0; level < NR_QUEUE_LEVELS; level++) - list_for_each_entry(e, &mq->cache_dirty.qs[level], list) { - r = fn(context, e->cblock, e->oblock, e->hit_count); - if (r) - goto out; - } + r = mq_save_hints(mq, &mq->cache_clean, fn, context); + if (!r) + r = mq_save_hints(mq, &mq->cache_dirty, fn, context); -out: mutex_unlock(&mq->lock); return r; Index: linux/drivers/md/dm-cache-policy.c =================================================================== --- linux.orig/drivers/md/dm-cache-policy.c +++ linux/drivers/md/dm-cache-policy.c @@ -81,8 +81,9 @@ int dm_cache_policy_register(struct dm_c int r; /* One size fits all for now */ - if (type->hint_size != 0 && type->hint_size != 4) { - DMWARN("hint size must be 0 or 4 but %llu supplied.", (unsigned long long) type->hint_size); + if (type->hint_size > DM_CACHE_POLICY_MAX_HINT_SIZE) { + DMWARN("hint size must be <= %llu but %llu supplied.", + (unsigned long long) DM_CACHE_POLICY_MAX_HINT_SIZE, (unsigned long long) type->hint_size); return -EINVAL; } @@ -166,4 +167,16 @@ size_t dm_cache_policy_get_hint_size(str } EXPORT_SYMBOL_GPL(dm_cache_policy_get_hint_size); +int dm_cache_policy_set_hint_size(struct dm_cache_policy *p, unsigned hint_size) +{ + struct dm_cache_policy_type *t = p->private; + + if (hint_size > DM_CACHE_POLICY_MAX_HINT_SIZE) + return -EPERM; + + t->hint_size = hint_size; + return 0; +} +EXPORT_SYMBOL_GPL(dm_cache_policy_set_hint_size); + /*----------------------------------------------------------------*/ Index: linux/drivers/md/dm-cache-policy.h =================================================================== --- linux.orig/drivers/md/dm-cache-policy.h +++ linux/drivers/md/dm-cache-policy.h @@ -8,6 +8,7 @@ #define DM_CACHE_POLICY_H #include "dm-cache-block-types.h" +#include "persistent-data/dm-btree.h" #include @@ -79,7 +80,8 @@ struct policy_result { }; typedef int (*policy_walk_fn)(void *context, dm_cblock_t cblock, - dm_oblock_t oblock, uint32_t hint); + dm_oblock_t oblock, void *hint) + __dm_written_to_disk(hint); /* * The cache policy object. Just a bunch of methods. It is envisaged that @@ -130,7 +132,7 @@ struct dm_cache_policy { * * Must not block. * - * Returns 0 if in cache, -ENOENT if not, < 0 for other errors (-EWOULDBLOCK + * Returns 0 iff in cache, -ENOENT iff not, < 0 on error (-EWOULDBLOCK * would be typical). 
	 */
	int (*lookup)(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t *cblock);
@@ -146,7 +148,7 @@ struct dm_cache_policy {
	 * mapping from the metadata device into the policy.
	 */
	int (*load_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock,
-			    dm_cblock_t cblock, uint32_t hint, bool hint_valid);
+			    dm_cblock_t cblock, void *hint, bool hint_valid);
	int (*walk_mappings)(struct dm_cache_policy *p, policy_walk_fn fn,
			     void *context);
@@ -159,7 +161,14 @@ struct dm_cache_policy {
	void (*force_mapping)(struct dm_cache_policy *p, dm_oblock_t current_oblock,
			      dm_oblock_t new_oblock);
+	/*
+	 * writeback_work lets the cache target retrieve a dirty block to write back.
+	 *
+	 * next_dirty_block provides the next dirty block for background writeback,
+	 * allowing quicker eviction by avoiding demotion on cache block replacement.
+	 */
	int (*writeback_work)(struct dm_cache_policy *p, dm_oblock_t *oblock, dm_cblock_t *cblock);
+	int (*next_dirty_block)(struct dm_cache_policy *p, dm_oblock_t *oblock, dm_cblock_t *cblock);

	/*
@@ -211,8 +220,7 @@ struct dm_cache_policy_type {
	/*
	 * Policies may store a hint for each cache block.
-	 * Currently the size of this hint must be 0 or 4 bytes but we
-	 * expect to relax this in future.
+	 * Currently the size of this hint must be no larger than DM_CACHE_POLICY_MAX_HINT_SIZE bytes.
	 */
	size_t hint_size;
@@ -227,4 +235,4 @@ void dm_cache_policy_unregister(struct d

/*----------------------------------------------------------------*/

-#endif /* DM_CACHE_POLICY_H */
+#endif /* DM_CACHE_POLICY_H */
Index: linux/drivers/md/dm-cache-target.c
===================================================================
--- linux.orig/drivers/md/dm-cache-target.c
+++ linux/drivers/md/dm-cache-target.c
@@ -104,14 +104,37 @@ static void unhook_bio(struct hook_info
 /*
  * FIXME: the cache is read/write for the time being.
  */
-enum cache_mode {
+enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
 };

+enum cache_io_mode {
+	/*
+	 * Data is written to cached blocks only.  These blocks are marked
+	 * dirty.  If you lose the cache device you will lose data.
+	 * Potential performance increase for both reads and writes.
+	 */
+	CM_IO_WRITEBACK,
+
+	/*
+	 * Data is written to both cache and origin.  Blocks are never
+	 * dirty.  Potential performance benefit for reads only.
+	 */
+	CM_IO_WRITETHROUGH,
+
+	/*
+	 * A degraded mode useful for various cache coherency situations
+	 * (eg, rolling back snapshots).  Reads and writes always go to the
+	 * origin.  If a write goes to a cached oblock, then the cache
+	 * block is invalidated.
+	 */
+	CM_IO_PASSTHROUGH
+};
+
 struct cache_features {
-	enum cache_mode mode;
-	bool write_through:1;
+	enum cache_metadata_mode mode;
+	enum cache_io_mode io_mode;
 };

 struct cache_stats {
@@ -562,9 +585,24 @@ static void save_stats(struct cache *cac
 #define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
 #define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))

+static bool writethrough_mode(struct cache_features *f)
+{
+	return f->io_mode == CM_IO_WRITETHROUGH;
+}
+
+static bool writeback_mode(struct cache_features *f)
+{
+	return f->io_mode == CM_IO_WRITEBACK;
+}
+
+static bool passthrough_mode(struct cache_features *f)
+{
+	return f->io_mode == CM_IO_PASSTHROUGH;
+}
+
 static size_t get_per_bio_data_size(struct cache *cache)
 {
-	return cache->features.write_through ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
+	return writethrough_mode(&cache->features) ?
PB_DATA_SIZE_WT : PB_DATA_SIZE_WB; } static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size) @@ -1130,6 +1168,32 @@ static void demote_then_promote(struct c quiesce_migration(mg); } +/* + * Invalidate a cache entry. No writeback occurs; any changes in the cache + * block are thrown away. + */ +static void invalidate(struct cache *cache, struct prealloc *structs, + dm_oblock_t oblock, dm_cblock_t cblock, + struct dm_bio_prison_cell *cell) +{ + struct dm_cache_migration *mg = prealloc_get_migration(structs); + + mg->err = false; + mg->writeback = false; + mg->demote = true; + mg->promote = false; + mg->requeue_holder = true; + mg->cache = cache; + mg->old_oblock = oblock; + mg->cblock = cblock; + mg->old_ocell = cell; + mg->new_ocell = NULL; + mg->start_jiffies = jiffies; + + inc_nr_migrations(cache); + quiesce_migration(mg); +} + /*---------------------------------------------------------------- * bio processing *--------------------------------------------------------------*/ @@ -1192,11 +1256,9 @@ static bool spare_migration_bandwidth(st return current_volume < cache->migration_threshold; } -static bool is_writethrough_io(struct cache *cache, struct bio *bio, - dm_cblock_t cblock) +static bool is_write_io(struct bio *bio) { - return bio_data_dir(bio) == WRITE && - cache->features.write_through && !is_dirty(cache, cblock); + return bio_data_dir(bio) == WRITE; } static void inc_hit_counter(struct cache *cache, struct bio *bio) @@ -1211,6 +1273,15 @@ static void inc_miss_counter(struct cach &cache->stats.read_miss : &cache->stats.write_miss); } +static void issue_cache_bio(struct cache *cache, struct bio *bio, + struct per_bio_data *pb, + dm_oblock_t oblock, dm_cblock_t cblock) +{ + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + remap_to_cache_dirty(cache, bio, oblock, cblock); + issue(cache, bio); +} + static void process_bio(struct cache *cache, struct prealloc *structs, struct bio *bio) { @@ -1222,7 +1293,8 @@ static void process_bio(struct cache *ca size_t pb_data_size = get_per_bio_data_size(cache); struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); bool discarded_block = is_discarded_oblock(cache, block); - bool can_migrate = discarded_block || spare_migration_bandwidth(cache); + bool passthrough = passthrough_mode(&cache->features); + bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); /* * Check to see if that block is currently migrating. @@ -1243,15 +1315,39 @@ static void process_bio(struct cache *ca switch (lookup_result.op) { case POLICY_HIT: - inc_hit_counter(cache, bio); - pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + if (passthrough) { + inc_miss_counter(cache, bio); - if (is_writethrough_io(cache, bio, lookup_result.cblock)) - remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); - else - remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); + /* + * Passthrough always maps to the origin, + * invalidating any cache blocks that are written + * to. 
+ */ + + if (is_write_io(bio)) { + atomic_inc(&cache->stats.demotion); + invalidate(cache, structs, block, lookup_result.cblock, new_ocell); + release_cell = false; + + } else { + /* FIXME: factor out issue_origin() */ + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + remap_to_origin_clear_discard(cache, bio, block); + issue(cache, bio); + } + } else { + inc_hit_counter(cache, bio); + + if (is_write_io(bio) && + writethrough_mode(&cache->features) && + !is_dirty(cache, lookup_result.cblock)) { + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); + issue(cache, bio); + } else + issue_cache_bio(cache, bio, pb, block, lookup_result.cblock); + } - issue(cache, bio); break; case POLICY_MISS: @@ -1798,7 +1894,7 @@ static int parse_block_size(struct cache static void init_features(struct cache_features *cf) { cf->mode = CM_WRITE; - cf->write_through = false; + cf->io_mode = CM_IO_WRITEBACK; } static int parse_features(struct cache_args *ca, struct dm_arg_set *as, @@ -1823,10 +1919,13 @@ static int parse_features(struct cache_a arg = dm_shift_arg(as); if (!strcasecmp(arg, "writeback")) - cf->write_through = false; + cf->io_mode = CM_IO_WRITEBACK; else if (!strcasecmp(arg, "writethrough")) - cf->write_through = true; + cf->io_mode = CM_IO_WRITETHROUGH; + + else if (!strcasecmp(arg, "passthrough")) + cf->io_mode = CM_IO_PASSTHROUGH; else { *error = "Unrecognised cache feature requested"; @@ -2078,6 +2177,22 @@ static int cache_create(struct cache_arg } cache->cmd = cmd; + if (passthrough_mode(&cache->features)) { + bool all_clean; + + r = dm_cache_metadata_all_clean(cache->cmd, &all_clean); + if (r) { + *error = "dm_cache_metadata_all_clean() failed"; + goto bad; + } + + if (!all_clean) { + *error = "Cannot enter passthrough mode unless all blocks are clean"; + r = -EINVAL; + goto bad; + } + } + spin_lock_init(&cache->lock); bio_list_init(&cache->deferred_bios); bio_list_init(&cache->deferred_flush_bios); @@ -2291,17 +2406,38 @@ static int cache_map(struct dm_target *t return DM_MAPIO_SUBMITTED; } + r = DM_MAPIO_REMAPPED; switch (lookup_result.op) { case POLICY_HIT: - inc_hit_counter(cache, bio); - pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + if (passthrough_mode(&cache->features)) { + if (is_write_io(bio)) { + /* + * We need to invalidate this block, so + * defer for the worker thread.
+ */ + cell_defer(cache, cell, true); + r = DM_MAPIO_SUBMITTED; + + } else { + pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds); + inc_miss_counter(cache, bio); + remap_to_origin_clear_discard(cache, bio, block); + } - if (is_writethrough_io(cache, bio, lookup_result.cblock)) - remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); - else - remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); + } else { + inc_hit_counter(cache, bio); + + if (is_write_io(bio) && + writethrough_mode(&cache->features) && + !is_dirty(cache, lookup_result.cblock)) + remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); - cell_defer(cache, cell, false); + else + remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); + + cell_defer(cache, cell, false); + + } break; case POLICY_MISS: @@ -2326,10 +2462,10 @@ static int cache_map(struct dm_target *t DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, (unsigned) lookup_result.op); bio_io_error(bio); - return DM_MAPIO_SUBMITTED; + r = DM_MAPIO_SUBMITTED; } - return DM_MAPIO_REMAPPED; + return r; } static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) @@ -2388,7 +2524,7 @@ static int write_discard_bitset(struct c } static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, - uint32_t hint) + void *hint) { struct cache *cache = context; return dm_cache_save_hint(cache->cmd, cblock, hint); @@ -2458,7 +2594,7 @@ static void cache_postsuspend(struct dm_ } static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, - bool dirty, uint32_t hint, bool hint_valid) + bool dirty, void *hint, bool hint_valid) { int r; struct cache *cache = context; @@ -2490,26 +2626,71 @@ static int load_discard(void *context, s return 0; } -static int cache_preresume(struct dm_target *ti) +static dm_cblock_t get_cache_dev_size(struct cache *cache) +{ + sector_t size = get_dev_size(cache->cache_dev); + (void) sector_div(size, cache->sectors_per_block); + return to_cblock(size); +} + +static bool can_resize(struct cache *cache, dm_cblock_t new_size) +{ + if (new_size > cache->cache_size) + return true; + + /* + * We can't drop a dirty block when shrinking the cache. + */ + for (; new_size < cache->cache_size; + new_size = to_cblock(from_cblock(new_size) + 1)) { + if (is_dirty(cache, new_size)) { + DMERR("unable to shrink cache; cache block %llu is dirty", + (unsigned long long) from_cblock(new_size)); + return false; + } + } + + return true; +} + +static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size) +{ + int r; + + r = dm_cache_resize(cache->cmd, new_size); + if (r) { + DMERR("could not resize cache metadata"); + return r; + } + + cache->cache_size = new_size; + + return 0; +} + +static int cache_preresume_(struct dm_target *ti) { int r = 0; struct cache *cache = ti->private; - sector_t actual_cache_size = get_dev_size(cache->cache_dev); - (void) sector_div(actual_cache_size, cache->sectors_per_block); + dm_cblock_t csize = get_cache_dev_size(cache); /* * Check to see if the cache has resized.
*/ - if (from_cblock(cache->cache_size) != actual_cache_size || !cache->sized) { - cache->cache_size = to_cblock(actual_cache_size); - - r = dm_cache_resize(cache->cmd, cache->cache_size); - if (r) { - DMERR("could not resize cache metadata"); + if (!cache->sized) { + r = resize_cache_dev(cache, csize); + if (r) return r; - } cache->sized = true; + + } else if (csize != cache->cache_size) { + if (!can_resize(cache, csize)) + return -EINVAL; + + r = resize_cache_dev(cache, csize); + if (r) + return r; } if (!cache->loaded_mappings) { @@ -2536,6 +2717,14 @@ static int cache_preresume(struct dm_tar return r; } +static int cache_preresume(struct dm_target *ti) +{ + int r = cache_preresume_(ti); + if (r) + DMERR("cache_preresume failed"); + return r; +} + static void cache_resume(struct dm_target *ti) { struct cache *cache = ti->private; @@ -2602,10 +2791,18 @@ static void cache_status(struct dm_targe (unsigned long long) from_cblock(residency), cache->nr_dirty); - if (cache->features.write_through) + if (writethrough_mode(&cache->features)) DMEMIT("1 writethrough "); + + else if (passthrough_mode(&cache->features)) + DMEMIT("1 passthrough "); + + else if (writeback_mode(&cache->features)) + DMEMIT("1 writeback "); + else - DMEMIT("0 "); + DMERR("internal error: unknown io mode: %d", + (int) cache->features.io_mode); DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold); if (sz < maxlen) { Index: linux/drivers/md/persistent-data/dm-block-manager.h =================================================================== --- linux.orig/drivers/md/persistent-data/dm-block-manager.h +++ linux/drivers/md/persistent-data/dm-block-manager.h @@ -114,6 +114,9 @@ int dm_bm_flush_and_unlock(struct dm_blo +/* + * Request data is prefetched into the cache. + */ void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b); /* * Switches the bm to a read only mode. Once read-only mode * has been entered the following functions will return -EPERM. *