From: Joe Thornber Change the dm cache mq policy to write back dirty blocks in the background. Instead of one multiqueue for cached entries we now have two: dirty and clean. Writeback work is taken from the dirty mq. Demotion is now done by default from the clean one (saving a copy, since a clean block needs no writeback before it is reused). Signed-off-by: Joe Thornber --- drivers/md/dm-cache-policy-mq.c | 155 +++++++++++++++++++++++++++++++++------- 1 file changed, 131 insertions(+), 24 deletions(-) CONTAINS MAJOR FIXME... Index: linux/drivers/md/dm-cache-policy-mq.c =================================================================== --- linux.orig/drivers/md/dm-cache-policy-mq.c +++ linux/drivers/md/dm-cache-policy-mq.c @@ -224,6 +224,7 @@ struct entry { * FIXME: pack these better */ bool in_cache:1; + bool dirty:1; unsigned hit_count; unsigned generation; unsigned tick; @@ -244,7 +245,8 @@ struct mq_policy { * for promotion to the cache. */ struct queue pre_cache; - struct queue cache; + struct queue cache_clean; + struct queue cache_dirty; /* * Keeps track of time, incremented by the core. We use this to @@ -310,7 +312,10 @@ struct mq_policy { }; /*----------------------------------------------------------------*/ -/* Free/alloc mq cache entry structures. */ + +/* + * Free/alloc mq cache entry structures. + */ static void takeout_queue(struct list_head *lh, struct queue *q) { unsigned level; @@ -324,7 +329,8 @@ static void free_entries(struct mq_polic struct entry *e, *tmp; takeout_queue(&mq->free, &mq->pre_cache); - takeout_queue(&mq->free, &mq->cache); + takeout_queue(&mq->free, &mq->cache_clean); + takeout_queue(&mq->free, &mq->cache_dirty); list_for_each_entry_safe(e, tmp, &mq->free, list) kmem_cache_free(mq_entry_cache, e); @@ -508,7 +514,8 @@ static void push(struct mq_policy *mq, s if (e->in_cache) { alloc_cblock(mq, e->cblock); - queue_push(&mq->cache, queue_level(e), &e->list); + queue_push(e->dirty ? 
&mq->cache_dirty : &mq->cache_clean, + queue_level(e), &e->list); } else queue_push(&mq->pre_cache, queue_level(e), &e->list); } @@ -531,14 +538,16 @@ static void del(struct mq_policy *mq, st */ static struct entry *pop(struct mq_policy *mq, struct queue *q) { - struct entry *e = container_of(queue_pop(q), struct entry, list); + struct entry *e; + struct list_head *h = queue_pop(q); - if (e) { - hash_remove(e); + if (!h) + return NULL; - if (e->in_cache) - free_cblock(mq, e->cblock); - } + e = container_of(h, struct entry, list); + hash_remove(e); + if (e->in_cache) + free_cblock(mq, e->cblock); return e; } @@ -578,7 +587,16 @@ static void check_generation(struct mq_p mq->generation++; for (level = 0; level < NR_QUEUE_LEVELS && count < MAX_TO_AVERAGE; level++) { - head = mq->cache.qs + level; + head = mq->cache_clean.qs + level; + list_for_each_entry(e, head, list) { + nr++; + total += e->hit_count; + + if (++count >= MAX_TO_AVERAGE) + break; + } + + head = mq->cache_dirty.qs + level; list_for_each_entry(e, head, list) { nr++; total += e->hit_count; @@ -631,19 +649,28 @@ static void requeue_and_update_tick(stru * - set the hit count to a hard coded value other than 1, eg, is it better * if it goes in at level 2? */ -static dm_cblock_t demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock) +static int demote_cblock(struct mq_policy *mq, dm_oblock_t *oblock, dm_cblock_t *cblock) { - dm_cblock_t result; - struct entry *demoted = pop(mq, &mq->cache); + struct entry *demoted = pop(mq, &mq->cache_clean); - BUG_ON(!demoted); - result = demoted->cblock; + if (!demoted) + /* + * We could get a block from mq->cache_dirty, but that + * would add extra latency to the triggering bio as it + * waits for the writeback. Better to not promote this + * time and hope there's a clean block next time this block + * is hit. 
+ */ + return -ENOSPC; + + *cblock = demoted->cblock; *oblock = demoted->oblock; demoted->in_cache = false; + demoted->dirty = false; demoted->hit_count = 1; push(mq, demoted); - return result; + return 0; } /* @@ -697,17 +724,22 @@ static int cache_entry_found(struct mq_p } /* - * Moves and entry from the pre_cache to the cache. The main work is + * Moves an entry from the pre_cache to the cache. The main work is * finding which cache block to use. */ static int pre_cache_to_cache(struct mq_policy *mq, struct entry *e, struct policy_result *result) { + int r; dm_cblock_t cblock; if (find_free_cblock(mq, &cblock) == -ENOSPC) { + r = demote_cblock(mq, &result->old_oblock, &cblock); + if (r) { + result->op = POLICY_MISS; + return 0; + } result->op = POLICY_REPLACE; - cblock = demote_cblock(mq, &result->old_oblock); } else result->op = POLICY_NEW; @@ -715,6 +747,7 @@ static int pre_cache_to_cache(struct mq_ del(mq, e); e->in_cache = true; + e->dirty = false; push(mq, e); return 0; @@ -758,6 +791,7 @@ static void insert_in_pre_cache(struct m } e->in_cache = false; + e->dirty = false; e->oblock = oblock; e->hit_count = 1; e->generation = mq->generation; @@ -785,6 +819,7 @@ static void insert_in_cache(struct mq_po e->oblock = oblock; e->cblock = cblock; e->in_cache = true; + e->dirty = false; e->hit_count = 1; e->generation = mq->generation; push(mq, e); @@ -915,6 +950,36 @@ static int mq_lookup(struct dm_cache_pol return r; } +// FIXME: can these block? +static void mq_set_or_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock, bool dirty_state) +{ + struct mq_policy *mq = to_mq_policy(p); + struct entry *e; + + mutex_lock(&mq->lock); + e = hash_lookup(mq, oblock); + if (!e) + DMWARN("mq_%s_dirty called for a block that isn't in the cache", dirty_state ? 
"set" : "clear"); + else { + BUG_ON(!e->in_cache); + + del(mq, e); + e->dirty = dirty_state; + push(mq, e); + } + mutex_unlock(&mq->lock); +} + +static void mq_set_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + mq_set_or_clear_dirty(p, oblock, true); +} + +static void mq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock) +{ + mq_set_or_clear_dirty(p, oblock, false); +} + static int mq_load_mapping(struct dm_cache_policy *p, dm_oblock_t oblock, dm_cblock_t cblock, uint32_t hint, bool hint_valid) @@ -929,6 +994,7 @@ static int mq_load_mapping(struct dm_cac e->cblock = cblock; e->oblock = oblock; e->in_cache = true; + e->dirty = true; /* this gets corrected in a minute */ e->hit_count = hint_valid ? hint : 1; e->generation = mq->generation; push(mq, e); @@ -947,7 +1013,14 @@ static int mq_walk_mappings(struct dm_ca mutex_lock(&mq->lock); for (level = 0; level < NR_QUEUE_LEVELS; level++) - list_for_each_entry(e, &mq->cache.qs[level], list) { + list_for_each_entry(e, &mq->cache_clean.qs[level], list) { + r = fn(context, e->cblock, e->oblock, e->hit_count); + if (r) + goto out; + } + + for (level = 0; level < NR_QUEUE_LEVELS; level++) + list_for_each_entry(e, &mq->cache_dirty.qs[level], list) { r = fn(context, e->cblock, e->oblock, e->hit_count); if (r) goto out; @@ -967,6 +1040,7 @@ static void remove_mapping(struct mq_pol del(mq, e); e->in_cache = false; + e->dirty = false; push(mq, e); } @@ -979,6 +1053,34 @@ static void mq_remove_mapping(struct dm_ mutex_unlock(&mq->lock); } +static int __mq_writeback_work(struct mq_policy *mq, dm_oblock_t *oblock, + dm_cblock_t *cblock) +{ + struct entry *e = pop(mq, &mq->cache_dirty); + + if (!e) + return -ENODATA; + + *oblock = e->oblock; + *cblock = e->cblock; + e->dirty = false; + push(mq, e); + return 0; +} + +static int mq_writeback_work(struct dm_cache_policy *p, dm_oblock_t *oblock, + dm_cblock_t *cblock) +{ + int r; + struct mq_policy *mq = to_mq_policy(p); + + mutex_lock(&mq->lock); + r = 
__mq_writeback_work(mq, oblock, cblock); + mutex_unlock(&mq->lock); + + return r; +} + static void force_mapping(struct mq_policy *mq, dm_oblock_t current_oblock, dm_oblock_t new_oblock) { @@ -988,6 +1090,7 @@ static void force_mapping(struct mq_poli del(mq, e); e->oblock = new_oblock; + e->dirty = true; push(mq, e); } @@ -1059,10 +1162,12 @@ static void init_policy_functions(struct mq->policy.destroy = mq_destroy; mq->policy.map = mq_map; mq->policy.lookup = mq_lookup; + mq->policy.set_dirty = mq_set_dirty; + mq->policy.clear_dirty = mq_clear_dirty; mq->policy.load_mapping = mq_load_mapping; mq->policy.walk_mappings = mq_walk_mappings; mq->policy.remove_mapping = mq_remove_mapping; - mq->policy.writeback_work = NULL; + mq->policy.writeback_work = mq_writeback_work; mq->policy.force_mapping = mq_force_mapping; mq->policy.residency = mq_residency; mq->policy.tick = mq_tick; @@ -1095,7 +1200,9 @@ static struct dm_cache_policy *mq_create mq->find_free_last_word = 0; queue_init(&mq->pre_cache); - queue_init(&mq->cache); + queue_init(&mq->cache_clean); + queue_init(&mq->cache_dirty); + mq->generation_period = max((unsigned) from_cblock(cache_size), 1024U); mq->nr_entries = 2 * from_cblock(cache_size); @@ -1132,7 +1239,7 @@ bad_cache_alloc: static struct dm_cache_policy_type mq_policy_type = { .name = "mq", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .hint_size = 4, .owner = THIS_MODULE, .create = mq_create @@ -1140,7 +1247,7 @@ static struct dm_cache_policy_type mq_po static struct dm_cache_policy_type default_policy_type = { .name = "default", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .hint_size = 4, .owner = THIS_MODULE, .create = mq_create