From: Jonathan E Brassow

This patch adds read balancing.  The round-robin method is used,
with MIN_READS (128) requests going to each device.  The read counter
is set to MIN_READS when the mirror set is allocated, and the mirror
search uses a local pointer so that ms->read_mirror always refers to
a real element of ms->mirror[].
---

[The 128 needs changing into a runtime parameter.]

Index: linux-2.6.19-rc4/drivers/md/dm-raid1.c
===================================================================
--- linux-2.6.19-rc4.orig/drivers/md/dm-raid1.c	2006-11-01 21:40:51.000000000 +0000
+++ linux-2.6.19-rc4/drivers/md/dm-raid1.c	2006-11-01 21:40:53.000000000 +0000
@@ -129,6 +129,9 @@ struct mirror_set {
 	struct workqueue_struct *kmirrord_wq;
 	struct work_struct kmirrord_work;
 
+	atomic_t read_count;		/* Read counter for read balancing */
+	struct mirror *read_mirror;	/* Last mirror read. */
+
 	unsigned int nr_mirrors;
 	struct mirror mirror[0];
 };
@@ -684,10 +687,59 @@ static void do_recovery(struct mirror_se
 /*-----------------------------------------------------------------
  * Reads
  *---------------------------------------------------------------*/
-static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector)
+/* Switch to next dev, via round-robin, after MIN_READS reads */
+#define MIN_READS 128
+
+/*
+ * choose_mirror
+ * @ms: the mirror set
+ *
+ * Picks the mirror a read should go to.  ms->read_mirror keeps being
+ * used until ms->read_count runs out or its error_count is non-zero; the
+ * search then steps to the next mirror and resets ms->read_count.
+ *
+ * Returns: chosen mirror, or NULL if every mirror was rejected and the
+ * starting mirror has a non-zero error_count
+ */
+static struct mirror *choose_mirror(struct mirror_set *ms)
 {
-	/* FIXME: add read balancing */
-	return ms->default_mirror;
+	struct mirror *start_mirror = ms->read_mirror;
+	struct mirror *m = start_mirror;
+
+	/*
+	 * Perform MIN_READS on each working mirror then
+	 * advance to the next one.  start_mirror stores
+	 * the first we tried, so we know when we're done.
+	 *
+	 * The walk uses a local pointer so that ms->read_mirror only
+	 * ever holds the address of a real mirror[] element, even while
+	 * wrapping around the start of the array.
+	 */
+	do {
+		if (likely(!atomic_read(&m->error_count) &&
+			   !atomic_dec_and_test(&ms->read_count)))
+			goto use_mirror;
+
+		atomic_set(&ms->read_count, MIN_READS);
+
+		/* Step down through ms->mirror[], wrapping at [0]. */
+		if (m == ms->mirror)
+			m = ms->mirror + ms->nr_mirrors - 1;
+		else
+			m--;
+
+	} while (m != start_mirror);
+
+	/*
+	 * We've rejected every mirror.
+	 * Confirm that start_mirror can still be used.
+	 */
+	if (unlikely(atomic_read(&m->error_count)))
+		return NULL;
+
+use_mirror:
+	ms->read_mirror = m;
+	return m;
 }
 
 /*
@@ -712,7 +764,7 @@ static void do_reads(struct mirror_set *
 		 * We can only read balance if the region is in sync.
 		 */
 		if (rh_in_sync(&ms->rh, region, 0))
-			m = choose_mirror(ms, bio->bi_sector);
+			m = choose_mirror(ms);
 		else
 			m = ms->default_mirror;
 
@@ -898,6 +950,8 @@ static struct mirror_set *alloc_context(
 	ms->nr_regions = dm_sector_div_up(ti->len, region_size);
 	ms->in_sync = 0;
 	ms->default_mirror = &ms->mirror[DEFAULT_MIRROR];
+	ms->read_mirror = &ms->mirror[DEFAULT_MIRROR];
+	atomic_set(&ms->read_count, MIN_READS);
 
 	if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) {
 		ti->error = "Error creating dirty region hash";
@@ -1147,7 +1201,7 @@ static int mirror_map(struct dm_target *
 		return DM_MAPIO_SUBMITTED;
 	}
 
-	m = choose_mirror(ms, bio->bi_sector);
+	m = choose_mirror(ms);
 	if (!m)
 		return -EIO;
 