From: Benjamin Marzinski When the last path in a priority group fails, instead of switching to the next PG immediately, wait for a configurable amount of time in case any paths recover. In some configurations switching PG is an expensive exercise that you would prefer to avoid when there are transient failures. Signed-off-by: Benjamin Marzinski --- drivers/md/dm-mpath.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, 107 insertions(+), 7 deletions(-) Index: linux-2.6.19/drivers/md/dm-mpath.c =================================================================== --- linux-2.6.19.orig/drivers/md/dm-mpath.c 2006-12-06 20:49:32.000000000 +0000 +++ linux-2.6.19/drivers/md/dm-mpath.c 2006-12-06 20:49:42.000000000 +0000 @@ -53,6 +53,12 @@ struct priority_group { struct list_head pgpaths; }; +enum pg_timeout_state { + PG_TIMEOUT_NONE = 0, + PG_TIMEOUT_IN_PROGRESS, + PG_TIMEOUT_FAILED +}; + /* Multipath context */ struct multipath { struct list_head list; @@ -76,6 +82,10 @@ struct multipath { unsigned queue_if_no_path; /* Queue I/O if last path fails? */ unsigned saved_queue_if_no_path;/* Saved state during suspension */ + unsigned pg_timeout; /* time to wait before switching PGs */ + enum pg_timeout_state pg_timeout_status; + struct timer_list pg_timer; + struct work_struct process_queued_ios; struct bio_list queued_ios; unsigned queue_size; @@ -106,6 +116,7 @@ static kmem_cache_t *_mpio_cache; struct workqueue_struct *kmultipathd; static void process_queued_ios(void *data); static void trigger_event(void *data); +static void pg_timeout_event(unsigned long data); /*----------------------------------------------- @@ -173,6 +184,9 @@ static struct multipath *alloc_multipath INIT_LIST_HEAD(&m->priority_groups); spin_lock_init(&m->lock); m->queue_io = 1; + init_timer(&m->pg_timer); + m->pg_timer.function = pg_timeout_event; + m->pg_timer.data = (unsigned long)m; INIT_WORK(&m->process_queued_ios, process_queued_ios, m); INIT_WORK(&m->trigger_event, trigger_event, m); m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); @@ -211,6 +225,28 @@ static void free_multipath(struct multip * Path selection *-----------------------------------------------*/ +static void pg_timeout_event(unsigned long data) +{ + unsigned long flags; + struct multipath *m = (struct multipath *)data; + + spin_lock_irqsave(&m->lock, flags); + + if (m->pg_timeout_status == PG_TIMEOUT_NONE) + goto out; + + m->pg_timeout_status = PG_TIMEOUT_FAILED; + + if (!m->pg_init_required && !m->pg_init_in_progress){ + m->queue_io = 0; + if (m->queue_size) + queue_work(kmultipathd, &m->process_queued_ios); + } + +out: + spin_unlock_irqrestore(&m->lock, flags); +} + static void __switch_pg(struct multipath *m, struct pgpath *pgpath) { struct hw_handler *hwh = &m->hw_handler; @@ -256,12 +292,26 @@ static void __choose_pgpath(struct multi pg = m->next_pg; m->next_pg = NULL; if (!__choose_path_in_pg(m, pg)) - return; + goto out; } /* Don't change PG until it has no remaining paths */ - if (m->current_pg && !__choose_path_in_pg(m, m->current_pg)) - return; + if (m->current_pg) { + if (!__choose_path_in_pg(m, m->current_pg)) + goto out; + if (m->pg_timeout) { + if (m->pg_timeout_status == PG_TIMEOUT_IN_PROGRESS) + return; + if (m->pg_timeout_status == PG_TIMEOUT_NONE) { + m->queue_io = 1; + m->current_pgpath = NULL; + m->pg_timeout_status = PG_TIMEOUT_IN_PROGRESS; + mod_timer(&m->pg_timer, + jiffies + m->pg_timeout * HZ); + return; + } + } + } /* * Loop through priority groups until we find a valid path. @@ -273,13 +323,20 @@ static void __choose_pgpath(struct multi if (pg->bypassed == bypassed) continue; if (!__choose_path_in_pg(m, pg)) - return; + goto out; } } while (bypassed--); failed: m->current_pgpath = NULL; m->current_pg = NULL; + return; + +out: + if (m->pg_timeout_status != PG_TIMEOUT_NONE) { + del_timer(&m->pg_timer); + m->pg_timeout_status = PG_TIMEOUT_NONE; + } } /* @@ -319,6 +376,7 @@ static int map_io(struct multipath *m, s m->queue_size--; if ((pgpath && m->queue_io) || + (m->pg_timeout_status == PG_TIMEOUT_IN_PROGRESS) || (!pgpath && m->queue_if_no_path)) { /* Queue for the daemon to resubmit */ bio_list_add(&m->queued_ios, bio); @@ -419,7 +477,8 @@ static void process_queued_ios(void *dat pgpath = m->current_pgpath; if ((pgpath && !m->queue_io) || - (!pgpath && !m->queue_if_no_path)) + (!pgpath && !m->queue_if_no_path && + (m->pg_timeout_status != PG_TIMEOUT_IN_PROGRESS))) must_queue = 0; if (m->pg_init_required && !m->pg_init_in_progress) { @@ -868,7 +927,9 @@ static int reinstate_path(struct pgpath pgpath->path.is_active = 1; m->current_pgpath = NULL; - if (!m->nr_valid_paths++ && m->queue_size) + if ((!m->nr_valid_paths++ || + (m->pg_timeout_status == PG_TIMEOUT_IN_PROGRESS)) && + m->queue_size) queue_work(kmultipathd, &m->process_queued_ios); queue_work(kmultipathd, &m->trigger_event); @@ -943,6 +1004,8 @@ static int switch_pg_num(struct multipat m->current_pg = NULL; m->next_pg = pg; } + if ((m->pg_timeout_status == PG_TIMEOUT_IN_PROGRESS) && m->queue_size) + queue_work(kmultipathd, &m->process_queued_ios); spin_unlock_irqrestore(&m->lock, flags); queue_work(kmultipathd, &m->trigger_event); @@ -973,6 +1036,40 @@ static int bypass_pg_num(struct multipat return 0; } +static int set_pg_timeout(struct multipath *m, const char *timeoutstr) +{ + unsigned timeout; + unsigned long flags; + enum pg_timeout_state status; + + if (!timeoutstr || (sscanf(timeoutstr, "%u", &timeout) != 1)) { + DMWARN("invalid timeout number supplied to set_pg_timeout"); + return -EINVAL; + } + spin_lock_irqsave(&m->lock, flags); + m->pg_timeout = timeout; + + if (timeout) + goto out; + + status = m->pg_timeout_status; + m->pg_timeout_status = PG_TIMEOUT_NONE; + if (status != PG_TIMEOUT_IN_PROGRESS) + goto out; + + del_timer(&m->pg_timer); + if (!m->pg_init_required && !m->pg_init_in_progress){ + m->queue_io = 0; + if (m->queue_size) + queue_work(kmultipathd, + &m->process_queued_ios); + } + +out: + spin_unlock_irqrestore(&m->lock, flags); + return 0; +} + /* * pg_init must call this when it has completed its initialisation */ @@ -997,7 +1094,8 @@ void dm_pg_init_complete(struct dm_path if (err_flags) { m->current_pgpath = NULL; m->current_pg = NULL; - } else if (!m->pg_init_required) + } else if (!m->pg_init_required && + (m->pg_timeout_status != PG_TIMEOUT_IN_PROGRESS)) m->queue_io = 0; m->pg_init_in_progress = 0; @@ -1257,6 +1355,8 @@ static int multipath_message(struct dm_t return bypass_pg_num(m, argv[1], 0); else if (!strnicmp(argv[0], MESG_STR("switch_group"))) return switch_pg_num(m, argv[1]); + else if (!strnicmp(argv[0], MESG_STR("set_pg_timeout"))) + return set_pg_timeout(m, argv[1]); else if (!strnicmp(argv[0], MESG_STR("reinstate_path"))) action = reinstate_path; else if (!strnicmp(argv[0], MESG_STR("fail_path")))