Subject: softlockup: allow panic on lockup From: Ingo Molnar Date: Thu Apr 17 08:51:42 CEST 2008 allow users to configure the softlockup detector to generate a panic instead of a warning message. high-availability systems might opt for this strict method (combined with panic_timeout= boot option/sysctl), instead of generating softlockup warnings ad infinitum. also, automated tests work better if the system reboots reliably (into a safe kernel) in case of a lockup. The full spectrum of configurability is supported: boot option, sysctl option and Kconfig option. it's default-disabled. Signed-off-by: Ingo Molnar --- Documentation/kernel-parameters.txt | 3 +++ include/linux/sched.h | 3 ++- kernel/softlockup.c | 21 +++++++++++++++++++++ kernel/sysctl.c | 11 +++++++++++ lib/Kconfig.debug | 26 +++++++++++++++++++++++++- 5 files changed, 62 insertions(+), 2 deletions(-) Index: linux/Documentation/kernel-parameters.txt =================================================================== --- linux.orig/Documentation/kernel-parameters.txt +++ linux/Documentation/kernel-parameters.txt @@ -1950,6 +1950,9 @@ and is between 256 and 4096 characters. snd-ymfpci= [HW,ALSA] + softlockup_panic= + [KNL] Should the soft-lockup detector generate panics. + sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/sonypi.txt Index: linux/include/linux/sched.h =================================================================== --- linux.orig/include/linux/sched.h +++ linux/include/linux/sched.h @@ -296,7 +296,8 @@ extern void softlockup_tick(void); extern void spawn_softlockup_task(void); extern void touch_softlockup_watchdog(void); extern void touch_all_softlockup_watchdogs(void); -extern unsigned long softlockup_thresh; +extern unsigned int softlockup_panic; +extern unsigned long softlockup_thresh; extern unsigned long sysctl_hung_task_check_count; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_warnings; Index: linux/kernel/softlockup.c =================================================================== --- linux.orig/kernel/softlockup.c +++ linux/kernel/softlockup.c @@ -27,6 +27,21 @@ static DEFINE_PER_CPU(struct task_struct static int __read_mostly did_panic; unsigned long __read_mostly softlockup_thresh = 60; +/* + * Should we panic (and reboot, if panic_timeout= is set) when a + * soft-lockup occurs: + */ +unsigned int __read_mostly softlockup_panic = + CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; + +static int __init softlockup_panic_setup(char *str) +{ + softlockup_panic = simple_strtoul(str, NULL, 0); + + return 1; +} +__setup("softlockup_panic=", softlockup_panic_setup); + static int softlock_panic(struct notifier_block *this, unsigned long event, void *ptr) { @@ -120,6 +135,9 @@ void softlockup_tick(void) else dump_stack(); spin_unlock(&print_lock); + + if (softlockup_panic) + panic("softlockup: hung tasks"); } /* @@ -172,6 +190,9 @@ static void check_hung_task(struct task_ t->last_switch_timestamp = now; touch_nmi_watchdog(); + + if (softlockup_panic) + panic("softlockup: blocked tasks"); } /* Index: linux/kernel/sysctl.c =================================================================== --- linux.orig/kernel/sysctl.c +++ linux/kernel/sysctl.c @@ -745,6 +745,17 @@ static struct ctl_table kern_table[] = { #ifdef CONFIG_DETECT_SOFTLOCKUP { .ctl_name = CTL_UNNUMBERED, + .procname = "softlockup_panic", + .data = &softlockup_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one, + }, + { + .ctl_name = CTL_UNNUMBERED, .procname = "softlockup_thresh", .data = &softlockup_thresh, .maxlen = sizeof(unsigned long), Index: linux/lib/Kconfig.debug =================================================================== --- linux.orig/lib/Kconfig.debug +++ linux/lib/Kconfig.debug @@ -136,7 +136,7 @@ config DETECT_SOFTLOCKUP help Say Y here to enable the kernel to detect "soft lockups", which are bugs that cause the kernel to loop in kernel - mode for more than 10 seconds, without giving other tasks a + mode for more than 60 seconds, without giving other tasks a chance to run. When a soft-lockup is detected, the kernel will print the @@ -148,6 +148,30 @@ config DETECT_SOFTLOCKUP can be detected via the NMI-watchdog, on platforms that support it.) +config BOOTPARAM_SOFTLOCKUP_PANIC + bool "Panic (Reboot) On Soft Lockups" + depends on DETECT_SOFTLOCKUP + help + Say Y here to enable the kernel to panic on "soft lockups", + which are bugs that cause the kernel to loop in kernel + mode for more than 60 seconds, without giving other tasks a + chance to run. + + The panic can be used in combination with panic_timeout, + to cause the system to reboot automatically after a + lockup has been detected. This feature is useful for + high-availability systems that have uptime guarantees and + where a lockup must be resolved ASAP. + + Say N if unsure. + +config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE + int + depends on DETECT_SOFTLOCKUP + range 0 1 + default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC + default 1 if BOOTPARAM_SOFTLOCKUP_PANIC + config SCHED_DEBUG bool "Collect scheduler debugging info" depends on DEBUG_KERNEL && PROC_FS