--- linux/kernel/sysctl.c.orig Thu Feb 24 13:59:08 2000 +++ linux/kernel/sysctl.c Thu Feb 24 13:59:11 2000 @@ -308,12 +308,15 @@ {0} }; +extern void init_irq_proc (void); void __init sysctl_init(void) { #ifdef CONFIG_PROC_FS register_proc_table(root_table, proc_sys_root); + init_irq_proc(); #endif + } int do_sysctl (int *name, int nlen, --- linux/include/linux/irq.h.orig Thu Feb 24 13:59:08 2000 +++ linux/include/linux/irq.h Thu Feb 24 15:06:16 2000 @@ -26,6 +26,7 @@ void (*disable)(unsigned int irq); void (*ack)(unsigned int irq); void (*end)(unsigned int irq); + void (*set_affinity)(unsigned int irq, unsigned int mask); }; typedef struct hw_interrupt_type hw_irq_controller; @@ -38,17 +39,33 @@ * Pad this out to 32 bytes for cache and indexing reasons. */ typedef struct { - unsigned int status; /* IRQ status - - IRQ_INPROGRESS, IRQ_DISABLED */ - hw_irq_controller *handler; /* never derefed in arch - independent code */ - struct irqaction *action; /* IRQ action list */ - unsigned int depth; /* Disable depth for nested irq disables */ + unsigned int status; /* IRQ status */ + hw_irq_controller *handler; + struct irqaction *action; /* IRQ action list */ + unsigned int depth; /* nested irq disables */ + spinlock_t lock; + unsigned int __pad[3]; } ____cacheline_aligned irq_desc_t; -#include /* the arch dependent stuff */ +extern irq_desc_t irq_desc [NR_IRQS]; + +typedef struct { + unsigned int __local_irq_count; + unsigned int __local_bh_count; + atomic_t __nmi_counter; + unsigned int __pad[5]; +} ____cacheline_aligned irq_cpustat_t; + +extern irq_cpustat_t irq_stat [NR_CPUS]; -extern irq_desc_t irq_desc[NR_IRQS]; +/* + * Simple wrappers reducing source bloat + */ +#define local_irq_count(cpu) (irq_stat[(cpu)].__local_irq_count) +#define local_bh_count(cpu) (irq_stat[(cpu)].__local_bh_count) +#define nmi_counter(cpu) (irq_stat[(cpu)].__nmi_counter) + +#include /* the arch dependent stuff */ extern int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction 
*); extern spinlock_t irq_controller_lock; --- linux/include/asm-i386/softirq.h.orig Sat Feb 12 19:48:07 2000 +++ linux/include/asm-i386/softirq.h Thu Feb 24 15:06:16 2000 @@ -4,14 +4,12 @@ #include #include -extern unsigned int local_bh_count[NR_CPUS]; - -#define cpu_bh_disable(cpu) do { local_bh_count[(cpu)]++; barrier(); } while (0) -#define cpu_bh_enable(cpu) do { barrier(); local_bh_count[(cpu)]--; } while (0) +#define cpu_bh_disable(cpu) do { local_bh_count(cpu)++; barrier(); } while (0) +#define cpu_bh_enable(cpu) do { barrier(); local_bh_count(cpu)--; } while (0) #define local_bh_disable() cpu_bh_disable(smp_processor_id()) #define local_bh_enable() cpu_bh_enable(smp_processor_id()) -#define in_softirq() (local_bh_count[smp_processor_id()] != 0) +#define in_softirq() (local_bh_count(smp_processor_id()) != 0) #endif /* __ASM_SOFTIRQ_H */ --- linux/include/asm-i386/hw_irq.h.orig Sat Feb 12 19:50:23 2000 +++ linux/include/asm-i386/hw_irq.h Thu Feb 24 15:06:16 2000 @@ -179,13 +179,21 @@ "pushl $"#nr"-256\n\t" \ "jmp common_interrupt"); +extern unsigned long prof_cpu_mask; /* * x86 profiling function, SMP safe. We might want to do this in * assembly totally? */ static inline void x86_do_profile (unsigned long eip) { - if (prof_buffer && current->pid) { + /* + * Only measure the CPUs specified by /proc/irq/prof_cpu_mask. + * (default is all CPUs.) + */ + if (!((1<>= prof_shift; /* --- linux/include/asm-i386/hardirq.h.orig Sat Feb 12 19:48:07 2000 +++ linux/include/asm-i386/hardirq.h Thu Feb 24 15:06:16 2000 @@ -2,25 +2,24 @@ #define __ASM_HARDIRQ_H #include - -extern unsigned int local_irq_count[NR_CPUS]; +#include /* * Are we in an interrupt context? Either doing bottom half * or hardware interrupt processing? 
*/ #define in_interrupt() ({ int __cpu = smp_processor_id(); \ - (local_irq_count[__cpu] + local_bh_count[__cpu] != 0); }) + (local_irq_count(__cpu) + local_bh_count(__cpu) != 0); }) -#define in_irq() (local_irq_count[smp_processor_id()] != 0) +#define in_irq() (local_irq_count(smp_processor_id()) != 0) #ifndef __SMP__ -#define hardirq_trylock(cpu) (local_irq_count[cpu] == 0) +#define hardirq_trylock(cpu) (local_irq_count(cpu) == 0) #define hardirq_endlock(cpu) do { } while (0) -#define irq_enter(cpu, irq) (local_irq_count[cpu]++) -#define irq_exit(cpu, irq) (local_irq_count[cpu]--) +#define irq_enter(cpu, irq) (local_irq_count(cpu)++) +#define irq_exit(cpu, irq) (local_irq_count(cpu)--) #define synchronize_irq() barrier() @@ -31,7 +30,16 @@ extern unsigned char global_irq_holder; extern unsigned volatile int global_irq_lock; -extern atomic_t global_irq_count; + +static inline int irqs_running (void) +{ + int i; + + for (i = 0; i < smp_num_cpus; i++) + if (local_irq_count(i)) + return 1; + return 0; +} static inline void release_irqlock(int cpu) { @@ -44,8 +52,7 @@ static inline void irq_enter(int cpu, int irq) { - ++local_irq_count[cpu]; - atomic_inc(&global_irq_count); + ++local_irq_count(cpu); while (test_bit(0,&global_irq_lock)) { /* nothing */; @@ -54,13 +61,12 @@ static inline void irq_exit(int cpu, int irq) { - atomic_dec(&global_irq_count); - --local_irq_count[cpu]; + --local_irq_count(cpu); } static inline int hardirq_trylock(int cpu) { - return !local_irq_count[cpu] && !test_bit(0,&global_irq_lock); + return !local_irq_count(cpu) && !test_bit(0,&global_irq_lock); } #define hardirq_endlock(cpu) do { } while (0) --- linux/include/asm-i386/atomic.h.orig Sat Feb 12 19:48:04 2000 +++ linux/include/asm-i386/atomic.h Thu Feb 24 15:06:16 2000 @@ -84,6 +84,17 @@ return c != 0; } +static __inline__ int atomic_inc_and_test(volatile atomic_t *v) +{ + unsigned char c; + + __asm__ __volatile__( + LOCK "incl %0; sete %1" + :"=m" (__atomic_fool_gcc(v)), "=qm" (c) + :"m" 
(__atomic_fool_gcc(v))); + return c != 0; +} + extern __inline__ int atomic_add_negative(int i, volatile atomic_t *v) { unsigned char c; --- linux/include/asm-i386/smp.h.orig Thu Feb 24 14:22:10 2000 +++ linux/include/asm-i386/smp.h Thu Feb 24 15:06:16 2000 @@ -15,7 +15,9 @@ #include #include #include +#ifdef CONFIG_X86_IO_APIC #include +#endif #include #endif #endif --- linux/arch/i386/kernel/process.c.orig Thu Feb 24 13:59:05 2000 +++ linux/arch/i386/kernel/process.c Thu Feb 24 14:30:34 2000 @@ -74,8 +74,13 @@ */ static void default_idle(void) { - if (current_cpu_data.hlt_works_ok && !hlt_counter) - asm volatile("sti ; hlt" : : : "memory"); + if (current_cpu_data.hlt_works_ok && !hlt_counter) { + asm volatile("cli" : : : "memory"); + if (!current->need_resched) + asm volatile("sti ; hlt" : : : "memory"); + else + asm volatile("sti" : : : "memory"); + } } /* --- linux/arch/i386/kernel/io_apic.c.orig Thu Feb 24 13:58:59 2000 +++ linux/arch/i386/kernel/io_apic.c Thu Feb 24 14:50:03 2000 @@ -28,6 +28,8 @@ #include #include +static spinlock_t ioapic_lock = SPIN_LOCK_UNLOCKED; + /* * # of IO-APICs and # of IRQ routing registers */ @@ -87,9 +89,8 @@ entry->pin = pin; } -#define DO_ACTION(name,R,ACTION, FINAL) \ +#define __DO_ACTION(name,R,ACTION, FINAL) \ \ -static void name##_IO_APIC_irq(unsigned int irq) \ { \ int pin; \ struct irq_pin_list *entry = irq_2_pin + irq; \ @@ -109,8 +110,31 @@ FINAL; \ } -DO_ACTION( mask, 0, |= 0x00010000, io_apic_sync(entry->apic))/* mask = 1 */ -DO_ACTION( unmask, 0, &= 0xfffeffff, ) /* mask = 0 */ +#define DO_ACTION(name,R,ACTION, FINAL) \ + \ +static void name##_IO_APIC_irq(unsigned int irq) \ +__DO_ACTION(name,R,ACTION, FINAL) + +DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic))/* mask = 1 */ +DO_ACTION( __unmask, 0, &= 0xfffeffff, ) /* mask = 0 */ + +static void mask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __mask_IO_APIC_irq(irq); + 
spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void unmask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { @@ -537,7 +561,7 @@ entry.delivery_mode = dest_LowestPrio; entry.dest_mode = 1; /* logical delivery */ entry.mask = 0; /* enable IRQ */ - entry.dest.logical.logical_dest = APIC_ALL_CPUS; /* all CPUs */ + entry.dest.logical.logical_dest = APIC_ALL_CPUS; idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { @@ -1026,16 +1050,16 @@ static int __init nmi_irq_works(void) { - atomic_t tmp[NR_CPUS]; + irq_cpustat_t tmp[NR_CPUS]; int j, cpu; - memcpy(tmp, nmi_counter, sizeof(tmp)); + memcpy(tmp, irq_stat, sizeof(tmp)); sti(); mdelay(50); for (j = 0; j < smp_num_cpus; j++) { cpu = cpu_logical_map(j); - if (atomic_read(nmi_counter+cpu) - atomic_read(tmp+cpu) <= 3) { + if (atomic_read(&nmi_counter(cpu)) - atomic_read(&tmp[cpu].__nmi_counter) <= 3) { printk("CPU#%d NMI appears to be stuck.\n", cpu); return 0; } @@ -1055,14 +1079,9 @@ * that was delayed but this is now handled in the device * independent code. */ -static void enable_edge_ioapic_irq(unsigned int irq) -{ - unmask_IO_APIC_irq(irq); -} +#define enable_edge_ioapic_irq unmask_IO_APIC_irq -static void disable_edge_ioapic_irq(unsigned int irq) -{ -} +static void disable_edge_ioapic_irq (unsigned int irq) { /* nothing */ } /* * Starting up a edge-triggered IO-APIC interrupt is @@ -1077,12 +1096,17 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq) { int was_pending = 0; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); if (irq < 16) { disable_8259A_irq(irq); if (i8259A_irq_pending(irq)) was_pending = 1; } - enable_edge_ioapic_irq(irq); + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); + return was_pending; } @@ -1093,14 +1117,15 @@ * interrupt for real. 
This prevents IRQ storms from unhandled * devices. */ -void static ack_edge_ioapic_irq(unsigned int irq) +static void ack_edge_ioapic_irq(unsigned int irq) { if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); ack_APIC_irq(); } -void static end_edge_ioapic_irq(unsigned int i){} + +static void end_edge_ioapic_irq (unsigned int i) { /* nothing */ } /* @@ -1108,23 +1133,46 @@ * and shutting down and starting up the interrupt * is the same as enabling and disabling them -- except * with a startup need to return a "was pending" value. + * + * Level triggered interrupts are special because we + * do not touch any IO-APIC register while handling + * them. We ack the APIC in the end-IRQ handler, not + * in the start-IRQ-handler. Protection against reentrance + * from the same interrupt is still provided, both by the + * generic IRQ layer and by the fact that an unacked local + * APIC does not accept IRQs. */ -static unsigned int startup_level_ioapic_irq(unsigned int irq) +static unsigned int startup_level_ioapic_irq (unsigned int irq) { unmask_IO_APIC_irq(irq); + return 0; /* don't check for pending */ } #define shutdown_level_ioapic_irq mask_IO_APIC_irq #define enable_level_ioapic_irq unmask_IO_APIC_irq #define disable_level_ioapic_irq mask_IO_APIC_irq -#define end_level_ioapic_irq unmask_IO_APIC_irq -void static mask_and_ack_level_ioapic_irq(unsigned int i) + +static void end_level_ioapic_irq (unsigned int i) { - mask_IO_APIC_irq(i); ack_APIC_irq(); } +static void mask_and_ack_level_ioapic_irq (unsigned int i) { /* nothing */ } + +static void set_ioapic_affinity (unsigned int irq, unsigned int mask) +{ + unsigned long flags; + /* + * Only the first 8 bits are valid. 
+ */ + mask = mask << 24; + + spin_lock_irqsave(&ioapic_lock, flags); + __DO_ACTION( target, 1, = mask, ) + spin_unlock_irqrestore(&ioapic_lock, flags); +} + /* * Level and edge triggered IO-APIC interrupts need different handling, * so we use two separate IRQ descriptors. Edge triggered IRQs can be @@ -1141,7 +1189,8 @@ enable_edge_ioapic_irq, disable_edge_ioapic_irq, ack_edge_ioapic_irq, - end_edge_ioapic_irq + end_edge_ioapic_irq, + set_ioapic_affinity, }; static struct hw_interrupt_type ioapic_level_irq_type = { @@ -1151,7 +1200,8 @@ enable_level_ioapic_irq, disable_level_ioapic_irq, mask_and_ack_level_ioapic_irq, - end_level_ioapic_irq + end_level_ioapic_irq, + set_ioapic_affinity, }; static inline void init_IO_APIC_traps(void) @@ -1185,12 +1235,12 @@ } } -void static ack_lapic_irq (unsigned int irq) +static void ack_lapic_irq (unsigned int irq) { ack_APIC_irq(); } -void static end_lapic_irq (unsigned int i) { /* nothing */ } +static void end_lapic_irq (unsigned int i) { /* nothing */ } static struct hw_interrupt_type lapic_irq_type = { "local-APIC-edge", --- linux/arch/i386/kernel/irq.c.orig Thu Feb 24 13:59:07 2000 +++ linux/arch/i386/kernel/irq.c Thu Feb 24 14:14:32 2000 @@ -31,21 +31,20 @@ #include #include #include +#include +#include #include #include #include #include +#include #include #include #include #include -unsigned int local_bh_count[NR_CPUS]; -unsigned int local_irq_count[NR_CPUS]; - -extern atomic_t nmi_counter[NR_CPUS]; /* * Linux has a controller-independent x86 interrupt architecture. @@ -63,17 +62,15 @@ * interrupt controllers, without having to do assembly magic. */ -/* - * Micro-access to controllers is serialized over the whole - * system. We never hold this lock when we call the actual - * IRQ handler. - */ -spinlock_t irq_controller_lock = SPIN_LOCK_UNLOCKED; +irq_cpustat_t irq_stat [NR_CPUS]; + /* * Controller mappings for all interrupt sources: */ irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = - { [0 ... 
NR_IRQS-1] = { 0, &no_irq_type, }}; + { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; + +static void register_irq_proc (unsigned int irq); /* * Special irq handlers. @@ -164,7 +161,7 @@ p += sprintf(p, "NMI: "); for (j = 0; j < smp_num_cpus; j++) p += sprintf(p, "%10u ", - atomic_read(nmi_counter+cpu_logical_map(j))); + atomic_read(&nmi_counter(cpu_logical_map(j)))); p += sprintf(p, "\n"); #if CONFIG_SMP p += sprintf(p, "LOC: "); @@ -186,7 +183,6 @@ #ifdef CONFIG_SMP unsigned char global_irq_holder = NO_PROC_ID; unsigned volatile int global_irq_lock; -atomic_t global_irq_count; static void show(char * str) { @@ -196,9 +192,9 @@ printk("\n%s, CPU %d:\n", str, cpu); printk("irq: %d [%d %d]\n", - atomic_read(&global_irq_count), local_irq_count[0], local_irq_count[1]); + irqs_running(), local_irq_count(0), local_irq_count(1)); printk("bh: %d [%d %d]\n", - spin_is_locked(&global_bh_lock) ? 1 : 0, local_bh_count[0], local_bh_count[1]); + spin_is_locked(&global_bh_lock) ? 1 : 0, local_bh_count(0), local_bh_count(1)); stack = (unsigned long *) &stack; for (i = 40; i ; i--) { unsigned long x = *++stack; @@ -248,10 +244,9 @@ * for bottom half handlers unless we're * already executing in one.. */ - if (!atomic_read(&global_irq_count)) { - if (local_bh_count[cpu] || !spin_is_locked(&global_bh_lock)) + if (!irqs_running()) + if (local_bh_count(cpu) || !spin_is_locked(&global_bh_lock)) break; - } /* Duh, we have to loop. 
Release the lock to avoid deadlocks */ clear_bit(0,&global_irq_lock); @@ -264,11 +259,11 @@ __sti(); SYNC_OTHER_CORES(cpu); __cli(); - if (atomic_read(&global_irq_count)) + if (irqs_running()) continue; if (global_irq_lock) continue; - if (!local_bh_count[cpu] && spin_is_locked(&global_bh_lock)) + if (!local_bh_count(cpu) && spin_is_locked(&global_bh_lock)) continue; if (!test_and_set_bit(0,&global_irq_lock)) break; @@ -285,7 +280,7 @@ */ void synchronize_irq(void) { - if (atomic_read(&global_irq_count)) { + if (irqs_running()) { /* Stupid approach */ cli(); sti(); @@ -338,7 +333,7 @@ if (flags & (1 << EFLAGS_IF_SHIFT)) { int cpu = smp_processor_id(); __cli(); - if (!local_irq_count[cpu]) + if (!local_irq_count(cpu)) get_irqlock(cpu); } } @@ -347,7 +342,7 @@ { int cpu = smp_processor_id(); - if (!local_irq_count[cpu]) + if (!local_irq_count(cpu)) release_irqlock(cpu); __sti(); } @@ -364,6 +359,7 @@ int retval; int local_enabled; unsigned long flags; + int cpu = smp_processor_id(); __save_flags(flags); local_enabled = (flags >> EFLAGS_IF_SHIFT) & 1; @@ -371,10 +367,10 @@ retval = 2 + local_enabled; /* check for global flags if we're not in an interrupt */ - if (!local_irq_count[smp_processor_id()]) { + if (!local_irq_count(cpu)) { if (local_enabled) retval = 1; - if (global_irq_holder == (unsigned char) smp_processor_id()) + if (global_irq_holder == cpu) retval = 0; } return retval; @@ -442,16 +438,17 @@ * hardware disable after having gotten the irq * controller lock. 
*/ -void disable_irq_nosync(unsigned int irq) +void inline disable_irq_nosync(unsigned int irq) { + irq_desc_t *desc = irq_desc + irq; unsigned long flags; - spin_lock_irqsave(&irq_controller_lock, flags); - if (!irq_desc[irq].depth++) { - irq_desc[irq].status |= IRQ_DISABLED; - irq_desc[irq].handler->disable(irq); + spin_lock_irqsave(&desc->lock, flags); + if (!desc->depth++) { + desc->status |= IRQ_DISABLED; + desc->handler->disable(irq); } - spin_unlock_irqrestore(&irq_controller_lock, flags); + spin_unlock_irqrestore(&desc->lock, flags); } /* @@ -462,7 +459,7 @@ { disable_irq_nosync(irq); - if (!local_irq_count[smp_processor_id()]) { + if (!local_irq_count(smp_processor_id())) { do { barrier(); } while (irq_desc[irq].status & IRQ_INPROGRESS); @@ -471,28 +468,29 @@ void enable_irq(unsigned int irq) { + irq_desc_t *desc = irq_desc + irq; unsigned long flags; - spin_lock_irqsave(&irq_controller_lock, flags); - switch (irq_desc[irq].depth) { + spin_lock_irqsave(&desc->lock, flags); + switch (desc->depth) { case 1: { - unsigned int status = irq_desc[irq].status & ~IRQ_DISABLED; - irq_desc[irq].status = status; + unsigned int status = desc->status & ~IRQ_DISABLED; + desc->status = status; if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { - irq_desc[irq].status = status | IRQ_REPLAY; - hw_resend_irq(irq_desc[irq].handler,irq); + desc->status = status | IRQ_REPLAY; + hw_resend_irq(desc->handler,irq); } - irq_desc[irq].handler->enable(irq); + desc->handler->enable(irq); /* fall-through */ } default: - irq_desc[irq].depth--; + desc->depth--; break; case 0: printk("enable_irq() unbalanced from %p\n", __builtin_return_address(0)); } - spin_unlock_irqrestore(&irq_controller_lock, flags); + spin_unlock_irqrestore(&desc->lock, flags); } /* @@ -514,13 +512,12 @@ */ int irq = regs.orig_eax & 0xff; /* high bits used in ret_from_ code */ int cpu = smp_processor_id(); - irq_desc_t *desc; + irq_desc_t *desc = irq_desc + irq; struct irqaction * action; unsigned int status; 
kstat.irqs[cpu][irq]++; - desc = irq_desc + irq; - spin_lock(&irq_controller_lock); + spin_lock(&desc->lock); desc->handler->ack(irq); /* REPLAY is when Linux resends an IRQ that was dropped earlier @@ -540,7 +537,6 @@ status |= IRQ_INPROGRESS; /* we are handling it */ } desc->status = status; - spin_unlock(&irq_controller_lock); /* * If there is no IRQ handler or it was disabled, exit early. @@ -549,7 +545,7 @@ will take care of it. */ if (!action) - return 1; + goto out; /* * Edge triggered interrupts need to remember @@ -562,20 +558,24 @@ * SMP environment. */ for (;;) { + spin_unlock(&desc->lock); handle_IRQ_event(irq, ®s, action); - spin_lock(&irq_controller_lock); + spin_lock(&desc->lock); if (!(desc->status & IRQ_PENDING)) break; desc->status &= ~IRQ_PENDING; - spin_unlock(&irq_controller_lock); } desc->status &= ~IRQ_INPROGRESS; - if (!(desc->status & IRQ_DISABLED)) - desc->handler->end(irq); - spin_unlock(&irq_controller_lock); +out: + /* + * The ->end() handler has to deal with interrupts which got + * disabled while the handler was running. 
+ */ + desc->handler->end(irq); + spin_unlock(&desc->lock); - if (softirq_state[cpu].active&softirq_state[cpu].mask) + if (softirq_state[cpu].active & softirq_state[cpu].mask) do_softirq(); return 1; } @@ -627,14 +627,16 @@ void free_irq(unsigned int irq, void *dev_id) { + irq_desc_t *desc; struct irqaction **p; unsigned long flags; if (irq >= NR_IRQS) return; - spin_lock_irqsave(&irq_controller_lock,flags); - p = &irq_desc[irq].action; + desc = irq_desc + irq; + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; for (;;) { struct irqaction * action = *p; if (action) { @@ -645,22 +647,22 @@ /* Found it - now remove it from the list of entries */ *pp = action->next; - if (!irq_desc[irq].action) { - irq_desc[irq].status |= IRQ_DISABLED; - irq_desc[irq].handler->shutdown(irq); + if (!desc->action) { + desc->status |= IRQ_DISABLED; + desc->handler->shutdown(irq); } - spin_unlock_irqrestore(&irq_controller_lock,flags); + spin_unlock_irqrestore(&desc->lock,flags); #ifdef CONFIG_SMP /* Wait to make sure it's not being used on another CPU */ - while (irq_desc[irq].status & IRQ_INPROGRESS) + while (desc->status & IRQ_INPROGRESS) barrier(); #endif kfree(action); return; } printk("Trying to free free IRQ%d\n",irq); - spin_unlock_irqrestore(&irq_controller_lock,flags); + spin_unlock_irqrestore(&desc->lock,flags); return; } } @@ -676,18 +678,22 @@ unsigned long probe_irq_on(void) { unsigned int i; - unsigned long delay; + irq_desc_t *desc; unsigned long val; + unsigned long delay; /* * something may have generated an irq long ago and we want to * flush such a longstanding irq before considering it as spurious. */ - spin_lock_irq(&irq_controller_lock); - for (i = NR_IRQS-1; i > 0; i--) + for (i = NR_IRQS-1; i > 0; i--) { + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); if (!irq_desc[i].action) irq_desc[i].handler->startup(i); - spin_unlock_irq(&irq_controller_lock); + spin_unlock_irq(&desc->lock); + } /* Wait for longstanding interrupts to trigger. 
*/ for (delay = jiffies + HZ/50; time_after(delay, jiffies); ) @@ -698,15 +704,17 @@ * (we must startup again here because if a longstanding irq * happened in the previous stage, it may have masked itself) */ - spin_lock_irq(&irq_controller_lock); for (i = NR_IRQS-1; i > 0; i--) { - if (!irq_desc[i].action) { - irq_desc[i].status |= IRQ_AUTODETECT | IRQ_WAITING; - if(irq_desc[i].handler->startup(i)) - irq_desc[i].status |= IRQ_PENDING; + desc = irq_desc + i; + + spin_lock_irq(&desc->lock); + if (!desc->action) { + desc->status |= IRQ_AUTODETECT | IRQ_WAITING; + if (desc->handler->startup(i)) + desc->status |= IRQ_PENDING; } + spin_unlock_irq(&desc->lock); } - spin_unlock_irq(&irq_controller_lock); /* * Wait for spurious interrupts to trigger @@ -718,24 +726,24 @@ * Now filter out any obviously spurious interrupts */ val = 0; - spin_lock_irq(&irq_controller_lock); - for (i=0; ishutdown(i); - continue; + for (i = 0; i < NR_IRQS; i++) { + irq_desc_t *desc = irq_desc + i; + unsigned int status; + + spin_lock_irq(&desc->lock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + /* It triggered already - consider it spurious. 
*/ + if (!(status & IRQ_WAITING)) { + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } else + if (i < 32) + val |= 1 << i; } - - if (i < 32) - val |= 1 << i; + spin_unlock_irq(&desc->lock); } - spin_unlock_irq(&irq_controller_lock); return val; } @@ -750,20 +758,22 @@ unsigned int mask; mask = 0; - spin_lock_irq(&irq_controller_lock); for (i = 0; i < 16; i++) { - unsigned int status = irq_desc[i].status; + irq_desc_t *desc = irq_desc + i; + unsigned int status; - if (!(status & IRQ_AUTODETECT)) - continue; + spin_lock_irq(&desc->lock); + status = desc->status; - if (!(status & IRQ_WAITING)) - mask |= 1 << i; + if (status & IRQ_AUTODETECT) { + if (!(status & IRQ_WAITING)) + mask |= 1 << i; - irq_desc[i].status = status & ~IRQ_AUTODETECT; - irq_desc[i].handler->shutdown(i); + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); + } + spin_unlock_irq(&desc->lock); } - spin_unlock_irq(&irq_controller_lock); return mask & val; } @@ -778,22 +788,24 @@ nr_irqs = 0; irq_found = 0; - spin_lock_irq(&irq_controller_lock); - for (i=0; ilock); + status = desc->status; + + if (status & IRQ_AUTODETECT) { + if (!(status & IRQ_WAITING)) { + if (!nr_irqs) + irq_found = i; + nr_irqs++; + } + desc->status = status & ~IRQ_AUTODETECT; + desc->handler->shutdown(i); } - irq_desc[i].status = status & ~IRQ_AUTODETECT; - irq_desc[i].handler->shutdown(i); + spin_unlock_irq(&desc->lock); } - spin_unlock_irq(&irq_controller_lock); if (nr_irqs > 1) irq_found = -irq_found; @@ -804,8 +816,9 @@ int setup_irq(unsigned int irq, struct irqaction * new) { int shared = 0; - struct irqaction *old, **p; unsigned long flags; + struct irqaction *old, **p; + irq_desc_t *desc = irq_desc + irq; /* * Some drivers like serial.c use request_irq() heavily, @@ -827,12 +840,12 @@ /* * The following block of code has to be executed atomically */ - spin_lock_irqsave(&irq_controller_lock,flags); - p = &irq_desc[irq].action; + spin_lock_irqsave(&desc->lock,flags); + p = 
&desc->action; if ((old = *p) != NULL) { /* Can't share interrupts unless both agree to */ if (!(old->flags & new->flags & SA_SHIRQ)) { - spin_unlock_irqrestore(&irq_controller_lock,flags); + spin_unlock_irqrestore(&desc->lock,flags); return -EBUSY; } @@ -847,11 +860,171 @@ *p = new; if (!shared) { - irq_desc[irq].depth = 0; - irq_desc[irq].status &= ~IRQ_DISABLED; - irq_desc[irq].handler->startup(irq); + desc->depth = 0; + desc->status &= ~IRQ_DISABLED; + desc->handler->startup(irq); } - spin_unlock_irqrestore(&irq_controller_lock,flags); + spin_unlock_irqrestore(&desc->lock,flags); + + register_irq_proc(irq); return 0; +} + +static struct proc_dir_entry * root_irq_dir; +static struct proc_dir_entry * irq_dir [NR_IRQS]; +static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; + +unsigned int irq_affinity [NR_IRQS] = { [0 ... NR_IRQS-1] = 0xffffffff}; + +#define HEX_DIGITS 8 + +static int irq_affinity_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + if (count < HEX_DIGITS+1) + return -EINVAL; + return sprintf (page, "%08x\n", irq_affinity[(int)data]); +} + +static unsigned int parse_hex_value (const char *buffer, + unsigned long count, unsigned long *ret) +{ + unsigned char hexnum [HEX_DIGITS]; + unsigned long value; + int i; + + if (!count) + return -EINVAL; + if (count > HEX_DIGITS) + count = HEX_DIGITS; + if (copy_from_user(hexnum, buffer, count)) + return -EFAULT; + + /* + * Parse the first 8 characters as a hex string, any non-hex char + * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. + */ + value = 0; + + for (i = 0; i < count; i++) { + unsigned int c = hexnum[i]; + + switch (c) { + case '0' ... '9': c -= '0'; break; + case 'a' ... 'f': c -= 'a'-10; break; + case 'A' ... 
'F': c -= 'A'-10; break; + default: + goto out; + } + value = (value << 4) | c; + } +out: + *ret = value; + return 0; +} + +static int irq_affinity_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + int irq = (int) data, full_count = count, err; + unsigned long new_value; + + if (!irq_desc[irq].handler->set_affinity) + return -EIO; + + err = parse_hex_value(buffer, count, &new_value); + +#if CONFIG_SMP + /* + * Do not allow disabling IRQs completely - it's a too easy + * way to make the system unusable accidentally :-) At least + * one online CPU still has to be targeted. + */ + if (!(new_value & cpu_online_map)) + return -EINVAL; +#endif + + irq_affinity[irq] = new_value; + irq_desc[irq].handler->set_affinity(irq, new_value); + + return full_count; +} + +static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + unsigned long *mask = (unsigned long *) data; + if (count < HEX_DIGITS+1) + return -EINVAL; + return sprintf (page, "%08lx\n", *mask); +} + +static int prof_cpu_mask_write_proc (struct file *file, const char *buffer, + unsigned long count, void *data) +{ + unsigned long *mask = (unsigned long *) data, full_count = count, err; + unsigned long new_value; + + err = parse_hex_value(buffer, count, &new_value); + if (err) + return err; + + *mask = new_value; + return full_count; +} + +#define MAX_NAMELEN 10 + +static void register_irq_proc (unsigned int irq) +{ + struct proc_dir_entry *entry; + char name [MAX_NAMELEN]; + + if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type)) + return; + + memset(name, 0, MAX_NAMELEN); + sprintf(name, "%d", irq); + + /* create /proc/irq/1234 */ + irq_dir[irq] = proc_mkdir(name, root_irq_dir); + + /* create /proc/irq/1234/smp_affinity */ + entry = create_proc_entry("smp_affinity", 0700, irq_dir[irq]); + + entry->nlink = 1; + entry->data = (void *)irq; + entry->read_proc = irq_affinity_read_proc; + entry->write_proc = 
irq_affinity_write_proc; + + smp_affinity_entry[irq] = entry; +} + +unsigned long prof_cpu_mask = -1; + +void init_irq_proc (void) +{ + struct proc_dir_entry *entry; + int i; + + /* create /proc/irq */ + root_irq_dir = proc_mkdir("irq", 0); + + /* create /proc/irq/prof_cpu_mask */ + entry = create_proc_entry("prof_cpu_mask", 0700, root_irq_dir); + + entry->nlink = 1; + entry->data = (void *)&prof_cpu_mask; + entry->read_proc = prof_cpu_mask_read_proc; + entry->write_proc = prof_cpu_mask_write_proc; + + /* + * Create entries for all existing IRQs. + */ + for (i = 0; i < NR_IRQS; i++) { + if (irq_desc[i].handler == &no_irq_type) + continue; + register_irq_proc(i); + } } --- linux/arch/i386/kernel/apm.c.orig Thu Feb 24 13:59:07 2000 +++ linux/arch/i386/kernel/apm.c Thu Feb 24 13:59:11 2000 @@ -590,7 +590,11 @@ continue; if (hlt_counter) continue; - asm volatile("sti ; hlt" : : : "memory"); + asm volatile("cli" : : : "memory"); + if (!current->need_resched) + asm volatile("sti ; hlt" : : : "memory"); + else + asm volatile("sti" : : : "memory"); continue; } --- linux/arch/i386/kernel/traps.c.orig Thu Feb 10 04:36:08 2000 +++ linux/arch/i386/kernel/traps.c Thu Feb 24 13:59:11 2000 @@ -360,8 +360,6 @@ printk("Do you have a strange power saving mode enabled?\n"); } -atomic_t nmi_counter[NR_CPUS]; - #if CONFIG_X86_IO_APIC int nmi_watchdog = 1; @@ -437,7 +435,8 @@ { unsigned char reason = inb(0x61); - atomic_inc(nmi_counter+smp_processor_id()); + + atomic_inc(&nmi_counter(smp_processor_id())); if (!(reason & 0xc0)) { #if CONFIG_X86_IO_APIC /* --- linux/arch/i386/kernel/i8259.c.orig Fri Feb 11 20:30:10 2000 +++ linux/arch/i386/kernel/i8259.c Thu Feb 24 14:51:59 2000 @@ -127,11 +127,14 @@ * moves to arch independent land */ -void enable_8259A_irq(unsigned int irq); -void disable_8259A_irq(unsigned int irq); +static spinlock_t i8259A_lock = SPIN_LOCK_UNLOCKED; + +static void end_8259A_irq (unsigned int irq) +{ + if (!(irq_desc[irq].status & IRQ_DISABLED)) + 
enable_8259A_irq(irq); +} -/* shutdown is same as "disable" */ -#define end_8259A_irq enable_8259A_irq #define shutdown_8259A_irq disable_8259A_irq void mask_and_ack_8259A(unsigned int); @@ -149,7 +152,8 @@ enable_8259A_irq, disable_8259A_irq, mask_and_ack_8259A, - end_8259A_irq + end_8259A_irq, + NULL }; /* @@ -183,30 +187,45 @@ void disable_8259A_irq(unsigned int irq) { unsigned int mask = 1 << irq; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask |= mask; if (irq & 8) outb(cached_A1,0xA1); else outb(cached_21,0x21); + spin_unlock_irqrestore(&i8259A_lock, flags); } void enable_8259A_irq(unsigned int irq) { unsigned int mask = ~(1 << irq); + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask &= mask; if (irq & 8) outb(cached_A1,0xA1); else outb(cached_21,0x21); + spin_unlock_irqrestore(&i8259A_lock, flags); } int i8259A_irq_pending(unsigned int irq) { unsigned int mask = 1<> 8)); + ret = inb(0x20) & mask; + else + ret = inb(0xA0) & (mask >> 8); + spin_unlock_irqrestore(&i8259A_lock, flags); + + return ret; } void make_8259A_irq(unsigned int irq) @@ -247,7 +266,9 @@ void mask_and_ack_8259A(unsigned int irq) { unsigned int irqmask = 1 << irq; + unsigned long flags; + spin_lock_irqsave(&i8259A_lock, flags); /* * Lightweight spurious IRQ detection. 
We do not want * to overdo spurious IRQ handling - it's usually a sign @@ -278,6 +299,7 @@ outb(cached_21,0x21); outb(0x20,0x20); /* 'generic EOI' to master */ } + spin_unlock_irqrestore(&i8259A_lock, flags); return; spurious_8259A_irq: --- linux/arch/i386/kernel/setup.c.orig Thu Feb 10 23:54:11 2000 +++ linux/arch/i386/kernel/setup.c Thu Feb 24 13:59:11 2000 @@ -119,7 +119,7 @@ #endif extern int root_mountflags; -extern int _text, _etext, _edata, _end; +extern char _text, _etext, _edata, _end; extern unsigned long cpu_hz; /* --- linux/arch/i386/kernel/i386_ksyms.c.orig Thu Feb 24 14:58:48 2000 +++ linux/arch/i386/kernel/i386_ksyms.c Thu Feb 24 14:59:23 2000 @@ -144,6 +144,4 @@ EXPORT_SYMBOL(get_wchan); - -EXPORT_SYMBOL(local_bh_count); -EXPORT_SYMBOL(local_irq_count); +EXPORT_SYMBOL(irq_stat); --- linux/Documentation/IRQ-affinity.txt.orig Thu Feb 24 13:59:11 2000 +++ linux/Documentation/IRQ-affinity.txt Thu Feb 24 14:52:24 2000 @@ -0,0 +1,37 @@ + +SMP IRQ affinity, started by Ingo Molnar + + +/proc/irq/IRQ#/smp_affinity specifies which target CPUs are permitted +for a given IRQ source. It's a bitmask of allowed CPUs. It's not allowed +to turn off all CPUs, and if an IRQ controller does not support IRQ +affinity then the value will not change from the default 0xffffffff. + +Here is an example of restricting IRQ44 (eth1) to CPU0-3 then restricting +the IRQ to CPU4-7 (this is an 8-CPU SMP box): + +[root@moon 44]# cat smp_affinity +ffffffff +[root@moon 44]# echo 0f > smp_affinity +[root@moon 44]# cat smp_affinity +0000000f +[root@moon 44]# ping -f h +PING hell (195.4.7.3): 56 data bytes +... +--- hell ping statistics --- +6029 packets transmitted, 6027 packets received, 0% packet loss +round-trip min/avg/max = 0.1/0.1/0.4 ms +[root@moon 44]# cat /proc/interrupts | grep 44: + 44: 0 1785 1785 1783 1783 1 +1 0 IO-APIC-level eth1 +[root@moon 44]# echo f0 > smp_affinity +[root@moon 44]# ping -f h +PING hell (195.4.7.3): 56 data bytes +.. 
+--- hell ping statistics --- +2779 packets transmitted, 2777 packets received, 0% packet loss +round-trip min/avg/max = 0.1/0.5/585.4 ms +[root@moon 44]# cat /proc/interrupts | grep 44: + 44: 1068 1785 1785 1784 1784 1069 1070 1069 IO-APIC-level eth1 +[root@moon 44]# +