--- linux/init/main.c.orig Mon Sep 20 07:54:12 1999 +++ linux/init/main.c Mon Sep 20 07:54:31 1999 @@ -439,8 +439,6 @@ #endif -extern void initialize_secondary(void); - /* * Activate the first processor. */ @@ -448,14 +446,6 @@ asmlinkage void __init start_kernel(void) { char * command_line; - -#ifdef __SMP__ - static int boot_cpu = 1; - /* "current" has been set up, we need to load it now */ - if (!boot_cpu) - initialize_secondary(); - boot_cpu = 0; -#endif /* * Interrupts are still disabled. Do necessary setups, then --- linux/kernel/printk.c.orig Mon Sep 20 07:54:16 1999 +++ linux/kernel/printk.c Mon Sep 20 10:51:07 1999 @@ -22,7 +22,7 @@ #include -#define LOG_BUF_LEN (16384) +#define LOG_BUF_LEN (32768) #define LOG_BUF_MASK (LOG_BUF_LEN-1) static char buf[1024]; --- linux/include/asm-i386/apic.h.orig Mon Aug 23 17:10:45 1999 +++ linux/include/asm-i386/apic.h Mon Sep 20 10:55:53 1999 @@ -23,6 +23,7 @@ #define APIC_LDR_MASK (0xFF<<24) #define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF) #define SET_APIC_LOGICAL_ID(x) (((x)<<24)) +#define APIC_ALL_CPUS 0xFF #define APIC_DFR 0xE0 #define GET_APIC_DFR(x) (((x)>>28)&0x0F) #define SET_APIC_DFR(x) ((x)<<28) @@ -62,7 +63,14 @@ #define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF) #define SET_APIC_DEST_FIELD(x) ((x)<<24) #define APIC_LVTT 0x320 +#define APIC_LVTPC 0x340 #define APIC_LVT0 0x350 +#define APIC_LVT_TIMER_BASE_MASK (0x3<<18) +#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3) +#define SET_APIC_TIMER_BASE(x) (((x)<<18)) +#define APIC_TIMER_BASE_CLKIN 0x0 +#define APIC_TIMER_BASE_TMBASE 0x1 +#define APIC_TIMER_BASE_DIV 0x2 #define APIC_LVT_TIMER_PERIODIC (1<<17) #define APIC_LVT_MASKED (1<<16) #define APIC_LVT_LEVEL_TRIGGER (1<<15) @@ -79,6 +87,7 @@ #define APIC_TMICT 0x380 #define APIC_TMCCT 0x390 #define APIC_TDCR 0x3E0 +#define APIC_TDR_DIV_TMBASE (1<<2) #define APIC_TDR_DIV_1 0xB #define APIC_TDR_DIV_2 0x0 #define APIC_TDR_DIV_4 0x1 @@ -91,5 +100,256 @@ #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) #define MAX_IO_APICS 8 
+ +/* + * the local APIC register structure, memory mapped. Not terribly well + * tested, but we might eventually use this one in the future - the + * problem why we cannot use it right now is the P5 APIC, it has an + * errata which cannot take 8-bit reads and writes, only 32-bit ones ... + */ +#define u32 unsigned int + +#define lapic ((volatile struct local_apic *)APIC_BASE) + +struct local_apic { + +/*000*/ struct { u32 __reserved[4]; } __reserved_01; + +/*010*/ struct { u32 __reserved[4]; } __reserved_02; + +/*020*/ struct { /* APIC ID Register */ + u32 __reserved_1 : 24, + phys_apic_id : 4, + __reserved_2 : 4; + u32 __reserved[3]; + } id; + +/*030*/ const + struct { /* APIC Version Register */ + u32 version : 8, + __reserved_1 : 8, + max_lvt : 8, + __reserved_2 : 8; + u32 __reserved[3]; + } version; + +/*040*/ struct { u32 __reserved[4]; } __reserved_03; + +/*050*/ struct { u32 __reserved[4]; } __reserved_04; + +/*060*/ struct { u32 __reserved[4]; } __reserved_05; + +/*070*/ struct { u32 __reserved[4]; } __reserved_06; + +/*080*/ struct { /* Task Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } tpr; + +/*090*/ const + struct { /* Arbitration Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } apr; + +/*0A0*/ const + struct { /* Processor Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } ppr; + +/*0B0*/ struct { /* End Of Interrupt Register */ + u32 eoi; + u32 __reserved[3]; + } eoi; + +/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; + +/*0D0*/ struct { /* Logical Destination Register */ + u32 __reserved_1 : 24, + logical_dest : 8; + u32 __reserved_2[3]; + } ldr; + +/*0E0*/ struct { /* Destination Format Register */ + u32 __reserved_1 : 28, + model : 4; + u32 __reserved_2[3]; + } dfr; + +/*0F0*/ struct { /* Spurious Interrupt Vector Register */ + u32 spurious_vector : 8, + apic_enabled : 1, + focus_cpu : 1, + __reserved_2 : 22; + u32 
__reserved_3[3]; + } svr; + +/*100*/ struct { /* In Service Register */ +/*170*/ u32 bitfield; + u32 __reserved[3]; + } isr [8]; + +/*180*/ struct { /* Trigger Mode Register */ +/*1F0*/ u32 bitfield; + u32 __reserved[3]; + } tmr [8]; + +/*200*/ struct { /* Interrupt Request Register */ +/*270*/ u32 bitfield; + u32 __reserved[3]; + } irr [8]; + +/*280*/ union { /* Error Status Register */ + struct { + u32 send_cs_error : 1, + receive_cs_error : 1, + send_accept_error : 1, + receive_accept_error : 1, + __reserved_1 : 1, + send_illegal_vector : 1, + receive_illegal_vector : 1, + illegal_register_address : 1, + __reserved_2 : 24; + u32 __reserved_3[3]; + } error_bits; + struct { + u32 errors; + u32 __reserved_3[3]; + } all_errors; + } esr; + +/*290*/ struct { u32 __reserved[4]; } __reserved_08; + +/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; + +/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; + +/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; + +/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; + +/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; + +/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; + +/*300*/ struct { /* Interrupt Command Register 1 */ + u32 vector : 8, + delivery_mode : 3, + destination_mode : 1, + delivery_status : 1, + __reserved_1 : 1, + level : 1, + trigger : 1, + __reserved_2 : 2, + shorthand : 2, + __reserved_3 : 12; + u32 __reserved_4[3]; + } icr1; + +/*310*/ struct { /* Interrupt Command Register 2 */ + union { + u32 __reserved_1 : 24, + phys_dest : 4, + __reserved_2 : 4; + u32 __reserved_3 : 24, + logical_dest : 8; + } dest; + u32 __reserved_4[3]; + } icr2; + +/*320*/ struct { /* LVT - Timer */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + timer_mode : 1, + __reserved_3 : 14; + u32 __reserved_4[3]; + } lvt_timer; + +/*330*/ struct { u32 __reserved[4]; } __reserved_15; + +/*340*/ struct { /* LVT - Performance Counter */ + u32 vector : 8, + delivery_mode : 3, + 
__reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_pc; + +/*350*/ struct { /* LVT - LINT0 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint0; + +/*360*/ struct { /* LVT - LINT1 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint1; + +/*370*/ struct { /* LVT - Error */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_error; + +/*380*/ struct { /* Timer Initial Count Register */ + u32 initial_count; + u32 __reserved_2[3]; + } timer_icr; + +/*390*/ const + struct { /* Timer Current Count Register */ + u32 curr_count; + u32 __reserved_2[3]; + } timer_ccr; + +/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; + +/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; + +/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; + +/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; + +/*3E0*/ struct { /* Timer Divide Configuration Register */ + u32 divisor : 4, + __reserved_1 : 28; + u32 __reserved_2[3]; + } timer_dcr; + +/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; + +} __attribute__ ((packed)); + +#undef u32 #endif --- linux/include/asm-i386/irq.h.orig Thu May 6 23:02:34 1999 +++ linux/include/asm-i386/irq.h Mon Sep 20 07:54:31 1999 @@ -13,7 +13,7 @@ #define TIMER_IRQ 0 /* - * 16 8259A IRQ's, 240 potential APIC interrupt sources. + * 16 8259A IRQ's, 208 potential APIC interrupt sources. * Right now the APIC is mostly only used for SMP. * 256 vectors is an architectural limit. 
(we can have * more than 256 devices theoretically, but they will --- linux/include/asm-i386/hw_irq.h.orig Mon Aug 23 17:10:59 1999 +++ linux/include/asm-i386/hw_irq.h Mon Sep 20 10:54:13 1999 @@ -27,27 +27,35 @@ */ /* - * Special IRQ vectors used by the SMP architecture: + * Special IRQ vectors used by the SMP architecture, 0x30-0x4f * - * (some of the following vectors are 'rare', they are merged - * into a single vector (FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical.) - */ -#define RESCHEDULE_VECTOR 0x30 -#define INVALIDATE_TLB_VECTOR 0x31 -#define STOP_CPU_VECTOR 0x40 -#define LOCAL_TIMER_VECTOR 0x41 -#define CALL_FUNCTION_VECTOR 0x50 + * some of the following vectors are 'rare', they are merged + * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. + * TLB, reschedule and local APIC vectors are performance-critical. + */ +#define INVALIDATE_TLB_VECTOR 0x30 +#define LOCAL_TIMER_VECTOR 0x31 +#define RESCHEDULE_VECTOR 0x40 + +/* 'rare' vectors */ +#define CALL_FUNCTION_VECTOR 0x41 +#define STOP_CPU_VECTOR 0x42 /* - * First APIC vector available to drivers: (vectors 0x51-0xfe) + * These IRQs should never really happen on perfect hardware running + * a perfect kernel, but we nevertheless print a message to catch the + * other combinations ;) Subtle, the APIC architecture mandates these + * two special vectors to have bits 0-3 set to 1. */ -#define IRQ0_TRAP_VECTOR 0x51 +#define SPURIOUS_APIC_VECTOR 0x3f +#define ERROR_APIC_VECTOR 0x4f /* - * This IRQ should never happen, but we print a message nevertheless. + * First APIC vector available to drivers: (vectors 0x51-0xfe) + * we start at 0x51 to spread out vectors between priority levels + * evenly. 
(note that 0x80 is the syscall vector) */ -#define SPURIOUS_APIC_VECTOR 0xff +#define IRQ0_TRAP_VECTOR 0x51 extern int irq_vector[NR_IRQS]; #define IO_APIC_VECTOR(irq) irq_vector[irq] @@ -65,18 +73,20 @@ extern void mask_irq(unsigned int irq); extern void unmask_irq(unsigned int irq); extern void disable_8259A_irq(unsigned int irq); +extern void enable_8259A_irq(unsigned int irq); extern int i8259A_irq_pending(unsigned int irq); -extern void ack_APIC_irq(void); +extern void make_8259A_irq(unsigned int irq); +extern void init_8259A(int aeoi); extern void FASTCALL(send_IPI_self(int vector)); extern void init_VISWS_APIC_irqs(void); extern void setup_IO_APIC(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); -extern void make_8259A_irq(unsigned int irq); extern void send_IPI(int dest, int vector); extern void init_pic_mode(void); extern void print_IO_APIC(void); extern unsigned long io_apic_irqs; +extern volatile unsigned long irq_err_count; extern char _stext, _etext; @@ -214,6 +224,7 @@ #ifdef __SMP__ /*more of this file should probably be ifdefed SMP */ static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) { + if (IO_APIC_IRQ(i)) send_IPI_self(IO_APIC_VECTOR(i)); } #else --- linux/include/asm-i386/smp.h.orig Mon Aug 30 11:20:42 1999 +++ linux/include/asm-i386/smp.h Mon Sep 20 12:17:12 1999 @@ -123,10 +123,12 @@ unsigned char mpc_dstirq; }; -#define MP_INT_VECTORED 0 -#define MP_INT_NMI 1 -#define MP_INT_SMI 2 -#define MP_INT_EXTINT 3 +enum mp_irq_source_types { + mp_INT = 0, + mp_NMI = 1, + mp_SMI = 2, + mp_ExtINT = 3 +}; #define MP_IRQDIR_DEFAULT 0 #define MP_IRQDIR_HIGH 1 @@ -150,7 +152,7 @@ * Default configurations * * 1 2 CPU ISA 82489DX - * 2 2 CPU EISA 82489DX no IRQ 8 or timer chaining + * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining * 3 2 CPU EISA 82489DX * 4 2 CPU MCA 82489DX * 5 2 CPU ISA+PCI @@ -175,7 +177,6 @@ extern volatile unsigned long cpu_callin_map[NR_CPUS]; extern void 
smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); extern void smp_send_reschedule(int cpu); -extern unsigned long ipi_count; extern void smp_invalidate_rcv(void); /* Process an NMI */ extern void smp_local_timer_interrupt(struct pt_regs * regs); extern void (*mtrr_hook) (void); @@ -196,6 +197,26 @@ return *((volatile unsigned long *)(APIC_BASE+reg)); } +#ifdef CONFIG_X86_GOOD_APIC +# define FORCE_READ_AROUND_WRITE 0 +#else +# define FORCE_READ_AROUND_WRITE 1 +#endif + +extern inline void ack_APIC_irq(void) +{ + /* Clear the IPI */ + +#if FORCE_READ_AROUND_WRITE + /* Dummy read - unnecessary on the P6+ */ + apic_read(APIC_SPIV); +#endif + /* + * on P6+ cores (CONFIG_X86_GOOD_APIC) ack_APIC_irq() actually + * gets compiled as a single instruction ... yummie. + */ + apic_write(APIC_EOI, 0); /* Docs say use 0 for future compatibility */ +} /* * General functions that each host system must provide. --- linux/include/asm-i386/msr.h.orig Thu Apr 29 20:53:41 1999 +++ linux/include/asm-i386/msr.h Mon Sep 20 07:54:31 1999 @@ -23,6 +23,8 @@ #define rdtscll(val) \ __asm__ __volatile__ ("rdtsc" : "=A" (val)) +#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) + #define rdpmc(counter,low,high) \ __asm__ __volatile__("rdpmc" \ : "=a" (low), "=d" (high) \ --- linux/include/asm-i386/processor.h.orig Mon Aug 23 17:19:59 1999 +++ linux/include/asm-i386/processor.h Mon Sep 20 10:10:07 1999 @@ -106,6 +106,9 @@ #define current_cpu_data boot_cpu_data #endif +#define cpu_has_tsc \ + (cpu_data[smp_processor_id()].x86_capability & X86_FEATURE_TSC) + extern char ignore_irq13; extern void identify_cpu(struct cpuinfo_x86 *); --- linux/arch/i386/boot/setup.S.orig Wed Sep 1 08:29:05 1999 +++ linux/arch/i386/boot/setup.S Mon Sep 20 07:54:31 1999 @@ -641,36 +641,9 @@ out #0xf1,al call delay -! well, that went ok, I hope. Now we have to reprogram the interrupts :-( -! we put them right after the intel-reserved hardware interrupts, at -! int 0x20-0x2F.
There they won't mess up anything. Sadly IBM really -! messed this up with the original PC, and they haven't been able to -! rectify it afterwards. Thus the bios puts interrupts at 0x08-0x0f, -! which is used for the internal hardware interrupts as well. We just -! have to reprogram the 8259's, and it isn't fun. +! well, that went ok, I hope. Now we mask all interrupts - the rest +! is done in init_IRQ(). - mov al,#0x11 ! initialization sequence - out #0x20,al ! send it to 8259A-1 - call delay - out #0xA0,al ! and to 8259A-2 - call delay - mov al,#0x20 ! start of hardware int's (0x20) - out #0x21,al - call delay - mov al,#0x28 ! start of hardware int's 2 (0x28) - out #0xA1,al - call delay - mov al,#0x04 ! 8259-1 is master - out #0x21,al - call delay - mov al,#0x02 ! 8259-2 is slave - out #0xA1,al - call delay - mov al,#0x01 ! 8086 mode for both - out #0x21,al - call delay - out #0xA1,al - call delay mov al,#0xFF ! mask off all interrupts for now out #0xA1,al call delay --- linux/arch/i386/kernel/smp.c.orig Mon Sep 20 07:54:11 1999 +++ linux/arch/i386/kernel/smp.c Mon Sep 20 11:45:34 1999 @@ -30,6 +30,7 @@ * Alan Cox : Added EBDA scanning * Ingo Molnar : various cleanups and rewrites * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. + * Maciej W. 
Rozycki : Bits for genuine 82489DX timers */ #include @@ -41,6 +42,7 @@ #include #include #include +#include #include @@ -111,21 +113,11 @@ static volatile unsigned long cpu_callin_map[NR_CPUS] = {0,}; /* We always use 0 the rest is ready for parallel delivery */ static volatile unsigned long cpu_callout_map[NR_CPUS] = {0,}; /* We always use 0 the rest is ready for parallel delivery */ volatile unsigned long smp_invalidate_needed; /* Used for the invalidate map that's also checked in the spinlock */ -volatile unsigned long kstack_ptr; /* Stack vector for booting CPUs */ struct cpuinfo_x86 cpu_data[NR_CPUS]; /* Per CPU bogomips and other parameters */ static unsigned int num_processors = 1; /* Internal processor count */ -unsigned long mp_ioapic_addr = 0xFEC00000; /* Address of the I/O apic (not yet used) */ unsigned char boot_cpu_id = 0; /* Processor that is doing the boot up */ static int smp_activated = 0; /* Tripped once we need to start cross invalidating */ int apic_version[NR_CPUS]; /* APIC version number */ -unsigned long apic_retval; /* Just debugging the assembler.. 
*/ - -volatile unsigned long kernel_counter=0; /* Number of times the processor holds the lock */ -volatile unsigned long syscall_count=0; /* Number of times the processor holds the syscall lock */ - -volatile unsigned long ipi_count; /* Number of IPIs delivered */ - -const char lk_lockmsg[] = "lock from interrupt context at %p\n"; int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, }; extern int nr_ioapics; @@ -151,8 +143,6 @@ */ #define APIC_DEFAULT_PHYS_BASE 0xfee00000 -#define CLEAR_TSC wrmsr(0x10, 0x00001000, 0x00001000) - /* * Setup routine for controlling SMP activation * @@ -180,17 +170,6 @@ __setup("maxcpus=", maxcpus); -void ack_APIC_irq(void) -{ - /* Clear the IPI */ - - /* Dummy read */ - apic_read(APIC_SPIV); - - /* Docs say use 0 for future compatibility */ - apic_write(APIC_EOI, 0); -} - /* * Intel MP BIOS table parsing routines: */ @@ -219,9 +198,9 @@ { "80486DX","80486DX", "80486SX","80486DX/2 or 80487", - "80486SL","Intel5X2(tm)", - "Unknown","Unknown", - "80486DX/4" + "80486SL","80486SX/2", + "Unknown","80486DX/2-WB", + "80486DX/4","80486DX/4-WB" }; if (family==0x6) return("Pentium(tm) Pro"); @@ -229,7 +208,7 @@ return("Pentium(tm)"); if (family==0x0F && model==0x0F) return("Special controller"); - if (family==0x04 && model<9) + if (family==0x04 && model<10) return model_defs[model]; sprintf(n,"Unknown CPU [%d:%d]",family, model); return n; @@ -390,11 +369,8 @@ (struct mpc_config_intsrc *)mpt; mp_irqs [mp_irq_entries] = *m; - if (++mp_irq_entries == MAX_IRQ_SOURCES) { - printk("Max irq sources exceeded!!\n"); - printk("Skipping remaining sources.\n"); - --mp_irq_entries; - } + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max irq sources exceeded!!\n"); mpt+=sizeof(*m); count+=sizeof(*m); @@ -404,17 +380,29 @@ { struct mpc_config_intlocal *m= (struct mpc_config_intlocal *)mpt; + /* + * Well it seems all SMP boards in existence + * use ExtINT/LVT1 == LINT0 and + * NMI/LVT2 == LINT1 - the following check + * will show us if this assumptions is false. 
+ * Until then we do not have to add baggage. + */ + if ((m->mpc_irqtype == mp_ExtINT) && + (m->mpc_destapiclint != 0)) + BUG(); + if ((m->mpc_irqtype == mp_NMI) && + (m->mpc_destapiclint != 1)) + BUG(); mpt+=sizeof(*m); count+=sizeof(*m); break; } } } - if (ioapics > MAX_IO_APICS) - { + if (ioapics > MAX_IO_APICS) { printk("Warning: Max I/O APICs exceeded (max %d, found %d).\n", MAX_IO_APICS, ioapics); - printk("Warning: switching to non APIC mode.\n"); - skip_ioapic_setup=1; + printk("Warning: switching to non I/O APIC mode.\n"); + skip_ioapic_setup = 1; } return num_processors; } @@ -517,7 +505,7 @@ printk("ISA\n"); break; case 2: - printk("EISA with no IRQ8 chaining\n"); + printk("EISA with no IRQ0 and no IRQ13 DMA chaining\n"); break; case 6: case 3: @@ -725,40 +713,85 @@ atomic_set(&smp_commenced,1); } -void __init enable_local_APIC(void) +extern void __error_in_io_apic_c(void); + +void __init setup_local_APIC(void) { unsigned long value; + if ((ERROR_APIC_VECTOR & SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) + __error_in_io_apic_c(); + value = apic_read(APIC_SPIV); + value = 0xf; value |= (1<<8); /* Enable APIC (bit==1) */ #if 0 value &= ~(1<<9); /* Enable focus processor (bit==0) */ #else value |= (1<<9); /* Disable focus processor (bit==1) */ #endif - value |= 0xff; /* Set spurious IRQ vector to 0xff */ + value |= SPURIOUS_APIC_VECTOR; /* Set spurious IRQ vector */ apic_write(APIC_SPIV,value); /* - * Set Task Priority to 'accept all' + * Set up LVT0, LVT1: + * + * set up through-local-APIC on the BP's LINT0. This is not + * strictly necessary in pure symmetric-IO mode, but sometimes + * we delegate interrupts to the 8259A. + */ + if (hard_smp_processor_id() == boot_cpu_id) { + value = 0x00000700; + printk("enabled ExtINT on CPU#%d\n", hard_smp_processor_id()); + } else { + value = 0x00010700; + printk("masked ExtINT on CPU#%d\n", hard_smp_processor_id()); + } + apic_write(APIC_LVT0,value); + + /* + * only the BP should see the LINT1 NMI signal, obviously.
+ */ + if (hard_smp_processor_id() == boot_cpu_id) + value = 0x00000400; // unmask NMI + else + value = 0x00010400; // mask NMI + apic_write(APIC_LVT1,value); + + value = apic_read(APIC_ESR); + printk("ESR value before enabling vector: %08lx\n", value); + + value = apic_read(APIC_LVERR); + value = ERROR_APIC_VECTOR; // enables sending errors + apic_write(APIC_LVERR,value); + + /* + * spec says clear errors after enabling vector. + */ + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + printk("ESR value after enabling vector: %08lx\n", value); + + /* + * Set Task Priority to 'accept all'. We never change this + * later on. */ value = apic_read(APIC_TASKPRI); value &= ~APIC_TPRI_MASK; apic_write(APIC_TASKPRI,value); /* - * Clear the logical destination ID, just to be safe. - * also, put the APIC into flat delivery mode. + * Set up the logical destination ID and put the + * APIC into flat delivery mode. */ value = apic_read(APIC_LDR); value &= ~APIC_LDR_MASK; + value |= (1<<(smp_processor_id()+24)); apic_write(APIC_LDR,value); value = apic_read(APIC_DFR); value |= SET_APIC_DFR(0xf); apic_write(APIC_DFR, value); - - udelay(100); /* B safe */ } unsigned long __init init_smp_mappings(unsigned long memory_start) @@ -806,6 +839,173 @@ return memory_start; } +#ifdef CONFIG_X86_TSC +/* + * TSC synchronization. + * + * We first check whether all CPUs have their TSC's synchronized, + * then we print a warning if not, and always resync. + */ + +static atomic_t tsc_start_flag = ATOMIC_INIT(0); +static atomic_t tsc_count_start = ATOMIC_INIT(0); +static atomic_t tsc_count_stop = ATOMIC_INIT(0); +static unsigned long long tsc_values[NR_CPUS] = { 0, }; + +#define NR_LOOPS 5 + +extern unsigned long fast_gettimeoffset_quotient; + +/* + * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit + * multiplication. Not terribly optimized but we need it at boot time only + * anyway.
+ * + * result == a / b + * == (a1 + a2*(2^32)) / b + * == a1/b + a2*(2^32/b) + * == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b + * ^---- (this multiplication can overflow) + */ + +static unsigned long long div64 (unsigned long long a, unsigned long b0) +{ + unsigned int a1, a2; + unsigned long long res; + + a1 = ((unsigned int*)&a)[0]; + a2 = ((unsigned int*)&a)[1]; + + res = a1/b0 + + (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) + + a2 / b0 + + (a2 * (0xffffffff % b0)) / b0; + + return res; +} + +static void __init synchronize_tsc_bp (void) +{ + int i; + unsigned long long t0; + unsigned long long sum, avg; + long long delta; + unsigned long one_usec; + int buggy = 0; + + printk("checking TSC synchronization across CPUs: "); + + one_usec = ((1<<30)/fast_gettimeoffset_quotient)*(1<<2); + + atomic_set(&tsc_start_flag, 1); + wmb(); + + /* + * We loop a few times to get a primed instruction cache, + * then the last pass is more or less synchronized and + * the BP and APs set their cycle counters to zero all at + * once. This reduces the chance of having random offsets + * between the processors, and guarantees that the maximum + * delay between the cycle counters is never bigger than + * the latency of information-passing (cachelines) between + * two CPUs. 
+ */ + for (i = 0; i < NR_LOOPS; i++) { + /* + * all APs synchronize but they loop on '== num_cpus' + */ + while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb(); + atomic_set(&tsc_count_stop, 0); + wmb(); + /* + * this lets the APs save their current TSC: + */ + atomic_inc(&tsc_count_start); + + rdtscll(tsc_values[smp_processor_id()]); + /* + * We clear the TSC in the last loop: + */ + if (i == NR_LOOPS-1) + write_tsc(0, 0); + + /* + * Wait for all APs to leave the synchronization point: + */ + while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb(); + atomic_set(&tsc_count_start, 0); + wmb(); + atomic_inc(&tsc_count_stop); + } + + sum = 0; + for (i = 0; i < NR_CPUS; i++) { + if (!(cpu_online_map & (1 << i))) + continue; + + t0 = tsc_values[i]; + sum += t0; + } + avg = div64(sum, smp_num_cpus); + + sum = 0; + for (i = 0; i < NR_CPUS; i++) { + if (!(cpu_online_map & (1 << i))) + continue; + + delta = tsc_values[i] - avg; + if (delta < 0) + delta = -delta; + /* + * We report bigger than 2 microseconds clock differences. + */ + if (delta > 2*one_usec) { + long realdelta; + if (!buggy) { + buggy = 1; + printk("\n"); + } + realdelta = div64(delta, one_usec); + if (tsc_values[i] < avg) + realdelta = -realdelta; + + printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! 
FIXED.\n", + i, realdelta); + } + + sum += delta; + } + if (!buggy) + printk("passed.\n"); +} + +static void __init synchronize_tsc_ap (void) +{ + int i; + + /* + * smp_num_cpus is not necessarily known at the time + * this gets called, so we first wait for the BP to + * finish SMP initialization: + */ + while (!atomic_read(&tsc_start_flag)) mb(); + + for (i = 0; i < NR_LOOPS; i++) { + atomic_inc(&tsc_count_start); + while (atomic_read(&tsc_count_start) != smp_num_cpus) mb(); + + rdtscll(tsc_values[smp_processor_id()]); + if (i == NR_LOOPS-1) + write_tsc(0, 0); + + atomic_inc(&tsc_count_stop); + while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb(); + } +} +#undef NR_LOOPS + +#endif + extern void calibrate_delay(void); void __init smp_callin(void) @@ -854,8 +1054,8 @@ * boards) */ - SMP_PRINTK(("CALLIN, before enable_local_APIC().\n")); - enable_local_APIC(); + SMP_PRINTK(("CALLIN, before setup_local_APIC().\n")); + setup_local_APIC(); /* * Set up our APIC timer. @@ -883,6 +1083,12 @@ * Allow the master to continue. */ set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]); +#ifdef CONFIG_X86_TSC + /* + * Synchronize the TSC with the BP + */ + synchronize_tsc_ap (); +#endif } int cpucount = 0; @@ -1300,7 +1506,7 @@ } #endif - enable_local_APIC(); + setup_local_APIC(); /* * Set up our local APIC timer: @@ -1420,26 +1626,21 @@ * APIC ID, so we can go init the TSS and stuff: */ cpu_init(); +#ifdef CONFIG_X86_TSC + /* + * Synchronize the TSC with the AP + */ + if (cpucount) + synchronize_tsc_bp(); +#endif } - /* * the following functions deal with sending IPIs between CPUs. * * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. */ - -/* - * Silly serialization to work around CPU bug in P5s. - * We can safely turn it off on a 686. 
- */ -#ifdef CONFIG_X86_GOOD_APIC -# define FORCE_APIC_SERIALIZATION 0 -#else -# define FORCE_APIC_SERIALIZATION 1 -#endif - static unsigned int cached_APIC_ICR; static unsigned int cached_APIC_ICR2; @@ -1462,7 +1663,7 @@ static inline unsigned int __get_ICR (void) { -#if FORCE_APIC_SERIALIZATION +#if FORCE_READ_AROUND_WRITE /* * Wait for the APIC to become ready - this should never occur. It's * a debugging check really. @@ -1473,11 +1674,11 @@ while (count < 1000) { cfg = slow_ICR; - if (!(cfg&(1<<12))) { - if (count) - atomic_add(count, (atomic_t*)&ipi_count); + if (!(cfg&(1<<12))) return cfg; - } + printk("CPU #%d: ICR still busy [%08x]\n", + smp_processor_id(), cfg); + irq_err_count++; count++; udelay(10); } @@ -1491,19 +1692,25 @@ static inline unsigned int __get_ICR2 (void) { -#if FORCE_APIC_SERIALIZATION +#if FORCE_READ_AROUND_WRITE return slow_ICR2; #else return cached_APIC_ICR2; #endif } +#define LOGICAL_DELIVERY 1 + static inline int __prepare_ICR (unsigned int shortcut, int vector) { unsigned int cfg; cfg = __get_ICR(); - cfg |= APIC_DEST_DM_FIXED|shortcut|vector; + cfg |= APIC_DEST_DM_FIXED|shortcut|vector +#if LOGICAL_DELIVERY + |APIC_DEST_LOGICAL +#endif + ; return cfg; } @@ -1513,7 +1720,11 @@ unsigned int cfg; cfg = __get_ICR2(); +#if LOGICAL_DELIVERY + cfg |= SET_APIC_DEST_FIELD((1< 1) + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } static inline void send_IPI_all(int vector) @@ -1566,7 +1782,7 @@ static inline void send_IPI_single(int dest, int vector) { unsigned long cfg; -#if FORCE_APIC_SERIALIZATION +#if FORCE_READ_AROUND_WRITE unsigned long flags; __save_flags(flags); @@ -1589,7 +1805,7 @@ * Send the IPI. The write to APIC_ICR fires this off. */ apic_write(APIC_ICR, cfg); -#if FORCE_APIC_SERIALIZATION +#if FORCE_READ_AROUND_WRITE __restore_flags(flags); #endif } @@ -1719,6 +1935,8 @@ * it goes straight through. 
*/ +extern void print_local_APIC (void); + void smp_send_stop(void) { send_IPI_allbutself(STOP_CPU_VECTOR); @@ -1804,113 +2022,6 @@ return 0; } -static unsigned int calibration_result; - -void setup_APIC_timer(unsigned int clocks); - -/* - * Local timer interrupt handler. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. - */ - -void smp_local_timer_interrupt(struct pt_regs * regs) -{ - int user = (user_mode(regs) != 0); - int cpu = smp_processor_id(); - - /* - * The profiling function is SMP safe. (nothing can mess - * around with "current", and the profiling counters are - * updated with atomic operations). This is especially - * useful with a profiling multiplier != 1 - */ - if (!user) - x86_do_profile(regs->eip); - - if (!--prof_counter[cpu]) { - int system = 1 - user; - struct task_struct * p = current; - - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - prof_counter[cpu] = prof_multiplier[cpu]; - if (prof_counter[cpu] != prof_old_multiplier[cpu]) { - setup_APIC_timer(calibration_result/prof_counter[cpu]); - prof_old_multiplier[cpu] = prof_counter[cpu]; - } - - /* - * After doing the above, we need to make like - * a normal interrupt - otherwise timer interrupts - * ignore the global interrupt lock, which is the - * WrongThing (tm) to do. 
- */ - - irq_enter(cpu, 0); - update_one_process(p, 1, user, system, cpu); - if (p->pid) { - p->counter -= 1; - if (p->counter <= 0) { - p->counter = 0; - p->need_resched = 1; - } - if (p->priority < DEF_PRIORITY) { - kstat.cpu_nice += user; - kstat.per_cpu_nice[cpu] += user; - } else { - kstat.cpu_user += user; - kstat.per_cpu_user[cpu] += user; - } - kstat.cpu_system += system; - kstat.per_cpu_system[cpu] += system; - - } - irq_exit(cpu, 0); - } - - /* - * We take the 'long' return path, and there every subsystem - * grabs the apropriate locks (kernel lock/ irq lock). - * - * we might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. - */ -} - -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesnt support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ -void smp_apic_timer_interrupt(struct pt_regs * regs) -{ - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow, and we - * want to be able to accept NMI tlb invalidates - * during this time. - */ - ack_APIC_irq(); - smp_local_timer_interrupt(regs); -} - /* * Reschedule call back. 
Nothing to do, * all the work is done automatically when @@ -1961,6 +2072,7 @@ */ asmlinkage void smp_stop_cpu_interrupt(void) { + ack_APIC_irq(); stop_this_cpu(); } @@ -1991,6 +2103,33 @@ } /* + * This interrupt should never happen with our APIC/SMP architecture + */ + +static spinlock_t err_lock; + +asmlinkage void smp_error_interrupt(void) +{ + unsigned long v; + + spin_lock(&err_lock); + + v = apic_read(APIC_ESR); + printk("APIC error interrupt on CPU#%d, should never happen.\n", + smp_processor_id()); + printk("... APIC ESR0: %08lx\n", v); + + apic_write(APIC_ESR, 0); + v = apic_read(APIC_ESR); + printk("... APIC ESR1: %08lx\n", v); + + ack_APIC_irq(); + + irq_err_count++; + + spin_unlock(&err_lock); +} +/* * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts * per second. We assume that the caller has already set up the local * APIC. @@ -2039,18 +2178,20 @@ /* * Unfortunately the local APIC timer cannot be set up into NMI * mode. With the IO APIC we can re-route the external timer - * interrupt and broadcast it as an NMI to all CPUs, so no pain. + * interrupt and broadcast it as an NMI to all CPUs. 
*/ tmp_value = apic_read(APIC_LVTT); - lvtt1_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; - apic_write(APIC_LVTT , lvtt1_value); + lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV) | + APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + apic_write(APIC_LVTT, lvtt1_value); /* * Divide PICLK by 16 */ tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR , (tmp_value & ~APIC_TDR_DIV_1 ) - | APIC_TDR_DIV_16); + apic_write(APIC_TDCR, (tmp_value + & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) + | APIC_TDR_DIV_16); tmp_value = apic_read(APIC_TMICT); apic_write(APIC_TMICT, clocks/APIC_DIVISOR); @@ -2092,10 +2233,11 @@ int __init calibrate_APIC_clock(void) { - unsigned long long t1,t2; - long tt1,tt2; - long calibration_result; + unsigned long long t1 = 0, t2 = 0; + long tt1, tt2; + long result; int i; + const int LOOPS = HZ/10; printk("calibrating APIC timer ... "); @@ -2117,18 +2259,19 @@ /* * We wrapped around just now. Let's start: */ - rdtscll(t1); - tt1=apic_read(APIC_TMCCT); + if (cpu_has_tsc) + rdtscll(t1); + tt1 = apic_read(APIC_TMCCT); -#define LOOPS (HZ/10) /* * Let's wait LOOPS wraprounds: */ - for (i=0; ieip); + + if (!--prof_counter[cpu]) { + int system = 1 - user; + struct task_struct * p = current; + + /* + * The multiplier may have changed since the last time we got + * to this point as a result of the user writing to + * /proc/profile. In this case we need to adjust the APIC + * timer accordingly. + * + * Interrupts are already masked off at this point. + */ + prof_counter[cpu] = prof_multiplier[cpu]; + if (prof_counter[cpu] != prof_old_multiplier[cpu]) { + setup_APIC_timer(calibration_result/prof_counter[cpu]); + prof_old_multiplier[cpu] = prof_counter[cpu]; + } + + /* + * After doing the above, we need to make like + * a normal interrupt - otherwise timer interrupts + * ignore the global interrupt lock, which is the + * WrongThing (tm) to do. 
+ */ + + irq_enter(cpu, 0); + update_one_process(p, 1, user, system, cpu); + if (p->pid) { + p->counter -= 1; + if (p->counter <= 0) { + p->counter = 0; + p->need_resched = 1; + } + if (p->priority < DEF_PRIORITY) { + kstat.cpu_nice += user; + kstat.per_cpu_nice[cpu] += user; + } else { + kstat.cpu_user += user; + kstat.per_cpu_user[cpu] += user; + } + kstat.cpu_system += system; + kstat.per_cpu_system[cpu] += system; + + } + irq_exit(cpu, 0); + } + + /* + * We take the 'long' return path, and there every subsystem + * grabs the apropriate locks (kernel lock/ irq lock). + * + * we might want to decouple profiling from the 'long path', + * and do the profiling totally in assembly. + * + * Currently this isn't too much of an issue (performance wise), + * we can take more than 100K local irqs per second on a 100 MHz P5. + */ +} + +/* + * Local APIC timer interrupt. This is the most natural way for doing + * local interrupts, but local timer interrupts can be emulated by + * broadcast interrupts too. [in case the hw doesnt support APIC timers] + * + * [ if a single-CPU system runs an SMP kernel then we call the local + * interrupt as well. Thus we cannot inline the local irq ... ] + */ +unsigned int apic_timer_irqs [NR_CPUS] = { 0, }; + +void smp_apic_timer_interrupt(struct pt_regs * regs) +{ + /* + * the NMI deadlock-detector uses this. + */ + apic_timer_irqs[smp_processor_id()]++; + + /* + * NOTE! We'd better ACK the irq immediately, + * because timer handling can be slow. 
+ */ + ack_APIC_irq(); + smp_local_timer_interrupt(regs); +} --- linux/arch/i386/kernel/traps.c.orig Mon Sep 20 07:54:13 1999 +++ linux/arch/i386/kernel/traps.c Mon Sep 20 11:07:42 1999 @@ -2,6 +2,8 @@ * linux/arch/i386/traps.c * * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1998, Ingo Molnar, added NMI-Watchdog driver */ /* @@ -58,10 +60,17 @@ */ struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; +extern int console_loglevel; + +static inline void console_silent(void) +{ + console_loglevel = 0; +} + static inline void console_verbose(void) { - extern int console_loglevel; - console_loglevel = 15; + if (console_loglevel) + console_loglevel = 15; } #define DO_ERROR(trapnr, signr, str, name, tsk) \ @@ -292,7 +301,7 @@ { printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); printk("You probably have a hardware problem with your RAM chips\n"); -} +} static void io_check_error(unsigned char reason, struct pt_regs * regs) { @@ -325,18 +334,98 @@ printk("Do you have a strange power saving mode enabled?\n"); } +atomic_t nmi_counter[NR_CPUS]; + +#if CONFIG_SMP + +int nmi_watchdog = 1; + +static int __init setup_nmi_watchdog(char *str) +{ + get_option(&str, &nmi_watchdog); + return 1; +} + +__setup("nmi_watchdog=", setup_nmi_watchdog); + +extern unsigned int apic_timer_irqs [NR_CPUS]; +extern spinlock_t console_lock; +static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED; + +inline void nmi_watchdog_tick(struct pt_regs * regs) +{ + /* + * the best way to detect wether a CPU has a 'hard lockup' problem + * is to check it's local APIC timer IRQ counts. If they are not + * changing then that CPU has some problem. + * + * as these watchdog NMI IRQs are broadcasted to every CPU, here + * we only have to check the current processor. + * + * since NMIs dont listen to _any_ locks, we have to be extremely + * careful not to rely on unsafe variables. 
The printk might lock + * up though, so we have to break up console_lock first ... + * [when there will be more tty-related locks, break them up + * here too!] + */ + + static unsigned int last_irq_sums [NR_CPUS] = { 0, }, + alert_counter [NR_CPUS] = { 0, }; + + /* + * Since current-> is always on the stack, and we always switch + * the stack NMI-atomically, it's safe to use smp_processor_id(). + */ + int sum, cpu = smp_processor_id(); + + sum = apic_timer_irqs[cpu]; + + if (last_irq_sums[cpu] == sum) { + /* + * Ayiee, looks like this CPU is stuck ... + * wait a few IRQs (5 seconds) before doing the oops ... + */ + alert_counter[cpu]++; + if (alert_counter[cpu] == 5*HZ) { + spin_lock(&nmi_print_lock); + spin_unlock(&console_lock); // we are in trouble anyway + printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu); + show_registers(regs); + printk("console shuts up ...\n"); + console_silent(); + spin_unlock(&nmi_print_lock); + do_exit(SIGSEGV); + } + } else { + last_irq_sums[cpu] = sum; + alert_counter[cpu] = 0; + } +} +#endif + asmlinkage void do_nmi(struct pt_regs * regs, long error_code) { unsigned char reason = inb(0x61); - extern atomic_t nmi_counter; - atomic_inc(&nmi_counter); + atomic_inc(nmi_counter+smp_processor_id()); if (reason & 0x80) mem_parity_error(reason, regs); if (reason & 0x40) io_check_error(reason, regs); - if (!(reason & 0xc0)) + if (!(reason & 0xc0)) { +#if CONFIG_SMP + /* + * Ok, so this is none of the documented NMI sources, + * so it must be the NMI watchdog. 
+ */ + if (nmi_watchdog) + nmi_watchdog_tick(regs); + else + unknown_nmi_error(reason, regs); +#else unknown_nmi_error(reason, regs); +#endif + } } /* @@ -455,6 +544,7 @@ asmlinkage void math_state_restore(struct pt_regs regs) { __asm__ __volatile__("clts"); /* Allow maths ops (or we recurse) */ + if(current->used_math) __asm__("frstor %0": :"m" (current->thread.i387)); else --- linux/arch/i386/kernel/io_apic.c.orig Wed Sep 1 08:29:05 1999 +++ linux/arch/i386/kernel/io_apic.c Mon Sep 20 12:11:39 1999 @@ -1,7 +1,7 @@ /* * Intel IO-APIC support for multi-Pentium hosts. * - * Copyright (C) 1997, 1998 Ingo Molnar, Hajnalka Szabo + * Copyright (C) 1997, 1998, 1999 Ingo Molnar, Hajnalka Szabo * * Many thanks to Stig Venaas for trying out countless experimental * patches and reporting/debugging problems patiently! @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -27,6 +28,8 @@ */ #define IO_APIC_BASE(idx) ((volatile int *)__fix_to_virt(FIX_IO_APIC_BASE_0 + idx)) +extern int nmi_watchdog; + /* * The structure of the IO-APIC: */ @@ -59,6 +62,11 @@ enum ioapic_irq_destination_types { dest_Fixed = 0, dest_LowestPrio = 1, + dest_SMI = 2, + dest__reserved_1 = 3, + dest_NMI = 4, + dest__reserved_2 = 5, + dest_INIT = 6, dest_ExtINT = 7 }; @@ -94,13 +102,6 @@ * MP-BIOS irq configuration table structures: */ -enum mp_irq_source_types { - mp_INT = 0, - mp_NMI = 1, - mp_SMI = 2, - mp_ExtINT = 3 -}; - struct mpc_config_ioapic mp_apics[MAX_IO_APICS];/* I/O APIC entries */ int mp_irq_entries = 0; /* # of MP IRQ source entries */ struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; @@ -202,16 +203,10 @@ FINAL; \ } -/* - * We disable IO-APIC IRQs by setting their 'destination CPU mask' to - * zero. Trick by Ramesh Nalluri. 
- */ -DO_ACTION( disable, 1, &= 0x00ffffff, io_apic_sync(entry->apic))/* destination = 0x00 */ -DO_ACTION( enable, 1, |= 0xff000000, ) /* destination = 0xff */ DO_ACTION( mask, 0, |= 0x00010000, io_apic_sync(entry->apic))/* mask = 1 */ DO_ACTION( unmask, 0, &= 0xfffeffff, ) /* mask = 0 */ -static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) +void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; @@ -589,24 +584,30 @@ static int current_vector = IRQ0_TRAP_VECTOR, offset = 0; if (IO_APIC_VECTOR(irq) > 0) return IO_APIC_VECTOR(irq); + if (current_vector == 0xFF) + panic("ran out of interrupt sources!"); +next: current_vector += 8; - if (current_vector > 0xFE) { + if (current_vector == SYSCALL_VECTOR) + goto next; + + if (current_vector > 0xFF) { offset++; current_vector = IRQ0_TRAP_VECTOR + offset; - printk("WARNING: ASSIGN_IRQ_VECTOR wrapped back to %02X\n", - current_vector); } - if (current_vector == SYSCALL_VECTOR) - panic("ran out of interrupt sources!"); IO_APIC_VECTOR(irq) = current_vector; return current_vector; } +extern void (*interrupt[NR_IRQS])(void); +static struct hw_interrupt_type ioapic_level_irq_type; +static struct hw_interrupt_type ioapic_edge_irq_type; + void __init setup_IO_APIC_irqs(void) { struct IO_APIC_route_entry entry; - int apic, pin, idx, irq, first_notcon = 1; + int apic, pin, idx, irq, first_notcon = 1, vector; printk("init IO_APIC IRQs\n"); @@ -621,10 +622,11 @@ entry.delivery_mode = dest_LowestPrio; entry.dest_mode = 1; /* logical delivery */ entry.mask = 0; /* enable IRQ */ - entry.dest.logical.logical_dest = 0; /* but no route */ + entry.dest.logical.logical_dest = APIC_ALL_CPUS; /* all CPUs */ idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { + idx = find_irq_entry(apic,pin,mp_ExtINT); if (first_notcon) { printk(" IO-APIC (apicid-pin) %d-%d", mp_apics[apic].mpc_apicid, pin); first_notcon = 0; @@ -639,7 +641,7 @@ if (irq_trigger(idx)) { entry.trigger = 1; entry.mask = 1; - 
entry.dest.logical.logical_dest = 0xff; + entry.dest.logical.logical_dest = APIC_ALL_CPUS; } irq = pin_2_irq(idx,apic,pin); @@ -648,8 +650,20 @@ if (!apic && !IO_APIC_IRQ(irq)) continue; - entry.vector = assign_irq_vector(irq); + if (IO_APIC_IRQ(irq)) { + vector = assign_irq_vector(irq); + entry.vector = vector; + + if (IO_APIC_irq_trigger(irq)) + irq_desc[irq].handler = &ioapic_level_irq_type; + else + irq_desc[irq].handler = &ioapic_edge_irq_type; + set_intr_gate(vector, interrupt[irq]); + + if (!apic && (irq < 16)) + disable_8259A_irq(irq); + } io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); } @@ -660,34 +674,46 @@ } /* - * Set up a certain pin as ExtINT delivered interrupt + * Set up the 8259A-master output pin as broadcast to all + * CPUs. */ -void __init setup_ExtINT_pin(unsigned int apic, unsigned int pin, int irq) +void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) { struct IO_APIC_route_entry entry; - /* - * add it to the IO-APIC irq-routing table: - */ memset(&entry,0,sizeof(entry)); - entry.delivery_mode = dest_ExtINT; - entry.dest_mode = 0; /* physical delivery */ - entry.mask = 0; /* unmask IRQ now */ - /* - * We use physical delivery to get the timer IRQ - * to the boot CPU. 'boot_cpu_id' is the physical - * APIC ID of the boot CPU. - */ - entry.dest.physical.physical_dest = boot_cpu_id; + disable_8259A_irq(0); - entry.vector = assign_irq_vector(irq); + apic_write(APIC_LVT0, 0x00010700); // mask LVT0 + init_8259A(1); + + /* + * We use logical delivery to get the timer IRQ + * to the first CPU. + */ + entry.dest_mode = 1; /* logical delivery */ + entry.mask = 0; /* unmask IRQ now */ + entry.dest.logical.logical_dest = APIC_ALL_CPUS; + entry.delivery_mode = dest_LowestPrio; entry.polarity = 0; entry.trigger = 0; + entry.vector = vector; + + /* + * The timer IRQ doesnt have to know that behind the + * scene we have a 8259A-master in AEOI mode ... 
+ */ + irq_desc[0].handler = &ioapic_edge_irq_type; + + /* + * Add it to the IO-APIC irq-routing table: + */ + io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); + enable_8259A_irq(0); } void __init UNEXPECTED_IO_APIC(void) @@ -717,7 +743,9 @@ *(int *)®_00 = io_apic_read(apic, 0); *(int *)®_01 = io_apic_read(apic, 1); - *(int *)®_02 = io_apic_read(apic, 2); + if (reg_01.version >= 0x10) + *(int *)®_02 = io_apic_read(apic, 2); + printk("\nIO APIC #%d......\n", mp_apics[apic].mpc_apicid); printk(".... register #00: %08X\n", *(int *)®_00); printk("....... : physical APIC id: %02X\n", reg_00.ID); @@ -730,12 +758,15 @@ (reg_01.entries != 0x17) && /* typical ISA+PCI boards */ (reg_01.entries != 0x1b) && /* Compaq Proliant boards */ (reg_01.entries != 0x1f) && /* dual Xeon boards */ - (reg_01.entries != 0x3F) /* bigger Xeon boards */ + (reg_01.entries != 0x22) && /* bigger Xeon boards */ + (reg_01.entries != 0x2E) && + (reg_01.entries != 0x3F) ) UNEXPECTED_IO_APIC(); printk("....... : IO APIC version: %04X\n", reg_01.version); - if ( (reg_01.version != 0x10) && /* oldest IO-APICs */ + if ( (reg_01.version != 0x01) && /* 82489DX IO-APICs */ + (reg_01.version != 0x11) && /* oldest IO-APICs */ (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */ (reg_01.version != 0x13) /* Xeon IO-APICs */ ) @@ -743,10 +774,12 @@ if (reg_01.__reserved_1 || reg_01.__reserved_2) UNEXPECTED_IO_APIC(); - printk(".... register #02: %08X\n", *(int *)®_02); - printk("....... : arbitration: %02X\n", reg_02.arbitration); - if (reg_02.__reserved_1 || reg_02.__reserved_2) - UNEXPECTED_IO_APIC(); + if (reg_01.version >= 0x10) { + printk(".... register #02: %08X\n", *(int *)®_02); + printk("....... : arbitration: %02X\n", reg_02.arbitration); + if (reg_02.__reserved_1 || reg_02.__reserved_2) + UNEXPECTED_IO_APIC(); + } printk(".... 
IRQ redirection table:\n"); @@ -797,6 +830,89 @@ return; } +static void print_APIC_bitfield (int base) +{ + unsigned int v; + int i, j; + + printk("0123456789abcdef0123456789abcdef\n"); + for (i = 0; i < 8; i++) { + v = apic_read(base + i*0x10); + for (j = 0; j < 32; j++) { + if (v & (1<1) return 1; @@ -964,12 +1091,11 @@ */ static void enable_edge_ioapic_irq(unsigned int irq) { - enable_IO_APIC_irq(irq); + unmask_IO_APIC_irq(irq); } static void disable_edge_ioapic_irq(unsigned int irq) { - disable_IO_APIC_irq(irq); } /* @@ -995,8 +1121,17 @@ } #define shutdown_edge_ioapic_irq disable_edge_ioapic_irq -void static ack_edge_ioapic_irq(unsigned int i) -{ + +/* + * Once we have recorded IRQ_PENDING already, we can mask the + * interrupt for real. This prevents IRQ storms from unhandled + * devices. + */ +void static ack_edge_ioapic_irq(unsigned int irq) +{ + if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) + == (IRQ_PENDING | IRQ_DISABLED)) + mask_IO_APIC_irq(irq); ack_APIC_irq(); } void static end_edge_ioapic_irq(unsigned int i){} @@ -1055,7 +1190,7 @@ static inline void init_IO_APIC_traps(void) { - int i; + int irq; /* * NOTE! The local APIC isn't very good at handling * multiple interrupts at the same interrupt level. @@ -1067,19 +1202,21 @@ * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. 
;) */ - for (i = 0; i < NR_IRQS ; i++) { - if (IO_APIC_VECTOR(i) > 0) { - if (IO_APIC_irq_trigger(i)) - irq_desc[i].handler = &ioapic_level_irq_type; + for (irq = 0; irq < NR_IRQS ; irq++) { + if (IO_APIC_VECTOR(irq) > 0) { +#if 0 + if (IO_APIC_irq_trigger(irq)) + irq_desc[irq].handler = &ioapic_level_irq_type; else - irq_desc[i].handler = &ioapic_edge_irq_type; + irq_desc[irq].handler = &ioapic_edge_irq_type; /* * disable it in the 8259A: */ - if (i < 16) - disable_8259A_irq(i); + if (irq < 16) + disable_8259A_irq(irq); +#endif } else { - if (!IO_APIC_IRQ(i)) + if (!IO_APIC_IRQ(irq)) continue; /* @@ -1087,16 +1224,61 @@ * so default to an old-fashioned 8259 * interrupt if we can.. */ - if (i < 16) { - make_8259A_irq(i); + if (irq < 16) { + make_8259A_irq(irq); continue; } /* Strange. Oh, well.. */ - irq_desc[i].handler = &no_irq_type; + irq_desc[irq].handler = &no_irq_type; } } - init_IRQ_SMP(); + /* + * i've moved this into setup_IO_APIC() so we get a more + * atomic route+vector setup, we will see how it works out. + */ +// init_IRQ_SMP(); +} + +void static ack_lapic_irq (unsigned int irq) +{ + ack_APIC_irq(); +} + +void static end_lapic_irq (unsigned int i) { /* nothing */ } + +static struct hw_interrupt_type lapic_irq_type = { + "local-APIC-edge", + NULL, /* startup_irq() not used for IRQ0 */ + NULL, /* shutdown_irq() not used for IRQ0 */ + NULL, /* enable_irq() not used for IRQ0 */ + NULL, /* disable_irq() not used for IRQ0 */ + ack_lapic_irq, + end_lapic_irq +}; + +static void enable_NMI_through_LVT0 (void * dummy) +{ + apic_write(APIC_LVT0, 0x00000400); // unmask and set to NMI +} + +static void setup_nmi (void) +{ + /* + * Dirty trick to enable the NMI watchdog ... + * We put the 8259A master into AEOI mode and + * unmask on both local APICs LVT0 as NMI. + * + * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') + * is from Maciej W. Rozycki - so we do not have to EOI from + * the NMI handler or the timer interrupt. 
+ */ + printk("activating NMI Watchdog ..."); + + smp_call_function(enable_NMI_through_LVT0, NULL, 1, 1); + enable_NMI_through_LVT0(NULL); + + printk(" done.\n"); } /* @@ -1108,45 +1290,72 @@ static inline void check_timer(void) { int pin1, pin2; + int vector; + + /* + * get/set the timer IRQ vector: + */ + vector = assign_irq_vector(0); + set_intr_gate(vector, interrupt[0]); pin1 = find_timer_pin(mp_INT); pin2 = find_timer_pin(mp_ExtINT); - enable_IO_APIC_irq(0); - if (!timer_irq_works()) { - if (pin1 != -1) - printk("..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); - printk("...trying to set up timer as ExtINT... "); - - if (pin2 != -1) { - printk(".. (found pin %d) ...", pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ - setup_ExtINT_pin(0, pin2, 0); - make_8259A_irq(0); + /* + * Ok, does IRQ0 through the IOAPIC work? + */ + if (timer_irq_works()) { + if (nmi_watchdog) { + disable_8259A_irq(0); + init_8259A(1); + setup_nmi(); + enable_8259A_irq(0); } + return; + } - if (!timer_irq_works()) { - printk(" failed.\n"); - printk("...trying to set up timer as BP IRQ..."); - /* - * Just in case ... - */ - if (pin1 != -1) - clear_IO_APIC_pin(0, pin1); - if (pin2 != -1) - clear_IO_APIC_pin(0, pin2); - - make_8259A_irq(0); - - if (!timer_irq_works()) { - printk(" failed.\n"); - panic("IO-APIC + timer doesn't work!"); - } + if (pin1 != -1) { + printk("..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); + clear_IO_APIC_pin(0, pin1); + } + + printk("...trying to set up timer (IRQ0) through the 8259A ... "); + if (pin2 != -1) { + printk("\n..... (found pin %d) ...", pin2); + /* + * legacy devices should be connected to IO APIC #0 + */ + setup_ExtINT_IRQ0_pin(pin2, vector); + if (timer_irq_works()) { + printk("works.\n"); + if (nmi_watchdog) + setup_nmi(); + return; } + /* + * Cleanup, just in case ... 
+ */ + clear_IO_APIC_pin(0, pin2); + } + printk(" failed.\n"); + + if (nmi_watchdog) + printk("timer doesnt work through the IO-APIC - cannot activate NMI Watchdog!\n"); + + printk("...trying to set up timer as Virtual Wire IRQ..."); + + disable_8259A_irq(0); + irq_desc[0].handler = &lapic_irq_type; + init_8259A(1); // AEOI mode + apic_write(APIC_LVT0, 0x00000000 | vector); // Fixed mode + enable_8259A_irq(0); + + if (timer_irq_works()) { printk(" works.\n"); + return; } + printk(" failed :(.\n"); + panic("IO-APIC + timer doesn't work! pester mingo@redhat.com"); } /* @@ -1189,6 +1398,5 @@ setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - print_IO_APIC(); } --- linux/arch/i386/kernel/entry.S.orig Mon Sep 20 07:54:11 1999 +++ linux/arch/i386/kernel/entry.S Mon Sep 20 07:54:31 1999 @@ -323,9 +323,14 @@ jmp error_code ENTRY(nmi) + pushl %eax + SAVE_ALL + movl %esp,%edx pushl $0 - pushl $ SYMBOL_NAME(do_nmi) - jmp error_code + pushl %edx + call SYMBOL_NAME(do_nmi) + addl $8,%esp + RESTORE_ALL ENTRY(int3) pushl $0 --- linux/arch/i386/kernel/irq.c.orig Wed Sep 1 08:29:05 1999 +++ linux/arch/i386/kernel/irq.c Mon Sep 20 10:07:12 1999 @@ -48,7 +48,7 @@ unsigned int local_bh_count[NR_CPUS]; unsigned int local_irq_count[NR_CPUS]; -atomic_t nmi_counter; +extern atomic_t nmi_counter[NR_CPUS]; /* * Linux has a controller-independent x86 interrupt architecture. @@ -84,6 +84,52 @@ void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } /* + * Generic no controller code + */ + +static void enable_none(unsigned int irq) { } +static unsigned int startup_none(unsigned int irq) { return 0; } +static void disable_none(unsigned int irq) { } +static void ack_none(unsigned int irq) +{ +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves, it doesnt deserve + * a generic callback i think. 
+ */ +#if CONFIG_X86 + printk("unexpected IRQ trap at vector %02x\n", irq); +#ifdef __SMP__ + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + */ + ack_APIC_irq(); +#endif +#endif +} + +/* startup is the same as "enable", shutdown is same as "disable" */ +#define shutdown_none disable_none +#define end_none enable_none + +struct hw_interrupt_type no_irq_type = { + "none", + startup_none, + shutdown_none, + enable_none, + disable_none, + ack_none, + end_none +}; + +volatile unsigned long irq_err_count; + +/* * Generic, controller-independent functions: */ @@ -106,22 +152,23 @@ #ifndef __SMP__ p += sprintf(p, "%10u ", kstat_irqs(i)); #else - for (j=0; jtypename); p += sprintf(p, " %s", action->name); - for (action=action->next; action; action = action->next) { + for (action=action->next; action; action = action->next) p += sprintf(p, ", %s", action->name); - } *p++ = '\n'; } - p += sprintf(p, "NMI: %10u\n", atomic_read(&nmi_counter)); -#ifdef __SMP__ - p += sprintf(p, "ERR: %10lu\n", ipi_count); -#endif + p += sprintf(p, "NMI: "); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + atomic_read(nmi_counter+cpu_logical_map(j))); + p += sprintf(p, "\n"); + p += sprintf(p, "ERR: %10lu\n", irq_err_count); return p - buf; } @@ -494,6 +541,8 @@ spin_unlock_irqrestore(&irq_controller_lock, flags); } +extern int verbose_irq; + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -520,7 +569,7 @@ kstat.irqs[cpu][irq]++; desc = irq_desc + irq; spin_lock(&irq_controller_lock); - irq_desc[irq].handler->ack(irq); + desc->handler->ack(irq); /* REPLAY is when Linux resends an IRQ that was dropped earlier WAITING is used by probe to mark irqs 
that are being tested @@ -570,9 +619,8 @@ spin_unlock(&irq_controller_lock); } desc->status &= ~IRQ_INPROGRESS; - if (!(desc->status & IRQ_DISABLED)){ - irq_desc[irq].handler->end(irq); - } + if (!(desc->status & IRQ_DISABLED)) + desc->handler->end(irq); spin_unlock(&irq_controller_lock); /* --- linux/arch/i386/kernel/i8259.c.orig Mon Aug 30 11:16:24 1999 +++ linux/arch/i386/kernel/i8259.c Mon Sep 20 11:32:25 1999 @@ -24,53 +24,9 @@ #include - -/* - * Intel specific no controller code - * odd that no-controller should be architecture dependent - * but see the ifdef __SMP__ - */ - -static void enable_none(unsigned int irq) { } -static unsigned int startup_none(unsigned int irq) { return 0; } -static void disable_none(unsigned int irq) { } -static void ack_none(unsigned int irq) -{ -#ifdef __SMP__ - /* - * [currently unexpected vectors happen only on SMP and APIC. - * if we want to have non-APIC and non-8259A controllers - * in the future with unexpected vectors, this ack should - * probably be made controller-specific.] - */ - ack_APIC_irq(); -#endif -} - -/* startup is the same as "enable", shutdown is same as "disable" */ -#define shutdown_none disable_none -#define end_none enable_none - -struct hw_interrupt_type no_irq_type = { - "none", - startup_none, - shutdown_none, - enable_none, - disable_none, - ack_none, - end_none -}; - - -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. - * this file should become arch/i386/kernel/irq.c when the old irq.c - * moves to arch independent land - */ /* + * Common place to define all x86 IRQ vectors + * * This builds up the IRQ handler stubs using some ugly macros in irq.h * * These macros create the low-level assembly IRQ routines that save @@ -79,7 +35,6 @@ * interrupt-controller happy. 
*/ - BUILD_COMMON_IRQ() #define BI(x,y) \ @@ -129,6 +84,7 @@ BUILD_SMP_INTERRUPT(stop_cpu_interrupt,STOP_CPU_VECTOR) BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) /* * every pentium local APIC has two 'local interrupts', with a @@ -150,7 +106,7 @@ IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) -static void (*interrupt[NR_IRQS])(void) = { +void (*interrupt[NR_IRQS])(void) = { IRQLIST_16(0x0), #ifdef CONFIG_X86_IO_APIC @@ -164,17 +120,23 @@ #undef IRQ #undef IRQLIST_16 +/* + * This is the 'legacy' 8259A Programmable Interrupt Controller, + * present in the majority of PC/AT boxes. + * plus some generic x86 specific things if generic specifics makes + * any sense at all. + * this file should become arch/i386/kernel/irq.c when the old irq.c + * moves to arch independent land + */ - - -static void enable_8259A_irq(unsigned int irq); +void enable_8259A_irq(unsigned int irq); void disable_8259A_irq(unsigned int irq); /* shutdown is same as "disable" */ #define end_8259A_irq enable_8259A_irq #define shutdown_8259A_irq disable_8259A_irq -static void mask_and_ack_8259A(unsigned int); +void mask_and_ack_8259A(unsigned int); static unsigned int startup_8259A_irq(unsigned int irq) { @@ -207,8 +169,8 @@ /* * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not connected to any IO-APIC pin, it's - * fed to the CPU IRQ line directly. + * boards the timer interrupt is not really connected to any IO-APIC pin, + * it's fed to the master 8259A's IN0 line only. * * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. 
* this 'mixed mode' IRQ handling costs nothing because it's only used @@ -224,22 +186,20 @@ { unsigned int mask = 1 << irq; cached_irq_mask |= mask; - if (irq & 8) { + if (irq & 8) outb(cached_A1,0xA1); - } else { + else outb(cached_21,0x21); - } } -static void enable_8259A_irq(unsigned int irq) +void enable_8259A_irq(unsigned int irq) { unsigned int mask = ~(1 << irq); cached_irq_mask &= mask; - if (irq & 8) { + if (irq & 8) outb(cached_A1,0xA1); - } else { + else outb(cached_21,0x21); - } } int i8259A_irq_pending(unsigned int irq) @@ -260,26 +220,140 @@ } /* + * This function assumes to be called rarely. Switching between + * 8259A registers is slow. + */ +static inline int i8259A_irq_real(unsigned int irq) +{ + int value; + int irqmask = 1<> 8); + outb(0x0A,0xA0); /* back to the IRR register */ + return value; +} + +/* * Careful! The 8259A is a fragile beast, it pretty * much _has_ to be done exactly like this (mask it * first, _then_ send the EOI, and the order of EOI * to the two 8259s is important! */ -static void mask_and_ack_8259A(unsigned int irq) +void mask_and_ack_8259A(unsigned int irq) { - cached_irq_mask |= 1 << irq; + unsigned int irqmask = 1 << irq; + + /* + * Lightweight spurious IRQ detection. We do not want + * to overdo spurious IRQ handling - it's usually a sign + * of hardware problems, so we only do the checks we can + * do without slowing down good hardware unnecesserily. + * + * Note that IRQ7 and IRQ15 (the two spurious IRQs + * usually resulting from the 8259A-1|2 PICs) occur + * even if the IRQ is masked in the 8259A. Thus we + * can check spurious 8259A IRQs without doing the + * quite slow i8259A_irq_real() call for every IRQ. + * This does not cover 100% of spurious interrupts, + * but should be enough to warn the user that there + * is something bad going on ... 
+ */ + if (cached_irq_mask & irqmask) + goto spurious_8259A_irq; + cached_irq_mask |= irqmask; + +handle_real_irq: if (irq & 8) { - inb(0xA1); /* DUMMY */ + inb(0xA1); /* DUMMY - (do we need this?) */ outb(cached_A1,0xA1); - outb(0x62,0x20); /* Specific EOI to cascade */ - outb(0x20,0xA0); + outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */ + outb(0x20,0xA0); /* 'generic EOI' to slave */ } else { - inb(0x21); /* DUMMY */ + inb(0x21); /* DUMMY - (do we need this?) */ outb(cached_21,0x21); - outb(0x20,0x20); + outb(0x20,0x20); /* 'generic EOI' to master */ + } + return; + +spurious_8259A_irq: + /* + * this is the slow path - should happen rarely. + */ + if (i8259A_irq_real(irq)) + /* + * oops, the IRQ _is_ in service according to the + * 8259A - not spurious, go handle it. + */ + goto handle_real_irq; + + { + static int spurious_irq_mask = 0; + /* + * At this point we can be sure the IRQ is spurious, + * lets ACK and report it. [once per IRQ] + */ + if (!(spurious_irq_mask & irqmask)) { + printk("spurious 8259A interrupt: IRQ%d.\n", irq); + spurious_irq_mask |= irqmask; + } + irq_err_count++; + /* + * Theoretically we do not have to handle this IRQ, + * but in Linux this does not cause problems and is + * simpler for us. + */ + goto handle_real_irq; } } +void init_8259A(int auto_eoi) +{ + unsigned long flags; + + save_flags(flags); + cli(); + + outb(0xff, 0x21); /* mask all of 8259A-1 */ + outb(0xff, 0xA1); /* mask all of 8259A-2 */ + + /* + * outb_p - this has to work on a wide range of PC hardware. 
+ */ + outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */ + outb_p(0x20 + 0, 0x21); /* ICW2: 8259A-1 IN0-7 mapped to 0x20-0x27 */ + outb_p(0x04, 0x21); /* 8259A-1 is master */ + if (auto_eoi) + outb_p(0x03, 0x21); /* master does Auto EOI */ + else + outb_p(0x01, 0x21); /* master expects normal EOI */ + + outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */ + outb_p(0x20 + 8, 0xA1); /* ICW2: 8259A-2 IN0-7 mapped to 0x28-0x2f */ + outb_p(0x02, 0xA1); /* 8259A-2 is slave */ + outb_p(0x01, 0xA1); /* (slave does not support/need AEOI) */ + + if (auto_eoi) + /* + * in AEOI mode we just have to mask the interrupt + * when acking. + */ + i8259A_irq_type.ack = disable_8259A_irq; + + udelay(100); /* wait for 8259A to initialize */ + + outb(cached_21, 0x21); /* restore master IRQ mask */ + outb(cached_A1, 0xA1); /* restore slave IRQ mask */ + + restore_flags(flags); +} + #ifndef CONFIG_VISWS /* * Note that on a 486, we don't want to do a SIGFPE on an irq13 @@ -307,7 +381,7 @@ * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL}; #endif @@ -315,6 +389,8 @@ { int i; + init_8259A(0); + for (i = 0; i < NR_IRQS; i++) { irq_desc[i].status = IRQ_DISABLED; irq_desc[i].action = 0; @@ -380,8 +456,9 @@ /* IPI for generic function call */ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - /* IPI vector for APIC spurious interrupts */ + /* IPI vectors for APIC spurious and error interrupts */ set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif /* --- linux/arch/i386/kernel/time.c.orig Mon Aug 23 17:10:52 1999 +++ linux/arch/i386/kernel/time.c Mon Sep 20 07:54:31 1999 @@ -74,7 +74,7 @@ * Equal to 2^32 * (1 / (clocks per usec) ). * Initialized in time_init. 
*/ -static unsigned long fast_gettimeoffset_quotient=0; +unsigned long fast_gettimeoffset_quotient=0; extern rwlock_t xtime_lock; --- linux/arch/i386/kernel/head.S.orig Mon Aug 23 17:10:42 1999 +++ linux/arch/i386/kernel/head.S Mon Sep 20 07:54:31 1999 @@ -243,6 +243,15 @@ xorl %eax,%eax lldt %ax cld # gcc2 wants the direction flag cleared at all times +#ifdef __SMP__ + movb ready, %cl + cmpb $1,%cl + je 1f # the first CPU calls start_kernel + # all other CPUs call initialize_secondary + call SYMBOL_NAME(initialize_secondary) + jmp L6 +1: +#endif call SYMBOL_NAME(start_kernel) L6: jmp L6 # main should never return here, but --- linux/arch/alpha/kernel/smp.c.orig Mon Sep 20 07:54:13 1999 +++ linux/arch/alpha/kernel/smp.c Mon Sep 20 07:54:31 1999 @@ -579,17 +579,6 @@ mb(); } -/* - * Only broken Intel needs this, thus it should not even be - * referenced globally. - */ - -void __init -initialize_secondary(void) -{ -} - - extern void update_one_process(struct task_struct *p, unsigned long ticks, unsigned long user, unsigned long system, int cpu); --- linux/arch/sparc/kernel/smp.c.orig Mon Sep 20 07:54:13 1999 +++ linux/arch/sparc/kernel/smp.c Mon Sep 20 07:54:31 1999 @@ -100,13 +100,6 @@ local_flush_tlb_all(); } -/* Only broken Intel needs this, thus it should not even be referenced - * globally... - */ -void __init initialize_secondary(void) -{ -} - extern int cpu_idle(void); /* Activate a secondary processor. */ --- linux/arch/ppc/kernel/smp.c.orig Mon Sep 20 07:54:13 1999 +++ linux/arch/ppc/kernel/smp.c Mon Sep 20 07:54:31 1999 @@ -401,11 +401,6 @@ smp_commenced = 1; } -/* intel needs this */ -void __init initialize_secondary(void) -{ -} - /* Activate a secondary processor. 
*/ int __init start_secondary(void *unused) { --- linux/arch/sparc64/kernel/smp.c.orig Mon Sep 20 07:54:13 1999 +++ linux/arch/sparc64/kernel/smp.c Mon Sep 20 07:54:31 1999 @@ -164,10 +164,6 @@ extern int cpu_idle(void); extern void init_IRQ(void); -void initialize_secondary(void) -{ -} - int start_secondary(void *unused) { trap_init(); --- linux/Documentation/nmi_watchdog.txt.orig Mon Sep 20 07:54:31 1999 +++ linux/Documentation/nmi_watchdog.txt Mon Sep 20 12:08:06 1999 @@ -0,0 +1,33 @@ + +Is your SMP system locking up unpredictably? No keyboard activity, just +a frustrating complete hard lockup? Do you want to help us debug +such lockups? If so, then this document is definitely for you. + +On Intel SMP hardware there is a feature that enables us to generate +'watchdog NMI interrupts'. (NMI: Non Maskable Interrupt - these get +executed even if the system is otherwise locked up hard) This can be +used to debug hard kernel lockups. By executing periodic NMI interrupts, +the kernel can monitor whether any CPU has locked up, and print out +debugging messages if so. You can enable/disable the NMI watchdog at boot +time with the 'nmi_watchdog=1' boot parameter. E.g. the relevant +lilo.conf entry: + + append="nmi_watchdog=1" + +A 'lockup' is the following scenario: if any CPU in the system does not +execute the periodic local timer interrupt for more than 5 seconds, then +the NMI handler generates an oops and kills the process. This +'controlled crash' (and the resulting kernel messages) can be used to +debug the lockup. Thus whenever the lockup happens, wait 5 seconds and +the oops will show up automatically. If the kernel produces no messages +then the system has crashed so hard (e.g. hardware-wise) that either it +cannot even accept NMI interrupts, or the crash has made the kernel +unable to print messages. + +NOTE: currently the NMI-oopser is enabled unconditionally on x86 SMP +boxes. 
+ +[ feel free to send bug reports, suggestions and patches to + Ingo Molnar or the Linux SMP mailing + list at ] +