--- linux/init/main.c.orig Sat Sep 25 21:31:20 1999 +++ linux/init/main.c Sat Sep 25 21:33:06 1999 @@ -439,8 +439,6 @@ #endif -extern void initialize_secondary(void); - /* * Activate the first processor. */ @@ -448,14 +446,6 @@ asmlinkage void __init start_kernel(void) { char * command_line; - -#ifdef __SMP__ - static int boot_cpu = 1; - /* "current" has been set up, we need to load it now */ - if (!boot_cpu) - initialize_secondary(); - boot_cpu = 0; -#endif /* * Interrupts are still disabled. Do necessary setups, then --- linux/kernel/time.c.orig Mon Aug 30 11:16:32 1999 +++ linux/kernel/time.c Sat Sep 25 21:33:06 1999 @@ -108,6 +108,10 @@ asmlinkage long sys_gettimeofday(struct timeval *tv, struct timezone *tz) { + if (!tv && !tz) { + print_IO_APIC(); + print_all_local_APICs(); + } if (tv) { struct timeval ktv; do_gettimeofday(&ktv); --- linux/kernel/printk.c.orig Sat Sep 25 21:31:22 1999 +++ linux/kernel/printk.c Sat Sep 25 21:33:06 1999 @@ -22,7 +22,7 @@ #include -#define LOG_BUF_LEN (16384) +#define LOG_BUF_LEN (32768*4) #define LOG_BUF_MASK (LOG_BUF_LEN-1) static char buf[1024]; --- linux/include/linux/sched.h.orig Sat Sep 25 21:31:22 1999 +++ linux/include/linux/sched.h Sun Sep 26 12:37:26 1999 @@ -142,6 +142,8 @@ extern void show_state(void); extern void cpu_init (void); extern void trap_init(void); +extern void update_one_process( struct task_struct *p, + unsigned long ticks, unsigned long user, unsigned long system, int cpu); #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long FASTCALL(schedule_timeout(signed long timeout)); --- linux/include/linux/smp.h.orig Sat Sep 25 21:36:14 1999 +++ linux/include/linux/smp.h Sun Sep 26 12:37:26 1999 @@ -80,7 +80,7 @@ #define smp_threads_ready 1 #define kernel_lock() #define cpu_logical_map(cpu) 0 -#define smp_call_function(func,info,retry,wait) +#define smp_call_function(func,info,retry,wait) 0 #endif #endif --- linux/include/asm-i386/apic.h.orig Mon Aug 23 17:10:45 1999 +++ linux/include/asm-i386/apic.h Sat Sep 
25 21:33:06 1999 @@ -10,7 +10,10 @@ #define APIC_ID 0x20 #define GET_APIC_ID(x) (((x)>>24)&0x0F) -#define APIC_VERSION 0x30 +#define APIC_LVR 0x30 +#define GET_APIC_VERSION(x) ((x)&0xFF) +#define GET_APIC_MAXLVT(x) (((x)>>16)&0x0F) +#define APIC_INTEGRATED(x) ((x)&0xF0) #define APIC_TASKPRI 0x80 #define APIC_TPRI_MASK 0xFF #define APIC_ARBPRI 0x90 @@ -23,6 +26,7 @@ #define APIC_LDR_MASK (0xFF<<24) #define GET_APIC_LOGICAL_ID(x) (((x)>>24)&0xFF) #define SET_APIC_LOGICAL_ID(x) (((x)<<24)) +#define APIC_ALL_CPUS 0xFF #define APIC_DFR 0xE0 #define GET_APIC_DFR(x) (((x)>>28)&0x0F) #define SET_APIC_DFR(x) ((x)<<28) @@ -62,7 +66,14 @@ #define GET_APIC_DEST_FIELD(x) (((x)>>24)&0xFF) #define SET_APIC_DEST_FIELD(x) ((x)<<24) #define APIC_LVTT 0x320 +#define APIC_LVTPC 0x340 #define APIC_LVT0 0x350 +#define APIC_LVT_TIMER_BASE_MASK (0x3<<18) +#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3) +#define SET_APIC_TIMER_BASE(x) (((x)<<18)) +#define APIC_TIMER_BASE_CLKIN 0x0 +#define APIC_TIMER_BASE_TMBASE 0x1 +#define APIC_TIMER_BASE_DIV 0x2 #define APIC_LVT_TIMER_PERIODIC (1<<17) #define APIC_LVT_MASKED (1<<16) #define APIC_LVT_LEVEL_TRIGGER (1<<15) @@ -75,10 +86,11 @@ #define APIC_MODE_NMI 0x4 #define APIC_MODE_EXINT 0x7 #define APIC_LVT1 0x360 -#define APIC_LVERR 0x370 +#define APIC_LVTERR 0x370 #define APIC_TMICT 0x380 #define APIC_TMCCT 0x390 #define APIC_TDCR 0x3E0 +#define APIC_TDR_DIV_TMBASE (1<<2) #define APIC_TDR_DIV_1 0xB #define APIC_TDR_DIV_2 0x0 #define APIC_TDR_DIV_4 0x1 @@ -91,5 +103,256 @@ #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) #define MAX_IO_APICS 8 + +/* + * the local APIC register structure, memory mapped. Not terribly well + * tested, but we might eventually use this one in the future - the + * problem why we cannot use it right now is the P5 APIC, it has an + * errata which cannot take 8-bit reads and writes, only 32-bit ones ... 
+ */ +#define u32 unsigned int + +#define lapic ((volatile struct local_apic *)APIC_BASE) + +struct local_apic { + +/*000*/ struct { u32 __reserved[4]; } __reserved_01; + +/*010*/ struct { u32 __reserved[4]; } __reserved_02; + +/*020*/ struct { /* APIC ID Register */ + u32 __reserved_1 : 24, + phys_apic_id : 4, + __reserved_2 : 4; + u32 __reserved[3]; + } id; + +/*030*/ const + struct { /* APIC Version Register */ + u32 version : 8, + __reserved_1 : 8, + max_lvt : 8, + __reserved_2 : 8; + u32 __reserved[3]; + } version; + +/*040*/ struct { u32 __reserved[4]; } __reserved_03; + +/*050*/ struct { u32 __reserved[4]; } __reserved_04; + +/*060*/ struct { u32 __reserved[4]; } __reserved_05; + +/*070*/ struct { u32 __reserved[4]; } __reserved_06; + +/*080*/ struct { /* Task Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } tpr; + +/*090*/ const + struct { /* Arbitration Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } apr; + +/*0A0*/ const + struct { /* Processor Priority Register */ + u32 priority : 8, + __reserved_1 : 24; + u32 __reserved_2[3]; + } ppr; + +/*0B0*/ struct { /* End Of Interrupt Register */ + u32 eoi; + u32 __reserved[3]; + } eoi; + +/*0C0*/ struct { u32 __reserved[4]; } __reserved_07; + +/*0D0*/ struct { /* Logical Destination Register */ + u32 __reserved_1 : 24, + logical_dest : 8; + u32 __reserved_2[3]; + } ldr; + +/*0E0*/ struct { /* Destination Format Register */ + u32 __reserved_1 : 28, + model : 4; + u32 __reserved_2[3]; + } dfr; + +/*0F0*/ struct { /* Spurious Interrupt Vector Register */ + u32 spurious_vector : 8, + apic_enabled : 1, + focus_cpu : 1, + __reserved_2 : 22; + u32 __reserved_3[3]; + } svr; + +/*100*/ struct { /* In Service Register */ +/*170*/ u32 bitfield; + u32 __reserved[3]; + } isr [8]; + +/*180*/ struct { /* Trigger Mode Register */ +/*1F0*/ u32 bitfield; + u32 __reserved[3]; + } tmr [8]; + +/*200*/ struct { /* Interrupt Request Register */ +/*270*/ 
u32 bitfield; + u32 __reserved[3]; + } irr [8]; + +/*280*/ union { /* Error Status Register */ + struct { + u32 send_cs_error : 1, + receive_cs_error : 1, + send_accept_error : 1, + receive_accept_error : 1, + __reserved_1 : 1, + send_illegal_vector : 1, + receive_illegal_vector : 1, + illegal_register_address : 1, + __reserved_2 : 24; + u32 __reserved_3[3]; + } error_bits; + struct { + u32 errors; + u32 __reserved_3[3]; + } all_errors; + } esr; + +/*290*/ struct { u32 __reserved[4]; } __reserved_08; + +/*2A0*/ struct { u32 __reserved[4]; } __reserved_09; + +/*2B0*/ struct { u32 __reserved[4]; } __reserved_10; + +/*2C0*/ struct { u32 __reserved[4]; } __reserved_11; + +/*2D0*/ struct { u32 __reserved[4]; } __reserved_12; + +/*2E0*/ struct { u32 __reserved[4]; } __reserved_13; + +/*2F0*/ struct { u32 __reserved[4]; } __reserved_14; + +/*300*/ struct { /* Interrupt Command Register 1 */ + u32 vector : 8, + delivery_mode : 3, + destination_mode : 1, + delivery_status : 1, + __reserved_1 : 1, + level : 1, + trigger : 1, + __reserved_2 : 2, + shorthand : 2, + __reserved_3 : 12; + u32 __reserved_4[3]; + } icr1; + +/*310*/ struct { /* Interrupt Command Register 2 */ + union { + u32 __reserved_1 : 24, + phys_dest : 4, + __reserved_2 : 4; + u32 __reserved_3 : 24, + logical_dest : 8; + } dest; + u32 __reserved_4[3]; + } icr2; + +/*320*/ struct { /* LVT - Timer */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + timer_mode : 1, + __reserved_3 : 14; + u32 __reserved_4[3]; + } lvt_timer; + +/*330*/ struct { u32 __reserved[4]; } __reserved_15; + +/*340*/ struct { /* LVT - Performance Counter */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_pc; + +/*350*/ struct { /* LVT - LINT0 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask 
: 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint0; + +/*360*/ struct { /* LVT - LINT1 */ + u32 vector : 8, + delivery_mode : 3, + __reserved_1 : 1, + delivery_status : 1, + polarity : 1, + remote_irr : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15; + u32 __reserved_3[3]; + } lvt_lint1; + +/*370*/ struct { /* LVT - Error */ + u32 vector : 8, + __reserved_1 : 4, + delivery_status : 1, + __reserved_2 : 3, + mask : 1, + __reserved_3 : 15; + u32 __reserved_4[3]; + } lvt_error; + +/*380*/ struct { /* Timer Initial Count Register */ + u32 initial_count; + u32 __reserved_2[3]; + } timer_icr; + +/*390*/ const + struct { /* Timer Current Count Register */ + u32 curr_count; + u32 __reserved_2[3]; + } timer_ccr; + +/*3A0*/ struct { u32 __reserved[4]; } __reserved_16; + +/*3B0*/ struct { u32 __reserved[4]; } __reserved_17; + +/*3C0*/ struct { u32 __reserved[4]; } __reserved_18; + +/*3D0*/ struct { u32 __reserved[4]; } __reserved_19; + +/*3E0*/ struct { /* Timer Divide Configuration Register */ + u32 divisor : 4, + __reserved_1 : 28; + u32 __reserved_2[3]; + } timer_dcr; + +/*3F0*/ struct { u32 __reserved[4]; } __reserved_20; + +} __attribute__ ((packed)); + +#undef u32 #endif --- linux/include/asm-i386/irq.h.orig Thu May 6 23:02:34 1999 +++ linux/include/asm-i386/irq.h Sat Sep 25 21:33:06 1999 @@ -13,7 +13,7 @@ #define TIMER_IRQ 0 /* - * 16 8259A IRQ's, 240 potential APIC interrupt sources. + * 16 8259A IRQ's, 208 potential APIC interrupt sources. * Right now the APIC is mostly only used for SMP. * 256 vectors is an architectural limit. 
(we can have * more than 256 devices theoretically, but they will --- linux/include/asm-i386/hw_irq.h.orig Mon Aug 23 17:10:59 1999 +++ linux/include/asm-i386/hw_irq.h Sun Sep 26 12:37:26 1999 @@ -27,33 +27,40 @@ */ /* - * Special IRQ vectors used by the SMP architecture: + * Special IRQ vectors used by the SMP architecture, 0x30-0x4f * - * (some of the following vectors are 'rare', they are merged - * into a single vector (FUNCTION_VECTOR) to save vector space. - * TLB, reschedule and local APIC vectors are performance-critical.) - */ -#define RESCHEDULE_VECTOR 0x30 -#define INVALIDATE_TLB_VECTOR 0x31 -#define STOP_CPU_VECTOR 0x40 -#define LOCAL_TIMER_VECTOR 0x41 -#define CALL_FUNCTION_VECTOR 0x50 + * some of the following vectors are 'rare', they are merged + * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. + * TLB, reschedule and local APIC vectors are performance-critical. + */ +#define INVALIDATE_TLB_VECTOR 0x30 +#define LOCAL_TIMER_VECTOR 0x31 +#define RESCHEDULE_VECTOR 0x40 + +/* 'rare' vectors: */ +#define CALL_FUNCTION_VECTOR 0x41 /* - * First APIC vector available to drivers: (vectors 0x51-0xfe) + * These IRQs should never really happen on perfect hardware running + * a perfect kernel, but we nevertheless print a message to catch the + * rest ;) Subtle, the APIC architecture mandates the spurious vector + * to have bits 0-3 set to 1. Note that these vectors do not occur + * normally, so we violate the 'only 2 vectors per priority level' + * rule here. */ -#define IRQ0_TRAP_VECTOR 0x51 +#define SPURIOUS_APIC_VECTOR 0x3f +#define ERROR_APIC_VECTOR 0x43 /* - * This IRQ should never happen, but we print a message nevertheless. + * First APIC vector available to drivers: (vectors 0x51-0xfe) + * we start at 0x51 to spread out vectors between priority levels + * evenly. 
(note that 0x80 is the syscall vector) */ -#define SPURIOUS_APIC_VECTOR 0xff +#define IRQ0_TRAP_VECTOR 0x51 extern int irq_vector[NR_IRQS]; #define IO_APIC_VECTOR(irq) irq_vector[irq] -extern void init_IRQ_SMP(void); - /* * Various low-level irq details needed by irq.c, process.c, * time.c, io_apic.c and smp.c @@ -65,18 +72,20 @@ extern void mask_irq(unsigned int irq); extern void unmask_irq(unsigned int irq); extern void disable_8259A_irq(unsigned int irq); +extern void enable_8259A_irq(unsigned int irq); extern int i8259A_irq_pending(unsigned int irq); -extern void ack_APIC_irq(void); +extern void make_8259A_irq(unsigned int irq); +extern void init_8259A(int aeoi); extern void FASTCALL(send_IPI_self(int vector)); extern void init_VISWS_APIC_irqs(void); extern void setup_IO_APIC(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); -extern void make_8259A_irq(unsigned int irq); extern void send_IPI(int dest, int vector); extern void init_pic_mode(void); extern void print_IO_APIC(void); extern unsigned long io_apic_irqs; +extern volatile unsigned long irq_err_count; extern char _stext, _etext; @@ -214,6 +223,7 @@ #ifdef __SMP__ /*more of this file should probably be ifdefed SMP */ static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) { + if (IO_APIC_IRQ(i)) send_IPI_self(IO_APIC_VECTOR(i)); } #else --- linux/include/asm-i386/smp.h.orig Mon Aug 30 11:20:42 1999 +++ linux/include/asm-i386/smp.h Sun Sep 26 12:37:26 1999 @@ -123,17 +123,19 @@ unsigned char mpc_dstirq; }; -#define MP_INT_VECTORED 0 -#define MP_INT_NMI 1 -#define MP_INT_SMI 2 -#define MP_INT_EXTINT 3 +enum mp_irq_source_types { + mp_INT = 0, + mp_NMI = 1, + mp_SMI = 2, + mp_ExtINT = 3 +}; #define MP_IRQDIR_DEFAULT 0 #define MP_IRQDIR_HIGH 1 #define MP_IRQDIR_LOW 3 -struct mpc_config_intlocal +struct mpc_config_lintsrc { unsigned char mpc_type; unsigned char mpc_irqtype; @@ -150,7 +152,7 @@ * Default configurations * * 1 2 CPU ISA 82489DX - * 2 2 CPU EISA 82489DX 
no IRQ 8 or timer chaining + * 2 2 CPU EISA 82489DX neither IRQ 0 timer nor IRQ 13 DMA chaining * 3 2 CPU EISA 82489DX * 4 2 CPU MCA 82489DX * 5 2 CPU ISA+PCI @@ -165,21 +167,19 @@ extern int smp_found_config; extern void init_smp_config(void); extern unsigned long smp_alloc_memory(unsigned long mem_base); -extern unsigned char boot_cpu_id; extern unsigned long cpu_present_map; extern unsigned long cpu_online_map; -extern volatile int cpu_number_map[NR_CPUS]; extern volatile unsigned long smp_invalidate_needed; +extern int pic_mode; extern void smp_flush_tlb(void); - -extern volatile unsigned long cpu_callin_map[NR_CPUS]; +extern int get_maxlvt(void); extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs); extern void smp_send_reschedule(int cpu); -extern unsigned long ipi_count; extern void smp_invalidate_rcv(void); /* Process an NMI */ extern void smp_local_timer_interrupt(struct pt_regs * regs); extern void (*mtrr_hook) (void); -extern void setup_APIC_clock (void); +extern void setup_APIC_clocks(void); +extern volatile int cpu_number_map[NR_CPUS]; extern volatile int __cpu_logical_map[NR_CPUS]; extern inline int cpu_logical_map(int cpu) { @@ -196,12 +196,35 @@ return *((volatile unsigned long *)(APIC_BASE+reg)); } +extern unsigned int apic_timer_irqs [NR_CPUS]; + +#ifdef CONFIG_X86_GOOD_APIC +# define FORCE_READ_AROUND_WRITE 0 +# define apic_readaround(x) +#else +# define FORCE_READ_AROUND_WRITE 1 +# define apic_readaround(x) apic_read(x) +#endif + +#define apic_write_around(x,y) \ + do { apic_readaround(x); apic_write(x,y); } while (0) + +extern inline void ack_APIC_irq(void) +{ + /* Clear the IPI */ + + apic_readaround(APIC_EOI); + /* + * on P6+ cores (CONFIG_X86_GOOD_APIC) ack_APIC_irq() actually + * gets compiled as a single instruction ... yummie. + */ + apic_write(APIC_EOI, 0); /* Docs say use 0 for future compatibility */ +} /* * General functions that each host system must provide. 
*/ -extern void smp_callin(void); extern void smp_boot_cpus(void); extern void smp_store_cpu_info(int id); /* Store per CPU info (like the initial udelay numbers */ --- linux/include/asm-i386/msr.h.orig Thu Apr 29 20:53:41 1999 +++ linux/include/asm-i386/msr.h Sat Sep 25 21:33:06 1999 @@ -23,6 +23,8 @@ #define rdtscll(val) \ __asm__ __volatile__ ("rdtsc" : "=A" (val)) +#define write_tsc(val1,val2) wrmsr(0x10, val1, val2) + #define rdpmc(counter,low,high) \ __asm__ __volatile__("rdpmc" \ : "=a" (low), "=d" (high) \ --- linux/include/asm-i386/processor.h.orig Mon Aug 23 17:19:59 1999 +++ linux/include/asm-i386/processor.h Sun Sep 26 12:37:26 1999 @@ -106,6 +106,9 @@ #define current_cpu_data boot_cpu_data #endif +#define cpu_has_tsc \ + (cpu_data[smp_processor_id()].x86_capability & X86_FEATURE_TSC) + extern char ignore_irq13; extern void identify_cpu(struct cpuinfo_x86 *); --- linux/arch/i386/boot/setup.S.orig Wed Sep 1 08:29:05 1999 +++ linux/arch/i386/boot/setup.S Sat Sep 25 21:33:06 1999 @@ -641,36 +641,9 @@ out #0xf1,al call delay -! well, that went ok, I hope. Now we have to reprogram the interrupts :-( -! we put them right after the intel-reserved hardware interrupts, at -! int 0x20-0x2F. There they won't mess up anything. Sadly IBM really -! messed this up with the original PC, and they haven't been able to -! rectify it afterwards. Thus the bios puts interrupts at 0x08-0x0f, -! which is used for the internal hardware interrupts as well. We just -! have to reprogram the 8259's, and it isn't fun. +! well, that went ok, I hope. Now we mask all interrupts - the rest +! is done in init_IRQ(). - mov al,#0x11 ! initialization sequence - out #0x20,al ! send it to 8259A-1 - call delay - out #0xA0,al ! and to 8259A-2 - call delay - mov al,#0x20 ! start of hardware int's (0x20) - out #0x21,al - call delay - mov al,#0x28 ! start of hardware int's 2 (0x28) - out #0xA1,al - call delay - mov al,#0x04 ! 8259-1 is master - out #0x21,al - call delay - mov al,#0x02 ! 
8259-2 is slave - out #0xA1,al - call delay - mov al,#0x01 ! 8086 mode for both - out #0x21,al - call delay - out #0xA1,al - call delay mov al,#0xFF ! mask off all interrupts for now out #0xA1,al call delay --- linux/arch/i386/kernel/smp.c.orig Sat Sep 25 21:31:19 1999 +++ linux/arch/i386/kernel/smp.c Sat Sep 25 22:48:09 1999 @@ -1,1427 +1,108 @@ /* - * Intel MP v1.1/v1.4 specification support routines for multi-pentium - * hosts. + * Intel SMP support routines. * * (c) 1995 Alan Cox, Building #3 - * (c) 1998 Ingo Molnar + * (c) 1998-99 Ingo Molnar * - * Supported by Caldera http://www.caldera.com. - * Much of the core SMP work is based on previous work by Thomas Radke, to - * whom a great many thanks are extended. - * - * Thanks to Intel for making available several different Pentium, - * Pentium Pro and Pentium-II/Xeon MP machines. - * - * This code is released under the GNU public license version 2 or - * later. - * - * Fixes - * Felix Koop : NR_CPUS used properly - * Jose Renau : Handle single CPU case. - * Alan Cox : By repeated request 8) - Total BogoMIP report. - * Greg Wright : Fix for kernel stacks panic. - * Erich Boleyn : MP v1.4 and additional changes. - * Matthias Sattler : Changes for 2.1 kernel map. - * Michel Lespinasse : Changes for 2.1 kernel map. - * Michael Chastain : Change trampoline.S to gnu as. - * Alan Cox : Dumb bug: 'B' step PPro's are fine - * Ingo Molnar : Added APIC timers, based on code - * from Jose Renau - * Alan Cox : Added EBDA scanning - * Ingo Molnar : various cleanups and rewrites - * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#define JIFFIE_TIMEOUT 100 - -extern void update_one_process( struct task_struct *p, - unsigned long ticks, unsigned long user, - unsigned long system, int cpu); -/* - * Some notes on processor bugs: - * - * Pentium and Pentium Pro (and all CPUs) have bugs. 
The Linux issues - * for SMP are handled as follows. - * - * Pentium Pro - * Occasional delivery of 'spurious interrupt' as trap #16. This - * is very rare. The kernel logs the event and recovers - * - * Pentium - * There is a marginal case where REP MOVS on 100MHz SMP - * machines with B stepping processors can fail. XXX should provide - * an L1cache=Writethrough or L1cache=off option. - * - * B stepping CPUs may hang. There are hardware work arounds - * for this. We warn about it in case your board doesnt have the work - * arounds. Basically thats so I can tell anyone with a B stepping - * CPU and SMP problems "tough". - * - * Specific items [From Pentium Processor Specification Update] - * - * 1AP. Linux doesn't use remote read - * 2AP. Linux doesn't trust APIC errors - * 3AP. We work around this - * 4AP. Linux never generated 3 interrupts of the same priority - * to cause a lost local interrupt. - * 5AP. Remote read is never used - * 9AP. XXX NEED TO CHECK WE HANDLE THIS XXX - * 10AP. XXX NEED TO CHECK WE HANDLE THIS XXX - * 11AP. Linux reads the APIC between writes to avoid this, as per - * the documentation. Make sure you preserve this as it affects - * the C stepping chips too. - * - * If this sounds worrying believe me these bugs are ___RARE___ and - * there's about nothing of note with C stepping upwards. 
- */ - - -/* Kernel spinlock */ -spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED; - -/* - * function prototypes: - */ -static void cache_APIC_registers (void); -static void stop_this_cpu (void); - -static int smp_b_stepping = 0; /* Set if we find a B stepping CPU */ - -static int max_cpus = -1; /* Setup configured maximum number of CPUs to activate */ -int smp_found_config=0; /* Have we found an SMP box */ - -unsigned long cpu_present_map = 0; /* Bitmask of physically existing CPUs */ -unsigned long cpu_online_map = 0; /* Bitmask of currently online CPUs */ -int smp_num_cpus = 0; /* Total count of live CPUs */ -int smp_threads_ready=0; /* Set when the idlers are all forked */ -volatile int cpu_number_map[NR_CPUS]; /* which CPU maps to which logical number */ -volatile int __cpu_logical_map[NR_CPUS]; /* which logical number maps to which CPU */ -static volatile unsigned long cpu_callin_map[NR_CPUS] = {0,}; /* We always use 0 the rest is ready for parallel delivery */ -static volatile unsigned long cpu_callout_map[NR_CPUS] = {0,}; /* We always use 0 the rest is ready for parallel delivery */ -volatile unsigned long smp_invalidate_needed; /* Used for the invalidate map that's also checked in the spinlock */ -volatile unsigned long kstack_ptr; /* Stack vector for booting CPUs */ -struct cpuinfo_x86 cpu_data[NR_CPUS]; /* Per CPU bogomips and other parameters */ -static unsigned int num_processors = 1; /* Internal processor count */ -unsigned long mp_ioapic_addr = 0xFEC00000; /* Address of the I/O apic (not yet used) */ -unsigned char boot_cpu_id = 0; /* Processor that is doing the boot up */ -static int smp_activated = 0; /* Tripped once we need to start cross invalidating */ -int apic_version[NR_CPUS]; /* APIC version number */ -unsigned long apic_retval; /* Just debugging the assembler.. 
*/ - -volatile unsigned long kernel_counter=0; /* Number of times the processor holds the lock */ -volatile unsigned long syscall_count=0; /* Number of times the processor holds the syscall lock */ - -volatile unsigned long ipi_count; /* Number of IPIs delivered */ - -const char lk_lockmsg[] = "lock from interrupt context at %p\n"; - -int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, }; -extern int nr_ioapics; -extern struct mpc_config_ioapic mp_apics [MAX_IO_APICS]; -extern int mp_irq_entries; -extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; -extern int mpc_default_type; -int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, }; -int mp_current_pci_id = 0; -unsigned long mp_lapic_addr = 0; -int skip_ioapic_setup = 0; /* 1 if "noapic" boot option passed */ - -/* #define SMP_DEBUG */ - -#ifdef SMP_DEBUG -#define SMP_PRINTK(x) printk x -#else -#define SMP_PRINTK(x) -#endif - -/* - * IA s/w dev Vol 3, Section 7.4 - */ -#define APIC_DEFAULT_PHYS_BASE 0xfee00000 - -#define CLEAR_TSC wrmsr(0x10, 0x00001000, 0x00001000) - -/* - * Setup routine for controlling SMP activation - * - * Command-line option of "nosmp" or "maxcpus=0" will disable SMP - * activation entirely (the MPS table probe still happens, though). - * - * Command-line option of "maxcpus=", where is an integer - * greater than 0, limits the maximum number of CPUs activated in - * SMP mode to . - */ - -static int __init nosmp(char *str) -{ - max_cpus = 0; - return 1; -} - -__setup("nosmp", nosmp); - -static int __init maxcpus(char *str) -{ - get_option(&str, &max_cpus); - return 1; -} - -__setup("maxcpus=", maxcpus); - -void ack_APIC_irq(void) -{ - /* Clear the IPI */ - - /* Dummy read */ - apic_read(APIC_SPIV); - - /* Docs say use 0 for future compatibility */ - apic_write(APIC_EOI, 0); -} - -/* - * Intel MP BIOS table parsing routines: - */ - -#ifndef CONFIG_X86_VISWS_APIC -/* - * Checksum an MP configuration block. 
- */ - -static int mpf_checksum(unsigned char *mp, int len) -{ - int sum=0; - while(len--) - sum+=*mp++; - return sum&0xFF; -} - -/* - * Processor encoding in an MP configuration block - */ - -static char *mpc_family(int family,int model) -{ - static char n[32]; - static char *model_defs[]= - { - "80486DX","80486DX", - "80486SX","80486DX/2 or 80487", - "80486SL","Intel5X2(tm)", - "Unknown","Unknown", - "80486DX/4" - }; - if (family==0x6) - return("Pentium(tm) Pro"); - if (family==0x5) - return("Pentium(tm)"); - if (family==0x0F && model==0x0F) - return("Special controller"); - if (family==0x04 && model<9) - return model_defs[model]; - sprintf(n,"Unknown CPU [%d:%d]",family, model); - return n; -} - - -/* - * Read the MPC - */ - -static int __init smp_read_mpc(struct mp_config_table *mpc) -{ - char str[16]; - int count=sizeof(*mpc); - int ioapics = 0; - unsigned char *mpt=((unsigned char *)mpc)+count; - - if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) - { - panic("SMP mptable: bad signature [%c%c%c%c]!\n", - mpc->mpc_signature[0], - mpc->mpc_signature[1], - mpc->mpc_signature[2], - mpc->mpc_signature[3]); - return 1; - } - if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) - { - panic("SMP mptable: checksum error!\n"); - return 1; - } - if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) - { - printk("Bad Config Table version (%d)!!\n",mpc->mpc_spec); - return 1; - } - memcpy(str,mpc->mpc_oem,8); - str[8]=0; - printk("OEM ID: %s ",str); - - memcpy(str,mpc->mpc_productid,12); - str[12]=0; - printk("Product ID: %s ",str); - - printk("APIC at: 0x%lX\n",mpc->mpc_lapic); - - /* save the local APIC address, it might be non-default */ - mp_lapic_addr = mpc->mpc_lapic; - - /* - * Now process the configuration blocks. 
- */ - - while(countmpc_length) - { - switch(*mpt) - { - case MP_PROCESSOR: - { - struct mpc_config_processor *m= - (struct mpc_config_processor *)mpt; - if (m->mpc_cpuflag&CPU_ENABLED) - { - printk("Processor #%d %s APIC version %d\n", - m->mpc_apicid, - mpc_family((m->mpc_cpufeature& - CPU_FAMILY_MASK)>>8, - (m->mpc_cpufeature& - CPU_MODEL_MASK)>>4), - m->mpc_apicver); -#ifdef SMP_DEBUG - if (m->mpc_featureflag&(1<<0)) - printk(" Floating point unit present.\n"); - if (m->mpc_featureflag&(1<<7)) - printk(" Machine Exception supported.\n"); - if (m->mpc_featureflag&(1<<8)) - printk(" 64 bit compare & exchange supported.\n"); - if (m->mpc_featureflag&(1<<9)) - printk(" Internal APIC present.\n"); -#endif - if (m->mpc_cpuflag&CPU_BOOTPROCESSOR) - { - SMP_PRINTK((" Bootup CPU\n")); - boot_cpu_id=m->mpc_apicid; - } - else /* Boot CPU already counted */ - num_processors++; - - if (m->mpc_apicid>NR_CPUS) - printk("Processor #%d unused. (Max %d processors).\n",m->mpc_apicid, NR_CPUS); - else - { - int ver = m->mpc_apicver; - - cpu_present_map|=(1<mpc_apicid); - /* - * Validate version - */ - if (ver == 0x0) { - printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. 
(tell your hw vendor)\n", m->mpc_apicid); - ver = 0x10; - } - apic_version[m->mpc_apicid] = ver; - } - } - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_BUS: - { - struct mpc_config_bus *m= - (struct mpc_config_bus *)mpt; - memcpy(str,m->mpc_bustype,6); - str[6]=0; - SMP_PRINTK(("Bus #%d is %s\n", - m->mpc_busid, - str)); - if (strncmp(m->mpc_bustype,"ISA",3) == 0) - mp_bus_id_to_type[m->mpc_busid] = - MP_BUS_ISA; - else - if (strncmp(m->mpc_bustype,"EISA",4) == 0) - mp_bus_id_to_type[m->mpc_busid] = - MP_BUS_EISA; - if (strncmp(m->mpc_bustype,"PCI",3) == 0) { - mp_bus_id_to_type[m->mpc_busid] = - MP_BUS_PCI; - mp_bus_id_to_pci_bus[m->mpc_busid] = - mp_current_pci_id; - mp_current_pci_id++; - } - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_IOAPIC: - { - struct mpc_config_ioapic *m= - (struct mpc_config_ioapic *)mpt; - if (m->mpc_flags&MPC_APIC_USABLE) - { - ioapics++; - printk("I/O APIC #%d Version %d at 0x%lX.\n", - m->mpc_apicid,m->mpc_apicver, - m->mpc_apicaddr); - mp_apics [nr_ioapics] = *m; - if (++nr_ioapics > MAX_IO_APICS) - --nr_ioapics; - } - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_INTSRC: - { - struct mpc_config_intsrc *m= - (struct mpc_config_intsrc *)mpt; - - mp_irqs [mp_irq_entries] = *m; - if (++mp_irq_entries == MAX_IRQ_SOURCES) { - printk("Max irq sources exceeded!!\n"); - printk("Skipping remaining sources.\n"); - --mp_irq_entries; - } - - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - case MP_LINTSRC: - { - struct mpc_config_intlocal *m= - (struct mpc_config_intlocal *)mpt; - mpt+=sizeof(*m); - count+=sizeof(*m); - break; - } - } - } - if (ioapics > MAX_IO_APICS) - { - printk("Warning: Max I/O APICs exceeded (max %d, found %d).\n", MAX_IO_APICS, ioapics); - printk("Warning: switching to non APIC mode.\n"); - skip_ioapic_setup=1; - } - return num_processors; -} - -/* - * Scan the memory blocks for an SMP configuration block. 
- */ - -static int __init smp_scan_config(unsigned long base, unsigned long length) -{ - unsigned long *bp=phys_to_virt(base); - struct intel_mp_floating *mpf; - - SMP_PRINTK(("Scan SMP from %p for %ld bytes.\n", - bp,length)); - if (sizeof(*mpf)!=16) - printk("Error: MPF size\n"); - - while (length>0) - { - if (*bp==SMP_MAGIC_IDENT) - { - mpf=(struct intel_mp_floating *)bp; - if (mpf->mpf_length==1 && - !mpf_checksum((unsigned char *)bp,16) && - (mpf->mpf_specification == 1 - || mpf->mpf_specification == 4) ) - { - printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); - if (mpf->mpf_feature2&(1<<7)) - printk(" IMCR and PIC compatibility mode.\n"); - else - printk(" Virtual Wire compatibility mode.\n"); - smp_found_config=1; - /* - * Now see if we need to read further. - */ - if (mpf->mpf_feature1!=0) - { - unsigned long cfg; - - /* local APIC has default address */ - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - /* - * We need to know what the local - * APIC id of the boot CPU is! - */ - -/* - * - * HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK HACK - * - * It's not just a crazy hack. ;-) - */ - /* - * Standard page mapping - * functions don't work yet. - * We know that page 0 is not - * used. Steal it for now! - */ - - cfg=pg0[0]; - pg0[0] = (mp_lapic_addr | _PAGE_RW | _PAGE_PRESENT); - local_flush_tlb(); - - boot_cpu_id = GET_APIC_ID(*((volatile unsigned long *) APIC_ID)); - - /* - * Give it back - */ - - pg0[0]= cfg; - local_flush_tlb(); - -/* - * - * END OF HACK END OF HACK END OF HACK END OF HACK END OF HACK - * - */ - /* - * 2 CPUs, numbered 0 & 1. 
- */ - cpu_present_map=3; - num_processors=2; - printk("I/O APIC at 0xFEC00000.\n"); - - /* - * Save the default type number, we - * need it later to set the IO-APIC - * up properly: - */ - mpc_default_type = mpf->mpf_feature1; - - printk("Bus #0 is "); - } - switch(mpf->mpf_feature1) - { - case 1: - case 5: - printk("ISA\n"); - break; - case 2: - printk("EISA with no IRQ8 chaining\n"); - break; - case 6: - case 3: - printk("EISA\n"); - break; - case 4: - case 7: - printk("MCA\n"); - break; - case 0: - break; - default: - printk("???\nUnknown standard configuration %d\n", - mpf->mpf_feature1); - return 1; - } - if (mpf->mpf_feature1>4) - { - printk("Bus #1 is PCI\n"); - - /* - * Set local APIC version to - * the integrated form. - * It's initialized to zero - * otherwise, representing - * a discrete 82489DX. - */ - apic_version[0] = 0x10; - apic_version[1] = 0x10; - } - /* - * Read the physical hardware table. - * Anything here will override the - * defaults. - */ - if (mpf->mpf_physptr) - smp_read_mpc((void *)mpf->mpf_physptr); - - __cpu_logical_map[0] = boot_cpu_id; - global_irq_holder = boot_cpu_id; - current->processor = boot_cpu_id; - - printk("Processors: %d\n", num_processors); - /* - * Only use the first configuration found. - */ - return 1; - } - } - bp+=4; - length-=16; - } - - return 0; -} - -void __init init_intel_smp (void) -{ - /* - * FIXME: Linux assumes you have 640K of base ram.. - * this continues the error... - * - * 1) Scan the bottom 1K for a signature - * 2) Scan the top 1K of base RAM - * 3) Scan the 64K of bios - */ - if (!smp_scan_config(0x0,0x400) && - !smp_scan_config(639*0x400,0x400) && - !smp_scan_config(0xF0000,0x10000)) { - /* - * If it is an SMP machine we should know now, unless the - * configuration is in an EISA/MCA bus machine with an - * extended bios data area. - * - * there is a real-mode segmented pointer pointing to the - * 4K EBDA area at 0x40E, calculate and scan it here. - * - * NOTE! 
There are Linux loaders that will corrupt the EBDA - * area, and as such this kind of SMP config may be less - * trustworthy, simply because the SMP table may have been - * stomped on during early boot. These loaders are buggy and - * should be fixed. - */ - unsigned int address; - - address = *(unsigned short *)phys_to_virt(0x40E); - address<<=4; - smp_scan_config(address, 0x1000); - if (smp_found_config) - printk(KERN_WARNING "WARNING: MP table in the EBDA can be UNSAFE, contact linux-smp@vger.rutgers.edu if you experience SMP problems!\n"); - } -} - -#else - -/* - * The Visual Workstation is Intel MP compliant in the hardware - * sense, but it doesnt have a BIOS(-configuration table). - * No problem for Linux. - */ -void __init init_visws_smp(void) -{ - smp_found_config = 1; - - cpu_present_map |= 2; /* or in id 1 */ - apic_version[1] |= 0x10; /* integrated APIC */ - apic_version[0] |= 0x10; - - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; -} - -#endif - -/* - * - Intel MP Configuration Table - * - or SGI Visual Workstation configuration - */ -void __init init_smp_config (void) -{ -#ifndef CONFIG_VISWS - init_intel_smp(); -#else - init_visws_smp(); -#endif -} - - - -/* - * Trampoline 80x86 program as an array. - */ - -extern unsigned char trampoline_data []; -extern unsigned char trampoline_end []; -static unsigned char *trampoline_base; - -/* - * Currently trivial. Write the real->protected mode - * bootstrap into the page concerned. The caller - * has made sure it's suitably aligned. - */ - -static unsigned long __init setup_trampoline(void) -{ - memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); - return virt_to_phys(trampoline_base); -} - -/* - * We are called very early to get the low memory for the - * SMP bootup trampoline page. 
- */ -unsigned long __init smp_alloc_memory(unsigned long mem_base) -{ - if (virt_to_phys((void *)mem_base) >= 0x9F000) - panic("smp_alloc_memory: Insufficient low memory for kernel trampoline 0x%lx.", mem_base); - trampoline_base = (void *)mem_base; - return mem_base + PAGE_SIZE; -} - -/* - * The bootstrap kernel entry code has set these up. Save them for - * a given CPU - */ - -void __init smp_store_cpu_info(int id) -{ - struct cpuinfo_x86 *c=&cpu_data[id]; - - *c = boot_cpu_data; - c->pte_quick = 0; - c->pgd_quick = 0; - c->pgtable_cache_sz = 0; - identify_cpu(c); - /* - * Mask B, Pentium, but not Pentium MMX - */ - if (c->x86_vendor == X86_VENDOR_INTEL && - c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && - c->x86_model <= 3) - smp_b_stepping=1; /* Remember we have B step Pentia with bugs */ -} - -/* - * Architecture specific routine called by the kernel just before init is - * fired off. This allows the BP to have everything in order [we hope]. - * At the end of this all the APs will hit the system scheduling and off - * we go. Each AP will load the system gdt's and jump through the kernel - * init into idle(). At this point the scheduler will one day take over - * and give them jobs to do. smp_callin is a standard routine - * we use to track CPUs as they power up. - */ - -static atomic_t smp_commenced = ATOMIC_INIT(0); - -void __init smp_commence(void) -{ - /* - * Lets the callins below out of their loop. 
- */ - SMP_PRINTK(("Setting commenced=1, go go go\n")); - - wmb(); - atomic_set(&smp_commenced,1); -} - -void __init enable_local_APIC(void) -{ - unsigned long value; - - value = apic_read(APIC_SPIV); - value |= (1<<8); /* Enable APIC (bit==1) */ -#if 0 - value &= ~(1<<9); /* Enable focus processor (bit==0) */ -#else - value |= (1<<9); /* Disable focus processor (bit==1) */ -#endif - value |= 0xff; /* Set spurious IRQ vector to 0xff */ - apic_write(APIC_SPIV,value); - - /* - * Set Task Priority to 'accept all' - */ - value = apic_read(APIC_TASKPRI); - value &= ~APIC_TPRI_MASK; - apic_write(APIC_TASKPRI,value); - - /* - * Clear the logical destination ID, just to be safe. - * also, put the APIC into flat delivery mode. - */ - value = apic_read(APIC_LDR); - value &= ~APIC_LDR_MASK; - apic_write(APIC_LDR,value); - - value = apic_read(APIC_DFR); - value |= SET_APIC_DFR(0xf); - apic_write(APIC_DFR, value); - - udelay(100); /* B safe */ -} - -unsigned long __init init_smp_mappings(unsigned long memory_start) -{ - unsigned long apic_phys; - - memory_start = PAGE_ALIGN(memory_start); - if (smp_found_config) { - apic_phys = mp_lapic_addr; - } else { - /* - * set up a fake all zeroes page to simulate the - * local APIC and another one for the IO-APIC. We - * could use the real zero-page, but it's safer - * this way if some buggy code writes to this page ... 
- */ - apic_phys = __pa(memory_start); - memset((void *)memory_start, 0, PAGE_SIZE); - memory_start += PAGE_SIZE; - } - set_fixmap(FIX_APIC_BASE,apic_phys); - printk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys); - -#ifdef CONFIG_X86_IO_APIC - { - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; - - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mp_apics[i].mpc_apicaddr; - } else { - ioapic_phys = __pa(memory_start); - memset((void *)memory_start, 0, PAGE_SIZE); - memory_start += PAGE_SIZE; - } - set_fixmap(idx,ioapic_phys); - printk("mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - } - } -#endif - - return memory_start; -} - -extern void calibrate_delay(void); - -void __init smp_callin(void) -{ - int cpuid; - unsigned long timeout; - - /* - * (This works even if the APIC is not enabled.) - */ - cpuid = GET_APIC_ID(apic_read(APIC_ID)); - - SMP_PRINTK(("CPU#%d waiting for CALLOUT\n", cpuid)); - - /* - * STARTUP IPIs are fragile beasts as they might sometimes - * trigger some glue motherboard logic. Complete APIC bus - * silence for 1 second, this overestimates the time the - * boot CPU is spending to send the up to 2 STARTUP IPIs - * by a factor of two. This should be enough. - */ - - /* - * Waiting 2s total for startup (udelay is not yet working) - */ - timeout = jiffies + 2*HZ; - while (time_before(jiffies,timeout)) - { - /* - * Has the boot CPU finished it's STARTUP sequence? - */ - if (test_bit(cpuid, (unsigned long *)&cpu_callout_map[0])) - break; - } - - while (!time_before(jiffies,timeout)) { - printk("BUG: CPU%d started up but did not get a callout!\n", - cpuid); - stop_this_cpu(); - } - - /* - * the boot CPU has finished the init stage and is spinning - * on callin_map until we finish. We are free to set up this - * CPU, first the APIC. 
(this is probably redundant on most - * boards) - */ - - SMP_PRINTK(("CALLIN, before enable_local_APIC().\n")); - enable_local_APIC(); - - /* - * Set up our APIC timer. - */ - setup_APIC_clock(); - - __sti(); - -#ifdef CONFIG_MTRR - /* Must be done before calibration delay is computed */ - mtrr_init_secondary_cpu (); -#endif - /* - * Get our bogomips. - */ - calibrate_delay(); - SMP_PRINTK(("Stack at about %p\n",&cpuid)); - - /* - * Save our processor parameters - */ - smp_store_cpu_info(cpuid); - - /* - * Allow the master to continue. - */ - set_bit(cpuid, (unsigned long *)&cpu_callin_map[0]); -} - -int cpucount = 0; - -extern int cpu_idle(void); - -/* - * Activate a secondary processor. - */ -int __init start_secondary(void *unused) -{ - /* - * Dont put anything before smp_callin(), SMP - * booting is too fragile that we want to limit the - * things done here to the most necessary things. - */ - cpu_init(); - smp_callin(); - while (!atomic_read(&smp_commenced)) - /* nothing */ ; - return cpu_idle(); -} - -/* - * Everything has been set up for the secondary - * CPUs - they just need to reload everything - * from the task structure - * This function must not return. - */ -void __init initialize_secondary(void) -{ - /* - * We don't actually need to load the full TSS, - * basically just the stack pointer and the eip. - */ - - asm volatile( - "movl %0,%%esp\n\t" - "jmp *%1" - : - :"r" (current->thread.esp),"r" (current->thread.eip)); -} - -extern struct { - void * esp; - unsigned short ss; -} stack_start; - -static int __init fork_by_hand(void) -{ - struct pt_regs regs; - /* don't care about the eip and regs settings since we'll never - reschedule the forked task. 
*/ - return do_fork(CLONE_VM|CLONE_PID, 0, ®s); -} - -static void __init do_boot_cpu(int i) -{ - unsigned long cfg; - pgd_t maincfg; - struct task_struct *idle; - unsigned long send_status, accept_status; - int timeout, num_starts, j; - unsigned long start_eip; - - cpucount++; - /* We can't use kernel_thread since we must _avoid_ to reschedule - the child. */ - if (fork_by_hand() < 0) - panic("failed fork for CPU %d", i); - - /* - * We remove it from the pidhash and the runqueue - * once we got the process: - */ - idle = init_task.prev_task; - if (!idle) - panic("No idle process for CPU %d", i); - - idle->processor = i; - __cpu_logical_map[cpucount] = i; - cpu_number_map[i] = cpucount; - idle->has_cpu = 1; /* we schedule the first task manually */ - idle->thread.eip = (unsigned long) start_secondary; - - del_from_runqueue(idle); - unhash_process(idle); - init_tasks[cpucount] = idle; - - /* start_eip had better be page-aligned! */ - start_eip = setup_trampoline(); - - printk("Booting processor %d eip %lx\n", i, start_eip); /* So we see what's up */ - stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); - - /* - * This grunge runs the startup process for - * the targeted processor. - */ - - SMP_PRINTK(("Setting warm reset code and vector.\n")); - - CMOS_WRITE(0xa, 0xf); - local_flush_tlb(); - SMP_PRINTK(("1.\n")); - *((volatile unsigned short *) phys_to_virt(0x469)) = start_eip >> 4; - SMP_PRINTK(("2.\n")); - *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf; - SMP_PRINTK(("3.\n")); - - maincfg=swapper_pg_dir[0]; - ((unsigned long *)swapper_pg_dir)[0]=0x102007; - - /* - * Be paranoid about clearing APIC errors. - */ - - if ( apic_version[i] & 0xF0 ) - { - apic_write(APIC_ESR, 0); - accept_status = (apic_read(APIC_ESR) & 0xEF); - } - - /* - * Status is now clean - */ - - send_status = 0; - accept_status = 0; - - /* - * Starting actual IPI sequence... 
- */ - - SMP_PRINTK(("Asserting INIT.\n")); - - /* - * Turn INIT on - */ - - cfg=apic_read(APIC_ICR2); - cfg&=0x00FFFFFF; - apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */ - cfg=apic_read(APIC_ICR); - cfg&=~0xCDFFF; /* Clear bits */ - cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_ASSERT | APIC_DEST_DM_INIT); - apic_write(APIC_ICR, cfg); /* Send IPI */ - - udelay(200); - SMP_PRINTK(("Deasserting INIT.\n")); - - cfg=apic_read(APIC_ICR2); - cfg&=0x00FFFFFF; - apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */ - cfg=apic_read(APIC_ICR); - cfg&=~0xCDFFF; /* Clear bits */ - cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_DM_INIT); - apic_write(APIC_ICR, cfg); /* Send IPI */ - - /* - * Should we send STARTUP IPIs ? - * - * Determine this based on the APIC version. - * If we don't have an integrated APIC, don't - * send the STARTUP IPIs. - */ - - if ( apic_version[i] & 0xF0 ) - num_starts = 2; - else - num_starts = 0; - - /* - * Run STARTUP IPI loop. - */ - - for (j = 1; !(send_status || accept_status) - && (j <= num_starts) ; j++) - { - SMP_PRINTK(("Sending STARTUP #%d.\n",j)); - apic_write(APIC_ESR, 0); - SMP_PRINTK(("After apic_write.\n")); - - /* - * STARTUP IPI - */ - - cfg=apic_read(APIC_ICR2); - cfg&=0x00FFFFFF; - apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); /* Target chip */ - cfg=apic_read(APIC_ICR); - cfg&=~0xCDFFF; /* Clear bits */ - cfg |= (APIC_DEST_DM_STARTUP | (start_eip >> 12)); /* Boot on the stack */ - SMP_PRINTK(("Before start apic_write.\n")); - apic_write(APIC_ICR, cfg); /* Kick the second */ - - SMP_PRINTK(("Startup point 1.\n")); - - timeout = 0; - SMP_PRINTK(("Waiting for send to finish...\n")); - do { - SMP_PRINTK(("+")); - udelay(100); - send_status = apic_read(APIC_ICR) & 0x1000; - } while (send_status && (timeout++ < 1000)); - - /* - * Give the other CPU some time to accept the IPI. 
- */ - udelay(200); - accept_status = (apic_read(APIC_ESR) & 0xEF); - } - SMP_PRINTK(("After Startup.\n")); - - if (send_status) /* APIC never delivered?? */ - printk("APIC never delivered???\n"); - if (accept_status) /* Send accept error */ - printk("APIC delivery error (%lx).\n", accept_status); - - if ( !(send_status || accept_status) ) - { - /* - * allow APs to start initializing. - */ - SMP_PRINTK(("Before Callout %d.\n", i)); - set_bit(i, (unsigned long *)&cpu_callout_map[0]); - SMP_PRINTK(("After Callout %d.\n", i)); - - for(timeout=0;timeout<50000;timeout++) - { - if (cpu_callin_map[0]&(1< +#include - printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n", - (long)cacheflush_time/(cpu_hz/1000000), - ((long)cacheflush_time*100/(cpu_hz/1000000)) % 100); -} +#include +#include +#include +#include -unsigned int prof_multiplier[NR_CPUS]; -unsigned int prof_old_multiplier[NR_CPUS]; -unsigned int prof_counter[NR_CPUS]; +#include +#include +#include /* - * Cycle through the processors sending APIC IPIs to boot each. + * Some notes on processor bugs: + * + * Pentium, Pentium Pro, II, III (and all CPUs) have bugs. + * The Linux implications for SMP are handled as follows: + * + * Pentium III / [Xeon] + * None of the E1AP-E3AP erratas are visible to the user. + * + * E1AP. see PII A1AP + * E2AP. see PII A2AP + * E3AP. see PII A3AP + * + * Pentium II / [Xeon] + * None of the A1AP-A3AP erratas are visible to the user. + * + * A1AP. see PPro 1AP + * A2AP. see PPro 2AP + * A3AP. see PPro 7AP + * + * Pentium Pro + * None of 1AP-9AP erratas are visible to the normal user, + * except occasional delivery of 'spurious interrupt' as trap #15. + * This is very rare and a non-problem. + * + * 1AP. Linux maps APIC as non-cacheable + * 2AP. worked around in hardware + * 3AP. fixed in C0 and above steppings microcode update. + * Linux does not use excessive STARTUP_IPIs. + * 4AP. worked around in hardware + * 5AP. symmetric IO mode (normal Linux operation) not affected. 
+ * 'noapic' mode has vector 0xf filled out properly. + * 6AP. 'noapic' mode might be affected - fixed in later steppings + * 7AP. We do not assume writes to the LVT deasserting IRQs + * 8AP. We do not enable low power mode (deep sleep) during MP bootup + * 9AP. We do not use mixed mode + * + * Pentium + * There is a marginal case where REP MOVS on 100MHz SMP + * machines with B stepping processors can fail. XXX should provide + * an L1cache=Writethrough or L1cache=off option. + * + * B stepping CPUs may hang. There are hardware workarounds + * for this. We warn about it in case your board doesn't have the + * workarounds. Basically that's so I can tell anyone with a B stepping + * CPU and SMP problems "tough". + * + * Specific items [From Pentium Processor Specification Update] + * + * 1AP. Linux doesn't use remote read + * 2AP. Linux doesn't trust APIC errors + * 3AP. We work around this + * 4AP. Linux never generated 3 interrupts of the same priority + * to cause a lost local interrupt. + * 5AP. Remote read is never used + * 6AP. not affected - worked around in hardware + * 7AP. not affected - worked around in hardware + * 8AP. worked around in hardware - we get explicit CS errors if not + * 9AP. only 'noapic' mode affected. Might generate spurious + * interrupts, we log only the first one and count the + * rest silently. + * 10AP. not affected - worked around in hardware + * 11AP. Linux reads the APIC between writes to avoid this, as per + * the documentation. Make sure you preserve this as it affects + * the C stepping chips too. + * 12AP. not affected - worked around in hardware + * 13AP. not affected - worked around in hardware + * 14AP. we always deassert INIT during bootup + * 15AP. not affected - worked around in hardware + * 16AP. not affected - worked around in hardware + * 17AP. not affected - worked around in hardware + * 18AP. not affected - worked around in hardware + * 19AP. 
not affected - worked around in BIOS + * + * If this sounds worrying believe me these bugs are either ___RARE___, + * or are signal timing bugs worked around in hardware and there's + * about nothing of note with C stepping upwards. */ -void __init smp_boot_cpus(void) -{ - int i; - -#ifdef CONFIG_MTRR - /* Must be done before other processors booted */ - mtrr_init_boot_cpu (); -#endif - /* - * Initialize the logical to physical CPU number mapping - * and the per-CPU profiling counter/multiplier - */ - - for (i = 0; i < NR_CPUS; i++) { - cpu_number_map[i] = -1; - prof_counter[i] = 1; - prof_old_multiplier[i] = 1; - prof_multiplier[i] = 1; - } - - /* - * Setup boot CPU information - */ - - smp_store_cpu_info(boot_cpu_id); /* Final full version of the data */ - smp_tune_scheduling(); - printk("CPU%d: ", boot_cpu_id); - print_cpu_info(&cpu_data[boot_cpu_id]); - - /* - * not necessary because the MP table should list the boot - * CPU too, but we do it for the sake of robustness anyway. - * (and for the case when a non-SMP board boots an SMP kernel) - */ - cpu_present_map |= (1 << hard_smp_processor_id()); - - cpu_number_map[boot_cpu_id] = 0; - - init_idle(); - - /* - * If we couldnt find an SMP configuration at boot time, - * get out of here now! - */ - - if (!smp_found_config) - { - printk(KERN_NOTICE "SMP motherboard not detected. Using dummy APIC emulation.\n"); -#ifndef CONFIG_VISWS - io_apic_irqs = 0; -#endif - cpu_online_map = cpu_present_map; - smp_num_cpus = 1; - goto smp_done; - } - - /* - * If SMP should be disabled, then really disable it! - */ - - if (!max_cpus) - { - smp_found_config = 0; - printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); - } - -#ifdef SMP_DEBUG - { - int reg; - - /* - * This is to verify that we're looking at - * a real local APIC. Check these against - * your board if the CPUs aren't getting - * started for no apparent reason. 
- */ - - reg = apic_read(APIC_VERSION); - SMP_PRINTK(("Getting VERSION: %x\n", reg)); - - apic_write(APIC_VERSION, 0); - reg = apic_read(APIC_VERSION); - SMP_PRINTK(("Getting VERSION: %x\n", reg)); - - /* - * The two version reads above should print the same - * NON-ZERO!!! numbers. If the second one is zero, - * there is a problem with the APIC write/read - * definitions. - * - * The next two are just to see if we have sane values. - * They're only really relevant if we're in Virtual Wire - * compatibility mode, but most boxes are anymore. - */ - - - reg = apic_read(APIC_LVT0); - SMP_PRINTK(("Getting LVT0: %x\n", reg)); - - reg = apic_read(APIC_LVT1); - SMP_PRINTK(("Getting LVT1: %x\n", reg)); - } -#endif - - enable_local_APIC(); - - /* - * Set up our local APIC timer: - */ - setup_APIC_clock (); - - /* - * Now scan the CPU present map and fire up the other CPUs. - */ - - /* - * Add all detected CPUs. (later on we can down individual - * CPUs which will change cpu_online_map but not necessarily - * cpu_present_map. We are pretty much ready for hot-swap CPUs.) - */ - cpu_online_map = cpu_present_map; - mb(); - - SMP_PRINTK(("CPU map: %lx\n", cpu_present_map)); - - for(i=0;i cpucount+1)) - { - do_boot_cpu(i); - } - - /* - * Make sure we unmap all failed CPUs - */ - - if (cpu_number_map[i] == -1 && (cpu_online_map & (1 << i))) { - printk("CPU #%d not responding. Removing from cpu_online_map.\n",i); - cpu_online_map &= ~(1 << i); - } - } - - /* - * Cleanup possible dangling ends... - */ - -#ifndef CONFIG_VISWS - { - unsigned long cfg; - - /* - * Install writable page 0 entry. - */ - cfg = pg0[0]; - pg0[0] = _PAGE_RW | _PAGE_PRESENT; /* writeable, present, addr 0 */ - local_flush_tlb(); - - /* - * Paranoid: Set warm reset code and vector here back - * to default values. - */ - - CMOS_WRITE(0, 0xf); - - *((volatile long *) phys_to_virt(0x467)) = 0; - - /* - * Restore old page 0 entry. 
- */ - - pg0[0] = cfg; - local_flush_tlb(); - } -#endif - - /* - * Allow the user to impress friends. - */ - - SMP_PRINTK(("Before bogomips.\n")); - if (!cpucount) { - printk(KERN_ERR "Error: only one processor found.\n"); - cpu_online_map = (1<CPU IPIs and self-IPIs too. */ - -/* - * Silly serialization to work around CPU bug in P5s. - * We can safely turn it off on a 686. - */ -#ifdef CONFIG_X86_GOOD_APIC -# define FORCE_APIC_SERIALIZATION 0 -#else -# define FORCE_APIC_SERIALIZATION 1 -#endif - static unsigned int cached_APIC_ICR; static unsigned int cached_APIC_ICR2; @@ -1462,7 +132,7 @@ static inline unsigned int __get_ICR (void) { -#if FORCE_APIC_SERIALIZATION +#if FORCE_READ_AROUND_WRITE /* * Wait for the APIC to become ready - this should never occur. It's * a debugging check really. @@ -1473,11 +143,11 @@ while (count < 1000) { cfg = slow_ICR; - if (!(cfg&(1<<12))) { - if (count) - atomic_add(count, (atomic_t*)&ipi_count); + if (!(cfg&(1<<12))) return cfg; - } + printk("CPU #%d: ICR still busy [%08x]\n", + smp_processor_id(), cfg); + irq_err_count++; count++; udelay(10); } @@ -1491,19 +161,25 @@ static inline unsigned int __get_ICR2 (void) { -#if FORCE_APIC_SERIALIZATION +#if FORCE_READ_AROUND_WRITE return slow_ICR2; #else return cached_APIC_ICR2; #endif } +#define LOGICAL_DELIVERY 1 + static inline int __prepare_ICR (unsigned int shortcut, int vector) { unsigned int cfg; cfg = __get_ICR(); - cfg |= APIC_DEST_DM_FIXED|shortcut|vector; + cfg |= APIC_DEST_DM_FIXED|shortcut|vector +#if LOGICAL_DELIVERY + |APIC_DEST_LOGICAL +#endif + ; return cfg; } @@ -1513,7 +189,11 @@ unsigned int cfg; cfg = __get_ICR2(); +#if LOGICAL_DELIVERY + cfg |= SET_APIC_DEST_FIELD((1< 1) + __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); } static inline void send_IPI_all(int vector) @@ -1566,7 +251,7 @@ static inline void send_IPI_single(int dest, int vector) { unsigned long cfg; -#if FORCE_APIC_SERIALIZATION +#if FORCE_READ_AROUND_WRITE unsigned long flags; __save_flags(flags); @@ 
-1589,7 +274,7 @@ * Send the IPI. The write to APIC_ICR fires this off. */ apic_write(APIC_ICR, cfg); -#if FORCE_APIC_SERIALIZATION +#if FORCE_READ_AROUND_WRITE __restore_flags(flags); #endif } @@ -1715,200 +400,99 @@ } /* - * this function sends a 'stop' IPI to all other CPUs in the system. - * it goes straight through. - */ - -void smp_send_stop(void) -{ - send_IPI_allbutself(STOP_CPU_VECTOR); -} - -/* Structure and data for smp_call_function(). This is designed to minimise + * Structure and data for smp_call_function(). This is designed to minimise * static memory requirements. It also looks cleaner. */ -struct smp_call_function_struct { +static volatile struct call_data_struct { void (*func) (void *info); void *info; - atomic_t unstarted_count; - atomic_t unfinished_count; + atomic_t started; + atomic_t finished; int wait; -}; -static volatile struct smp_call_function_struct *smp_call_function_data = NULL; +} *call_data = NULL; /* * this function sends a 'generic call function' IPI to all other CPUs * in the system. */ -int smp_call_function (void (*func) (void *info), void *info, int retry, - int wait) -/* [SUMMARY] Run a function on all other CPUs. - The function to run. This must be fast and non-blocking. - An arbitrary pointer to pass to the function. - If true, keep retrying until ready. - If true, wait until function has completed on other CPUs. - [RETURNS] 0 on success, else a negative status code. Does not return until - remote CPUs are nearly ready to execute <> or are or have executed. -*/ -{ - unsigned long timeout; - struct smp_call_function_struct data; - static spinlock_t lock = SPIN_LOCK_UNLOCKED; - - if (retry) { - while (1) { - if (smp_call_function_data) { - schedule (); /* Give a mate a go */ - continue; - } - spin_lock (&lock); - if (smp_call_function_data) { - spin_unlock (&lock); /* Bad luck */ - continue; - } - /* Mine, all mine! 
*/ - break; - } - } - else { - if (smp_call_function_data) return -EBUSY; - spin_lock (&lock); - if (smp_call_function_data) { - spin_unlock (&lock); - return -EBUSY; - } - } - smp_call_function_data = &data; - spin_unlock (&lock); - data.func = func; - data.info = info; - atomic_set (&data.unstarted_count, smp_num_cpus - 1); - data.wait = wait; - if (wait) atomic_set (&data.unfinished_count, smp_num_cpus - 1); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_allbutself (CALL_FUNCTION_VECTOR); - /* Wait for response */ - timeout = jiffies + JIFFIE_TIMEOUT; - while ( (atomic_read (&data.unstarted_count) > 0) && - time_before (jiffies, timeout) ) - barrier (); - if (atomic_read (&data.unstarted_count) > 0) { - smp_call_function_data = NULL; - return -ETIMEDOUT; - } - if (wait) - while (atomic_read (&data.unfinished_count) > 0) - barrier (); - smp_call_function_data = NULL; - return 0; -} - -static unsigned int calibration_result; - -void setup_APIC_timer(unsigned int clocks); - +int smp_call_function (void (*func) (void *info), void *info, int nonatomic, + int wait) /* - * Local timer interrupt handler. It does both profiling and - * process statistics/rescheduling. - * - * We do profiling in every local tick, statistics/rescheduling - * happen only every 'profiling multiplier' ticks. The default - * multiplier is 1 and it can be changed by writing the new multiplier - * value into /proc/profile. + * [SUMMARY] Run a function on all other CPUs. + * The function to run. This must be fast and non-blocking. + * An arbitrary pointer to pass to the function. + * If true, we might schedule away to lock the mutex + * If true, wait (atomically) until function has completed on other CPUs. + * [RETURNS] 0 on success, else a negative status code. Does not return until + * remote CPUs are nearly ready to execute <> or are or have executed. 
*/ - -void smp_local_timer_interrupt(struct pt_regs * regs) { - int user = (user_mode(regs) != 0); - int cpu = smp_processor_id(); + struct call_data_struct data; + int ret, cpus = smp_num_cpus-1; + static DECLARE_MUTEX(lock); + unsigned long timeout; - /* - * The profiling function is SMP safe. (nothing can mess - * around with "current", and the profiling counters are - * updated with atomic operations). This is especially - * useful with a profiling multiplier != 1 - */ - if (!user) - x86_do_profile(regs->eip); + printk("smp_call(), cpus:%d\n", cpus); - if (!--prof_counter[cpu]) { - int system = 1 - user; - struct task_struct * p = current; + if (nonatomic) + down(&lock); + else + if (down_trylock(&lock)) + return -EBUSY; - /* - * The multiplier may have changed since the last time we got - * to this point as a result of the user writing to - * /proc/profile. In this case we need to adjust the APIC - * timer accordingly. - * - * Interrupts are already masked off at this point. - */ - prof_counter[cpu] = prof_multiplier[cpu]; - if (prof_counter[cpu] != prof_old_multiplier[cpu]) { - setup_APIC_timer(calibration_result/prof_counter[cpu]); - prof_old_multiplier[cpu] = prof_counter[cpu]; - } + if (call_data) // temporary debugging check + BUG(); - /* - * After doing the above, we need to make like - * a normal interrupt - otherwise timer interrupts - * ignore the global interrupt lock, which is the - * WrongThing (tm) to do. 
- */ + call_data = &data; + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + mb(); - irq_enter(cpu, 0); - update_one_process(p, 1, user, system, cpu); - if (p->pid) { - p->counter -= 1; - if (p->counter <= 0) { - p->counter = 0; - p->need_resched = 1; - } - if (p->priority < DEF_PRIORITY) { - kstat.cpu_nice += user; - kstat.per_cpu_nice[cpu] += user; - } else { - kstat.cpu_user += user; - kstat.per_cpu_user[cpu] += user; - } - kstat.cpu_system += system; - kstat.per_cpu_system[cpu] += system; + /* Send a message to all other CPUs and wait for them to respond */ + send_IPI_allbutself(CALL_FUNCTION_VECTOR); - } - irq_exit(cpu, 0); - } + /* Wait for response */ + timeout = jiffies + HZ; + while ((atomic_read(&data.started) != cpus) + && time_before(jiffies, timeout)) + barrier(); + ret = -ETIMEDOUT; + if (atomic_read(&data.started) != cpus) + goto out; + ret = 0; + if (wait) + while (atomic_read(&data.finished) != cpus) + barrier(); +out: + call_data = NULL; + up(&lock); + return 0; +} +static void stop_this_cpu (void * dummy) +{ /* - * We take the 'long' return path, and there every subsystem - * grabs the apropriate locks (kernel lock/ irq lock). - * - * we might want to decouple profiling from the 'long path', - * and do the profiling totally in assembly. - * - * Currently this isn't too much of an issue (performance wise), - * we can take more than 100K local irqs per second on a 100 MHz P5. + * Remove this CPU: */ + clear_bit(smp_processor_id(), &cpu_online_map); + + if (cpu_data[smp_processor_id()].hlt_works_ok) + for(;;) __asm__("hlt"); + for (;;); } /* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesnt support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. 
Thus we cannot inline the local irq ... ] + * this function calls the 'stop' function on all other CPUs in the system. */ -void smp_apic_timer_interrupt(struct pt_regs * regs) + +void smp_send_stop(void) { - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow, and we - * want to be able to accept NMI tlb invalidates - * during this time. - */ - ack_APIC_irq(); - smp_local_timer_interrupt(regs); + smp_call_function(stop_this_cpu, NULL, 1, 0); } /* @@ -1944,39 +528,24 @@ } -static void stop_this_cpu (void) +asmlinkage void smp_call_function_interrupt(void) { + void (*func) (void *info) = call_data->func; + void *info = call_data->info; + int wait = call_data->wait; + + ack_APIC_irq(); /* - * Remove this CPU: + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function */ - clear_bit(smp_processor_id(), &cpu_online_map); - - if (cpu_data[smp_processor_id()].hlt_works_ok) - for(;;) __asm__("hlt"); - for (;;); -} - -/* - * CPU halt call-back - */ -asmlinkage void smp_stop_cpu_interrupt(void) -{ - stop_this_cpu(); -} - -asmlinkage void smp_call_function_interrupt(void) -{ - void (*func) (void *info) = smp_call_function_data->func; - void *info = smp_call_function_data->info; - int wait = smp_call_function_data->wait; - - ack_APIC_irq (); - /* Notify initiating CPU that I've grabbed the data and am about to - execute the function */ - atomic_dec (&smp_call_function_data->unstarted_count); - /* At this point the structure may be out of scope unless wait==1 */ - (*func) (info); - if (wait) atomic_dec (&smp_call_function_data->unfinished_count); + atomic_inc(&call_data->started); + /* + * At this point the structure may be out of scope unless wait==1 + */ + (*func)(info); + if (wait) + atomic_inc(&call_data->finished); } /* @@ -1991,6 +560,34 @@ } /* + * This interrupt should never happen with our APIC/SMP architecture + */ + +static spinlock_t err_lock; + +asmlinkage void smp_error_interrupt(void) +{ + 
unsigned long v; + + spin_lock(&err_lock); + + v = apic_read(APIC_ESR); + printk("APIC error interrupt on CPU#%d, should never happen.\n", + smp_processor_id()); + printk("... APIC ESR0: %08lx\n", v); + + apic_write(APIC_ESR, 0); + v = apic_read(APIC_ESR); + printk("... APIC ESR1: %08lx\n", v); + + ack_APIC_irq(); + + irq_err_count++; + + spin_unlock(&err_lock); +} + +/* * This part sets up the APIC 32 bit clock in LVTT1, with HZ interrupts * per second. We assume that the caller has already set up the local * APIC. @@ -1999,6 +596,10 @@ * closely follows bus clocks. */ +int prof_multiplier[NR_CPUS] = { 1, }; +int prof_old_multiplier[NR_CPUS] = { 1, }; +int prof_counter[NR_CPUS] = { 1, }; + /* * The timer chip is already set up at HZ interrupts per second here, * but we do not accept timer interrupts yet. We only allow the BP @@ -2015,66 +616,102 @@ return count; } +void __init wait_8254_wraparound(void) +{ + unsigned int curr_count, prev_count=~0; + int delta; + + curr_count = get_8254_timer_count(); + + do { + prev_count = curr_count; + curr_count = get_8254_timer_count(); + delta = curr_count-prev_count; + + /* + * This limit for delta seems arbitrary, but it isn't, it's + * slightly above the level of error a buggy Mercury/Neptune + * chipset timer can cause. + */ + + } while (delta<300); +} + /* * This function sets up the local APIC timer, with a timeout of * 'clocks' APIC bus clock. During calibration we actually call - * this function twice, once with a bogus timeout value, second - * time for real. The other (noncalibrating) CPUs call this - * function only once, with the real value. - * - * We are strictly in irqs off mode here, as we do not want to - * get an APIC interrupt go off accidentally. + * this function twice on the boot CPU, once with a bogus timeout + * value, second time for real. The other (noncalibrating) CPUs + * call this function only once, with the real, calibrated value. 
* * We do reads before writes even if unnecessary, to get around the - * APIC double write bug. + * P5 APIC double write bug. */ #define APIC_DIVISOR 16 -void setup_APIC_timer(unsigned int clocks) +void __setup_APIC_LVTT(unsigned int clocks) { - unsigned long lvtt1_value; - unsigned int tmp_value; + unsigned int lvtt1_value, tmp_value; - /* - * Unfortunately the local APIC timer cannot be set up into NMI - * mode. With the IO APIC we can re-route the external timer - * interrupt and broadcast it as an NMI to all CPUs, so no pain. - */ tmp_value = apic_read(APIC_LVTT); - lvtt1_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; - apic_write(APIC_LVTT , lvtt1_value); + lvtt1_value = SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV) | + APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; + apic_write(APIC_LVTT, lvtt1_value); /* * Divide PICLK by 16 */ tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR , (tmp_value & ~APIC_TDR_DIV_1 ) - | APIC_TDR_DIV_16); + apic_write(APIC_TDCR, (tmp_value + & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) + | APIC_TDR_DIV_16); tmp_value = apic_read(APIC_TMICT); apic_write(APIC_TMICT, clocks/APIC_DIVISOR); } -void __init wait_8254_wraparound(void) +void setup_APIC_timer(void * data) { - unsigned int curr_count, prev_count=~0; + unsigned int clocks = (unsigned int) data, slice, t0, t1, nr; + unsigned long flags; int delta; - curr_count = get_8254_timer_count(); - - do { - prev_count = curr_count; - curr_count = get_8254_timer_count(); - delta = curr_count-prev_count; + __save_flags(flags); + __sti(); + /* + * ok, Intel has some smart code in their APIC that knows + * if a CPU was in 'hlt' lowpower mode, and this increases + * its APIC arbitration priority. To avoid the external timer + * IRQ APIC event being in synchron with the APIC clock we + * introduce an interrupt skew to spread out timer events. 
+ * + * The number of slices within a 'big' timeslice is smp_num_cpus+1 + */ + slice = clocks / (smp_num_cpus+1); + nr = cpu_number_map[smp_processor_id()] + 1; + printk("cpu: %d, clocks: %d, slice: %d, nr: %d.\n", + smp_processor_id(), clocks, slice, nr); /* - * This limit for delta seems arbitrary, but it isn't, it's - * slightly above the level of error a buggy Mercury/Neptune - * chipset timer can cause. + * Wait for IRQ0's slice: */ + wait_8254_wraparound(); - } while (delta<300); + __setup_APIC_LVTT(clocks); + + t0 = apic_read(APIC_TMCCT)*APIC_DIVISOR; + do { + t1 = apic_read(APIC_TMCCT)*APIC_DIVISOR; + delta = (int)(t0 - t1 - slice*nr); + } while (delta < 0); + + __setup_APIC_LVTT(clocks); + + printk("CPU%d\n", + smp_processor_id(), t0, t1, delta, slice, clocks); + + __restore_flags(flags); } /* @@ -2092,10 +729,11 @@ int __init calibrate_APIC_clock(void) { - unsigned long long t1,t2; - long tt1,tt2; - long calibration_result; + unsigned long long t1 = 0, t2 = 0; + long tt1, tt2; + long result; int i; + const int LOOPS = HZ/10; printk("calibrating APIC timer ... "); @@ -2104,7 +742,7 @@ * value into the APIC clock, we just want to get the * counter running for calibration. */ - setup_APIC_timer(1000000000); + __setup_APIC_LVTT(1000000000); /* * The timer chip counts down to zero. Let's wait @@ -2112,23 +750,24 @@ * (the current tick might have been already half done) */ - wait_8254_wraparound (); + wait_8254_wraparound(); /* * We wrapped around just now. Let's start: */ - rdtscll(t1); - tt1=apic_read(APIC_TMCCT); + if (cpu_has_tsc) + rdtscll(t1); + tt1 = apic_read(APIC_TMCCT); -#define LOOPS (HZ/10) /* * Let's wait LOOPS wraprounds: */ - for (i=0; ieip); + + if (--prof_counter[cpu] <= 0) { + int system = 1 - user; + struct task_struct * p = current; + + /* + * The multiplier may have changed since the last time we got + * to this point as a result of the user writing to + * /proc/profile. In this case we need to adjust the APIC + * timer accordingly. 
+ * + * Interrupts are already masked off at this point. + */ + prof_counter[cpu] = prof_multiplier[cpu]; + if (prof_counter[cpu] != prof_old_multiplier[cpu]) { + __setup_APIC_LVTT(calibration_result/prof_counter[cpu]); + prof_old_multiplier[cpu] = prof_counter[cpu]; + } + + /* + * After doing the above, we need to make like + * a normal interrupt - otherwise timer interrupts + * ignore the global interrupt lock, which is the + * WrongThing (tm) to do. + */ + + irq_enter(cpu, 0); + update_one_process(p, 1, user, system, cpu); + if (p->pid) { + p->counter -= 1; + if (p->counter <= 0) { + p->counter = 0; + p->need_resched = 1; + } + if (p->priority < DEF_PRIORITY) { + kstat.cpu_nice += user; + kstat.per_cpu_nice[cpu] += user; + } else { + kstat.cpu_user += user; + kstat.per_cpu_user[cpu] += user; + } + kstat.cpu_system += system; + kstat.per_cpu_system[cpu] += system; + + } + irq_exit(cpu, 0); + } + + /* + * We take the 'long' return path, and there every subsystem + * grabs the apropriate locks (kernel lock/ irq lock). + * + * we might want to decouple profiling from the 'long path', + * and do the profiling totally in assembly. + * + * Currently this isn't too much of an issue (performance wise), + * we can take more than 100K local irqs per second on a 100 MHz P5. + */ +} + +/* + * Local APIC timer interrupt. This is the most natural way for doing + * local interrupts, but local timer interrupts can be emulated by + * broadcast interrupts too. [in case the hw doesnt support APIC timers] + * + * [ if a single-CPU system runs an SMP kernel then we call the local + * interrupt as well. Thus we cannot inline the local irq ... ] + */ +unsigned int apic_timer_irqs [NR_CPUS] = { 0, }; + +void smp_apic_timer_interrupt(struct pt_regs * regs) +{ + /* + * the NMI deadlock-detector uses this. + */ + apic_timer_irqs[smp_processor_id()]++; + + /* + * NOTE! We'd better ACK the irq immediately, + * because timer handling can be slow. 
+ */ + ack_APIC_irq(); + smp_local_timer_interrupt(regs); +} --- linux/arch/i386/kernel/traps.c.orig Sat Sep 25 21:31:20 1999 +++ linux/arch/i386/kernel/traps.c Sat Sep 25 22:52:21 1999 @@ -2,6 +2,8 @@ * linux/arch/i386/traps.c * * Copyright (C) 1991, 1992 Linus Torvalds + * + * 1998, Ingo Molnar, added NMI-Watchdog driver */ /* @@ -58,10 +60,17 @@ */ struct desc_struct idt_table[256] __attribute__((__section__(".data.idt"))) = { {0, 0}, }; +extern int console_loglevel; + +static inline void console_silent(void) +{ + console_loglevel = 0; +} + static inline void console_verbose(void) { - extern int console_loglevel; - console_loglevel = 15; + if (console_loglevel) + console_loglevel = 15; } #define DO_ERROR(trapnr, signr, str, name, tsk) \ @@ -202,8 +211,6 @@ printk("%s: %04lx\n", str, err & 0xffff); show_registers(regs); -spin_lock_irq(&die_lock); - spin_unlock_irq(&die_lock); do_exit(SIGSEGV); } @@ -292,7 +299,11 @@ { printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n"); printk("You probably have a hardware problem with your RAM chips\n"); -} + + /* Clear and disable the memory parity error line. 
*/ + reason = (reason & 0xf) | 4; + outb(reason, 0x61); +} static void io_check_error(unsigned char reason, struct pt_regs * regs) { @@ -301,8 +312,8 @@ printk("NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); - /* Re-enable the IOCK line, wait for a few seconds */ - reason |= 8; + /* Re-enable the IOCK line, wait for a few seconds */ + reason = (reason & 0xf) | 8; outb(reason, 0x61); i = 2000; while (--i) udelay(1000); @@ -325,18 +336,107 @@ printk("Do you have a strange power saving mode enabled?\n"); } +atomic_t nmi_counter[NR_CPUS]; + +#if CONFIG_SMP + +int nmi_watchdog = 1; + +static int __init setup_nmi_watchdog(char *str) +{ + get_option(&str, &nmi_watchdog); + return 1; +} + +__setup("nmi_watchdog=", setup_nmi_watchdog); + +extern spinlock_t console_lock; +static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED; + +inline void nmi_watchdog_tick(struct pt_regs * regs) +{ + /* + * the best way to detect whether a CPU has a 'hard lockup' problem + * is to check its local APIC timer IRQ counts. If they are not + * changing then that CPU has some problem. + * + * as these watchdog NMI IRQs are broadcasted to every CPU, here + * we only have to check the current processor. + * + * since NMIs don't listen to _any_ locks, we have to be extremely + * careful not to rely on unsafe variables. The printk might lock + * up though, so we have to break up console_lock first ... + * [when there will be more tty-related locks, break them up + * here too!] + */ + + static unsigned int last_irq_sums [NR_CPUS] = { 0, }, + alert_counter [NR_CPUS] = { 0, }; + + /* + * Since current-> is always on the stack, and we always switch + * the stack NMI-atomically, it's safe to use smp_processor_id(). + */ + int sum, cpu = smp_processor_id(); + + sum = apic_timer_irqs[cpu]; + + if (last_irq_sums[cpu] == sum) { + /* + * Ayiee, looks like this CPU is stuck ... + * wait a few IRQs (5 seconds) before doing the oops ... 
+ */ + alert_counter[cpu]++; + if (alert_counter[cpu] == 5*HZ) { + spin_lock(&nmi_print_lock); + spin_unlock(&console_lock); // we are in trouble anyway + printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu); + show_registers(regs); + printk("console shuts up ...\n"); + console_silent(); + spin_unlock(&nmi_print_lock); + do_exit(SIGSEGV); + } + } else { + last_irq_sums[cpu] = sum; + alert_counter[cpu] = 0; + } +} +#endif + asmlinkage void do_nmi(struct pt_regs * regs, long error_code) { unsigned char reason = inb(0x61); - extern atomic_t nmi_counter; - atomic_inc(&nmi_counter); + atomic_inc(nmi_counter+smp_processor_id()); + if (!(reason & 0xc0)) { +#if CONFIG_SMP + /* + * Ok, so this is none of the documented NMI sources, + * so it must be the NMI watchdog. + */ + if (nmi_watchdog) { + nmi_watchdog_tick(regs); + return; + } else + unknown_nmi_error(reason, regs); +#else + unknown_nmi_error(reason, regs); +#endif + return; + } if (reason & 0x80) mem_parity_error(reason, regs); if (reason & 0x40) io_check_error(reason, regs); - if (!(reason & 0xc0)) - unknown_nmi_error(reason, regs); + /* + * Reassert NMI in case it became active meanwhile + * as it's edge-triggered. 
+ */ + outb(0x8f, 0x70); + inb(0x71); /* dummy */ + outb(0x0f, 0x70); + inb(0x71); /* dummy */ } /* @@ -455,6 +555,7 @@ asmlinkage void math_state_restore(struct pt_regs regs) { __asm__ __volatile__("clts"); /* Allow maths ops (or we recurse) */ + if(current->used_math) __asm__("frstor %0": :"m" (current->thread.i387)); else @@ -658,7 +759,7 @@ */ set_fixmap(FIX_APIC_BASE, APIC_PHYS_BASE); printk("Local APIC ID %lx\n", apic_read(APIC_ID)); - printk("Local APIC Version %lx\n", apic_read(APIC_VERSION)); + printk("Local APIC Version %lx\n", apic_read(APIC_LVR)); set_fixmap(FIX_CO_CPU, CO_CPU_PHYS); printk("Cobalt Revision %lx\n", co_cpu_read(CO_CPU_REV)); --- linux/arch/i386/kernel/io_apic.c.orig Wed Sep 1 08:29:05 1999 +++ linux/arch/i386/kernel/io_apic.c Sun Sep 26 17:53:50 1999 @@ -1,7 +1,7 @@ /* * Intel IO-APIC support for multi-Pentium hosts. * - * Copyright (C) 1997, 1998 Ingo Molnar, Hajnalka Szabo + * Copyright (C) 1997, 1998, 1999 Ingo Molnar, Hajnalka Szabo * * Many thanks to Stig Venaas for trying out countless experimental * patches and reporting/debugging problems patiently! 
@@ -18,15 +18,21 @@ #include #include #include +#include #include +#undef __init +#define __init + /* * volatile is justified in this case, IO-APIC register contents * might change spontaneously, GCC should not cache it */ #define IO_APIC_BASE(idx) ((volatile int *)__fix_to_virt(FIX_IO_APIC_BASE_0 + idx)) +extern int nmi_watchdog; + /* * The structure of the IO-APIC: */ @@ -59,6 +65,11 @@ enum ioapic_irq_destination_types { dest_Fixed = 0, dest_LowestPrio = 1, + dest_SMI = 2, + dest__reserved_1 = 3, + dest_NMI = 4, + dest_INIT = 5, + dest__reserved_2 = 6, dest_ExtINT = 7 }; @@ -94,14 +105,7 @@ * MP-BIOS irq configuration table structures: */ -enum mp_irq_source_types { - mp_INT = 0, - mp_NMI = 1, - mp_SMI = 2, - mp_ExtINT = 3 -}; - -struct mpc_config_ioapic mp_apics[MAX_IO_APICS];/* I/O APIC entries */ +struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];/* I/O APIC entries */ int mp_irq_entries = 0; /* # of MP IRQ source entries */ struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* MP IRQ source entries */ @@ -202,16 +206,10 @@ FINAL; \ } -/* - * We disable IO-APIC IRQs by setting their 'destination CPU mask' to - * zero. Trick by Ramesh Nalluri. 
- */ -DO_ACTION( disable, 1, &= 0x00ffffff, io_apic_sync(entry->apic))/* destination = 0x00 */ -DO_ACTION( enable, 1, |= 0xff000000, ) /* destination = 0xff */ DO_ACTION( mask, 0, |= 0x00010000, io_apic_sync(entry->apic))/* mask = 1 */ DO_ACTION( unmask, 0, &= 0xfffeffff, ) /* mask = 0 */ -static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) +void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) { struct IO_APIC_route_entry entry; @@ -289,7 +287,7 @@ for (i = 0; i < mp_irq_entries; i++) if ( (mp_irqs[i].mpc_irqtype == type) && - (mp_irqs[i].mpc_dstapic == mp_apics[apic].mpc_apicid) && + (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid) && (mp_irqs[i].mpc_dstirq == pin)) return i; @@ -330,7 +328,7 @@ int lbus = mp_irqs[i].mpc_srcbus; for (apic = 0; apic < nr_ioapics; apic++) - if (mp_apics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) break; if ((apic || IO_APIC_IRQ(mp_irqs[i].mpc_dstirq)) && @@ -589,24 +587,30 @@ static int current_vector = IRQ0_TRAP_VECTOR, offset = 0; if (IO_APIC_VECTOR(irq) > 0) return IO_APIC_VECTOR(irq); + if (current_vector == 0xFF) + panic("ran out of interrupt sources!"); +next: current_vector += 8; - if (current_vector > 0xFE) { + if (current_vector == SYSCALL_VECTOR) + goto next; + + if (current_vector > 0xFF) { offset++; current_vector = IRQ0_TRAP_VECTOR + offset; - printk("WARNING: ASSIGN_IRQ_VECTOR wrapped back to %02X\n", - current_vector); } - if (current_vector == SYSCALL_VECTOR) - panic("ran out of interrupt sources!"); IO_APIC_VECTOR(irq) = current_vector; return current_vector; } +extern void (*interrupt[NR_IRQS])(void); +static struct hw_interrupt_type ioapic_level_irq_type; +static struct hw_interrupt_type ioapic_edge_irq_type; + void __init setup_IO_APIC_irqs(void) { struct IO_APIC_route_entry entry; - int apic, pin, idx, irq, first_notcon = 1; + int apic, pin, idx, irq, first_notcon = 1, vector; printk("init IO_APIC IRQs\n"); @@ -621,15 
+625,33 @@ entry.delivery_mode = dest_LowestPrio; entry.dest_mode = 1; /* logical delivery */ entry.mask = 0; /* enable IRQ */ - entry.dest.logical.logical_dest = 0; /* but no route */ + entry.dest.logical.logical_dest = APIC_ALL_CPUS; /* all CPUs */ idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { + if (!apic && pin && IO_APIC_IRQ(pin) && (pin < 16)) { + /* + * Install 'default handler' into unlisted + * entries. + */ + printk("adding unlisted pin %d as IRQ%d.\n", + pin, pin); + irq = pin; + entry.trigger = EISA_ELCR(irq); + entry.polarity = 0; + vector = assign_irq_vector(irq); + add_pin_to_irq(irq, apic, pin); + irq_desc[irq].handler = &ioapic_edge_irq_type; + entry.vector = vector; + set_intr_gate(vector, interrupt[irq]); + goto do_write; + } + if (first_notcon) { - printk(" IO-APIC (apicid-pin) %d-%d", mp_apics[apic].mpc_apicid, pin); + printk(" IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin); first_notcon = 0; } else - printk(", %d-%d", mp_apics[apic].mpc_apicid, pin); + printk(", %d-%d", mp_ioapics[apic].mpc_apicid, pin); continue; } @@ -639,17 +661,30 @@ if (irq_trigger(idx)) { entry.trigger = 1; entry.mask = 1; - entry.dest.logical.logical_dest = 0xff; + entry.dest.logical.logical_dest = APIC_ALL_CPUS; } - irq = pin_2_irq(idx,apic,pin); + irq = pin_2_irq(idx, apic, pin); add_pin_to_irq(irq, apic, pin); if (!apic && !IO_APIC_IRQ(irq)) continue; - entry.vector = assign_irq_vector(irq); + if (IO_APIC_IRQ(irq)) { + vector = assign_irq_vector(irq); + entry.vector = vector; + if (IO_APIC_irq_trigger(irq)) + irq_desc[irq].handler = &ioapic_level_irq_type; + else + irq_desc[irq].handler = &ioapic_edge_irq_type; + + set_intr_gate(vector, interrupt[irq]); + + if (!apic && (irq < 16)) + disable_8259A_irq(irq); + } +do_write: io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); } @@ -660,34 +695,47 @@ } /* - * Set up a certain pin as ExtINT delivered interrupt + * Set up the 8259A-master 
output pin as broadcast to all + * CPUs. */ -void __init setup_ExtINT_pin(unsigned int apic, unsigned int pin, int irq) +void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) { struct IO_APIC_route_entry entry; - /* - * add it to the IO-APIC irq-routing table: - */ memset(&entry,0,sizeof(entry)); - entry.delivery_mode = dest_ExtINT; - entry.dest_mode = 0; /* physical delivery */ - entry.mask = 0; /* unmask IRQ now */ - /* - * We use physical delivery to get the timer IRQ - * to the boot CPU. 'boot_cpu_id' is the physical - * APIC ID of the boot CPU. - */ - entry.dest.physical.physical_dest = boot_cpu_id; + disable_8259A_irq(0); + + apic_readaround(APIC_LVT0); + apic_write(APIC_LVT0, 0x00010700); // mask LVT0 - entry.vector = assign_irq_vector(irq); + init_8259A(1); + /* + * We use logical delivery to get the timer IRQ + * to the first CPU. + */ + entry.dest_mode = 1; /* logical delivery */ + entry.mask = 0; /* unmask IRQ now */ + entry.dest.logical.logical_dest = APIC_ALL_CPUS; + entry.delivery_mode = dest_LowestPrio; entry.polarity = 0; entry.trigger = 0; + entry.vector = vector; - io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); - io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); + /* + * The timer IRQ doesnt have to know that behind the + * scene we have a 8259A-master in AEOI mode ... + */ + irq_desc[0].handler = &ioapic_edge_irq_type; + + /* + * Add it to the IO-APIC irq-routing table: + */ + io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); + + enable_8259A_irq(0); } void __init UNEXPECTED_IO_APIC(void) @@ -705,7 +753,7 @@ printk("number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) - printk("number of IO-APIC #%d registers: %d.\n", mp_apics[i].mpc_apicid, nr_ioapic_registers[i]); + printk("number of IO-APIC #%d registers: %d.\n", mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]); /* * We are a bit conservative about what we expect. 
We have to @@ -717,8 +765,10 @@ *(int *)&reg_00 = io_apic_read(apic, 0); *(int *)&reg_01 = io_apic_read(apic, 1); - *(int *)&reg_02 = io_apic_read(apic, 2); - printk("\nIO APIC #%d......\n", mp_apics[apic].mpc_apicid); + if (reg_01.version >= 0x10) + *(int *)&reg_02 = io_apic_read(apic, 2); + + printk("\nIO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); printk(".... register #00: %08X\n", *(int *)&reg_00); printk("....... : physical APIC id: %02X\n", reg_00.ID); if (reg_00.__reserved_1 || reg_00.__reserved_2) @@ -730,12 +780,15 @@ (reg_01.entries != 0x17) && /* typical ISA+PCI boards */ (reg_01.entries != 0x1b) && /* Compaq Proliant boards */ (reg_01.entries != 0x1f) && /* dual Xeon boards */ - (reg_01.entries != 0x3F) /* bigger Xeon boards */ + (reg_01.entries != 0x22) && /* bigger Xeon boards */ + (reg_01.entries != 0x2E) && + (reg_01.entries != 0x3F) ) UNEXPECTED_IO_APIC(); printk("....... : IO APIC version: %04X\n", reg_01.version); - if ( (reg_01.version != 0x10) && /* oldest IO-APICs */ + if ( (reg_01.version != 0x01) && /* 82489DX IO-APICs */ + (reg_01.version != 0x10) && /* oldest IO-APICs */ (reg_01.version != 0x11) && /* Pentium/Pro IO-APICs */ (reg_01.version != 0x13) /* Xeon IO-APICs */ ) @@ -743,10 +796,12 @@ if (reg_01.__reserved_1 || reg_01.__reserved_2) UNEXPECTED_IO_APIC(); - printk(".... register #02: %08X\n", *(int *)&reg_02); - printk("....... : arbitration: %02X\n", reg_02.arbitration); - if (reg_02.__reserved_1 || reg_02.__reserved_2) - UNEXPECTED_IO_APIC(); + if (reg_01.version >= 0x10) { + printk(".... register #02: %08X\n", *(int *)&reg_02); + printk("....... : arbitration: %02X\n", reg_02.arbitration); + if (reg_02.__reserved_1 || reg_02.__reserved_2) + UNEXPECTED_IO_APIC(); + } printk(".... 
IRQ redirection table:\n"); @@ -797,8 +852,116 @@ return; } +static void print_APIC_bitfield (int base) +{ + unsigned int v; + int i, j; + + printk("0123456789abcdef0123456789abcdef\n"); + for (i = 0; i < 8; i++) { + v = apic_read(base + i*0x10); + for (j = 0; j < 32; j++) { + if (v & (1< 3) { + apic_readaround(APIC_SPIV); // not strictly necessery + apic_write(APIC_ESR, 0); + } + v = apic_read(APIC_ESR); + printk("... APIC ESR: %08x\n", v); + } + + v = apic_read(APIC_ICR); + printk("... APIC ICR: %08x\n", v); + v = apic_read(APIC_ICR2); + printk("... APIC ICR2: %08x\n", v); + + v = apic_read(APIC_LVTT); + printk("... APIC LVTT: %08x\n", v); + + if (maxlvt > 3) { /* PC is LVT#4. */ + v = apic_read(APIC_LVTPC); + printk("... APIC LVTPC: %08x\n", v); + } + v = apic_read(APIC_LVT0); + printk("... APIC LVT0: %08x\n", v); + v = apic_read(APIC_LVT1); + printk("... APIC LVT1: %08x\n", v); + + if (maxlvt > 2) { /* ERR is LVT#3. */ + v = apic_read(APIC_LVTERR); + printk("... APIC LVTERR: %08x\n", v); + } + + v = apic_read(APIC_TMICT); + printk("... APIC TMICT: %08x\n", v); + v = apic_read(APIC_TMCCT); + printk("... APIC TMCCT: %08x\n", v); + v = apic_read(APIC_TDCR); + printk("... APIC TDCR: %08x\n", v); + printk("\n"); +} + +void print_all_local_APICs (void) +{ + smp_call_function(print_local_APIC, NULL, 1, 1); + print_local_APIC(NULL); +} + static void __init init_sym_mode(void) { + struct IO_APIC_reg_01 reg_01; int i; for (i = 0; i < PIN_MAP_SIZE; i++) { @@ -809,24 +972,21 @@ for (i = 0; i < MAX_PIRQS; i++) pirq_entries[i] =- 1; - printk("enabling symmetric IO mode... "); - - outb(0x70, 0x22); - outb(0x01, 0x23); - - printk("...done.\n"); + if (pic_mode) { + /* + * PIC mode, enable symmetric IO mode in the IMCR. 
+ */ + printk("leaving PIC mode, enabling symmetric IO mode.\n"); + outb(0x70, 0x22); + outb(0x01, 0x23); + } /* * The number of IO-APIC IRQ registers (== #pins): */ - { - struct IO_APIC_reg_01 reg_01; - int i; - - for (i = 0; i < nr_ioapics; i++) { - *(int *)&reg_01 = io_apic_read(i, 1); - nr_ioapic_registers[i] = reg_01.entries+1; - } + for (i = 0; i < nr_ioapics; i++) { + *(int *)&reg_01 = io_apic_read(i, 1); + nr_ioapic_registers[i] = reg_01.entries+1; } /* @@ -835,24 +995,41 @@ clear_IO_APIC(); } +static void clear_lapic_ints (void * dummy) +{ + int maxlvt; + + maxlvt = get_maxlvt(); + apic_write_around(APIC_LVTT, 0x00010000); + apic_write_around(APIC_LVT0, 0x00010000); + apic_write_around(APIC_LVT1, 0x00010000); + if (maxlvt >= 3) + apic_write_around(APIC_LVTERR, 0x00010000); + if (maxlvt >= 4) + apic_write_around(APIC_LVTPC, 0x00010000); +} + /* * Not an __init, needed by the reboot code */ void init_pic_mode(void) { /* - * Clear the IO-APIC before rebooting: + * Clear the IO-APIC and local APICs before rebooting: */ clear_IO_APIC(); + smp_call_function(clear_lapic_ints, NULL, 1, 1); + clear_lapic_ints(NULL); /* * Put it back into PIC mode (has an effect only on - * certain boards) + * certain older boards) */ - printk("disabling symmetric IO mode... 
"); + if (pic_mode) { + printk("disabling symmetric IO mode, entering PIC mode.\n"); outb_p(0x70, 0x22); outb_p(0x00, 0x23); - printk("...done.\n"); + } } static void __init setup_ioapic_id(void) @@ -914,10 +1091,13 @@ * MP specification 1.4 defines some extra rules for default * configurations, fix them up here: */ - switch (mpc_default_type) { case 2: + /* + * IRQ0 is not connected: + */ + mp_irqs[0].mpc_irqtype = mp_ExtINT; break; default: /* @@ -942,7 +1122,7 @@ unsigned int t1 = jiffies; sti(); - mdelay(100); + mdelay(40); if (jiffies-t1>1) return 1; @@ -950,6 +1130,27 @@ return 0; } +extern atomic_t nmi_counter[NR_CPUS]; + +static int __init nmi_irq_works(void) +{ + atomic_t tmp[NR_CPUS]; + int j, cpu; + + memcpy(tmp, nmi_counter, sizeof(tmp)); + sti(); + mdelay(50); + + for (j = 0; j < smp_num_cpus; j++) { + cpu = cpu_logical_map(j); + if (atomic_read(nmi_counter+cpu) - atomic_read(tmp+cpu) <= 3) { + printk("CPU#%d NMI appears to be stuck.\n", cpu); + return 0; + } + } + return 1; +} + /* * In the SMP+IOAPIC case it might happen that there are an unspecified * number of pending IRQ events unhandled. These cases are very rare, @@ -964,12 +1165,11 @@ */ static void enable_edge_ioapic_irq(unsigned int irq) { - enable_IO_APIC_irq(irq); + unmask_IO_APIC_irq(irq); } static void disable_edge_ioapic_irq(unsigned int irq) { - disable_IO_APIC_irq(irq); } /* @@ -995,8 +1195,17 @@ } #define shutdown_edge_ioapic_irq disable_edge_ioapic_irq -void static ack_edge_ioapic_irq(unsigned int i) -{ + +/* + * Once we have recorded IRQ_PENDING already, we can mask the + * interrupt for real. This prevents IRQ storms from unhandled + * devices. 
+ */ +void static ack_edge_ioapic_irq(unsigned int irq) +{ + if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) + == (IRQ_PENDING | IRQ_DISABLED)) + mask_IO_APIC_irq(irq); ack_APIC_irq(); } void static end_edge_ioapic_irq(unsigned int i){} @@ -1055,7 +1264,8 @@ static inline void init_IO_APIC_traps(void) { - int i; + int irq; + /* * NOTE! The local APIC isn't very good at handling * multiple interrupts at the same interrupt level. @@ -1067,36 +1277,62 @@ * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for (i = 0; i < NR_IRQS ; i++) { - if (IO_APIC_VECTOR(i) > 0) { - if (IO_APIC_irq_trigger(i)) - irq_desc[i].handler = &ioapic_level_irq_type; - else - irq_desc[i].handler = &ioapic_edge_irq_type; - /* - * disable it in the 8259A: - */ - if (i < 16) - disable_8259A_irq(i); - } else { - if (!IO_APIC_IRQ(i)) - continue; - + for (irq = 0; irq < NR_IRQS ; irq++) { + if (IO_APIC_IRQ(irq) && !IO_APIC_VECTOR(irq)) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 * interrupt if we can.. */ - if (i < 16) { - make_8259A_irq(i); - continue; - } - - /* Strange. Oh, well.. */ - irq_desc[i].handler = &no_irq_type; + if (irq < 16) + make_8259A_irq(irq); + else + /* Strange. Oh, well.. 
*/ + irq_desc[irq].handler = &no_irq_type; } } - init_IRQ_SMP(); +} + +void static ack_lapic_irq (unsigned int irq) +{ + ack_APIC_irq(); +} + +void static end_lapic_irq (unsigned int i) { /* nothing */ } + +static struct hw_interrupt_type lapic_irq_type = { + "local-APIC-edge", + NULL, /* startup_irq() not used for IRQ0 */ + NULL, /* shutdown_irq() not used for IRQ0 */ + NULL, /* enable_irq() not used for IRQ0 */ + NULL, /* disable_irq() not used for IRQ0 */ + ack_lapic_irq, + end_lapic_irq +}; + +static void enable_NMI_through_LVT0 (void * dummy) +{ + apic_readaround(APIC_LVT0); + apic_write(APIC_LVT0, 0x00000400); // unmask and set to NMI +} + +static void setup_nmi (void) +{ + /* + * Dirty trick to enable the NMI watchdog ... + * We put the 8259A master into AEOI mode and + * unmask on all local APICs LVT0 as NMI. + * + * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') + * is from Maciej W. Rozycki - so we do not have to EOI from + * the NMI handler or the timer interrupt. + */ + printk("activating NMI Watchdog ..."); + + smp_call_function(enable_NMI_through_LVT0, NULL, 1, 1); + enable_NMI_through_LVT0(NULL); + + printk(" done.\n"); } /* @@ -1108,45 +1344,78 @@ static inline void check_timer(void) { int pin1, pin2; + int vector; + + /* + * get/set the timer IRQ vector: + */ + vector = assign_irq_vector(0); + set_intr_gate(vector, interrupt[0]); pin1 = find_timer_pin(mp_INT); pin2 = find_timer_pin(mp_ExtINT); - enable_IO_APIC_irq(0); - if (!timer_irq_works()) { - if (pin1 != -1) - printk("..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); - printk("...trying to set up timer as ExtINT... "); - - if (pin2 != -1) { - printk(".. (found pin %d) ...", pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ - setup_ExtINT_pin(0, pin2, 0); - make_8259A_irq(0); + /* + * Ok, does IRQ0 through the IOAPIC work? 
+ */ + if (timer_irq_works()) { + if (nmi_watchdog) { + disable_8259A_irq(0); + init_8259A(1); + setup_nmi(); + enable_8259A_irq(0); + if (nmi_irq_works()) + return; + } else + return; + } + + if (pin1 != -1) { + printk("..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); + clear_IO_APIC_pin(0, pin1); + } + + printk("...trying to set up timer (IRQ0) through the 8259A ... "); + if (pin2 != -1) { + printk("\n..... (found pin %d) ...", pin2); + /* + * legacy devices should be connected to IO APIC #0 + */ + setup_ExtINT_IRQ0_pin(pin2, vector); + if (timer_irq_works()) { + printk("works.\n"); + if (nmi_watchdog) { + setup_nmi(); + if (nmi_irq_works()) + return; + } else + return; } + /* + * Cleanup, just in case ... + */ + clear_IO_APIC_pin(0, pin2); + } + printk(" failed.\n"); - if (!timer_irq_works()) { - printk(" failed.\n"); - printk("...trying to set up timer as BP IRQ..."); - /* - * Just in case ... - */ - if (pin1 != -1) - clear_IO_APIC_pin(0, pin1); - if (pin2 != -1) - clear_IO_APIC_pin(0, pin2); - - make_8259A_irq(0); - - if (!timer_irq_works()) { - printk(" failed.\n"); - panic("IO-APIC + timer doesn't work!"); - } - } + if (nmi_watchdog) + printk("timer doesnt work through the IO-APIC - cannot activate NMI Watchdog!\n"); + + printk("...trying to set up timer as Virtual Wire IRQ..."); + + disable_8259A_irq(0); + irq_desc[0].handler = &lapic_irq_type; + init_8259A(1); // AEOI mode + apic_readaround(APIC_LVT0); + apic_write(APIC_LVT0, 0x00000000 | vector); // Fixed mode + enable_8259A_irq(0); + + if (timer_irq_works()) { printk(" works.\n"); + return; } + printk(" failed :(.\n"); + panic("IO-APIC + timer doesn't work! 
pester mingo@redhat.com"); } /* @@ -1189,6 +1458,5 @@ setup_IO_APIC_irqs(); init_IO_APIC_traps(); check_timer(); - print_IO_APIC(); } --- linux/arch/i386/kernel/entry.S.orig Sat Sep 25 21:31:19 1999 +++ linux/arch/i386/kernel/entry.S Sat Sep 25 21:33:06 1999 @@ -323,9 +323,14 @@ jmp error_code ENTRY(nmi) + pushl %eax + SAVE_ALL + movl %esp,%edx pushl $0 - pushl $ SYMBOL_NAME(do_nmi) - jmp error_code + pushl %edx + call SYMBOL_NAME(do_nmi) + addl $8,%esp + RESTORE_ALL ENTRY(int3) pushl $0 --- linux/arch/i386/kernel/irq.c.orig Wed Sep 1 08:29:05 1999 +++ linux/arch/i386/kernel/irq.c Sat Sep 25 22:53:19 1999 @@ -22,7 +22,6 @@ #include #include -#include #include #include #include @@ -30,14 +29,13 @@ #include #include #include -#include #include #include +#include #include #include #include -#include #include #include #include @@ -48,7 +46,7 @@ unsigned int local_bh_count[NR_CPUS]; unsigned int local_irq_count[NR_CPUS]; -atomic_t nmi_counter; +extern atomic_t nmi_counter[NR_CPUS]; /* * Linux has a controller-independent x86 interrupt architecture. @@ -75,7 +73,8 @@ /* * Controller mappings for all interrupt sources: */ -irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = { [0 ... NR_IRQS-1] = { 0, &no_irq_type, }}; +irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = + { [0 ... NR_IRQS-1] = { 0, &no_irq_type, }}; /* * Special irq handlers. @@ -84,6 +83,52 @@ void no_action(int cpl, void *dev_id, struct pt_regs *regs) { } /* + * Generic no controller code + */ + +static void enable_none(unsigned int irq) { } +static unsigned int startup_none(unsigned int irq) { return 0; } +static void disable_none(unsigned int irq) { } +static void ack_none(unsigned int irq) +{ +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves, it doesnt deserve + * a generic callback i think. 
+ */ +#if CONFIG_X86 + printk("unexpected IRQ trap at vector %02x\n", irq); +#ifdef __SMP__ + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + */ + ack_APIC_irq(); +#endif +#endif +} + +/* startup is the same as "enable", shutdown is same as "disable" */ +#define shutdown_none disable_none +#define end_none enable_none + +struct hw_interrupt_type no_irq_type = { + "none", + startup_none, + shutdown_none, + enable_none, + disable_none, + ack_none, + end_none +}; + +volatile unsigned long irq_err_count; + +/* * Generic, controller-independent functions: */ @@ -106,22 +151,30 @@ #ifndef __SMP__ p += sprintf(p, "%10u ", kstat_irqs(i)); #else - for (j=0; jtypename); p += sprintf(p, " %s", action->name); - for (action=action->next; action; action = action->next) { + for (action=action->next; action; action = action->next) p += sprintf(p, ", %s", action->name); - } *p++ = '\n'; } - p += sprintf(p, "NMI: %10u\n", atomic_read(&nmi_counter)); -#ifdef __SMP__ - p += sprintf(p, "ERR: %10lu\n", ipi_count); -#endif + p += sprintf(p, "NMI: "); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + atomic_read(nmi_counter+cpu_logical_map(j))); + p += sprintf(p, "\n"); +#if CONFIG_SMP + p += sprintf(p, "LOC: "); + for (j = 0; j < smp_num_cpus; j++) + p += sprintf(p, "%10u ", + apic_timer_irqs[cpu_logical_map(j)]); + p += sprintf(p, "\n"); +#endif + p += sprintf(p, "ERR: %10lu\n", irq_err_count); return p - buf; } @@ -494,6 +547,8 @@ spin_unlock_irqrestore(&irq_controller_lock, flags); } +extern int verbose_irq; + /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific @@ -520,7 +575,7 @@ kstat.irqs[cpu][irq]++; desc = irq_desc + irq; 
spin_lock(&irq_controller_lock); - irq_desc[irq].handler->ack(irq); + desc->handler->ack(irq); /* REPLAY is when Linux resends an IRQ that was dropped earlier WAITING is used by probe to mark irqs that are being tested @@ -570,9 +625,8 @@ spin_unlock(&irq_controller_lock); } desc->status &= ~IRQ_INPROGRESS; - if (!(desc->status & IRQ_DISABLED)){ - irq_desc[irq].handler->end(irq); - } + if (!(desc->status & IRQ_DISABLED)) + desc->handler->end(irq); spin_unlock(&irq_controller_lock); /* --- linux/arch/i386/kernel/i8259.c.orig Mon Aug 30 11:16:24 1999 +++ linux/arch/i386/kernel/i8259.c Sat Sep 25 22:46:32 1999 @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include @@ -9,68 +8,23 @@ #include #include #include -#include #include #include +#include #include #include #include #include -#include #include #include #include #include - -/* - * Intel specific no controller code - * odd that no-controller should be architecture dependent - * but see the ifdef __SMP__ - */ - -static void enable_none(unsigned int irq) { } -static unsigned int startup_none(unsigned int irq) { return 0; } -static void disable_none(unsigned int irq) { } -static void ack_none(unsigned int irq) -{ -#ifdef __SMP__ - /* - * [currently unexpected vectors happen only on SMP and APIC. - * if we want to have non-APIC and non-8259A controllers - * in the future with unexpected vectors, this ack should - * probably be made controller-specific.] - */ - ack_APIC_irq(); -#endif -} - -/* startup is the same as "enable", shutdown is same as "disable" */ -#define shutdown_none disable_none -#define end_none enable_none - -struct hw_interrupt_type no_irq_type = { - "none", - startup_none, - shutdown_none, - enable_none, - disable_none, - ack_none, - end_none -}; - - -/* - * This is the 'legacy' 8259A Programmable Interrupt Controller, - * present in the majority of PC/AT boxes. - * plus some generic x86 specific things if generic specifics makes - * any sense at all. 
- * this file should become arch/i386/kernel/irq.c when the old irq.c - * moves to arch independent land - */ /* + * Common place to define all x86 IRQ vectors + * * This builds up the IRQ handler stubs using some ugly macros in irq.h * * These macros create the low-level assembly IRQ routines that save @@ -79,7 +33,6 @@ * interrupt-controller happy. */ - BUILD_COMMON_IRQ() #define BI(x,y) \ @@ -93,7 +46,7 @@ /* * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: - * (these are usually mapped to vectors 0x20-0x30) + * (these are usually mapped to vectors 0x20-0x2f) */ BUILD_16_IRQS(0x0) @@ -126,9 +79,9 @@ */ BUILD_SMP_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR) -BUILD_SMP_INTERRUPT(stop_cpu_interrupt,STOP_CPU_VECTOR) BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) /* * every pentium local APIC has two 'local interrupts', with a @@ -150,7 +103,7 @@ IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) -static void (*interrupt[NR_IRQS])(void) = { +void (*interrupt[NR_IRQS])(void) = { IRQLIST_16(0x0), #ifdef CONFIG_X86_IO_APIC @@ -164,17 +117,23 @@ #undef IRQ #undef IRQLIST_16 +/* + * This is the 'legacy' 8259A Programmable Interrupt Controller, + * present in the majority of PC/AT boxes. + * plus some generic x86 specific things if generic specifics makes + * any sense at all. 
+ * this file should become arch/i386/kernel/irq.c when the old irq.c + * moves to arch independent land + */ - - -static void enable_8259A_irq(unsigned int irq); +void enable_8259A_irq(unsigned int irq); void disable_8259A_irq(unsigned int irq); /* shutdown is same as "disable" */ #define end_8259A_irq enable_8259A_irq #define shutdown_8259A_irq disable_8259A_irq -static void mask_and_ack_8259A(unsigned int); +void mask_and_ack_8259A(unsigned int); static unsigned int startup_8259A_irq(unsigned int irq) { @@ -207,8 +166,8 @@ /* * Not all IRQs can be routed through the IO-APIC, eg. on certain (older) - * boards the timer interrupt is not connected to any IO-APIC pin, it's - * fed to the CPU IRQ line directly. + * boards the timer interrupt is not really connected to any IO-APIC pin, + * it's fed to the master 8259A's IR0 line only. * * Any '1' bit in this mask means the IRQ is routed through the IO-APIC. * this 'mixed mode' IRQ handling costs nothing because it's only used @@ -224,22 +183,20 @@ { unsigned int mask = 1 << irq; cached_irq_mask |= mask; - if (irq & 8) { + if (irq & 8) outb(cached_A1,0xA1); - } else { + else outb(cached_21,0x21); - } } -static void enable_8259A_irq(unsigned int irq) +void enable_8259A_irq(unsigned int irq) { unsigned int mask = ~(1 << irq); cached_irq_mask &= mask; - if (irq & 8) { + if (irq & 8) outb(cached_A1,0xA1); - } else { + else outb(cached_21,0x21); - } } int i8259A_irq_pending(unsigned int irq) @@ -260,26 +217,141 @@ } /* + * This function assumes to be called rarely. Switching between + * 8259A registers is slow. + */ +static inline int i8259A_irq_real(unsigned int irq) +{ + int value; + int irqmask = 1<> 8); + outb(0x0A,0xA0); /* back to the IRR register */ + return value; +} + +/* * Careful! The 8259A is a fragile beast, it pretty * much _has_ to be done exactly like this (mask it * first, _then_ send the EOI, and the order of EOI * to the two 8259s is important! 
*/ -static void mask_and_ack_8259A(unsigned int irq) +void mask_and_ack_8259A(unsigned int irq) { - cached_irq_mask |= 1 << irq; + unsigned int irqmask = 1 << irq; + + /* + * Lightweight spurious IRQ detection. We do not want + * to overdo spurious IRQ handling - it's usually a sign + * of hardware problems, so we only do the checks we can + * do without slowing down good hardware unnecesserily. + * + * Note that IRQ7 and IRQ15 (the two spurious IRQs + * usually resulting from the 8259A-1|2 PICs) occur + * even if the IRQ is masked in the 8259A. Thus we + * can check spurious 8259A IRQs without doing the + * quite slow i8259A_irq_real() call for every IRQ. + * This does not cover 100% of spurious interrupts, + * but should be enough to warn the user that there + * is something bad going on ... + */ + if (cached_irq_mask & irqmask) + goto spurious_8259A_irq; + cached_irq_mask |= irqmask; + +handle_real_irq: if (irq & 8) { - inb(0xA1); /* DUMMY */ + inb(0xA1); /* DUMMY - (do we need this?) */ outb(cached_A1,0xA1); - outb(0x62,0x20); /* Specific EOI to cascade */ - outb(0x20,0xA0); + outb(0x62,0x20); /* 'Specific EOI' to master-IRQ2 */ + outb(0x20,0xA0); /* 'generic EOI' to slave */ } else { - inb(0x21); /* DUMMY */ + inb(0x21); /* DUMMY - (do we need this?) */ outb(cached_21,0x21); - outb(0x20,0x20); + outb(0x20,0x20); /* 'generic EOI' to master */ + } + return; + +spurious_8259A_irq: + /* + * this is the slow path - should happen rarely. + */ + if (i8259A_irq_real(irq)) + /* + * oops, the IRQ _is_ in service according to the + * 8259A - not spurious, go handle it. + */ + goto handle_real_irq; + + { + static int spurious_irq_mask = 0; + /* + * At this point we can be sure the IRQ is spurious, + * lets ACK and report it. 
[once per IRQ] + */ + if (!(spurious_irq_mask & irqmask)) { + printk("spurious 8259A interrupt: IRQ%d.\n", irq); + spurious_irq_mask |= irqmask; + } + irq_err_count++; + /* + * Theoretically we do not have to handle this IRQ, + * but in Linux this does not cause problems and is + * simpler for us. + */ + goto handle_real_irq; } } +void init_8259A(int auto_eoi) +{ + unsigned long flags; + + save_flags(flags); + cli(); + + outb(0xff, 0x21); /* mask all of 8259A-1 */ + outb(0xff, 0xA1); /* mask all of 8259A-2 */ + + /* + * outb_p - this has to work on a wide range of PC hardware. + */ + outb_p(0x11, 0x20); /* ICW1: select 8259A-1 init */ + outb_p(0x20 + 0, 0x21); /* ICW2: 8259A-1 IR0-7 mapped to 0x20-0x27 */ + outb_p(0x04, 0x21); /* 8259A-1 (the master) has a slave on IR2 */ + if (auto_eoi) + outb_p(0x03, 0x21); /* master does Auto EOI */ + else + outb_p(0x01, 0x21); /* master expects normal EOI */ + + outb_p(0x11, 0xA0); /* ICW1: select 8259A-2 init */ + outb_p(0x20 + 8, 0xA1); /* ICW2: 8259A-2 IR0-7 mapped to 0x28-0x2f */ + outb_p(0x02, 0xA1); /* 8259A-2 is a slave on master's IR2 */ + outb_p(0x01, 0xA1); /* (slave's support for AEOI in flat mode + is to be investigated) */ + + if (auto_eoi) + /* + * in AEOI mode we just have to mask the interrupt + * when acking. 
+ */ + i8259A_irq_type.ack = disable_8259A_irq; + + udelay(100); /* wait for 8259A to initialize */ + + outb(cached_21, 0x21); /* restore master IRQ mask */ + outb(cached_A1, 0xA1); /* restore slave IRQ mask */ + + restore_flags(flags); +} + #ifndef CONFIG_VISWS /* * Note that on a 486, we don't want to do a SIGFPE on an irq13 @@ -307,7 +379,7 @@ * IRQ2 is cascade interrupt to second interrupt controller */ -static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL}; +static struct irqaction irq2 = { no_action, 0, 0, "cascade", NULL, NULL}; #endif @@ -315,6 +387,8 @@ { int i; + init_8259A(0); + for (i = 0; i < NR_IRQS; i++) { irq_desc[i].status = IRQ_DISABLED; irq_desc[i].action = 0; @@ -357,9 +431,9 @@ #ifdef __SMP__ /* - IRQ0 must be given a fixed assignment and initialized - before init_IRQ_SMP. - */ + * IRQ0 must be given a fixed assignment and initialized, + * because it's used before the IO-APIC is set up. + */ set_intr_gate(IRQ0_TRAP_VECTOR, interrupt[0]); /* @@ -371,17 +445,15 @@ /* IPI for invalidation */ set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); - /* IPI for CPU halt */ - set_intr_gate(STOP_CPU_VECTOR, stop_cpu_interrupt); - /* self generated IPI for local APIC timer */ set_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); /* IPI for generic function call */ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); - /* IPI vector for APIC spurious interrupts */ + /* IPI vectors for APIC spurious and error interrupts */ set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); #endif /* @@ -397,13 +469,3 @@ setup_irq(13, &irq13); #endif } - -#ifdef CONFIG_X86_IO_APIC -void __init init_IRQ_SMP(void) -{ - int i; - for (i = 0; i < NR_IRQS ; i++) - if (IO_APIC_VECTOR(i) > 0) - set_intr_gate(IO_APIC_VECTOR(i), interrupt[i]); -} -#endif --- linux/arch/i386/kernel/time.c.orig Mon Aug 23 17:10:52 1999 +++ linux/arch/i386/kernel/time.c Sat Sep 25 21:33:06 1999 @@ -74,7 +74,7 
@@ * Equal to 2^32 * (1 / (clocks per usec) ). * Initialized in time_init. */ -static unsigned long fast_gettimeoffset_quotient=0; +unsigned long fast_gettimeoffset_quotient=0; extern rwlock_t xtime_lock; --- linux/arch/i386/kernel/head.S.orig Mon Aug 23 17:10:42 1999 +++ linux/arch/i386/kernel/head.S Sat Sep 25 21:33:06 1999 @@ -243,6 +243,15 @@ xorl %eax,%eax lldt %ax cld # gcc2 wants the direction flag cleared at all times +#ifdef __SMP__ + movb ready, %cl + cmpb $1,%cl + je 1f # the first CPU calls start_kernel + # all other CPUs call initialize_secondary + call SYMBOL_NAME(initialize_secondary) + jmp L6 +1: +#endif call SYMBOL_NAME(start_kernel) L6: jmp L6 # main should never return here, but --- linux/arch/i386/kernel/Makefile.orig Mon Aug 30 11:16:24 1999 +++ linux/arch/i386/kernel/Makefile Sat Sep 25 21:33:06 1999 @@ -43,7 +43,7 @@ endif ifdef CONFIG_SMP -O_OBJS += smp.o trampoline.o +O_OBJS += smp.o smpboot.o trampoline.o endif ifdef CONFIG_X86_IO_APIC --- linux/arch/i386/kernel/smpboot.c.orig Sat Sep 25 21:33:06 1999 +++ linux/arch/i386/kernel/smpboot.c Sat Sep 25 22:48:53 1999 @@ -0,0 +1,1650 @@ +/* + * Intel MP v1.1/v1.4 specification compliant parsing routines. + * + * (c) 1995 Alan Cox, Building #3 + * (c) 1998, 1999 Ingo Molnar + * + * Much of the core SMP work is based on previous work by Thomas Radke, to + * whom a great many thanks are extended. + * + * Thanks to Intel for making available several different Pentium, + * Pentium Pro and Pentium-II/Xeon MP machines. + * Original development of Linux SMP code supported by Caldera. + * + * This code is released under the GNU public license version 2 or + * later. + * + * Fixes + * Felix Koop : NR_CPUS used properly + * Jose Renau : Handle single CPU case. + * Alan Cox : By repeated request 8) - Total BogoMIP report. + * Greg Wright : Fix for kernel stacks panic. + * Erich Boleyn : MP v1.4 and additional changes. + * Matthias Sattler : Changes for 2.1 kernel map. 
+ * Michel Lespinasse : Changes for 2.1 kernel map. + * Michael Chastain : Change trampoline.S to gnu as. + * Alan Cox : Dumb bug: 'B' step PPro's are fine + * Ingo Molnar : Added APIC timers, based on code + * from Jose Renau + * Alan Cox : Added EBDA scanning + * Ingo Molnar : various cleanups and rewrites + * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. + * Maciej W. Rozycki : Bits for genuine 82489DX timers + */ + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +/* Set if we find a B stepping CPU */ +static int smp_b_stepping = 0; + +/* Setup configured maximum number of CPUs to activate */ +static int max_cpus = -1; +/* 1 if "noapic" boot option passed */ +int skip_ioapic_setup = 0; + +/* Total count of live CPUs */ +int smp_num_cpus = 0; +/* Internal processor count */ +static unsigned int num_processors = 1; + +/* Have we found an SMP box */ +int smp_found_config = 0; + +/* Bitmask of physically existing CPUs */ +unsigned long cpu_present_map = 0; +/* Bitmask of currently online CPUs */ +unsigned long cpu_online_map = 0; + +/* which CPU maps to which logical number */ +volatile int cpu_number_map[NR_CPUS]; +/* which logical number maps to which CPU */ +volatile int __cpu_logical_map[NR_CPUS]; + +static volatile unsigned long cpu_callin_map = 0; +static volatile unsigned long cpu_callout_map = 0; + +/* Per CPU bogomips and other parameters */ +struct cpuinfo_x86 cpu_data[NR_CPUS]; +/* Processor that is doing the boot up */ +static unsigned int boot_cpu_id = 0; + +/* Tripped once we need to start cross invalidating */ +static int smp_activated = 0; +/* Set when the idlers are all forked */ +int smp_threads_ready = 0; + +/* + * Various Linux-internal data structures created from the + * MP-table. 
+ */ +int apic_version [NR_CPUS]; +int mp_bus_id_to_type [MAX_MP_BUSSES] = { -1, }; +extern int nr_ioapics; +extern struct mpc_config_ioapic mp_ioapics [MAX_IO_APICS]; +extern int mp_irq_entries; +extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; +extern int mpc_default_type; +int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { -1, }; +int mp_current_pci_id = 0; +unsigned long mp_lapic_addr = 0; +int pic_mode; + +extern void cache_APIC_registers (void); + +#define SMP_DEBUG 1 + +#if SMP_DEBUG +#define dprintk(x...) printk(##x) +#else +#define dprintk(x...) +#endif + +/* + * IA s/w dev Vol 3, Section 7.4 + */ +#define APIC_DEFAULT_PHYS_BASE 0xfee00000 + +/* + * Setup routine for controlling SMP activation + * + * Command-line option of "nosmp" or "maxcpus=0" will disable SMP + * activation entirely (the MPS table probe still happens, though). + * + * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer + * greater than 0, limits the maximum number of CPUs activated in + * SMP mode to <NUM>. + */ + +static int __init nosmp(char *str) +{ + max_cpus = 0; + return 1; +} + +__setup("nosmp", nosmp); + +static int __init maxcpus(char *str) +{ + get_option(&str, &max_cpus); + return 1; +} + +__setup("maxcpus=", maxcpus); + +/* + * Intel MP BIOS table parsing routines: + */ + +#ifndef CONFIG_X86_VISWS_APIC +/* + * Checksum an MP configuration block. 
+ */ + +static int __init mpf_checksum(unsigned char *mp, int len) +{ + int sum=0; + while(len--) + sum+=*mp++; + return sum&0xFF; +} + +/* + * Processor encoding in an MP configuration block + */ + +static char __init *mpc_family(int family,int model) +{ + static char n[32]; + static char *model_defs[]= + { + "80486DX","80486DX", + "80486SX","80486DX/2 or 80487", + "80486SL","80486SX/2", + "Unknown","80486DX/2-WB", + "80486DX/4","80486DX/4-WB" + }; + + switch (family) { + case 0x04: + if (model < 10) + return model_defs[model]; + break; + + case 0x05: + return("Pentium(tm)"); + + case 0x06: + return("Pentium(tm) Pro"); + + case 0x0F: + if (model == 0x0F) + return("Special controller"); + } + sprintf(n,"Unknown CPU [%d:%d]",family, model); + return n; +} + +static void __init MP_processor_info (struct mpc_config_processor *m) +{ + int ver; + + if (!(m->mpc_cpuflag & CPU_ENABLED)) + return; + + printk("Processor #%d %s APIC version %d\n", + m->mpc_apicid, + mpc_family( (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8 , + (m->mpc_cpufeature & CPU_MODEL_MASK)>>4), + m->mpc_apicver); + +#if SMP_DEBUG + if (m->mpc_featureflag&(1<<0)) + printk(" Floating point unit present.\n"); + if (m->mpc_featureflag&(1<<7)) + printk(" Machine Exception supported.\n"); + if (m->mpc_featureflag&(1<<8)) + printk(" 64 bit compare & exchange supported.\n"); + if (m->mpc_featureflag&(1<<9)) + printk(" Internal APIC present.\n"); +#endif + + if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { + dprintk(" Bootup CPU\n"); + boot_cpu_id = m->mpc_apicid; + } else + /* Boot CPU already counted */ + num_processors++; + + /* + * The APIC id is used below as an index into apic_version[NR_CPUS] + * and as a bit number in cpu_present_map, so NR_CPUS itself is + * already out of range. + */ + if (m->mpc_apicid >= NR_CPUS) { + printk("Processor #%d unused. (Max %d processors).\n", + m->mpc_apicid, NR_CPUS); + return; + } + ver = m->mpc_apicver; + + cpu_present_map |= (1<<m->mpc_apicid); + /* + * Validate version + */ + if (ver == 0x0) { + printk("BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. 
(tell your hw vendor)\n", m->mpc_apicid); + ver = 0x10; + } + apic_version[m->mpc_apicid] = ver; +} + +static void __init MP_bus_info (struct mpc_config_bus *m) +{ + char str[7]; + + memcpy(str, m->mpc_bustype, 6); + str[6] = 0; + dprintk("Bus #%d is %s\n", m->mpc_busid, str); + + if (strncmp(str, "ISA", 3) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; + } else { + if (strncmp(str, "EISA", 4) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; + } else { + if (strncmp(str, "PCI", 3) == 0) { + mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; + mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id; + mp_current_pci_id++; + } else { + printk("Unknown bustype %s\n", str); + panic("cannot handle bus - mail to linux-smp@vger.rutgers.edu"); + } } } +} + +static void __init MP_ioapic_info (struct mpc_config_ioapic *m) +{ + if (!(m->mpc_flags & MPC_APIC_USABLE)) + return; + + printk("I/O APIC #%d Version %d at 0x%lX.\n", + m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr); + if (nr_ioapics >= MAX_IO_APICS) { + printk("Max # of I/O APICs (%d) exceeded (found %d).\n", + MAX_IO_APICS, nr_ioapics); + panic("Recompile kernel with bigger MAX_IO_APICS!.\n"); + } + mp_ioapics[nr_ioapics] = *m; + nr_ioapics++; +} + +static void __init MP_intsrc_info (struct mpc_config_intsrc *m) +{ + mp_irqs [mp_irq_entries] = *m; + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); +} + +static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) +{ + /* + * Well it seems all SMP boards in existence + * use ExtINT/LVT1 == LINT0 and + * NMI/LVT2 == LINT1 - the following check + * will show us if this assumption is false. + * Until then we do not have to add baggage. 
+ */ + if ((m->mpc_irqtype == mp_ExtINT) && + (m->mpc_destapiclint != 0)) + BUG(); + if ((m->mpc_irqtype == mp_NMI) && + (m->mpc_destapiclint != 1)) + BUG(); +} + +/* + * Read/parse the MPC + */ + +static int __init smp_read_mpc(struct mp_config_table *mpc) +{ + char str[16]; + int count=sizeof(*mpc); + unsigned char *mpt=((unsigned char *)mpc)+count; + + if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) + { + panic("SMP mptable: bad signature [%c%c%c%c]!\n", + mpc->mpc_signature[0], + mpc->mpc_signature[1], + mpc->mpc_signature[2], + mpc->mpc_signature[3]); + return 1; + } + if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) + { + panic("SMP mptable: checksum error!\n"); + return 1; + } + if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) + { + printk("Bad Config Table version (%d)!!\n",mpc->mpc_spec); + return 1; + } + memcpy(str,mpc->mpc_oem,8); + str[8]=0; + printk("OEM ID: %s ",str); + + memcpy(str,mpc->mpc_productid,12); + str[12]=0; + printk("Product ID: %s ",str); + + printk("APIC at: 0x%lX\n",mpc->mpc_lapic); + + /* save the local APIC address, it might be non-default */ + mp_lapic_addr = mpc->mpc_lapic; + + /* + * Now process the configuration blocks. 
+ */ + while (count < mpc->mpc_length) { + switch(*mpt) { + case MP_PROCESSOR: + { + struct mpc_config_processor *m= + (struct mpc_config_processor *)mpt; + MP_processor_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_BUS: + { + struct mpc_config_bus *m= + (struct mpc_config_bus *)mpt; + MP_bus_info(m); + mpt += sizeof(*m); + count += sizeof(*m); + break; + } + case MP_IOAPIC: + { + struct mpc_config_ioapic *m= + (struct mpc_config_ioapic *)mpt; + MP_ioapic_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_INTSRC: + { + struct mpc_config_intsrc *m= + (struct mpc_config_intsrc *)mpt; + + MP_intsrc_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + case MP_LINTSRC: + { + struct mpc_config_lintsrc *m= + (struct mpc_config_lintsrc *)mpt; + MP_lintsrc_info(m); + mpt+=sizeof(*m); + count+=sizeof(*m); + break; + } + default: + { + /* + * Unknown entry type: its size is not known, so + * we cannot step over it. Stop parsing here + * instead of looping forever on the same byte. + */ + count = mpc->mpc_length; + break; + } + } + } + return num_processors; +} + +/* + * Scan the memory blocks for an SMP configuration block. + */ +static int __init smp_get_mpf(struct intel_mp_floating *mpf) +{ + printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); + if (mpf->mpf_feature2 & (1<<7)) { + printk(" IMCR and PIC compatibility mode.\n"); + pic_mode = 1; + } else { + printk(" Virtual Wire compatibility mode.\n"); + pic_mode = 0; + } + smp_found_config = 1; + /* + * default CPU id - if it's different in the mptable + * then we change it before first using it. + */ + boot_cpu_id = 0; + /* + * Now see if we need to read further. + */ + if (mpf->mpf_feature1 != 0) { + /* + * local APIC has default address + */ + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* + * 2 CPUs, numbered 0 & 1. 
+ */ + cpu_present_map = 3; + num_processors = 2; + + nr_ioapics = 1; + mp_ioapics[0].mpc_apicaddr = 0xFEC00000; + /* + * Save the default type number, we + * need it later to set the IO-APIC + * up properly: + */ + mpc_default_type = mpf->mpf_feature1; + + printk("Bus #0 is "); + } + + switch (mpf->mpf_feature1) { + case 1: + case 5: + printk("ISA\n"); + break; + case 2: + printk("EISA with no IRQ0 and no IRQ13 DMA chaining\n"); + break; + case 6: + case 3: + printk("EISA\n"); + break; + case 4: + case 7: + printk("MCA\n"); + break; + case 0: + if (!mpf->mpf_physptr) + BUG(); + break; + default: + printk("???\nUnknown standard configuration %d\n", + mpf->mpf_feature1); + return 1; + } + if (mpf->mpf_feature1 > 4) { + printk("Bus #1 is PCI\n"); + + /* + * Set local APIC version to the integrated form. + * It's initialized to zero otherwise, representing + * a discrete 82489DX. + */ + apic_version[0] = 0x10; + apic_version[1] = 0x10; + } + /* + * Read the physical hardware table. Anything here will override the + * defaults. + */ + if (mpf->mpf_physptr) + smp_read_mpc((void *)mpf->mpf_physptr); + + __cpu_logical_map[0] = boot_cpu_id; + global_irq_holder = boot_cpu_id; + current->processor = boot_cpu_id; + + printk("Processors: %d\n", num_processors); + /* + * Only use the first configuration found. 
+ */ + return 1; +} + +static int __init smp_scan_config(unsigned long base, unsigned long length) +{ + unsigned long *bp = phys_to_virt(base); + struct intel_mp_floating *mpf; + + dprintk("Scan SMP from %p for %ld bytes.\n", bp,length); + if (sizeof(*mpf) != 16) + printk("Error: MPF size\n"); + + while (length > 0) { + mpf = (struct intel_mp_floating *)bp; + if ((*bp == SMP_MAGIC_IDENT) && + (mpf->mpf_length == 1) && + !mpf_checksum((unsigned char *)bp, 16) && + ((mpf->mpf_specification == 1) + || (mpf->mpf_specification == 4)) ) { + + printk("found SMP MP-table at %08ld\n", + virt_to_phys(mpf)); + smp_get_mpf(mpf); + return 1; + } + bp += 4; + length -= 16; + } + return 0; +} + +void __init init_intel_smp (void) +{ + unsigned int address; + + /* + * FIXME: Linux assumes you have 640K of base ram.. + * this continues the error... + * + * 1) Scan the bottom 1K for a signature + * 2) Scan the top 1K of base RAM + * 3) Scan the 64K of bios + */ + if (smp_scan_config(0x0,0x400) || + smp_scan_config(639*0x400,0x400) || + smp_scan_config(0xF0000,0x10000)) + return; + /* + * If it is an SMP machine we should know now, unless the + * configuration is in an EISA/MCA bus machine with an + * extended bios data area. + * + * there is a real-mode segmented pointer pointing to the + * 4K EBDA area at 0x40E, calculate and scan it here. + * + * NOTE! There are Linux loaders that will corrupt the EBDA + * area, and as such this kind of SMP config may be less + * trustworthy, simply because the SMP table may have been + * stomped on during early boot. These loaders are buggy and + * should be fixed. 
+ */ + + address = *(unsigned short *)phys_to_virt(0x40E); + address <<= 4; + smp_scan_config(address, 0x1000); + if (smp_found_config) + printk(KERN_WARNING "WARNING: MP table in the EBDA can be UNSAFE, contact linux-smp@vger.rutgers.edu if you experience SMP problems!\n"); +} + +#else + +/* + * The Visual Workstation is Intel MP compliant in the hardware + * sense, but it doesnt have a BIOS(-configuration table). + * No problem for Linux. + */ +void __init init_visws_smp(void) +{ + smp_found_config = 1; + + cpu_present_map |= 2; /* or in id 1 */ + apic_version[1] |= 0x10; /* integrated APIC */ + apic_version[0] |= 0x10; + + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; +} + +#endif + +/* + * - Intel MP Configuration Table + * - or SGI Visual Workstation configuration + */ +void __init init_smp_config (void) +{ +#ifndef CONFIG_VISWS + init_intel_smp(); +#else + init_visws_smp(); +#endif +} + + + +/* + * Trampoline 80x86 program as an array. + */ + +extern unsigned char trampoline_data []; +extern unsigned char trampoline_end []; +static unsigned char *trampoline_base; + +/* + * Currently trivial. Write the real->protected mode + * bootstrap into the page concerned. The caller + * has made sure it's suitably aligned. + */ + +static unsigned long __init setup_trampoline(void) +{ + memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data); + return virt_to_phys(trampoline_base); +} + +/* + * We are called very early to get the low memory for the + * SMP bootup trampoline page. + */ +unsigned long __init smp_alloc_memory(unsigned long mem_base) +{ + if (virt_to_phys((void *)mem_base) >= 0x9F000) + BUG(); + trampoline_base = (void *)mem_base; + return mem_base + PAGE_SIZE; +} + +/* + * The bootstrap kernel entry code has set these up. 
Save them for + * a given CPU + */ + +void __init smp_store_cpu_info(int id) +{ + struct cpuinfo_x86 *c=&cpu_data[id]; + + *c = boot_cpu_data; + c->pte_quick = 0; + c->pgd_quick = 0; + c->pgtable_cache_sz = 0; + identify_cpu(c); + /* + * Mask B, Pentium, but not Pentium MMX + */ + if (c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 5 && + c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_model <= 3) + /* + * Remember we have B step Pentia with bugs + */ + smp_b_stepping = 1; +} + +/* + * Architecture specific routine called by the kernel just before init is + * fired off. This allows the BP to have everything in order [we hope]. + * At the end of this all the APs will hit the system scheduling and off + * we go. Each AP will load the system gdt's and jump through the kernel + * init into idle(). At this point the scheduler will one day take over + * and give them jobs to do. smp_callin is a standard routine + * we use to track CPUs as they power up. + */ + +static atomic_t smp_commenced = ATOMIC_INIT(0); + +void __init smp_commence(void) +{ + /* + * Lets the callins below out of their loop. + */ + dprintk("Setting commenced=1, go go go\n"); + + wmb(); + atomic_set(&smp_commenced,1); +} + +extern void __error_in_io_apic_c(void); + + +int get_maxlvt(void) +{ + unsigned int v, ver, maxlvt; + + v = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(v); + /* 82489DXs do not report # of LVT entries. */ + maxlvt = APIC_INTEGRATED(ver) ? 
GET_APIC_MAXLVT(v) : 2; + return maxlvt; +} + +void __init setup_local_APIC(void) +{ + unsigned long value, ver, maxlvt; + + if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) + __error_in_io_apic_c(); + + value = apic_read(APIC_SPIV); + value = 0xf; + /* + * Enable APIC + */ + value |= (1<<8); +#if 0 + /* Enable focus processor (bit==0) */ + value &= ~(1<<9); +#else + /* Disable focus processor (bit==1) */ + value |= (1<<9); +#endif + /* + * Set spurious IRQ vector + */ + value |= SPURIOUS_APIC_VECTOR; + apic_write(APIC_SPIV,value); + + /* + * Set up LVT0, LVT1: + * + * set up through-local-APIC on the BP's LINT0. This is not + * strictly necessery in pure symmetric-IO mode, but sometimes + * we delegate interrupts to the 8259A. + */ + if (hard_smp_processor_id() == boot_cpu_id) { + value = 0x00000700; + printk("enabled ExtINT on CPU#%d\n", hard_smp_processor_id()); + } else { + value = 0x00010700; + printk("masked ExtINT on CPU#%d\n", hard_smp_processor_id()); + } + apic_write_around(APIC_LVT0,value); + + /* + * only the BP should see the LINT1 NMI signal, obviously. + */ + if (hard_smp_processor_id() == boot_cpu_id) + value = 0x00000400; // unmask NMI + else + value = 0x00010400; // mask NMI + apic_write_around(APIC_LVT1,value); + + value = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(value); + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + maxlvt = get_maxlvt(); + /* + * Due to the Pentium erratum 3AP. + */ + if (maxlvt > 3) { + apic_readaround(APIC_SPIV); // not strictly necessery + apic_write(APIC_ESR, 0); + } + value = apic_read(APIC_ESR); + printk("ESR value before enabling vector: %08lx\n", value); + + value = apic_read(APIC_LVTERR); + value = ERROR_APIC_VECTOR; // enables sending errors + apic_write(APIC_LVTERR,value); + /* + * spec says clear errors after enabling vector. 
+ */ + if (maxlvt != 3) { + apic_readaround(APIC_SPIV); + apic_write(APIC_ESR, 0); + } + value = apic_read(APIC_ESR); + printk("ESR value after enabling vector: %08lx\n", value); + } else + printk("No ESR for 82489DX.\n"); + + /* + * Set Task Priority to 'accept all'. We never change this + * later on. + */ + value = apic_read(APIC_TASKPRI); + value &= ~APIC_TPRI_MASK; + apic_write(APIC_TASKPRI,value); + + /* + * Set up the logical destination ID and put the + * APIC into flat delivery mode. + */ + value = apic_read(APIC_LDR); + value &= ~APIC_LDR_MASK; + value |= (1<<(smp_processor_id()+24)); + apic_write(APIC_LDR,value); + + value = apic_read(APIC_DFR); + value |= SET_APIC_DFR(0xf); + apic_write(APIC_DFR, value); +} + +unsigned long __init init_smp_mappings(unsigned long memory_start) +{ + unsigned long apic_phys; + + memory_start = PAGE_ALIGN(memory_start); + if (smp_found_config) { + apic_phys = mp_lapic_addr; + } else { + /* + * set up a fake all zeroes page to simulate the + * local APIC and another one for the IO-APIC. We + * could use the real zero-page, but it's safer + * this way if some buggy code writes to this page ... + */ + apic_phys = __pa(memory_start); + memset((void *)memory_start, 0, PAGE_SIZE); + memory_start += PAGE_SIZE; + } + set_fixmap(FIX_APIC_BASE,apic_phys); + dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys); + +#ifdef CONFIG_X86_IO_APIC + { + unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; + int i; + + for (i = 0; i < nr_ioapics; i++) { + if (smp_found_config) { + ioapic_phys = mp_ioapics[i].mpc_apicaddr; + } else { + ioapic_phys = __pa(memory_start); + memset((void *)memory_start, 0, PAGE_SIZE); + memory_start += PAGE_SIZE; + } + set_fixmap(idx,ioapic_phys); + dprintk("mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); + idx++; + } + } +#endif + + return memory_start; +} + +/* + * TSC synchronization. 
+ * + * We first check wether all CPUs have their TSC's synchronized, + * then we print a warning if not, and always resync. + */ + +static atomic_t tsc_start_flag = ATOMIC_INIT(0); +static atomic_t tsc_count_start = ATOMIC_INIT(0); +static atomic_t tsc_count_stop = ATOMIC_INIT(0); +static unsigned long long tsc_values[NR_CPUS] = { 0, }; + +#define NR_LOOPS 5 + +extern unsigned long fast_gettimeoffset_quotient; + +/* + * accurate 64-bit/32-bit division, expanded to 32-bit divisions and 64-bit + * multiplication. Not terribly optimized but we need it at boot time only + * anyway. + * + * result == a / b + * == (a1 + a2*(2^32)) / b + * == a1/b + a2*(2^32/b) + * == a1/b + a2*((2^32-1)/b) + a2/b + (a2*((2^32-1) % b))/b + * ^---- (this multiplication can overflow) + */ + +static unsigned long long div64 (unsigned long long a, unsigned long b0) +{ + unsigned int a1, a2; + unsigned long long res; + + a1 = ((unsigned int*)&a)[0]; + a2 = ((unsigned int*)&a)[1]; + + res = a1/b0 + + (unsigned long long)a2 * (unsigned long long)(0xffffffff/b0) + + a2 / b0 + + (a2 * (0xffffffff % b0)) / b0; + + return res; +} + +static void __init synchronize_tsc_bp (void) +{ + int i; + unsigned long long t0; + unsigned long long sum, avg; + long long delta; + unsigned long one_usec; + int buggy = 0; + + printk("checking TSC synchronization across CPUs: "); + + one_usec = ((1<<30)/fast_gettimeoffset_quotient)*(1<<2); + + atomic_set(&tsc_start_flag, 1); + wmb(); + + /* + * We loop a few times to get a primed instruction cache, + * then the last pass is more or less synchronized and + * the BP and APs set their cycle counters to zero all at + * once. This reduces the chance of having random offsets + * between the processors, and guarantees that the maximum + * delay between the cycle counters is never bigger than + * the latency of information-passing (cachelines) between + * two CPUs. 
+ */ + for (i = 0; i < NR_LOOPS; i++) { + /* + * all APs synchronize but they loop on '== num_cpus' + */ + while (atomic_read(&tsc_count_start) != smp_num_cpus-1) mb(); + atomic_set(&tsc_count_stop, 0); + wmb(); + /* + * this lets the APs save their current TSC: + */ + atomic_inc(&tsc_count_start); + + rdtscll(tsc_values[smp_processor_id()]); + /* + * We clear the TSC in the last loop: + */ + if (i == NR_LOOPS-1) + write_tsc(0, 0); + + /* + * Wait for all APs to leave the synchronization point: + */ + while (atomic_read(&tsc_count_stop) != smp_num_cpus-1) mb(); + atomic_set(&tsc_count_start, 0); + wmb(); + atomic_inc(&tsc_count_stop); + } + + sum = 0; + for (i = 0; i < NR_CPUS; i++) { + if (!(cpu_online_map & (1 << i))) + continue; + + t0 = tsc_values[i]; + sum += t0; + } + avg = div64(sum, smp_num_cpus); + + sum = 0; + for (i = 0; i < NR_CPUS; i++) { + if (!(cpu_online_map & (1 << i))) + continue; + + delta = tsc_values[i] - avg; + if (delta < 0) + delta = -delta; + /* + * We report bigger than 2 microseconds clock differences. + */ + if (delta > 2*one_usec) { + long realdelta; + if (!buggy) { + buggy = 1; + printk("\n"); + } + realdelta = div64(delta, one_usec); + if (tsc_values[i] < avg) + realdelta = -realdelta; + + printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! 
FIXED.\n", + i, realdelta); + } + + sum += delta; + } + if (!buggy) + printk("passed.\n"); +} + +static void __init synchronize_tsc_ap (void) +{ + int i; + + /* + * smp_num_cpus is not necessarily known at the time + * this gets called, so we first wait for the BP to + * finish SMP initialization: + */ + while (!atomic_read(&tsc_start_flag)) mb(); + + for (i = 0; i < NR_LOOPS; i++) { + atomic_inc(&tsc_count_start); + while (atomic_read(&tsc_count_start) != smp_num_cpus) mb(); + + rdtscll(tsc_values[smp_processor_id()]); + if (i == NR_LOOPS-1) + write_tsc(0, 0); + + atomic_inc(&tsc_count_stop); + while (atomic_read(&tsc_count_stop) != smp_num_cpus) mb(); + } +} +#undef NR_LOOPS + +extern void calibrate_delay(void); + +void __init smp_callin(void) +{ + int cpuid; + unsigned long timeout; + + /* + * (This works even if the APIC is not enabled.) + */ + cpuid = GET_APIC_ID(apic_read(APIC_ID)); + + dprintk("CPU#%d waiting for CALLOUT\n", cpuid); + + /* + * STARTUP IPIs are fragile beasts as they might sometimes + * trigger some glue motherboard logic. Complete APIC bus + * silence for 1 second, this overestimates the time the + * boot CPU is spending to send the up to 2 STARTUP IPIs + * by a factor of two. This should be enough. + */ + + /* + * Waiting 2s total for startup (udelay is not yet working) + */ + timeout = jiffies + 2*HZ; + while (time_before(jiffies, timeout)) { + /* + * Has the boot CPU finished it's STARTUP sequence? + */ + if (test_bit(cpuid, &cpu_callout_map)) + break; + } + + if (!time_before(jiffies, timeout)) { + printk("BUG: CPU%d started up but did not get a callout!\n", + cpuid); + BUG(); + } + + /* + * the boot CPU has finished the init stage and is spinning + * on callin_map until we finish. We are free to set up this + * CPU, first the APIC. 
(this is probably redundant on most + * boards) + */ + + dprintk("CALLIN, before setup_local_APIC().\n"); + setup_local_APIC(); + + sti(); + +#ifdef CONFIG_MTRR + /* + * Must be done before calibration delay is computed + */ + mtrr_init_secondary_cpu (); +#endif + /* + * Get our bogomips. + */ + calibrate_delay(); + dprintk("Stack at about %p\n",&cpuid); + + /* + * Save our processor parameters + */ + smp_store_cpu_info(cpuid); + + /* + * Allow the master to continue. + */ + set_bit(cpuid, &cpu_callin_map); + + /* + * Synchronize the TSC with the BP + */ + if (cpu_has_tsc) + synchronize_tsc_ap (); +} + +int cpucount = 0; + +extern int cpu_idle(void); + +/* + * Activate a secondary processor. + */ +int __init start_secondary(void *unused) +{ + /* + * Dont put anything before smp_callin(), SMP + * booting is too fragile that we want to limit the + * things done here to the most necessary things. + */ + cpu_init(); + smp_callin(); + while (!atomic_read(&smp_commenced)) + /* nothing */ ; + return cpu_idle(); +} + +/* + * Everything has been set up for the secondary + * CPUs - they just need to reload everything + * from the task structure + * This function must not return. + */ +void __init initialize_secondary(void) +{ + /* + * We don't actually need to load the full TSS, + * basically just the stack pointer and the eip. + */ + + asm volatile( + "movl %0,%%esp\n\t" + "jmp *%1" + : + :"r" (current->thread.esp),"r" (current->thread.eip)); +} + +extern struct { + void * esp; + unsigned short ss; +} stack_start; + +static int __init fork_by_hand(void) +{ + struct pt_regs regs; + /* + * don't care about the eip and regs settings since + * we'll never reschedule the forked task. 
+ */ + return do_fork(CLONE_VM|CLONE_PID, 0, ®s); +} + +static void __init do_boot_cpu(int i) +{ + unsigned long cfg; + pgd_t maincfg; + struct task_struct *idle; + unsigned long send_status, accept_status; + int timeout, num_starts, j; + unsigned long start_eip; + + cpucount++; + /* + * We can't use kernel_thread since we must avoid to + * reschedule the child. + */ + if (fork_by_hand() < 0) + panic("failed fork for CPU %d", i); + + /* + * We remove it from the pidhash and the runqueue + * once we got the process: + */ + idle = init_task.prev_task; + if (!idle) + panic("No idle process for CPU %d", i); + + idle->processor = i; + __cpu_logical_map[cpucount] = i; + cpu_number_map[i] = cpucount; + idle->has_cpu = 1; /* we schedule the first task manually */ + idle->thread.eip = (unsigned long) start_secondary; + + del_from_runqueue(idle); + unhash_process(idle); + init_tasks[cpucount] = idle; + + /* start_eip had better be page-aligned! */ + start_eip = setup_trampoline(); + + /* So we see what's up */ + printk("Booting processor %d eip %lx\n", i, start_eip); + stack_start.esp = (void *) (1024 + PAGE_SIZE + (char *)idle); + + /* + * This grunge runs the startup process for + * the targeted processor. + */ + + dprintk("Setting warm reset code and vector.\n"); + + CMOS_WRITE(0xa, 0xf); + local_flush_tlb(); + dprintk("1.\n"); + *((volatile unsigned short *) phys_to_virt(0x469)) = start_eip >> 4; + dprintk("2.\n"); + *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf; + dprintk("3.\n"); + + maincfg=swapper_pg_dir[0]; + ((unsigned long *)swapper_pg_dir)[0]=0x102007; + + /* + * Be paranoid about clearing APIC errors. + */ + + if (APIC_INTEGRATED(apic_version[i])) { + apic_readaround(APIC_SPIV); + apic_write(APIC_ESR, 0); + accept_status = (apic_read(APIC_ESR) & 0xEF); + } + + /* + * Status is now clean + */ + send_status = 0; + accept_status = 0; + + /* + * Starting actual IPI sequence... 
+ */ + + dprintk("Asserting INIT.\n"); + + /* + * Turn INIT on + */ + cfg = apic_read(APIC_ICR2); + cfg &= 0x00FFFFFF; + + /* + * Target chip + */ + apic_write(APIC_ICR2, cfg | SET_APIC_DEST_FIELD(i)); + + /* + * Send IPI + */ + cfg = apic_read(APIC_ICR); + cfg &= ~0xCDFFF; + cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_ASSERT | APIC_DEST_DM_INIT); + apic_write(APIC_ICR, cfg); + + udelay(200); + dprintk("Deasserting INIT.\n"); + + /* Target chip */ + cfg = apic_read(APIC_ICR2); + cfg &= 0x00FFFFFF; + apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); + + /* Send IPI */ + cfg = apic_read(APIC_ICR); + cfg &= ~0xCDFFF; + cfg |= (APIC_DEST_LEVELTRIG | APIC_DEST_DM_INIT); + apic_write(APIC_ICR, cfg); + + /* + * Should we send STARTUP IPIs ? + * + * Determine this based on the APIC version. + * If we don't have an integrated APIC, don't + * send the STARTUP IPIs. + */ + + if (APIC_INTEGRATED(apic_version[i])) + num_starts = 2; + else + num_starts = 0; + + /* + * Run STARTUP IPI loop. + */ + + for (j = 1; j <= num_starts; j++) { + dprintk("Sending STARTUP #%d.\n",j); + apic_readaround(APIC_SPIV); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + dprintk("After apic_write.\n"); + + /* + * STARTUP IPI + */ + + /* Target chip */ + cfg = apic_read(APIC_ICR2); + cfg &= 0x00FFFFFF; + apic_write(APIC_ICR2, cfg|SET_APIC_DEST_FIELD(i)); + + /* Boot on the stack */ + cfg = apic_read(APIC_ICR); + cfg &= ~0xCDFFF; + cfg |= (APIC_DEST_DM_STARTUP | (start_eip >> 12)); + + /* Kick the second */ + apic_write(APIC_ICR, cfg); + + dprintk("Startup point 1.\n"); + + dprintk("Waiting for send to finish...\n"); + timeout = 0; + do { + dprintk("+"); + udelay(100); + send_status = apic_read(APIC_ICR) & 0x1000; + } while (send_status && (timeout++ < 1000)); + + /* + * Give the other CPU some time to accept the IPI. 
+ */ + udelay(200); + accept_status = (apic_read(APIC_ESR) & 0xEF); + if (send_status || accept_status) + break; + } + dprintk("After Startup.\n"); + + if (send_status) + printk("APIC never delivered???\n"); + if (accept_status) + printk("APIC delivery error (%lx).\n", accept_status); + + if (!send_status && !accept_status) { + /* + * allow APs to start initializing. + */ + dprintk("Before Callout %d.\n", i); + set_bit(i, &cpu_callout_map); + dprintk("After Callout %d.\n", i); + + /* + * Wait 5s total for a response + */ + for (timeout = 0; timeout < 50000; timeout++) { + if (test_bit(i, &cpu_callin_map)) + break; /* It has booted */ + udelay(100); + } + + if (test_bit(i, &cpu_callin_map)) { + /* number CPUs logically, starting from 1 (BSP is 0) */ + printk("OK.\n"); + printk("CPU%d: ", i); + print_cpu_info(&cpu_data[i]); + } else { + if (*((volatile unsigned char *)phys_to_virt(8192)) + == 0xA5) /* trampoline code not run */ + printk("Stuck ??\n"); + else + printk("CPU booted but not responding.\n"); + } + dprintk("CPU has booted.\n"); + } else { + __cpu_logical_map[cpucount] = -1; + cpu_number_map[i] = -1; + cpucount--; + } + + swapper_pg_dir[0]=maincfg; + local_flush_tlb(); + + /* mark "stuck" area as not stuck */ + *((volatile unsigned long *)phys_to_virt(8192)) = 0; +} + +cycles_t cacheflush_time; +extern unsigned long cpu_hz; + +static void smp_tune_scheduling (void) +{ + unsigned long cachesize; + /* + * Rough estimation for SMP scheduling, this is the number of + * cycles it takes for a fully memory-limited process to flush + * the SMP-local cache. + * + * (For a P5 this pretty much means we will choose another idle + * CPU almost always at wakeup time (this is due to the small + * L1 cache), on PIIs it's around 50-100 usecs, depending on + * the cache size) + */ + + if (!cpu_hz) { + /* + * this basically disables processor-affinity + * scheduling on SMP without a TSC. 
+ */ + cacheflush_time = 0; + return; + } else { + cachesize = boot_cpu_data.x86_cache_size; + if (cachesize == -1) + cachesize = 8; /* Pentiums */ + + cacheflush_time = cpu_hz/1024*cachesize/5000; + } + + printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n", + (long)cacheflush_time/(cpu_hz/1000000), + ((long)cacheflush_time*100/(cpu_hz/1000000)) % 100); +} + +/* + * Cycle through the processors sending APIC IPIs to boot each. + */ + +extern int prof_multiplier[NR_CPUS]; +extern int prof_old_multiplier[NR_CPUS]; +extern int prof_counter[NR_CPUS]; + +void __init smp_boot_cpus(void) +{ + int i; + +#ifdef CONFIG_MTRR + /* Must be done before other processors booted */ + mtrr_init_boot_cpu (); +#endif + /* + * Initialize the logical to physical CPU number mapping + * and the per-CPU profiling counter/multiplier + */ + + for (i = 0; i < NR_CPUS; i++) { + cpu_number_map[i] = -1; + prof_counter[i] = 1; + prof_old_multiplier[i] = 1; + prof_multiplier[i] = 1; + } + + /* + * Setup boot CPU information + */ + + smp_store_cpu_info(boot_cpu_id); /* Final full version of the data */ + smp_tune_scheduling(); + printk("CPU%d: ", boot_cpu_id); + print_cpu_info(&cpu_data[boot_cpu_id]); + + /* + * not necessary because the MP table should list the boot + * CPU too, but we do it for the sake of robustness anyway. + * (and for the case when a non-SMP board boots an SMP kernel) + */ + cpu_present_map |= (1 << hard_smp_processor_id()); + + cpu_number_map[boot_cpu_id] = 0; + + init_idle(); + + /* + * If we couldnt find an SMP configuration at boot time, + * get out of here now! + */ + + if (!smp_found_config) { + printk(KERN_NOTICE "SMP motherboard not detected. Using dummy APIC emulation.\n"); +#ifndef CONFIG_VISWS + io_apic_irqs = 0; +#endif + cpu_online_map = cpu_present_map; + smp_num_cpus = 1; + goto smp_done; + } + + /* + * If SMP should be disabled, then really disable it! 
+ */ + + if (!max_cpus) { + smp_found_config = 0; + printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); + } + +#ifdef SMP_DEBUG + { + int reg; + + /* + * This is to verify that we're looking at + * a real local APIC. Check these against + * your board if the CPUs aren't getting + * started for no apparent reason. + */ + + reg = apic_read(APIC_LVR); + dprintk("Getting VERSION: %x\n", reg); + + apic_write(APIC_LVR, 0); + reg = apic_read(APIC_LVR); + dprintk("Getting VERSION: %x\n", reg); + + /* + * The two version reads above should print the same + * NON-ZERO!!! numbers. If the second one is zero, + * there is a problem with the APIC write/read + * definitions. + * + * The next two are just to see if we have sane values. + * They're only really relevant if we're in Virtual Wire + * compatibility mode, but most boxes are anymore. + */ + + + reg = apic_read(APIC_LVT0); + dprintk("Getting LVT0: %x\n", reg); + + reg = apic_read(APIC_LVT1); + dprintk("Getting LVT1: %x\n", reg); + } +#endif + + setup_local_APIC(); + + if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) + BUG(); + + /* + * Now scan the CPU present map and fire up the other CPUs. + */ + + /* + * Add all detected CPUs. (later on we can down individual + * CPUs which will change cpu_online_map but not necessarily + * cpu_present_map. We are pretty much ready for hot-swap CPUs.) + */ + cpu_online_map = cpu_present_map; + mb(); + + dprintk("CPU map: %lx\n", cpu_present_map); + + for (i = 0; i < NR_CPUS; i++) { + /* + * Don't even attempt to start the boot CPU! + */ + if (i == boot_cpu_id) + continue; + + if ((cpu_online_map & (1 << i)) + && (max_cpus < 0 || max_cpus > cpucount+1)) { + do_boot_cpu(i); + } + + /* + * Make sure we unmap all failed CPUs + */ + if (cpu_number_map[i] == -1 && (cpu_online_map & (1 << i))) { + printk("CPU #%d not responding - cannot use it.\n",i); + cpu_online_map &= ~(1 << i); + } + } + + /* + * Cleanup possible dangling ends... 
+ */ + +#ifndef CONFIG_VISWS + { + unsigned long cfg; + + /* + * Install writable page 0 entry to set BIOS data area. + */ + cfg = pg0[0]; + /* writeable, present, addr 0 */ + pg0[0] = _PAGE_RW | _PAGE_PRESENT | 0; + local_flush_tlb(); + + /* + * Paranoid: Set warm reset code and vector here back + * to default values. + */ + CMOS_WRITE(0, 0xf); + + *((volatile long *) phys_to_virt(0x467)) = 0; + + /* + * Restore old page 0 entry. + */ + pg0[0] = cfg; + local_flush_tlb(); + } +#endif + + /* + * Allow the user to impress friends. + */ + + dprintk("Before bogomips.\n"); + if (!cpucount) { + printk(KERN_ERR "Error: only one processor found.\n"); + cpu_online_map = (1< or the Linux SMP mailing + list at ] + --- linux/Makefile.orig Sat Sep 25 21:31:20 1999 +++ linux/Makefile Sat Sep 25 21:33:06 1999 @@ -86,7 +86,7 @@ # standard CFLAGS # -CFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer +CFLAGS = -Wall -Wstrict-prototypes -O2 -fno-omit-frame-pointer # use '-fno-strict-aliasing', but only if the compiler can take it CFLAGS += $(shell if $(CC) -fno-strict-aliasing -S -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-fno-strict-aliasing"; fi) --- linux/MAINTAINERS.orig Sat Sep 25 21:31:19 1999 +++ linux/MAINTAINERS Sat Sep 25 21:33:06 1999 @@ -428,6 +428,11 @@ L: linux-kernel@vger.rutgers.edu S: Maintained +INTEL APIC/IOAPIC, LOWLEVEL X86 SMP SUPPORT +P: Ingo Molnar +M: mingo@redhat.com +S: Maintained + IP MASQUERADING: P: Juanjo Ciarlante M: jjciarla@raiz.uncu.edu.ar