--- linux/arch/i386/kernel/cpu/common.c.orig Thu Jul 25 12:48:42 2002 +++ linux/arch/i386/kernel/cpu/common.c Thu Jul 25 15:37:42 2002 @@ -421,14 +421,14 @@ */ void __init cpu_init (void) { - int nr = smp_processor_id(); - struct tss_struct * t = &init_tss[nr]; + int cpu = smp_processor_id(); + struct tss_struct * t = init_tss + cpu; - if (test_and_set_bit(nr, &cpu_initialized)) { - printk(KERN_WARNING "CPU#%d already initialized!\n", nr); + if (test_and_set_bit(cpu, &cpu_initialized)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); for (;;) __sti(); } - printk(KERN_INFO "Initializing CPU#%d\n", nr); + printk(KERN_INFO "Initializing CPU#%d\n", cpu); if (cpu_has_vme || cpu_has_tsc || cpu_has_de) clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); @@ -441,7 +441,17 @@ } #endif - __asm__ __volatile__("lgdt %0": "=m" (gdt_descr)); + /* + * Initialize the per-CPU GDT with the boot GDT, + * and set up the GDT descriptor: + */ + if (cpu) { + memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); + cpu_gdt_descr[cpu].size = GDT_SIZE; + cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu]; + } + + __asm__ __volatile__("lgdt %0": "=m" (cpu_gdt_descr[cpu])); __asm__ __volatile__("lidt %0": "=m" (idt_descr)); /* @@ -450,18 +460,18 @@ __asm__("pushfl ; andl $0xffffbfff,(%esp) ; popfl"); /* - * set up and load the per-CPU TSS and LDT + * Set up and load the per-CPU TSS and LDT */ atomic_inc(&init_mm.mm_count); current->active_mm = &init_mm; if(current->mm) BUG(); - enter_lazy_tlb(&init_mm, current, nr); + enter_lazy_tlb(&init_mm, current, cpu); t->esp0 = current->thread.esp0; - set_tss_desc(nr,t); - gdt_table[__TSS(nr)].b &= 0xfffffdff; - load_TR(nr); + set_tss_desc(cpu,t); + cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + load_TR_desc(); load_LDT(&init_mm.context); /* Clear %fs and %gs. 
*/ --- linux/arch/i386/kernel/head.S.orig Thu Jul 25 12:39:59 2002 +++ linux/arch/i386/kernel/head.S Thu Jul 25 18:42:02 2002 @@ -231,7 +231,7 @@ call check_x87 incb ready - lgdt gdt_descr + lgdt cpu_gdt_descr lidt idt_descr ljmp $(__KERNEL_CS),$1f 1: movl $(__KERNEL_DS),%eax # reload all the segment registers @@ -338,29 +338,27 @@ iret /* - * The interrupt descriptor table has room for 256 idt's, - * the global descriptor table is dependent on the number - * of tasks we can have.. + * The IDT and GDT 'descriptors' are a strange 48-bit object + * only used by the lidt and lgdt instructions. They are not + * like usual segment descriptors - they consist of a 16-bit + * segment size, and 32-bit linear address value: */ -#define IDT_ENTRIES 256 -#define GDT_ENTRIES (__TSS(NR_CPUS)) - -.globl idt -.globl gdt +.globl idt_descr +.globl cpu_gdt_descr ALIGN - .word 0 idt_descr: .word IDT_ENTRIES*8-1 # idt contains 256 entries -idt: .long idt_table - .word 0 -gdt_descr: +# boot GDT descriptor (later on used by CPU#0): + +cpu_gdt_descr: .word GDT_ENTRIES*8-1 -gdt: - .long gdt_table + .long cpu_gdt_table + + .fill NR_CPUS-1,6,0 # space for the other GDT descriptors /* * This is initialized to create an identity-mapping at 0-8M (for bootup @@ -418,15 +416,15 @@ * NOTE! Make sure the gdt descriptor in head.S matches this if you * change anything. 
*/ -ENTRY(gdt_table) +ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x0000000000000000 /* not used */ + .quad 0x0000000000000000 /* TLS descriptor */ .quad 0x00cf9a000000ffff /* 0x10 kernel 4GB code at 0x00000000 */ .quad 0x00cf92000000ffff /* 0x18 kernel 4GB data at 0x00000000 */ .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ - .quad 0x0000000000000000 /* not used */ - .quad 0x0000000000000000 /* not used */ + .quad 0x0000000000000000 /* TSS descriptor */ + .quad 0x0000000000000000 /* LDT descriptor */ /* * The APM segments have byte granularity and their bases * and limits are set at run time. @@ -444,5 +442,8 @@ .quad 0x0000000000000000 /* 0x88 not used */ .quad 0x0000000000000000 /* 0x90 not used */ .quad 0x0000000000000000 /* 0x98 not used */ - /* Per CPU segments */ - .fill NR_CPUS*4,8,0 /* space for TSS's and LDT's */ + +#if CONFIG_SMP + .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */ +#endif + --- linux/arch/i386/kernel/apm.c.orig Thu Jul 25 12:59:23 2002 +++ linux/arch/i386/kernel/apm.c Thu Jul 25 18:25:09 2002 @@ -1922,35 +1922,38 @@ * that extends up to the end of page zero (that we have reserved). * This is for buggy BIOS's that refer to (real mode) segment 0x40 * even though they are called in protected mode. + * + * NOTE: on SMP we call into the APM BIOS only on CPU#0, so it's + * enough to modify CPU#0's GDT. 
*/ - set_base(gdt[APM_40 >> 3], + set_base(cpu_gdt_table[0][APM_40 >> 3], __va((unsigned long)0x40 << 4)); - _set_limit((char *)&gdt[APM_40 >> 3], 4095 - (0x40 << 4)); + _set_limit((char *)&cpu_gdt_table[0][APM_40 >> 3], 4095 - (0x40 << 4)); apm_bios_entry.offset = apm_info.bios.offset; apm_bios_entry.segment = APM_CS; - set_base(gdt[APM_CS >> 3], + set_base(cpu_gdt_table[0][APM_CS >> 3], __va((unsigned long)apm_info.bios.cseg << 4)); - set_base(gdt[APM_CS_16 >> 3], + set_base(cpu_gdt_table[0][APM_CS_16 >> 3], __va((unsigned long)apm_info.bios.cseg_16 << 4)); - set_base(gdt[APM_DS >> 3], + set_base(cpu_gdt_table[0][APM_DS >> 3], __va((unsigned long)apm_info.bios.dseg << 4)); #ifndef APM_RELAX_SEGMENTS if (apm_info.bios.version == 0x100) { #endif /* For ASUS motherboard, Award BIOS rev 110 (and others?) */ - _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 - 1); + _set_limit((char *)&cpu_gdt_table[0][APM_CS >> 3], 64 * 1024 - 1); /* For some unknown machine. */ - _set_limit((char *)&gdt[APM_CS_16 >> 3], 64 * 1024 - 1); + _set_limit((char *)&cpu_gdt_table[0][APM_CS_16 >> 3], 64 * 1024 - 1); /* For the DEC Hinote Ultra CT475 (and others?) 
*/ - _set_limit((char *)&gdt[APM_DS >> 3], 64 * 1024 - 1); + _set_limit((char *)&cpu_gdt_table[0][APM_DS >> 3], 64 * 1024 - 1); #ifndef APM_RELAX_SEGMENTS } else { - _set_limit((char *)&gdt[APM_CS >> 3], + _set_limit((char *)&cpu_gdt_table[0][APM_CS >> 3], (apm_info.bios.cseg_len - 1) & 0xffff); - _set_limit((char *)&gdt[APM_CS_16 >> 3], + _set_limit((char *)&cpu_gdt_table[0][APM_CS_16 >> 3], (apm_info.bios.cseg_16_len - 1) & 0xffff); - _set_limit((char *)&gdt[APM_DS >> 3], + _set_limit((char *)&cpu_gdt_table[0][APM_DS >> 3], (apm_info.bios.dseg_len - 1) & 0xffff); } #endif --- linux/arch/i386/kernel/i386_ksyms.c.orig Thu Jul 25 13:01:06 2002 +++ linux/arch/i386/kernel/i386_ksyms.c Thu Jul 25 13:01:12 2002 @@ -74,7 +74,6 @@ EXPORT_SYMBOL(pm_power_off); EXPORT_SYMBOL(get_cmos_time); EXPORT_SYMBOL(apm_info); -EXPORT_SYMBOL(gdt); #ifdef CONFIG_DEBUG_IOVIRT EXPORT_SYMBOL(__io_virt_debug); --- linux/arch/i386/kernel/entry.S.orig Thu Jul 25 16:01:56 2002 +++ linux/arch/i386/kernel/entry.S Thu Jul 25 16:06:06 2002 @@ -764,6 +764,7 @@ .long sys_futex /* 240 */ .long sys_sched_setaffinity .long sys_sched_getaffinity + .long sys_set_thread_area .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall --- linux/arch/i386/kernel/traps.c.orig Thu Jul 25 12:45:16 2002 +++ linux/arch/i386/kernel/traps.c Thu Jul 25 15:31:17 2002 @@ -833,37 +833,6 @@ _set_gate(a,12,3,addr); } -#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\ - *((gate_addr)+1) = ((base) & 0xff000000) | \ - (((base) & 0x00ff0000)>>16) | \ - ((limit) & 0xf0000) | \ - ((dpl)<<13) | \ - (0x00408000) | \ - ((type)<<8); \ - *(gate_addr) = (((base) & 0x0000ffff)<<16) | \ - ((limit) & 0x0ffff); } - -#define _set_tssldt_desc(n,addr,limit,type) \ -__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ - "movw %%ax,2(%2)\n\t" \ - "rorl $16,%%eax\n\t" \ - "movb %%al,4(%2)\n\t" \ - "movb %4,5(%2)\n\t" \ - "movb $0,6(%2)\n\t" \ - "movb %%ah,7(%2)\n\t" \ - "rorl $16,%%eax" \ - : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), 
"i"(type)) - -void set_tss_desc(unsigned int n, void *addr) -{ - _set_tssldt_desc(gdt_table+__TSS(n), (int)addr, 235, 0x89); -} - -void set_ldt_desc(unsigned int n, void *addr, unsigned int size) -{ - _set_tssldt_desc(gdt_table+__LDT(n), (int)addr, ((size << 3)-1), 0x82); -} - #ifdef CONFIG_X86_VISWS_APIC /* --- linux/arch/i386/kernel/suspend.c.orig Thu Jul 25 12:58:07 2002 +++ linux/arch/i386/kernel/suspend.c Thu Jul 25 15:38:02 2002 @@ -203,14 +203,13 @@ void fix_processor_context(void) { - int nr = smp_processor_id(); - struct tss_struct * t = &init_tss[nr]; + int cpu = smp_processor_id(); + struct tss_struct * t = init_tss + cpu; - set_tss_desc(nr,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy tsc or some similar stupidity. */ - gdt_table[__TSS(nr)].b &= 0xfffffdff; - - load_TR(nr); /* This does ltr */ + set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ + cpu_gdt_table[cpu][TSS_ENTRY].b &= 0xfffffdff; + load_TR_desc(); /* This does ltr */ load_LDT(&current->mm->context); /* This does lldt */ /* --- linux/arch/i386/kernel/trampoline.S.orig Thu Jul 25 14:32:42 2002 +++ linux/arch/i386/kernel/trampoline.S Thu Jul 25 18:56:47 2002 @@ -63,9 +63,14 @@ .word 0 # idt limit = 0 .word 0, 0 # idt base = 0L +# +# NOTE: here we actually use CPU#0's GDT - but that is OK, we reload +# the proper GDT shortly after booting up the secondary CPUs.
+# + gdt_48: .word 0x0800 # gdt limit = 2048, 256 GDT entries - .long gdt_table-__PAGE_OFFSET # gdt base = gdt (first SMP CPU) + .long cpu_gdt_table-__PAGE_OFFSET # gdt base = gdt (first SMP CPU) .globl trampoline_end trampoline_end: --- linux/arch/i386/kernel/process.c.orig Thu Jul 25 16:02:50 2002 +++ linux/arch/i386/kernel/process.c Thu Jul 25 19:01:31 2002 @@ -662,7 +662,8 @@ { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; - struct tss_struct *tss = init_tss + smp_processor_id(); + int cpu = smp_processor_id(); + struct tss_struct *tss = init_tss + cpu; /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ @@ -689,6 +690,14 @@ } /* + * Load the per-thread Thread-Local Storage descriptor. + * + * NOTE: it's faster to do the two stores unconditionally + * than to branch away. + */ + load_TLS_desc(next, cpu); + + /* * Now maybe reload the debug registers */ if (unlikely(next->debugreg[7])) { @@ -818,3 +827,58 @@ } #undef last_sched #undef first_sched + +/* + * Set the Thread-Local Storage area: + */ +asmlinkage int sys_set_thread_area(unsigned int base, unsigned int limit, unsigned int flags) +{ + struct thread_struct *t = &current->thread; + int limit_in_pages = 0, writable = 0; + int cpu; + + /* do not allow unused flags */ + if (flags & ~TLS_FLAGS_MASK) + return -EINVAL; + + /* check limit */ + if (limit & 0xfff00000) + return -EINVAL; + + /* + * Clear the TLS? + */ + if (flags & TLS_FLAG_CLEAR) { + cpu = get_cpu(); + t->tls_base = t->tls_limit = t->tls_flags = 0; + t->tls_desc.a = t->tls_desc.b = 0; + load_TLS_desc(t, cpu); + put_cpu(); + return 0; + } + + if (flags & TLS_FLAG_LIMIT_IN_PAGES) + limit_in_pages = 1; + if (flags & TLS_FLAG_WRITABLE) + writable = 1; + + /* + * We must not get preempted while modifying the TLS.
+ */ + cpu = get_cpu(); + t->tls_base = base; + t->tls_limit = limit; + t->tls_flags = flags; + + t->tls_desc.a = ((base & 0x0000ffff) << 16) | (limit & 0x0ffff); + + t->tls_desc.b = (base & 0xff000000) | ((base & 0x00ff0000) >> 16) | + (limit & 0xf0000) | (writable << 9) | (1 << 15) | + (1 << 22) | (limit_in_pages << 23) | 0x7000; + + load_TLS_desc(t, cpu); + put_cpu(); + + return TLS_ENTRY*8 + 3; +} + --- linux/include/asm-i386/desc.h.orig Thu Jul 25 12:40:48 2002 +++ linux/include/asm-i386/desc.h Thu Jul 25 19:01:43 2002 @@ -4,72 +4,59 @@ #include /* - * The layout of the GDT under Linux: + * The layout of the per-CPU GDT under Linux: * * 0 - null - * 1 - not used + * 1 - Thread-Local Storage (TLS) segment * 2 - kernel code segment * 3 - kernel data segment - * 4 - user code segment <-- new cacheline + * 4 - user code segment <==== new cacheline * 5 - user data segment - * 6 - not used - * 7 - not used - * 8 - APM BIOS support <-- new cacheline + * 6 - TSS + * 7 - LDT + * 8 - APM BIOS support <==== new cacheline * 9 - APM BIOS support * 10 - APM BIOS support * 11 - APM BIOS support - * 12 - PNPBIOS support + * 12 - PNPBIOS support <==== new cacheline * 13 - PNPBIOS support * 14 - PNPBIOS support * 15 - PNPBIOS support - * 16 - PNPBIOS support + * 16 - PNPBIOS support <==== new cacheline * 17 - not used * 18 - not used * 19 - not used + */ +#define TLS_ENTRY 1 +#define TSS_ENTRY 6 +#define LDT_ENTRY 7 +/* + * The interrupt descriptor table has room for 256 idt's, + * the global descriptor table is dependent on the number + * of tasks we can have.. * - * The TSS+LDT descriptors are spread out a bit so that every CPU - * has an exclusive cacheline for the per-CPU TSS and LDT: - * - * 20 - CPU#0 TSS <-- new cacheline - * 21 - CPU#0 LDT - * 22 - not used - * 23 - not used - * 24 - CPU#1 TSS <-- new cacheline - * 25 - CPU#1 LDT - * 26 - not used - * 27 - not used - * ... NR_CPUS per-CPU TSS+LDT's if on SMP - * - * Entry into gdt where to find first TSS. 
+ * We pad the GDT to cacheline boundary. */ -#define __FIRST_TSS_ENTRY 20 -#define __FIRST_LDT_ENTRY (__FIRST_TSS_ENTRY+1) - -#define __TSS(n) (((n)<<2) + __FIRST_TSS_ENTRY) -#define __LDT(n) (((n)<<2) + __FIRST_LDT_ENTRY) +#define IDT_ENTRIES 256 +#define GDT_ENTRIES 20 #ifndef __ASSEMBLY__ #include -struct desc_struct { - unsigned long a,b; -}; +#define GDT_SIZE (GDT_ENTRIES*sizeof(struct desc_struct)) -extern struct desc_struct gdt_table[]; -extern struct desc_struct *idt, *gdt; +extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES]; struct Xgt_desc_struct { unsigned short size; unsigned long address __attribute__((packed)); -}; +} __attribute__ ((packed)); -#define idt_descr (*(struct Xgt_desc_struct *)((char *)&idt - 2)) -#define gdt_descr (*(struct Xgt_desc_struct *)((char *)&gdt - 2)) +extern struct Xgt_desc_struct idt_descr, cpu_gdt_descr[NR_CPUS]; -#define load_TR(n) __asm__ __volatile__("ltr %%ax"::"a" (__TSS(n)<<3)) - -#define __load_LDT(n) __asm__ __volatile__("lldt %%ax"::"a" (__LDT(n)<<3)) +#define load_TR_desc() __asm__ __volatile__("ltr %%ax"::"a" (TSS_ENTRY<<3)) +#define load_LDT_desc() __asm__ __volatile__("lldt %%ax"::"a" (LDT_ENTRY<<3)) /* * This is the ldt that every process will get unless we need @@ -77,14 +64,43 @@ */ extern struct desc_struct default_ldt[]; extern void set_intr_gate(unsigned int irq, void * addr); -extern void set_ldt_desc(unsigned int n, void *addr, unsigned int size); -extern void set_tss_desc(unsigned int n, void *addr); + +#define _set_tssldt_desc(n,addr,limit,type) \ +__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \ + "movw %%ax,2(%2)\n\t" \ + "rorl $16,%%eax\n\t" \ + "movb %%al,4(%2)\n\t" \ + "movb %4,5(%2)\n\t" \ + "movb $0,6(%2)\n\t" \ + "movb %%ah,7(%2)\n\t" \ + "rorl $16,%%eax" \ + : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type)) + +static inline void set_tss_desc(unsigned int cpu, void *addr) +{ + _set_tssldt_desc(&cpu_gdt_table[cpu][TSS_ENTRY], (int)addr, 235, 0x89); +} + +static inline void 
set_ldt_desc(unsigned int cpu, void *addr, unsigned int size) +{ + _set_tssldt_desc(&cpu_gdt_table[cpu][LDT_ENTRY], (int)addr, ((size << 3)-1), 0x82); +} + +#define TLS_FLAGS_MASK 0x00000007 + +#define TLS_FLAG_LIMIT_IN_PAGES 0x00000001 +#define TLS_FLAG_WRITABLE 0x00000002 +#define TLS_FLAG_CLEAR 0x00000004 + +static inline void load_TLS_desc(struct thread_struct *t, unsigned int cpu) +{ + cpu_gdt_table[cpu][TLS_ENTRY] = t->tls_desc; +} static inline void clear_LDT(void) { - int cpu = smp_processor_id(); - set_ldt_desc(cpu, &default_ldt[0], 5); - __load_LDT(cpu); + set_ldt_desc(smp_processor_id(), &default_ldt[0], 5); + load_LDT_desc(); } /* @@ -92,17 +108,16 @@ */ static inline void load_LDT (mm_context_t *pc) { - int cpu = smp_processor_id(); void *segments = pc->ldt; int count = pc->size; - if (!count) { + if (likely(!count)) { segments = &default_ldt[0]; count = 5; } - set_ldt_desc(cpu, segments, count); - __load_LDT(cpu); + set_ldt_desc(smp_processor_id(), segments, count); + load_LDT_desc(); } #endif /* !__ASSEMBLY__ */ --- linux/include/asm-i386/processor.h.orig Thu Jul 25 15:11:14 2002 +++ linux/include/asm-i386/processor.h Thu Jul 25 17:05:18 2002 @@ -18,6 +18,10 @@ #include #include +struct desc_struct { + unsigned long a,b; +}; + /* * Default implementation of macro that returns current * instruction pointer ("program counter"). 
@@ -372,6 +376,9 @@ unsigned long v86flags, v86mask, v86mode, saved_esp0; /* IO permissions */ unsigned long *ts_io_bitmap; +/* TLS info and cached descriptor */ + unsigned int tls_base, tls_limit, tls_flags; + struct desc_struct tls_desc; }; #define INIT_THREAD { \ @@ -395,7 +402,7 @@ 0,0,0,0, /* esp,ebp,esi,edi */ \ 0,0,0,0,0,0, /* es,cs,ss */ \ 0,0,0,0,0,0, /* ds,fs,gs */ \ - __LDT(0),0, /* ldt */ \ + LDT_ENTRY,0, /* ldt */ \ 0, INVALID_IO_BITMAP_OFFSET, /* tace, bitmap */ \ {~0, } /* ioperm */ \ } --- linux/include/asm-i386/mmu_context.h.orig Thu Jul 25 15:45:34 2002 +++ linux/include/asm-i386/mmu_context.h Thu Jul 25 15:45:59 2002 @@ -17,7 +17,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk, unsigned cpu) { - if(cpu_tlbstate[cpu].state == TLBSTATE_OK) + if (cpu_tlbstate[cpu].state == TLBSTATE_OK) cpu_tlbstate[cpu].state = TLBSTATE_LAZY; } #else @@ -40,18 +40,18 @@ /* Re-load page tables */ load_cr3(next->pgd); - /* load_LDT, if either the previous or next thread - * has a non-default LDT. + /* + * load the LDT, if the LDT is different: */ - if (next->context.size+prev->context.size) + if (unlikely(prev->context.ldt != next->context.ldt)) load_LDT(&next->context); } #ifdef CONFIG_SMP else { cpu_tlbstate[cpu].state = TLBSTATE_OK; - if(cpu_tlbstate[cpu].active_mm != next) + if (cpu_tlbstate[cpu].active_mm != next) BUG(); - if(!test_and_set_bit(cpu, &next->cpu_vm_mask)) { + if (!test_and_set_bit(cpu, &next->cpu_vm_mask)) { /* We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload %cr3. */