--- linux/arch/i386/kernel/cpu/proc.c.orig +++ linux/arch/i386/kernel/cpu/proc.c @@ -79,10 +79,8 @@ static int show_cpuinfo(struct seq_file else seq_printf(m, "stepping\t: unknown\n"); - if ( cpu_has(c, X86_FEATURE_TSC) ) { - seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", - cpu_khz / 1000, (cpu_khz % 1000)); - } + seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", + cpu_khz / 1000, (cpu_khz % 1000)); /* Cache size */ if (c->x86_cache_size >= 0) --- linux/arch/i386/kernel/i387.c.orig +++ linux/arch/i386/kernel/i387.c @@ -33,16 +33,16 @@ void init_fpu(struct task_struct *tsk) { if (cpu_has_fxsr) { - memset(&tsk->thread.i387.fxsave, 0, sizeof(struct i387_fxsave_struct)); - tsk->thread.i387.fxsave.cwd = 0x37f; + memset(&tsk->thread.fpu->fxsave, 0, sizeof(struct i387_fxsave_struct)); + tsk->thread.fpu->fxsave.cwd = 0x37f; if (cpu_has_xmm) - tsk->thread.i387.fxsave.mxcsr = 0x1f80; + tsk->thread.fpu->fxsave.mxcsr = 0x1f80; } else { - memset(&tsk->thread.i387.fsave, 0, sizeof(struct i387_fsave_struct)); - tsk->thread.i387.fsave.cwd = 0xffff037fu; - tsk->thread.i387.fsave.swd = 0xffff0000u; - tsk->thread.i387.fsave.twd = 0xffffffffu; - tsk->thread.i387.fsave.fos = 0xffff0000u; + memset(&tsk->thread.fpu->fsave, 0, sizeof(struct i387_fsave_struct)); + tsk->thread.fpu->fsave.cwd = 0xffff037fu; + tsk->thread.fpu->fsave.swd = 0xffff0000u; + tsk->thread.fpu->fsave.twd = 0xffffffffu; + tsk->thread.fpu->fsave.fos = 0xffff0000u; } tsk->used_math = 1; } @@ -67,10 +67,10 @@ void restore_fpu( struct task_struct *ts { if ( cpu_has_fxsr ) { asm volatile( "fxrstor %0" - : : "m" (tsk->thread.i387.fxsave) ); + : : "m" (tsk->thread.fpu->fxsave) ); } else { asm volatile( "frstor %0" - : : "m" (tsk->thread.i387.fsave) ); + : : "m" (tsk->thread.fpu->fsave) ); } } @@ -144,34 +144,34 @@ static inline unsigned long twd_fxsr_to_ unsigned short get_fpu_cwd( struct task_struct *tsk ) { if ( cpu_has_fxsr ) { - return tsk->thread.i387.fxsave.cwd; + return tsk->thread.fpu->fxsave.cwd; } else { - return (unsigned 
short)tsk->thread.i387.fsave.cwd; + return (unsigned short)tsk->thread.fpu->fsave.cwd; } } unsigned short get_fpu_swd( struct task_struct *tsk ) { if ( cpu_has_fxsr ) { - return tsk->thread.i387.fxsave.swd; + return tsk->thread.fpu->fxsave.swd; } else { - return (unsigned short)tsk->thread.i387.fsave.swd; + return (unsigned short)tsk->thread.fpu->fsave.swd; } } unsigned short get_fpu_twd( struct task_struct *tsk ) { if ( cpu_has_fxsr ) { - return tsk->thread.i387.fxsave.twd; + return tsk->thread.fpu->fxsave.twd; } else { - return (unsigned short)tsk->thread.i387.fsave.twd; + return (unsigned short)tsk->thread.fpu->fsave.twd; } } unsigned short get_fpu_mxcsr( struct task_struct *tsk ) { if ( cpu_has_xmm ) { - return tsk->thread.i387.fxsave.mxcsr; + return tsk->thread.fpu->fxsave.mxcsr; } else { return 0x1f80; } @@ -180,34 +180,34 @@ unsigned short get_fpu_mxcsr( struct tas void set_fpu_cwd( struct task_struct *tsk, unsigned short cwd ) { if ( cpu_has_fxsr ) { - tsk->thread.i387.fxsave.cwd = cwd; + tsk->thread.fpu->fxsave.cwd = cwd; } else { - tsk->thread.i387.fsave.cwd = ((long)cwd | 0xffff0000u); + tsk->thread.fpu->fsave.cwd = ((long)cwd | 0xffff0000u); } } void set_fpu_swd( struct task_struct *tsk, unsigned short swd ) { if ( cpu_has_fxsr ) { - tsk->thread.i387.fxsave.swd = swd; + tsk->thread.fpu->fxsave.swd = swd; } else { - tsk->thread.i387.fsave.swd = ((long)swd | 0xffff0000u); + tsk->thread.fpu->fsave.swd = ((long)swd | 0xffff0000u); } } void set_fpu_twd( struct task_struct *tsk, unsigned short twd ) { if ( cpu_has_fxsr ) { - tsk->thread.i387.fxsave.twd = twd_i387_to_fxsr(twd); + tsk->thread.fpu->fxsave.twd = twd_i387_to_fxsr(twd); } else { - tsk->thread.i387.fsave.twd = ((long)twd | 0xffff0000u); + tsk->thread.fpu->fsave.twd = ((long)twd | 0xffff0000u); } } void set_fpu_mxcsr( struct task_struct *tsk, unsigned short mxcsr ) { if ( cpu_has_xmm ) { - tsk->thread.i387.fxsave.mxcsr = (mxcsr & 0xffbf); + tsk->thread.fpu->fxsave.mxcsr = (mxcsr & 0xffbf); } } @@ 
-295,8 +295,8 @@ static inline int save_i387_fsave( struc struct task_struct *tsk = current; unlazy_fpu( tsk ); - tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd; - if ( __copy_to_user( buf, &tsk->thread.i387.fsave, + tsk->thread.fpu->fsave.status = tsk->thread.fpu->fsave.swd; + if ( __copy_to_user( buf, &tsk->thread.fpu->fsave, sizeof(struct i387_fsave_struct) ) ) return -1; return 1; @@ -309,15 +309,15 @@ static int save_i387_fxsave( struct _fps unlazy_fpu( tsk ); - if ( convert_fxsr_to_user( buf, &tsk->thread.i387.fxsave ) ) + if ( convert_fxsr_to_user( buf, &tsk->thread.fpu->fxsave ) ) return -1; - err |= __put_user( tsk->thread.i387.fxsave.swd, &buf->status ); + err |= __put_user( tsk->thread.fpu->fxsave.swd, &buf->status ); err |= __put_user( X86_FXSR_MAGIC, &buf->magic ); if ( err ) return -1; - if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.i387.fxsave, + if ( __copy_to_user( &buf->_fxsr_env[0], &tsk->thread.fpu->fxsave, sizeof(struct i387_fxsave_struct) ) ) return -1; return 1; @@ -340,7 +340,7 @@ int save_i387( struct _fpstate __user *b return save_i387_fsave( buf ); } } else { - return save_i387_soft( ¤t->thread.i387.soft, buf ); + return save_i387_soft( ¤t->thread.fpu->soft, buf ); } } @@ -348,7 +348,7 @@ static inline int restore_i387_fsave( st { struct task_struct *tsk = current; clear_fpu( tsk ); - return __copy_from_user( &tsk->thread.i387.fsave, buf, + return __copy_from_user( &tsk->thread.fpu->fsave, buf, sizeof(struct i387_fsave_struct) ); } @@ -357,11 +357,11 @@ static int restore_i387_fxsave( struct _ int err; struct task_struct *tsk = current; clear_fpu( tsk ); - err = __copy_from_user( &tsk->thread.i387.fxsave, &buf->_fxsr_env[0], + err = __copy_from_user( &tsk->thread.fpu->fxsave, &buf->_fxsr_env[0], sizeof(struct i387_fxsave_struct) ); /* mxcsr bit 6 and 31-16 must be zero for security reasons */ - tsk->thread.i387.fxsave.mxcsr &= 0xffbf; - return err ? 
1 : convert_fxsr_from_user( &tsk->thread.i387.fxsave, buf ); + tsk->thread.fpu->fxsave.mxcsr &= 0xffbf; + return err ? 1 : convert_fxsr_from_user( &tsk->thread.fpu->fxsave, buf ); } int restore_i387( struct _fpstate __user *buf ) @@ -375,7 +375,7 @@ int restore_i387( struct _fpstate __user err = restore_i387_fsave( buf ); } } else { - err = restore_i387_soft( ¤t->thread.i387.soft, buf ); + err = restore_i387_soft( ¤t->thread.fpu->soft, buf ); } current->used_math = 1; return err; @@ -388,7 +388,7 @@ int restore_i387( struct _fpstate __user static inline int get_fpregs_fsave( struct user_i387_struct __user *buf, struct task_struct *tsk ) { - return __copy_to_user( buf, &tsk->thread.i387.fsave, + return __copy_to_user( buf, &tsk->thread.fpu->fsave, sizeof(struct user_i387_struct) ); } @@ -396,7 +396,7 @@ static inline int get_fpregs_fxsave( str struct task_struct *tsk ) { return convert_fxsr_to_user( (struct _fpstate __user *)buf, - &tsk->thread.i387.fxsave ); + &tsk->thread.fpu->fxsave ); } int get_fpregs( struct user_i387_struct __user *buf, struct task_struct *tsk ) @@ -408,7 +408,7 @@ int get_fpregs( struct user_i387_struct return get_fpregs_fsave( buf, tsk ); } } else { - return save_i387_soft( &tsk->thread.i387.soft, + return save_i387_soft( &tsk->thread.fpu->soft, (struct _fpstate __user *)buf ); } } @@ -416,14 +416,14 @@ int get_fpregs( struct user_i387_struct static inline int set_fpregs_fsave( struct task_struct *tsk, struct user_i387_struct __user *buf ) { - return __copy_from_user( &tsk->thread.i387.fsave, buf, + return __copy_from_user( &tsk->thread.fpu->fsave, buf, sizeof(struct user_i387_struct) ); } static inline int set_fpregs_fxsave( struct task_struct *tsk, struct user_i387_struct __user *buf ) { - return convert_fxsr_from_user( &tsk->thread.i387.fxsave, + return convert_fxsr_from_user( &tsk->thread.fpu->fxsave, (struct _fpstate __user *)buf ); } @@ -436,7 +436,7 @@ int set_fpregs( struct task_struct *tsk, return set_fpregs_fsave( tsk, buf ); } } 
else { - return restore_i387_soft( &tsk->thread.i387.soft, + return restore_i387_soft( &tsk->thread.fpu->soft, (struct _fpstate __user *)buf ); } } @@ -444,7 +444,7 @@ int set_fpregs( struct task_struct *tsk, int get_fpxregs( struct user_fxsr_struct __user *buf, struct task_struct *tsk ) { if ( cpu_has_fxsr ) { - if (__copy_to_user( buf, &tsk->thread.i387.fxsave, + if (__copy_to_user( buf, &tsk->thread.fpu->fxsave, sizeof(struct user_fxsr_struct) )) return -EFAULT; return 0; @@ -458,11 +458,11 @@ int set_fpxregs( struct task_struct *tsk int ret = 0; if ( cpu_has_fxsr ) { - if (__copy_from_user( &tsk->thread.i387.fxsave, buf, + if (__copy_from_user( &tsk->thread.fpu->fxsave, buf, sizeof(struct user_fxsr_struct) )) ret = -EFAULT; /* mxcsr bit 6 and 31-16 must be zero for security reasons */ - tsk->thread.i387.fxsave.mxcsr &= 0xffbf; + tsk->thread.fpu->fxsave.mxcsr &= 0xffbf; } else { ret = -EIO; } @@ -476,7 +476,7 @@ int set_fpxregs( struct task_struct *tsk static inline void copy_fpu_fsave( struct task_struct *tsk, struct user_i387_struct *fpu ) { - memcpy( fpu, &tsk->thread.i387.fsave, + memcpy( fpu, &tsk->thread.fpu->fsave, sizeof(struct user_i387_struct) ); } @@ -487,10 +487,10 @@ static inline void copy_fpu_fxsave( stru unsigned short *from; int i; - memcpy( fpu, &tsk->thread.i387.fxsave, 7 * sizeof(long) ); + memcpy( fpu, &tsk->thread.fpu->fxsave, 7 * sizeof(long) ); to = (unsigned short *)&fpu->st_space[0]; - from = (unsigned short *)&tsk->thread.i387.fxsave.st_space[0]; + from = (unsigned short *)&tsk->thread.fpu->fxsave.st_space[0]; for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) { memcpy( to, from, 5 * sizeof(unsigned short) ); } @@ -522,7 +522,7 @@ int dump_extended_fpu( struct pt_regs *r fpvalid = tsk->used_math && cpu_has_fxsr; if ( fpvalid ) { unlazy_fpu( tsk ); - memcpy( fpu, &tsk->thread.i387.fxsave, + memcpy( fpu, &tsk->thread.fpu->fxsave, sizeof(struct user_fxsr_struct) ); } @@ -551,7 +551,7 @@ int dump_task_extended_fpu(struct task_s if (fpvalid) { 
if (tsk == current) unlazy_fpu(tsk); - memcpy(fpu, &tsk->thread.i387.fxsave, sizeof(*fpu)); + memcpy(fpu, &tsk->thread.fpu->fxsave, sizeof(*fpu)); } return fpvalid; }
--- linux/arch/i386/kernel/vcpu.c.orig
+++ linux/arch/i386/kernel/vcpu.c
@@ -0,0 +1,497 @@
+/*
+ *  linux/arch/i386/kernel/vcpu.c
+ *
+ *  Virtual CPU: Virtual Kernel Mode & Virtual User Mode Support
+ *
+ *  started by Ingo Molnar, Copyright (C) 2004
+ */
+
+/*
+ * NOTE(review): the header names of the seven #include directives below
+ * were lost when this patch was extracted; restore them from the original
+ * posting before applying.
+ */
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+/* EFLAGS bits kept from a user-supplied vuser register set, see set_vuser() */
+#define FLAG_MASK 0x00044dd5
+
+#define vuser() (&current->thread.vuser)
+#define vkernel() (&current->thread.vkernel)
+
+extern int panic_timeout;
+
+/* Debug output, emitted only while panic_timeout is non-zero */
+#define Printk(x...) do { if (panic_timeout) printk(x); } while (0)
+
+/* Number of instruction bytes dumped at the trapping EIP by vcpu_trap() */
+#define VCPU_INSN_DUMP_BYTES 16
+
+/* Debug helper: print one general-purpose register set plus FS/GS. */
+static void show_vregs(struct x86_pt_regs *regs, int fs, int gs)
+{
+	Printk("| EIP: %08lx ESP: %08lx EFL: %08lx DS: %04x ES: %04x |\n", regs->eip, regs->esp, regs->eflags, regs->xds, regs->xes);
+	Printk("| EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx SS: %04x CS: %04x |\n",regs->eax, regs->ebx, regs->ecx, regs->edx, regs->xss, regs->xcs);
+	Printk("| ESI: %08lx EDI: %08lx EBP: %08lx OAX: %08lx FS: %04x GS: %04x |\n", regs->esi, regs->edi, regs->ebp, regs->orig_eax, fs, gs);
+}
+
+/* Debug helper: print the FPU control fields and words from st_space of @fp. */
+static void show_vfpu(struct x86_fxsave_struct *fp)
+{
+	int i;
+
+	Printk("---------------------------------------------------------\n");
+	Printk("| cwd:%04x swd:%04x twd:%04x fop:%04x fip:%08lx |\n",
+		fp->cwd, fp->swd, fp->twd, fp->fop, fp->fip);
+	Printk("| fcs:%08lx foo:%08lx fos:%08lx mxcsr:%08lx |\n",
+		fp->fcs, fp->foo, fp->fos, fp->mxcsr);
+
+	for (i = 0; i < 8; i += 2) {
+		long *ptr = (void *)fp->st_space + i*8;
+
+		Printk("| st[%d]: %08lx:%08lx st[%d]: %08lx:%08lx |\n",
+			i, ptr[0], ptr[1], i+1, ptr[2], ptr[3]);
+	}
+	Printk("---------------------------------------------------------\n");
+}
+
+/*
+ * Debug helper: dump the host frame, the saved vuser state and the saved
+ * vkernel state. Prints nothing when panic_timeout is 0 or >= 2.
+ */
+static void print_vcpu_state(struct pt_regs *__regs)
+{
+	struct x86_pt_regs *regs = (void *)__regs;
+	int xfs = 0, xgs = 0;
+
+	if (panic_timeout >= 2)
+		return;
+	savesegment(fs, xfs);
+	savesegment(gs, xgs);
+	xfs &= 0xffff; xgs &= 0xffff;
+
+	Printk("-------------------------------------------------------------------------------\n");
+	Printk("| [host regs (%p): |\n", regs);
+	show_vregs(regs, xfs, xgs);
+	Printk("| |\n");
+	Printk("| [vuser regs] |\n");
+	show_vregs(&vuser()->regs, vuser()->xfs, vuser()->xgs);
+	Printk("| |\n");
+	Printk("| [vkernel regs] |\n");
+	show_vregs(&vkernel()->regs, vkernel()->xfs, vkernel()->xgs);
+	Printk("-------------------------------------------------------------------------------\n");
+}
+
+/*
+ * Validate and install a vuser state supplied through sys_vcpu().
+ *
+ * Resolves new_state->mm via proc_mm_get_mm(), sets the low two bits of
+ * every segment selector, restricts EFLAGS to FLAG_MASK with IF set (and
+ * TF when @singlestep), then copies the result into the task's vuser slot.
+ *
+ * Returns 0 on success, -EBADF if the mm cannot be resolved.
+ */
+static int set_vuser(struct vcpu_state_struct *new_state, int singlestep)
+{
+	struct x86_pt_regs *regs = &new_state->regs;
+	struct mm_struct *new_mm;
+
+	new_mm = proc_mm_get_mm((int)new_state->mm);
+	if (IS_ERR(new_mm))
+		return -EBADF;
+	new_state->mm = new_mm;
+	Printk("set_vuser(mm:%p/%d) singlestep:%d\n", new_mm, atomic_read(&new_mm->mm_users), singlestep);
+
+	/* Sanitize privileged register values: */
+	regs->xds |= 3; regs->xes |= 3; regs->xcs |= 3; regs->xss |= 3;
+	new_state->xfs |= 3; new_state->xgs |= 3;
+	regs->eflags &= FLAG_MASK;
+	regs->eflags |= IF_MASK;
+	if (singlestep)
+		regs->eflags |= TF_MASK;
+
+	struct_cpy(vuser(), new_state);
+
+	return 0;
+}
+
+/*
+ * Make @new_mm the current task's address space: update current->mm and
+ * current->active_mm, switch page tables, and reload the LDT when either
+ * mm has a non-zero context.size.
+ */
+static void switch_to_mm(struct mm_struct *new_mm)
+{
+	struct mm_struct *old_mm = current->mm;
+
+	Printk("switch_to_mm(%p/%d => %p/%d)\n", old_mm, atomic_read(&old_mm->mm_users), new_mm, atomic_read(&new_mm->mm_users));
+	BUG_ON((unsigned long)new_mm < __PAGE_OFFSET);
+
+	current->mm = new_mm;
+	current->active_mm = new_mm;
+
+#ifdef CONFIG_X86_SWITCH_PAGETABLES
+	current->thread_info->user_pgd = (void *)__pa(new_mm->pgd);
+#else
+	load_cr3(new_mm->pgd);
+#endif
+	/*
+	 * load the LDT, if the LDT is different:
+	 */
+	if (unlikely(old_mm->context.size + new_mm->context.size)) {
+		Printk("switching LDT (%d => %d)\n", old_mm->context.size,
+			new_mm->context.size);
+		load_LDT(&new_mm->context);
+	}
+}
+
+/* Switch to the address space recorded in the vuser state. */
+static void switch_to_vuser_mm(void)
+{
+	switch_to_mm(vuser()->mm);
+}
+
+/* Save the vkernel register frame and FS/GS selectors (masked to 16 bits). */
+static void save_vkernel_regs(struct pt_regs *regs)
+{
+	struct_cpy(&vkernel()->regs, regs);
+
+	savesegment(fs, vkernel()->xfs);
+	savesegment(gs, vkernel()->xgs);
+	vkernel()->xfs &= 0xffff;
+	vkernel()->xgs &= 0xffff;
+}
+
+/* Same for the vuser slot; TF is cleared in the saved EFLAGS. */
+static void save_vuser_regs(struct pt_regs *regs)
+{
+	struct_cpy(&vuser()->regs, regs);
+
+	savesegment(fs, vuser()->xfs);
+	savesegment(gs, vuser()->xgs);
+	vuser()->xfs &= 0xffff;
+	vuser()->xgs &= 0xffff;
+	vuser()->regs.eflags &= ~TF_MASK;
+}
+
+/* Record the vkernel registers and current->mm before entering vuser mode. */
+static void save_vkernel_state(struct pt_regs *regs)
+{
+	save_vkernel_regs(regs);
+	vkernel()->mm = current->mm;
+	Printk("save_vkernel_state(mm:%p/%d)\n", current->mm, atomic_read(&current->mm->mm_users));
+	/* save TLS */
+}
+
+/* Load the saved vuser registers into @regs and reload FS/GS. */
+static void switch_to_vuser_regs(struct pt_regs *regs)
+{
+	struct_cpy(regs, &vuser()->regs);
+
+	loadsegment(fs, vuser()->xfs);
+	loadsegment(gs, vuser()->xgs);
+}
+
+/* Load the saved vkernel registers into @regs and reload FS/GS. */
+static void switch_to_vkernel_regs(struct pt_regs *regs)
+{
+	struct_cpy(regs, &vkernel()->regs);
+
+	loadsegment(fs, vkernel()->xfs);
+	loadsegment(gs, vkernel()->xgs);
+}
+
+/*
+ * Point thread.fpu at the task's __vfpu area and call stts(); trap 7 taken
+ * in vuser mode is then serviced by handle_vfpu_trap() via vcpu_trap().
+ */
+static void vcpu_switch_to_vfpu(void)
+{
+//	save_init_fpu(current); // hack
+	unlazy_fpu(current);
+	current->thread.fpu = &current->thread.__vfpu;
+	stts();
+
+	Printk("vcpu_switch_to_vfpu [disabled] [mm: %p].\n", current->thread.vuser.mm);
+//	show_vfpu((void *)current->thread.fpu);
+}
+
+/*
+ * Enter vuser mode: install @vuser, save the vkernel state, switch mm, FPU
+ * area and register frame, then set TIF_VUSER.
+ *
+ * Returns 0 on success or the error from set_vuser().
+ */
+static long vcpu_switch_run_vuser(struct vcpu_state_struct *vuser, struct pt_regs *regs, int singlestep)
+{
+	int err;
+
+	err = set_vuser(vuser, singlestep);
+	if (err)
+		return err;
+
+	save_vkernel_state(regs);
+	switch_to_vuser_mm();
+	vcpu_switch_to_vfpu();
+	switch_to_vuser_regs(regs);
+
+	set_thread_flag(TIF_VUSER);
+
+	Printk("%s:%d switch_run_vuser.\n", current->comm, current->pid);
+	print_vcpu_state(regs);
+
+	return 0;
+}
+
+/*
+ * Leave the vuser FPU context. If vuser FPU state is live (TS_USEDVFPU) it
+ * is written back to the user buffer registered by sys_vcpu(); in every
+ * case thread.fpu is pointed back at the task's __fpu area.
+ *
+ * Returns 0 on success, -EINVAL if state was live but no user buffer is
+ * registered, -EFAULT if the buffer cannot be written.
+ */
+static int vcpu_switch_to_fpu(void)
+{
+	int ret = 0;
+	struct thread_info *ti = current->thread_info;
+	struct thread_struct *thread = &current->thread;
+
+	BUG_ON(!cpu_has_fxsr);
+	BUG_ON((ti->status & TS_USEDFPU) && !(ti->status & TS_USEDVFPU));
+
+	if (!(ti->status & TS_USEDVFPU))
+		goto out;
+
+	unlazy_fpu(current);
+
+	ret = -EINVAL;
+	if (thread->vfpu_user) {
+		/* copy_to_user() returns the number of bytes NOT copied */
+		ret = 0;
+		if (copy_to_user(thread->vfpu_user,
+				 thread->fpu, sizeof(*thread->fpu)))
+			ret = -EFAULT;
+		Printk("vcpu_switch_to_fpu [mm: %p].\n", thread->vuser.mm);
+		Printk("FPU state passed back to user-space [%p]:\n", thread->vfpu_user);
+		show_vfpu((void *)thread->fpu);
+	}
+
+	ti->status &= ~TS_USEDVFPU;
+out:
+	thread->fpu = &thread->__fpu;
+	stts();
+
+	Printk("vcpu_switch_to_fpu() => %d.\n", ret);
+	return ret;
+}
+
+/*
+ * Service trap 7 taken in vuser mode: load the FPU state from the user
+ * buffer registered by sys_vcpu() with fxrstor and mark it live.
+ *
+ * Returns 0 on success, -EINVAL if no buffer is registered, -EFAULT if it
+ * cannot be read.
+ */
+static int handle_vfpu_trap(void)
+{
+	struct thread_struct *thread = &current->thread;
+
+	BUG_ON(!cpu_has_fxsr);
+
+	if (!thread->vfpu_user)
+		return -EINVAL;
+
+	/* copy_from_user() returns the number of bytes NOT copied */
+	if (copy_from_user(thread->fpu,
+			   thread->vfpu_user, sizeof(*thread->fpu)))
+		return -EFAULT;
+	/* mxcsr bit 6 and 31-16 must be zero for security reasons */
+	thread->fpu->fxsave.mxcsr &= 0xffbf;
+
+	clts();
+//	__save_init_fpu(current); // hack
+	asm volatile ("fxrstor %0" : : "m" (*thread->fpu));
+	current->thread_info->status |= (TS_USEDFPU|TS_USEDVFPU);
+
+	Printk("handle_vfpu_trap [mm: %p].\n", thread->vuser.mm);
+	show_vfpu((void *)thread->fpu);
+
+	return 0;
+}
+
+/*
+ * Return from vuser to vkernel: save the vuser frame, restore the vkernel
+ * frame, then copy the FPU state and the vuser state block to user space.
+ *
+ * Returns 0 on success or a negative errno.
+ *
+ * NOTE(review): the block copied out still holds the mm pointer stored by
+ * set_vuser() - confirm that exposing it to user space is intended.
+ */
+static int vcpu_switch_to_vkernel(struct pt_regs *regs)
+{
+	int ret;
+
+	Printk("%s:%d switch_to_vkernel (frame: %p (%p)).\n",
+		current->comm, current->pid, &regs, regs);
+	print_vcpu_state(regs);
+
+	clear_thread_flag(TIF_VUSER);
+
+	save_vuser_regs(regs);
+	switch_to_vkernel_regs(regs);
+
+	ret = vcpu_switch_to_fpu();
+	if (!ret && copy_to_user(current->thread.vuser_ptr,
+				 vuser(), sizeof(*vuser())))
+		ret = -EFAULT;
+	Printk("vuser eax: %08lx\n", vuser()->regs.eax);
+
+	return ret;
+}
+
+/*
+ * Called by the trap handlers for a task in vuser mode: record the trap in
+ * the vuser state and return to vkernel with VCPU_EVENT_TRAP (or a negative
+ * errno) in EAX. Trap 7 is first offered to handle_vfpu_trap(); when that
+ * succeeds the task simply resumes in vuser mode.
+ */
+void vcpu_trap(struct pt_regs *regs, int trapno, long error_code,
+		unsigned long addr)
+{
+	int ret;
+
+	if (panic_timeout) {
+		unsigned char tmp[VCPU_INSN_DUMP_BYTES];
+		int i, left;
+
+		Printk("vuser trap#%d [%08lx, %08lx] at EIP %08lx [frame: %p, ca:%p].\n",
+			trapno, error_code, addr, regs->eip, regs,
+			__builtin_return_address(0));
+		memset(tmp, 0xab, sizeof(tmp));
+		left = copy_from_user(tmp, (void *)regs->eip, sizeof(tmp));
+		printk("insn at %08lx (%d):", regs->eip, left);
+		for (i = 0; i < VCPU_INSN_DUMP_BYTES; i++)
+			printk(" %02x", tmp[i]);
+		printk("\n");
+	}
+
+	vuser()->trap.trapno = trapno;
+	vuser()->trap.error_code = error_code;
+	vuser()->trap.addr = addr;
+
+	switch_to_mm(vkernel()->mm);
+
+	if (trapno == 7) {
+		if (!handle_vfpu_trap()) {
+			switch_to_vuser_mm();
+			return;
+		}
+	}
+	ret = vcpu_switch_to_vkernel(regs);
+
+	if (!ret)
+		ret = VCPU_EVENT_TRAP;
+
+	regs->eax = ret;
+}
+
+/* Signal pending in vuser mode: return to vkernel with VCPU_EVENT_SIGNAL. */
+void vcpu_signal(struct pt_regs *regs)
+{
+	int ret;
+
+	Printk("vuser sigpending at EIP %08lx [frame: %p].\n",
+		regs->eip, regs);
+
+	switch_to_mm(vkernel()->mm);
+	ret = vcpu_switch_to_vkernel(regs);
+	if (!ret)
+		ret = VCPU_EVENT_SIGNAL;
+	vcpu_switch_to_fpu();
+	regs->eax = ret;
+}
+
+/* Syscall issued in vuser mode: return to vkernel with VCPU_EVENT_SYSCALL. */
+void vcpu_vuser_syscall(struct pt_regs regs)
+{
+	int ret;
+
+	Printk("vuser syscall at EIP %08lx [frame: %p].\n",
+		regs.eip, &regs);
+
+	switch_to_mm(vkernel()->mm);
+	ret = vcpu_switch_to_vkernel(&regs);
+	if (!ret)
+		ret = VCPU_EVENT_SYSCALL;
+	vcpu_switch_to_fpu();
+	regs.eax = ret;
+}
+
+/*
+ * The vcpu system call. EBX holds the operation and ECX a user pointer to
+ * a struct vcpu_state_struct.
+ *
+ * Returns 0 after switching to vuser mode, -EFAULT if a user buffer cannot
+ * be read, -EINVAL on a bad magic value, -ENOSYS for unsupported
+ * operations, or the error from vcpu_switch_run_vuser().
+ */
+long sys_vcpu(struct pt_regs regs)
+{
+	int op = regs.ebx;
+	struct vcpu_state_struct __user *__vuser = (void *)regs.ecx;
+	struct vcpu_state_struct vuser;
+	int err, ret;
+
+	/* copy_from_user() returns the number of bytes NOT copied */
+	err = copy_from_user(&vuser, __vuser, sizeof(vuser));
+	if (err) {
+		Printk("hm1, %d.\n", err);
+		return -EFAULT;
+	}
+
+	if (vuser.magic != VCPU_MAGIC) {
+		Printk("hm, bad magic %08x.\n", vuser.magic);
+		return -EINVAL;
+	}
+	current->thread.vuser_ptr = __vuser;
+	current->thread.vfpu_user = (void *)vuser.fxregs;
+
+	Printk("sys_vcpu(%d,%p) pt_regs: %p [eip: %08lx] [tis:%ld].\n", op, __vuser, &regs, regs.eip, current->thread_info->status);
+	print_vcpu_state(&regs);
+	if (vuser.fxregs) {
+		struct x86_fxsave_struct fp;
+
+		memset(&fp, 0, sizeof(fp));
+		/* a bad user pointer must fail the call, not BUG() the kernel */
+		if (copy_from_user(&fp, vuser.fxregs, sizeof(fp)))
+			return -EFAULT;
+		Printk("FPU state passed in by user-space [%p]:\n", vuser.fxregs);
+		show_vfpu(&fp);
+	}
+
+	switch (op) {
+	case VCPU_OP_RUN_VUSER:
+		// return vcpu_run_vuser(regs);
+		ret = -ENOSYS;
+		break;
+	case VCPU_OP_SWITCH_RUN_VUSER:
+		ret = vcpu_switch_run_vuser(&vuser, &regs, 0);
+		break;
+	case VCPU_OP_SWITCH_RUN_VUSER_SINGLESTEP:
+		ret = vcpu_switch_run_vuser(&vuser, &regs, 1);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+	Printk("return from sys_vcpu(%d,%p) pt_regs: %p [eip: %08lx, eax: %08lx].\n", op, __vuser, &regs, regs.eip, regs.eax);
+	return ret;
+}
--- linux/arch/i386/kernel/Makefile.orig
+++ linux/arch/i386/kernel/Makefile
@@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.ld
 obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
 		ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \
 		pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
-		doublefault.o entry_trampoline.o
+		doublefault.o entry_trampoline.o vcpu.o
 
 obj-y += cpu/
 obj-y += timers/
--- linux/arch/i386/kernel/entry.S.orig
+++ linux/arch/i386/kernel/entry.S
@@ -414,6 +414,13 @@ ENTRY(system_call)
 	pushl %eax # save orig_eax
 	SAVE_ALL
 	GET_THREAD_INFO(%ebp)
+
+	testb $_TIF_VUSER,TI_flags(%ebp)
+	jz skip_vuser
+	call vcpu_vuser_syscall
+	jmp syscall_exit
+skip_vuser:
+
 	cmpl $(nr_syscalls), %eax
 	jae syscall_badsys
 	# system call tracing in operation
@@ -421,6 +428,15 @@ ENTRY(system_call)
 	jnz syscall_trace_entry
 syscall_call:
 	call *sys_call_table(,%eax,4)
+
+	#
+	# dont overwrite the return EAX if returning to
+	# vuser from sys_vcpu:
+	#
+	GET_THREAD_INFO(%ebp)
+	testb $_TIF_VUSER,TI_flags(%ebp)
+	jnz syscall_exit
+
 	movl %eax,EAX(%esp) # store the return value
 syscall_exit:
 	cli # make sure we don't miss an interrupt
@@ -1055,5 +1071,6 @@ ENTRY(sys_call_table)
 	.long sys_mq_notify
 	.long sys_mq_getsetattr
 	.long sys_remap_file_pages /* 280 */
+	.long sys_vcpu
 
 syscall_table_size=(.-sys_call_table)
--- linux/arch/i386/kernel/process.c.orig +++ linux/arch/i386/kernel/process.c @@ -312,6 +312,8 @@ void flush_thread(void) */ clear_fpu(tsk); tsk->used_math = 0; + memset(&tsk->thread.vkernel, 0,
sizeof(tsk->thread.vkernel)); + memset(&tsk->thread.vuser, 0, sizeof(tsk->thread.vuser)); } void release_thread(struct task_struct *dead_task)
@@ -367,6 +369,7 @@ int copy_thread(int nr, unsigned long cl
 
 	p->thread.eip = (unsigned long) ret_from_fork;
 	p->thread_info->real_stack = p->thread_info;
+	p->thread.fpu = &p->thread.__fpu;
 
 	savesegment(fs,p->thread.fs);
 	savesegment(gs,p->thread.gs);
--- linux/arch/i386/kernel/signal.c.orig
+++ linux/arch/i386/kernel/signal.c
@@ -617,8 +617,11 @@ void do_notify_resume(struct pt_regs *re
 		clear_thread_flag(TIF_SINGLESTEP);
 	}
 	/* deal with pending signal delivery */
-	if (thread_info_flags & _TIF_SIGPENDING)
+	if (thread_info_flags & _TIF_SIGPENDING) {
+		if (test_thread_flag(TIF_VUSER))
+			return vcpu_signal(regs);
 		do_signal(regs,oldset);
+	}
 
 	clear_thread_flag(TIF_IRET);
 }
--- linux/arch/i386/kernel/traps.c.orig
+++ linux/arch/i386/kernel/traps.c
@@ -357,6 +357,7 @@ static inline unsigned long get_cr2(void
 	return address;
 }
 
+
 static inline void do_trap(int trapnr, int signr, char *str, int vm86,
 			   struct pt_regs * regs, long error_code, siginfo_t *info)
 {
@@ -371,6 +372,11 @@ static inline void do_trap(int trapnr, i
 
 	trap_signal: {
 		struct task_struct *tsk = current;
+
+		if (test_thread_flag(TIF_VUSER))
+			return vcpu_trap(regs, trapnr, error_code,
+				info ? (unsigned long)info->si_addr : 0); /* info may be NULL */
+
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_no = trapnr;
 		if (info)
@@ -452,6 +458,9 @@ asmlinkage void do_general_protection(st
 	if (!(regs->xcs & 3))
 		goto gp_in_kernel;
 
+	if (test_thread_flag(TIF_VUSER))
+		return vcpu_trap(regs, 13, error_code, regs->eip);
+
 	current->thread.error_code = error_code;
 	current->thread.trap_no = 13;
 	force_sig(SIGSEGV, current);
@@ -598,9 +607,12 @@ asmlinkage void do_debug(struct pt_regs
 	unsigned int condition;
 	struct task_struct *tsk = current;
 	siginfo_t info;
+extern int panic_timeout;
 
 	__asm__ __volatile__("movl %%db6,%0" : "=r" (condition));
 
+	if (panic_timeout) printk("do_debug(%ld,%08lx), eax: %08lx\n", error_code, regs->eip, regs->eax);
+
 	/* It's safe to allow irq's after DR6 has been saved */
 	if (regs->eflags & X86_EFLAGS_IF)
 		local_irq_enable();
@@ -652,6 +664,9 @@ asmlinkage void do_debug(struct pt_regs
 		goto clear_TF;
 	}
 
+	if (((regs->xcs & 3) == 3) && test_thread_flag(TIF_VUSER))
+		return vcpu_trap(regs, 1, error_code, regs->eip);
+
 	/* Ok, finally something we can handle */
 	tsk->thread.trap_no = 1;
 	tsk->thread.error_code = error_code;
@@ -703,7 +718,7 @@ clear_TF:
  * the correct behaviour even in the presence of the asynchronous
  * IRQ13 behaviour
  */
-void math_error(void *eip)
+void math_error(struct pt_regs *regs)
 {
 	struct task_struct * task;
 	siginfo_t info;
@@ -714,12 +729,16 @@ void math_error(void *eip)
 	 */
 	task = current;
 	save_init_fpu(task);
+
+	if (test_thread_flag(TIF_VUSER))
+		return vcpu_trap(regs, 16, 0, regs->eip);
+
 	task->thread.trap_no = 16;
 	task->thread.error_code = 0;
 	info.si_signo = SIGFPE;
 	info.si_errno = 0;
 	info.si_code = __SI_FAULT;
-	info.si_addr = eip;
+	info.si_addr = (void *)regs->eip;
 	/*
 	 * (~cwd & swd) will mask out exceptions that are not set to unmasked
 	 * status. 0x3f is the exception bits in these regs, 0x200 is the
@@ -762,10 +781,10 @@ void math_error(void *eip)
 asmlinkage void do_coprocessor_error(struct pt_regs * regs, long error_code)
 {
 	ignore_fpu_irq = 1;
-	math_error((void *)regs->eip);
+	math_error(regs);
 }
 
-void simd_math_error(void *eip)
+void simd_math_error(struct pt_regs *regs)
 {
 	struct task_struct * task;
 	siginfo_t info;
@@ -776,12 +795,16 @@ void simd_math_error(void *eip)
 	 */
 	task = current;
 	save_init_fpu(task);
+
+	if (test_thread_flag(TIF_VUSER))
+		return vcpu_trap(regs, 19, 0, regs->eip);
+
 	task->thread.trap_no = 19;
 	task->thread.error_code = 0;
 	info.si_signo = SIGFPE;
 	info.si_errno = 0;
 	info.si_code = __SI_FAULT;
-	info.si_addr = eip;
+	info.si_addr = (void *)regs->eip;
 	/*
 	 * The SIMD FPU exceptions are handled a little differently, as there
 	 * is only a single status/control register. Thus, to determine which
@@ -819,7 +842,7 @@ asmlinkage void do_simd_coprocessor_erro
 	if (cpu_has_xmm) {
 		/* Handle SIMD FPU exceptions on PIII+ processors. */
 		ignore_fpu_irq = 1;
-		simd_math_error((void *)regs->eip);
+		simd_math_error(regs);
 	} else {
 		/*
 		 * Handle strange cache flush from user space exception
@@ -831,6 +854,9 @@ asmlinkage void do_simd_coprocessor_erro
 			return;
 		}
 		die_if_kernel("cache flush denied", regs, error_code);
+		if (test_thread_flag(TIF_VUSER))
+			return vcpu_trap(regs, 19, error_code, regs->eip);
+
 		current->thread.trap_no = 19;
 		current->thread.error_code = error_code;
 		force_sig(SIGSEGV, current);
@@ -861,11 +887,23 @@ asmlinkage void math_state_restore(struc
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = thread->task;
 
+	if (panic_timeout && (panic_timeout < 3))
+		printk("math_state_restore %s:%d [eip:%08lx]\n",
+			current->comm, current->pid, regs.eip);
+
+	if (test_thread_flag(TIF_VUSER) && !(thread->status & TS_USEDVFPU))
+		return vcpu_trap(&regs, 7, 0, regs.eip);
 	clts(); /* Allow maths ops (or we recurse) */
-	if (!tsk->used_math)
+	if (!test_thread_flag(TIF_VUSER) && !tsk->used_math)
 		init_fpu(tsk);
 	restore_fpu(tsk);
-	thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
+	/*
+	 * So we fnsave on switch_to() and in vcpu mode we copy VFPU state
+	 * to userspace:
+	 */
+	thread->status |= TS_USEDFPU;
+	if (test_thread_flag(TIF_VUSER))
+		thread->status |= TS_USEDVFPU;
 }
 
 #ifndef CONFIG_MATH_EMULATION
@@ -1033,6 +1071,7 @@ void __init trap_init(void)
 	 * Should be a barrier for any external CPU state.
 	 */
 	cpu_init();
+	current->thread.fpu = &current->thread.__fpu;
 
 	trap_init_hook();
 }
--- linux/arch/i386/kernel/i8259.c.orig
+++ linux/arch/i386/kernel/i8259.c
@@ -323,13 +323,13 @@ void init_8259A(int auto_eoi)
 
 static irqreturn_t math_error_irq(int cpl, void *dev_id, struct pt_regs *regs)
 {
-	extern void math_error(void *);
+	extern void math_error(struct pt_regs *);
 #ifndef CONFIG_X86_PC9800
 	outb(0,0xF0);
 #endif
 	if (ignore_fpu_irq || !boot_cpu_data.hard_math)
 		return IRQ_NONE;
-	math_error((void *)regs->eip);
+	math_error(regs);
 	return IRQ_HANDLED;
 }
 
--- linux/arch/i386/mm/fault.c.orig
+++ linux/arch/i386/mm/fault.c
@@ -387,9 +387,13 @@ bad_area_nosemaphore:
 		if (is_prefetch(regs, address))
 			return;
 
+		error_code |= (address >= TASK_SIZE);
+		if (test_thread_flag(TIF_VUSER))
+			return vcpu_trap(regs, 14, error_code, address);
+
 		tsk->thread.cr2 = address;
 		/* Kernel addresses are always protection faults */
-		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+		tsk->thread.error_code = error_code;
 		tsk->thread.trap_no = 14;
 		info.si_signo = SIGSEGV;
 		info.si_errno = 0;
@@ -496,6 +500,10 @@ do_sigbus:
 	if (is_prefetch(regs, address))
 		return;
 
+	error_code |= (address >= TASK_SIZE);
+	if (test_thread_flag(TIF_VUSER))
+		return vcpu_trap(regs, 14, error_code, address);
+
 	tsk->thread.cr2 = address;
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 14;
--- linux/arch/um/kernel/skas/include/proc_mm.h.orig +++ linux/arch/um/kernel/skas/include/proc_mm.h @@ -11,14 +11,7 @@ #define MM_MPROTECT 56 #define MM_COPY_SEGMENTS 57 #define MM_REMAP_FILE_PAGES 58 - -struct mm_remap_file_pages { - unsigned long start; - unsigned long size; - unsigned long prot; - unsigned long pgoff; - unsigned long flags; -}; +#define MM_MODIFY_LDT 59 struct mm_mmap { unsigned long addr; @@ -40,13 +33,28 @@ struct mm_mprotect { unsigned int prot; }; +struct mm_remap_file_pages { + unsigned long start; + unsigned long size; + unsigned long prot; + unsigned long pgoff; + unsigned long flags; +}; + +struct mm_modify_ldt { + int func; + void
*ptr; + unsigned long bytecount; +}; + struct proc_mm_op { int op; union { struct mm_mmap mmap; struct mm_munmap munmap; struct mm_mprotect mprotect; - struct mm_remap_file_pages fremap; + struct mm_remap_file_pages fremap; + struct mm_modify_ldt ldt; int copy_segments; } u; }; --- linux/arch/um/kernel/skas/include/skas.h.orig +++ linux/arch/um/kernel/skas/include/skas.h @@ -24,6 +24,7 @@ extern void new_thread_handler(int sig); extern void handle_syscall(union uml_pt_regs *regs); extern void remap(int fd, unsigned long virt, unsigned long phys, unsigned long len, int prot); +extern int mod_ldt(int fd, int func, void *ptr, unsigned long bytecount); extern void map(int fd, unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x); extern int unmap(int fd, void *addr, int len); --- linux/arch/um/kernel/skas/include/uaccess.h.orig +++ linux/arch/um/kernel/skas/include/uaccess.h @@ -8,6 +8,8 @@ #include "asm/errno.h" +#include + #define access_ok_skas(type, addr, size) \ ((segment_eq(get_fs(), KERNEL_DS)) || \ (((unsigned long) (addr) < TASK_SIZE) && \ --- linux/arch/um/kernel/skas/process_kern.c.orig +++ linux/arch/um/kernel/skas/process_kern.c @@ -16,6 +16,169 @@ #include "frame.h" #include "kern.h" #include "mode.h" +#include "linux/vcpu.h" +#include "asm/uaccess.h" + +static void uregs_to_vregs(struct vcpu_state_struct *vregs, + union uml_pt_regs *uregs) +{ +#define C(x,y) \ + vregs->regs.x = uregs->skas.regs[HOST_##y] + + C(ebx, EBX); C(ecx, ECX); C(edx, EDX); C(esi, ESI); + C(edi, EDI); C(ebp, EBP); C(eax, EAX); + C(xds, DS); C(xes, ES); + vregs->regs.orig_eax = uregs->skas.syscall; + C(eip, IP); C(xcs, CS); C(eflags, EFLAGS); + C(esp, SP); C(xss, SS); + vregs->xfs = uregs->skas.regs[HOST_FS]; + vregs->xgs = uregs->skas.regs[HOST_GS]; +#undef C + vregs->fxregs = &uregs->skas.xfp; +} + +static void vregs_to_uregs(union uml_pt_regs *uregs, + struct vcpu_state_struct *vregs) +{ +#define C(x,y) \ + uregs->skas.regs[HOST_##y] = vregs->regs.x + + 
C(ebx, EBX); C(ecx, ECX); C(edx, EDX); C(esi, ESI); + C(edi, EDI); C(ebp, EBP); C(eax, EAX); + C(xds, DS); C(xes, ES); + uregs->skas.syscall = vregs->regs.orig_eax; + C(eip, IP); C(xcs, CS); C(eflags, EFLAGS); + C(esp, SP); C(xss, SS); + uregs->skas.regs[HOST_FS] = vregs->xfs; + uregs->skas.regs[HOST_GS] = vregs->xgs; +#undef C +} + + +static void __show_vregs(struct vcpu_state_struct *vstate) +{ + struct x86_pt_regs *regs = &vstate->regs; + printk("-------------------------------------------------------------------------------\n"); + printk("| EIP: %08lx ESP: %08lx EFL: %08lx DS: %04x ES: %04x |\n", regs->eip, regs->esp, regs->eflags & ~0x100, regs->xds & 0xffff, regs->xes & 0xffff); + printk("| EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx SS: %04x CS: %04x |\n",regs->eax, regs->ebx, regs->ecx, regs->edx, regs->xss, regs->xcs); + printk("| ESI: %08lx EDI: %08lx EBP: %08lx OAX: %08lx FS: %04lx GS: %04lx |\n", regs->esi, regs->edi, regs->ebp, 0L /*regs->orig_eax*/, vstate->xfs, vstate->xgs); + printk("-------------------------------------------------------------------------------\n"); +} + +static void show_vregs(struct vcpu_state_struct *vstate) +{ + if (mode_verbose) + __show_vregs(vstate); +} + +#define BYTES 16 + +static void panic_vregs(struct vcpu_state_struct *vstate) +{ + unsigned char tmp[BYTES]; + unsigned long eip = vstate->regs.eip; + int i, ret; + + __show_vregs(vstate); + memset(tmp, 0xab, BYTES); + ret = copy_from_user(tmp, (void *)eip, BYTES); + printk("insn at %08lx (%d):", eip, ret); + for (i = 0; i < BYTES; i++) + printk(" %02x", tmp[i]); + printk("\n"); +} + +void vcpu_userspace(union uml_pt_regs *regs) +{ + int step = 0, substep; + struct vcpu_state_struct vuser; + int err, ret, status, op, pid = current->pid, trapno; + struct x86_fxsave_struct *fxregs = (void *)®s->skas.xfp; // hack + + memset(®s->skas.xfp, 0, sizeof(regs->skas.xfp)); // HACK + fxregs->cwd = 0x037f; + fxregs->swd = 0x0220; + + memset(&vuser, 0, sizeof(vuser)); + vuser.magic = 
VCPU_MAGIC; + + Printk("kernel userspace loop, pid:%d, userspace pid:%d.\n", + getpid(), pid); + uregs_to_vregs(&vuser, regs); + show_vregs(&vuser); + +// restore_registers(regs); + + while (1) { + step++; + substep = 1; +continue_substep: + Printk("regs-fs: %08lx, regs-gs: %08lx\n", + regs->skas.regs[HOST_FS], regs->skas.regs[HOST_GS]); + uregs_to_vregs(&vuser, regs); + + vuser.mm = (void *) current->mm->context.skas.mm_fd; + + Printk("\nstep #%d/%d towards sys_vcpu:\n", step, substep); + show_vregs(&vuser); + + if (mode_singlestep) + ret = sys_vcpu(VCPU_OP_SWITCH_RUN_VUSER_SINGLESTEP, &vuser, -1); + else + ret = sys_vcpu(VCPU_OP_SWITCH_RUN_VUSER, &vuser, -1); + + Printk("back from sys_vcpu: event %d.\n", ret); + show_vregs(&vuser); + + vregs_to_uregs(regs, &vuser); + regs->skas.is_user = 1; + + Printk("regs-fs: %08lx, regs-gs: %08lx\n", + regs->skas.regs[HOST_FS], regs->skas.regs[HOST_GS]); +// if (vuser.regs.eip == 0x40001f0f) for (;;); +// save_registers(regs); + + switch (ret) { + case VCPU_EVENT_TRAP: + trapno = vuser.trap.trapno; + Printk("... trap %d (errcode:%d, addr:%08lx).\n", + trapno, vuser.trap.error_code, vuser.trap.addr); + switch (trapno) { + case 1: + Printk("... debug trap - continuing.\n"); + substep++; + goto continue_substep; + + case 0: + user_signal(SIGFPE, regs); + break; + case 14: + segv(vuser.trap.addr, 0, vuser.trap.error_code & 2, 1, NULL); + + break; + default: + panic_vregs(&vuser); + panic("unknown trap code %d.\n", trapno); + } + break; + case VCPU_EVENT_SYSCALL: + handle_syscall(regs); + break; + + case VCPU_EVENT_SIGNAL: + Printk("... 
signal interruption - continuing.\n"); + break; + + + default: + panic("unknown return code %d.\n", ret); + } +// restore_registers(regs); + interrupt_end(); + } + Printk("exiting kernel vcpu loop, pid:%d, userspace pid:%d.\n", + getpid(), pid); +} int singlestepping_skas(void) { @@ -69,13 +232,15 @@ void new_thread_handler(int sig) * 0 if it just exits */ n = run_kernel_thread(fn, arg, &current->thread.exec_buf); - if(n == 1) + if (n == 1) userspace(&current->thread.regs.regs); - else do_exit(0); + else + do_exit(0); } void new_thread_proc(void *stack, void (*handler)(int sig)) { + local_irq_disable(); init_new_thread_stack(stack, handler); os_usr1_process(os_getpid()); } @@ -98,6 +263,7 @@ void fork_handler(int sig) if(current->thread.prev_sched != NULL) schedule_tail(current->thread.prev_sched); current->thread.prev_sched = NULL; +// change_sig(SIGUSR1, 0); unblock_signals(); userspace(&current->thread.regs.regs); @@ -149,6 +315,7 @@ static int start_kernel_proc(void *unuse block_signals(); pid = os_getpid(); + Printk("start_kernel_proc() pid: %d\n", pid); cpu_tasks[0].pid = pid; cpu_tasks[0].task = current; @@ -161,6 +328,7 @@ static int start_kernel_proc(void *unuse int start_uml_skas(void) { + Printk("start_uml_skas() pid %d.\n", getpid()); start_userspace(0); capture_signal_stack(); uml_idle_timer(); --- linux/arch/um/kernel/skas/process.c.orig +++ linux/arch/um/kernel/skas/process.c @@ -53,6 +53,7 @@ static void handle_segv(int pid) panic("handle_segv - PTRACE_FAULTINFO failed, errno = %d\n", errno); + Printk("... 
segv errcode: %d, addr: %08lx.\n", FAULT_WRITE(fault.is_write), fault.addr); segv(fault.addr, 0, FAULT_WRITE(fault.is_write), 1, NULL); } @@ -87,10 +88,13 @@ static void handle_trap(int pid, union u static int userspace_tramp(void *arg) { + Printk("userspace_tramp #1 pid:%d, %p\n", getpid(), arg); init_new_thread_signals(0); enable_timer(); ptrace(PTRACE_TRACEME, 0, 0, 0); + Printk("userspace_tramp #2 pid:%d, %p\n", getpid(), arg); os_stop_process(os_getpid()); + Printk("userspace_tramp #3 pid:%d, %p\n", getpid(), arg); return(0); } @@ -112,6 +116,7 @@ void start_userspace(int cpu) pid = clone(userspace_tramp, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, NULL); + Printk("start_userspace(%d), pid: %d [parent: %d].\n", cpu, pid, getpid()); if(pid < 0) panic("start_userspace : clone failed, errno = %d", errno); @@ -132,17 +137,77 @@ void start_userspace(int cpu) userspace_pid[cpu] = pid; } -void userspace(union uml_pt_regs *regs) +#include "/home/mingo/linux/linux/include/linux/vcpu.h" + +#define __NR_sys_vcpu 281 +_syscall3(int, sys_vcpu, int, op, struct vcpu_state_struct *, vuser, int, mm_fd); + +static void uregs_to_vregs(struct vcpu_state_struct *vregs, + union uml_pt_regs *uregs) +{ +#define C(x,y) \ + vregs->regs.x = uregs->skas.regs[HOST_##y] + + C(ebx, EBX); C(ecx, ECX); C(edx, EDX); C(esi, ESI); + C(edi, EDI); C(ebp, EBP); C(eax, EAX); + C(xds, DS); C(xes, ES); C(orig_eax, EAX); + C(eip, IP); C(xcs, CS); C(eflags, EFLAGS); + C(esp, SP); C(xss, SS); + vregs->xfs = uregs->skas.regs[HOST_FS]; + vregs->xgs = uregs->skas.regs[HOST_GS]; +#undef C +} + +static void vregs_to_uregs(union uml_pt_regs *uregs, + struct vcpu_state_struct *vregs) +{ /* Copy the register state held in vregs back into uregs. */ +#define C(x,y) \ + uregs->skas.regs[HOST_##y] = vregs->regs.x + + C(ebx, EBX); C(ecx, ECX); C(edx, EDX); C(esi, ESI); + C(edi, EDI); C(ebp, EBP); C(eax, EAX); + C(xds, DS); C(xes, ES); C(orig_eax, EAX); + C(eip, IP); C(xcs, CS); C(eflags, EFLAGS); + C(esp, SP); C(xss, SS); + uregs->skas.regs[HOST_FS] = vregs->xfs; /* FS/GS go vregs -> uregs, same direction as the process_kern.c variant */ + 
uregs->skas.regs[HOST_GS] = vregs->xgs; +#undef C +} + +static void show_vregs(struct vcpu_state_struct *vstate) { + struct x86_pt_regs *regs = &vstate->regs; + + Printk("-------------------------------------------------------------------------------\n"); + Printk("| EIP: %08lx ESP: %08lx EFL: %08lx DS: %04x ES: %04x |\n", regs->eip, regs->esp, regs->eflags & ~0x100, regs->xds & 0xffff, regs->xes & 0xffff); + Printk("| EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx SS: %04x CS: %04x |\n",regs->eax, regs->ebx, regs->ecx, regs->edx, regs->xss, regs->xcs); + Printk("| ESI: %08lx EDI: %08lx EBP: %08lx OAX: %08lx FS: %04lx GS: %04lx |\n", regs->esi, regs->edi, regs->ebp, 0L /* regs->orig_eax; long constant to match %08lx */, vstate->xfs, vstate->xgs); + Printk("-------------------------------------------------------------------------------\n"); +} + +static void skas_userspace(union uml_pt_regs *regs) +{ + int step = 1, substep = 1; + struct vcpu_state_struct vuser; int err, status, op, pid = userspace_pid[0]; + memset(&vuser, 0, sizeof(vuser)); +// memset(&regs->skas.xfp, 0, sizeof(regs->skas.xfp)); + Printk("kernel userspace loop, pid:%d, userspace pid:%d.\n", + getpid(), pid); + uregs_to_vregs(&vuser, regs); show_vregs(&vuser); + vregs_to_uregs(regs, &vuser); restore_registers(regs); - + err = ptrace(PTRACE_SYSCALL, pid, 0, 0); if(err) panic("userspace - PTRACE_SYSCALL failed, errno = %d\n", errno); while(1){ + Printk("start-loop, eip:%08lx\n", UPT_IP(regs)); + Printk("\nstep #%d/%d towards ptrace-cont:\n", step, substep); + uregs_to_vregs(&vuser, regs); show_vregs(&vuser); + err = waitpid(pid, &status, WUNTRACED); if(err < 0) panic("userspace - waitpid failed, errno = %d\n", @@ -150,14 +215,24 @@ void userspace(union uml_pt_regs *regs) regs->skas.is_user = 1; save_registers(regs); + Printk("back from userspace (stop status: %d [%d]).\n", status, WSTOPSIG(status)); + uregs_to_vregs(&vuser, regs); show_vregs(&vuser); + vregs_to_uregs(regs, &vuser); +// if (vuser.regs.eip == 0x40001f0f) +// for (;;); 
if(WIFSTOPPED(status)){ switch(WSTOPSIG(status)){ case SIGSEGV: handle_segv(pid); + step++; break; case SIGTRAP: + Printk("got SIGTRAP - is_user: %d/%d.\n", + UPT_IS_USER(regs), regs->skas.is_user); handle_trap(pid, regs); + Printk("SIGTRAP - continuing.\n"); + substep++; break; case SIGIO: case SIGVTALRM: @@ -166,23 +241,43 @@ void userspace(union uml_pt_regs *regs) case SIGFPE: case SIGWINCH: user_signal(WSTOPSIG(status), regs); + step++; break; default: printk("userspace - child stopped with signal " "%d\n", WSTOPSIG(status)); + step++; } + Printk("pre-irqend, eip:%08lx\n", UPT_IP(regs)); interrupt_end(); - } + Printk("post-irqend, eip:%08lx\n", UPT_IP(regs)); + } else + step++; + Printk("pre-restore, eip:%08lx\n", UPT_IP(regs)); restore_registers(regs); + Printk("post-restore, eip:%08lx\n", UPT_IP(regs)); op = singlestepping_skas() ? PTRACE_SINGLESTEP : PTRACE_SYSCALL; + if (mode_singlestep) + op = PTRACE_SINGLESTEP; err = ptrace(op, pid, 0, 0); if(err) panic("userspace - PTRACE_SYSCALL failed, " "errno = %d\n", errno); + Printk("end-loop, eip:%08lx\n", UPT_IP(regs)); } + Printk("exiting kernel userspace loop, pid:%d, userspace pid:%d.\n", + getpid(), pid); +} + +void userspace(union uml_pt_regs *regs) +{ + if (mode_vcpu) + vcpu_userspace(regs); + else + skas_userspace(regs); } void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, @@ -224,6 +319,9 @@ void save_registers(union uml_pt_regs *r unsigned long *fp_regs; int err, fp_op; + if (mode_vcpu) + printk("ugh - save_registers() called at %p, in vcpu mode!\n", + __builtin_return_address(0)); if(have_fpx_regs){ fp_op = PTRACE_GETFPXREGS; fp_regs = regs->skas.xfp; @@ -245,6 +343,9 @@ void restore_registers(union uml_pt_regs unsigned long *fp_regs; int err, fp_op; + if (mode_vcpu) + printk("ugh - restore_registers() called at %p, in vcpu mode!\n", + __builtin_return_address(0)); if(have_fpx_regs){ fp_op = PTRACE_SETFPXREGS; fp_regs = regs->skas.xfp; @@ -282,6 +383,7 @@ int start_idle_thread(void 
*stack, void jmp_buf **switch_buf = switch_buf_ptr; int n; + Printk("start_idle_thread(%p) pid:%d\n", stack, getpid()); *fork_buf_ptr = &initial_jmpbuf; n = sigsetjmp(initial_jmpbuf, 1); if(n == 0) @@ -369,6 +471,8 @@ void switch_mm_skas(int mm_fd) { int err; + if (mode_vcpu) + return; #warning need cpu pid in switch_mm_skas err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd); if(err) --- linux/arch/um/kernel/skas/trap_user.c.orig +++ linux/arch/um/kernel/skas/trap_user.c @@ -13,14 +13,57 @@ #include "task.h" #include "sigcontext.h" +#undef Printk +#define Printk(x...) do { } while(0) + +extern unsigned long signals_enabled, signals_pending; + +void handle_pending_signals(void) +{ + int save_errno = errno, i; + struct signal_info *info; + unsigned long mask; + + if (um_in_interrupt()) + return; + + while (signals_pending) { + for (i = 0; i < 32; i++) { + mask = 1 << i; + if (mask & signals_pending) { + signals_pending &= ~mask; + info = &sig_info[i]; + + if (!info->is_irq) + unblock_signals(); + else + block_signals(); + (*info->handler)(i, 0); + unblock_signals(); + } + } + } + errno = save_errno; +} + void sig_handler_common_skas(int sig, void *sc_ptr) { struct sigcontext *sc = sc_ptr; struct skas_regs *r; struct signal_info *info; int save_errno = errno; + int save_user; + + if (!signals_enabled && (sig != SIGSEGV)) { + Printk("disabled? 
sig_handler_common_skas(%d, %p/%p)\n", sig, __builtin_return_address(0), __builtin_return_address(1)); + if (sig >= 32) + panic("huh?"); + signals_pending |= (1 << sig); + return; + } r = &TASK_REGS(get_current())->skas; + save_user = r->is_user; r->is_user = 0; r->fault_addr = SC_FAULT_ADDR(sc); r->fault_type = SC_FAULT_TYPE(sc); @@ -28,11 +71,18 @@ void sig_handler_common_skas(int sig, vo change_sig(SIGUSR1, 1); info = &sig_info[sig]; - if(!info->is_irq) unblock_signals(); + if (!info->is_irq) + unblock_signals(); - (*info->handler)(sig, (union uml_pt_regs *) r); + if (sig == SIGSEGV) + (*info->handler)(sig, (union uml_pt_regs *) r); + else + (*info->handler)(sig, 0); + r->is_user = save_user; errno = save_errno; + + unblock_signals(); } extern int missed_ticks[]; @@ -41,6 +91,16 @@ void user_signal(int sig, union uml_pt_r { struct signal_info *info; + Printk("=> user_signal(%d, eip:%08lx)\n", sig, UPT_IP(regs)); + + if (!signals_enabled) { + if (sig >= 32) + panic("huh?"); + signals_pending |= (1 << sig); + Printk("DISABLED <= user_signal(%d, eip:%08lx)\n", sig, UPT_IP(regs)); + return; + } + regs->skas.is_user = 1; regs->skas.fault_addr = 0; regs->skas.fault_type = 0; @@ -49,6 +109,7 @@ void user_signal(int sig, union uml_pt_r (*info->handler)(sig, regs); unblock_signals(); + Printk("<= user_signal(%d, eip:%08lx)\n", sig, UPT_IP(regs)); } /* --- linux/arch/um/kernel/skas/syscall_user.c.orig +++ linux/arch/um/kernel/skas/syscall_user.c @@ -19,7 +19,7 @@ void handle_syscall(union uml_pt_regs *regs) { long result; - int index = 0, record = record_syscalls; + int index = 0, record = record_syscalls, nr = UPT_SYSCALL_NR(regs); if (record) index = record_syscall_start(UPT_SYSCALL_NR(regs)); @@ -27,6 +27,7 @@ void handle_syscall(union uml_pt_regs *r syscall_trace(); result = execute_syscall(regs); + Printk("executed syscall %d => %d.\n", nr, result); REGS_SET_SYSCALL_RETURN(regs->skas.regs, result); if((result == -ERESTARTNOHAND) || (result == -ERESTARTSYS) || (result 
== -ERESTARTNOINTR)) --- linux/arch/um/kernel/skas/syscall_kern.c.orig +++ linux/arch/um/kernel/skas/syscall_kern.c @@ -10,6 +10,7 @@ #include "asm/current.h" #include "sysdep/syscalls.h" #include "kern_util.h" +#include "user_util.h" extern syscall_handler_t *sys_call_table[]; @@ -26,6 +27,7 @@ long execute_syscall_skas(void *r) if((syscall >= NR_syscalls) || (syscall < 0)) res = -ENOSYS; else res = EXECUTE_SYSCALL(syscall, regs); + Printk("execute_syscall_skas(%d) => %d.\n", syscall, res); return(res); } --- linux/arch/um/kernel/skas/mem_user.c.orig +++ linux/arch/um/kernel/skas/mem_user.c @@ -34,6 +34,26 @@ void remap(int fd, unsigned long virt, u panic("map : /proc/mm remap failed, errno = %d\n", errno); } +int mod_ldt(int fd, int func, void *ptr, unsigned long bytecount) +{ /* Write an MM_MODIFY_LDT proc_mm_op (func, ptr, bytecount) to fd; returns os_write_file()'s result. */ + struct proc_mm_op ldt; + int n; + + ldt = ((struct proc_mm_op) + { .op = MM_MODIFY_LDT, + .u = + { .ldt = + { .func = func, + .ptr = ptr, + .bytecount = bytecount + } } } ); + n = os_write_file(fd, &ldt, sizeof(ldt)); + + Printk("ldt(%d, %d,%p,%lu) => %d\n", fd, func, ptr, bytecount, n); /* bytecount is unsigned long, hence %lu */ + + return n; +} + void map(int fd, unsigned long virt, unsigned long phys, unsigned long len, int r, int w, int x) { --- linux/arch/um/kernel/um_arch.c.orig +++ linux/arch/um/kernel/um_arch.c @@ -251,8 +251,58 @@ __uml_setup("mode=tt", mode_tt_setup, int mode_tt = DEFAULT_TT; int mode_fremap = 0; +int mode_vcpu = 0; +int mode_singlestep = 0; +int mode_verbose = 0; int force_mmap = 0; int record_syscalls = 0; +int mode_fastirq = 0; + +static int __init mode_verbose_setup(char *line, int *add) +{ + mode_verbose = 1; + return 0; +} + +__uml_setup("mode=verbose", mode_verbose_setup, +"mode=verbose\n" +" this option causes uml to verbose userspace.\n\n" +); + +static int __init mode_singlestep_setup(char *line, int *add) +{ + mode_singlestep = 1; + return 0; +} + +__uml_setup("mode=singlestep", mode_singlestep_setup, +"mode=singlestep\n" +" this option causes uml to singlestep userspace.\n\n" +); + +static int 
__init mode_fastirq_setup(char *line, int *add) +{ + mode_fastirq = 1; + return 0; +} + +__uml_setup("mode=fastirq", mode_fastirq_setup, +"mode=fastirq\n" +" this option causes uml to use fast irq masking/unmasking.\n\n" +); + + + +static int __init mode_vcpu_setup(char *line, int *add) +{ + mode_vcpu = 1; + return 0; +} + +__uml_setup("mode=vcpu", mode_vcpu_setup, +"mode=vcpu\n" +" this option causes UML to use VCPU mode.\n\n" +); static int __init mode_mmap_setup(char *line, int *add) { --- linux/arch/um/kernel/process.c.orig +++ linux/arch/um/kernel/process.c @@ -252,7 +252,7 @@ void forward_pending_sigio(int target) kill(target, SIGIO); } -#define __NR_new_remap_file_pages 274 +#define __NR_new_remap_file_pages 280 _syscall5(int, new_remap_file_pages, unsigned long, start, unsigned long, len, unsigned long, prot, unsigned long, pgoff, int, flags); --- linux/arch/um/kernel/trap_kern.c.orig +++ linux/arch/um/kernel/trap_kern.c @@ -141,6 +141,8 @@ unsigned long segv(unsigned long address void *catcher; int err; + Printk("segv(%08lx, %08lx, %d, %d), called from %p\n", + address, ip, is_write, is_user, __builtin_return_address(0)); if(!is_user && (address >= start_vm) && (address < end_vm)){ flush_tlb_kernel_vm(); return(0); @@ -202,9 +204,11 @@ void bad_segv(unsigned long address, uns void relay_signal(int sig, union uml_pt_regs *regs) { +// Printk("relay_signal(%d, user:%d, eip:%08lx), called from %p\n", +// sig, UPT_IS_USER(regs), UPT_IP(regs), __builtin_return_address(0)); if(arch_handle_signal(sig, regs)) return; - if(!UPT_IS_USER(regs)) - panic("Kernel mode signal %d", sig); +// if(!UPT_IS_USER(regs)) +// panic("Kernel mode signal %d", sig); force_sig(sig, current); } --- linux/arch/um/kernel/trap_user.c.orig +++ linux/arch/um/kernel/trap_user.c @@ -50,6 +50,9 @@ void segv_handler(int sig, union uml_pt_ { int index, max; + Printk("segv_handler(%d), user:%d, eip:%08lx\n", + sig, UPT_IS_USER(regs), UPT_IP(regs)); + if(UPT_IS_USER(regs) && 
!UPT_SEGV_IS_FIXABLE(regs)){ bad_segv(UPT_FAULT_ADDR(regs), UPT_IP(regs), UPT_FAULT_WRITE(regs)); --- linux/arch/um/kernel/signal_user.c.orig +++ linux/arch/um/kernel/signal_user.c @@ -19,6 +19,9 @@ #include "sysdep/sigcontext.h" #include "sigcontext.h" +#undef Printk +#define Printk(x...) do { } while(0) + void set_sigstack(void *sig_stack, int size) { stack_t stack = ((stack_t) { .ss_flags = 0, @@ -47,14 +50,84 @@ void set_handler(int sig, void (*handler panic("sigaction failed"); } -int change_sig(int signal, int on) +void fflush_stdout(void) { - sigset_t sigset, old; + fflush(stdout); +} - sigemptyset(&sigset); - sigaddset(&sigset, signal); - sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old); - return(!sigismember(&old, signal)); +unsigned long signals_enabled = 3; +unsigned long signals_pending = 0; + +#define ADDR (*(volatile long *) addr) + +static __inline__ void set_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__( + "lock; btsl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +static __inline__ void clear_bit(int nr, volatile unsigned long * addr) +{ + __asm__ __volatile__( + "lock; btrl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +static __inline__ int test_and_set_bit(int nr, volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__( + "lock; btsl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int test_and_clear_bit(int nr, volatile unsigned long * addr) +{ + int oldbit; + + __asm__ __volatile__( + "lock; btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +extern void handle_pending_signals(void); + +static void fastirq_block_signals(void) +{ + signals_enabled = 0; +} + +static void fastirq_unblock_signals(void) +{ + signals_enabled = 3; + if (signals_pending) + handle_pending_signals(); +} + +static int fastirq_get_signals(void) +{ + return signals_enabled; +} + +static int fastirq_set_signals(int 
enable) +{ + int prev = signals_enabled; + + signals_enabled = enable; + if (enable && signals_pending) { + Printk("SIGNALS PENDING!\n"); + handle_pending_signals(); + } + + return prev; } static void change_signals(int type) @@ -70,12 +143,12 @@ static void change_signals(int type) panic("Failed to change signal mask - errno = %d", errno); } -void block_signals(void) +static void skas_block_signals(void) { change_signals(SIG_BLOCK); } -void unblock_signals(void) +static void skas_unblock_signals(void) { change_signals(SIG_UNBLOCK); } @@ -93,16 +166,18 @@ static int enable_mask(sigset_t *mask) return(sigs); } -int get_signals(void) +static int skas_get_signals(void) { + int sigs; sigset_t mask; if(sigprocmask(SIG_SETMASK, NULL, &mask) < 0) panic("Failed to get signal mask"); - return(enable_mask(&mask)); + sigs = enable_mask(&mask); + return sigs; } -int set_signals(int enable) +static int skas_set_signals(int enable) { sigset_t mask; int ret; @@ -130,6 +205,94 @@ int set_signals(int enable) return(ret); } + +int change_sig(int signal, int on) +{ + sigset_t sigset, old; + + Printk("change_sig(%d, %d) at %p\n", signal, on, __builtin_return_address(0)); + sigemptyset(&sigset); + sigaddset(&sigset, signal); + sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old); + return(!sigismember(&old, signal)); +} + +extern int um_preempt_count(void); + +#define TR1 um_in_interrupt(), um_preempt_count(), __builtin_return_address(0) + +#define TR5 um_in_interrupt(), um_preempt_count(), __builtin_return_address(0), __builtin_return_address(1), __builtin_return_address(2), __builtin_return_address(3), __builtin_return_address(4) + +#define TR3 um_in_interrupt(), um_preempt_count(), __builtin_return_address(0), __builtin_return_address(1), __builtin_return_address(2) + +#define DTPrintk1(f,x...) \ + do { Printk(f" at [%d/%08lx] %p\n", x, TR1); } while (0) + +#define DTPrintk3(f,x...) \ + do { Printk(f" at [%d/%08lx] %p/%p/%p\n", x, TR3); } while (0) + +#define DTPrintk5(f,x...) 
\ + do { Printk(f" at [%d/%08lx] %p/%p/%p/%p/%p\n", x, TR5); } while (0) + +#define DTPrintk DTPrintk1 + +int set_signals(int enable) +{ + int ret; + + if (mode_fastirq) + ret = fastirq_set_signals(enable); + else + ret = skas_set_signals(enable); + + DTPrintk("set_signals(%d) <= %d", enable, ret); + return ret; +} + +int get_signals(void) +{ + int sigs; + + if (mode_fastirq) + sigs = fastirq_get_signals(); + else + sigs = skas_get_signals(); + + DTPrintk("get_signals() => %d", sigs); + + return sigs; +} + +int box_is_up = 0; + +void unblock_signals(void) +{ + static int first = 1; + + if (first) { + first = 0; + skas_unblock_signals(); + box_is_up = 1; + } + + if (mode_fastirq) + fastirq_unblock_signals(); + else + skas_unblock_signals(); + + DTPrintk("unblock%s", "_signals"); +} + +void block_signals(void) +{ + if (mode_fastirq) + fastirq_block_signals(); + else + skas_block_signals(); + + DTPrintk("block%s", "_signals"); +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. * Emacs will notice this stuff at the end of the file and automatically --- linux/arch/um/kernel/irq.c.orig +++ linux/arch/um/kernel/irq.c @@ -280,6 +280,7 @@ unsigned int do_IRQ(int irq, union uml_p struct irqaction * action; unsigned int status; + regs = NULL; irq_enter(); kstat_this_cpu.irqs[irq]++; spin_lock(&desc->lock); --- linux/arch/um/kernel/process_kern.c.orig +++ linux/arch/um/kernel/process_kern.c @@ -390,11 +390,21 @@ int um_in_interrupt(void) return(in_interrupt()); } +int um_preempt_count(void) +{ + return preempt_count(); +} + int cpu(void) { return(current_thread->cpu); } +int __init_new_context(struct task_struct *tsk, struct mm_struct *mm, struct mm_struct *old_mm) +{ + return 0; +} + /* * Overrides for Emacs so that we follow Linus's tabbing style. 
* Emacs will notice this stuff at the end of the file and automatically --- linux/arch/um/include/sysdep-i386/frame_user.h.orig +++ linux/arch/um/include/sysdep-i386/frame_user.h @@ -22,6 +22,8 @@ * it masks off the page number to get the offset within the page and subtracts * that from the page size, and that's how big the fpstate struct will be * considered to be. + * + * typical size on x86 is 640 bytes ... */ static inline void setup_arch_frame_raw(struct arch_frame_data_raw *data, --- linux/arch/um/include/user_util.h.orig +++ linux/arch/um/include/user_util.h @@ -10,7 +10,18 @@ extern int mode_tt; extern int mode_fremap; +extern int mode_vcpu; +extern int mode_singlestep; +extern int mode_verbose; extern int record_syscalls; +extern int mode_fastirq; + +extern void fflush_stdout(void); +extern int printk(const char *fmt, ...); +extern int um_in_interrupt(void); +extern int box_is_up; + +#define Printk(x...) do { if (mode_verbose) { printf(x); fflush_stdout(); } } while (0) extern int grantpt(int __fd); extern int unlockpt(int __fd); --- linux/arch/um/sys-i386/ldt.c.orig +++ linux/arch/um/sys-i386/ldt.c @@ -9,6 +9,7 @@ #include "asm/ptrace.h" #include "choose-mode.h" #include "kern.h" +#include "user_util.h" #ifdef CONFIG_MODE_TT extern int modify_ldt(int func, void *ptr, unsigned long bytecount); @@ -51,6 +52,7 @@ int sys_modify_ldt_skas(int func, void * .ptr = buf, .bytecount = bytecount }); res = ptrace(PTRACE_LDT, userspace_pid, 0, (unsigned long) &ldt); + Printk("PTRACE_LDT result: %d [pid %d]\n", res, userspace_pid); if(res < 0) goto out; @@ -70,10 +72,64 @@ int sys_modify_ldt_skas(int func, void * kfree(buf); return(res); } + +extern int mod_ldt(int fd, int func, void *ptr, unsigned long bytecount); + +static int sys_modify_ldt_vcpu(int func, void *ptr, unsigned long bytecount) +{ + void *buf; + int res, n; + + buf = kmalloc(bytecount, GFP_KERNEL); + if(buf == NULL) + return(-ENOMEM); + + res = 0; + + switch(func){ + case 1: + case 0x11: + res = 
copy_from_user(buf, ptr, bytecount); + break; + } + + if(res != 0){ + res = -EFAULT; + goto out; + } + + res = mod_ldt(current->mm->context.skas.mm_fd, func, buf, bytecount); + Printk("mod_ldt(%d, %d) result: %d\n", current->mm->context.skas.mm_fd, func, res); + if(res < 0) + goto out; + + switch(func){ + case 0: + case 2: + n = res; + res = copy_to_user(ptr, buf, n); + if(res != 0) + res = -EFAULT; + else + res = n; + break; + } + + out: + kfree(buf); + return(res); +} + #endif int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) { +#ifdef CONFIG_MODE_SKAS + extern int mode_vcpu; + + if (mode_vcpu) + return sys_modify_ldt_vcpu(func, ptr, bytecount); +#endif return(CHOOSE_MODE_PROC(sys_modify_ldt_tt, sys_modify_ldt_skas, func, ptr, bytecount)); } --- linux/include/linux/vcpu.h.orig +++ linux/include/linux/vcpu.h @@ -0,0 +1,89 @@ +/* + * include/linux/vcpu.h + * + * Virtual CPU: Virtual Kernel Mode & Virtual User Mode Support + * + * started by Ingo Molnar, Copyright (C) 2004 + */ + +#ifndef _LINUX_VCPU_H +#define _LINUX_VCPU_H + +#include + +struct x86_pt_regs { + long ebx; + long ecx; + long edx; + long esi; + long edi; + long ebp; + long eax; + int xds; + int xes; + long orig_eax; + long eip; + int xcs; + long eflags; + long esp; + int xss; +}; + +struct x86_fxsave_struct { + unsigned short cwd; + unsigned short swd; + unsigned short twd; + unsigned short fop; + long fip; + long fcs; + long foo; + long fos; + long mxcsr; + long reserved; + long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ + long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ + long padding[56]; +} __attribute__ ((aligned (16))); + +#ifdef __KERNEL__ +void vcpu_trap(struct pt_regs *regs, int trapno, long error_code, + unsigned long addr); +void vcpu_signal(struct pt_regs *regs); +void vcpu_vuser_syscall(struct pt_regs regs); +#endif + +enum vcpu_op { + VCPU_OP_RUN_VUSER = 1, + VCPU_OP_SWITCH_RUN_VUSER = 2, + VCPU_OP_SWITCH_RUN_VUSER_SINGLESTEP = 3, +}; + 
+enum vcpu_event { + VCPU_EVENT_SYSCALL = 1, + VCPU_EVENT_TRAP = 2, + VCPU_EVENT_SIGNAL = 3, +}; + +struct vcpu_tls_struct { + /* struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; */ +}; + +struct vcpu_trap_struct { + int trapno; + int error_code; + unsigned long addr; +}; + +#define VCPU_MAGIC 0x12419612 + +struct vcpu_state_struct { + struct x86_pt_regs regs; + struct x86_fxsave_struct *fxregs; + unsigned long xfs, xgs; + void *mm; + struct vcpu_tls_struct tls; + struct vcpu_trap_struct trap; + int magic; +}; + +#endif /* _LINUX_VCPU_H */ --- linux/include/linux/sched.h.orig +++ linux/include/linux/sched.h @@ -31,6 +31,7 @@ #include struct exec_domain; +extern int panic_timeout; /* * cloning flags: --- linux/include/asm-um/ptrace-i386.h.orig +++ linux/include/asm-um/ptrace-i386.h @@ -30,7 +30,7 @@ #define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r) #define PT_FIX_EXEC_STACK(sp) do ; while(0) -#define user_mode(r) UPT_IS_USER(&(r)->regs) +#define user_mode(r) 1 /* UPT_IS_USER(&(r)->regs) */ #endif --- linux/include/asm-um/dma-mapping.h.orig +++ linux/include/asm-um/dma-mapping.h @@ -1 +1,4 @@ -#include + +#define flush_write_buffers() do { } while(0) + +#include "asm/arch/dma-mapping.h" --- linux/include/asm-i386/unistd.h.orig +++ linux/include/asm-i386/unistd.h @@ -286,8 +286,10 @@ #define __NR_mq_notify (__NR_mq_open+4) #define __NR_mq_getsetattr (__NR_mq_open+5) #define __NR_remap_file_pages 280 +#define __NR_vcpu 281 -#define NR_syscalls 281 + +#define NR_syscalls 282 /* user-visible error numbers are in the range -1 - -124: see */ --- linux/include/asm-i386/page.h.orig +++ linux/include/asm-i386/page.h @@ -145,6 +145,16 @@ static __inline__ int get_order(unsigned #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#if 0 +#ifdef CONFIG_X86_4G_VM_LAYOUT +#define __PAGE_OFFSET (0x02000000) +#define TASK_SIZE (0xff000000) +#else +#define __PAGE_OFFSET (0xc0000000) +#define TASK_SIZE (0xc0000000) +#endif +#endif + 
#endif /* __KERNEL__ */ #endif /* _I386_PAGE_H */ --- linux/include/asm-i386/i387.h.orig +++ linux/include/asm-i386/i387.h @@ -32,10 +32,10 @@ static inline void __save_init_fpu( stru { if ( cpu_has_fxsr ) { asm volatile( "fxsave %0 ; fnclex" - : "=m" (tsk->thread.i387.fxsave) ); + : "=m" (tsk->thread.fpu->fxsave) ); } else { asm volatile( "fnsave %0 ; fwait" - : "=m" (tsk->thread.i387.fsave) ); + : "=m" (tsk->thread.fpu->fsave) ); } tsk->thread_info->status &= ~TS_USEDFPU; } --- linux/include/asm-i386/thread_info.h.orig +++ linux/include/asm-i386/thread_info.h @@ -142,6 +142,7 @@ static inline unsigned long current_stac #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ #define TIF_IRET 5 /* return with iret */ #define TIF_DB7 6 /* has debug registers */ +#define TIF_VUSER 7 /* signal pending */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define _TIF_SYSCALL_TRACE (1< +#include #include #include #include @@ -413,7 +415,10 @@ struct thread_struct { /* fault info */ unsigned long cr2, trap_no, error_code; /* floating point info */ - union i387_union i387; + union i387_union *fpu; + union i387_union __fpu; + union i387_union __vfpu; + union i387_union *vfpu_user; /* virtual 86 mode info */ struct vm86_struct __user * vm86_info; unsigned long screen_bitmap; @@ -421,6 +426,9 @@ struct thread_struct { unsigned int saved_fs, saved_gs; /* IO permissions */ unsigned long *io_bitmap_ptr; + + /* Virtual Kernel Mode and Virtual User Mode state: */ + struct vcpu_state_struct vkernel, vuser, *vuser_ptr; }; #define INIT_THREAD { \ --- linux/include/asm-i386/bugs.h.orig +++ linux/include/asm-i386/bugs.h @@ -93,7 +93,7 @@ static void __init check_fpu(void) /* * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. 
*/ - if (offsetof(struct task_struct, thread.i387.fxsave) & 15) { + if (offsetof(struct task_struct, thread.__fpu.fxsave) & 15) { extern void __buggy_fxsr_alignment(void); __buggy_fxsr_alignment(); } --- linux/kernel/panic.c.orig +++ linux/kernel/panic.c @@ -60,6 +60,9 @@ NORET_TYPE void panic(const char * fmt, vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); printk(KERN_EMERG "Kernel panic: %s\n",buf); + printk("pre\n"); + show_stack(NULL, NULL); + printk("post\n"); if (in_interrupt()) printk(KERN_EMERG "In interrupt handler - not syncing\n"); else if (!current->pid) --- linux/kernel/time.c.orig +++ linux/kernel/time.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * The timezone where the local system is located. Used as a default by some @@ -85,8 +86,83 @@ asmlinkage long sys_stime(time_t *tptr) #endif +#include + asmlinkage long sys_gettimeofday(struct timeval __user *tv, struct timezone __user *tz) { +#ifdef CONFIG_X86 + if (!tv && !tz) { + int i; + unsigned long long t0, t1, t1a, t1b, t1c, delta; + + for (i = 0; i < 10; i++) { + unsigned int cr0 = read_cr0() | 8; + struct i387_fxsave_struct fpu; + + rdtscll(t0); + rdtscll(t1); + delta = t1-t0; + + rdtscll(t0); + __flush_tlb(); + rdtscll(t1); + printk("__flush_tlb(): %Ld (delta: %Ld)\n", t1-t0-delta, delta); + + rdtscll(t0); + do { + unsigned int tmpreg; + + __asm__ __volatile__( + "movl %%cr3, %0; \n" + "/*movl %0, %%cr3;*/ # flush TLB \n" + : "=r" (tmpreg) + :: "memory"); + } while (0); + + rdtscll(t1); + printk("movl cr3: %Ld (delta: %Ld)\n", t1-t0-delta, delta); + rdtscll(t0); + clts(); + rdtscll(t1); + printk("clts: %Ld (delta: %Ld)\n", t1-t0-delta, delta); + rdtscll(t0); + write_cr0(read_cr0() & ~8) + rdtscll(t1); + printk("manual clts: %Ld (delta: %Ld)\n", t1-t0-delta, delta); + rdtscll(t0); + stts(); + rdtscll(t1); + printk("stts: %Ld (delta: %Ld)\n", t1-t0-delta, delta); + rdtscll(t0); + write_cr0(cr0) + rdtscll(t1); + printk("new stts: %Ld (delta: %Ld)\n", t1-t0-delta, delta); + 
rdtscll(t0); + clts(); + asm volatile( "fwait; fxsave %0 ; fnclex" : "=m" (fpu)); + rdtscll(t1a); + asm volatile( "fld1"); + asm volatile( "fld1"); + asm volatile( "fld1"); + asm volatile( "fld1"); + rdtscll(t1b); + asm volatile( "fxrstor %0" : "=m" (fpu)); + rdtscll(t1c); + stts(); + rdtscll(t1); + printk("FPU switch: %Ld (%Ld + %Ld + %Ld + %Ld) (delta: %Ld)\n", t1-t0-4*delta, t1a-t0-delta, t1b-t1a-delta, t1c-t1b-delta, t1-t1c-delta, delta); + rdtscll(t0); + load_LDT(&current->active_mm->context); + rdtscll(t1); + printk("load_LDT(): %Ld (delta: %Ld)\n", t1-t0-delta, delta); + + printk("\n"); + } + } +// if (!tv && !tz && panic_timeout) +// printk("%s:%d: user-space BUG!\n", current->comm, current->pid); +#endif + if (likely(tv != NULL)) { struct timeval ktv; do_gettimeofday(&ktv); --- linux/Makefile.orig +++ linux/Makefile @@ -149,7 +149,7 @@ SUBARCH := $(shell uname -m | sed -e s/i # Default value for CROSS_COMPILE is not to prefix executables # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile -ARCH ?= $(SUBARCH) +ARCH = i386 CROSS_COMPILE ?= # Architecture as present in compile.h @@ -162,7 +162,7 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" HOSTCC = gcc HOSTCXX = g++ -HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 -fomit-frame-pointer +HOSTCFLAGS = -Wall -Wstrict-prototypes -O2 HOSTCXXFLAGS = -O2 # Decide whether to build built-in, modular, or both. @@ -439,7 +439,6 @@ CFLAGS += -O2 endif ifndef CONFIG_FRAME_POINTER -CFLAGS += -fomit-frame-pointer endif ifdef CONFIG_DEBUG_INFO