--- linux/fs/proc/array.c.orig Wed Jul 26 08:55:49 2000 +++ linux/fs/proc/array.c Wed Jul 26 08:55:55 2000 @@ -1049,6 +1049,8 @@ do { pte_t page = *pte; + conditional_schedule(); + address += PAGE_SIZE; pte++; if (pte_none(page)) --- linux/fs/ext2/namei.c.orig Wed Jul 26 08:55:32 2000 +++ linux/fs/ext2/namei.c Wed Jul 26 08:55:55 2000 @@ -147,6 +147,7 @@ offset += de_len; de = (struct ext2_dir_entry_2 *) ((char *) de + de_len); + conditional_schedule(); } brelse (bh); --- linux/fs/buffer.c.orig Wed Jul 26 08:55:46 2000 +++ linux/fs/buffer.c Wed Jul 26 08:55:55 2000 @@ -174,6 +174,7 @@ do { retry = 0; repeat: + run_task_queue(&tq_disk); /* We search all lists as a failsafe mechanism, not because we expect * there to be dirty buffers on any of the other lists. */ @@ -181,6 +182,12 @@ if (!bh) goto repeat2; for (i = nr_buffers_type[BUF_DIRTY]*2 ; i-- > 0 ; bh = next) { + if (current->need_resched) { + bh->b_count++; + schedule(); + bh->b_count--; + goto repeat; + } if (bh->b_list != BUF_DIRTY) goto repeat; next = bh->b_next_free; @@ -236,6 +243,12 @@ if (!bh) break; for (i = nr_buffers_type[BUF_LOCKED]*2 ; i-- > 0 ; bh = next) { + if (current->need_resched) { + bh->b_count++; + schedule(); + bh->b_count--; + goto repeat2; + } if (bh->b_list != BUF_LOCKED) goto repeat2; next = bh->b_next_free; @@ -625,6 +638,12 @@ for (i = nr_buffers_type[nlist] ; i > 0 ; bh = bhnext, i--) { + if (current->need_resched) { + bh->b_count++; + schedule(); + bh->b_count--; + goto again; + } bhnext = bh->b_next_free; if (bh->b_dev != dev) continue; @@ -676,6 +695,12 @@ for (i = nr_buffers_type[nlist] ; i > 0 ; bh = bhnext, i--) { + if (current->need_resched) { + bh->b_count++; + schedule(); + bh->b_count--; + goto again; + } bhnext = bh->b_next_free; if (bh->b_dev != dev || bh->b_size == size) continue; @@ -752,6 +777,7 @@ int isize; repeat: + conditional_schedule(); bh = get_hash_table(dev, block, size); if (bh) { if (!buffer_dirty(bh)) { @@ -1668,6 +1694,12 @@ bh = lru_list[nlist]; if(bh) 
for (i = nr_buffers_type[nlist]; i-- > 0; bh = next) { + if (current->need_resched) { + bh->b_count++; + schedule(); + bh->b_count--; + goto repeat; + } /* We may have stalled while waiting for I/O to complete. */ if(bh->b_list != nlist) goto repeat; next = bh->b_next_free; @@ -1817,11 +1849,16 @@ { ndirty = 0; repeat: - bh = lru_list[nlist]; if(bh) for (i = nr_buffers_type[nlist]; i-- > 0 && ndirty < bdf_prm.b_un.ndirty; bh = next) { + if (current->need_resched) { + bh->b_count++; + schedule(); + bh->b_count--; + goto repeat; + } /* We may have stalled while waiting for I/O to complete. */ if(bh->b_list != nlist) goto repeat; next = bh->b_next_free; --- linux/fs/dcache.c.orig Wed Jul 26 08:55:46 2000 +++ linux/fs/dcache.c Wed Jul 26 08:55:55 2000 @@ -251,6 +251,9 @@ struct dentry *dentry; struct list_head *tmp = dentry_unused.prev; + /* + * Tough to make preemptable as well ... + */ if (tmp == &dentry_unused) break; dentry_stat.nr_unused--; @@ -264,6 +267,7 @@ if (!--d_nr) break; } + conditional_schedule(); } return __i_nr - i_nr; @@ -290,8 +294,14 @@ * Pass one ... move the dentries for the specified * superblock to the most recent end of the unused list. */ +restart: next = dentry_unused.next; while (next != &dentry_unused) { + if (current->need_resched) { + schedule(); + goto restart; + } + tmp = next; next = tmp->next; dentry = list_entry(tmp, struct dentry, d_lru); @@ -419,6 +429,9 @@ struct list_head *tmp = next; struct dentry *dentry = list_entry(tmp, struct dentry, d_child); next = tmp->next; + /* + * Tough one to make preemptable ... 
+ */ if (!dentry->d_count) { list_del(&dentry->d_lru); list_add(&dentry->d_lru, dentry_unused.prev); --- linux/fs/exec.c.orig Wed Jul 26 08:55:46 2000 +++ linux/fs/exec.c Wed Jul 26 08:55:55 2000 @@ -290,6 +290,7 @@ str += bytes_to_copy; len -= bytes_to_copy; } + conditional_schedule(); } if (from_kmem==2) set_fs(old_fs); --- linux/fs/inode.c.orig Wed Jul 26 08:55:32 2000 +++ linux/fs/inode.c Wed Jul 26 08:55:55 2000 @@ -100,7 +100,7 @@ } } -static void __wait_on_inode(struct inode * inode) +static inline void __wait_on_inode(struct inode * inode) { struct wait_queue wait = { current, NULL }; @@ -115,8 +115,9 @@ current->state = TASK_RUNNING; } -static inline void wait_on_inode(struct inode *inode) +static void wait_on_inode(struct inode *inode) { + conditional_schedule(); if (inode->i_state & I_LOCK) __wait_on_inode(inode); } @@ -156,6 +157,7 @@ spin_unlock(&inode_lock); write_inode(inode); + conditional_schedule(); spin_lock(&inode_lock); inode->i_state &= ~I_LOCK; @@ -266,6 +268,7 @@ break; inode = list_entry(tmp, struct inode, i_list); clear_inode(inode); + conditional_schedule(); count++; } @@ -346,21 +349,41 @@ static int __free_inodes(struct list_head * freeable) { struct list_head *entry; - int found = 0; + /* + * We free at most 128 inodes at once - freeing too many + * introduces very nasty scheduling latencies. + */ + int found = 0, count = 128; + int resched = 0; +again: entry = inode_in_use.next; while (entry != &inode_in_use) { struct list_head *tmp = entry; - + /* + * Tough one to time-limit ... 
+ */ entry = entry->next; - if (!CAN_UNUSE(INODE(tmp))) + if (!CAN_UNUSE(INODE(tmp))) { + if (current->need_resched && !resched) { + INODE(tmp)->i_count++; + spin_unlock(&inode_lock); + schedule(); + iput(INODE(tmp)); + spin_lock(&inode_lock); + resched = 1; + goto again; + } continue; + } list_del(tmp); list_del(&INODE(tmp)->i_hash); INIT_LIST_HEAD(&INODE(tmp)->i_hash); list_add(tmp, freeable); list_entry(tmp, struct inode, i_list)->i_state = I_FREEING; found++; + if (!--count) + break; } return found; --- linux/fs/pipe.c.orig Mon Aug 9 21:04:41 1999 +++ linux/fs/pipe.c Wed Jul 26 08:55:55 2000 @@ -110,6 +110,7 @@ return -ERESTARTSYS; } while (count>0) { + conditional_schedule(); while ((PIPE_FREE(*inode) < free) || PIPE_LOCK(*inode)) { if (!PIPE_READERS(*inode)) { /* no readers */ send_sig(SIGPIPE,current,0); --- linux/fs/super.c.orig Wed Jul 26 08:55:33 2000 +++ linux/fs/super.c Wed Jul 26 08:55:55 2000 @@ -447,6 +447,7 @@ if (!sb->s_dirt) continue; /* N.B. Should lock the superblock while writing */ + conditional_schedule(); wait_on_super(sb); if (!sb->s_dev || !sb->s_dirt) continue; --- linux/kernel/exit.c.orig Wed Jul 26 08:55:34 2000 +++ linux/kernel/exit.c Wed Jul 26 08:55:55 2000 @@ -173,6 +173,7 @@ if (file) { files->fd[i] = NULL; filp_close(file, files); + conditional_schedule(); } } i++; --- linux/kernel/fork.c.orig Wed Jul 26 08:55:26 2000 +++ linux/kernel/fork.c Wed Jul 26 08:55:55 2000 @@ -276,6 +276,7 @@ pprev = &tmp->vm_next; if (retval) goto fail_nomem; + conditional_schedule(); } retval = 0; if (mm->map_count >= AVL_MIN_MAP_COUNT) @@ -373,6 +374,7 @@ return 0; } + conditional_schedule(); retval = -ENOMEM; mm = mm_alloc(); if (!mm) @@ -407,6 +409,7 @@ atomic_inc(&current->fs->count); return 0; } + conditional_schedule(); tsk->fs = kmalloc(sizeof(*tsk->fs), GFP_KERNEL); if (!tsk->fs) return -1; @@ -435,6 +438,7 @@ goto out; } + conditional_schedule(); tsk->files = NULL; error = -ENOMEM; newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); @@ -541,6 +545,7 
@@ atomic_inc(&current->sig->count); return 0; } + conditional_schedule(); tsk->sig = kmalloc(sizeof(*tsk->sig), GFP_KERNEL); if (!tsk->sig) return -1; --- linux/kernel/sched.c.orig Wed Jul 26 08:55:49 2000 +++ linux/kernel/sched.c Wed Jul 26 08:55:55 2000 @@ -322,7 +322,7 @@ int this_cpu = smp_processor_id(); struct task_struct *tsk; - tsk = current; + tsk = cpu_curr(this_cpu); if (preemption_goodness(tsk, p, this_cpu) > 0) tsk->need_resched = 1; #endif @@ -1933,10 +1933,13 @@ /* * Short delay requests up to 2 ms will be handled with * high precision by a busy wait for all real-time processes. + * But even real-time processes have to be preempted by an + * even higher priority RT-thread, so we sleep in short 10 + * usec periods and recheck need_resched. * * Its important on SMP not to do this holding locks. */ - udelay((t.tv_nsec + 999) / 1000); + udelay_resched((t.tv_nsec + 999) / 1000); return 0; } --- linux/kernel/softirq.c.orig Sun Mar 21 16:22:00 1999 +++ linux/kernel/softirq.c Wed Jul 26 08:55:55 2000 @@ -43,15 +43,18 @@ unsigned long active; void (**bh)(void); - active = get_active_bhs(); - clear_active_bhs(active); - bh = bh_base; - do { - if (active & 1) - (*bh)(); - bh++; - active >>= 1; - } while (active); + while ((active = get_active_bhs())) { + __sti(); + clear_active_bhs(active); + bh = bh_base; + do { + if (active & 1) + (*bh)(); + bh++; + active >>= 1; + } while (active); + __cli(); + } } asmlinkage void do_bottom_half(void) --- linux/mm/filemap.c.orig Wed Jul 26 08:55:49 2000 +++ linux/mm/filemap.c Wed Jul 26 08:55:55 2000 @@ -98,6 +98,10 @@ while ((page = *p) != NULL) { unsigned long offset = page->offset; + if (current->need_resched) { + schedule(); + goto repeat; + } /* page wholly truncated - free it */ if (offset >= start) { if (PageLocked(page)) { @@ -148,6 +152,14 @@ page = mem_map + clock; do { int referenced; + unsigned long save_clock = clock; + + conditional_schedule(); + if (save_clock != clock) { + // Was run during sleep by higher prio 
process + // it should have succeeded... + return 1; + } /* This works even in the presence of PageSkip because * the first two entries at the beginning of a hole will @@ -758,7 +770,8 @@ filp->f_ramax = MIN_READAHEAD; { - int error = inode->i_op->readpage(filp, page); + int error; + error = inode->i_op->readpage(filp, page); if (!error) goto found_page; desc->error = error; @@ -773,7 +786,8 @@ * because this happens only if there were errors. */ { - int error = inode->i_op->readpage(filp, page); + int error; + error = inode->i_op->readpage(filp, page); if (!error) { wait_on_page(page); if (PageUptodate(page) && !PageError(page)) --- linux/mm/memory.c.orig Wed Jul 26 08:55:34 2000 +++ linux/mm/memory.c Wed Jul 26 08:55:55 2000 @@ -271,6 +271,10 @@ goto out; src_pte++; dst_pte++; + /* + * Reduce scheduling latencies: + */ + conditional_schedule(); } while ((unsigned long)src_pte & PTE_TABLE_MASK); cont_copy_pmd_range: src_pmd++; @@ -338,9 +342,15 @@ pte++; size--; if (pte_none(page)) - continue; + goto continue_loop; pte_clear(pte-1); freed += free_pte(page); +continue_loop: + /* + * Avoid large scheduling latencies when exiting big + * tasks: + */ + conditional_schedule(); } return freed; } --- linux/mm/page_alloc.c.orig Wed Jul 26 08:55:49 2000 +++ linux/mm/page_alloc.c Wed Jul 26 08:55:55 2000 @@ -199,6 +199,8 @@ } #endif + if (gfp_mask & __GFP_WAIT) + conditional_schedule(); /* * If this is a recursive call, we'd better * do our best to just allocate things without --- linux/mm/slab.c.orig Tue Nov 24 18:21:12 1998 +++ linux/mm/slab.c Wed Jul 26 08:55:55 2000 @@ -1352,6 +1352,8 @@ /* Sanity check. */ if (!cachep) goto nul_ptr; + if ((flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) + conditional_schedule(); spin_lock_irqsave(&cachep->c_spinlock, save_flags); try_again: /* Get slab alloc is to come from. 
*/ --- linux/mm/swapfile.c.orig Wed Jul 26 08:55:34 2000 +++ linux/mm/swapfile.c Wed Jul 26 08:55:55 2000 @@ -715,14 +715,30 @@ void si_swapinfo(struct sysinfo *val) { - unsigned int i, j; + struct swap_info_struct * p; + unsigned int i, max; val->freeswap = val->totalswap = 0; - for (i = 0; i < nr_swapfiles; i++) { - if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK) + for (p = swap_info; p < swap_info+nr_swapfiles; p++) { + if ((p->flags & SWP_WRITEOK) != SWP_WRITEOK) continue; - for (j = 0; j < swap_info[i].max; ++j) - switch (swap_info[i].swap_map[j]) { + max = p->max; + for (i = 0; i < max; ++i) { + /* + * This is for statistics only - no problem if + * we reschedule in the middle, but we have to + * be a bit careful not to process removed + * swapfiles. + */ + if (!(i & 255)) { + if (current->need_resched) { + schedule(); + if (((p->flags & SWP_WRITEOK) != + SWP_WRITEOK) || (max != p->max)) + break; + } + } + switch (p->swap_map[i]) { case SWAP_MAP_BAD: continue; case 0: @@ -730,6 +746,7 @@ default: ++val->totalswap; } + } } val->freeswap <<= PAGE_SHIFT; val->totalswap <<= PAGE_SHIFT; --- linux/mm/vmscan.c.orig Wed Jul 26 08:55:49 2000 +++ linux/mm/vmscan.c Wed Jul 26 08:55:55 2000 @@ -39,6 +39,14 @@ unsigned long page; struct page * page_map; + /* + * Subtle, we must not sleep while scanning page tables for + * swappable pages. If we do we have to restart the scan: + */ + if (current->need_resched) { + schedule(); + return -1; + } pte = *page_table; if (!pte_present(pte)) return 0; @@ -332,6 +340,7 @@ counter = 1; for (; counter >= 0; counter--) { +restart: max_cnt = 0; assign = 0; pbest = NULL; @@ -362,8 +371,16 @@ goto out; } - if (swap_out_process(pbest, gfp_mask)) - return 1; + switch (swap_out_process(pbest, gfp_mask)) { + case 1: + return 1; + /* + * scan got preempted - restart it. 
+ */ + case -1: + goto restart; + default: + } } out: return 0; --- linux/include/linux/condsched.h.orig Wed Jul 26 08:55:55 2000 +++ linux/include/linux/condsched.h Wed Jul 26 08:55:55 2000 @@ -0,0 +1,16 @@ +#ifndef _LINUX_CONDSCHED_H +#define _LINUX_CONDSCHED_H + +#define CONDSCHED_TRACE 0 +#ifndef __ASSEMBLY__ +#define conditional_schedule() \ +do { \ + if (current->need_resched) { \ + current->state = TASK_RUNNING; \ + schedule(); \ + } \ +} while(0) +#endif + +#endif + --- linux/include/linux/console_struct.h.orig Thu Sep 17 18:35:04 1998 +++ linux/include/linux/console_struct.h Wed Jul 26 08:55:55 2000 @@ -18,6 +18,7 @@ unsigned int vc_size_row; /* Bytes per row */ struct consw *vc_sw; unsigned short *vc_screenbuf; /* In-memory character/attribute buffer */ + unsigned long vc_videobuf; /* In-memory copy of text VideoRAM */ unsigned int vc_screenbuf_size; unsigned char vc_attr; /* Current attributes */ unsigned char vc_def_color; /* Default colors */ --- linux/include/linux/delay.h.orig Thu May 21 23:24:09 1998 +++ linux/include/linux/delay.h Wed Jul 26 08:55:55 2000 @@ -25,13 +25,24 @@ #define MAX_UDELAY_MS 5 #endif -#ifdef notdef +/* + * the 'preemptive' version of udelay. In some cases we want to use + * this variant, it guarantees that preemption will happen within + * 10 usecs (despite doing busy waiting). Not all drivers can use + * this automatically, the driver has to be sufficiently reentrant. + */ +#define udelay_resched(n) (\ + { int i; \ + for (i = 0; i < ((n+9)/10); i++) { \ + conditional_schedule(); \ + udelay(10); \ + } \ + }) + #define mdelay(n) (\ {unsigned long msec=(n); while (msec--) udelay(1000);}) -#else -#define mdelay(n) (\ - (__builtin_constant_p(n) && (n)<=MAX_UDELAY_MS) ? 
udelay((n)*1000) : \ - ({unsigned long msec=(n); while (msec--) udelay(1000);})) -#endif +#define mdelay_resched(n) (\ + {unsigned long msec=(n); while (msec--) udelay_resched(1000);}) + #endif /* defined(_LINUX_DELAY_H) */ --- linux/include/linux/fs.h.orig Wed Jul 26 08:55:49 2000 +++ linux/include/linux/fs.h Wed Jul 26 08:55:55 2000 @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -185,6 +186,7 @@ #define BH_Lock 2 /* 1 if the buffer is locked */ #define BH_Req 3 /* 0 if the buffer has been invalidated */ #define BH_Protected 6 /* 1 if the buffer is protected */ +#define BH_LowPrio 7 /* 1 if the buffer is lowprio */ /* * Try to keep the most commonly used fields in single cache lines (16 @@ -755,6 +757,7 @@ extern void refile_buffer(struct buffer_head * buf); extern void set_writetime(struct buffer_head * buf, int flag); extern int try_to_free_buffers(struct page *); +extern void cache_drop_behind(struct buffer_head *bh); extern int nr_buffers; extern long buffermem; @@ -765,22 +768,55 @@ #define BUF_DIRTY 2 /* Dirty buffers, not yet scheduled for write */ #define NR_LIST 3 -void mark_buffer_uptodate(struct buffer_head * bh, int on); +extern void mark_buffer_uptodate(struct buffer_head * bh, int on); -extern inline void mark_buffer_clean(struct buffer_head * bh) +#define mark_buffer_clean(bh) \ +do { \ + if (test_and_clear_bit(BH_Dirty, &(bh)->b_state)) { \ + if ((bh)->b_list == BUF_DIRTY) \ + refile_buffer(bh); \ + } \ +} while(0) + +#define mark_buffer_dirty(bh,flag) \ +do { \ + if (!test_and_set_bit(BH_Dirty, &(bh)->b_state)) { \ + set_writetime(bh, flag); \ + if ((bh)->b_list != BUF_DIRTY) \ + refile_buffer(bh); \ + } \ + mark_buffer_highprio(bh); \ + conditional_schedule(); \ +} while(0) + +extern inline void mark_buffer_highprio(struct buffer_head * bh) { - if (test_and_clear_bit(BH_Dirty, &bh->b_state)) { - if (bh->b_list == BUF_DIRTY) - refile_buffer(bh); - } + clear_bit(BH_LowPrio, &bh->b_state); +} + +extern inline void 
mark_buffer_lowprio(struct buffer_head * bh) +{ + /* + * dirty buffers cannot be marked lowprio. + */ + if (!buffer_dirty(bh)) + set_bit(BH_LowPrio, &bh->b_state); +} + +static inline int buffer_lowprio(struct buffer_head * bh) +{ + return test_bit(BH_LowPrio, &bh->b_state); } -extern inline void mark_buffer_dirty(struct buffer_head * bh, int flag) +extern inline void mark_buffer_dirty_lowprio(struct buffer_head * bh) { if (!test_and_set_bit(BH_Dirty, &bh->b_state)) { - set_writetime(bh, flag); if (bh->b_list != BUF_DIRTY) refile_buffer(bh); + /* + * Mark it lowprio only if it was not dirty before! + */ + set_bit(BH_LowPrio, &bh->b_state); } } @@ -855,6 +891,7 @@ extern struct buffer_head * find_buffer(kdev_t dev, int block, int size); extern void ll_rw_block(int, int, struct buffer_head * bh[]); extern int is_read_only(kdev_t); +extern int is_device_idle(kdev_t); extern void __brelse(struct buffer_head *); extern inline void brelse(struct buffer_head *buf) { @@ -870,8 +907,12 @@ extern void set_blocksize(kdev_t dev, int size); extern unsigned int get_hardblocksize(kdev_t dev); extern struct buffer_head * bread(kdev_t dev, int block, int size); +extern struct buffer_head * buffer_ready (kdev_t dev, int block, int size); +extern void bread_ahead (kdev_t dev, int block, int size); extern struct buffer_head * breada(kdev_t dev,int block, int size, unsigned int pos, unsigned int filesize); +extern struct buffer_head * breada_blocks(kdev_t dev,int block, + int size, int blocks); extern int brw_page(int, struct page *, kdev_t, int [], int, int); --- linux/include/linux/locks.h.orig Tue May 11 19:36:15 1999 +++ linux/include/linux/locks.h Wed Jul 26 08:55:55 2000 @@ -18,12 +18,14 @@ { if (test_bit(BH_Lock, &bh->b_state)) __wait_on_buffer(bh); + conditional_schedule(); } extern inline void lock_buffer(struct buffer_head * bh) { while (test_and_set_bit(BH_Lock, &bh->b_state)) __wait_on_buffer(bh); + conditional_schedule(); } extern inline void unlock_buffer(struct 
buffer_head *bh) @@ -43,6 +45,7 @@ { if (sb->s_lock) __wait_on_super(sb); + conditional_schedule(); } extern inline void lock_super(struct super_block * sb) --- linux/include/linux/sched.h.orig Wed Jul 26 08:55:41 2000 +++ linux/include/linux/sched.h Wed Jul 26 08:55:55 2000 @@ -5,6 +5,7 @@ extern unsigned long global_event; +#include #include #include #include @@ -115,7 +116,7 @@ extern void show_state(void); extern void trap_init(void); -#define MAX_SCHEDULE_TIMEOUT LONG_MAX +#define MAX_SCHEDULE_TIMEOUT LONG_MAX extern signed long FASTCALL(schedule_timeout(signed long timeout)); asmlinkage void schedule(void); @@ -237,7 +238,7 @@ 0-0xFFFFFFFF for kernel-thread */ struct exec_domain *exec_domain; - long need_resched; + volatile long need_resched; /* various fields */ long counter; --- linux/include/linux/selection.h.orig Tue May 11 19:35:45 1999 +++ linux/include/linux/selection.h Wed Jul 26 08:55:55 2000 @@ -7,8 +7,6 @@ #ifndef _LINUX_SELECTION_H_ #define _LINUX_SELECTION_H_ -#include - extern int sel_cons; extern void clear_selection(void); --- linux/include/linux/vt_buffer.h.orig Tue May 11 19:35:44 1999 +++ linux/include/linux/vt_buffer.h Wed Jul 26 08:55:55 2000 @@ -19,25 +19,129 @@ #include #endif +#define VT_DOUBLEBUF 1 + #ifndef VT_BUF_HAVE_RW -#define scr_writew(val, addr) (*(addr) = (val)) -#define scr_readw(addr) (*(addr)) -#define scr_memcpyw(d, s, c) memcpy(d, s, c) -#define scr_memmovew(d, s, c) memmove(d, s, c) -#define VT_BUF_HAVE_MEMCPYW -#define VT_BUF_HAVE_MEMMOVEW -#define scr_memcpyw_from(d, s, c) memcpy(d, s, c) -#define scr_memcpyw_to(d, s, c) memcpy(d, s, c) -#define VT_BUF_HAVE_MEMCPYF + +#if VT_DOUBLEBUF + +extern unsigned long vga_vram_base, vga_vram_end; + +extern inline unsigned short * __v2m(const unsigned short * s, int cons) +{ + struct vc_data *c; + unsigned short * __res; + + c = vc_cons[cons].d; + + if (*c->vc_display_fg == c) { + __res = (unsigned short *)(c->vc_videobuf+ + ((unsigned long)(s) - vga_vram_base)); + /* + * 
debugging hacks: + */ + if (((unsigned int)s < vga_vram_base) || + ((unsigned int)s > vga_vram_end)) { +// printk("P1:<%p->%p(%p)>?\n", s, __res, (void *)&&__y); + __res = (unsigned short *)(s); + } else { + if ((__res < (unsigned short *)c->vc_videobuf) || (__res >= (unsigned short *)(c->vc_videobuf+128*1024))) { +// printk("P2:<%p->%p(%p)>?\n", s, __res, (void *)&&__y); + __res = (unsigned short *)(s); + } + } + } else + __res = (unsigned short *)(s); + return __res; +} + +#define v2m(x) __v2m(x,currcons) + +// #define v2m(x) ({ unsigned short * __res; if (vc_cons[currcons].d->vc_videobuf) __res = (unsigned short *)(vc_cons[currcons].d->vc_videobuf+((unsigned long)(x)-vc_cons[currcons].d->vc_origin)); else { extern int magic_flag; magic_flag++; __res = (unsigned short *)(x); } __res; }) +// #define v2m(x) ({ unsigned short * __res; __res = (unsigned short *)(vc_cons[currcons].d->vc_videobuf+((unsigned long)(x)-vga_vram_base)); __res; }) + +#if 1 +extern inline void __scr_writew(u16 val, u16 *addr, int currcons) +{ + *addr = val; + *(v2m(addr)) = val; +} +#define scr_writew(val, addr) __scr_writew(val, addr, currcons) + +# define scr_readw(addr) ({*(v2m(addr));}) +#else +# define scr_writew(val, addr) ({*(addr) = (val); }) +# define scr_readw(addr) ({*(addr);}) +#endif + +extern inline void __scr_memcpyw(void * d, void * s, + unsigned int c, int currcons) +{ + memcpy(v2m(d), v2m(s), c); + memcpy(d, v2m(d), c); +} +#define scr_memcpyw(d, s, c) __scr_memcpyw(d, s, c, currcons) + +extern inline void __scr_memmovew(void * d, void * s, + unsigned int c, int currcons) +{ + memmove(v2m(d), v2m(s), c); + memcpy(d, v2m(d), c); +} +# define scr_memmovew(d, s, c) __scr_memmovew(d, s, c, currcons) +# define VT_BUF_HAVE_MEMCPYW +# define VT_BUF_HAVE_MEMMOVEW +extern inline void __scr_memcpyw_to(void * d, void * s, + unsigned int c, int currcons) +{ + memcpy(v2m(d), s, c); + memcpy(d, s, c); +} +# define scr_memcpyw_to(d, s, c) __scr_memcpyw_to(d, s, c, currcons) +# define 
scr_memcpyw_from(d, s, c) ({ memcpy(d, v2m(s), c);}) +# define VT_BUF_HAVE_MEMCPYF + +# define scr_writew_nonbuffered(val, addr) ({*(addr) = (val); }) +# define scr_readw_nonbuffered(addr) ({*(addr);}) +# define scr_memcpyw_nonbuffered(d, s, c) ({ memcpy(d, s, c); }) +# define scr_memmovew_nonbuffered(d, s, c) ({ memmove(d, s, c); }) +# define VT_BUF_HAVE_MEMCPYW +# define VT_BUF_HAVE_MEMMOVEW +# define scr_memcpyw_from_nonbuffered(d, s, c) ({ memcpy(d, s, c);}) +# define scr_memcpyw_to_nonbuffered(d, s, c) ({ memcpy(d, s, c);}) +# define VT_BUF_HAVE_MEMCPYF +#else + +# define scr_writew(val, addr) ({*(addr) = (val); }) +# define scr_readw(addr) ({*(addr);}) +# define scr_memcpyw(d, s, c) ({ memcpy(d, s, c); }) +# define scr_memmovew(d, s, c) ({ memmove(d, s, c); }) +# define VT_BUF_HAVE_MEMCPYW +# define VT_BUF_HAVE_MEMMOVEW +# define scr_memcpyw_from(d, s, c) ({ memcpy(d, s, c);}) +# define scr_memcpyw_to(d, s, c) ({ memcpy(d, s, c);}) +# define VT_BUF_HAVE_MEMCPYF +#endif + #endif #ifndef VT_BUF_HAVE_MEMSETW -extern inline void scr_memsetw(u16 *s, u16 c, unsigned int count) +extern inline void __scr_memsetw(u16 *s, u16 c, + unsigned int count, int currcons) { count /= 2; while (count--) scr_writew(c, s++); } +#define scr_memsetw(s,c,count) __scr_memsetw(s,c,count,currcons) +extern inline void scr_memsetw_nonbuffered(u16 *s, u16 c, + unsigned int count) +{ + count /= 2; + while (count--) + scr_writew_nonbuffered(c, s++); +} + #endif #ifndef VT_BUF_HAVE_MEMCPYW --- linux/include/asm-i386/softirq.h.orig Wed Jul 26 08:55:49 2000 +++ linux/include/asm-i386/softirq.h Wed Jul 26 08:55:55 2000 @@ -1,6 +1,8 @@ #ifndef __ASM_SOFTIRQ_H #define __ASM_SOFTIRQ_H +/*#define CHECK_IF*/ + #include #include @@ -33,7 +35,18 @@ static inline void end_bh_atomic(void) { + unsigned long flags; + atomic_dec(&global_bh_lock); + if (get_active_bhs()) { + __save_flags(flags); + if (flags & (1 << 9)) + do_bottom_half(); +#ifdef CHECK_IF + else + *(int *)0 = 0; +#endif + } } /* These are 
for the IRQs testing the lock */ @@ -65,8 +78,19 @@ extern inline void end_bh_atomic(void) { + unsigned long flags; + barrier(); local_bh_count[smp_processor_id()]--; + if (get_active_bhs()) { + __save_flags(flags); + if (flags & (1 << 9)) + do_bottom_half(); +#ifdef CHECK_IF + else + *(int *)0 = 0; +#endif + } } /* These are for the irq's testing the lock */ @@ -128,6 +152,15 @@ if (atomic_dec_and_test(&bh_mask_count[nr])) bh_mask |= 1 << nr; spin_unlock_irqrestore(&i386_bh_lock, flags); + if (get_active_bhs()) { + __save_flags(flags); + if (flags & (1 << 9)) + do_bottom_half(); +#ifdef CHECK_IF + else + *(int *)0 = 0; +#endif + } } #endif /* __ASM_SOFTIRQ_H */ --- linux/include/asm-i386/uaccess.h.orig Wed Jul 26 08:55:49 2000 +++ linux/include/asm-i386/uaccess.h Wed Jul 26 08:55:55 2000 @@ -6,6 +6,7 @@ */ #include #include +#include #include #define VERIFY_READ 0 @@ -253,6 +254,7 @@ #define __copy_user(to,from,size) \ do { \ int __d0, __d1; \ + conditional_schedule(); \ __asm__ __volatile__( \ "0: rep; movsl\n" \ " movl %3,%0\n" \ @@ -275,6 +277,7 @@ #define __copy_user_zeroing(to,from,size) \ do { \ int __d0, __d1; \ + conditional_schedule(); \ __asm__ __volatile__( \ "0: rep; movsl\n" \ " movl %3,%0\n" \ @@ -324,6 +327,7 @@ int __d0, __d1; \ switch (size & 3) { \ default: \ + conditional_schedule(); \ __asm__ __volatile__( \ "0: rep; movsl\n" \ "1:\n" \ @@ -408,6 +412,7 @@ int __d0, __d1; \ switch (size & 3) { \ default: \ + conditional_schedule(); \ __asm__ __volatile__( \ "0: rep; movsl\n" \ "1:\n" \ --- linux/ipc/shm.c.orig Wed Jul 26 08:55:49 2000 +++ linux/ipc/shm.c Wed Jul 26 08:55:55 2000 @@ -692,6 +692,8 @@ unsigned long id, idx; int loop = 0; int counter; + + conditional_schedule(); counter = shm_rss >> prio; if (!counter || !(swap_nr = get_swap_page())) --- linux/drivers/char/console.c.orig Wed Jul 26 08:55:44 2000 +++ linux/drivers/char/console.c Wed Jul 26 08:55:54 2000 @@ -101,6 +101,8 @@ #include "console_macros.h" +#include + struct consw 
*conswitchp = NULL; @@ -643,6 +645,7 @@ return -ENOMEM; vc_cons[currcons].d = (struct vc_data *)p; vt_cons[currcons] = (struct vt_struct *)(p+sizeof(struct vc_data)); + videobuf = 0; visual_init(currcons, 1); if (!*vc_cons[currcons].d->vc_uni_pagedir_loc) con_set_default_unimap(currcons); @@ -1819,9 +1822,10 @@ disable_bh(CONSOLE_BH); while (!tty->stopped && count) { enable_bh(CONSOLE_BH); - if (from_user) + if (from_user) { + conditional_schedule(); __get_user(c, buf); - else + } else c = *buf; buf++; n++; count--; disable_bh(CONSOLE_BH); @@ -2349,6 +2353,7 @@ kmem_start += sizeof(struct vc_data); vt_cons[currcons] = (struct vt_struct *) kmem_start; kmem_start += sizeof(struct vt_struct); + videobuf = 0; visual_init(currcons, 1); screenbuf = (unsigned short *) kmem_start; kmem_start += screenbuf_size; @@ -2426,6 +2431,7 @@ save_screen(i); old_was_color = vc_cons[i].d->vc_can_do_color; vc_cons[i].d->vc_sw->con_deinit(vc_cons[i].d); + videobuf = 0; visual_init(i, 0); update_attr(i); --- linux/drivers/char/console_macros.h.orig Thu Sep 17 18:35:03 1998 +++ linux/drivers/char/console_macros.h Wed Jul 26 08:55:54 2000 @@ -1,6 +1,7 @@ #define cons_num (vc_cons[currcons].d->vc_num) #define sw (vc_cons[currcons].d->vc_sw) #define screenbuf (vc_cons[currcons].d->vc_screenbuf) +#define videobuf (vc_cons[currcons].d->vc_videobuf) #define screenbuf_size (vc_cons[currcons].d->vc_screenbuf_size) #define origin (vc_cons[currcons].d->vc_origin) #define scr_top (vc_cons[currcons].d->vc_scr_top) --- linux/drivers/char/lp.c.orig Wed Jul 26 08:55:44 2000 +++ linux/drivers/char/lp.c Wed Jul 26 08:55:54 2000 @@ -234,8 +234,7 @@ { if (!parport_yield_blocking (lp_table[minor].dev)) { - if (current->need_resched) - schedule (); + conditional_schedule(); } else lp_table[minor].irq_missed = 1; } --- linux/drivers/char/mem.c.orig Wed Jul 26 08:55:28 2000 +++ linux/drivers/char/mem.c Wed Jul 26 08:55:55 2000 @@ -370,8 +370,7 @@ unsigned long unwritten = clear_user(buf, PAGE_SIZE); if 
(unwritten) return size + unwritten - PAGE_SIZE; - if (current->need_resched) - schedule(); + conditional_schedule(); buf += PAGE_SIZE; size -= PAGE_SIZE; } while (size); --- linux/drivers/char/random.c.orig Wed Jul 26 08:55:44 2000 +++ linux/drivers/char/random.c Wed Jul 26 08:55:55 2000 @@ -1293,8 +1293,9 @@ ret = -EINTR; break; } - schedule(); } + if (to_user) + conditional_schedule(); } /* Wipe data just returned from memory */ --- linux/drivers/char/tty_io.c.orig Wed Jul 26 08:55:45 2000 +++ linux/drivers/char/tty_io.c Wed Jul 26 08:55:55 2000 @@ -662,6 +662,7 @@ struct inode *inode = file->f_dentry->d_inode; up(&inode->i_sem); + conditional_schedule(); if (down_interruptible(&tty->atomic_write)) { down(&inode->i_sem); return -ERESTARTSYS; @@ -684,8 +685,7 @@ ret = -ERESTARTSYS; if (signal_pending(current)) break; - if (current->need_resched) - schedule(); + conditional_schedule(); } } if (written) { @@ -1666,7 +1666,7 @@ { struct tty_struct *tty, *real_tty; int retval; - + tty = (struct tty_struct *)file->private_data; if (tty_paranoia_check(tty, inode->i_rdev, "tty_ioctl")) return -EINVAL; --- linux/drivers/video/vgacon.c.orig Wed May 12 01:30:36 1999 +++ linux/drivers/video/vgacon.c Wed Jul 26 08:55:55 2000 @@ -33,6 +33,8 @@ * more details. 
*/ +#define VGA_LOCAL 1 + #include #include #include @@ -51,6 +53,8 @@ #include +#include + #define BLANK 0x0020 @@ -96,8 +100,8 @@ /* Description of the hardware situation */ -static unsigned long vga_vram_base; /* Base of video memory */ -static unsigned long vga_vram_end; /* End of video memory */ +unsigned long vga_vram_base; /* Base of video memory */ +unsigned long vga_vram_end; /* End of video memory */ static u16 vga_video_port_reg; /* Video register select port */ static u16 vga_video_port_val; /* Video register value port */ static unsigned int vga_video_num_columns; /* Number of text columns */ @@ -115,6 +119,7 @@ static int vga_video_font_height; static unsigned int vga_rolled_over = 0; +static char vga_vram_copy[128*1024]; /* In-memory copy of video memory */ void no_scroll(char *str, int *ints) { @@ -267,24 +272,24 @@ * Are there smarter methods around? */ p = (u16 *)vga_vram_base; - saved1 = scr_readw(p); - saved2 = scr_readw(p + 1); - scr_writew(0xAA55, p); - scr_writew(0x55AA, p + 1); - if (scr_readw(p) != 0xAA55 || scr_readw(p + 1) != 0x55AA) { - scr_writew(saved1, p); - scr_writew(saved2, p + 1); + saved1 = scr_readw_nonbuffered(p); + saved2 = scr_readw_nonbuffered(p + 1); + scr_writew_nonbuffered(0xAA55, p); + scr_writew_nonbuffered(0x55AA, p + 1); + if (scr_readw_nonbuffered(p) != 0xAA55 || scr_readw_nonbuffered(p + 1) != 0x55AA) { + scr_writew_nonbuffered(saved1, p); + scr_writew_nonbuffered(saved2, p + 1); goto no_vga; } - scr_writew(0x55AA, p); - scr_writew(0xAA55, p + 1); - if (scr_readw(p) != 0x55AA || scr_readw(p + 1) != 0xAA55) { - scr_writew(saved1, p); - scr_writew(saved2, p + 1); + scr_writew_nonbuffered(0x55AA, p); + scr_writew_nonbuffered(0xAA55, p + 1); + if (scr_readw_nonbuffered(p) != 0x55AA || scr_readw_nonbuffered(p + 1) != 0xAA55) { + scr_writew_nonbuffered(saved1, p); + scr_writew_nonbuffered(saved2, p + 1); goto no_vga; } - scr_writew(saved1, p); - scr_writew(saved2, p + 1); + scr_writew_nonbuffered(saved1, p); + 
scr_writew_nonbuffered(saved2, p + 1); if (vga_video_type == VIDEO_TYPE_EGAC || vga_video_type == VIDEO_TYPE_VGAC @@ -318,6 +323,8 @@ vgacon_uni_pagedir[1]++; if (!vgacon_uni_pagedir[0] && p) con_set_default_unimap(c->vc_num); + + c->vc_videobuf = (unsigned long) vga_vram_copy; } static inline void vga_set_mem_top(struct vc_data *c) @@ -365,6 +372,7 @@ static void vgacon_invert_region(struct vc_data *c, u16 *p, int count) { int col = vga_can_do_color; + int currcons = c->vc_num; while (count--) { u16 a = scr_readw(p); @@ -372,7 +380,7 @@ a = ((a) & 0x88ff) | (((a) & 0x7000) >> 4) | (((a) & 0x0700) << 4); else a ^= ((a & 0x0700) == 0x0100) ? 0x7000 : 0x7700; - scr_writew(a, p++); + __scr_writew(a, p++, c->vc_num); } } @@ -451,6 +459,7 @@ static int vgacon_switch(struct vc_data *c) { + int currcons = c->vc_num; /* * We need to save screen size here as it's the only way * we can spot the screen has been resized and we need to @@ -641,10 +650,10 @@ return 0; } vgacon_set_origin(c); - scr_memsetw((void *)vga_vram_base, BLANK, c->vc_screenbuf_size); + __scr_memsetw((void *)vga_vram_base, BLANK, c->vc_screenbuf_size, c->vc_num); return 1; case -1: /* Entering graphic mode */ - scr_memsetw((void *)vga_vram_base, BLANK, c->vc_screenbuf_size); + __scr_memsetw((void *)vga_vram_base, BLANK, c->vc_screenbuf_size, c->vc_num); vga_is_gfx = 1; return 1; default: /* VESA blanking */ @@ -940,6 +949,7 @@ static void vgacon_save_screen(struct vc_data *c) { static int vga_bootup_console = 0; + int currcons = c->vc_num; if (!vga_bootup_console) { /* This is a gross hack, but here is the only place we can @@ -954,10 +964,15 @@ scr_memcpyw_from((u16 *) c->vc_screenbuf, (u16 *) c->vc_origin, c->vc_screenbuf_size); } +unsigned int scroll_lat, max_scroll_lat, max_scroll_size, max_scroll_addr1, +max_scroll_addr2; + static int vgacon_scroll(struct vc_data *c, int t, int b, int dir, int lines) { + unsigned long long t0, t1; unsigned long oldo; unsigned int delta; + int currcons = c->vc_num; if 
(t || b != c->vc_rows || vga_is_gfx) return 0; @@ -970,16 +985,19 @@ oldo = c->vc_origin; delta = lines * c->vc_size_row; +t0 = t1 = 0; if (dir == SM_UP) { if (c->vc_scr_end + delta >= vga_vram_end) { - scr_memcpyw((u16 *)vga_vram_base, - (u16 *)(oldo + delta), - c->vc_screenbuf_size - delta); +rdtscll(t0); + scr_memcpyw((u16 *)vga_vram_base, + (u16 *)(oldo + delta), + c->vc_screenbuf_size - delta); +rdtscll(t1); c->vc_origin = vga_vram_base; vga_rolled_over = oldo - vga_vram_base; } else c->vc_origin += delta; - scr_memsetw((u16 *)(c->vc_origin + c->vc_screenbuf_size - delta), c->vc_video_erase_char, delta); + __scr_memsetw((u16 *)(c->vc_origin + c->vc_screenbuf_size - delta), c->vc_video_erase_char, delta, c->vc_num); } else { if (oldo - delta < vga_vram_base) { scr_memmovew((u16 *)(vga_vram_end - c->vc_screenbuf_size + delta), @@ -990,8 +1008,18 @@ } else c->vc_origin -= delta; c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size; - scr_memsetw((u16 *)(c->vc_origin), c->vc_video_erase_char, delta); + __scr_memsetw((u16 *)(c->vc_origin), c->vc_video_erase_char, delta, c->vc_num); } + +scroll_lat = (unsigned int)(t1-t0) / 400; +if (max_scroll_lat < scroll_lat) { + int currcons = c->vc_num; + max_scroll_lat = scroll_lat; + max_scroll_addr1 = (unsigned int)v2m((unsigned short *)vga_vram_base); + max_scroll_addr2 = (unsigned int)v2m((unsigned short *)(oldo + delta)); + max_scroll_size = (unsigned int)(c->vc_screenbuf_size - delta); +} + c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size; c->vc_visible_origin = c->vc_origin; vga_set_mem_top(c); --- linux/arch/i386/mm/fault.c.orig Wed Jul 26 08:55:35 2000 +++ linux/arch/i386/mm/fault.c Wed Jul 26 08:55:54 2000 @@ -200,7 +200,7 @@ tsk->tss.screen_bitmap |= 1 << bit; } up(&mm->mmap_sem); - return; + goto out; /* * Something tried to access memory that isn't in our memory map.. 
@@ -215,7 +215,7 @@ tsk->tss.error_code = error_code; tsk->tss.trap_no = 14; force_sig(SIGSEGV, tsk); - return; + goto out; } /* @@ -228,7 +228,7 @@ if (nr == 6) { do_invalid_op(regs, 0); - return; + goto out; } } @@ -236,7 +236,7 @@ /* Are we prepared to handle this kernel fault? */ if ((fixup = search_exception_table(regs->eip)) != 0) { regs->eip = fixup; - return; + goto out; } /* @@ -255,7 +255,7 @@ * CPU state on certain buggy processors. */ printk("Ok"); - return; + goto out; } if (address < PAGE_SIZE) @@ -330,4 +330,5 @@ /* Kernel mode? Handle exceptions or die */ if (!(error_code & 4)) goto no_context; +out: ; /* null statement: a label must be followed by a statement */ } --- linux/arch/i386/mm/init.c.orig Wed Jul 26 08:55:21 2000 +++ linux/arch/i386/mm/init.c Wed Jul 26 08:55:54 2000 @@ -487,6 +487,12 @@ val->freeram = nr_free_pages << PAGE_SHIFT; val->bufferram = buffermem; while (i-- > 0) { + /* + * si_meminfo() cannot be exact, but it can cause _long_ + * scheduling latencies on big memory boxes. + */ + if (!(i & 31)) + conditional_schedule(); if (PageReserved(mem_map+i)) continue; val->totalram++; --- linux/arch/i386/lib/usercopy.c.orig Wed Jul 26 08:55:21 2000 +++ linux/arch/i386/lib/usercopy.c Wed Jul 26 08:55:54 2000 @@ -10,6 +10,7 @@ unsigned long __generic_copy_to_user(void *to, const void *from, unsigned long n) { + conditional_schedule(); if (access_ok(VERIFY_WRITE, to, n)) __copy_user(to,from,n); return n; @@ -18,6 +19,7 @@ unsigned long __generic_copy_from_user(void *to, const void *from, unsigned long n) { + conditional_schedule(); if (access_ok(VERIFY_READ, from, n)) __copy_user_zeroing(to,from,n); return n; @@ -60,6 +62,7 @@ __strncpy_from_user(char *dst, const char *src, long count) { long res; + conditional_schedule(); __do_strncpy_from_user(dst, src, count, res); return res; } @@ -68,6 +71,7 @@ strncpy_from_user(char *dst, const char *src, long count) { long res = -EFAULT; + conditional_schedule(); if (access_ok(VERIFY_READ, src, 1)) __do_strncpy_from_user(dst, src, count, res); return res; @@
-102,6 +106,7 @@ unsigned long clear_user(void *to, unsigned long n) { + conditional_schedule(); if (access_ok(VERIFY_WRITE, to, n)) __do_clear_user(to, n); return n; @@ -110,6 +115,7 @@ unsigned long __clear_user(void *to, unsigned long n) { + conditional_schedule(); __do_clear_user(to, n); return n; } @@ -125,6 +131,7 @@ unsigned long mask = -__addr_ok(s); unsigned long res, tmp; + conditional_schedule(); __asm__ __volatile__( " andl %0,%%ecx\n" "0: repne; scasb\n" --- linux/arch/i386/kernel/entry.S.orig Wed Jul 26 08:55:35 2000 +++ linux/arch/i386/kernel/entry.S Wed Jul 26 08:55:54 2000 @@ -80,8 +80,7 @@ ENOSYS = 38 -#define SAVE_ALL \ - cld; \ +#define __SAVE_ALL \ pushl %es; \ pushl %ds; \ pushl %eax; \ @@ -95,7 +94,9 @@ movl %dx,%ds; \ movl %dx,%es; -#define RESTORE_ALL \ +#define SAVE_ALL cld; __SAVE_ALL + +#define __RESTORE_ALL \ popl %ebx; \ popl %ecx; \ popl %edx; \ @@ -106,12 +107,22 @@ 1: popl %ds; \ 2: popl %es; \ addl $4,%esp; \ -3: iret; \ .section .fixup,"ax"; \ 4: movl $0,(%esp); \ jmp 1b; \ 5: movl $0,(%esp); \ jmp 2b; \ +.previous; \ +.section __ex_table,"a";\ + .align 4; \ + .long 1b,4b; \ + .long 2b,5b; \ +.previous + +#define RESTORE_ALL \ + __RESTORE_ALL; \ +3: iret; \ +.section .fixup,"ax"; \ 6: pushl %ss; \ popl %ds; \ pushl %ss; \ @@ -120,9 +131,6 @@ call do_exit; \ .previous; \ .section __ex_table,"a";\ - .align 4; \ - .long 1b,4b; \ - .long 2b,5b; \ .long 3b,6b; \ .previous @@ -187,11 +195,13 @@ jne handle_bottom_half ret_with_reschedule: cmpl $0,need_resched(%ebx) +resched_critical_start: jne reschedule cmpl $0,sigpending(%ebx) jne signal_return -restore_all: +resched_restore_all: RESTORE_ALL +resched_critical_end: ALIGN signal_return: @@ -201,7 +211,20 @@ jne v86_signal_return xorl %edx,%edx call SYMBOL_NAME(do_signal) - jmp restore_all +signal_resched: + cmpl $0,need_resched(%ebx) +signal_critical_start: + jne signal_critical_end +signal_restore_all: + RESTORE_ALL +signal_critical_end: + /* + * check need_resched but not sigpending. 
+ * do not recurse signal handlers. This is the slow path. + */ + sti + call SYMBOL_NAME(schedule) + jmp signal_resched ALIGN v86_signal_return: @@ -209,7 +232,7 @@ movl %eax,%esp xorl %edx,%edx call SYMBOL_NAME(do_signal) - jmp restore_all + jmp signal_resched ALIGN tracesys: @@ -238,7 +261,12 @@ movb CS(%esp),%al testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor? jne ret_with_reschedule - jmp restore_all + movl EIP(%esp),%eax # interrupted critical return path? + subl $signal_critical_end,%eax + cmpl $(resched_critical_start-signal_critical_end),%eax + jae maybe_interrupted_critical +restore_all: + RESTORE_ALL ALIGN handle_bottom_half: @@ -249,6 +277,63 @@ reschedule: call SYMBOL_NAME(schedule) # test jmp ret_from_sys_call + +/* + * This is reached when we may have interrupted supervisor code inside + * something_critical_{start,end} and we may want to reschedule or + * deliver a signal. + */ + ALIGN +maybe_interrupted_critical: + cmpl $(signal_critical_start-signal_critical_end),%eax + jae interrupted_signal_return + addl $(signal_critical_end-resched_critical_end),%eax + jc restore_all + movl need_resched(%ebx),%ecx + orl sigpending(%ebx),%ecx + je restore_all + cmpl $(resched_restore_all-resched_critical_end),%eax + ja resched_critical_intercept + movl $ret_with_reschedule,EIP(%esp) # simply restart the checks. + jmp restore_all + ALIGN +resched_critical_intercept: + addl $(resched_fixup-resched_restore_all+resched_critical_end),%eax + movl %eax,EIP(%esp) + jmp restore_all # the iret jumps into resched_fixup. + ALIGN +interrupted_signal_return: + cmpl $0,need_resched(%ebx) + je restore_all + cmpl $(signal_restore_all-signal_critical_end),%eax + ja signal_critical_intercept + movl $signal_resched,EIP(%esp) # simply restart the checks. 
+ jmp restore_all + ALIGN +signal_critical_intercept: + addl $(signal_fixup-signal_restore_all+signal_critical_end),%eax + movl %eax,EIP(%esp) + jmp restore_all + +/* + * These are the fixup code for interrupted xxx_restore_all. They must have + * *exactly* the same code layout as xxx_restore_all, up to the iret. + * + * These are used very rarely, and the only symptom of not doing this + * is extra scheduling and signal delivery latency. + */ +resched_fixup: + __RESTORE_ALL # complete resched_restore_all. + pushl %eax # undo it. + __SAVE_ALL + GET_CURRENT(%ebx) + jmp ret_with_reschedule +signal_fixup: + __RESTORE_ALL # complete signal_restore_all. + pushl %eax # undo it. + __SAVE_ALL + GET_CURRENT(%ebx) + jmp signal_resched ENTRY(divide_error) pushl $0 # no error code --- linux/arch/i386/kernel/process.c.orig Wed Jul 26 08:55:35 2000 +++ linux/arch/i386/kernel/process.c Wed Jul 26 08:55:54 2000 @@ -94,8 +94,12 @@ if (acpi_idle && (jiffies - start_idle > HARD_IDLE_TIMEOUT)) acpi_idle(); else { - if (boot_cpu_data.hlt_works_ok && !hlt_counter && !current->need_resched) + __cli(); + if (boot_cpu_data.hlt_works_ok && !hlt_counter && !current->need_resched) { + __sti(); __asm__("hlt"); + } + __sti(); } work = current->need_resched;