--- linux/include/asm-i386/condsched.h.orig Thu Aug 3 14:06:53 2000 +++ linux/include/asm-i386/condsched.h Thu Aug 3 14:18:58 2000 @@ -0,0 +1,59 @@ +#ifndef _ASM_CONDSCHED_H +#define _ASM_CONDSCHED_H + +/* + * Conditional scheduling points, to reduce latencies. + * + * Started by Ingo Molnar + */ + +#ifndef __ASSEMBLY__ + +#define __HAVE_ARCH_CONDITIONAL_SCHEDULE + +/* + * We do various tricks to make conditional_schedule() + * almost zero-overhead in the common case: + * + * - the slow path is stored in the separate .text.condsched + * section, not inline, so it does not impact icache footprint + * and does not disrupt the code stream. + * + * - we move register saving to the slow offline path, + * thus the impact of conditional_schedule() on + * current register allocation is zero. It's basically a 'nop' + * to GCC. + * + * - we do a forward conditional jump like in the spinlock case, + * which is default-predicted as not taken - ie. we do not + * interrupt the pipeline in the common case, it's straight + * fall-through code. + * + * - we use 'cmpb' (compare byte) to reduce icache footprint by + * 3 more bytes. 
+ */ + +#define conditional_schedule() \ +do { \ + __asm__ __volatile__ ( \ + "cmpb $0, 20(%0);" \ + "jnz 2f;" \ + "1:;" \ + \ + ".section .text.condsched,\"ax\";" \ + "2: pushl %%eax; pushl %%ecx; pushl %%edx;" \ + "movl $0, (%0);" \ + "call schedule;" \ + "popl %%edx; popl %%ecx; popl %%eax;" \ + "jmp 1b;" \ + ".previous;" \ + \ + : /* no output */ \ + : "r" (current) \ + /* no registers clobbered */ ); \ +} while (0) + +#endif + +#endif + --- linux/fs/proc/proc_misc.c.orig Thu Aug 3 14:06:34 2000 +++ linux/fs/proc/proc_misc.c Thu Aug 3 14:06:53 2000 @@ -284,21 +284,24 @@ static int kstat_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int i, len; + int i, j, len; extern unsigned long total_forks; unsigned long jif = jiffies; unsigned int sum = 0, user = 0, nice = 0, system = 0; int major, disk; for (i = 0 ; i < smp_num_cpus; i++) { - int cpu = cpu_logical_map(i), j; + int cpu = cpu_logical_map(i); user += kstat.per_cpu_user[cpu]; nice += kstat.per_cpu_nice[cpu]; system += kstat.per_cpu_system[cpu]; - for (j = 0 ; j < NR_IRQS ; j++) - sum += kstat.irqs[cpu][j]; } + for (j = 0 ; j < NR_IRQS ; j++) + for (i = 0 ; i < smp_num_cpus; i++) { + int cpu = cpu_logical_map(i); + sum += kstat.irqs[cpu][j]; + } len = sprintf(page, "cpu %u %u %u %lu\n", user, nice, system, jif * smp_num_cpus - (user + nice + system)); @@ -321,6 +324,7 @@ kstat.pswpout, sum ); + conditional_schedule(); for (i = 0 ; i < NR_IRQS ; i++) len += sprintf(page + len, " %u", kstat_irqs(i)); @@ -343,6 +347,7 @@ ); } } + conditional_schedule(); len += sprintf(page + len, "\nctxt %u\n" --- linux/fs/proc/array.c.orig Thu Aug 3 14:06:28 2000 +++ linux/fs/proc/array.c Thu Aug 3 14:06:53 2000 @@ -462,6 +462,7 @@ statm_pte_range(pmd, address, end - address, pages, shared, dirty, total); address = (address + PMD_SIZE) & PMD_MASK; pmd++; + conditional_schedule(); } while (address < end); } @@ -482,7 +483,7 @@ task_lock(task); mm = task->mm; - if(mm) + if (mm) 
atomic_inc(&mm->mm_users); task_unlock(task); if (mm) { --- linux/fs/ext2/inode.c.orig Thu Aug 3 14:06:36 2000 +++ linux/fs/ext2/inode.c Thu Aug 3 14:06:53 2000 @@ -607,6 +607,7 @@ struct buffer_head dummy; int error; + conditional_schedule(); dummy.b_state = 0; dummy.b_blocknr = -1000; error = ext2_get_block(inode, block, &dummy, create); --- linux/fs/ext2/truncate.c.orig Fri Dec 3 00:24:49 1999 +++ linux/fs/ext2/truncate.c Thu Aug 3 14:06:53 2000 @@ -108,6 +108,7 @@ /* Make sure both buffers are unlocked */ do { + conditional_schedule(); retry = 0; if (buffer_locked(bh)) { __wait_on_buffer(bh); --- linux/fs/buffer.c.orig Thu Aug 3 14:06:35 2000 +++ linux/fs/buffer.c Thu Aug 3 14:06:53 2000 @@ -196,14 +196,14 @@ if (!lru_list[BUF_DIRTY]) break; if (dev && bh->b_dev != dev) - continue; + goto continue_loop; if (buffer_locked(bh)) { /* Buffer is locked; skip it unless wait is * requested AND pass > 0. */ if (!wait || !pass) { retry = 1; - continue; + goto continue_loop; } atomic_inc(&bh->b_count); spin_unlock(&lru_list_lock); @@ -218,14 +218,14 @@ if (wait && buffer_req(bh) && !buffer_locked(bh) && !buffer_dirty(bh) && !buffer_uptodate(bh)) { err = -EIO; - continue; + goto continue_loop; } /* Don't write clean buffers. Don't write ANY buffers * on the third pass. */ if (!buffer_dirty(bh) || pass >= 2) - continue; + goto continue_loop; atomic_inc(&bh->b_count); spin_unlock(&lru_list_lock); @@ -233,6 +233,14 @@ atomic_dec(&bh->b_count); retry = 1; goto repeat; + continue_loop: + if (!(i & 127)) { + if (current->need_resched) { + spin_unlock(&lru_list_lock); + conditional_schedule(); + goto repeat; + } + } } repeat2: @@ -247,14 +255,14 @@ if (!lru_list[BUF_LOCKED]) break; if (dev && bh->b_dev != dev) - continue; + goto continue_loop2; if (buffer_locked(bh)) { /* Buffer is locked; skip it unless wait is * requested AND pass > 0. 
*/ if (!wait || !pass) { retry = 1; - continue; + goto continue_loop2; } atomic_inc(&bh->b_count); spin_unlock(&lru_list_lock); @@ -263,6 +271,14 @@ atomic_dec(&bh->b_count); goto repeat2; } + continue_loop2: + if (!(i & 127)) { + if (current->need_resched) { + spin_unlock(&lru_list_lock); + conditional_schedule(); + goto repeat2; + } + } } spin_unlock(&lru_list_lock); @@ -594,6 +610,13 @@ if (!bh) continue; for (i = nr_buffers_type[nlist]; i > 0 ; bh = bh_next, i--) { + if (!(i & 127)) { + if (current->need_resched) { + spin_unlock(&lru_list_lock); + conditional_schedule(); + goto retry; + } + } bh_next = bh->b_next_free; if (bh->b_dev != dev) continue; @@ -653,6 +676,13 @@ if (!bh) continue; for (i = nr_buffers_type[nlist]; i > 0 ; bh = bh_next, i--) { + if (!(i & 127)) { + if (current->need_resched) { + spin_unlock(&lru_list_lock); + conditional_schedule(); + goto retry; + } + } bh_next = bh->b_next_free; if (bh->b_dev != dev || bh->b_size == size) continue; @@ -832,6 +862,7 @@ insert_into_queues(bh); out: touch_buffer(bh); + conditional_schedule(); return bh; } @@ -900,6 +931,7 @@ { __mark_buffer_dirty(bh, flag); balance_dirty(bh->b_dev); + conditional_schedule(); } /* @@ -1449,6 +1481,7 @@ block++, block_start=block_end, bh = bh->b_this_page) { if (!bh) BUG(); + conditional_schedule(); block_end = block_start+blocksize; if (block_end <= from) continue; @@ -1558,6 +1591,7 @@ i = 0; do { + conditional_schedule(); if (buffer_uptodate(bh)) continue; @@ -2369,6 +2403,13 @@ if (!bh) goto out_unlock; for (i = nr_buffers_type[BUF_DIRTY]; i-- > 0; bh = next) { + if (!(i & 127)) { + if (current->need_resched) { + spin_unlock(&lru_list_lock); + conditional_schedule(); + goto restart; + } + } next = bh->b_next_free; if (!buffer_dirty(bh)) { @@ -2396,8 +2437,7 @@ ll_rw_block(WRITE, 1, &bh); atomic_dec(&bh->b_count); - if (current->need_resched) - schedule(); + conditional_schedule(); goto restart; } out_unlock: --- linux/fs/namei.c.orig Thu Aug 3 14:06:33 2000 +++ 
linux/fs/namei.c Thu Aug 3 14:06:53 2000 @@ -1682,8 +1682,9 @@ error = -ENOENT; else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) error = -EBUSY; - else + else { error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + } if (target) { if (!error) target->i_flags |= S_DEAD; --- linux/fs/dcache.c.orig Thu Aug 3 14:06:36 2000 +++ linux/fs/dcache.c Thu Aug 3 14:06:53 2000 @@ -353,6 +353,11 @@ prune_one_dentry(dentry); if (!--count) break; + if (current->need_resched) { + spin_unlock(&dcache_lock); + conditional_schedule(); + spin_lock(&dcache_lock); + } } spin_unlock(&dcache_lock); } @@ -417,6 +422,11 @@ list_del(tmp); INIT_LIST_HEAD(tmp); prune_one_dentry(dentry); + if (current->need_resched) { + spin_unlock(&dcache_lock); + conditional_schedule(); + spin_lock(&dcache_lock); + } goto repeat; } spin_unlock(&dcache_lock); @@ -485,7 +495,7 @@ { struct dentry *this_parent = parent; struct list_head *next; - int found = 0; + int found = 0, count = 0; spin_lock(&dcache_lock); repeat: @@ -500,6 +510,8 @@ list_add(&dentry->d_lru, dentry_unused.prev); found++; } + if ((count++ > 100) && (found > 10)) + goto out; /* * Descend a level if the d_subdirs list is non-empty. 
*/ @@ -524,6 +536,7 @@ #endif goto resume; } +out: spin_unlock(&dcache_lock); return found; } @@ -539,8 +552,10 @@ { int found; - while ((found = select_parent(parent)) != 0) + while ((found = select_parent(parent)) != 0) { prune_dcache(found); + conditional_schedule(); + } } /* @@ -556,10 +571,15 @@ */ int shrink_dcache_memory(int priority, unsigned int gfp_mask) { - int count = 0; - if (priority) + int count = 100; + if (priority) { count = dentry_stat.nr_unused / priority; + // FIXME: stupid limit + if (count > 100) + count = 100; + } prune_dcache(count); + conditional_schedule(); /* FIXME: kmem_cache_shrink here should tell us the number of pages freed, and it should work in a __GFP_DMA/__GFP_HIGHMEM behaviour --- linux/fs/file.c.orig Thu Aug 3 14:06:28 2000 +++ linux/fs/file.c Thu Aug 3 14:06:53 2000 @@ -88,6 +88,7 @@ error = -ENOMEM; new_fds = alloc_fd_array(nfds); + conditional_schedule(); write_lock(&files->file_lock); if (!new_fds) goto out; @@ -121,6 +122,7 @@ } error = 0; out: + conditional_schedule(); return error; } @@ -187,6 +189,7 @@ error = -ENOMEM; new_openset = alloc_fdset(nfds); new_execset = alloc_fdset(nfds); + conditional_schedule(); write_lock(&files->file_lock); if (!new_openset || !new_execset) goto out; @@ -213,6 +216,7 @@ new_openset = xchg(&files->open_fds, new_openset); new_execset = xchg(&files->close_on_exec, new_execset); write_unlock(&files->file_lock); + conditional_schedule(); free_fdset (new_openset, nfds); free_fdset (new_execset, nfds); write_lock(&files->file_lock); --- linux/fs/ioctl.c.orig Thu Aug 3 14:06:28 2000 +++ linux/fs/ioctl.c Thu Aug 3 14:06:53 2000 @@ -39,8 +39,11 @@ case FIONREAD: return put_user(inode->i_size - filp->f_pos, (int *) arg); } - if (filp->f_op && filp->f_op->ioctl) - return filp->f_op->ioctl(inode, filp, cmd, arg); + if (filp->f_op && filp->f_op->ioctl) { + int ret; + ret = filp->f_op->ioctl(inode, filp, cmd, arg); + return ret; + } return -ENOTTY; } @@ -104,8 +107,9 @@ error = -ENOTTY; if 
(S_ISREG(filp->f_dentry->d_inode->i_mode)) error = file_ioctl(filp, cmd, arg); - else if (filp->f_op && filp->f_op->ioctl) + else if (filp->f_op && filp->f_op->ioctl) { error = filp->f_op->ioctl(filp->f_dentry->d_inode, filp, cmd, arg); + } } unlock_kernel(); fput(filp); --- linux/fs/read_write.c.orig Thu Aug 3 14:06:21 2000 +++ linux/fs/read_write.c Thu Aug 3 14:06:53 2000 @@ -128,8 +128,9 @@ if (!ret) { ssize_t (*read)(struct file *, char *, size_t, loff_t *); ret = -EINVAL; - if (file->f_op && (read = file->f_op->read) != NULL) + if (file->f_op && (read = file->f_op->read) != NULL) { ret = read(file, buf, count, &file->f_pos); + } } } fput(file); @@ -152,8 +153,9 @@ if (!ret) { ssize_t (*write)(struct file *, const char *, size_t, loff_t *); ret = -EINVAL; - if (file->f_op && (write = file->f_op->write) != NULL) + if (file->f_op && (write = file->f_op->write) != NULL) { ret = write(file, buf, count, &file->f_pos); + } } } fput(file); --- linux/fs/inode.c.orig Thu Aug 3 14:06:31 2000 +++ linux/fs/inode.c Thu Aug 3 14:06:53 2000 @@ -419,7 +419,7 @@ { LIST_HEAD(list); struct list_head *entry, *freeable = &list; - int count = 0; + int count = 0, iterations = 500; struct inode * inode; spin_lock(&inode_lock); @@ -431,6 +431,8 @@ { struct list_head *tmp = entry; + if (!--iterations) + break; entry = entry->prev; inode = INODE(tmp); if (inode->i_state & (I_FREEING|I_CLEAR)) @@ -450,22 +452,28 @@ } inodes_stat.nr_unused -= count; spin_unlock(&inode_lock); + conditional_schedule(); dispose_list(freeable); } int shrink_icache_memory(int priority, int gfp_mask) { - int count = 0; + int count = 100; - if (priority) + if (priority) { count = inodes_stat.nr_unused / priority; + // FIXME: stupid limit + if (count > 100) + count = 100; + } prune_icache(count); /* FIXME: kmem_cache_shrink here should tell us the number of pages freed, and it should work in a __GFP_DMA/__GFP_HIGHMEM behaviour to free only the interesting pages in function of the needs of the current allocation. 
*/ + conditional_schedule(); kmem_cache_shrink(inode_cachep); return 0; --- linux/kernel/exit.c.orig Thu Aug 3 14:06:36 2000 +++ linux/kernel/exit.c Thu Aug 3 14:06:53 2000 @@ -172,8 +172,10 @@ while (set) { if (set & 1) { struct file * file = xchg(&files->fd[i], NULL); - if (file) + if (file) { filp_close(file, files); + conditional_schedule(); + } } i++; set >>= 1; --- linux/kernel/printk.c.orig Thu Aug 3 14:06:29 2000 +++ linux/kernel/printk.c Thu Aug 3 14:06:53 2000 @@ -156,6 +156,7 @@ __put_user(c,buf); buf++; i++; + conditional_schedule(); spin_lock_irq(&console_lock); } spin_unlock_irq(&console_lock); @@ -196,6 +197,7 @@ c = log_buf[ j & LOG_BUF_MASK ]; spin_unlock_irq(&console_lock); __put_user(c,&buf[count-1-i]); + conditional_schedule(); spin_lock_irq(&console_lock); } spin_unlock_irq(&console_lock); @@ -206,6 +208,7 @@ for(i=0;ifile_lock); + conditional_schedule(); /* compute the remainder to be cleared */ size = (newf->max_fds - open_files) * sizeof(struct file *); /* This is long word aligned thus could use a optimized version */ memset(new_fds, 0, size); + conditional_schedule(); if (newf->max_fdset > open_files) { int left = (newf->max_fdset-open_files)/8; int start = open_files / (8 * sizeof(unsigned long)); memset(&newf->open_fds->fds_bits[start], 0, left); + conditional_schedule(); memset(&newf->close_on_exec->fds_bits[start], 0, left); } --- linux/mm/vmscan.c.orig Thu Aug 3 14:06:35 2000 +++ linux/mm/vmscan.c Thu Aug 3 14:06:53 2000 @@ -147,6 +147,7 @@ vma->vm_mm->rss--; flush_tlb_page(vma, address); vmlist_access_unlock(vma->vm_mm); + conditional_schedule(); error = swapout(page, file); UnlockPage(page); if (file) fput(file); @@ -179,6 +180,7 @@ set_pte(page_table, swp_entry_to_pte(entry)); flush_tlb_page(vma, address); vmlist_access_unlock(vma->vm_mm); + conditional_schedule(); /* OK, do a physical asynchronous write to swap. 
*/ rw_swap_page(WRITE, page, 0); @@ -234,7 +236,10 @@ result = try_to_swap_out(mm, vma, address, pte, gfp_mask); if (result) return result; - if (!mm->swap_cnt) + /* + * Do at most 1MB scanning at once. + */ + if (!mm->swap_cnt || !(mm->swap_cnt & 127)) return 0; address += PAGE_SIZE; pte++; @@ -265,7 +270,7 @@ int result = swap_out_pmd(mm, vma, pmd, address, end, gfp_mask); if (result) return result; - if (!mm->swap_cnt) + if (!mm->swap_cnt || !(mm->swap_cnt & 127)) return 0; address = (address + PMD_SIZE) & PMD_MASK; pmd++; @@ -291,7 +296,7 @@ int result = swap_out_pgd(mm, vma, pgdir, address, end, gfp_mask); if (result) return result; - if (!mm->swap_cnt) + if (!mm->swap_cnt || !(mm->swap_cnt & 127)) return 0; address = (address + PGDIR_SIZE) & PGDIR_MASK; pgdir++; @@ -330,6 +335,7 @@ } } vmlist_access_unlock(mm); + conditional_schedule(); /* We didn't find anything for the process */ mm->swap_cnt = 0; @@ -494,6 +500,7 @@ while (shrink_mmap(priority, gfp_mask)) { if (!--count) goto done; + conditional_schedule(); } /* check if mission completed */ @@ -508,7 +515,9 @@ * really plenty of memory free. */ count -= shrink_dcache_memory(priority, gfp_mask); + conditional_schedule(); count -= shrink_icache_memory(priority, gfp_mask); + conditional_schedule(); /* * Not currently working, see fixme in shrink_?cache_memory * In the inner funtions there is a comment: @@ -522,6 +531,7 @@ goto done; while (shm_swap(priority, gfp_mask)) { + conditional_schedule(); if (!--count) goto done; } @@ -535,14 +545,17 @@ * as a "count" success. */ swap_count = SWAP_COUNT; - while (swap_out(priority, gfp_mask)) + while (swap_out(priority, gfp_mask)) { + conditional_schedule(); if (--swap_count < 0) break; + } } while (--priority >= 0); /* Always end on a shrink_mmap.., may sleep... 
*/ while (shrink_mmap(0, gfp_mask)) { + conditional_schedule(); if (!--count) goto done; } --- linux/mm/mmap.c.orig Thu Aug 3 14:06:35 2000 +++ linux/mm/mmap.c Thu Aug 3 14:06:53 2000 @@ -722,6 +722,7 @@ extra = unmap_fixup(mm, mpnt, st, size, extra); if (file) atomic_inc(&file->f_dentry->d_inode->i_writecount); + conditional_schedule(); } /* Release the extra vma struct if it wasn't used */ @@ -870,6 +871,7 @@ fput(mpnt->vm_file); kmem_cache_free(vm_area_cachep, mpnt); mpnt = next; + conditional_schedule(); } /* This is just debugging */ --- linux/mm/memory.c.orig Thu Aug 3 14:06:35 2000 +++ linux/mm/memory.c Thu Aug 3 14:06:53 2000 @@ -103,8 +103,10 @@ } pmd = pmd_offset(dir, 0); pgd_clear(dir); - for (j = 0; j < PTRS_PER_PMD ; j++) + for (j = 0; j < PTRS_PER_PMD ; j++) { free_one_pmd(pmd+j); + conditional_schedule(); + } pmd_free(pmd); } @@ -243,6 +245,7 @@ goto out; src_pte++; dst_pte++; + conditional_schedule(); } while ((unsigned long)src_pte & PTE_TABLE_MASK); cont_copy_pmd_range: src_pmd++; @@ -310,9 +313,9 @@ pte++; size--; pte_clear(pte-1); - if (pte_none(page)) - continue; - freed += free_pte(page); + if (!pte_none(page)) + freed += free_pte(page); + conditional_schedule(); } return freed; } @@ -1097,15 +1100,12 @@ */ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr) { - int high = 0; struct page *page = NULL; pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); if (write_access) { page = alloc_page(GFP_HIGHUSER); if (!page) return -1; - if (PageHighMem(page)) - high = 1; clear_user_highpage(page, addr); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); mm->rss++; --- linux/mm/slab.c.orig Thu Aug 3 14:06:31 2000 +++ linux/mm/slab.c Thu Aug 3 14:06:53 2000 @@ -1454,6 +1454,8 @@ */ void * kmem_cache_alloc (kmem_cache_t *cachep, int flags) { + if ((flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) + conditional_schedule(); return 
__kmem_cache_alloc(cachep, flags); } @@ -1923,6 +1925,7 @@ spin_unlock_irq(&cachep->spinlock); FIXUP(got_data_up); p = cachep->next.next; + conditional_schedule(); } while (p != &cache_cache.next); got_data_up: up(&cache_chain_sem); --- linux/mm/page_alloc.c.orig Thu Aug 3 14:06:35 2000 +++ linux/mm/page_alloc.c Thu Aug 3 14:06:53 2000 @@ -221,6 +221,9 @@ zone_t **zone; extern wait_queue_head_t kswapd_wait; + if (zonelist->gfp_mask & __GFP_WAIT) + conditional_schedule(); + /* * (If anyone calls gfp from interrupts nonatomically then it * will sooner or later tripped up by a schedule().) @@ -228,7 +231,6 @@ * We are falling back to lower-level zones if allocation * in a higher zone fails. */ - zone = zonelist->zones; for (;;) { zone_t *z = *(zone++); --- linux/mm/filemap.c.orig Thu Aug 3 14:06:35 2000 +++ linux/mm/filemap.c Thu Aug 3 14:06:53 2000 @@ -201,6 +201,7 @@ page_cache_release(page); page_cache_release(page); + conditional_schedule(); /* * We have done things without the pagecache lock, * so we'll have to repeat the scan. @@ -240,6 +241,7 @@ */ UnlockPage(page); page_cache_release(page); + conditional_schedule(); goto repeat; } spin_unlock(&pagecache_lock); @@ -252,16 +254,20 @@ */ int shrink_mmap(int priority, int gfp_mask) { - int ret = 0, count, nr_dirty; + int ret = 0, count, nr_dirty, iterations; struct list_head * page_lru; struct page * page = NULL; count = nr_lru_pages / (priority + 1); + // FIXME: stupid limit + if (count > 100) + count = 100; + iterations = count*4; nr_dirty = priority; /* we need pagemap_lru_lock for list_del() ... 
subtle code below */ spin_lock(&pagemap_lru_lock); - while (count > 0 && (page_lru = lru_cache.prev) != &lru_cache) { + while ((count > 0) && ((page_lru = lru_cache.prev) != &lru_cache)) { page = list_entry(page_lru, struct page, lru); list_del(page_lru); @@ -350,11 +356,14 @@ cache_unlock_continue: spin_unlock(&pagecache_lock); unlock_continue: + conditional_schedule(); spin_lock(&pagemap_lru_lock); UnlockPage(page); page_cache_release(page); dispose_continue: list_add(page_lru, &lru_cache); + if (!--iterations) + break; } goto out; @@ -1043,6 +1052,7 @@ struct page *page, **hash; unsigned long end_index, nr; + conditional_schedule(); end_index = inode->i_size >> PAGE_CACHE_SHIFT; if (index > end_index) break; @@ -2513,7 +2523,9 @@ status = copy_from_user(kaddr+offset, buf, bytes); if (status) goto fail_write; + conditional_schedule(); status = mapping->a_ops->commit_write(file, page, offset, offset+bytes); + conditional_schedule(); if (!status) status = bytes; --- linux/mm/swapfile.c.orig Thu Aug 3 14:06:29 2000 +++ linux/mm/swapfile.c Thu Aug 3 14:06:53 2000 @@ -507,7 +507,9 @@ len += sprintf(buf + len, "partition\t"); usedswap = 0; - for (j = 0; j < ptr->max; ++j) + for (j = 0; j < ptr->max; ++j) { + if (!(j & 127)) + conditional_schedule(); switch (ptr->swap_map[j]) { case SWAP_MAP_BAD: case 0: @@ -515,6 +517,7 @@ default: usedswap++; } + } len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10), usedswap << (PAGE_SHIFT - 10), ptr->prio); } @@ -800,6 +803,8 @@ if ((swap_info[i].flags & SWP_WRITEOK) != SWP_WRITEOK) continue; for (j = 0; j < swap_info[i].max; ++j) { + if (!(j & 127)) + conditional_schedule(); switch (swap_info[i].swap_map[j]) { case SWAP_MAP_BAD: continue; --- linux/include/linux/condsched.h.orig Thu Aug 3 14:06:53 2000 +++ linux/include/linux/condsched.h Thu Aug 3 14:06:53 2000 @@ -0,0 +1,19 @@ +#ifndef _LINUX_CONDSCHED_H +#define _LINUX_CONDSCHED_H + +#include + +#ifndef __ASSEMBLY__ +#ifndef __HAVE_ARCH_CONDITIONAL_SCHEDULE 
+#define conditional_schedule() \ +do { \ + if (current->need_resched) { \ + current->state = TASK_RUNNING; \ + schedule(); \ + } \ +} while (0) +#endif +#endif + +#endif + --- linux/include/linux/sched.h.orig Thu Aug 3 14:06:36 2000 +++ linux/include/linux/sched.h Thu Aug 3 14:07:36 2000 @@ -1,6 +1,7 @@ #ifndef _LINUX_SCHED_H #define _LINUX_SCHED_H +#include #include /* for HZ */ extern unsigned long event; --- linux/include/linux/delay.h.orig Thu May 21 23:24:09 1998 +++ linux/include/linux/delay.h Thu Aug 3 14:06:53 2000 @@ -34,4 +34,22 @@ ({unsigned long msec=(n); while (msec--) udelay(1000);})) #endif +/* + * The 'preemptive' version of udelay. In some cases we want to use + * this variant: it never busy-waits for more than __step (50) usecs + * between rescheduling points (despite doing busy waiting). Not all drivers can use + * this automatically; the driver has to be sufficiently reentrant. + */ +#define udelay_resched(n) \ + do { \ + unsigned int __left = (n), __step = 50; \ + \ + while (__left > __step) { \ + udelay(__step); \ + conditional_schedule(); \ + __left -= __step; \ + } \ + udelay(__left); \ + } while(0) + #endif /* defined(_LINUX_DELAY_H) */ --- linux/include/linux/locks.h.orig Thu May 25 04:53:17 2000 +++ linux/include/linux/locks.h Thu Aug 3 14:07:36 2000 @@ -1,6 +1,7 @@ #ifndef _LINUX_LOCKS_H #define _LINUX_LOCKS_H +#include #ifndef _LINUX_MM_H #include #endif @@ -16,6 +17,7 @@ extern inline void wait_on_buffer(struct buffer_head * bh) { + conditional_schedule(); if (test_bit(BH_Lock, &bh->b_state)) __wait_on_buffer(bh); } --- linux/include/asm-i386/uaccess.h.orig Thu May 25 04:52:42 2000 +++ linux/include/asm-i386/uaccess.h Thu Aug 3 14:06:53 2000 @@ -5,6 +5,7 @@ * User space memory access functions */ #include +#include #include #include @@ -255,6 +256,7 @@ #define __copy_user(to,from,size) \ do { \ int __d0, __d1; \ + conditional_schedule(); \ __asm__ __volatile__( \ "0: rep; movsl\n" \ " movl %3,%0\n" \ @@ -277,6 +279,7 @@ #define 
__copy_user_zeroing(to,from,size) \ do { \ int __d0, __d1; \ + conditional_schedule(); \ __asm__ __volatile__( \ "0: rep; movsl\n" \ " movl %3,%0\n" \ @@ -324,6 +327,7 @@ #define __constant_copy_user(to, from, size) \ do { \ int __d0, __d1; \ + conditional_schedule(); \ switch (size & 3) { \ default: \ __asm__ __volatile__( \ @@ -408,6 +412,7 @@ #define __constant_copy_user_zeroing(to, from, size) \ do { \ int __d0, __d1; \ + conditional_schedule(); \ switch (size & 3) { \ default: \ __asm__ __volatile__( \ @@ -540,8 +545,8 @@ } \ } while (0) -unsigned long __generic_copy_to_user(void *, const void *, unsigned long); -unsigned long __generic_copy_from_user(void *, const void *, unsigned long); +unsigned long __generic_copy_to_user(void *to, const void *from, unsigned long); +unsigned long __generic_copy_from_user(void *to, const void *from, unsigned long); static inline unsigned long __constant_copy_to_user(void *to, const void *from, unsigned long n) --- linux/include/asm-i386/page.h.orig Thu May 25 04:52:41 2000 +++ linux/include/asm-i386/page.h Thu Aug 3 14:06:53 2000 @@ -30,8 +30,17 @@ #endif -#define clear_user_page(page, vaddr) clear_page(page) -#define copy_user_page(to, from, vaddr) copy_page(to, from) +#define clear_user_page(page, vaddr) \ +do { \ + conditional_schedule(); \ + clear_page(page); \ +} while(0) + +#define copy_user_page(to, from, vaddr) \ +do { \ + conditional_schedule(); \ + copy_page(to, from); \ +} while(0) /* * These are used to make use of C type-checking.. 
--- linux/include/asm-i386/checksum.h.orig Tue Feb 1 08:41:14 2000 +++ linux/include/asm-i386/checksum.h Thu Aug 3 14:06:53 2000 @@ -45,20 +45,11 @@ unsigned int csum_partial_copy_from_user ( const char *src, char *dst, int len, int sum, int *err_ptr) { + conditional_schedule(); return csum_partial_copy_generic ( src, dst, len, sum, err_ptr, NULL); } /* - * These are the old (and unsafe) way of doing checksums, a warning message will be - * printed if they are used and an exeption occurs. - * - * these functions should go away after some time. - */ - -#define csum_partial_copy_fromuser csum_partial_copy -unsigned int csum_partial_copy( const char *src, char *dst, int len, int sum); - -/* * This is a version of ip_compute_csum() optimized for IP headers, * which always checksum on 4 octet boundaries. * @@ -185,6 +176,7 @@ static __inline__ unsigned int csum_and_copy_to_user (const char *src, char *dst, int len, int sum, int *err_ptr) { + conditional_schedule(); if (access_ok(VERIFY_WRITE, dst, len)) return csum_partial_copy_generic(src, dst, len, sum, NULL, err_ptr); --- linux/drivers/block/ll_rw_blk.c.orig Thu Aug 3 14:06:32 2000 +++ linux/drivers/block/ll_rw_blk.c Thu Aug 3 14:06:53 2000 @@ -950,11 +950,14 @@ generic_make_request(q, rw, bh); } +out: + conditional_schedule(); return; sorry: for (i = 0; i < nr; i++) buffer_IO_error(bhs[i]); + goto out; } --- linux/drivers/char/mem.c.orig Thu Aug 3 14:06:19 2000 +++ linux/drivers/char/mem.c Thu Aug 3 14:06:53 2000 @@ -95,6 +95,7 @@ unsigned long p = *ppos; unsigned long end_mem; ssize_t read; + size_t left; end_mem = __pa(high_memory); if (p >= end_mem) @@ -118,8 +119,17 @@ } } #endif - if (copy_to_user(buf, __va(p), count)) - return -EFAULT; + left = count; + while (left > PAGE_SIZE) { + if (copy_to_user(buf, __va(p), PAGE_SIZE)) + return -EFAULT; + left -= PAGE_SIZE; + p += PAGE_SIZE; + buf += PAGE_SIZE; /* keep the user pointer in step with p, else every page overwrites the first one */ + } + if (left) + if (copy_to_user(buf, __va(p), left)) + return -EFAULT; read += count; *ppos += read; return read; @@ -392,8 
+402,7 @@ unsigned long unwritten = clear_user(buf, PAGE_SIZE); if (unwritten) return size + unwritten - PAGE_SIZE; - if (current->need_resched) - schedule(); + conditional_schedule(); buf += PAGE_SIZE; size -= PAGE_SIZE; } while (size); --- linux/drivers/char/lp.c.orig Thu Aug 3 14:06:32 2000 +++ linux/drivers/char/lp.c Thu Aug 3 14:06:53 2000 @@ -288,8 +288,8 @@ } parport_yield_blocking (lp_table[minor].dev); - } else if (current->need_resched) - schedule (); + } else + conditional_schedule(); if (count) { copy_size = count; --- linux/drivers/char/tty_io.c.orig Thu Aug 3 14:06:32 2000 +++ linux/drivers/char/tty_io.c Thu Aug 3 14:06:53 2000 @@ -728,8 +728,7 @@ ret = -ERESTARTSYS; if (signal_pending(current)) break; - if (current->need_resched) - schedule(); + conditional_schedule(); } } if (written) { @@ -1668,7 +1667,7 @@ { struct tty_struct *tty, *real_tty; int retval; - + tty = (struct tty_struct *)file->private_data; if (tty_paranoia_check(tty, inode->i_rdev, "tty_ioctl")) return -EINVAL; --- linux/drivers/char/random.c.orig Thu Aug 3 14:06:32 2000 +++ linux/drivers/char/random.c Thu Aug 3 14:06:53 2000 @@ -249,6 +249,7 @@ #include #include #include +#include /* * Configuration information @@ -1264,8 +1265,8 @@ /* * Check if we need to break out or reschedule.... 
*/ - if ((flags & EXTRACT_ENTROPY_USER) && current->need_resched) { - if (signal_pending(current)) { + if (flags & EXTRACT_ENTROPY_USER) { + if (!current->need_resched || signal_pending(current)) { if (ret == 0) ret = -ERESTARTSYS; break; @@ -1320,6 +1321,8 @@ buf += i; ret += i; add_timer_randomness(&extract_timer_state, nbytes); + if (!in_interrupt()) + conditional_schedule(); } /* Wipe data just returned from memory */ --- linux/drivers/char/pc_keyb.c.orig Thu Aug 3 14:06:30 2000 +++ linux/drivers/char/pc_keyb.c Thu Aug 3 14:06:53 2000 @@ -108,7 +108,7 @@ static void kb_wait(void) { - unsigned long timeout = KBC_TIMEOUT; + unsigned long timeout = KBC_TIMEOUT*10; do { /* @@ -117,9 +117,14 @@ */ unsigned char status = handle_kbd_event(); - if (! (status & KBD_STAT_IBF)) + if (!(status & KBD_STAT_IBF)) return; - mdelay(1); + if (!in_interrupt()) { + spin_unlock_irq(&kbd_controller_lock); + udelay_resched(100); + spin_lock_irq(&kbd_controller_lock); + } else + udelay(100); timeout--; } while (timeout); #ifdef KBD_REPORT_TIMEOUTS @@ -502,7 +507,7 @@ int retries = 3; do { - unsigned long timeout = KBD_TIMEOUT; + unsigned long timeout = KBD_TIMEOUT * 10; acknowledge = 0; /* Set by interrupt routine on receipt of ACK. 
*/ resend = 0; @@ -513,7 +518,12 @@ return 1; if (resend) break; - mdelay(1); + if (!in_interrupt()) { + spin_unlock_irq(&kbd_controller_lock); + udelay_resched(100); + spin_lock_irq(&kbd_controller_lock); + } else + udelay(100); if (!--timeout) { #ifdef KBD_REPORT_TIMEOUTS printk(KERN_WARNING "keyboard: Timeout - AT keyboard not present?\n"); @@ -893,9 +903,8 @@ static int open_aux(struct inode * inode, struct file * file) { - if (aux_count++) { + if (aux_count++) return 0; - } queue->head = queue->tail = 0; /* Flush input queue */ if (aux_request_irq(keyboard_interrupt, AUX_DEV)) { aux_count--; @@ -969,6 +978,7 @@ get_user(c, buffer++); aux_write_dev(c); written++; + conditional_schedule(); } while (--count); retval = -EIO; if (written) { --- linux/drivers/char/vt.c.orig Thu Aug 3 14:06:32 2000 +++ linux/drivers/char/vt.c Thu Aug 3 14:06:53 2000 @@ -436,7 +436,7 @@ perm = 0; if (current->tty == tty || suser()) perm = 1; - + kbd = kbd_table + console; switch (cmd) { case KIOCSOUND: --- linux/drivers/pci/proc.c.orig Sat Apr 29 19:19:57 2000 +++ linux/drivers/pci/proc.c Thu Aug 3 14:06:53 2000 @@ -401,6 +401,7 @@ } if (len+begin >= off+count) break; + conditional_schedule(); } off -= begin; *start = buf + off; --- linux/arch/i386/mm/fault.c.orig Thu May 25 03:38:26 2000 +++ linux/arch/i386/mm/fault.c Thu Aug 3 14:06:53 2000 @@ -214,7 +214,7 @@ tsk->thread.screen_bitmap |= 1 << bit; } up(&mm->mmap_sem); - return; + goto out; /* * Something tried to access memory that isn't in our memory map.. @@ -233,7 +233,7 @@ /* info.si_code has been set above */ info.si_addr = (void *)address; force_sig_info(SIGSEGV, &info, tsk); - return; + goto out; } /* @@ -246,7 +246,7 @@ if (nr == 6) { do_invalid_op(regs, 0); - return; + goto out; } } @@ -254,7 +254,7 @@ /* Are we prepared to handle this kernel fault? 
*/ if ((fixup = search_exception_table(regs->eip)) != 0) { regs->eip = fixup; - return; + goto out; } /* @@ -266,7 +266,7 @@ if (boot_cpu_data.wp_works_ok < 0 && address == PAGE_OFFSET && (error_code & 1)) { handle_wp_test(); - return; + goto out; } if (address < PAGE_SIZE) @@ -318,4 +318,6 @@ /* Kernel mode? Handle exceptions or die */ if (!(error_code & 4)) goto no_context; +out: + return; /* a label must be followed by a statement */ } --- linux/arch/i386/lib/usercopy.c.orig Fri Nov 12 13:29:47 1999 +++ linux/arch/i386/lib/usercopy.c Thu Aug 3 14:06:53 2000 @@ -40,19 +40,53 @@ #else unsigned long -__generic_copy_to_user(void *to, const void *from, unsigned long n) +__generic_copy_to_user(void *__to, const void *__from, unsigned long n) { - if (access_ok(VERIFY_WRITE, to, n)) - __copy_user(to,from,n); - return n; + int left = n; + const int delta = PAGE_SIZE; + const char *from = __from; + char *to = __to; + + if (access_ok(VERIFY_WRITE, to, n)) { + while (left > delta) { + int remaining = delta; + + __copy_user(to, from, remaining); + left -= delta - remaining; /* subtract only the bytes actually copied */ + if (remaining) + goto out; + to += delta; + from += delta; + } + __copy_user(to, from, left); + } +out: + return left; } unsigned long -__generic_copy_from_user(void *to, const void *from, unsigned long n) +__generic_copy_from_user(void *__to, const void *__from, unsigned long n) { - if (access_ok(VERIFY_READ, from, n)) - __copy_user_zeroing(to,from,n); - return n; + int left = n; + const int delta = PAGE_SIZE; + const char *from = __from; + char *to = __to; + + if (access_ok(VERIFY_READ, from, n)) { + while (left > delta) { + int remaining = delta; + + __copy_user_zeroing(to, from, remaining); + left -= delta - remaining; /* subtract only the bytes actually copied */ + if (remaining) + goto out; /* NOTE(review): 'to' beyond this chunk is never written on a fault - confirm no caller relies on a zeroed tail */ + to += delta; + from += delta; + } + __copy_user_zeroing(to, from, left); + } +out: + return left; } #endif @@ -114,6 +148,7 @@ #define __do_clear_user(addr,size) \ do { \ int __d0; \ + conditional_schedule(); \ __asm__ __volatile__( \ "0: rep; stosl\n" \ " movl %2,%0\n" \ @@ -133,18 +168,47 @@ } while (0) unsigned long 
-clear_user(void *to, unsigned long n) +clear_user(void *__to, unsigned long n) { - if (access_ok(VERIFY_WRITE, to, n)) - __do_clear_user(to, n); - return n; + int left = n; + char *to = __to; + const int delta = PAGE_SIZE; + + if (access_ok(VERIFY_WRITE, to, n)) { + while (left > delta) { + int remaining = delta; + + __do_clear_user(to, remaining); + left -= delta - remaining; /* subtract only the bytes actually cleared */ + if (remaining) + goto out; + to += delta; + } + __do_clear_user(to, left); + } +out: + return left; } unsigned long -__clear_user(void *to, unsigned long n) +__clear_user(void *__to, unsigned long n) { - __do_clear_user(to, n); - return n; + int left = n; + char *to = __to; + const int delta = PAGE_SIZE; + + while (left > delta) { + int remaining = delta; + + __do_clear_user(to, remaining); + left -= delta - remaining; /* subtract only the bytes actually cleared */ + if (remaining) + goto out; + to += delta; + } + __do_clear_user(to, left); +out: + return left; } /* @@ -158,6 +222,7 @@ unsigned long mask = -__addr_ok(s); unsigned long res, tmp; + conditional_schedule(); __asm__ __volatile__( " andl %0,%%ecx\n" "0: repne; scasb\n" --- linux/arch/i386/lib/mmx.c.orig Thu Oct 28 03:30:39 1999 +++ linux/arch/i386/lib/mmx.c Thu Aug 3 14:06:53 2000 @@ -199,6 +199,7 @@ static void slow_zero_page(void * page) { int d0, d1; + conditional_schedule(); __asm__ __volatile__( \ "cld\n\t" \ "rep ; stosl" \ @@ -218,6 +219,7 @@ static void slow_copy_page(void *to, void *from) { int d0, d1, d2; + conditional_schedule(); __asm__ __volatile__( \ "cld\n\t" \ "rep ; movsl" \ --- linux/arch/i386/lib/old-checksum.c.orig Sun Dec 27 19:32:09 1998 +++ linux/arch/i386/lib/old-checksum.c Thu Aug 3 14:06:53 2000 @@ -1,19 +0,0 @@ -/* - * FIXME: old compatibility stuff, will be removed soon. 
- */ - -#include - -unsigned int csum_partial_copy( const char *src, char *dst, int len, int sum) -{ - int src_err=0, dst_err=0; - - sum = csum_partial_copy_generic ( src, dst, len, sum, &src_err, &dst_err); - - if (src_err || dst_err) - printk("old csum_partial_copy_fromuser(), tell mingo to convert me.\n"); - - return sum; -} - - --- linux/arch/i386/vmlinux.lds.orig Thu Aug 3 14:06:24 2000 +++ linux/arch/i386/vmlinux.lds Thu Aug 3 14:06:53 2000 @@ -13,7 +13,8 @@ *(.fixup) *(.gnu.warning) } = 0x9090 - .text.lock : { *(.text.lock) } /* out-of-line lock text */ + .text.lock : { *(.text.lock) } /* out-of-line lock text */ + .text.condsched : { *(.text.condsched) } /* out-of-line condsched text */ .rodata : { *(.rodata) } .kstrtab : { *(.kstrtab) }