--- linux/fs/pipe.c.orig Thu Oct 7 14:54:23 1999 +++ linux/fs/pipe.c Sun Nov 21 05:38:37 1999 @@ -51,6 +51,7 @@ /* Seeks are not allowed on pipes. */ ret = -ESPIPE; + read = 0; if (ppos != &filp->f_pos) goto out_nolock; @@ -65,6 +66,7 @@ goto out_nolock; if (PIPE_EMPTY(*inode)) { +do_more_read: ret = 0; if (!PIPE_WRITERS(*inode)) goto out; @@ -74,7 +76,9 @@ goto out; for (;;) { + PIPE_WAITING_READERS(*inode)++; pipe_wait(inode); + PIPE_WAITING_READERS(*inode)--; ret = -ERESTARTSYS; if (signal_pending(current)) goto out_nolock; @@ -90,7 +94,6 @@ /* Read what data is available. */ ret = -EFAULT; - read = 0; while (count > 0 && (size = PIPE_LEN(*inode))) { char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode); ssize_t chars = PIPE_MAX_RCHUNK(*inode); @@ -115,16 +118,26 @@ if (!PIPE_LEN(*inode)) PIPE_START(*inode) = 0; - /* Signal writers there is more room. */ + if (count && PIPE_WAITING_WRITERS(*inode) && !(filp->f_flags & O_NONBLOCK)) { + /* + * We know that we are going to sleep: signal + * writers synchronously that there is more + * room. + */ + wake_up_interruptible_sync(PIPE_WAIT(*inode)); + if (!PIPE_EMPTY(*inode)) + BUG(); + goto do_more_read; + } + /* Signal writers asynchronously that there is more room. */ wake_up_interruptible(PIPE_WAIT(*inode)); - if (read) - UPDATE_ATIME(inode); ret = read; - out: up(PIPE_SEM(*inode)); out_nolock: + if (read) + ret = read; return ret; } @@ -136,6 +149,7 @@ /* Seeks are not allowed on pipes. */ ret = -ESPIPE; + written = 0; if (ppos != &filp->f_pos) goto out_nolock; @@ -148,13 +162,13 @@ if (down_interruptible(PIPE_SEM(*inode))) goto out_nolock; +do_more_write: /* No readers yields SIGPIPE. */ if (!PIPE_READERS(*inode)) goto sigpipe; /* If count <= PIPE_BUF, we have to make it atomic. */ free = (count <= PIPE_BUF ? count : 1); - written = 0; /* Wait, or check for, available space. 
*/ if (filp->f_flags & O_NONBLOCK) { @@ -163,7 +177,9 @@ goto out; } else { while (PIPE_FREE(*inode) < free) { + PIPE_WAITING_WRITERS(*inode)++; pipe_wait(inode); + PIPE_WAITING_WRITERS(*inode)--; ret = -ERESTARTSYS; if (signal_pending(current)) goto out_nolock; @@ -204,9 +220,15 @@ break; do { - /* This should be a synchronous wake-up: don't do idle reschedules! */ - wake_up_interruptible(PIPE_WAIT(*inode)); + /* + * Synchronous wake-up: it knows that this process + * is going to give up this CPU, so it doesn't have + * to do idle reschedules. + */ + wake_up_interruptible_sync(PIPE_WAIT(*inode)); + PIPE_WAITING_WRITERS(*inode)++; pipe_wait(inode); + PIPE_WAITING_WRITERS(*inode)--; if (signal_pending(current)) goto out_nolock; if (down_interruptible(PIPE_SEM(*inode))) @@ -217,19 +239,27 @@ ret = -EFAULT; } - /* Signal readers there is more data. */ + if (count && PIPE_WAITING_READERS(*inode) && + !(filp->f_flags & O_NONBLOCK)) { + wake_up_interruptible_sync(PIPE_WAIT(*inode)); + goto do_more_write; + } + /* Signal readers asynchronously that there is more data. */ wake_up_interruptible(PIPE_WAIT(*inode)); - ret = (written ? 
written : -EAGAIN); inode->i_ctime = inode->i_mtime = CURRENT_TIME; mark_inode_dirty(inode); out: up(PIPE_SEM(*inode)); out_nolock: + if (written) + ret = written; return ret; sigpipe: + if (written) + goto out; up(PIPE_SEM(*inode)); send_sig(SIGPIPE, current, 0); return -EPIPE; @@ -552,6 +582,7 @@ PIPE_BASE(*inode) = (char *) page; PIPE_START(*inode) = PIPE_LEN(*inode) = 0; PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1; + PIPE_WAITING_READERS(*inode) = PIPE_WAITING_WRITERS(*inode) = 0; /* * Mark the inode dirty from the very beginning, --- linux/kernel/sched.c.orig Sun Nov 21 03:01:11 1999 +++ linux/kernel/sched.c Mon Nov 22 01:14:07 1999 @@ -223,49 +223,88 @@ return goodness(p, cpu, prev->mm) - goodness(prev, cpu, prev->mm); } -static void reschedule_idle(struct task_struct * p) +/* + * This is ugly, but reschedule_idle() is very timing-critical. + * We enter with the runqueue spinlock held, but we might end + * up unlocking it early, so the caller must not unlock the + * runqueue, it's always done by reschedule_idle(). + */ +static inline void reschedule_idle(struct task_struct * p, unsigned long flags) { #ifdef __SMP__ int this_cpu = smp_processor_id(), target_cpu; - struct task_struct *tsk, *target_tsk; + struct task_struct *tsk; int cpu, best_cpu, i; - unsigned long flags; - - spin_lock_irqsave(&runqueue_lock, flags); /* * shortcut if the woken up task's last CPU is * idle now. */ best_cpu = p->processor; - target_tsk = idle_task(best_cpu); - if (cpu_curr(best_cpu) == target_tsk) + tsk = idle_task(best_cpu); + if (cpu_curr(best_cpu) == tsk) goto send_now; - target_tsk = NULL; + /* + * The only heuristics - we use the tsk->avg_slice value + * to detect 'frequent reschedulers'. 
+ * + * If both the woken-up process and the preferred CPU + * are frequent reschedulers, then skip the asynchronous + * wakeup; the frequent rescheduler will likely choose this + * task during its next schedule(): + */ + if (p->policy == SCHED_OTHER) { + tsk = cpu_curr(best_cpu); + if (p->avg_slice + tsk->avg_slice < cacheflush_time) + goto out_no_target; + } + + /* + * We know that the preferred CPU has a cache-affine current + * process, let's try to find a new idle CPU for the woken-up + * process: + */ for (i = 0; i < smp_num_cpus; i++) { cpu = cpu_logical_map(i); tsk = cpu_curr(cpu); + /* + * We use the first available idle CPU. This creates + * a priority list between idle CPUs, but this is not + * a problem. + */ if (tsk == idle_task(cpu)) - target_tsk = tsk; + goto send_now; } - if (target_tsk && p->avg_slice > cacheflush_time) - goto send_now; - + /* + * No CPU is idle, but maybe this process has enough priority + * to preempt its preferred CPU. (this is a shortcut): + */ tsk = cpu_curr(best_cpu); if (preemption_goodness(tsk, p, best_cpu) > 0) - target_tsk = tsk; + goto send_now; /* - * found any suitable CPU? + * We should get here rarely - or in the high CPU contention + * case. No CPU is idle and this process is either lowprio or + * the preferred CPU is highprio. 
Maybe some other CPU can/must + * be preempted: */ - if (!target_tsk) - goto out_no_target; + for (i = 0; i < smp_num_cpus; i++) { + cpu = cpu_logical_map(i); + tsk = cpu_curr(cpu); + if (preemption_goodness(tsk, p, cpu) > 0) + goto send_now; + } + +out_no_target: + spin_unlock_irqrestore(&runqueue_lock, flags); + return; send_now: - target_cpu = target_tsk->processor; - target_tsk->need_resched = 1; + target_cpu = tsk->processor; + tsk->need_resched = 1; spin_unlock_irqrestore(&runqueue_lock, flags); /* * the APIC stuff can go outside of the lock because @@ -274,9 +313,6 @@ if (target_cpu != this_cpu) smp_send_reschedule(target_cpu); return; -out_no_target: - spin_unlock_irqrestore(&runqueue_lock, flags); - return; #else /* UP */ int this_cpu = smp_processor_id(); struct task_struct *tsk; @@ -320,7 +356,7 @@ * "current->state = TASK_RUNNING" to mark yourself runnable * without the overhead of this. */ -void wake_up_process(struct task_struct * p) +inline void wake_up_process(struct task_struct * p) { unsigned long flags; @@ -332,14 +368,29 @@ if (task_on_runqueue(p)) goto out; add_to_runqueue(p); - spin_unlock_irqrestore(&runqueue_lock, flags); + reschedule_idle(p, flags); // spin_unlocks runqueue - reschedule_idle(p); return; out: spin_unlock_irqrestore(&runqueue_lock, flags); } +static inline void wake_up_process_synchronous(struct task_struct * p) +{ + unsigned long flags; + + /* + * We want the common case fall through straight, thus the goto. 
+ */ + spin_lock_irqsave(&runqueue_lock, flags); + p->state = TASK_RUNNING; + if (task_on_runqueue(p)) + goto out; + add_to_runqueue(p); +out: + spin_unlock_irqrestore(&runqueue_lock, flags); +} + static void process_timeout(unsigned long __data) { struct task_struct * p = (struct task_struct *) __data; @@ -541,8 +592,12 @@ { #ifdef __SMP__ if ((prev->state == TASK_RUNNING) && - (prev != idle_task(smp_processor_id()))) - reschedule_idle(prev); + (prev != idle_task(smp_processor_id()))) { + unsigned long flags; + + spin_lock_irqsave(&runqueue_lock, flags); + reschedule_idle(prev, flags); // spin_unlocks runqueue + } wmb(); prev->has_cpu = 0; #endif /* __SMP__ */ @@ -765,7 +820,7 @@ return; } -void __wake_up(wait_queue_head_t *q, unsigned int mode) +static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, const int sync) { struct list_head *tmp, *head; struct task_struct *p; @@ -801,7 +856,10 @@ #if WAITQUEUE_DEBUG curr->__waker = (long)__builtin_return_address(0); #endif - wake_up_process(p); + if (sync) + wake_up_process_synchronous(p); + else + wake_up_process(p); if (state & TASK_EXCLUSIVE) break; } @@ -809,6 +867,16 @@ wq_write_unlock_irqrestore(&q->lock, flags); out: return; +} + +void __wake_up(wait_queue_head_t *q, unsigned int mode) +{ + __wake_up_common(q, mode, 0); +} + +void __wake_up_sync(wait_queue_head_t *q, unsigned int mode) +{ + __wake_up_common(q, mode, 1); } #define SLEEP_ON_VAR \ --- linux/include/linux/sched.h.orig Sun Nov 21 03:01:19 1999 +++ linux/include/linux/sched.h Sun Nov 21 12:22:49 1999 @@ -498,6 +498,7 @@ #define CURRENT_TIME (xtime.tv_sec) extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode)); +extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode)); extern void FASTCALL(sleep_on(wait_queue_head_t *q)); extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q, signed long timeout)); @@ -507,7 +508,9 @@ extern void FASTCALL(wake_up_process(struct task_struct * tsk)); 
#define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE) +#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE) #define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE) +#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE) extern int in_group_p(gid_t); --- linux/include/linux/pipe_fs_i.h.orig Mon Aug 30 11:16:31 1999 +++ linux/include/linux/pipe_fs_i.h Sun Nov 21 03:06:11 1999 @@ -7,6 +7,8 @@ unsigned int start; unsigned int readers; unsigned int writers; + unsigned int waiting_readers; + unsigned int waiting_writers; }; /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual @@ -20,6 +22,8 @@ #define PIPE_LEN(inode) ((inode).i_size) #define PIPE_READERS(inode) ((inode).i_pipe->readers) #define PIPE_WRITERS(inode) ((inode).i_pipe->writers) +#define PIPE_WAITING_READERS(inode) ((inode).i_pipe->waiting_readers) +#define PIPE_WAITING_WRITERS(inode) ((inode).i_pipe->waiting_writers) #define PIPE_EMPTY(inode) (PIPE_LEN(inode) == 0) #define PIPE_FULL(inode) (PIPE_LEN(inode) == PIPE_SIZE)