--- linux/fs/inode.c.orig Fri Feb 1 08:13:32 2002 +++ linux/fs/inode.c Fri Feb 1 08:16:55 2002 @@ -144,6 +144,7 @@ INIT_LIST_HEAD(&inode->i_devices); sema_init(&inode->i_sem, 1); sema_init(&inode->i_zombie, 1); + spin_lock_init(&inode->i_data.page_lock); spin_lock_init(&inode->i_data.i_shared_lock); } --- linux/mm/vmscan.c.orig Fri Feb 1 08:13:33 2002 +++ linux/mm/vmscan.c Fri Feb 1 08:16:55 2002 @@ -337,6 +337,8 @@ static int FASTCALL(shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority)); static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority) { + struct address_space *mapping; + spinlock_t *pg_lock; struct list_head * entry; int max_scan = nr_inactive_pages / priority; int max_mapped = nr_pages << (9 - priority); @@ -467,13 +469,25 @@ } } - spin_lock(&pagecache_lock); + mapping = page->mapping; + if (!mapping) + goto no_mapping; + pg_lock = PAGECACHE_LOCK(page); + if (unlikely(!spin_trylock(pg_lock))) { + /* we hold the page lock so the page cannot go away from under us */ + spin_unlock(&pagemap_lru_lock); + spin_lock(pg_lock); + spin_lock(&pagemap_lru_lock); + } + spin_lock(&mapping->page_lock); /* * this is the non-racy check for busy page. */ - if (!page->mapping || !is_page_cache_freeable(page)) { - spin_unlock(&pagecache_lock); + if (!is_page_cache_freeable(page)) { + spin_unlock(pg_lock); + spin_unlock(&mapping->page_lock); +no_mapping: UnlockPage(page); page_mapped: if (--max_mapped >= 0) @@ -493,7 +507,8 @@ * the page is freeable* so not in use by anybody. */ if (PageDirty(page)) { - spin_unlock(&pagecache_lock); + spin_unlock(pg_lock); + spin_unlock(&mapping->page_lock); UnlockPage(page); continue; } @@ -501,12 +516,14 @@ /* point of no return */ if (likely(!PageSwapCache(page))) { __remove_inode_page(page); - spin_unlock(&pagecache_lock); + spin_unlock(pg_lock); + spin_unlock(&mapping->page_lock); } else { swp_entry_t swap; swap.val = page->index; __delete_from_swap_cache(page); - spin_unlock(&pagecache_lock); + spin_unlock(pg_lock); + spin_unlock(&mapping->page_lock); swap_free(swap); } --- linux/mm/filemap.c.orig Fri Feb 1 08:13:33 2002 +++ linux/mm/filemap.c Fri Feb 1 08:16:55 2002 @@ -45,18 +45,32 @@ atomic_t page_cache_size = ATOMIC_INIT(0); unsigned int page_hash_bits; -struct page **page_hash_table; -spinlock_t pagecache_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; -/* - * NOTE: to avoid deadlocking you must never acquire the pagemap_lru_lock - * with the pagecache_lock held. +struct page_cache_bucket *page_hash_table; + +/* Page-cache SMP locking rules: + * + * 1) The identity of a page (mapping, index) is only changed + * under PAGECACHE_LOCK. * - * Ordering: - * swap_lock -> - * pagemap_lru_lock -> - * pagecache_lock + * 2) The deadlock-free ordering of lock acquisition is + * PAGECACHE_LOCK ==> pagemap_lru_lock ==> mapping->page_lock + * There are cases where two of these locks need to be held + * simultaneously but cannot be obtained in the correct order. + * The way to handle this situation is as follows: + * + * repeat: + * spin_lock(&pagemap_lru_lock); + * some_loop_over_lru_pages() { + * ... + * if (!spin_trylock(PAGECACHE_LOCK(page))) { + * spin_unlock(&pagemap_lru_lock); + * goto repeat; + * } + * ... + * } */ + spinlock_t pagemap_lru_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; #define CLUSTER_PAGES (1 << page_cluster) @@ -81,15 +95,24 @@ { struct list_head *head = &mapping->clean_pages; + spin_lock(&mapping->page_lock); mapping->nrpages++; list_add(&page->list, head); page->mapping = mapping; + spin_unlock(&mapping->page_lock); } static inline void remove_page_from_inode_queue(struct page * page) { struct address_space * mapping = page->mapping; +#if CONFIG_SMP + if (!spin_is_locked(PAGECACHE_LOCK(page))) + BUG(); + if (!spin_is_locked(&mapping->page_lock)) + BUG(); +#endif + mapping->nrpages--; list_del(&page->list); page->mapping = NULL; @@ -121,12 +144,79 @@ void remove_inode_page(struct page *page) { + struct address_space * mapping; + spinlock_t *pg_lock = PAGECACHE_LOCK(page); + if (!PageLocked(page)) PAGE_BUG(page); - spin_lock(&pagecache_lock); + spin_lock(pg_lock); + mapping = page->mapping; + spin_lock(&mapping->page_lock); __remove_inode_page(page); - spin_unlock(&pagecache_lock); + spin_unlock(&mapping->page_lock); + spin_unlock(pg_lock); +} + +/* + * Flush clean pages from the pagecache. + */ +void flush_inode_pages (struct inode * inode) +{ + unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT; + struct address_space * mapping = inode->i_mapping; + struct list_head *head, *curr; + struct page * page; + +retry: + head = &inode->i_mapping->clean_pages; + spin_lock(&pagemap_lru_lock); + spin_lock(&mapping->page_lock); + curr = head->next; + + while (curr != head) { + spinlock_t *pg_lock; + + page = list_entry(curr, struct page, list); + curr = curr->next; + + if (page->index == end_index) + continue; + pg_lock = PAGECACHE_LOCK(page); + if (!spin_trylock(pg_lock)) { + spin_unlock(&mapping->page_lock); + spin_unlock(&pagemap_lru_lock); + goto retry; + } + + /* We cannot invalidate a locked page */ + if (TryLockPage(page)) { + spin_unlock(pg_lock); + continue; + } + + /* + * We cannot flush a page if buffers are still active. + */ + if (page->buffers) { + spin_unlock(pg_lock); + spin_unlock(&mapping->page_lock); + spin_unlock(&pagemap_lru_lock); + try_to_free_buffers(page, GFP_KERNEL); + UnlockPage(page); + goto retry; + } + + __lru_cache_del(page); + __remove_inode_page(page); + spin_unlock(pg_lock); + + UnlockPage(page); + page_cache_release(page); + } + + spin_unlock(&mapping->page_lock); + spin_unlock(&pagemap_lru_lock); } static inline int sync_page(struct page *page) @@ -147,10 +237,12 @@ struct address_space *mapping = page->mapping; if (mapping) { - spin_lock(&pagecache_lock); + spin_lock(PAGECACHE_LOCK(page)); + spin_lock(&mapping->page_lock); list_del(&page->list); list_add(&page->list, &mapping->dirty_pages); - spin_unlock(&pagecache_lock); + spin_unlock(PAGECACHE_LOCK(page)); + spin_unlock(&mapping->page_lock); if (mapping->host) mark_inode_dirty_pages(mapping->host); @@ -173,21 +265,35 @@ head = &inode->i_mapping->clean_pages; +retry: spin_lock(&pagemap_lru_lock); - spin_lock(&pagecache_lock); + spin_lock(&inode->i_mapping->page_lock); curr = head->next; while (curr != head) { + spinlock_t *pg_lock; + page = list_entry(curr, struct page, list); + pg_lock = PAGECACHE_LOCK(page); + if (!spin_trylock(pg_lock)) { + spin_unlock(&inode->i_mapping->page_lock); + spin_unlock(&pagemap_lru_lock); + goto retry; + } + curr = curr->next; /* We cannot invalidate something in dirty.. */ - if (PageDirty(page)) + if (PageDirty(page)) { + spin_unlock(pg_lock); continue; + } /* ..or locked */ - if (TryLockPage(page)) + if (TryLockPage(page)) { + spin_unlock(pg_lock); continue; + } if (page->buffers && !try_to_free_buffers(page, 0)) goto unlock; @@ -199,13 +305,15 @@ __remove_inode_page(page); UnlockPage(page); page_cache_release(page); + spin_unlock(pg_lock); continue; unlock: UnlockPage(page); + spin_unlock(pg_lock); continue; } - spin_unlock(&pagecache_lock); + spin_unlock(&inode->i_mapping->page_lock); spin_unlock(&pagemap_lru_lock); } @@ -239,13 +347,12 @@ * all sorts of fun problems ... */ ClearPageDirty(page); - ClearPageUptodate(page); remove_inode_page(page); page_cache_release(page); } -static int FASTCALL(truncate_list_pages(struct list_head *, unsigned long, unsigned *)); -static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial) +static int FASTCALL(truncate_list_pages(struct address_space * mapping, struct list_head *, unsigned long, unsigned *)); +static int truncate_list_pages(struct address_space * mapping, struct list_head *head, unsigned long start, unsigned *partial) { struct list_head *curr; struct page * page; @@ -254,14 +361,18 @@ restart: curr = head->prev; while (curr != head) { - unsigned long offset; + unsigned long index; page = list_entry(curr, struct page, list); - offset = page->index; + index = page->index; /* Is one of the pages to truncate? */ - if ((offset >= start) || (*partial && (offset + 1) == start)) { + if ((index >= start) || (*partial && (index + 1) == start)) { int failed; + spinlock_t *pg_lock = PAGECACHE_LOCK(page); + + if (!spin_trylock(pg_lock)) + return 1; page_cache_get(page); failed = TryLockPage(page); @@ -274,11 +385,12 @@ /* Restart on this page */ list_add(head, curr); - spin_unlock(&pagecache_lock); + spin_unlock(pg_lock); + spin_unlock(&mapping->page_lock); unlocked = 1; if (!failed) { - if (*partial && (offset + 1) == start) { + if (*partial && (index + 1) == start) { truncate_partial_page(page, *partial); *partial = 0; } else @@ -295,7 +407,7 @@ schedule(); } - spin_lock(&pagecache_lock); + spin_lock(&mapping->page_lock); goto restart; } curr = curr->prev; @@ -319,24 +431,25 @@ unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); int unlocked; - spin_lock(&pagecache_lock); + spin_lock(&mapping->page_lock); do { - unlocked = truncate_list_pages(&mapping->clean_pages, start, &partial); - unlocked |= truncate_list_pages(&mapping->dirty_pages, start, &partial); - unlocked |= truncate_list_pages(&mapping->locked_pages, start, &partial); + unlocked = truncate_list_pages(mapping, &mapping->clean_pages, start, &partial); + unlocked |= truncate_list_pages(mapping, &mapping->dirty_pages, start, &partial); + unlocked |= truncate_list_pages(mapping, &mapping->locked_pages, start, &partial); } while (unlocked); /* Traversed all three lists without dropping the lock */ - spin_unlock(&pagecache_lock); + spin_unlock(&mapping->page_lock); } static inline int invalidate_this_page2(struct page * page, struct list_head * curr, - struct list_head * head) + struct list_head * head, + struct address_space * mapping) { int unlocked = 1; /* - * The page is locked and we hold the pagecache_lock as well + * The page is locked and we hold the pagecache locks as well * so both page_count(page) and page->buffers stays constant here. */ if (page_count(page) == 1 + !!page->buffers) { @@ -345,7 +458,7 @@ list_add_tail(head, curr); page_cache_get(page); - spin_unlock(&pagecache_lock); + spin_unlock(&mapping->page_lock); truncate_complete_page(page); } else { if (page->buffers) { @@ -354,7 +467,7 @@ list_add_tail(head, curr); page_cache_get(page); - spin_unlock(&pagecache_lock); + spin_unlock(&mapping->page_lock); block_invalidate_page(page); } else unlocked = 0; @@ -366,8 +479,8 @@ return unlocked; } -static int FASTCALL(invalidate_list_pages2(struct list_head *)); -static int invalidate_list_pages2(struct list_head *head) +static int FASTCALL(invalidate_list_pages2(struct list_head *, struct address_space * mapping)); +static int invalidate_list_pages2(struct list_head *head, struct address_space * mapping) { struct list_head *curr; struct page * page; @@ -381,7 +494,7 @@ if (!TryLockPage(page)) { int __unlocked; - __unlocked = invalidate_this_page2(page, curr, head); + __unlocked = invalidate_this_page2(page, curr, head, mapping); UnlockPage(page); unlocked |= __unlocked; if (!__unlocked) { @@ -394,7 +507,7 @@ list_add(head, curr); page_cache_get(page); - spin_unlock(&pagecache_lock); + spin_unlock(&mapping->page_lock); unlocked = 1; wait_on_page(page); } @@ -405,7 +518,7 @@ schedule(); } - spin_lock(&pagecache_lock); + spin_lock(&mapping->page_lock); goto restart; } return unlocked; @@ -420,16 +533,16 @@ { int unlocked; - spin_lock(&pagecache_lock); do { - unlocked = invalidate_list_pages2(&mapping->clean_pages); - unlocked |= invalidate_list_pages2(&mapping->dirty_pages); - unlocked |= invalidate_list_pages2(&mapping->locked_pages); + spin_lock(&mapping->page_lock); + unlocked = invalidate_list_pages2(&mapping->clean_pages, mapping); + unlocked |= invalidate_list_pages2(&mapping->dirty_pages, mapping); + unlocked |= invalidate_list_pages2(&mapping->locked_pages, mapping); + spin_unlock(&mapping->page_lock); } while (unlocked); - spin_unlock(&pagecache_lock); } -static inline struct page * __find_page_nolock(struct address_space *mapping, unsigned long offset, struct page *page) +static inline struct page * __find_page_nolock(struct address_space *mapping, unsigned long index, struct page *page) { goto inside; @@ -440,7 +553,7 @@ goto not_found; if (page->mapping != mapping) continue; - if (page->index == offset) + if (page->index == index) break; } @@ -483,13 +596,13 @@ return error; } -static int do_buffer_fdatasync(struct list_head *head, unsigned long start, unsigned long end, int (*fn)(struct page *)) +static int do_buffer_fdatasync(struct address_space * mapping, struct list_head *head, unsigned long start, unsigned long end, int (*fn)(struct page *)) { struct list_head *curr; struct page *page; int retval = 0; - spin_lock(&pagecache_lock); + spin_lock(&mapping->page_lock); curr = head->next; while (curr != head) { page = list_entry(curr, struct page, list); @@ -502,7 +615,7 @@ continue; page_cache_get(page); - spin_unlock(&pagecache_lock); + spin_unlock(&mapping->page_lock); lock_page(page); /* The buffers could have been free'd while we waited for the page lock */ @@ -510,11 +623,11 @@ retval |= fn(page); UnlockPage(page); - spin_lock(&pagecache_lock); + spin_lock(&mapping->page_lock); curr = page->list.next; page_cache_release(page); } - spin_unlock(&pagecache_lock); + spin_unlock(&mapping->page_lock); return retval; } @@ -525,17 +638,18 @@ */ int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsigned long end_idx) { + struct address_space * mapping = inode->i_mapping; int retval; /* writeout dirty buffers on pages from both clean and dirty lists */ - retval = do_buffer_fdatasync(&inode->i_mapping->dirty_pages, start_idx, end_idx, writeout_one_page); - retval |= do_buffer_fdatasync(&inode->i_mapping->clean_pages, start_idx, end_idx, writeout_one_page); - retval |= do_buffer_fdatasync(&inode->i_mapping->locked_pages, start_idx, end_idx, writeout_one_page); + retval = do_buffer_fdatasync(mapping, &mapping->dirty_pages, start_idx, end_idx, writeout_one_page); + retval |= do_buffer_fdatasync(mapping, &mapping->clean_pages, start_idx, end_idx, writeout_one_page); + retval |= do_buffer_fdatasync(mapping, &mapping->locked_pages, start_idx, end_idx, writeout_one_page); /* now wait for locked buffers on pages from both clean and dirty lists */ - retval |= do_buffer_fdatasync(&inode->i_mapping->dirty_pages, start_idx, end_idx, waitfor_one_page); - retval |= do_buffer_fdatasync(&inode->i_mapping->clean_pages, start_idx, end_idx, waitfor_one_page); - retval |= do_buffer_fdatasync(&inode->i_mapping->locked_pages, start_idx, end_idx, waitfor_one_page); + retval |= do_buffer_fdatasync(mapping, &mapping->dirty_pages, start_idx, end_idx, waitfor_one_page); + retval |= do_buffer_fdatasync(mapping, &mapping->clean_pages, start_idx, end_idx, waitfor_one_page); + retval |= do_buffer_fdatasync(mapping, &mapping->locked_pages, start_idx, end_idx, waitfor_one_page); return retval; } @@ -580,19 +694,28 @@ { int (*writepage)(struct page *) = mapping->a_ops->writepage; - spin_lock(&pagecache_lock); +repeat: + spin_lock(&mapping->page_lock); while (!list_empty(&mapping->dirty_pages)) { struct page *page = list_entry(mapping->dirty_pages.next, struct page, list); + spinlock_t *pg_lock = PAGECACHE_LOCK(page); + if (!spin_trylock(pg_lock)) { + spin_unlock(&mapping->page_lock); + goto repeat; + } list_del(&page->list); list_add(&page->list, &mapping->locked_pages); - if (!PageDirty(page)) + if (!PageDirty(page)) { + spin_unlock(pg_lock); continue; + } page_cache_get(page); - spin_unlock(&pagecache_lock); + spin_unlock(pg_lock); + spin_unlock(&mapping->page_lock); lock_page(page); @@ -603,9 +726,9 @@ UnlockPage(page); page_cache_release(page); - spin_lock(&pagecache_lock); + spin_lock(&mapping->page_lock); } - spin_unlock(&pagecache_lock); + spin_unlock(&mapping->page_lock); } /** @@ -617,26 +740,35 @@ */ void filemap_fdatawait(struct address_space * mapping) { - spin_lock(&pagecache_lock); +repeat: + spin_lock(&mapping->page_lock); while (!list_empty(&mapping->locked_pages)) { struct page *page = list_entry(mapping->locked_pages.next, struct page, list); + spinlock_t *pg_lock = PAGECACHE_LOCK(page); + if (!spin_trylock(pg_lock)) { + spin_unlock(&mapping->page_lock); + goto repeat; + } list_del(&page->list); list_add(&page->list, &mapping->clean_pages); - if (!PageLocked(page)) + if (!PageLocked(page)) { + spin_unlock(pg_lock); continue; + } page_cache_get(page); - spin_unlock(&pagecache_lock); + spin_unlock(pg_lock); + spin_unlock(&mapping->page_lock); ___wait_on_page(page); page_cache_release(page); - spin_lock(&pagecache_lock); + spin_lock(&mapping->page_lock); } - spin_unlock(&pagecache_lock); + spin_unlock(&mapping->page_lock); } /* @@ -647,17 +779,18 @@ */ void add_to_page_cache_locked(struct page * page, struct address_space *mapping, unsigned long index) { + spinlock_t *pg_lock = __PAGECACHE_LOCK(mapping, index); if (!PageLocked(page)) BUG(); page->index = index; page_cache_get(page); - spin_lock(&pagecache_lock); + spin_lock(pg_lock); add_page_to_inode_queue(mapping, page); add_page_to_hash_queue(page, page_hash(mapping, index)); - spin_unlock(&pagecache_lock); lru_cache_add(page); + spin_unlock(pg_lock); } /* @@ -665,7 +798,7 @@ * owned by us, but unreferenced, not uptodate and with no errors. */ static inline void __add_to_page_cache(struct page * page, - struct address_space *mapping, unsigned long offset, + struct address_space *mapping, unsigned long index, struct page **hash) { unsigned long flags; @@ -673,36 +806,40 @@ flags = page->flags & ~(1 << PG_uptodate | 1 << PG_error | 1 << PG_dirty | 1 << PG_referenced | 1 << PG_arch_1 | 1 << PG_checked); page->flags = flags | (1 << PG_locked); page_cache_get(page); - page->index = offset; + page->index = index; add_page_to_inode_queue(mapping, page); add_page_to_hash_queue(page, hash); } -void add_to_page_cache(struct page * page, struct address_space * mapping, unsigned long offset) +void add_to_page_cache(struct page * page, struct address_space * mapping, unsigned long index) { - spin_lock(&pagecache_lock); - __add_to_page_cache(page, mapping, offset, page_hash(mapping, offset)); - spin_unlock(&pagecache_lock); + spinlock_t *pg_lock = __PAGECACHE_LOCK(mapping, index); + + spin_lock(pg_lock); + __add_to_page_cache(page, mapping, index, page_hash(mapping, index)); + spin_unlock(pg_lock); lru_cache_add(page); } int add_to_page_cache_unique(struct page * page, - struct address_space *mapping, unsigned long offset, + struct address_space *mapping, unsigned long index, struct page **hash) { int err; struct page *alias; + spinlock_t *pg_lock = __PAGECACHE_LOCK(mapping, index); + + spin_lock(pg_lock); - spin_lock(&pagecache_lock); - alias = __find_page_nolock(mapping, offset, *hash); + alias = __find_page_nolock(mapping, index, *hash); err = 1; if (!alias) { - __add_to_page_cache(page,mapping,offset,hash); + __add_to_page_cache(page,mapping,index,hash); err = 0; } - spin_unlock(&pagecache_lock); + spin_unlock(pg_lock); if (!err) lru_cache_add(page); return err; @@ -712,16 +849,16 @@ * This adds the requested page to the page cache if it isn't already there, * and schedules an I/O to read in its contents from disk. */ -static int FASTCALL(page_cache_read(struct file * file, unsigned long offset)); -static int page_cache_read(struct file * file, unsigned long offset) +static int FASTCALL(page_cache_read(struct file * file, unsigned long index)); +static int page_cache_read(struct file * file, unsigned long index) { struct address_space *mapping = file->f_dentry->d_inode->i_mapping; - struct page **hash = page_hash(mapping, offset); + struct page **hash = page_hash(mapping, index); struct page *page; - spin_lock(&pagecache_lock); - page = __find_page_nolock(mapping, offset, *hash); - spin_unlock(&pagecache_lock); + spin_lock(__PAGECACHE_LOCK(mapping, index)); + page = __find_page_nolock(mapping, index, *hash); + spin_unlock(__PAGECACHE_LOCK(mapping, index)); if (page) return 0; @@ -729,7 +866,7 @@ if (!page) return -ENOMEM; - if (!add_to_page_cache_unique(page, mapping, offset, hash)) { + if (!add_to_page_cache_unique(page, mapping, index, hash)) { int error = mapping->a_ops->readpage(file, page); page_cache_release(page); return error; @@ -746,19 +883,19 @@ * Read in an entire cluster at once. A cluster is usually a 64k- * aligned block that includes the page requested in "offset." */ -static int FASTCALL(read_cluster_nonblocking(struct file * file, unsigned long offset, +static int FASTCALL(read_cluster_nonblocking(struct file * file, unsigned long index, unsigned long filesize)); -static int read_cluster_nonblocking(struct file * file, unsigned long offset, +static int read_cluster_nonblocking(struct file * file, unsigned long index, unsigned long filesize) { unsigned long pages = CLUSTER_PAGES; - offset = CLUSTER_OFFSET(offset); - while ((pages-- > 0) && (offset < filesize)) { - int error = page_cache_read(file, offset); + index = CLUSTER_OFFSET(index); + while ((pages-- > 0) && (index < filesize)) { + int error = page_cache_read(file, index); if (error < 0) return error; - offset ++; + index++; } return 0; @@ -838,7 +975,7 @@ * hashed page atomically. */ struct page * __find_get_page(struct address_space *mapping, - unsigned long offset, struct page **hash) + unsigned long index, struct page **hash) { struct page *page; @@ -846,29 +983,29 @@ * We scan the hash list read-only. Addition to and removal from * the hash-list needs a held write-lock. */ - spin_lock(&pagecache_lock); - page = __find_page_nolock(mapping, offset, *hash); + spin_lock(__PAGECACHE_LOCK(mapping, index)); + page = __find_page_nolock(mapping, index, *hash); if (page) page_cache_get(page); - spin_unlock(&pagecache_lock); + spin_unlock(__PAGECACHE_LOCK(mapping, index)); return page; } /* * Same as above, but trylock it instead of incrementing the count. */ -struct page *find_trylock_page(struct address_space *mapping, unsigned long offset) +struct page *find_trylock_page(struct address_space *mapping, unsigned long index) { struct page *page; - struct page **hash = page_hash(mapping, offset); + struct page **hash = page_hash(mapping, index); - spin_lock(&pagecache_lock); - page = __find_page_nolock(mapping, offset, *hash); + spin_lock(__PAGECACHE_LOCK(mapping, index)); + page = __find_page_nolock(mapping, index, *hash); if (page) { if (TryLockPage(page)) page = NULL; } - spin_unlock(&pagecache_lock); + spin_unlock(__PAGECACHE_LOCK(mapping, index)); return page; } @@ -879,7 +1016,7 @@ */ static struct page * FASTCALL(__find_lock_page_helper(struct address_space *, unsigned long, struct page *)); static struct page * __find_lock_page_helper(struct address_space *mapping, - unsigned long offset, struct page *hash) + unsigned long index, struct page *hash) { struct page *page; @@ -888,16 +1025,16 @@ * the hash-list needs a held write-lock. */ repeat: - page = __find_page_nolock(mapping, offset, hash); + page = __find_page_nolock(mapping, index, hash); if (page) { page_cache_get(page); if (TryLockPage(page)) { - spin_unlock(&pagecache_lock); + spin_unlock(__PAGECACHE_LOCK(mapping, index)); lock_page(page); - spin_lock(&pagecache_lock); + spin_lock(__PAGECACHE_LOCK(mapping, index)); /* Has the page been re-allocated while we slept? */ - if (page->mapping != mapping || page->index != offset) { + if (page->mapping != mapping || page->index != index) { UnlockPage(page); page_cache_release(page); goto repeat; @@ -912,13 +1049,13 @@ * it's still valid once we own it. */ struct page * __find_lock_page (struct address_space *mapping, - unsigned long offset, struct page **hash) + unsigned long index, struct page **hash) { struct page *page; - spin_lock(&pagecache_lock); - page = __find_lock_page_helper(mapping, offset, *hash); - spin_unlock(&pagecache_lock); + spin_lock(__PAGECACHE_LOCK(mapping, index)); + page = __find_lock_page_helper(mapping, index, *hash); + spin_unlock(__PAGECACHE_LOCK(mapping, index)); return page; } @@ -930,20 +1067,20 @@ struct page *page; struct page **hash = page_hash(mapping, index); - spin_lock(&pagecache_lock); + spin_lock(__PAGECACHE_LOCK(mapping, index)); page = __find_lock_page_helper(mapping, index, *hash); - spin_unlock(&pagecache_lock); + spin_unlock(__PAGECACHE_LOCK(mapping, index)); if (!page) { struct page *newpage = alloc_page(gfp_mask); if (newpage) { - spin_lock(&pagecache_lock); + spin_lock(__PAGECACHE_LOCK(mapping, index)); page = __find_lock_page_helper(mapping, index, *hash); if (likely(!page)) { page = newpage; __add_to_page_cache(page, mapping, index, hash); newpage = NULL; } - spin_unlock(&pagecache_lock); + spin_unlock(__PAGECACHE_LOCK(mapping, index)); if (newpage == NULL) lru_cache_add(page); else @@ -1269,7 +1406,7 @@ * This is really ugly. But the goto's actually try to clarify some * of the logic when it comes to error handling etc. */ -void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor) +void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor, int nonblock) { struct address_space *mapping = filp->f_dentry->d_inode->i_mapping; struct inode *inode = mapping->host; @@ -1344,17 +1481,24 @@ */ hash = page_hash(mapping, index); - spin_lock(&pagecache_lock); + spin_lock(__PAGECACHE_LOCK(mapping, index)); page = __find_page_nolock(mapping, index, *hash); if (!page) goto no_cached_page; found_page: page_cache_get(page); - spin_unlock(&pagecache_lock); + spin_unlock(__PAGECACHE_LOCK(mapping, index)); - if (!Page_Uptodate(page)) + if (!Page_Uptodate(page)) { + if (nonblock) { + page_cache_release(page); + desc->error = -EWOULDBLOCKIO; + break; + } goto page_not_up_to_date; - generic_file_readahead(reada_ok, filp, inode, page); + } + if (!nonblock) + generic_file_readahead(reada_ok, filp, inode, page); page_ok: /* If users can be writing to this page using arbitrary * virtual addresses, take care about potential aliasing @@ -1437,6 +1581,11 @@ break; no_cached_page: + if (nonblock) { + spin_unlock(__PAGECACHE_LOCK(mapping, index)); + desc->error = -EWOULDBLOCKIO; + break; + } /* * Ok, it wasn't cached, so we need to create a new * page.. @@ -1444,7 +1593,7 @@ * We get here with the page cache lock held. */ if (!cached_page) { - spin_unlock(&pagecache_lock); + spin_unlock(__PAGECACHE_LOCK(mapping, index)); cached_page = page_cache_alloc(mapping); if (!cached_page) { desc->error = -ENOMEM; @@ -1455,7 +1604,7 @@ * Somebody may have added the page while we * dropped the page cache lock. Check for that. */ - spin_lock(&pagecache_lock); + spin_lock(__PAGECACHE_LOCK(mapping, index)); page = __find_page_nolock(mapping, index, *hash); if (page) goto found_page; @@ -1466,7 +1615,7 @@ */ page = cached_page; __add_to_page_cache(page, mapping, index, hash); - spin_unlock(&pagecache_lock); + spin_unlock(__PAGECACHE_LOCK(mapping, index)); lru_cache_add(page); cached_page = NULL; @@ -1608,7 +1757,7 @@ desc.count = count; desc.buf = buf; desc.error = 0; - do_generic_file_read(filp, ppos, &desc, file_read_actor); + do_generic_file_read(filp, ppos, &desc, file_read_actor, 0); retval = desc.written; if (!retval) @@ -1640,7 +1789,7 @@ } } -static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size) +int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size) { ssize_t written; unsigned long count = desc->count; @@ -1733,7 +1882,7 @@ desc.count = count; desc.buf = (char *) out_file; desc.error = 0; - do_generic_file_read(in_file, ppos, &desc, file_send_actor); + do_generic_file_read(in_file, ppos, &desc, file_send_actor, 0); retval = desc.written; if (!retval) @@ -2577,11 +2726,11 @@ struct address_space * as = vma->vm_file->f_dentry->d_inode->i_mapping; struct page * page, ** hash = page_hash(as, pgoff); - spin_lock(&pagecache_lock); + spin_lock(__PAGECACHE_LOCK(as, pgoff)); page = __find_page_nolock(as, pgoff, *hash); if ((page) && (Page_Uptodate(page))) present = 1; - spin_unlock(&pagecache_lock); + spin_unlock(__PAGECACHE_LOCK(as, pgoff)); return present; } @@ -2883,7 +3032,7 @@ * Check whether we've reached the file size limit. */ err = -EFBIG; - + if (limit != RLIM_INFINITY) { if (pos >= limit) { send_sig(SIGXFSZ, current, 0); @@ -3067,21 +3216,21 @@ void __init page_cache_init(unsigned long mempages) { - unsigned long htable_size, order; + unsigned long htable_size, order, i; htable_size = mempages; - htable_size *= sizeof(struct page *); + htable_size *= sizeof(struct page_cache_bucket); for(order = 0; (PAGE_SIZE << order) < htable_size; order++) ; do { - unsigned long tmp = (PAGE_SIZE << order) / sizeof(struct page *); + unsigned long tmp = (PAGE_SIZE << order) / sizeof(struct page_cache_bucket); page_hash_bits = 0; while((tmp >>= 1UL) != 0UL) page_hash_bits++; - page_hash_table = (struct page **) + page_hash_table = (struct page_cache_bucket *) __get_free_pages(GFP_ATOMIC, order); } while(page_hash_table == NULL && --order > 0); @@ -3089,5 +3238,9 @@ (1 << page_hash_bits), order, (PAGE_SIZE << order)); if (!page_hash_table) panic("Failed to allocate page hash table\n"); - memset((void *)page_hash_table, 0, PAGE_HASH_SIZE * sizeof(struct page *)); + + for (i = 0; i < PAGE_HASH_SIZE; i++) { + spin_lock_init(&page_hash_table[i].lock); + page_hash_table[i].chain = NULL; + } } --- linux/mm/swap_state.c.orig Thu Nov 1 00:31:03 2001 +++ linux/mm/swap_state.c Fri Feb 1 08:16:55 2002 @@ -41,6 +41,7 @@ LIST_HEAD_INIT(swapper_space.dirty_pages), LIST_HEAD_INIT(swapper_space.locked_pages), 0, /* nrpages */ + SPIN_LOCK_UNLOCKED, &swap_aops, }; @@ -121,9 +122,11 @@ entry.val = page->index; - spin_lock(&pagecache_lock); + spin_lock(PAGECACHE_LOCK(page)); + spin_lock(&page->mapping->page_lock); __delete_from_swap_cache(page); - spin_unlock(&pagecache_lock); + spin_unlock(PAGECACHE_LOCK(page)); + spin_unlock(&page->mapping->page_lock); swap_free(entry); page_cache_release(page); --- linux/mm/swap.c.orig Wed Nov 7 07:44:20 2001 +++ linux/mm/swap.c Fri Feb 1 08:16:56 2002 @@ -93,6 +93,7 @@ spin_unlock(&pagemap_lru_lock); } + /* * Perform any setup for the swap system */ --- linux/mm/swapfile.c.orig Fri Feb 1 08:13:33 2002 +++ linux/mm/swapfile.c Fri Feb 1 08:16:56 2002 @@ -239,10 +239,10 @@ /* Is the only swap cache user the cache itself? */ if (p->swap_map[SWP_OFFSET(entry)] == 1) { /* Recheck the page count with the pagecache lock held.. */ - spin_lock(&pagecache_lock); + spin_lock(PAGECACHE_LOCK(page)); if (page_count(page) - !!page->buffers == 2) retval = 1; - spin_unlock(&pagecache_lock); + spin_unlock(PAGECACHE_LOCK(page)); } swap_info_put(p); } @@ -307,13 +307,18 @@ retval = 0; if (p->swap_map[SWP_OFFSET(entry)] == 1) { /* Recheck the page count with the pagecache lock held.. */ - spin_lock(&pagecache_lock); + struct address_space *mapping = page->mapping; + spinlock_t *pg_lock = PAGECACHE_LOCK(page); + + spin_lock(pg_lock); + spin_lock(&mapping->page_lock); if (page_count(page) - !!page->buffers == 2) { __delete_from_swap_cache(page); SetPageDirty(page); retval = 1; } - spin_unlock(&pagecache_lock); + spin_unlock(&mapping->page_lock); + spin_unlock(pg_lock); } swap_info_put(p); --- linux/include/linux/swap.h.orig Fri Feb 1 08:13:30 2002 +++ linux/include/linux/swap.h Fri Feb 1 08:16:56 2002 @@ -89,7 +89,6 @@ extern atomic_t nr_async_pages; extern atomic_t page_cache_size; extern atomic_t buffermem_pages; -extern spinlock_t pagecache_lock; extern void __remove_inode_page(struct page *); /* Incomplete types for prototype declarations: */ --- linux/include/linux/errno.h.orig Fri Feb 9 23:46:13 2001 +++ linux/include/linux/errno.h Fri Feb 1 08:16:56 2002 @@ -21,6 +21,9 @@ #define EBADTYPE 527 /* Type not supported by server */ #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ +/* Defined for TUX async IO */ +#define EWOULDBLOCKIO 530 /* Would block due to block-IO */ + #endif #endif --- linux/include/linux/fs.h.orig Fri Feb 1 08:13:32 2002 +++ linux/include/linux/fs.h Fri Feb 1 08:16:56 2002 @@ -374,6 +374,8 @@ struct list_head dirty_pages; /* list of dirty pages */ struct list_head locked_pages; /* list of locked pages */ unsigned long nrpages; /* number of total pages */ + spinlock_t page_lock; /* and spinlock protecting them */ + struct address_space_operations *a_ops; /* methods */ struct inode *host; /* owner: inode, block_device */ struct vm_area_struct *i_mmap; /* list of private mappings */ @@ -1435,6 +1437,7 @@ extern int generic_cont_expand(struct inode *inode, loff_t size) ; extern int block_commit_write(struct page *page, unsigned from, unsigned to); extern int block_sync_page(struct page *); +extern void flush_inode_pages (struct inode * inode); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int generic_commit_write(struct file *, struct page *, unsigned, unsigned); @@ -1446,7 +1449,7 @@ extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *); extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *); -extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t); +extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t, int); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); extern ssize_t generic_read_dir(struct file *, char *, size_t, loff_t *); --- linux/include/linux/pagemap.h.orig Thu Nov 22 20:46:44 2001 +++ linux/include/linux/pagemap.h Fri Feb 1 08:16:56 2002 @@ -41,12 +41,17 @@ */ #define page_cache_entry(x) virt_to_page(x) +struct page_cache_bucket { + spinlock_t lock; + struct page *chain; +} __attribute__((__aligned__(8))); + extern unsigned int page_hash_bits; #define PAGE_HASH_BITS (page_hash_bits) #define PAGE_HASH_SIZE (1 << PAGE_HASH_BITS) extern atomic_t page_cache_size; /* # of pages currently in the hash table */ -extern struct page **page_hash_table; +extern struct page_cache_bucket *page_hash_table; extern void page_cache_init(unsigned long); @@ -68,7 +73,12 @@ #undef s } -#define page_hash(mapping,index) (page_hash_table+_page_hashfn(mapping,index)) +#define page_hash(mapping,index) \ + &((page_hash_table+_page_hashfn(mapping,index))->chain) +#define __PAGECACHE_LOCK(mapping,index) \ + &((page_hash_table+_page_hashfn(mapping,index))->lock) +#define PAGECACHE_LOCK(page) \ + __PAGECACHE_LOCK((page)->mapping, (page)->index) extern struct page * __find_get_page(struct address_space *mapping, unsigned long index, struct page **hash); --- linux/net/khttpd/datasending.c.orig Mon Sep 10 16:58:35 2001 +++ linux/net/khttpd/datasending.c Fri Feb 1 08:16:56 2002 @@ -127,7 +127,7 @@ desc.count = ReadSize; desc.buf = (char *) CurrentRequest->sock; desc.error = 0; - do_generic_file_read(CurrentRequest->filp, ppos, &desc, sock_send_actor); + do_generic_file_read(CurrentRequest->filp, ppos, &desc, sock_send_actor, 0); if (desc.written>0) { CurrentRequest->BytesSent += desc.written; --- linux/drivers/block/loop.c.orig Fri Feb 1 08:13:31 2002 +++ linux/drivers/block/loop.c Fri Feb 1 08:16:56 2002 @@ -278,7 +278,7 @@ spin_lock_irq(&lo->lo_lock); file = lo->lo_backing_file; spin_unlock_irq(&lo->lo_lock); - do_generic_file_read(file, &pos, &desc, lo_read_actor); + do_generic_file_read(file, &pos, &desc, lo_read_actor, 0); return desc.error; } --- linux/m.orig Fri Feb 1 08:14:05 2002 +++ linux/m Fri Feb 1 08:16:56 2002 @@ -0,0 +1,10 @@ +#!/bin/bash + +(time make -j10 bzImage 2>&1 | tee e) +if [ "$?" = "0" ]; then + echo no compilation errors ... +#su -c ./d +else + echo 'compilation ERRORS!' +fi +cat e >> .e.log --- linux/mb.orig Fri Feb 1 08:14:05 2002 +++ linux/mb Fri Feb 1 08:16:56 2002 @@ -0,0 +1,6 @@ +./m +if [ "$?" -ne "0" -o "`grep 'compilation ERRORS!' e`" = "compilation ERRORS!" ]; then + echo 'not rebooting, error during compile!' +else + su -c './d;reboot' +fi --- linux/mi.orig Fri Feb 1 08:14:05 2002 +++ linux/mi Fri Feb 1 08:16:56 2002 @@ -0,0 +1,3 @@ +#./mo +make modules_install +/home/mingo/bin/beep --- linux/mo.orig Fri Feb 1 08:14:05 2002 +++ linux/mo Fri Feb 1 08:16:56 2002 @@ -0,0 +1,3 @@ +# rm -f net/tux/*.o +make -j8 modules 2>&1 | tee e +