diff -uNr linux-2.6.12-rc2-mm3/Documentation/filesystems/caching/backend-api.txt linux-2.6.12-rc2-mm3-cachefs/Documentation/filesystems/caching/backend-api.txt --- linux-2.6.12-rc2-mm3/Documentation/filesystems/caching/backend-api.txt 2005-04-12 15:48:42.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/Documentation/filesystems/caching/backend-api.txt 2005-04-15 18:44:59.000000000 +0100 @@ -116,13 +116,16 @@ (*) Cached search result. struct fscache_search_result { - unsigned ino; ... }; This is used by FS-Cache to keep track of what nodes it has found in what - caches. Some of the cache operations set the "cache node number" held - therein. + caches in lieu of actually dragging cache nodes into memory. It is also + used to cache negative search results. + + These structures are actually allocated, freed and queried through the + backend to which the result applies. Typically they will be embedded in a + structure representing the backend's concept of a search result. (*) In-cache node representation. @@ -170,13 +173,46 @@ This isn't strictly an operation, but should be pointed at a string naming the backend. + (*) Allocate/Free a search result record [mandatory]. + + struct fscache_search_result *(*srch_alloc)(struct fscache_cache *cache, + unsigned long gfp); + void (*srch_free)(struct fscache_search_result *srch); + + These methods are called to create a search result record and get rid of + it again. The format of the search result record is left to the underlying + backend. + + (*) Set a search result record to negative [mandatory]. + + void (*srch_negate)(struct fscache_search_result *srch); + + This method is called to mark a search result as now being a negative + result. + + (*) Determine if search result is positive [mandatory]. + + int (*is_srch_positive)(struct fscache_search_result *srch); + + This method is called to determine whether a search result is positive + (should return non-zero) or negative (should return 0). 
+ + (*) Represent a search result for printing [mandatory]. + + unsigned long long (*srch_id)(struct fscache_search_result *srch); + + This method is called to render the search result as something printable + for debugging purposes. + (*) Node lookup [mandatory]. struct fscache_node *(*lookup_node)(struct fscache_cache *cache, - unsigned ino) + struct fscache_cookie *cookie, + struct fscache_search_result *srch) - This method is used to turn a logical cache node number into a handle on a - represention of that node. + This method is used to turn a cache search result into a handle on a + representation of that node. The cookie to which the node will be attached + is included for reference. (*) Increment node refcount [mandatory]. @@ -193,7 +229,7 @@ These methods are used to exclusively lock a node. It must be possible to schedule with the lock held, so a spinlock isn't sufficient. - (*) Unreference node [mandatory]. + (*) Release node reference [mandatory]. void (*put_node)(struct fscache_node *node) @@ -214,8 +250,8 @@ including updating or discarding existing index entries. An index entry can be updated by calling index->cookie->idef->update(). - If the search is successful, the node number should be stored in - result->ino and zero returned. If not successful, error ENOENT should be + If the search is successful, the specified search result structure should + be updated and zero returned. If not successful, error ENOENT should be returned if no entry was found, or some other error otherwise. (*) Create a new node [mandatory]. @@ -229,8 +265,8 @@ obtained by calling index->cookie->idef->update() and passing it the argument cookie. - If successful, the node number should be stored in result->ino and zero - should be returned. + If successful, the specified search result structure should be updated and + zero should be returned. (*) Update a node [mandatory]. 
diff -uNr linux-2.6.12-rc2-mm3/fs/afs/file.c linux-2.6.12-rc2-mm3-cachefs/fs/afs/file.c --- linux-2.6.12-rc2-mm3/fs/afs/file.c 2005-04-12 15:49:01.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/afs/file.c 2005-04-19 21:06:57.000000000 +0100 @@ -147,9 +147,6 @@ static int afs_file_readpage(struct file *file, struct page *page) { struct afs_rxfs_fetch_descriptor desc; -#ifdef CONFIG_AFS_FSCACHE - struct fscache_page *pageio; -#endif struct afs_vnode *vnode; struct inode *inode; int ret; @@ -167,10 +164,6 @@ goto error; #ifdef CONFIG_AFS_FSCACHE - pageio = fscache_page_get_private(page, GFP_NOIO); - if (IS_ERR(pageio)) - goto error2; - /* is it cached? */ ret = fscache_read_or_alloc_page(vnode->cache, page, @@ -241,10 +234,6 @@ _leave(" = 0"); return 0; -#ifdef CONFIG_AFS_FSCACHE - error2: - ret = PTR_ERR(pageio); -#endif error: SetPageError(page); unlock_page(page); @@ -256,23 +245,6 @@ /*****************************************************************************/ /* - * get a page cookie for the specified page - */ -#ifdef CONFIG_AFS_FSCACHE -struct fscache_page *afs_cache_get_page_token(struct page *page) -{ - struct fscache_page *page_cookie; - - _enter(""); - page_cookie = fscache_page_get_private(page, GFP_NOIO); - - _leave(" = %p", page_cookie); - return page_cookie; -} /* end afs_cache_get_page_token() */ -#endif - -/*****************************************************************************/ -/* * invalidate part or all of a page */ static int afs_file_invalidatepage(struct page *page, unsigned long offset) @@ -283,12 +255,12 @@ BUG_ON(!PageLocked(page)); - if (PagePrivate(page)) { #ifdef CONFIG_AFS_FSCACHE - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - fscache_uncache_page(vnode->cache,page); + fscache_uncache_page(AFS_FS_I(page->mapping->host)->cache, page); #endif + if (PagePrivate(page)) { + /* We release buffers only if the entire page is being * invalidated. 
* The get_block cached value has been unconditionally @@ -306,6 +278,7 @@ _leave(" = %d", ret); return ret; + } /* end afs_file_invalidatepage() */ /*****************************************************************************/ @@ -314,26 +287,15 @@ */ static int afs_file_releasepage(struct page *page, int gfp_flags) { - struct fscache_page *pageio; - _enter("{%lu},%x", page->index, gfp_flags); - if (PagePrivate(page)) { #ifdef CONFIG_AFS_FSCACHE - struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); - fscache_uncache_page(vnode->cache, page); + fscache_uncache_page(AFS_FS_I(page->mapping->host)->cache, page); #endif - pageio = (struct fscache_page *) page->private; - page->private = 0; - ClearPagePrivate(page); - - if (pageio) - kfree(pageio); - } - _leave(" = 0"); return 0; + } /* end afs_file_releasepage() */ /*****************************************************************************/ diff -uNr linux-2.6.12-rc2-mm3/fs/afs/main.c linux-2.6.12-rc2-mm3-cachefs/fs/afs/main.c --- linux-2.6.12-rc2-mm3/fs/afs/main.c 2005-04-12 15:49:01.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/afs/main.c 2005-04-20 17:45:16.000000000 +0100 @@ -1,6 +1,6 @@ /* main.c: AFS client file system * - * Copyright (C) 2002 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2002,5 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -53,7 +53,6 @@ #ifdef CONFIG_AFS_FSCACHE static struct fscache_netfs_operations afs_cache_ops = { - .get_page_token = afs_cache_get_page_token, }; struct fscache_netfs afs_cache_netfs = { diff -uNr linux-2.6.12-rc2-mm3/fs/cachefiles/index.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefiles/index.c --- linux-2.6.12-rc2-mm3/fs/cachefiles/index.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefiles/index.c 2005-04-15 17:51:24.000000000 +0100 @@ -0,0 +1,53 @@ +/* index.c: cachefiles index management + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include "internal.h" + +/*****************************************************************************/ +/* + * search an index + */ +int cachefiles_index_search(struct fscache_node *node, + struct fscache_cookie *target, + struct fscache_search_result *result) +{ + kenter(""); + return -ENOANO; + +} /* end cachefiles_index_search() */ + +/*****************************************************************************/ +/* + * add an entry to an index + */ +int cachefiles_index_add(struct fscache_node *node, + struct fscache_cookie *cookie, + struct fscache_search_result *result) +{ + kenter(""); + return -ENOANO; + +} /* end cachefiles_index_add() */ + +/*****************************************************************************/ +/* + * update an entry in an index + */ +int cachefiles_index_update(struct fscache_node *ixnode, + struct fscache_node *node) +{ + kenter(""); + return -ENOANO; + +} /* end cachefiles_index_update() */ diff -uNr linux-2.6.12-rc2-mm3/fs/cachefiles/interface.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefiles/interface.c --- linux-2.6.12-rc2-mm3/fs/cachefiles/interface.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefiles/interface.c 2005-04-20 17:31:04.000000000 +0100 @@ -0,0 +1,294 @@ +/* interface.c: filesystem cache interface + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include "internal.h" + +/*****************************************************************************/ +/* + * allocate a negative search result record (NULL if out of memory) + */ +struct fscache_search_result *cachefiles_srch_alloc(struct fscache_cache *cache, + unsigned long gfp) +{ + struct cachefiles_search_result *srch; + + srch = kmalloc(sizeof(struct cachefiles_search_result), gfp); + if (!srch) + return NULL; /* don't form &srch->srch from a NULL pointer */ + + fscache_init_search_result(cache, &srch->srch); + srch->subindex = -1; /* records start out negative */ + return &srch->srch; + +} /* end cachefiles_srch_alloc() */ + +/*****************************************************************************/ +/* + * free a search result record + */ +void cachefiles_srch_free(struct fscache_search_result *_srch) +{ + struct cachefiles_search_result *srch; + + if (_srch) { + srch = container_of(_srch, struct cachefiles_search_result, srch); + kfree(srch); + } + +} /* end cachefiles_srch_free() */ + +/*****************************************************************************/ +/* + * set a search result record to negative + */ +static void cachefiles_srch_negate(struct fscache_search_result *_srch) +{ + struct cachefiles_search_result *srch; + + srch = container_of(_srch, struct cachefiles_search_result, srch); + srch->subindex = -1; + +} /* end cachefiles_srch_negate() */ + +/*****************************************************************************/ +/* + * see if search result is positive + */ +static int cachefiles_is_srch_positive(struct fscache_search_result *_srch) +{ + struct cachefiles_search_result *srch; + + srch = container_of(_srch, struct cachefiles_search_result, srch); + return srch->subindex != -1; + +} /* end cachefiles_is_srch_positive() */ + +/*****************************************************************************/ +/* + * represent a search result for printing + */ +static unsigned long long cachefiles_srch_id(struct fscache_search_result *_srch) +{ + struct cachefiles_search_result *srch; + + srch = 
container_of(_srch, struct cachefiles_search_result, srch); + return srch->subindex; + +} /* end cachefiles_srch_id() */ + +/*****************************************************************************/ +/* + * look up the nominated node for this cache + */ +static +struct fscache_node *cachefiles_lookup_node(struct fscache_cache *_cache, + struct fscache_cookie *cookie, + struct fscache_search_result *_srch) +{ + struct cachefiles_search_result *srch; + struct cachefiles_cache *cache; + struct cachefiles_node *node; + + cache = container_of(_cache, struct cachefiles_cache, cache); + srch = container_of(_srch, struct cachefiles_search_result, srch); + + _enter("%p,%p,{%d}", _cache, cookie, srch->subindex); + + /* deal with the FSDEF cookie */ + if (!cookie->iparent) { + node = &cache->fsdef_node; + atomic_inc(&node->usage); + kleave(" = %p [FSDEF]", &node->node); + return &node->node; + } + + kleave(" = -ENOANO"); + return ERR_PTR(-ENOANO); + +} /* end cachefiles_lookup_node() */ + +/*****************************************************************************/ +/* + * increment the usage count on this inode (may fail if unmounting) + */ +static struct fscache_node *cachefiles_grab_node(struct fscache_node *_node) +{ + struct cachefiles_node *node; + + _enter("%p", _node); + + node = container_of(_node, struct cachefiles_node, node); + atomic_inc(&node->usage); + + _leave(" = %p", _node); + return _node; + +} /* end cachefiles_grab_node() */ + +/*****************************************************************************/ +/* + * lock a semaphore on a node + */ +static void cachefiles_lock_node(struct fscache_node *_node) +{ + struct cachefiles_node *node; + + _enter("%p", _node); + + node = container_of(_node, struct cachefiles_node, node); + down(&node->sem); + +} /* end cachefiles_lock_node() */ + +/*****************************************************************************/ +/* + * unlock a semaphore on a node + */ +static void cachefiles_unlock_node(struct 
fscache_node *_node) +{ + struct cachefiles_node *node; + + _enter("%p", _node); + + node = container_of(_node, struct cachefiles_node, node); + up(&node->sem); + +} /* end cachefiles_unlock_node() */ + +/*****************************************************************************/ +/* + * dispose of a reference to a node + */ +static void cachefiles_put_node(struct fscache_node *node) +{ + _enter("%p", node); + +} /* end cachefiles_put_node() */ + +/*****************************************************************************/ +/* + * sync a cache + */ +static void cachefiles_sync(struct fscache_cache *cache) +{ + _enter("%p", cache); + +} /* end cachefiles_sync() */ + +/*****************************************************************************/ +/* + * dissociate all backed netfs pages from their backing storage + */ +static void cachefiles_dissociate_pages(struct fscache_cache *cache) +{ + _enter("%p", cache); + +} /* end cachefiles_dissociate_pages() */ + +/*****************************************************************************/ +/* + * read a page from the cache or allocate a block in which to store it + * - if the page is backed by a block in the cache: + * - a read will be started which will call end_io_func on completion + * - the wb-journal will be searched for an entry pertaining to this block + * - if an entry is found: + * - 1 will be returned + * else + * - 0 will be returned + * - else if the page is unbacked: + * - a block will be allocated and attached + * - the v-journal will be marked to note the block contains invalid data + * - -ENODATA will be returned + */ +static int cachefiles_read_or_alloc_page(struct fscache_node *_node, + struct page *page, + fscache_rw_complete_t end_io_func, + void *end_io_data, + unsigned long gfp) +{ + struct cachefiles_node *node; + int ret; + + _enter(""); + + node = container_of(_node, struct cachefiles_node, node); + + ret = -ENODATA; + _leave(" = %d", ret); + return ret; + +} /* end 
cachefiles_read_or_alloc_page() */ + +/*****************************************************************************/ +/* + * request a page be stored in the cache + * - this request may be ignored if no cache block is currently attached, in + * which case it returns -ENOBUFS + * - if a cache block was already allocated: + * - the page cookie will be updated to reflect the block selected + * - a BIO will have been dispatched to write the page - the BIO's bi_end_io + * routine will call end_io_func on completion + * - end_io_func can be NULL, in which case a default function will just + * clear the writeback bit + * - if there's a v-journal entry associated with the page, that entry will + * be erased + * - returns 0 + */ +static int cachefiles_write_page(struct fscache_node *node, + struct page *page, + fscache_rw_complete_t end_io_func, + void *end_io_data, + unsigned long gfp) +{ + _enter(""); + _leave(" = -ENOBUFS"); + return -ENOBUFS; + +} /* end cachefiles_write_page() */ + +/*****************************************************************************/ +/* + * detach a backing block from a page + * - if the block backing the page still has a vjentry then the block will be + * recycled + */ +static void cachefiles_uncache_page(struct fscache_node *node, + struct page *page) +{ + _enter(""); + _leave(""); + +} /* end cachefiles_uncache_page() */ + +struct fscache_cache_ops cachefiles_cache_ops = { + .name = "cachefiles", + .srch_alloc = cachefiles_srch_alloc, + .srch_free = cachefiles_srch_free, + .srch_negate = cachefiles_srch_negate, + .is_srch_positive = cachefiles_is_srch_positive, + .srch_id = cachefiles_srch_id, + .lookup_node = cachefiles_lookup_node, + .grab_node = cachefiles_grab_node, + .lock_node = cachefiles_lock_node, + .unlock_node = cachefiles_unlock_node, + .put_node = cachefiles_put_node, + .index_search = cachefiles_index_search, + .index_add = cachefiles_index_add, + .index_update = cachefiles_index_update, + .sync = cachefiles_sync, + 
.dissociate_pages = cachefiles_dissociate_pages, + .read_or_alloc_page = cachefiles_read_or_alloc_page, + .write_page = cachefiles_write_page, + .uncache_page = cachefiles_uncache_page, +}; diff -uNr linux-2.6.12-rc2-mm3/fs/cachefiles/internal.h linux-2.6.12-rc2-mm3-cachefs/fs/cachefiles/internal.h --- linux-2.6.12-rc2-mm3/fs/cachefiles/internal.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefiles/internal.h 2005-04-15 18:21:01.000000000 +0100 @@ -0,0 +1,134 @@ +/* internal.h: general netfs cache on cachefiles internal defs + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * + * Cachefiles layout: + * + * /..../CacheDir/ + * index + * 0/ + * 1/ + * 2/ + * index + * 0/ + * 1/ + * 2/ + * index + * 0 + * 1 + * 2 + */ + +#include +#include + +extern int cachefiles_debug; +extern struct fscache_cache_ops cachefiles_cache_ops; + +/*****************************************************************************/ +/* + * search result + */ +struct cachefiles_search_result +{ + struct fscache_search_result srch; /* embedded FS-Cache record */ + int subindex; +}; + +extern struct fscache_search_result *cachefiles_srch_alloc(struct fscache_cache *cache, + unsigned long gfp); + +extern void cachefiles_srch_free(struct fscache_search_result *srch); + +static inline void cachefiles_srch_set(struct fscache_search_result *_srch, int subindex) +{ + struct cachefiles_search_result *srch; + srch = container_of(_srch, struct cachefiles_search_result, srch); + srch->subindex = subindex; +} + +/*****************************************************************************/ +/* + * node records + */ +struct cachefiles_node +{ + struct 
fscache_node node; /* fscache handle */ + struct file *backer; /* backing file */ + atomic_t usage; /* node usage count */ + struct semaphore sem; +}; + +extern int cachefiles_index_search(struct fscache_node *node, + struct fscache_cookie *target, + struct fscache_search_result *result); + +extern int cachefiles_index_add(struct fscache_node *node, + struct fscache_cookie *cookie, + struct fscache_search_result *result); + +extern int cachefiles_index_update(struct fscache_node *ixnode, + struct fscache_node *node); + +/*****************************************************************************/ +/* + * Cache files cache definition + */ +struct cachefiles_cache +{ + struct fscache_cache cache; /* FS-Cache record */ + struct cachefiles_node fsdef_node; /* fs definition index node */ + char *rootdirname; /* name of cache root directory */ +}; + +/*****************************************************************************/ +/* + * debug tracing + */ +#define dbgprintk(FMT,...) \ + printk("[%-6.6s] "FMT"\n",current->comm ,##__VA_ARGS__) +#define _dbprintk(FMT,...) do { } while(0) + +#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__) +#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__) +#define kdebug(FMT,...) dbgprintk(FMT ,##__VA_ARGS__) + +#define kjournal(FMT,...) _dbprintk(FMT ,##__VA_ARGS__) + +#define dbgfree(ADDR) _dbprintk("%p:%d: FREEING %p",__FILE__,__LINE__,ADDR) + +#define dbgpgalloc(PAGE) \ +do { \ + _dbprintk("PGALLOC %s:%d: %p {%lx,%lu}\n", \ + __FILE__,__LINE__, \ + (PAGE),(PAGE)->mapping->host->i_ino,(PAGE)->index \ + ); \ +} while(0) + +#define dbgpgfree(PAGE) \ +do { \ + if ((PAGE)) \ + _dbprintk("PGFREE %s:%d: %p {%lx,%lu}\n", \ + __FILE__,__LINE__, \ + (PAGE), \ + (PAGE)->mapping->host->i_ino, \ + (PAGE)->index \ + ); \ +} while(0) + +#if 1 //def __KDEBUG +#define _enter(FMT,...) kenter(FMT,##__VA_ARGS__) +#define _leave(FMT,...) kleave(FMT,##__VA_ARGS__) +#define _debug(FMT,...) 
kdebug(FMT,##__VA_ARGS__) +#else +#define _enter(FMT,...) do { } while(0) +#define _leave(FMT,...) do { } while(0) +#define _debug(FMT,...) do { } while(0) +#endif diff -uNr linux-2.6.12-rc2-mm3/fs/cachefiles/main.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefiles/main.c --- linux-2.6.12-rc2-mm3/fs/cachefiles/main.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefiles/main.c 2005-04-15 18:26:34.000000000 +0100 @@ -0,0 +1,167 @@ +/* main.c: network filesystem caching backend to use cache files on a + * premounted filesystem + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "internal.h" + +int cachefiles_debug = 0; + +static int cachefiles_init(void); +static void cachefiles_exit(void); + +fs_initcall(cachefiles_init); +module_exit(cachefiles_exit); + +MODULE_DESCRIPTION("Cache File System"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +static char cachefiles_root_dirname[PATH_MAX]; +static struct cachefiles_cache cachefiles_cache; + +module_param_string(root, cachefiles_root_dirname, PATH_MAX, 0); +MODULE_PARM_DESC(cachefiles_root_dir, "Root directory for cache files tree"); + + +/*****************************************************************************/ +/* + * initialise the fs caching module + */ +static int cachefiles_init(void) +{ + struct fscache_search_result *srch = NULL; + struct cachefiles_node *fsdef; + int ret; + + if (!cachefiles_root_dirname[0]) { + printk(KERN_ERR "Cachefiles: no cache directory specified\n"); + return -EINVAL; + } + + /* open the caching directory */ + cachefiles_cache.rootdirname = 
cachefiles_root_dirname; + + fsdef = &cachefiles_cache.fsdef_node; + + fsdef->backer = filp_open(cachefiles_cache.rootdirname, + O_DIRECTORY | O_RDONLY, 0700); + if (IS_ERR(fsdef->backer)) { + ret = PTR_ERR(fsdef->backer); + fsdef->backer = NULL; + goto error; + } + + atomic_set(&fsdef->usage, 1); + init_MUTEX(&fsdef->sem); + + /* allocate the root search record */ + srch = cachefiles_srch_alloc(&cachefiles_cache.cache, GFP_KERNEL); + if (!srch) + goto nomem; + + cachefiles_srch_set(srch, 0); + + fscache_init_cache(&cachefiles_cache.cache, + &cachefiles_cache_ops, + "%02x:%02x", + MAJOR(fsdef->backer->f_dentry->d_sb->s_dev), + MINOR(fsdef->backer->f_dentry->d_sb->s_dev) + ); + + fscache_add_cache(&cachefiles_cache.cache, srch); + + /* done */ + printk(KERN_INFO "Cachefiles: general fs caching (cachefiles) registered\n"); + return 0; + +nomem: + ret = -ENOMEM; +error: + cachefiles_srch_free(srch); + fput(fsdef->backer); + printk(KERN_ERR "Cachefiles: failed to register: %d\n", ret); + return ret; + +} /* end cachefiles_init() */ + +/*****************************************************************************/ +/* + * clean up on module removal + */ +static void __exit cachefiles_exit(void) +{ + struct cachefiles_node *fsdef; + + printk(KERN_INFO "Cachefiles: general fs caching (cachefiles) unregistering\n"); + + fscache_withdraw_cache(&cachefiles_cache.cache); + + fsdef = &cachefiles_cache.fsdef_node; + + if (fsdef->backer->f_op && + fsdef->backer->f_op->flush) + fsdef->backer->f_op->flush(fsdef->backer); + + fput(fsdef->backer); + +} /* end cachefiles_exit() */ + +/*****************************************************************************/ +/* + * clear the dead space between task_struct and kernel stack + * - called by supplying -finstrument-functions to gcc + */ +#if 0 +void __cyg_profile_func_enter (void *this_fn, void *call_site) +__attribute__((no_instrument_function)); + +void __cyg_profile_func_enter (void *this_fn, void *call_site) +{ + asm volatile(" 
movl %%esp,%%edi \n" + " andl %0,%%edi \n" + " addl %1,%%edi \n" + " movl %%esp,%%ecx \n" + " subl %%edi,%%ecx \n" + " shrl $2,%%ecx \n" + " movl $0xedededed,%%eax \n" + " rep stosl \n" + : + : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info)) + : "eax", "ecx", "edi", "memory", "cc" + ); +} + +void __cyg_profile_func_exit(void *this_fn, void *call_site) +__attribute__((no_instrument_function)); + +void __cyg_profile_func_exit(void *this_fn, void *call_site) +{ + asm volatile(" movl %%esp,%%edi \n" + " andl %0,%%edi \n" + " addl %1,%%edi \n" + " movl %%esp,%%ecx \n" + " subl %%edi,%%ecx \n" + " shrl $2,%%ecx \n" + " movl $0xdadadada,%%eax \n" + " rep stosl \n" + : + : "i"(~(THREAD_SIZE-1)), "i"(sizeof(struct thread_info)) + : "eax", "ecx", "edi", "memory", "cc" + ); +} +#endif diff -uNr linux-2.6.12-rc2-mm3/fs/cachefiles/Makefile linux-2.6.12-rc2-mm3-cachefs/fs/cachefiles/Makefile --- linux-2.6.12-rc2-mm3/fs/cachefiles/Makefile 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefiles/Makefile 2005-04-15 16:06:39.000000000 +0100 @@ -0,0 +1,12 @@ +# +# Makefile for caching in files system +# + +#CFLAGS += -finstrument-functions + +cachefiles-objs := \ + index.o \ + interface.o \ + main.o + +obj-$(CONFIG_CACHEFILES) := cachefiles.o diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/allocator.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/allocator.c --- linux-2.6.12-rc2-mm3/fs/cachefs/allocator.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/allocator.c 2005-04-20 17:28:38.000000000 +0100 @@ -0,0 +1,153 @@ +/* allocator.c: CacheFS disk block allocator + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "cachefs-int.h" + +/*****************************************************************************/ +/* + * allocator for one node's path + */ +static void cachefs_allocate_one(struct cachefs_super *super, + struct cachefs_data_op *data_op) +{ +} /* end cachefs_allocate_one() */ + +/*****************************************************************************/ +/* + * handle write with allocation operations + */ +void cachefs_allocator(struct cachefs_super *super) +{ + struct cachefs_data_op *data_op; + + kenter(""); + + spin_lock(&super->alloc_lock); + + if (!list_empty(&super->alloc_allocq)) { + data_op = list_entry(super->alloc_allocq.next, + struct cachefs_data_op, link); + list_del_init(&data_op->link); + spin_unlock(&super->alloc_lock); + + cachefs_allocate_one(super, data_op); + } + else { + spin_unlock(&super->alloc_lock); + } + + kleave(""); + +} /* end cachefs_allocator() */ + +#if 0 +/*****************************************************************************/ +/* + * allocate and write a page + */ +static int cachefs_write_alloc_page(struct cache_inode *inode, + struct page *page, + fscache_rw_complete_t callback_func, + void *callback_data, + unsigned long gfp) +{ + struct cachefs_transaction *trans = NULL; + struct cachefs_super *super; + int loop, ret; + + kenter(""); + + super = inode->vfs_inode.i_sb->s_fs_info; + + /* we do the allocation and write as one journalled entry */ + trans = cachefs_trans_alloc(super, gfp); + if (!trans) + goto nomem; + + trans->data_op = data_op; + + trans->jentry->ino = inode->vfs_inode.i_ino; + trans->jentry->pgnum = page->index; + trans->jentry->mark = CACHEFS_ONDISC_UJNL_DATA_ALLOCING; + trans->jentry->auxmark = 0; + + /* allocate the missing blocks in the chain */ + down(&super->alloc_sem); + + ret = cachefs_indr_chain_alloc(trans); + if (ret < 0) + 
goto alloc_failed; + + set_bit(CACHEFS_BLOCK_NETFSBUSY, &trans->data_op->block->flags); + + /* note the affected blocks */ + cachefs_trans_affects_block(trans, + trans->jentry->u.alloc.block[0], + 0, + PAGE_SIZE); + + for (loop = 1; loop < CACHEFS_ONDISC_UJNL_ALLOC_SLOTS; loop++) + if ((trans->jentry->u.change.map >> loop) & 1) + cachefs_trans_affects_block( + trans, + trans->jentry->u.alloc.block[loop], + trans->jentry->u.alloc.entry[loop], + sizeof(cachefs_blockix_t)); + + /* make sure the journal is marked on disc before doing + * anything else */ + if (cachefs_trans_mark(trans) < 0) + goto mark_failed; + + up(&super->alloc_sem); + + /* make the changes */ + cachefs_indr_chain_set(trans); + + /* okay... done that */ + cachefs_trans_commit(trans); + + kleave(" = 0 [trans %p]", trans); + return 0; + +nomem: + kleave(" = -ENOMEM"); + return -ENOMEM; + +alloc_failed: + up(&super->alloc_sem); + cachefs_trans_put(trans); + kleave(" = %d"); + return ret; + +mark_failed: + up(&super->alloc_sem); + cachefs_trans_put(trans); + kleave(" = %d"); + return ret; + +} /* end cachefs_write_alloc_page() */ +#endif diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/block.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/block.c --- linux-2.6.12-rc2-mm3/fs/cachefs/block.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/block.c 2005-04-20 20:18:05.000000000 +0100 @@ -1,6 +1,6 @@ /* block.c: metadata block management * - * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2003-5 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -28,7 +28,6 @@ SLAB_CTOR_CONSTRUCTOR) { memset(block, 0, sizeof(*block)); - rwlock_init(&block->ref_lock); init_waitqueue_head(&block->writewq); INIT_LIST_HEAD(&block->batch_link); } @@ -36,151 +35,281 @@ /*****************************************************************************/ /* - * initialise the block with zeros + * look up a block record by index, creating a new one if not found + * - returns a pointer to the block or -ENOMEM + * - the block's refcount will have been elevated by 1 */ -static int cachefs_block_dummy_filler(void *data, struct page *page) +struct cachefs_block *cachefs_block_insert(struct cachefs_super *super, + cachefs_blockix_t bix) { - struct fscache_page *pageio; + struct cachefs_block *newblock, *block; + struct rb_node *parent, **p; + unsigned long flags; - _enter("%p,{%lu}", data, page->index); + _enter(",%u", bix); - /* we need somewhere to note journal ACKs that need to be made */ - pageio = fscache_page_get_private(page, GFP_KERNEL); - if (IS_ERR(pageio)) - return PTR_ERR(pageio); + if (bix > i_size_read(super->sb->s_bdev->bd_inode) / PAGE_SIZE) { + printk("CacheFS: trying to insert out of range block %x/%lx\n", + bix, + (unsigned long) + (i_size_read(super->sb->s_bdev->bd_inode) >> PAGE_SHIFT) + ); + BUG(); + } - pageio->mapped_block = data; - cachefs_block_get(pageio->mapped_block); + /* allocate and initialise a block record just in case */ + newblock = kmem_cache_alloc(cachefs_block_jar, SLAB_KERNEL); + if (!newblock) { + _leave(" = -ENOMEM"); + return ERR_PTR(-ENOMEM); + } - memclear_highpage_flush(page, 0, PAGE_SIZE); + atomic_set(&newblock->usage, 1); + newblock->flags = 0; + newblock->bix = bix; + newblock->super = super; + newblock->page = NULL; + newblock->writeback = NULL; - SetPageUptodate(page); - unlock_page(page); - return 0; + parent = NULL; + block = NULL; + + /* see if the block is already 
recorded */ + write_lock_irqsave(&super->blk_tree_lock, flags); + p = &super->blk_tree.rb_node; + + while (*p) { + parent = *p; + block = rb_entry(parent, struct cachefs_block, lookup_node); + + if (bix < block->bix) + p = &(*p)->rb_left; + else if (bix > block->bix) + p = &(*p)->rb_right; + else + goto block_already_present; + } -} /* end cachefs_block_dummy_filler() */ + /* there's no block record yet - use the new one we allocated + * earlier */ + rb_link_node(&newblock->lookup_node, parent, p); + rb_insert_color(&newblock->lookup_node, &super->blk_tree); + write_unlock_irqrestore(&super->blk_tree_lock, flags); + + atomic_inc(&super->cnt_blk_tree); + _leave(" = %p {u=%d} [new]", newblock, atomic_read(&newblock->usage)); + return newblock; + + /* the block is already recorded, pin that one and dispose of + * the new one */ + block_already_present: + cachefs_block_get(block); + write_unlock_irqrestore(&super->blk_tree_lock, flags); + + dbgfree(newblock); + kmem_cache_free(cachefs_block_jar, newblock); + + _leave(" = %p {u=%d}", block, atomic_read(&block->usage)); + return block; + +} /* end cachefs_block_insert() */ /*****************************************************************************/ /* - * associate a page with a block, dislodging any old page association + * find a block in the superblock's lookup tree + * - returns a pointer to the block, if it exists; -ENOENT otherwise + * - the block's refcount will have been elevated by 1 */ -int cachefs_block_set(struct cachefs_super *super, - struct cachefs_block *block, - struct page *page, - struct fscache_page *pageio) +struct cachefs_block *cachefs_block_find(struct cachefs_super *super, + cachefs_blockix_t bix) { - DECLARE_WAITQUEUE(myself,current); + struct cachefs_block *block; + struct rb_node *node; + unsigned long flags; - struct cachefs_block *block2; + _enter(",%d", bix); - _enter(",%u,", block->bix); + /* do the lookup */ + read_lock_irqsave(&super->blk_tree_lock, flags); + node = 
super->blk_tree.rb_node; - /* don't do anything if already associated as we want */ - block2 = pageio->mapped_block; - if (block2) { - if (block2 == block) { - if (block->page == page) { - _leave(" = 0 [assoc preset]"); - return 0; - } + while (node) { + block = rb_entry(node, struct cachefs_block, lookup_node); - block->page = page; - _leave(" = 0 [assoc xchg]"); - return 0; - } + if (bix < block->bix) + node = node->rb_left; + else if (bix > block->bix) + node = node->rb_right; + else + goto block_found; + } + read_unlock_irqrestore(&super->blk_tree_lock, flags); + + /* not found */ + _leave(" = -ENOENT"); + return ERR_PTR(-ENOENT); + + /* found - pin and return */ +block_found: + cachefs_block_get(block); + read_unlock_irqrestore(&super->blk_tree_lock, flags); + + _leave(" = %p{u=%d}", block, atomic_read(&block->usage)); + return block; - BUG(); /* page already associated with a different block! */ +} /* end cachefs_block_find() */ + +/*****************************************************************************/ +/* + * dispose of a block record + */ +void __cachefs_block_put(struct cachefs_block *block) +{ + struct cachefs_super *super = block->super; + unsigned long flags; + + _enter(",{u=%d bix=%d}", atomic_read(&block->usage), block->bix); + + /* see if we can remove from the superblock's lookup tree */ + write_lock_irqsave(&super->blk_tree_lock, flags); + + if (atomic_read(&block->usage) == 0) + rb_erase(&block->lookup_node, &super->blk_tree); + else + block = NULL; + + write_unlock_irqrestore(&super->blk_tree_lock, flags); + + /* destroy if now completely unused */ + if (block) { + atomic_dec(&super->cnt_blk_tree); + dbgfree(block); + kmem_cache_free(cachefs_block_jar, block); } - /* get the page alloc lock for this block */ + _leave(""); + +} /* end __cachefs_block_put() */ + +/*****************************************************************************/ +/* + * lock a block's allocation lock + */ +static int cachefs_block_alloc_lock(struct 
cachefs_block *block, int interruptible) +{ + unsigned long wait_state; + + DECLARE_WAITQUEUE(myself, current); + if (test_and_set_bit(CACHEFS_BLOCK_ALLOC, &block->flags)) { - set_current_state(TASK_INTERRUPTIBLE); + wait_state = TASK_UNINTERRUPTIBLE; + if (interruptible) + wait_state = TASK_INTERRUPTIBLE; + + set_current_state(wait_state); add_wait_queue(&block->writewq, &myself); while (test_and_set_bit(CACHEFS_BLOCK_ALLOC, &block->flags)) { - if (signal_pending(current)) - break; + if (wait_state == TASK_INTERRUPTIBLE && + signal_pending(current)) + goto interrupt; schedule(); - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(wait_state); } set_current_state(TASK_RUNNING); remove_wait_queue(&block->writewq, &myself); - - if (signal_pending(current)) - goto intr; } - /* make the association */ - pageio->mapped_block = cachefs_block_get(block); - - clear_bit(CACHEFS_BLOCK_COW,&block->flags); - block->page = page; - - clear_bit(CACHEFS_BLOCK_ALLOC,&block->flags); - wake_up_all(&block->writewq); - - _leave(" = 0 [assoc set]"); return 0; - intr: - _leave(" = -EINTR"); +interrupt: + set_current_state(TASK_RUNNING); + remove_wait_queue(&block->writewq, &myself); return -EINTR; -} /* end cachefs_block_set() */ +} /* end cachefs_block_alloc_lock() */ /*****************************************************************************/ /* - * associate a page with a block, dislodging any old page association + * unlock a block's allocation lock */ -int cachefs_block_set2(struct cachefs_super *super, - cachefs_blockix_t bix, - struct page *page, - struct fscache_page *pageio, - struct cachefs_block **_block) +static void cachefs_block_alloc_unlock(struct cachefs_block *block) { - struct cachefs_block *block; - int ret; + clear_bit(CACHEFS_BLOCK_ALLOC, &block->flags); + wake_up_all(&block->writewq); - _enter(",%u,,",bix); +} /* end cachefs_block_alloc_unlock() */ - if (_block) - *_block = NULL; 
+/*****************************************************************************/ +/* + * wait until a block's flag becomes clear + */ +static int cachefs_block_wait_on_flag(struct cachefs_block *block, int flag, + int interruptible) +{ + unsigned long wait_state; - /* get the block definition */ - block = cachefs_block_insert(super, bix); - if (IS_ERR(block)) { - ret = PTR_ERR(block); - goto error; - } + if (test_bit(flag, &block->flags)) { + DECLARE_WAITQUEUE(myself, current); - /* associate the block with the page */ - ret = cachefs_block_set(super, block, page, pageio); - if (ret < 0) - goto error2; + wait_state = TASK_UNINTERRUPTIBLE; + if (interruptible) + wait_state = TASK_INTERRUPTIBLE; - /* we return the block to the caller with an extra ref held if - * they ask for it */ - if (_block) { - *_block = block; - goto error; + set_current_state(wait_state); + add_wait_queue(&block->writewq, &myself); + + while (test_bit(flag, &block->flags)) { + if (wait_state == TASK_INTERRUPTIBLE && + signal_pending(current)) + break; + + schedule(); + set_current_state(wait_state); + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(&block->writewq, &myself); } - error2: - cachefs_block_put(block); - error: - _leave(" = %d", ret); - return ret; + if (signal_pending(current)) + return -EINTR; + return 0; + +} /* end cachefs_block_wait_on_flag() */ + +/*****************************************************************************/ +/* + * dummy readpage to initialise a block with zeros + */ +static int cachefs_block_zero_filler(void *data, struct page *page) +{ + struct cachefs_block *block = data; + + _enter("%p,{%lu}", block, page->index); + + BUG_ON(PagePrivate(page)); + SetPagePrivate(page); + + cachefs_block_get(block); + page->private = (unsigned long) block; + + memclear_highpage_flush(page, 0, PAGE_SIZE); -} /* end cachefs_block_set2() */ + SetPageUptodate(page); + unlock_page(page); + return 0; + +} /* end cachefs_block_zero_filler() */ 
/*****************************************************************************/ /* * read a metadata block from disc or initialise it + * - reads pages through the misc inode */ int cachefs_block_read(struct cachefs_super *super, - struct cachefs_inode *inode, cachefs_blockix_t bix, int wipe, struct cachefs_block **_block, @@ -191,10 +320,7 @@ struct page *page; filler_t *filler; - DECLARE_WAITQUEUE(myself, current); - - _enter(",%lx,%u,%d,,", - inode ? inode->vfs_inode.i_ino : CACHEFS_INO_MISC, bix, wipe); + _enter(",%u,%d,,", bix, wipe); if (_block) *_block = NULL; @@ -209,59 +335,24 @@ } /* get the page alloc lock for this block */ - if (test_and_set_bit(CACHEFS_BLOCK_ALLOC, &block->flags)) { - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&block->writewq, &myself); - - while (test_and_set_bit(CACHEFS_BLOCK_ALLOC, &block->flags)) { - if (signal_pending(current)) - break; - - schedule(); - set_current_state(TASK_INTERRUPTIBLE); - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(&block->writewq, &myself); - - if (signal_pending(current)) - goto intr; - } + if (cachefs_block_alloc_lock(block, 1) < 0) + goto intr; /* get a page for it if it doesn't already exist */ if (!block->page) { /* if the block is marked as currently undergoing writeback * then there must have been an ENOMEM encountered whilst * trying to COW the block */ - if (test_bit(CACHEFS_BLOCK_WRITEBACK, &block->flags)) { - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&block->writewq, &myself); - - while (test_bit(CACHEFS_BLOCK_WRITEBACK, - &block->flags)) { - if (signal_pending(current)) - break; - - schedule(); - set_current_state(TASK_INTERRUPTIBLE); - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(&block->writewq, &myself); - - if (signal_pending(current)) + if (cachefs_block_wait_on_flag(block, CACHEFS_BLOCK_WRITEBACK, + 1) < 0) goto intr2; - } /* load the page into the page cache */ - if (inode) - mapping = inode->vfs_inode.i_mapping; - else - mapping = 
super->imisc->i_mapping; + mapping = super->imisc->i_mapping; filler = (filler_t *) mapping->a_ops->readpage; if (wipe) - filler = cachefs_block_dummy_filler; + filler = cachefs_block_zero_filler; page = read_cache_page(mapping, bix, filler, block); @@ -278,8 +369,7 @@ get_page(page); } - clear_bit(CACHEFS_BLOCK_ALLOC, &block->flags); - wake_up_all(&block->writewq); + cachefs_block_alloc_unlock(block); if (_block) { *_block = block; @@ -301,8 +391,7 @@ return 0; intr2: - clear_bit(CACHEFS_BLOCK_ALLOC, &block->flags); - wake_up_all(&block->writewq); + cachefs_block_alloc_unlock(block); intr: cachefs_block_put(block); _leave(" = -EINTR"); @@ -317,30 +406,13 @@ */ int cachefs_block_cow(struct cachefs_super *super, struct cachefs_block *block) { - DECLARE_WAITQUEUE(myself, current); - #ifndef CACHEFS_BLOCK_USE_COW _enter(",{%u}", block->bix); /* if COW is not permitted, then simply wait for the page to finish * being written back */ - if (test_bit(CACHEFS_BLOCK_COW, &block->flags)) { - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&block->writewq, &myself); - - while (test_bit(CACHEFS_BLOCK_COW, &block->flags)) { - if (signal_pending(current)) - break; - - schedule(); - set_current_state(TASK_INTERRUPTIBLE); - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(&block->writewq, &myself); - } - + cachefs_block_wait_on_flag(block, CACHEFS_BLOCK_COW, 1); _leave(" = 0"); return 0; @@ -354,18 +426,7 @@ _enter(",%u", block->bix); /* get the page alloc lock for this block */ - if (test_and_set_bit(CACHEFS_BLOCK_ALLOC, &block->flags)) { - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&block->writewq, &myself); - - while (test_and_set_bit(CACHEFS_BLOCK_ALLOC, &block->flags)) { - schedule(); - set_current_state(TASK_UNINTERRUPTIBLE); - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(&block->writewq, &myself); - } + cachefs_block_alloc_lock(block, 0); /* duplicate the page if it's flagged copy-on-write */ if (test_bit(CACHEFS_BLOCK_COW, 
&block->flags)) { @@ -410,18 +471,14 @@ get_page(page); } - clear_bit(CACHEFS_BLOCK_ALLOC, &block->flags); - wake_up_all(&block->writewq); - + cachefs_block_alloc_unlock(block); _leave(" = 0"); return 0; error_page: page_cache_release(newpage); error: - clear_bit(CACHEFS_BLOCK_ALLOC, &block->flags); - wake_up_all(&block->writewq); - + cachefs_block_alloc_unlock(block); _leave(" = %d", ret); return ret; #endif @@ -456,173 +513,16 @@ /*****************************************************************************/ /* - * insert a block into the superblock's lookup tree (if it doesn't already - * exist) - */ -struct cachefs_block *cachefs_block_insert(struct cachefs_super *super, - cachefs_blockix_t bix) -{ - struct cachefs_block *newblock, *block; - struct rb_node *parent, **p; - unsigned long flags; - - _enter(",%u", bix); - - if (bix > i_size_read(super->sb->s_bdev->bd_inode) / PAGE_SIZE) { - printk("CacheFS: trying to insert out of range block %x/%lx\n", - bix, - (unsigned long) - (i_size_read(super->sb->s_bdev->bd_inode) >> PAGE_SHIFT) - ); - BUG(); - } - - /* allocate and initialise a block record just in case */ - newblock = kmem_cache_alloc(cachefs_block_jar, SLAB_KERNEL); - if (!newblock) { - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); - } - - atomic_set(&newblock->usage,1); - newblock->flags = 0; - newblock->bix = bix; - newblock->super = super; - newblock->page = NULL; - newblock->writeback = NULL; - newblock->ref = NULL; - - parent = NULL; - block = NULL; - - /* see if the block is already recorded */ - write_lock_irqsave(&super->blk_tree_lock, flags); - p = &super->blk_tree.rb_node; - - while (*p) { - parent = *p; - block = rb_entry(parent, struct cachefs_block, lookup_node); - - if (bix < block->bix) - p = &(*p)->rb_left; - else if (bix > block->bix) - p = &(*p)->rb_right; - else - goto block_already_present; - } - - /* there's no block record yet - use the new one we allocated - * earlier */ - rb_link_node(&newblock->lookup_node, parent, p); - 
rb_insert_color(&newblock->lookup_node, &super->blk_tree); - write_unlock_irqrestore(&super->blk_tree_lock, flags); - - atomic_inc(&super->cnt_blk_tree); - _leave(" = %p {u=%d} [new]", newblock, atomic_read(&newblock->usage)); - return newblock; - - /* the block is already recorded, pin that one and dispose of - * the new one */ - block_already_present: - cachefs_block_get(block); - write_unlock_irqrestore(&super->blk_tree_lock, flags); - - dbgfree(newblock); - kmem_cache_free(cachefs_block_jar, newblock); - - _leave(" = %p {u=%d}", block, atomic_read(&block->usage)); - return block; - -} /* end cachefs_block_insert() */ - -/*****************************************************************************/ -/* - * find a block in the superblock's lookup tree - */ -struct cachefs_block *cachefs_block_find(struct cachefs_super *super, - cachefs_blockix_t bix) -{ - struct cachefs_block *block; - struct rb_node *node; - unsigned long flags; - - _enter(",%d", bix); - - /* do the lookup */ - read_lock_irqsave(&super->blk_tree_lock, flags); - node = super->blk_tree.rb_node; - - while (node) { - block = rb_entry(node, struct cachefs_block, lookup_node); - - if (bix < block->bix) - node = node->rb_left; - else if (bix > block->bix) - node = node->rb_right; - else - goto block_found; - } - read_unlock_irqrestore(&super->blk_tree_lock, flags); - - /* not found */ - _leave(" = -ENOENT"); - return ERR_PTR(-ENOENT); - - /* found - pin and return */ -block_found: - cachefs_block_get(block); - read_unlock_irqrestore(&super->blk_tree_lock, flags); - - _leave(" = %p{u=%d}", block, atomic_read(&block->usage)); - return block; - -} /* end cachefs_block_find() */ - -/*****************************************************************************/ -/* - * dispose of a block record - */ -void __cachefs_block_put(struct cachefs_block *block) -{ - struct cachefs_super *super = block->super; - unsigned long flags; - - _enter(",{u=%d bix=%d}", atomic_read(&block->usage), block->bix); - - /* see if 
we can remove from the superblock's lookup tree */ - write_lock_irqsave(&super->blk_tree_lock, flags); - - if (atomic_read(&block->usage) == 0) - rb_erase(&block->lookup_node, &super->blk_tree); - else - block = NULL; - - write_unlock_irqrestore(&super->blk_tree_lock, flags); - - /* destroy if now completely unused */ - if (block) { - atomic_dec(&super->cnt_blk_tree); - dbgfree(block); - kmem_cache_free(cachefs_block_jar, block); - } - - _leave(""); - -} /* end __cachefs_block_put() */ - -/*****************************************************************************/ -/* * withdraw from active service all the blocks residing on a device */ void cachefs_block_dissociate(struct fscache_cache *cache) { - struct cachefs_block *block, *xblock; + struct cachefs_block *block; //, *xblock; struct cachefs_super *super; - struct fscache_page *pageio; struct rb_node *node; unsigned long flags; - DECLARE_WAITQUEUE(myself, current); + //DECLARE_WAITQUEUE(myself, current); super = container_of(cache, struct cachefs_super, cache); @@ -644,6 +544,7 @@ for (;;) { block = NULL; +#if 0 /* find the next one in the tree */ write_lock_irqsave(&super->blk_tree_lock, flags); @@ -685,25 +586,11 @@ cachefs_block_put(xblock); /* wait for the netfs to finish with the block */ - if (test_bit(CACHEFS_BLOCK_NETFSBUSY, &block->flags)) { - set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&block->writewq, &myself); - - while (test_bit(CACHEFS_BLOCK_NETFSBUSY, - &block->flags)) { - schedule(); - set_current_state(TASK_UNINTERRUPTIBLE); - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(&block->writewq, &myself); - } - - /* a block that's not yet achieved validity must be - * cancelled to avoid bad data later */ - cachefs_vj_cancel(block); - + cachefs_block_wait_on_flag(block, CACHEFS_BLOCK_NETFSBUSY, 0); cachefs_block_put(block); +#endif + + BUG(); } _leave(""); diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/cachefs-int.h linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/cachefs-int.h --- 
linux-2.6.12-rc2-mm3/fs/cachefs/cachefs-int.h 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/cachefs-int.h 2005-04-21 12:22:34.000000000 +0100 @@ -1,6 +1,6 @@ /* cachefs-int.h: general filesystem caching internal defs * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -28,6 +28,7 @@ struct cachefs_super; struct cachefs_block; struct cachefs_inode; +struct cachefs_page_record; struct cachefs_transaction; extern struct fscache_cache_ops cachefs_cache_ops; @@ -45,142 +46,252 @@ extern int cachefs_io_dummy_filler(void *data, struct page *page); -extern int cachefs_indr_io_get_block(struct inode *inode, struct page *page, - struct fscache_page *pageio, int create); +extern int cachefs_indr_io_locate_block(struct cachefs_inode *inode, + struct cachefs_page_record *record, + struct page **_page); + +enum cachefs_pagetype { + CACHEFS_PAGETYPE_DATA, + CACHEFS_PAGETYPE_INODE, + CACHEFS_PAGETYPE_INDR_SINGLE, + CACHEFS_PAGETYPE_INDR_DOUBLE, + CACHEFS_PAGETYPE_INDR_DOUBLE_SINGLE, + CACHEFS_PAGETYPE_INDR_TRIPLE, + CACHEFS_PAGETYPE_INDR_TRIPLE_DOUBLE, + CACHEFS_PAGETYPE_INDR_TRIPLE_DOUBLE_SINGLE, +} __attribute__((packed)); + +enum cachefs_pagestate { + CACHEFS_PAGESTATE_UNKNOWN, /* don't know if there's a block on disc */ + CACHEFS_PAGESTATE_LOOKUP, /* looking up on disc */ + CACHEFS_PAGESTATE_EXTANT, /* block allocated on disk */ + CACHEFS_PAGESTATE_ABSENT, /* block does not exist */ + CACHEFS_PAGESTATE_ALLOCRESV, /* reserved block allocation */ + CACHEFS_PAGESTATE_ALLOCPEND, /* pending block allocation */ + CACHEFS_PAGESTATE_ERROR, /* chain broken by I/O error */ +} __attribute__((packed)); -struct cachefs_reclaimable { +struct cachefs_cull { unsigned ino; time_t atime; }; /*****************************************************************************/ /* + * 
cachefs page record + * - stored per-inode + */ +struct cachefs_page_record +{ + struct rb_node lookup; /* page record tree link */ + unsigned long index; /* page offset */ + unsigned long flags; +#define CACHEFS_PAGEREC_LOCK 0 /* access lock */ +#define CACHEFS_PAGEREC_BOUNDARY 1 /* boundary block */ +#define CACHEFS_PAGEREC_NEW 2 /* new block */ +#define CACHEFS_PAGEREC_NETFS_HOLDING 3 /* held by netfs */ + enum cachefs_pagetype pagetype; /* type of page */ + enum cachefs_pagestate pagestate; /* state of page */ + uint16_t ptr_ix; /* index into parent pointer block */ + atomic_t usage; /* usage count */ + struct cachefs_page_record *parent; /* parent record in indirection chain */ + struct cachefs_block *block; /* this metadata block */ +}; + +extern kmem_cache_t *cachefs_pagerec_jar; + +struct cachefs_page_record *cachefs_pagerec_get(struct cachefs_inode *inode, + unsigned long index, + unsigned long gfp); + +extern void _cachefs_pagerec_put(struct cachefs_inode *inode, + struct cachefs_page_record *rec); +static inline void cachefs_pagerec_put(struct cachefs_inode *inode, + struct cachefs_page_record *rec) +{ + if (atomic_dec_and_test(&rec->usage)) + _cachefs_pagerec_put(inode, rec); +} + +extern struct cachefs_page_record *cachefs_pagerec_find(struct cachefs_inode *inode, + unsigned long index); + +extern int cachefs_pagerec_begin_lookup(struct cachefs_inode *inode, + struct cachefs_page_record *record); + +extern void cachefs_pagerec_end_lookup(struct cachefs_inode *inode, + struct cachefs_page_record *record, + int pass_along); + +extern int cachefs_pagerec_lock(struct cachefs_inode *inode, + struct cachefs_page_record *record); + +extern void cachefs_pagerec_unlock(struct cachefs_inode *inode, + struct cachefs_page_record *record); + +/*****************************************************************************/ +/* + * data operation record + */ +struct cachefs_data_op +{ + struct list_head link; + fscache_rw_complete_t callback; /* netfs callback 
function */ + void *callback_data; /* netfs callback data */ + void *cookie_data; /* cookie callback data */ + struct page *netfs_page; /* netfs data page */ + struct cachefs_inode *inode; /* inode to which it'll be attached */ + struct cachefs_page_record *record; /* cachefs page->block mapping */ + atomic_t usage; /* usage count */ +}; + +extern void __cachefs_data_op_put(struct cachefs_data_op *data_op); +static inline void cachefs_data_op_put(struct cachefs_data_op *data_op) +{ + if (atomic_dec_and_test(&data_op->usage)) + __cachefs_data_op_put(data_op); +} + +/*****************************************************************************/ +/* * cachefs superblock private information */ struct cachefs_super { - struct fscache_cache cache; /* cache handle */ - struct super_block *sb; - struct cachefs_inode *imetadata; /* the metadata records file */ - struct inode *imisc; /* an inode covering the whole blkdev */ + struct fscache_cache cache; /* cache handle */ + struct super_block *sb; + struct cachefs_inode *imetadata; /* the metadata records file */ + struct inode *imisc; /* an inode covering the whole blkdev */ - unsigned long flags; + unsigned long flags; #define CACHEFS_SUPER_INIT_BLKDEV 0 /* T if initialising blockdev */ #define CACHEFS_SUPER_BATCH_TIMER 1 /* T if batch timer expired */ #define CACHEFS_SUPER_DO_RECLAIM 2 /* T if should do reclamation */ -#define CACHEFS_SUPER_RCM_IMM_SCAN 3 /* T if should scan for immediately - * reclaimable inodes */ -#define CACHEFS_SUPER_REPLAYING_UJNL 4 /* T if replaying u-journal */ - - int bio_wr_barrier; /* command to submit a write barrier BIO */ +#define CACHEFS_SUPER_DO_REAP 3 /* T if should reap dead inodes */ +#define CACHEFS_SUPER_DO_CULL 4 /* T if should cull old inodes */ +#define CACHEFS_SUPER_CULL_DISABLED 5 /* T if inode cull disabled */ +#define CACHEFS_SUPER_REPLAYING_UJNL 6 /* T if replaying update journal */ + + int bio_wr_barrier; /* command to submit a write barrier BIO */ + + /* space tracking */ + 
unsigned space_total; /* total amount of space */ + unsigned space_slack; /* amount of space to keep available */ + unsigned space_unpinned; /* amount of space not marked pinned */ + unsigned space_held; /* amount of space held by being in use */ + atomic_t space_reserve; /* amount of space reserved for future alloc */ + + /* block allocation interface */ + spinlock_t alloc_lock; + struct list_head alloc_readyq; /* queue of ready blocks */ + struct list_head alloc_maybeq; /* data ops alloc'd waiting on netfs */ + struct list_head alloc_allocq; /* data ops awaiting alloc+write */ + struct list_head alloc_processq; /* data ops undergoing alloc+write */ + atomic_t alloc_pool_n; /* number of available requests */ + atomic_t alloc_urgent_n; /* number of urgent requests */ /* block allocation and recycling management */ - struct rb_root blk_tree; /* block mapping tree */ - rwlock_t blk_tree_lock; - - cachefs_blockix_t alloc_cur; /* current free block alloc stack */ - unsigned alloc_cur_n; /* current occupancy of alloc stack */ - unsigned short alloc_leaf; /* next leaf to allocate */ - struct cachefs_block *alloc_block; /* current node in allocation stack */ - struct page *alloc_node; /* current node in allocation stack */ - struct cachefs_block *alloc_nxblock; /* next node in allocation tree */ - struct page *alloc_next; /* next node in allocation tree */ - struct semaphore alloc_sem; /* allocation semaphore */ - wait_queue_head_t alloc_wq; /* processes waiting for allocation */ - - struct cachefs_block *recycle_block; /* current node in recycle stack */ - struct page *recycle_node; /* current node being recycled to */ - unsigned recycle_room; /* room remaining in front recycle node */ - cachefs_blockix_t recycle_cur; /* current node in recycle stack */ - unsigned recycle_cur_n; /* current occupancy of reserve stack */ - - /* inode reclamation */ - spinlock_t rcm_lock; - - unsigned *rcm_imm_buf; /* circular immediate-reclaim buffer */ - unsigned short rcm_imm_head; - 
unsigned short rcm_imm_tail; + struct rb_root blk_tree; /* block mapping tree */ + rwlock_t blk_tree_lock; -#define CACHEFS_RCM_IMM_BUFSIZE (PAGE_SIZE/sizeof(unsigned)) + cachefs_blockix_t alloc_cur; /* current free block alloc stack */ + unsigned alloc_cur_n; /* current occupancy of alloc stack */ + unsigned short alloc_leaf; /* next leaf to allocate */ + struct cachefs_block *alloc_block; /* current node in allocation stack */ + struct page *alloc_node; /* current node in allocation stack */ + struct cachefs_block *alloc_nxblock; /* next node in allocation tree */ + struct page *alloc_next; /* next node in allocation tree */ + struct semaphore alloc_sem; /* allocation semaphore */ +// wait_queue_head_t alloc_wq; /* processes waiting for allocation */ + + struct cachefs_block *recycle_block; /* current node in recycle stack */ + struct page *recycle_node; /* current node being recycled to */ + unsigned recycle_room; /* room remaining in front recycle node */ + cachefs_blockix_t recycle_cur; /* current node in recycle stack */ + unsigned recycle_cur_n; /* current occupancy of reserve stack */ + + /* inode culling - finding the inodes with the oldest atime and culling them */ + unsigned cull_hiwater; /* cull enable limit */ + unsigned cull_lowater; /* cull disable limit */ + struct cachefs_cull *cull_list; /* list of culls to be made */ + unsigned short cull_end; /* end of buffer contents */ + +#define CACHEFS_CULL_LISTSIZE (PAGE_SIZE / sizeof(struct cachefs_cull)) + + /* inode reaping - finding dead inodes and recycling them */ + spinlock_t reap_lock; + unsigned *reap_buf; /* circular buffer of reapable inodes */ + unsigned short reap_head; + unsigned short reap_tail; - struct cachefs_reclaimable *rcm_atm_list; /* atime-based reclaimable inode list */ - unsigned short rcm_atm_end; /* end of buffer contents */ +#define CACHEFS_REAP_BUFSIZE (PAGE_SIZE / sizeof(unsigned)) -#define CACHEFS_RCM_ATM_LISTSIZE (PAGE_SIZE/sizeof(struct cachefs_reclaimable)) - - unsigned 
rcm_ino; /* inode being reclaimed */ - unsigned rcm_indirect; /* current indirect block index */ - cachefs_blockix_t rcm_block; /* current block being recycled */ - unsigned short rcm_ptrnext; /* next entry in rcyblock to process */ - unsigned short rcm_ptrstop; /* entry in rcyblock to stop at */ + /* inode reclamation */ + unsigned rcm_ino; /* inode being reclaimed */ + unsigned rcm_indirect; /* current indirect block index */ + cachefs_blockix_t rcm_block; /* current block being recycled */ + unsigned short rcm_ptrnext; /* next entry in rcyblock to process */ + unsigned short rcm_ptrstop; /* entry in rcyblock to stop at */ - struct cachefs_inode *rcm_inode; /* inode being reclaimed */ - struct page *rcm_curpage; /* page holding rcm_block */ + struct cachefs_inode *rcm_inode; /* inode being reclaimed */ + struct page *rcm_curpage; /* page holding rcm_block */ /* update journal tracking */ - unsigned short ujnl_step; /* journal block size */ - unsigned short ujnl_head; /* next journal block to alloc */ - unsigned short ujnl_tail; /* next journal block to ACK */ - wait_queue_head_t ujnl_sync_wq; /* journal sync waitqueue */ - - struct semaphore ujnl_alloc_sem; - wait_queue_head_t ujnl_alloc_wq; - - unsigned ujnl_jsof; /* u-journal start sector */ - int16_t ujnl_batch; /* next batch to be written */ - uint16_t ujnl_serial; /* next serial to use in batch */ - spinlock_t ujnl_mk_lock; - struct list_head ujnl_markq; /* marked transactions */ - struct list_head ujnl_commitq; /* committed transactions */ - struct list_head ujnl_writeq; /* transactions being written */ - struct list_head ujnl_replayq; /* blocks having allocation replayed */ - - struct cachefs_alteration *njalt_markq; /* unjournalled alterations - marked */ - struct cachefs_alteration *njalt_writeq; /* unjournalled alterations - writing */ - spinlock_t njalt_lock; - - struct semaphore batch_sem; /* batching mutex */ - struct semaphore batch_uj_sem; /* ujnl written sync mutex */ - struct rw_semaphore 
batch_ctrl_sem; /* marking/batching interleave control */ - spinlock_t batch_qlock; - struct list_head batch_writeq; /* blocks awaiting writing */ - struct list_head batch_doneq; /* blocks written */ - struct list_head batch_errorq; /* blocks that got write error */ - wait_queue_head_t batch_done_wq; /* blocks write complete wait queue */ - struct timer_list batch_timer; /* time to next batch write */ - wait_queue_head_t batch_timer_wq; /* batch timer wait queue */ - wait_queue_head_t batch_sync_wq; /* batch sync wait queue */ - - /* validity journal tracking */ - unsigned long *vjnl_map; /* bitmap of free entries (1 page) */ - unsigned vjnl_count; /* number of free entries */ - spinlock_t vjnl_lock; /* allocation lock */ - wait_queue_head_t vjnl_alloc_wq; /* allocation queue */ - struct list_head vjnl_unallocq; /* entries requiring unallocation */ - struct list_head vjnl_writtenq; /* entries requiring clearing */ + unsigned short ujnl_step; /* journal block size */ + unsigned short ujnl_head; /* next journal block to alloc */ + unsigned short ujnl_tail; /* next journal block to ACK */ + wait_queue_head_t ujnl_sync_wq; /* journal sync waitqueue */ + + struct semaphore ujnl_alloc_sem; + wait_queue_head_t ujnl_alloc_wq; + + unsigned ujnl_jsof; /* u-journal start sector */ + int16_t ujnl_batch; /* next batch to be written */ + uint16_t ujnl_serial; /* next serial to use in batch */ + spinlock_t ujnl_mk_lock; + struct list_head ujnl_markq; /* marked transactions */ + struct list_head ujnl_commitq; /* committed transactions */ + struct list_head ujnl_writeq; /* transactions being written */ + struct list_head ujnl_replayq; /* blocks having allocation replayed */ + + struct cachefs_alteration *njalt_markq; /* unjournalled alterations - marked */ + struct cachefs_alteration *njalt_writeq; /* unjournalled alterations - writing */ + spinlock_t njalt_lock; + + struct semaphore batch_sem; /* batching mutex */ + struct semaphore batch_uj_sem; /* ujnl written sync mutex */ + 
struct rw_semaphore batch_ctrl_sem; /* marking/batching interleave control */ + spinlock_t batch_qlock; + struct list_head batch_writeq; /* blocks awaiting writing */ + struct list_head batch_doneq; /* blocks written */ + struct list_head batch_errorq; /* blocks that got write error */ + wait_queue_head_t batch_done_wq; /* blocks write complete wait queue */ + struct timer_list batch_timer; /* time to next batch write */ + wait_queue_head_t batch_timer_wq; /* batch timer wait queue */ + wait_queue_head_t batch_sync_wq; /* batch sync wait queue */ /* writeback journal tracking */ - unsigned long *wbj_map; /* bitmap of free entries (1 page) */ - unsigned wbj_count; /* number of free entries */ - spinlock_t wbj_lock; /* allocation lock */ - wait_queue_head_t wbj_alloc_wq; /* allocation queue */ + unsigned long *wbj_map; /* bitmap of free entries (1 page) */ + unsigned wbj_count; /* number of free entries */ + spinlock_t wbj_lock; /* allocation lock */ + wait_queue_head_t wbj_alloc_wq; /* allocation queue */ /* cache management daemon for this fs */ - task_t *dmn_task; /* cache daemon task */ - struct completion dmn_alive; /* completion of initialisation */ - struct completion dmn_dead; /* completion of death */ - wait_queue_head_t dmn_sleepq; /* general sleep queue */ - int dmn_die; /* request to die */ + task_t *dmn_task; /* cache daemon task */ + struct completion dmn_alive; /* completion of initialisation */ + struct completion dmn_dead; /* completion of death */ + wait_queue_head_t dmn_sleepq; /* general sleep queue */ + int dmn_die; /* request to die */ /* event counting */ - atomic_t cnt_blk_tree; /* number of outstanding blk_tree nodes */ - atomic_t cnt_ujnl_mkrq; /* number of marks requested */ - atomic_t cnt_ujnl_mkgr; /* number of marks granted */ - atomic_t cnt_ujnl_mkwr; /* number of marks written */ - atomic_t cnt_ujnl_akrq; /* number of ACKs requested */ - atomic_t cnt_ujnl_akgr; /* number of ACKs granted */ - atomic_t cnt_ujnl_akwr; /* number of ACKs 
written */ - atomic_t cnt_ujnl_free; /* number of marks freed */ + atomic_t cnt_blk_tree; /* number of outstanding blk_tree nodes */ + atomic_t cnt_ujnl_mkrq; /* number of marks requested */ + atomic_t cnt_ujnl_mkgr; /* number of marks granted */ + atomic_t cnt_ujnl_mkwr; /* number of marks written */ + atomic_t cnt_ujnl_akrq; /* number of ACKs requested */ + atomic_t cnt_ujnl_akgr; /* number of ACKs granted */ + atomic_t cnt_ujnl_akwr; /* number of ACKs written */ + atomic_t cnt_ujnl_free; /* number of marks freed */ /* superblock copy */ struct cachefs_ondisc_superblock *layout; @@ -193,6 +304,9 @@ extern int cachefs_ujnl_check_barrier_cap(struct cachefs_super *super); +extern void cachefs_allocator(struct cachefs_super *super); +extern void cachefs_reaper(struct cachefs_super *super); + /*****************************************************************************/ /* * block management record @@ -226,9 +340,7 @@ struct list_head batch_link; /* link in batch writer's list */ struct page *page; /* current data for this block */ struct page *writeback; /* source of writeback for this block */ - struct fscache_page *ref; /* netfs's ref to this page */ - rwlock_t ref_lock; /* lock governing ref pointer */ - struct cachefs_vj_entry *vjentry; /* invalid block record */ + uint16_t serial; /* last time the block was allocated */ }; extern kmem_cache_t *cachefs_block_jar; @@ -242,19 +354,7 @@ extern struct cachefs_block * cachefs_block_find(struct cachefs_super *super, cachefs_blockix_t bix); -extern int cachefs_block_set(struct cachefs_super *super, - struct cachefs_block *block, - struct page *page, - struct fscache_page *pageio); - -extern int cachefs_block_set2(struct cachefs_super *super, - cachefs_blockix_t bix, - struct page *page, - struct fscache_page *pageio, - struct cachefs_block **_block); - extern int cachefs_block_read(struct cachefs_super *super, - struct cachefs_inode *inode, cachefs_blockix_t bix, int wipe, struct cachefs_block **_block, @@ -296,22 
+396,46 @@ } } -static inline struct cachefs_block *__cachefs_get_page_block(struct page *page) +static inline struct cachefs_block *cachefs_get_ixpage_block(struct page *page) { BUG_ON(!PagePrivate(page)); - return ((struct fscache_page *) page->private)->mapped_block; + return ((struct cachefs_page_record *) page->private)->block; +} + +static inline cachefs_blockix_t cachefs_get_ixpage_bix(struct page *page) +{ + return cachefs_get_ixpage_block(page)->bix; } -static inline void cachefs_page_modify(struct cachefs_super *super, - struct page **page) +static inline void cachefs_ixpage_modify(struct cachefs_super *super, + struct page **page) { - cachefs_block_modify(super, __cachefs_get_page_block(*page), page); + cachefs_block_modify(super, cachefs_get_ixpage_block(*page), page); } extern void cachefs_block_dissociate(struct fscache_cache *cache); -#define cachefs_mapped_block(PGIO) ((struct cachefs_block *) (PGIO)->mapped_block) -#define cachefs_mapped_bix(PGIO) (((struct cachefs_block *) (PGIO)->mapped_block)->bix) +/*****************************************************************************/ +/* + * CacheFS specific search result + */ +struct cachefs_search_result +{ + struct fscache_search_result srch; /* embedded FS-Cache record */ + unsigned ino; /* node ID (or 0 if negative) */ +}; + +extern struct fscache_search_result *cachefs_srch_alloc(struct fscache_cache *cache, + unsigned long gfp); + +extern void cachefs_srch_free(struct fscache_search_result *srch); + +static inline void cachefs_srch_set(struct fscache_search_result *_srch, unsigned ino) +{ + struct cachefs_search_result *srch; + srch = container_of(_srch, struct cachefs_search_result, srch); + srch->ino = ino; +} /*****************************************************************************/ /* @@ -319,17 +443,22 @@ */ struct cachefs_inode { - struct inode vfs_inode; /* VFS inode record for this file */ - struct fscache_node node; /* fscache handle */ + struct inode vfs_inode; /* VFS inode 
record for this file */ + struct fscache_node node; /* fscache handle */ - struct cachefs_block *metadata; /* block containing metadata */ - struct page *metadata_page; /* page mapped to metadata block */ - struct rw_semaphore metadata_sem; /* metadata page access semaphore */ - unsigned short metadata_offset; /* metadata record offset */ - - unsigned short index_dsize; /* size of data in each index entry */ - unsigned short index_esize; /* size of index entries */ - unsigned short index_epp; /* number of index entries per page */ + struct rb_root page_records; /* tree of represented page records */ + struct list_head pagerec_accwq; /* access lock wait queue for page records */ + struct list_head pagerec_lookupwq; /* lookup completion queue for page records */ + spinlock_t pagerec_lock; + + struct cachefs_block *metadata; /* block containing metadata */ + struct page *metadata_page; /* page mapped to metadata block */ + struct rw_semaphore metadata_sem; /* metadata page access semaphore */ + unsigned short metadata_offset; /* metadata record offset */ + + unsigned short index_dsize; /* size of data in each index entry */ + unsigned short index_esize; /* size of index entries */ + unsigned short index_epp; /* number of index entries per page */ }; extern struct inode_operations cachefs_status_inode_operations; @@ -366,7 +495,6 @@ extern int cachefs_sync_page(struct page *page); extern int cachefs_invalidatepage(struct page *page, unsigned long offset); -extern int cachefs_releasepage(struct page *page, int gfp_flags); extern int cachefs_no_writepage(struct page *page, struct writeback_control *wbc); extern int cachefs_no_writepages(struct address_space *mapping, @@ -434,34 +562,6 @@ extern int cachefs_index_reclaim_one_entry(struct cachefs_super *super, struct cachefs_transaction **_trans); -/*****************************************************************************/ -/* - * record of as-yet invalid data block for which a v-journal entry exists - */ -struct 
cachefs_vj_entry -{ - struct list_head link; - cachefs_blockix_t bix; - unsigned ino; /* inode to which applies */ - unsigned pgnum; /* page in inode */ - unsigned vslot; /* v-journal slot in which mark stored */ - struct page *vpage; /* page holding vblock */ - struct cachefs_block *vblock; /* v-journal block in which mark stored */ - unsigned ventry; /* offset in vblock at which mark stored */ - unsigned upblock; /* block in which pointer stored */ - unsigned upentry; /* offset in upblock at which pointer stored */ - int written; /* set when written */ -}; - -extern int cachefs_vj_alloc(struct cachefs_transaction *trans, - struct cachefs_inode *inode); -extern void cachefs_vj_release(struct cachefs_super *super, - struct cachefs_vj_entry *vjentry); -extern void cachefs_vj_cancel(struct cachefs_block *block); -extern void cachefs_vj_write_complete(struct cachefs_block *block); -extern void cachefs_vj_note_write_completion(struct cachefs_super *super); -extern int cachefs_vj_replay(struct cachefs_super *super); - /*****************************************************************************/ /* @@ -481,7 +581,7 @@ struct page *held_page; /* page on hold till writeback complete */ }; -#define CACHEFS_EFFECTS_PER_TRANS 4 +#define CACHEFS_EFFECTS_PER_TRANS 10 struct cachefs_transaction { @@ -496,10 +596,11 @@ struct cachefs_block *jblock; /* block holding ondisc u-journal entry */ struct page *jpage; /* page holding u-journal entry */ - struct cachefs_vj_entry *vjentry; /* associated v-journal entry */ struct cachefs_super *super; struct list_head sblink; /* next transaction in superblock's list */ + struct cachefs_data_op *data_op; /* operation to which applied */ + atomic_t usage; /* keep track of special changes that must only take effect under @@ -538,18 +639,22 @@ __cachefs_trans_put(trans); } +extern void cachefs_trans_record_chain(struct cachefs_ondisc_ujnl_chain *chain, + struct page *page); + extern void cachefs_trans_affects_block(struct cachefs_transaction 
*trans, struct cachefs_block *target, unsigned offset, unsigned size); static inline -void cachefs_trans_affects_page(struct cachefs_transaction *trans, - struct fscache_page *pageio, - unsigned offset, - unsigned size) +void cachefs_trans_affects_ixpage(struct cachefs_transaction *trans, + struct page *page, + unsigned offset, + unsigned size) { - cachefs_trans_affects_block(trans, pageio->mapped_block, offset, size); + struct cachefs_block *block = cachefs_get_ixpage_block(page); + cachefs_trans_affects_block(trans, block, offset, size); } static inline @@ -567,11 +672,9 @@ static inline void cachefs_trans_affects_super(struct cachefs_transaction *trans) { struct cachefs_super *super = trans->super; - cachefs_trans_affects_page(trans, - fscache_page_grab_private( - virt_to_page(super->layout)), - 0, - super->sb->s_blocksize); + struct cachefs_block *block = + (struct cachefs_block *) virt_to_page(super->layout)->private; + cachefs_trans_affects_block(trans, block, 0, super->sb->s_blocksize); } extern int cachefs_trans_mark(struct cachefs_transaction *trans); diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/cachefs-layout.h linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/cachefs-layout.h --- linux-2.6.12-rc2-mm3/fs/cachefs/cachefs-layout.h 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/cachefs-layout.h 2005-04-21 16:01:25.661939062 +0100 @@ -1,6 +1,6 @@ /* cachefs-layout.h: general filesystem caching on-disc layout * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -47,16 +47,17 @@ #define CACHEFS_SUPER_ENDIAN 0x1234 uint32_t version; /* format version */ -#define CACHEFS_SUPER_VERSION 1 +#define CACHEFS_SUPER_VERSION 2 /* layout */ uint32_t bsize; /* cache block size */ + uint16_t asize; /* alloc chunk size (blocks) */ + uint16_t asize_shift; /* alloc chunk size (log2 blocks) */ uint32_t metadata_size; /* cache metadata record size */ uint32_t metadata_bits; /* log2 cache metadata record size */ uint32_t ujnl_rsize; /* update journal record size */ uint32_t ujnl_recperblk; /* u-journal records per block */ cachefs_blockix_t bix_ujournal; /* start of update journal */ - cachefs_blockix_t bix_vjournal; /* start of invalid block journal */ cachefs_blockix_t bix_wbjournal; /* start of writeback journal */ cachefs_blockix_t bix_cache; /* start of data cache */ cachefs_blockix_t bix_unready; /* start of initially unallocated blocks */ @@ -90,7 +91,10 @@ #define CACHEFS_ONDISC_INDEX_ENTRY_MINSIZE \ (sizeof(struct cachefs_ondisc_index_entry) + sizeof(uint32_t)) -/* index definition description */ +/*****************************************************************************/ +/* + * index definition description + */ struct cachefs_ondisc_index_def { uint16_t dsize; @@ -128,8 +132,8 @@ struct cachefs_ondisc_index_entry header; uint32_t freelink; /* head of free entry list (or UINT_MAX) */ + uint32_t nblocks; /* number of allocated blocks */ uint32_t atime; /* last access time */ - uint32_t mtime; /* last modification time */ uint32_t pindex; /* parent index ID (0 for top of tree) */ uint32_t pindex_entry; /* parent index entry number */ uint64_t size; /* size of file */ @@ -146,8 +150,8 @@ /*****************************************************************************/ /* - * Free blocks are kept in pair of a very one sided trees (more horsetail - * plants than trees) + * Free blocks are kept in three very one sided 
trees (more horsetail plants + * than trees) * * +---------+ +---------+ +---------+ +---------+ * stk--->| |--->| |--->| |--->| |---> NULL @@ -157,9 +161,10 @@ * / | \ / | \ / | \ / | \ * free blocks free blocks free blocks free blocks * - * - each free block is on one of two trees, both pointed to by the ujournal: + * - each free block is on one of three trees, all pointed to by the ujournal: * - the "recycling stack" - all newly freed blocks end up on here * - the "alloc stack" - all allocations are popped off here + * - the "excise stack" - allocations removed by data-write replay * - when the alloc stack is empty, the recycling stack is transferred into * it * - the front node on the alloc stack is the current source of block @@ -168,6 +173,7 @@ * be allocated * - the front node on the recycling stack is the current sink of recycled * blocks + * - the excise stack is emptied onto the recycling stack */ struct cachefs_ondisc_free_node { @@ -181,268 +187,194 @@ /*****************************************************************************/ /* - * on-disc update journal - * - records changes being made to disc content, particularly the metadata + * record of indirection chain walked/allocated to deal with a block + */ +struct cachefs_ondisc_ujnl_chain +{ +#define CACHEFS_ONDISC_UJNL_ALLOC_SLOTS 5 + + /* - block[0] is the data block being touched + * - block[1] is the single indirection block (if present) + * - block[2] is the double indirection block (if present) + * ... + * - block[N] is the block containing the inode entry + */ + cachefs_blockix_t block[CACHEFS_ONDISC_UJNL_ALLOC_SLOTS]; + + /* - ptr_ix[0] is unused + * - ptr_ix[1] is the offset of the affected pointer in block[1] + * ...
+ * - ptr_ix[N] is the offset of the affected pointer in block[N] + */ + uint16_t ptr_ix[CACHEFS_ONDISC_UJNL_ALLOC_SLOTS]; + + /* - allocmap bit X is set if block[X] is allocated by this entry */ + uint8_t allocmap; + + /* - changemap bit Y is set if block[Y] is changed by this entry */ + uint8_t changemap; +}; + +/*****************************************************************************/ +/* + * on-disk update journal + * - records changes being made to disk content, particularly the metadata * - the serial number cycles through in ascending order * - ACKs specify everything between "index" & "block" as being complete * - serial numbers can wrap, but can't go into window of un-ACK'd marks * - journal slots are the size of a sector (blockdev block size) * - this means that two adjacent marks are made on separate sectors, and so - * the second doesn't have to wait for the first to be written to disc + * the second doesn't have to wait for the first to be written to disk * - the current slot allocation point is not permitted to lap the currently * un-ACK'd slots - the requestor must wait */ enum cachefs_ondisc_ujnl_mark { - /* NULL mark */ - CACHEFS_ONDISC_UJNL_NULL, - - /* batch stop mark */ - CACHEFS_ONDISC_UJNL_BATCH, - - /* batch completion mark */ - CACHEFS_ONDISC_UJNL_ACK, - - /* beginning new recycle_stk front node - * - block = block being begun - * - index = old front recycling node - * - ixentry = old front recycling node's count - * - upblock = block from which transferred (or 0 if from unready list) - * - upentry = entry in upblock[] - * - pgnum = new super->layout.bix_unready - */ - CACHEFS_ONDISC_UJNL_RECYC_BEGIN_NEW, - - /* transfer recycle_stk to alloc_stk - * - block = front block being transferred - * - upblock = 0 or else block at TOS of recycling stack if this was 2OS - */ - CACHEFS_ONDISC_UJNL_RECYC_TRANSFER, - - /* scavenge sets of pointers from super->rcyblock - * - block = block holding pointer array being processed - * - entry =
index into block[] of first pointer transferred - * - auxblock = recycling node that dependents are transferred to - * - auxentry = index into auxblock[] of first leaf filled - * - count = number of pointers transferred - */ - CACHEFS_ONDISC_UJNL_RECYC_SCAVENGE, - - /* transfer bix_unready to recycle_stk - * - block = recycling node that blocks were pasted into - * - entry = index into block[] of first pointer inserted - * - auxblock = first unready block transferred - * - pgnum = new super->layout.bix_unready - * - count = number of blocks pasted - */ - CACHEFS_ONDISC_UJNL_RECYC_MAKEREADY, - - /* data file being created - * - index = parent index being attached to - * - ixentry = entry in parent index - * - pgnum = page in file holding index entry being allocated - * - block = block holding index entry being allocated - * - entry = offset of entry in block - * - ino = inode being attached to hold index contents - * - auxblock = metadata file block holding inode metadata - * - auxentry = offset of entry in auxblock - * - upblock = metadata file block holding index metadata - * - upentry = offset of entry in upblock - * - count = size of index entry in block - * - ixdata = index data - * - next_ino = next free metadata file entry - * - next_index = next free index file entry - */ - CACHEFS_ONDISC_UJNL_INODE_CREATING, - - /* data file being updated */ - CACHEFS_ONDISC_UJNL_INODE_UPDATING, - - /* data or index file being deleted - * - index = parent index being attached to [opt] - * - ixentry = entry in parent index [opt] - * - pgnum = page in file holding index entry being allocated [opt] - * - block = block holding index entry being allocated [opt] - * - entry = offset of entry in block [opt] - * - ino = inode being attached to hold index contents - * - auxblock = metadata file block holding inode metadata - * - auxentry = offset of entry in auxblock - * - upblock = metadata file block holding index metadata [opt] - * - upentry = offset of entry in upblock [opt] - * 
- count = size of index entry in block [opt] - * - next_ino = next free metadata file entry - * - next_index = next free index file entry [opt] - */ - CACHEFS_ONDISC_UJNL_INODE_DELETING, - - /* inode being marked for reclamation - * - ino = target inode - * - index = inode's parent index - * - ixentry = inode's parent index entry - * - pgnum = page in index holding entry being marked - * - block = metadata file block holding index metadata - * - entry = offset of entry in upblock - * - auxblock = metadata file block holding inode metadata - * - auxentry = offset of entry in auxblock - */ - CACHEFS_ONDISC_UJNL_INODE_MARK_RECLAIM, - - /* inode being reclaimed - * - ino = target inode - * - index = inode's parent index - * - ixentry = inode's parent index entry - * - pgnum = page in index holding entry being marked - * - block = metadata file block holding index metadata - * - entry = offset of entry in upblock - * - auxblock = metadata file block holding inode metadata - * - auxentry = offset of entry in auxblock - */ - CACHEFS_ONDISC_UJNL_INODE_RECLAIMING, - - /* data file block allocation - * - ino = inode for which block allocated - * - pgnum = page of inode being instantiated - * - size = current file size - * - block = block allocated - * - auxblock = block holding inode's metadata - * - auxentry = offset in auxblock of metadata record - * - upblock = block which will point to this one - * - upentry = entry in block pointing to this one - * - auxmark = v-journal entry number - */ - CACHEFS_ONDISC_UJNL_DATA_ALLOCING, - - /* completed write on page in cache - * - ino = inode for which block was written - * - pgnum = which page of inode was written - * - block = block written - * - auxmark = v-journal entry number - */ - CACHEFS_ONDISC_UJNL_DATA_WRITTEN, - - /* data block being unallocated - * - index = old front recycling node - * - ixentry = old front recycling node's count - * - ino = inode to which block belongs - * - pgnum = which page of inode being 
unallocated - * - block = block being recycled - * - auxblock = (old) front recycling node - * - auxentry = index into auxblock[] of leaf filled (or UINT_MAX if new node) - * - upblock = block from which transferred - * - upentry = entry in upblock[] - * - auxmark = v-journal entry number - */ - CACHEFS_ONDISC_UJNL_DATA_UNALLOCING, - - /* indirect block being allocated - * - auxmark = which level being allocated - * - ino = inode for which block is being allocated - * - pgnum = which page of inode being allocated - * - size = current file size - * - block = block being allocated - * - auxblock = block holding inode's metadata - * - auxentry = offset in auxblock of metadata record - * - upblock = block which will point to this one - * - upentry = entry in block pointing to this one - */ - CACHEFS_ONDISC_UJNL_INDIRECT_ALLOCING, - - /* index file being extended (as for data block allocation) - * - ino = index inode - * - pgnum = page in file holding index entry being allocated - * - size = current file size - * - block = new block being allocated - * - auxblock = metadata file block holding index metadata - * - auxentry = offset of entry in auxblock - * - upblock = block holding pointer to new block - * - upentry = offset of entry in upblock - * - count = size of index entry (inc header) in block - * - next_index = next free index file entry - */ - CACHEFS_ONDISC_UJNL_INDEX_EXTENDING, - - /* index file being created - * - index = parent index being attached to - * - ixentry = entry in parent index - * - pgnum = page in file holding index entry being allocated - * - block = block holding index entry being allocated - * - entry = offset of entry in block - * - ino = inode being attached to hold index contents - * - auxblock = metadata file block holding inode metadata - * - auxentry = offset of entry in auxblock - * - upblock = metadata file block holding index metadata - * - upentry = offset of entry in upblock - * - count = size of index entry in block - * - ixdata = 
index definition and data - * - next_ino = next free metadata file entry - * - next_index = next free index file entry - */ - CACHEFS_ONDISC_UJNL_INDEX_CREATING, - - /* index entry being updated - * - index = index being modified - * - ixentry = entry in index - * - pgnum = page in file holding index entry being allocated - * - block = block holding index entry being allocated - * - entry = offset of entry in block - * - count = size of entry in block - * - ixdata = revised index data - */ - CACHEFS_ONDISC_UJNL_INDEX_UPDATING, - + CACHEFS_ONDISC_UJNL_NULL, /* NULL mark */ + CACHEFS_ONDISC_UJNL_BATCH, /* batch stop mark */ + CACHEFS_ONDISC_UJNL_ACK, /* batch completion mark */ + CACHEFS_ONDISC_UJNL_RECYC_BEGIN_NEW, /* beginning new recycle_stk front node */ + CACHEFS_ONDISC_UJNL_RECYC_TRANSFER, /* transfer recycle_stk to alloc_stk */ + CACHEFS_ONDISC_UJNL_RECYC_SCAVENGE, /* scavenge sets of pointers from super->rcyblock */ + CACHEFS_ONDISC_UJNL_RECYC_MAKEREADY, /* transfer bix_unready to recycle_stk */ + CACHEFS_ONDISC_UJNL_RECYC_INODE, /* recycle an inode's metadata */ + CACHEFS_ONDISC_UJNL_REAP_INODE, /* begin reclamation of inode */ + CACHEFS_ONDISC_UJNL_CREATE_INDEX, /* create an index file */ + CACHEFS_ONDISC_UJNL_CREATE_FILE, /* create a data file */ + CACHEFS_ONDISC_UJNL_DELETE_INODE, /* inode marked for reclamation */ + CACHEFS_ONDISC_UJNL_UPDATE_INDEX, /* update index entry */ + CACHEFS_ONDISC_UJNL_WRITE_DATA, /* write data to disk */ CACHEFS_ONDISC_UJNL__LAST } __attribute__((packed)); -struct cachefs_ondisc_ujnl_index { - struct cachefs_ondisc_index_def def; - uint32_t next_ino; /* next inode entry */ - uint32_t next_index; /* next index entry */ - uint8_t data[0]; -}; - struct cachefs_ondisc_update_journal { + /* journal control */ enum cachefs_ondisc_ujnl_mark mark; - - uint32_t auxmark; -#define CACHEFS_ONDISC_UJNL_SINGLE_0 0 /* single indirect (1 of) */ -#define CACHEFS_ONDISC_UJNL_DOUBLE_0 1 /* double indirect level 0 (1 of) */ -#define 
CACHEFS_ONDISC_UJNL_DOUBLE_1 2 /* double indirect level 1 (1K of) */ -#define CACHEFS_ONDISC_UJNL_TRIPLE_0 3 /* triple indirect level 0 (1 of) */ -#define CACHEFS_ONDISC_UJNL_TRIPLE_1 4 /* triple indirect level 1 (1K of) */ -#define CACHEFS_ONDISC_UJNL_TRIPLE_2 5 /* triple indirect level 2 (1M of) */ - int16_t batch; /* batch number */ uint16_t serial; /* serial number of entry in batch */ - uint32_t ino; /* in-cache inode number */ - uint32_t pgnum; - uint32_t size; - uint32_t index; - uint32_t ixentry; - uint16_t entry; - uint16_t auxentry; - uint16_t upentry; + + /* state tracking */ + uint16_t alloc_leaf; /* current alloc point in alloc_cur */ uint16_t rcm_ptrnext; /* next ptr in rcm_block to be reclaimed */ uint16_t rcm_ptrstop; /* last ptr in rcm_block + 1 */ - uint16_t count; - uint16_t alloc_leaf; /* current alloc point in alloc_cur */ uint16_t rcm_indirect; /* indirect block being reclaimed */ uint32_t rcm_ino; /* number of inode being reclaimed */ - cachefs_blockix_t block; - cachefs_blockix_t auxblock; - cachefs_blockix_t upblock; cachefs_blockix_t rcm_block; /* block currently being reclaimed */ cachefs_blockix_t alloc_cur; /* current block allocation node */ cachefs_blockix_t recycle_cur; /* current block recycling node */ + cachefs_blockix_t excise_cur; /* current block excision node */ + /* mark-specific data */ union { - /* recycled pointers */ - cachefs_blockix_t rcyptrs[0]; - - /* new/updated index entry */ - struct cachefs_ondisc_ujnl_index ixdata[0]; + /* beginning new recycle_stk front node */ + struct { + cachefs_blockix_t front; /* new front recycling node */ + cachefs_blockix_t second; /* old front recycling node */ + cachefs_blockix_t unready; /* new super->layout.bix_unready */ + cachefs_blockix_t ptr; /* ptr block from which detached (or 0) */ + uint32_t seccnt; /* second's count */ + uint16_t ptrix; /* index into ptr[] of pointer */ + } begin_new; + + /* transfer bix_unready to recycle_stk */ + struct { + cachefs_blockix_t dst; /*
recycling node that blocks were pasted into */ + cachefs_blockix_t first; /* first unready block transferred */ + cachefs_blockix_t unready; /* new super->layout.bix_unready */ + uint16_t dstix; /* index into dest[] of first pointer inserted */ + uint16_t count; /* number of blocks pasted */ + } make_ready; + + /* transfer of recycling stack to allocation stack */ + struct { + cachefs_blockix_t first; /* first block being transferred */ + cachefs_blockix_t front; /* front partial node or 0 */ + } recycle_transfer; + + /* pointer scavenging for recycling */ + struct { + cachefs_blockix_t src; /* block holding pointer array being processed */ + cachefs_blockix_t dst; /* recycling node that ptrs are transferred to */ + uint32_t ino; /* inode being dismantled */ + uint16_t srcix; /* index into source[] of first ptr transferred */ + uint16_t dstix; /* index into dest[] of first leaf filled */ + uint16_t count; /* number of pointers transferred */ + cachefs_blockix_t ptrs[0]; + } recycle_scavenge; + + /* index or data file deletion */ + struct { + cachefs_blockix_t ino_block; /* inode meta block */ + cachefs_blockix_t ix_block; /* index block */ + cachefs_blockix_t ix_ino_block; /* index meta block */ + uint32_t ino; /* deleted inode number */ + uint32_t ix_ino; /* index inode */ + uint32_t ix_entry; /* index entry */ + uint32_t ix_next; /* new next index entry */ + uint32_t ino_next; /* new next inode entry */ + uint16_t ix_dsize; /* index data size */ + uint16_t ix_esize; /* index entry size */ + uint16_t ix_epp; /* index entries per page */ + } recyc_inode; + + /* begin inode reap */ + struct { + uint32_t ino; /* marked inode number */ + } reap_inode; + + /* index or data file creation */ + struct { + struct cachefs_ondisc_ujnl_chain ino_chain; /* inode meta chain */ + struct cachefs_ondisc_ujnl_chain ix_chain; /* index chain */ + struct cachefs_ondisc_index_def def; /* index definition */ + cachefs_blockix_t ix_ino_block; /* index meta block */ + uint32_t ino; /* new
inode number */ + uint32_t ix_ino; /* index inode */ + uint32_t ix_entry; /* index entry */ + uint32_t ix_next; /* new next index entry */ + uint32_t ix_size; /* revised index file size */ + uint32_t ix_nblocks; /* revised number of index blocks */ + uint32_t ino_next; /* new next inode entry */ + uint32_t ino_size; /* revised metadata file size */ + uint32_t ino_nblocks; /* revised number of metadata blocks */ + uint16_t ix_esize; /* index entry size */ + uint8_t data[0]; /* index entry data */ + } create_inode; + + /* mark an inode as being deleted */ + struct { + cachefs_blockix_t ino_block; /* inode meta block */ + cachefs_blockix_t ix_block; /* index block */ + uint32_t ino; /* marked inode number */ + uint32_t ix_ino; /* index inode */ + uint32_t ix_entry; /* index entry */ + uint16_t ix_esize; /* index entry size */ + uint16_t ix_epp; /* index entries per page */ + } delete_inode; + + /* index entry update */ + struct { + cachefs_blockix_t ix_block; /* index block */ + uint32_t ino; /* inode whose index is being updated */ + uint32_t ix_ino; /* parent index inode */ + uint32_t ix_entry; /* parent index entry */ + uint16_t ix_dsize; /* index data size */ + uint16_t ix_esize; /* index entry size */ + uint16_t ix_epp; /* index entries per page */ + uint8_t data[0]; + } update_index; + + /* data block write */ + struct { + struct cachefs_ondisc_ujnl_chain chain; + uint32_t ino; /* new inode number */ + uint64_t pgnum; /* page index in file */ + uint64_t size; /* revised file size */ + uint32_t nblocks; /* revised number of blocks */ + } write_data; /* miscellaneous data */ - uint8_t data[0]; + uint8_t misc[0]; } u; }; @@ -451,28 +383,6 @@ /*****************************************************************************/ /* - * on-disc block validity journal - * - blocks noted here don't yet have valid data downloaded from the remote - * server - * - unused entries have ino==0 - * - changed under the influence of the u-journal - */ -struct
cachefs_ondisc_validity_journal -{ - uint32_t ino; /* inode number */ - uint32_t pgnum; /* page within inode */ -}; - -#define CACHEFS_ONDISC_VJNL_ENTPERPAGE \ - (PAGE_SIZE / sizeof(struct cachefs_ondisc_validity_journal)) - -#define CACHEFS_ONDISC_VJNL_SIZE 16 /* blocks */ - -#define CACHEFS_ONDISC_VJNL_ENTS \ - (CACHEFS_ONDISC_VJNL_ENTPERPAGE * CACHEFS_ONDISC_VJNL_SIZE) - -/*****************************************************************************/ -/* * on-disc writeback journal * - records pages that are pending being written back to the server */ diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/index.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/index.c --- linux-2.6.12-rc2-mm3/fs/cachefs/index.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/index.c 2005-04-21 13:44:16.000000000 +0100 @@ -1,6 +1,6 @@ /* index.c: general filesystem cache: index file management * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -26,7 +26,7 @@ #include #include "cachefs-int.h" -struct fscache_index_search_record { +struct cachefs_index_search_record { struct fscache_cookie *index; struct fscache_cookie *target; struct cachefs_inode *iinode; @@ -40,7 +40,7 @@ * mark an inode/index entry pair for deletion when so requested by the match * function supplied by the netfs */ -static void cachefs_index_search_delete(struct fscache_index_search_record *rec, +static void cachefs_index_search_delete(struct cachefs_index_search_record *rec, struct page *ixpage, unsigned ixentry, unsigned ixoffset, @@ -75,18 +75,16 @@ if (!trans) goto error; - trans->jentry->mark = CACHEFS_ONDISC_UJNL_INODE_MARK_RECLAIM; - trans->jentry->ino = inode->vfs_inode.i_ino; - trans->jentry->index = rec->iinode->vfs_inode.i_ino; - trans->jentry->ixentry = ixentry; - trans->jentry->pgnum = ixpage->index; - trans->jentry->block = __cachefs_get_page_block(ixpage)->bix; - trans->jentry->entry = ixoffset; - trans->jentry->auxblock = inode->metadata->bix; - trans->jentry->auxentry = inode->metadata_offset; + trans->jentry->mark = CACHEFS_ONDISC_UJNL_DELETE_INODE; + trans->jentry->u.delete_inode.ino = inode->vfs_inode.i_ino; + trans->jentry->u.delete_inode.ix_ino = rec->iinode->vfs_inode.i_ino; + trans->jentry->u.delete_inode.ix_entry = ixentry; + trans->jentry->u.delete_inode.ix_esize = rec->iinode->index_esize; + trans->jentry->u.delete_inode.ix_epp = rec->iinode->index_epp; + trans->jentry->u.delete_inode.ino_block = inode->metadata->bix; + trans->jentry->u.delete_inode.ix_block = cachefs_get_ixpage_bix(ixpage); - cachefs_trans_affects_page(trans, fscache_page_grab_private(ixpage), - ixoffset, sizeof(*xent)); + cachefs_trans_affects_ixpage(trans, ixpage, ixoffset, sizeof(*xent)); cachefs_trans_affects_inode(trans, inode); /* record the transaction in the journal */ @@ -96,7 +94,7 @@ /* change the parent index 
entry and the index's inode entry as to the * recycle state */ - cachefs_page_modify(super, &ixpage); + cachefs_ixpage_modify(super, &ixpage); xent = kmap_atomic(ixpage, KM_USER0) + ixoffset; xent->state = CACHEFS_ONDISC_INDEX_RECYCLE; @@ -110,23 +108,23 @@ cachefs_trans_commit(trans); /* attempt to schedule for immediate reclamation */ - spin_lock_irqsave(&super->rcm_lock, flags); + spin_lock_irqsave(&super->reap_lock, flags); - if (CIRC_SPACE(super->rcm_imm_head, - super->rcm_imm_tail, - CACHEFS_RCM_IMM_BUFSIZE) > 0 + if (CIRC_SPACE(super->reap_head, + super->reap_tail, + CACHEFS_REAP_BUFSIZE) > 0 ) { - super->rcm_imm_buf[super->rcm_imm_head] = + super->reap_buf[super->reap_head] = inode->vfs_inode.i_ino; - super->rcm_imm_head = - (super->rcm_imm_head + 1) & - (CACHEFS_RCM_IMM_BUFSIZE - 1); + super->reap_head = + (super->reap_head + 1) & + (CACHEFS_REAP_BUFSIZE - 1); } else { - set_bit(CACHEFS_SUPER_RCM_IMM_SCAN, &super->flags); + set_bit(CACHEFS_SUPER_DO_REAP, &super->flags); } - spin_unlock_irqrestore(&super->rcm_lock, flags); + spin_unlock_irqrestore(&super->reap_lock, flags); /* wake up kcachefsd */ set_bit(CACHEFS_SUPER_DO_RECLAIM, &super->flags); @@ -150,7 +148,7 @@ * mark an inode/index entry pair for deletion when so requested by the match * function supplied by the netfs */ -static void cachefs_index_search_update(struct fscache_index_search_record *rec, +static void cachefs_index_search_update(struct cachefs_index_search_record *rec, struct page *ixpage, unsigned ixentry, unsigned ixoffset, @@ -171,21 +169,20 @@ if (!trans) goto error; - trans->jentry->mark = CACHEFS_ONDISC_UJNL_INDEX_UPDATING; - trans->jentry->ino = ino; - trans->jentry->index = rec->iinode->vfs_inode.i_ino; - trans->jentry->ixentry = ixentry; - trans->jentry->pgnum = ixpage->index; - trans->jentry->block = __cachefs_get_page_block(ixpage)->bix; - trans->jentry->entry = ixoffset; - trans->jentry->count = rec->iinode->index_dsize; + trans->jentry->mark = 
CACHEFS_ONDISC_UJNL_UPDATE_INDEX; + trans->jentry->u.update_index.ino = ino; + trans->jentry->u.update_index.ix_ino = rec->iinode->vfs_inode.i_ino; + trans->jentry->u.update_index.ix_entry = ixentry; + trans->jentry->u.update_index.ix_dsize = rec->iinode->index_dsize; + trans->jentry->u.update_index.ix_esize = rec->iinode->index_esize; + trans->jentry->u.update_index.ix_epp = rec->iinode->index_epp; + trans->jentry->u.update_index.ix_block = cachefs_get_ixpage_bix(ixpage); - cachefs_trans_affects_page(trans, fscache_page_grab_private(ixpage), - ixoffset, sizeof(*xent)); + cachefs_trans_affects_ixpage(trans, ixpage, ixoffset, sizeof(*xent)); /* have the netfs transcribe the update into the transaction */ rec->index->idef->update(rec->target->netfs_data, - trans->jentry->u.ixdata[0].data); + trans->jentry->u.update_index.data); /* record the transaction in the journal */ ret = cachefs_trans_mark(trans); @@ -193,11 +190,11 @@ goto error; /* actually change the index entry in the page cache */ - cachefs_page_modify(super, &ixpage); + cachefs_ixpage_modify(super, &ixpage); xent = kmap_atomic(ixpage, KM_USER0) + ixoffset; memcpy(xent->u.data, - trans->jentry->u.ixdata[0].data, + trans->jentry->u.update_index.data, rec->iinode->index_dsize); kunmap_atomic(xent, KM_USER0); @@ -223,14 +220,14 @@ unsigned long offset, unsigned long size) { - struct fscache_index_search_record *rec; + struct cachefs_index_search_record *rec; unsigned long stop, tmp, esize; void *content; int ret; _enter(",{%lu},%lu,%lu", page->index, offset, size); - rec = (struct fscache_index_search_record *) desc->arg.buf; + rec = (struct cachefs_index_search_record *) desc->arg.buf; ret = size; /* round up to the first record boundary after the offset */ @@ -288,7 +285,7 @@ rec->iinode->vfs_inode.i_ino, rec->entry, page->index, - __cachefs_get_page_block(page)->bix, + cachefs_get_ixpage_bix(page), offset / esize); BUG(); } @@ -330,15 +327,17 @@ */ int cachefs_index_search(struct fscache_node *node, struct 
fscache_cookie *target, - struct fscache_search_result *result) + struct fscache_search_result *_result) { - struct fscache_index_search_record rec; + struct cachefs_index_search_record rec; + struct cachefs_search_result *result; struct cachefs_inode *index; struct file_ra_state ra; read_descriptor_t desc; loff_t pos; int ret; + result = container_of(_result, struct cachefs_search_result, srch); index = container_of(node, struct cachefs_inode, node); _enter("{%s,%lu,%Lu}", @@ -395,20 +394,90 @@ /*****************************************************************************/ /* + * update the index entry for an index or data file from the associated netfs + * data (ixnode is the index holding the entry, node the object it describes; the result of cachefs_block_begin_alter() or a cachefs_get_page() error is returned) + */ +int cachefs_index_update(struct fscache_node *ixnode, + struct fscache_node *node) +{ + struct cachefs_ondisc_index_entry *xent; + struct cachefs_ondisc_metadata *meta; + struct fscache_cookie *cookie = node->cookie; + struct cachefs_super *super; + struct cachefs_inode *index, *inode; + struct cachefs_block *block; + struct page *ixpage; + unsigned offs; + int ret; + + index = container_of(ixnode, struct cachefs_inode, node); + inode = container_of(node, struct cachefs_inode, node); + + _enter(","); + + super = inode->vfs_inode.i_sb->s_fs_info; + + /* find the entry number of this inode's index entry (kept in its metadata as pindex_entry) */ + meta = cachefs_metadata_preread(inode); + offs = meta->pindex_entry; + cachefs_metadata_postread(inode, meta); + + /* get the page holding the index data: page number = entry / index_epp */ + ixpage = cachefs_get_page(index, offs / index->index_epp); + if (IS_ERR(ixpage)) { + _leave(" = %ld", PTR_ERR(ixpage)); + return PTR_ERR(ixpage); + } + + offs = (offs % index->index_epp) * index->index_esize; /* byte offset of the entry within that page */ + + _debug("update ino=%lx pg={%lu}+%x", + index->vfs_inode.i_ino, ixpage->index, offs); + + /* we just alter the index entry directly without journalling the + * change - if what's on disc winds up obsolete because someone trips + * over the power cable, the netfs will ask for the entry to be deleted + * later.
We do, however, let the journal writer write the block for us + */ + block = cachefs_get_ixpage_block(ixpage); + + ret = cachefs_block_begin_alter(block); + if (ret < 0) + goto error_page; + + /* we may now need to look at a different page as the old one may have + * been C-O-W'd */ + cachefs_block_modify(super, block, &ixpage); + + /* get the netfs to make the change directly in the mapped entry's data area */ + xent = kmap_atomic(ixpage, KM_USER0) + offs; + cookie->iparent->idef->update(cookie->netfs_data, xent->u.data); + kunmap_atomic(xent, KM_USER0); + + cachefs_block_end_alter(block); + + error_page: + cachefs_put_page(ixpage); + _leave(" = %d", ret); + return ret; + +} /* end cachefs_index_update() */ + +#if 0 +/*****************************************************************************/ +/* * initialise a new index page (called in lieu of readpage) */ static int cachefs_index_preinit_page(void *data, struct page *page) { - struct fscache_page *pageio; + struct cachefs_page_record *record = data; _enter(",%p{%lu}", page, page->index); - /* attach a mapping cookie to the page */ - pageio = fscache_page_get_private(page, GFP_KERNEL); - if (IS_ERR(pageio)) { - _leave(" = %ld", PTR_ERR(pageio)); - return PTR_ERR(pageio); - } + /* attach a page record to the page */ + atomic_inc(&record->usage); + page->private = (unsigned long) record; + SetPagePrivate(page); /* clear the page */ clear_highpage(page); @@ -547,19 +616,20 @@ /*****************************************************************************/ /* * allocate an entry in the specified index file and associate an inode with it + * - the caller holds a write lock on the index cookie semaphore * - target->cookie->def determines whether the new inode will be a file or an * index - * - if an inode is successfully allocated *_newino will be set with the inode + * - if an inode is successfully allocated *_result will be set with the inode * number */ -int cachefs_index_add(struct fscache_node *node, +int cachefs_index_add(struct fscache_node *ixnode,
struct fscache_cookie *cookie, - struct fscache_search_result *result) -// unsigned *_newino) + struct fscache_search_result *_result) { struct cachefs_ondisc_index_entry *xent; struct cachefs_ondisc_ujnl_index *jindex; struct cachefs_ondisc_metadata *metadata; + struct cachefs_search_result *result; struct cachefs_transaction *trans; struct cachefs_super *super; struct cachefs_inode *index; @@ -567,18 +637,20 @@ unsigned ino, ixentry, offset, inonext, ixnext, ino_offset; int ret, loop; - index = container_of(node, struct cachefs_inode, node); + index = container_of(ixnode, struct cachefs_inode, node); + result = container_of(_result, struct cachefs_search_result, srch); _enter("{%lu},{%s},", index->vfs_inode.i_ino, index->node.cookie->idef->name); -// *_newino = 0; - super = index->vfs_inode.i_sb->s_fs_info; inopage = NULL; ixpage = NULL; trans = NULL; + down_write(&ixnode->i_alloc_sem); + down_write(&super->imetadata->i_alloc_sem); + /* reserve the next free entry in the parent index */ ret = cachefs_index_select_free_entry(index, &ixpage, &ixentry, &ixnext); @@ -613,18 +685,16 @@ trans->jentry->ino = ino; trans->jentry->size = i_size_read(&index->vfs_inode); trans->jentry->pgnum = ixpage->index; - trans->jentry->block = __cachefs_get_page_block(ixpage)->bix; + trans->jentry->block = cachefs_get_ixpage_bix(ixpage); trans->jentry->entry = offset; trans->jentry->count = index->index_dsize; - trans->jentry->auxblock = __cachefs_get_page_block(inopage)->bix; + trans->jentry->auxblock = cachefs_get_ixpage_bix(inopage); trans->jentry->auxentry = ino_offset; - trans->jentry->upblock = index->metadata->bix; - trans->jentry->upentry = index->metadata_offset; - cachefs_trans_affects_page(trans, fscache_page_grab_private(ixpage), - offset, index->index_esize); - cachefs_trans_affects_page(trans, fscache_page_grab_private(inopage), - ino_offset, super->layout->metadata_size); + cachefs_trans_affects_ixpage(trans, ixpage, + offset, index->index_esize); + 
cachefs_trans_affects_ixpage(trans, inopage, + ino_offset, super->layout->metadata_size); cachefs_trans_affects_inode(trans, index); cachefs_trans_affects_inode(trans, super->imetadata); @@ -632,7 +702,7 @@ /* also store in the journal information about the index modifications * we're going to make, including the netfs's search keys and other * data */ - jindex = &trans->jentry->u.ixdata[0]; + jindex = &trans->jentry->u.index.data[0]; jindex->next_ino = inonext; jindex->next_index = ixnext; @@ -700,7 +770,6 @@ metadata->header.ino = 0xfefefe; metadata->size = 0; metadata->freelink = UINT_MAX; - metadata->mtime = CURRENT_TIME.tv_sec; metadata->atime = CURRENT_TIME.tv_sec; metadata->pindex = index->vfs_inode.i_ino; metadata->pindex_entry = ixentry; @@ -719,10 +788,11 @@ cachefs_trans_commit(trans); trans = NULL; -// *_newino = ino; result->ino = ino; error: + up_write(&super->imetadata->i_alloc_sem); + up_write(&ixnode->i_alloc_sem); cachefs_trans_put(trans); cachefs_put_page(inopage); cachefs_put_page(ixpage); @@ -731,218 +801,4 @@ return ret; } /* end cachefs_index_add() */ - -/*****************************************************************************/ -/* - * update the index entry for an index or data file from the associated netfs - * data - */ -int cachefs_index_update(struct fscache_node *ixnode, - struct fscache_node *node) -{ - struct cachefs_ondisc_index_entry *xent; - struct cachefs_ondisc_metadata *meta; - struct fscache_cookie *cookie = node->cookie; - struct cachefs_super *super; - struct cachefs_inode *index, *inode; - struct cachefs_block *block; - struct page *ixpage; - unsigned offs; - int ret; - - index = container_of(ixnode, struct cachefs_inode, node); - inode = container_of(node, struct cachefs_inode, node); - - _enter(","); - - super = inode->vfs_inode.i_sb->s_fs_info; - - if (fscache_is_cache_withdrawn(&super->cache)) - return 0; - - /* find the entry number of this inode's index entry */ - meta = cachefs_metadata_preread(inode); - offs = 
meta->pindex_entry; - cachefs_metadata_postread(inode, meta); - - /* get the page holding the index data */ - ixpage = cachefs_get_page(index, offs / index->index_epp); - if (IS_ERR(ixpage)) { - _leave(" = %ld", PTR_ERR(ixpage)); - return PTR_ERR(ixpage); - } - - offs = (offs % index->index_epp) * index->index_esize; - - _debug("update ino=%lx pg={%lu}+%x", - index->vfs_inode.i_ino, ixpage->index, offs); - - /* we just alter the index entry directly without journalling the - * change - if what's on disc winds up obsolete because someone trips - * over the power cable, the netfs will ask for the entry to be deleted - * later. We do, however, let the journal writer write the block for us - */ - block = __cachefs_get_page_block(ixpage); - - ret = cachefs_block_begin_alter(block); - if (ret < 0) - goto error_page; - - /* we may now need to look at a different page as the old one may have - * been C-O-W'd */ - cachefs_block_modify(super, block, &ixpage); - - /* get the netfs to make the change */ - xent = kmap_atomic(ixpage, KM_USER0) + offs; - cookie->iparent->idef->update(cookie->netfs_data, xent->u.data); - kunmap_atomic(xent, KM_USER0); - - cachefs_block_end_alter(block); - - error_page: - cachefs_put_page(ixpage); - _leave(" = %d", ret); - return ret; - -} /* end cachefs_index_update() */ - -/*****************************************************************************/ -/* - * mark as obsolete the next inode pinned by an entry in the index currently - * being reclaimed - * - called from kcachefsd - */ -int cachefs_index_reclaim_one_entry(struct cachefs_super *super, - struct cachefs_transaction **_trans) -{ - struct cachefs_ondisc_index_entry *xent; - struct cachefs_ondisc_metadata *metadata; - struct cachefs_transaction *trans; - struct cachefs_inode *inode = NULL; - unsigned long flags; - struct page *page = NULL; - unsigned pgnum, offset, ino; - int ret; - - _enter("{%x,%x}", super->rcm_ino, super->rcm_block); - - try_next_block: - /* find the next block we're 
going to scan */ - pgnum = super->rcm_block / super->rcm_inode->index_epp; - offset = super->rcm_block % super->rcm_inode->index_epp; - offset *= super->rcm_inode->index_esize; - - if (pgnum >= (i_size_read(&super->rcm_inode->vfs_inode) >> PAGE_SHIFT)) { - /* we've done this index entirely */ - _leave(" = 0"); - return 0; - } - - /* get the page holding the next index entry and extract the inode - * number from it */ - page = cachefs_get_page(super->rcm_inode, pgnum); - if (IS_ERR(page)) { - if (PTR_ERR(page) == -EIO) { - /* forget about this block - it's buggy */ - super->rcm_block = - (pgnum + 1) * super->rcm_inode->index_epp; - } - - _leave(" = %ld", PTR_ERR(page)); - return PTR_ERR(page); - } - - try_next_entry: - xent = kmap_atomic(page, KM_USER0) + offset; - ino = xent->ino; - BUG_ON(ino == 0 && xent->state != CACHEFS_ONDISC_INDEX_FREE); - BUG_ON(ino != 0 && xent->state == CACHEFS_ONDISC_INDEX_FREE); - kunmap_atomic(xent, KM_USER0); - - if (!ino) { - _debug("skip slot %u", super->rcm_block); - super->rcm_block++; - - offset += super->rcm_inode->index_esize; - if (offset + super->rcm_inode->index_esize <= PAGE_SIZE) - goto try_next_entry; - - cachefs_put_page(page); - page = NULL; - goto try_next_block; - } - - inode = cachefs_iget(super, ino); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - if (ret == -EIO) - super->rcm_block++; - goto error_noinode; - } - - /* use the pre-created a transaction to record the change of state */ - trans = *_trans; - - trans->jentry->mark = CACHEFS_ONDISC_UJNL_INODE_MARK_RECLAIM; - trans->jentry->ino = inode->vfs_inode.i_ino; - trans->jentry->index = super->rcm_ino; - trans->jentry->ixentry = super->rcm_block; - trans->jentry->pgnum = page->index; - trans->jentry->block = __cachefs_get_page_block(page)->bix; - trans->jentry->entry = offset; - trans->jentry->auxblock = inode->metadata->bix; - trans->jentry->auxentry = inode->metadata_offset; - - cachefs_trans_affects_inode(trans, inode); - - trans->jentry->rcm_block = 
super->rcm_block + 1; - - /* record the transaction in the journal */ - ret = cachefs_trans_mark(trans); - if (ret < 0) - goto error; - - *_trans = NULL; - - /* modify the inode metadata entry */ - metadata = cachefs_metadata_prewrite(inode); - metadata->header.state = CACHEFS_ONDISC_INDEX_RECYCLE; - metadata->pindex = 0; - metadata->pindex_entry = 0; - cachefs_metadata_postwrite(inode, metadata); - - /* commit the changes to disc */ - cachefs_trans_commit(trans); - - /* attempt to schedule the inode we've just marked for immediate - * reclamation */ - spin_lock_irqsave(&super->rcm_lock, flags); - - if (CIRC_SPACE(super->rcm_imm_head, - super->rcm_imm_tail, - CACHEFS_RCM_IMM_BUFSIZE) > 0 - ) { - super->rcm_imm_buf[super->rcm_imm_head] = - inode->vfs_inode.i_ino; - super->rcm_imm_head = - (super->rcm_imm_head + 1) & - (CACHEFS_RCM_IMM_BUFSIZE - 1); - } - else { - set_bit(CACHEFS_SUPER_RCM_IMM_SCAN, &super->flags); - } - - spin_unlock_irqrestore(&super->rcm_lock, flags); - - /* there may be more to do on this index */ - ret = -EAGAIN; - - error: - cachefs_iput(inode); - error_noinode: - cachefs_put_page(page); - - _leave(" = %d [%u]", ret, super->rcm_block); - return ret; - -} /* end cachefs_index_reclaim_one_entry() */ +#endif diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/indirection-io.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/indirection-io.c --- linux-2.6.12-rc2-mm3/fs/cachefs/indirection-io.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/indirection-io.c 2005-04-20 20:13:52.000000000 +0100 @@ -7,7 +7,7 @@ * further into a file a block is. All data cache files and index * files are in this form. * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -59,6 +59,7 @@ struct address_space *mapping, struct list_head *pages, unsigned nr_pages); +static int cachefs_indr_io_releasepage(struct page *page, int gfp_flags); struct address_space_operations cachefs_indr_io_addrspace_operations = { .readpage = cachefs_indr_io_readpage, @@ -70,11 +71,159 @@ .set_page_dirty = cachefs_no_set_page_dirty, .sync_page = cachefs_sync_page, .invalidatepage = cachefs_invalidatepage, - .releasepage = cachefs_releasepage, + .releasepage = cachefs_indr_io_releasepage, }; /*****************************************************************************/ /* + * locate a block by walking the indirection chain (up through record->parent); returns 0 on success, -ENODATA if no block is allocated, -EIO if the chain is broken, or another negative error + * - if requested (_page != NULL) we load the page into memory and return a ref to it in *_page; an inode-type record is always returned this way, so _page is mandatory for it + */ +int cachefs_indr_io_locate_block(struct cachefs_inode *inode, + struct cachefs_page_record *record, + struct page **_page) +{ + struct cachefs_block *block; + cachefs_blockix_t bix, *ptrs; + struct page *ptrpage; + int ret; + + kenter("{%lu},{%lu,%u},%p", + (unsigned long) inode->vfs_inode.i_ino, + record->index, record->pagetype, _page); + + /* all indirection chains start from the metadata page, which we should + * have in memory already + * - we deal with this here so that all further records can assume a + * parent; the page is handed back through *_page, so the caller must have supplied _page + */ + if (record->pagetype == CACHEFS_PAGETYPE_INODE) { + BUG_ON(!_page); + get_page(inode->metadata_page); + *_page = inode->metadata_page; + kleave(" = 0 [metadata]"); + return 0; + } + + /* deal with the page appropriately */ +state_changed: + switch (record->pagestate) { + /* if there was a previous I/O error on the chain then abort */ + case CACHEFS_PAGESTATE_ERROR: + kleave(" = -EIO [abort]"); + return -EIO; + + /* if we know there's no allocated pointer anywhere then return + * immediately */ + case CACHEFS_PAGESTATE_ABSENT: + case CACHEFS_PAGESTATE_ALLOCRESV: + case CACHEFS_PAGESTATE_ALLOCPEND: + kleave(" = -ENODATA
[absent]"); + return -ENODATA; + + /* if the block is known to exist, then we may return it if we + * have an in-memory copy, otherwise we should load it */ + case CACHEFS_PAGESTATE_EXTANT: + BUG_ON(!record->block); + goto block_is_mapped; + + /* if we just don't know then we have to attempt to follow the + * parent pointer */ + case CACHEFS_PAGESTATE_UNKNOWN: + case CACHEFS_PAGESTATE_LOOKUP: + goto read_pointer; + } + + /* we don't know if there's a pointer, so we must lock the record and + * examine what's on disk */ +read_pointer: + switch (cachefs_pagerec_begin_lookup(inode, record)) { + case 0: + /* we get to do the lookup */ + break; + case 1: + /* lookup completed by someone else */ + goto state_changed; + default: + kleave(" = -EINTR"); + return -EINTR; + } + + BUG_ON(record->block); + + /* fetch the parent block */ + ptrpage = NULL; + ret = cachefs_indr_io_locate_block(inode, record->parent, &ptrpage); + if (ret < 0) { + if (ret == -ENODATA) + goto block_is_absent; + if (ret == -EIO) + goto parent_chain_broken; + + /* nomem or intr - pass along to another process */ + cachefs_pagerec_end_lookup(inode, record, 1); + kleave(" = %d [locate fail]", ret); + return ret; + } + + /* extract the pointer */ + ptrs = kmap_atomic(ptrpage, KM_USER0); + bix = ptrs[record->ptr_ix]; + kunmap_atomic(ptrs, KM_USER0); + put_page(ptrpage); + ptrpage = NULL; + + /* and record it */ + if (!bix) + goto block_is_absent; + + block = cachefs_block_insert(inode->vfs_inode.i_sb->s_fs_info, bix); + if (IS_ERR(block)) { + cachefs_pagerec_end_lookup(inode, record, 1); + kleave(" = %ld [insert fail]", PTR_ERR(block)); + return PTR_ERR(block); + } + + record->block = block; + record->pagestate = CACHEFS_PAGESTATE_EXTANT; + cachefs_pagerec_end_lookup(inode, record, 0); + + /* we now know which block holds the data for this page */ +block_is_mapped: + if (!_page) { + kleave(" = 0 [block %u]", record->block->bix); + return 0; + } + + /* we were also requested to allocate and load the page */
+ ret = cachefs_block_read(inode->vfs_inode.i_sb->s_fs_info, + record->block->bix, 0, NULL, _page); + if (ret < 0) { + kleave(" = %d [read fail]", ret); + return ret; + } + + kleave(" = 0 [block %u pg %p]", record->block->bix, *_page); + return 0; + + /* found a gap in the chain */ +block_is_absent: + record->pagestate = CACHEFS_PAGESTATE_ABSENT; + cachefs_pagerec_end_lookup(inode, record, 0); + kleave(" = -ENODATA [noptr]"); + return -ENODATA; + + /* the parent chain was broken by an I/O error */ +parent_chain_broken: + record->pagestate = CACHEFS_PAGESTATE_ERROR; + cachefs_pagerec_end_lookup(inode, record, 0); + kleave(" = -EIO [parent]"); + return -EIO; + +} /* end cachefs_indr_io_locate_block() */ + +/*****************************************************************************/ +/* * set up to read a page from disc * - we try to amalgamate reads to consecutive pages * - modelled on the stuff in fs/buffer.c @@ -84,43 +233,61 @@ unsigned nr_pages, sector_t *last_block_in_bio) { - struct fscache_page *pageio; - struct inode *inode = page->mapping->host; + struct cachefs_page_record *record; + struct cachefs_inode *inode; + struct cachefs_block *block; sector_t last_block; int ret; - _enter(""); + inode = CACHEFS_FS_I(page->mapping->host); - /* get the page mapping cookie */ - pageio = fscache_page_get_private(page, GFP_KERNEL); - if (IS_ERR(pageio)) { - ret = PTR_ERR(pageio); - goto error; + kenter(",{%lu},%u,", page->index, nr_pages); + + /* get an indirection chain record */ + if (PagePrivate(page)) { + record = (struct cachefs_page_record *) page->private; + } + else { + record = cachefs_pagerec_get(inode, page->index, GFP_KERNEL); + if (!record) { + ret = -ENOMEM; + goto error; + } + SetPagePrivate(page); + page->private = (unsigned long) record; } /* check we aren't trying to go beyond the end of the file */ - last_block = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; + last_block = i_size_read(&inode->vfs_inode); + last_block = (last_block + PAGE_SIZE
- 1) >> PAGE_SHIFT; if (page->index >= last_block) goto hole; /* follow the on-disc block pointer indirection chain */ - if (inode->i_ino != CACHEFS_INO_METADATA || page->index != 0) { - ret = cachefs_indr_io_get_block(inode, page, pageio, 0); - if (ret<0) + if (inode->vfs_inode.i_ino != CACHEFS_INO_METADATA || + page->index != 0 + ) { + ret = cachefs_indr_io_locate_block(inode, record, NULL); + if (ret < 0) goto error; } - else { - /* the first block of the metadata file holds its own metadata, - * so we can't follow the chain there */ - ret = cachefs_block_set2(inode->i_sb->s_fs_info, 1, page, - pageio, NULL); - if (ret < 0) + /* the first block of the metadata file holds its own metadata, + * so we can't follow the chain there */ + else if (!record->block) { + block = cachefs_block_insert(inode->vfs_inode.i_sb->s_fs_info, + 1); + if (IS_ERR(block)) { + ret = PTR_ERR(block); goto error; + } + + record->block = block; } /* handle a hole */ - if (!pageio->mapped_block) + if (record->pagestate != CACHEFS_PAGESTATE_EXTANT) goto hole; + BUG_ON(!record->block); /* we need to add the page we're looking at to a BIO... 
if there's no * current BIO, or the page is not contiguous with the current BIO's @@ -128,33 +295,33 @@ */ if (!*_bio) goto allocate_new_bio; - else if (*last_block_in_bio + 1 != cachefs_mapped_bix(pageio)) + else if (*last_block_in_bio + 1 != record->block->bix) goto dispatch_bio; /* add the page to the current BIO */ - add_page: +add_page: if (!bio_add_page(*_bio, page, PAGE_SIZE, 0)) goto dispatch_bio; /* current BIO was full */ /* dispatch the BIO immediately if the current page lives on an * indirection chain boundary */ - if (test_bit(FSCACHE_PAGE_BOUNDARY, &pageio->flags)) { + if (record->flags & CACHEFS_PAGEREC_BOUNDARY) { submit_bio(READ, *_bio); *_bio = NULL; } else { - *last_block_in_bio = cachefs_mapped_bix(pageio); + *last_block_in_bio = record->block->bix; } _leave(" = 0"); return 0; /* dispatch the current BIO and allocate a new one */ - dispatch_bio: +dispatch_bio: submit_bio(READ, *_bio); - allocate_new_bio: - ret = cachefs_io_alloc(inode->i_sb, - cachefs_mapped_bix(pageio), +allocate_new_bio: + ret = cachefs_io_alloc(inode->vfs_inode.i_sb, + record->block->bix, nr_pages, GFP_KERNEL, _bio); if (ret < 0) { *_bio = NULL; @@ -166,16 +333,16 @@ * - in a data cache file that represents an unfetched block * - in an index file that's an error */ - hole: +hole: ret = -ENODATA; - if (test_bit(FSCACHE_NODE_ISINDEX, &CACHEFS_FS_I(inode)->node.flags)) { + if (test_bit(FSCACHE_NODE_ISINDEX, &inode->node.flags)) { printk("CacheFS: found unexpected hole in index/metadata file:" " ino=%lu pg=%lu\n", - inode->i_ino, page->index); + inode->vfs_inode.i_ino, page->index); ret = -EIO; } - error: +error: if (*_bio) { submit_bio(READ, *_bio); *_bio = NULL; @@ -184,6 +351,7 @@ _leave("= %d", ret); return ret; + } /* end cachefs_indr_io_do_readpage() */ /*****************************************************************************/ @@ -261,573 +429,38 @@ /*****************************************************************************/ /* - * allocate a block - * - journal 
mark is preallocated and pointed to by step->mark + * release an indirection-io page */ -static int cachefs_indr_io_get_block_alloc(struct super_block *sb, - struct cachefs_inode *inode, - struct cachefs_io_block_path *step) +static int cachefs_indr_io_releasepage(struct page *page, int gfp_flags) { - struct cachefs_ondisc_update_journal *jentry; - struct cachefs_ondisc_free_node *node; - struct cachefs_ondisc_metadata *metadata; - struct cachefs_super *super = sb->s_fs_info; - struct cachefs_block *block; - cachefs_blockix_t alloc2os = 0; - uint32_t next_count = 0; - int ret; - u8 *data; - - DECLARE_WAITQUEUE(myself, current); + struct cachefs_page_record *record; - _enter(",,{pg=%p}", step->page); + _enter("{%lu},%x", page->index, gfp_flags); - jentry = step->transaction->jentry; - - lock_page(step[1].page); - - /* do all the allocation first */ - ret = -ENOMEM; - - BUG_ON(!step[1].pageio); - BUG_ON(!step[1].pageio->mapped_block); - - cachefs_trans_affects_page(step->transaction, - step[1].pageio, - step->offset, - sizeof(cachefs_blockix_t)); - - /* index content data blocks need to be initialised on disc */ - if (step->flags & CACHEFS_BLOCK_INIT_INDEX) { - _debug("init index"); - - if (!(step[1].flags & CACHEFS_BLOCK_IS_INODE)) - cachefs_trans_affects_inode(step->transaction, inode); - - jentry->count = inode->index_esize; - jentry->ixentry = step->page->index * inode->index_epp; - - metadata = cachefs_metadata_preread(inode); - jentry->index = metadata->freelink; - cachefs_metadata_postread(inode, metadata); - } - - /* freshly allocated data blocks must be recorded in the v-journal */ - if (step->flags & CACHEFS_BLOCK_INIT_NETFSDATA) { - _debug("init data"); - - ret = cachefs_vj_alloc(step->transaction, inode); - if (ret<0) - goto error_trans; - - step->transaction->vjentry->pgnum = step->page->index; - step->transaction->vjentry->upblock = step[1].bix; - step->transaction->vjentry->upentry = step->offset; - - jentry->auxmark = 
step->transaction->vjentry->vslot; - } - - /* wait for a node to become available in the allocation stack */ - down(&super->alloc_sem); - - if (!super->alloc_node) { - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&super->alloc_wq, &myself); - - while (!super->alloc_node && !signal_pending(current)) { - wake_up(&super->dmn_sleepq); - schedule(); - set_current_state(TASK_INTERRUPTIBLE); - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(&super->alloc_wq, &myself); - - ret = -EINTR; - if (signal_pending(current)) - goto error_sem; - } - - _debug("use leaf %u/%lu", - super->alloc_leaf, CACHEFS_ONDISC_LEAVES_PER_FREE_NODE); - - BUG_ON(super->alloc_leaf > CACHEFS_ONDISC_LEAVES_PER_FREE_NODE); - - step->transaction->changed |= CACHEFS_TRANS_CHANGED_ALLOC; - - /* choose either a dependent block or the now empty node */ - if (super->alloc_leaf == CACHEFS_ONDISC_LEAVES_PER_FREE_NODE) { - /* no dependent blocks left - take the alloc node itself */ - block = super->alloc_block; - BUG_ON(!block); - - jentry->block = super->alloc_cur; - BUG_ON(!jentry->block); - - node = kmap_atomic(super->alloc_node, KM_USER0); - jentry->alloc_cur = node->next; - jentry->alloc_leaf = 0; - next_count = node->count; - kunmap_atomic(node, KM_USER0); - - alloc2os = jentry->alloc_cur; - - if (step->page) - cachefs_block_set(super, - block, - step->page, - step->pageio); - } - else { - /* take the next dependent page */ - node = kmap_atomic(super->alloc_node, KM_USER0); - jentry->block = node->leaves[super->alloc_leaf]; - alloc2os = node->next; - kunmap_atomic(node, KM_USER0); - BUG_ON(!jentry->block); - - jentry->alloc_cur = super->alloc_cur; - jentry->alloc_leaf = super->alloc_leaf + 1; - - if (!step->page) { - ret = cachefs_block_read(super, NULL, jentry->block, 1, - &block, &step->page); - if (ret < 0) - goto error_block; - step->pageio = fscache_page_grab_private(step->page); - } - else { - ret = cachefs_block_set2(super, jentry->block, - step->page, step->pageio, - &block); 
- if (ret < 0) - goto error_block; - } - } - - if (step->flags & - (CACHEFS_BLOCK_INIT_INDEX | CACHEFS_BLOCK_INIT_INDIRECT)) - cachefs_trans_affects_block(step->transaction, block, 0, - PAGE_SIZE); - - jentry->auxblock = inode->metadata->bix; - jentry->auxentry = inode->metadata_offset; - jentry->size = i_size_read(&inode->vfs_inode); - - _debug("selected block %u", jentry->block); - - BUG_ON(jentry->block > super->layout->bix_end); - - /* start 2OS block loading if we're near the end of the TOS block */ - if (alloc2os && - super->alloc_leaf >= CACHEFS_ONDISC_LEAVES_PER_FREE_NODE - 30 && - !super->alloc_next - ) { - _debug("prepare 2OS %u", alloc2os); - - ret = cachefs_block_read(super, NULL, alloc2os, 0, - &super->alloc_nxblock, - &super->alloc_next); - if (ret == 0) - set_bit(CACHEFS_BLOCK_CRITICAL, - &super->alloc_nxblock->flags); - else - printk("CacheFS: can't read 2OS of alloc stack: %d\n", - ret); - } - - /* make sure the journal is marked on disc before doing anything else */ - if (cachefs_trans_mark(step->transaction) < 0) - goto error_block; - - if (step->flags & CACHEFS_BLOCK_INIT_NETFSDATA) { - set_bit(CACHEFS_BLOCK_NETFSDATA, &block->flags); - block->vjentry = step->transaction->vjentry; - block->vjentry->bix = block->bix; - } - - /* index and indirection blocks need to be initialised before use */ - if (step->flags & (CACHEFS_BLOCK_INIT_INDIRECT | - CACHEFS_BLOCK_INIT_INDEX) - ) { - cachefs_block_modify(super, block, &step->page); - - if (step->flags & CACHEFS_BLOCK_INIT_INDIRECT) { - clear_highpage(step->page); - } - else { - struct cachefs_ondisc_index_entry *xent; - uint32_t entry, next; - void *content; - int loop; - - next = jentry->index; - entry = jentry->ixentry; - - content = kmap_atomic(step->page, KM_USER0); - clear_page(content); - - for (loop = inode->index_epp - 1; loop >= 0; loop--) { - xent = content + loop * jentry->count; - xent->state = CACHEFS_ONDISC_INDEX_FREE; - xent->u.freelink[0] = next; - next = entry + loop; + if 
(PagePrivate(page)) { + /* detach the page record from this page */ + record = (struct cachefs_page_record *) page->private; + page->private = 0; + ClearPagePrivate(page); + +#ifdef CONFIG_DEBUG_SLAB + if (record) { + int usage = atomic_read(&record->usage); + + if ((usage & 0xffffff00) == 0x6b6b6b00) { + printk("RECORD PUT ERROR" + " pg=%p{ix=%lu} rec=%p{u=%x}\n", + page, page->index, record, usage); + BUG(); } - - kunmap_atomic(content, KM_USER0); - - _debug("new freelink: %u", jentry->ixentry); } - } - - /* clean up the alloc stack tracking */ - if (super->alloc_leaf == 0) { - struct page *dead; - - /* move the allocation stack to the 2OS */ - dead = super->alloc_node; - - super->alloc_cur_n = next_count; - super->alloc_node = super->alloc_next; - super->alloc_block = super->alloc_nxblock; - super->alloc_next = NULL; - super->alloc_nxblock = NULL; - dbgpgfree(dead); - page_cache_release(dead); - } - - super->alloc_cur_n--; - - up(&super->alloc_sem); - - /* set the appropriate pointer on disc to point to this block */ - step->bix = jentry->block; - - if (!(step[1].flags & CACHEFS_BLOCK_IS_INODE)) { - cachefs_page_modify(super, &step[1].page); - - data = kmap_atomic(step[1].page, KM_USER0); - *(cachefs_blockix_t *)(data + step->offset) = step->bix; - kunmap_atomic(data, KM_USER0); - } - - metadata = cachefs_metadata_prewrite(inode); - metadata->size = i_size_read(&inode->vfs_inode); - metadata->mtime = CURRENT_TIME.tv_sec; - - if (step->flags & CACHEFS_BLOCK_INIT_INDEX) { - metadata->freelink = jentry->ixentry; - } - - if (step[1].flags & CACHEFS_BLOCK_IS_INODE) { - unsigned long pageaddr = (unsigned long) metadata & PAGE_MASK; - *(cachefs_blockix_t *)(pageaddr + step->offset) = step->bix; - } - - cachefs_metadata_postwrite(inode, metadata); - - unlock_page(step[1].page); +#endif - /* okay... 
done that */ - cachefs_trans_commit(step->transaction); - step->transaction = NULL; - - /* the allocation must be journalled before journalling-independent - * writes are permitted to modify a reused metadata block that had - * critical data on it - */ - if ((step->flags & CACHEFS_BLOCK_INIT_NETFSDATA) && - test_bit(CACHEFS_BLOCK_CRITICAL, &block->flags) - ) { - cachefs_trans_sync(super, CACHEFS_TRANS_SYNC_WAIT_FOR_MARK); - clear_bit(CACHEFS_BLOCK_CRITICAL, &block->flags); + cachefs_pagerec_put(CACHEFS_FS_I(page->mapping->host), record); } - cachefs_block_put(block); - block = NULL; - - _leave(" = 0 [block %u]", step->bix); + _leave(" = 0"); return 0; - error_block: - cachefs_block_put(block); - block = NULL; - error_sem: - up(&super->alloc_sem); - error_trans: - cachefs_trans_put(step->transaction); - step->transaction = NULL; - unlock_page(step[1].page); - _leave(" = %d", ret); - return ret; - -} /* end cachefs_indr_io_get_block_alloc() */ - -/*****************************************************************************/ -/* - * map a block in a file to a block within the block device - * - the inode meta-data contains: - * - ~120 direct pointers for the first part of the file - * - 1 single-indirect pointer for the first indirection block (1024 ptrs) - * - 1 double-indirect pointer for the remainder of the file - * and must be included in the final journal mark - * - returns: - * - 0 if successful and the block details are set in result - * - -ENODATA if no block at that index - * - sets CACHEFS_PAGE_BOUNDARY if the next block has a different indirection - * chain - * - if the inode forms part of an index, then the any blocks belong to that - * index and must be initialised as part of the final journalling mark - */ -int cachefs_indr_io_get_block(struct inode *vfs_inode, struct page *page, - struct fscache_page *pageio, int create) -{ - struct cachefs_io_block_path path[4]; - struct cachefs_inode *inode = CACHEFS_FS_I(vfs_inode); - struct cachefs_super *super = 
inode->vfs_inode.i_sb->s_fs_info; - const size_t ptrperblk = PAGE_SIZE / sizeof(cachefs_blockix_t); - sector_t iblock; - size_t ptrqty, notboundary = 1; - int pix, ret; - - _enter("%lu,{%p}%lu,,%d", - inode->vfs_inode.i_ino, page, page->index, create); - - BUG_ON(pageio->mapped_block); - - if (page->index / ptrperblk >= ptrperblk) { - _leave(" = -EIO [range]"); - return -EIO; - } - - memset(path, 0, sizeof(path)); - path[2].mktype = CACHEFS_ONDISC_UJNL_INDIRECT_ALLOCING; - path[1].mktype = CACHEFS_ONDISC_UJNL_INDIRECT_ALLOCING; - path[0].mktype = CACHEFS_ONDISC_UJNL_DATA_ALLOCING; - path[0].flags = CACHEFS_BLOCK_INIT_NETFSDATA; - - if (inode->index_esize) { - path[0].mktype = CACHEFS_ONDISC_UJNL_INDEX_EXTENDING; - path[0].flags = CACHEFS_BLOCK_INIT_INDEX; - } - - path[0].page = page; - path[0].pageio = pageio; - - /* is it inside direct range? */ - iblock = page->index; - ptrqty = super->sb->s_blocksize; - ptrqty -= sizeof(struct cachefs_ondisc_metadata); - ptrqty /= sizeof(cachefs_blockix_t); - if (iblock < ptrqty) { - _debug("direct (%llu/%u)", iblock, ptrqty); - notboundary = ptrqty - iblock + 1; - - path[0].offset = iblock * sizeof(cachefs_blockix_t); - path[0].offset += offsetof(struct cachefs_ondisc_metadata, - direct); - path[1].flags = CACHEFS_BLOCK_IS_INODE; - path[1].page = inode->metadata_page; - pix = 0; - goto process; - } - iblock -= ptrqty; - - /* is it inside single-indirect range? */ - ptrqty = ptrperblk; - if (iblock < ptrqty) { - _debug("indirect (%llu/%u)", iblock, ptrqty); - notboundary = (iblock + 1) & (ptrperblk - 1); - - path[0].offset = iblock * sizeof(cachefs_blockix_t); - path[1].flags = CACHEFS_BLOCK_INIT_INDIRECT; - path[1].offset = offsetof(struct cachefs_ondisc_metadata, - single_indirect); - path[1].auxmark = CACHEFS_ONDISC_UJNL_SINGLE_0; - path[2].flags = CACHEFS_BLOCK_IS_INODE; - path[2].page = inode->metadata_page; - pix = 1; - goto process; - } - iblock -= ptrqty; - - /* is it inside double-indirect range? 
*/ - ptrqty *= ptrqty; - if (iblock < ptrqty) { - _debug("double indirect (%llu/%u)", iblock, ptrqty); - notboundary = (iblock + 1) & (ptrperblk - 1); - - path[0].offset = - sector_div(iblock, - PAGE_SIZE / sizeof(cachefs_blockix_t)); - path[0].offset *= sizeof(cachefs_blockix_t); - path[1].flags = CACHEFS_BLOCK_INIT_INDIRECT; - path[1].offset = iblock * sizeof(cachefs_blockix_t); - path[1].auxmark = CACHEFS_ONDISC_UJNL_DOUBLE_1; - path[2].flags = CACHEFS_BLOCK_INIT_INDIRECT; - path[2].offset = offsetof(struct cachefs_ondisc_metadata, - double_indirect); - path[2].auxmark = CACHEFS_ONDISC_UJNL_DOUBLE_0; - path[3].flags = CACHEFS_BLOCK_IS_INODE; - path[3].page = inode->metadata_page; - pix = 2; - goto process; - } - - /* it seems to be inside triple-indirect range, which isn't supported - * yet (TODO) */ - BUG(); - pix = 3; - - /* walk the path, filling in missing steps if required */ - process: - dbgpgalloc(path[pix + 1].page); - page_cache_get(path[pix + 1].page); - - path[pix].offset += inode->metadata_offset; - - down_read(&inode->metadata_sem); - path[pix + 1].pageio = fscache_page_grab_private(inode->metadata_page); - up_read(&inode->metadata_sem); - - path[pix + 1].bix = cachefs_mapped_bix(path[pix + 1].pageio); - - ret = 0; - for (; pix >= 0; pix--) { - struct cachefs_io_block_path *step = &path[pix]; - - _debug("step level %u { ptr={%lu}+%u / bix=%u }", - pix, step[1].page->index, step->offset, step[1].bix); - - /* get the block number for this level */ - if (!step->bix) { - u8 *data = kmap_atomic(step[1].page, KM_USER0); - step->bix = - *(cachefs_blockix_t *)(data + step->offset); - kunmap_atomic(data, KM_USER0); - } - - /* allocate this block if necessary */ - if (!step->bix) { - struct cachefs_ondisc_update_journal *jentry; - - if (!create) { - _debug("path incomplete at level %d", pix); - ret = -ENODATA; - break; - } - - _debug("need to allocate level %d block", pix); - - step->transaction = - cachefs_trans_alloc( - inode->vfs_inode.i_sb->s_fs_info, - 
GFP_NOFS); - - ret = -ENOMEM; - if (!step->transaction) - break; - - jentry = step->transaction->jentry; - - jentry->ino = inode->vfs_inode.i_ino; - jentry->pgnum = page->index; - jentry->mark = step->mktype; - jentry->auxmark = step->auxmark; - jentry->upblock = - __cachefs_get_page_block(step[1].page)->bix; - jentry->upentry = step->offset; - - ret = cachefs_indr_io_get_block_alloc( - inode->vfs_inode.i_sb, inode, step); - if (ret < 0) - break; - step->flags |= CACHEFS_BLOCK_NEW; - } - else if (step->page) { - ret = cachefs_block_set2(super, step->bix, step->page, - step->pageio, NULL); - if (ret < 0) - break; - } - - /* if we're at the leaf, we don't need to actually access the - * block */ - if (pix <= 0) - continue; - - /* initiate or read the this block as appropriate */ - if (!step->page) { - if (step->flags & CACHEFS_BLOCK_NEW) { - _debug("getting level %d block %u", - pix, step->bix); - } - else { - _debug("reading level %d block %u", - pix, step->bix); - } - - ret = cachefs_block_read( - super, NULL, step->bix, - step->flags & CACHEFS_BLOCK_NEW, - NULL, &step->page); - if (ret < 0) { - printk("CacheFS: " - "read I/O error on level %d block %u:" - " %d\n", - pix, step->bix, ret); - break; - } - - wait_on_page_locked(step->page); - } - - if (!step->pageio) { - step->pageio = __fscache_page_grab_private(step->page); - if (!step->pageio) { - printk("step level %u" - " { ptr={%lu}+%u / bix=%u }", - pix, step[1].page->index, - step->offset, step[1].bix); - printk("mk=%u aux=%u flags=%x", - step->mktype, - step->auxmark, - step->flags); - BUG(); - } - } - } - - /* release the pages used to walk the path */ - for (pix = sizeof(path) / sizeof(path[0]) - 1; pix > 0; pix--) - if (path[pix].page) { - dbgpgfree(path[pix].page); - page_cache_release(path[pix].page); - } - - if (ret < 0) { - cachefs_block_put(xchg(&pageio->mapped_block, NULL)); - _leave(" = %d", ret); - return ret; - } - else if (path[0].flags & CACHEFS_BLOCK_INIT_NETFSDATA) { - 
set_bit(CACHEFS_BLOCK_NETFSDATA, - &cachefs_mapped_block(pageio)->flags); - } - - /* got the block - set the block offset in the page mapping record */ - if (path[0].flags & CACHEFS_BLOCK_NEW) - set_bit(FSCACHE_PAGE_NEW, &pageio->flags); - - _debug("notboundary = %u", notboundary); - if (!notboundary) - set_bit(FSCACHE_PAGE_BOUNDARY, &pageio->flags); - - _leave(" = 0 [bix=%u %c%c]", - cachefs_mapped_bix(pageio), - test_bit(FSCACHE_PAGE_BOUNDARY, &pageio->flags) ? 'b' : '-', - test_bit(FSCACHE_PAGE_NEW, &pageio->flags) ? 'n' : '-' - ); - return 0; -} /* end cachefs_indr_io_get_block() */ +} /* end cachefs_indr_io_releasepage() */ diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/inode.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/inode.c --- linux-2.6.12-rc2-mm3/fs/cachefs/inode.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/inode.c 2005-04-20 20:14:41.000000000 +0100 @@ -141,7 +141,7 @@ inode->metadata_page = metadata_page; /* finish initialising the inode from its own contents */ - inode->metadata = __cachefs_get_page_block(inode->metadata_page); + inode->metadata = cachefs_get_ixpage_block(inode->metadata_page); metadata = cachefs_metadata_preread(inode); @@ -152,9 +152,9 @@ inode->vfs_inode.i_blocks = metadata->size + inode->vfs_inode.i_blksize - 1; inode->vfs_inode.i_blocks >>= PAGE_SHIFT; inode->vfs_inode.i_version = 1; - inode->vfs_inode.i_atime.tv_sec = metadata->mtime; - inode->vfs_inode.i_mtime.tv_sec = metadata->mtime; - inode->vfs_inode.i_ctime.tv_sec = metadata->mtime; + inode->vfs_inode.i_atime.tv_sec = metadata->atime; + inode->vfs_inode.i_mtime.tv_sec = metadata->atime; + inode->vfs_inode.i_ctime.tv_sec = metadata->atime; inode->index_dsize = metadata->index.dsize; inode->index_esize = metadata->index.esize; @@ -210,7 +210,7 @@ inode->metadata_page = metadata_page; /* initialise the inode from the data we read */ - inode->metadata = __cachefs_get_page_block(inode->metadata_page); + inode->metadata = 
cachefs_get_ixpage_block(inode->metadata_page); _debug("Reading inode %lu metadata record {%lu,{%u}}+%04x", inode->vfs_inode.i_ino, @@ -229,9 +229,9 @@ inode->vfs_inode.i_gid = 0; inode->vfs_inode.i_nlink = 1; inode->vfs_inode.i_size = metadata->size; - inode->vfs_inode.i_atime.tv_sec = metadata->mtime; - inode->vfs_inode.i_mtime.tv_sec = metadata->mtime; - inode->vfs_inode.i_ctime.tv_sec = metadata->mtime; + inode->vfs_inode.i_atime.tv_sec = metadata->atime; + inode->vfs_inode.i_mtime.tv_sec = metadata->atime; + inode->vfs_inode.i_ctime.tv_sec = metadata->atime; inode->vfs_inode.i_blksize = PAGE_SIZE; inode->vfs_inode.i_blkbits = PAGE_SHIFT; inode->vfs_inode.i_blocks = metadata->size; @@ -381,8 +381,13 @@ */ void cachefs_clear_inode(struct inode *vfs_inode) { + struct cachefs_inode *inode; + _enter("{ino=%lu nl=%u}", vfs_inode->i_ino, vfs_inode->i_nlink); + inode = CACHEFS_FS_I(vfs_inode); + BUG_ON(inode->page_records.rb_node); + } /* end cachefs_clear_inode() */ /*****************************************************************************/ diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/interface.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/interface.c --- linux-2.6.12-rc2-mm3/fs/cachefs/interface.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/interface.c 2005-04-20 17:53:11.000000000 +0100 @@ -1,6 +1,6 @@ /* interface.c: filesystem cache interface * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. 
Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -15,28 +15,98 @@ #include #include "cachefs-int.h" -struct cachefs_io_end { - fscache_rw_complete_t func; - void *data; - void *cookie_data; - struct cachefs_block *block; -}; +/*****************************************************************************/ +/* + * allocate a negative search result record (returns NULL if out of memory) + */ +struct fscache_search_result *cachefs_srch_alloc(struct fscache_cache *cache, + unsigned long gfp) +{ + struct cachefs_search_result *srch; + + srch = kmalloc(sizeof(struct cachefs_search_result), gfp); + if (!srch) + return NULL; + + fscache_init_search_result(cache, &srch->srch); + srch->ino = 0; + return &srch->srch; + +} /* end cachefs_srch_alloc() */ + +/*****************************************************************************/ +/* + * free a search result record + */ +void cachefs_srch_free(struct fscache_search_result *_srch) +{ + struct cachefs_search_result *srch; + + if (_srch) { + srch = container_of(_srch, struct cachefs_search_result, srch); + kfree(srch); + } + +} /* end cachefs_srch_free() */ + +/*****************************************************************************/ +/* + * set a search result record to negative + */ +static void cachefs_srch_negate(struct fscache_search_result *_srch) +{ + struct cachefs_search_result *srch; + + srch = container_of(_srch, struct cachefs_search_result, srch); + srch->ino = 0; + +} /* end cachefs_srch_negate() */ + +/*****************************************************************************/ +/* + * see if search result is positive + */ +static int cachefs_is_srch_positive(struct fscache_search_result *_srch) +{ + struct cachefs_search_result *srch; + + srch = container_of(_srch, struct cachefs_search_result, srch); + return srch->ino != 0; + +} /* end cachefs_is_srch_positive() */ + 
+/*****************************************************************************/ +/* + * represent a search 
result for printing + */ +static unsigned long long cachefs_srch_id(struct fscache_search_result *_srch) +{ + struct cachefs_search_result *srch; + + srch = container_of(_srch, struct cachefs_search_result, srch); + return srch->ino; + +} /* end cachefs_srch_id() */ /*****************************************************************************/ /* * look up the nominated node for this cache */ -static struct fscache_node *cachefs_lookup_node(struct fscache_cache *cache, - unsigned ino) +static +struct fscache_node *cachefs_lookup_node(struct fscache_cache *cache, + struct fscache_cookie *cookie, + struct fscache_search_result *_srch) { + struct cachefs_search_result *srch; struct cachefs_super *super; struct cachefs_inode *inode; - _enter("%p,%d", cache, ino); - super = container_of(cache, struct cachefs_super, cache); + srch = container_of(_srch, struct cachefs_search_result, srch); - inode = cachefs_iget(super, ino); + _enter("%p,%p,{%d}", cache, cookie, srch->ino); + + inode = cachefs_iget(super, srch->ino); if (IS_ERR(inode)) { _leave(" = %ld [error]", PTR_ERR(inode)); return ERR_PTR(PTR_ERR(inode)); @@ -127,13 +197,43 @@ /*****************************************************************************/ /* + * reserve allocation space for this block and its indirection parents + * - we use the inode semaphore to prevent parallel page reads from reserving + * space for indirection blocks twice + */ +static inline void cachefs_reserve_space(struct cachefs_inode *inode, + struct cachefs_page_record *record) +{ + struct cachefs_super *super; + int qty; + + down(&inode->vfs_inode.i_sem); + + qty = 0; + for (; record; record = record->parent) { + if (record->pagestate != CACHEFS_PAGESTATE_ABSENT) + break; + record->pagestate = CACHEFS_PAGESTATE_ALLOCRESV; + qty++; + } + + super = inode->vfs_inode.i_sb->s_fs_info; + atomic_add(qty, &super->space_reserve); + + up(&inode->vfs_inode.i_sem); + + wake_up(&super->dmn_sleepq); + +} /* end cachefs_reserve_space() */ + 
+/*****************************************************************************/ +/* * handle notifications about read operations on a block */ static int cachefs_page_read_endio(struct bio *bio, unsigned int bytes_done, int error) { - struct cachefs_io_end *end_io = bio->bi_private; - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct cachefs_data_op *data_op = bio->bi_private; _enter("%p{%u},%u,%d", bio, bio->bi_size, bytes_done, error); @@ -143,19 +243,23 @@ return 1; } + bio_put(bio); + /* let the netfs know that all reads are now complete */ - for (; bvec >= bio->bi_io_vec; bvec--) - end_io->func(end_io->cookie_data, bvec->bv_page, end_io->data, - error); + data_op->callback(data_op->cookie_data, + data_op->netfs_page, + data_op->callback_data, + error); + + if (error) { + data_op->record->pagestate = CACHEFS_PAGESTATE_ERROR; + set_bit(CACHEFS_BLOCK_ERROR, &data_op->record->block->flags); + } /* wake up anyone waiting to juggle that block on disc */ - clear_bit(CACHEFS_BLOCK_NETFSBUSY, &end_io->block->flags); - wake_up(&end_io->block->writewq); - cachefs_block_put(end_io->block); - - dbgfree(end_io); - kfree(end_io); - bio_put(bio); + clear_bit(CACHEFS_BLOCK_NETFSBUSY, &data_op->record->block->flags); + wake_up(&data_op->record->block->writewq); + cachefs_data_op_put(data_op); _leave(" = 0"); return 0; @@ -164,162 +268,143 @@ /*****************************************************************************/ /* * read a page from the cache or allocate a block in which to store it + * - cache withdrawal is prevented by the caller + * - returns -EINTR if interrupted + * - returns -ENOMEM if ran out of memory + * - returns -ENOBUFS if no buffers can be made available for any other reason * - if the page is backed by a block in the cache: - * - a read will be started which will call end_io_func on completion + * - the page record will be left pinned + * - a read will be started which will call the callback on completion * - the wb-journal will be 
searched for an entry pertaining to this block * - if an entry is found: * - 1 will be returned * else * - 0 will be returned * - else if the page is unbacked: - * - a block will be allocated and attached - * - the v-journal will be marked to note the block contains invalid data + * - the page record will be left pinned + * - the page records for absent blocks will be moved to the ALLOCRESV state + * - the alloc reservation counter will be increased * - -ENODATA will be returned */ static int cachefs_read_or_alloc_page(struct fscache_node *node, struct page *page, - struct fscache_page *pageio, - fscache_rw_complete_t end_io_func, - void *end_io_data, + fscache_rw_complete_t callback, + void *callback_data, unsigned long gfp) { - struct cachefs_io_end *end_io = NULL; + struct cachefs_page_record *record; + struct cachefs_data_op *data_op; + struct cachefs_super *super; struct cachefs_inode *inode; - struct cachefs_block *block = NULL; - struct bio *bio = NULL; + struct bio *bio; int ret; _enter(""); inode = container_of(node, struct cachefs_inode, node); - /* handle the case of there already being a mapping, - * - must protect against cache removal - */ - _debug("check mapping"); - read_lock(&pageio->lock); + /* get the page record for this page */ + record = cachefs_pagerec_get(inode, page->index, gfp); + if (!record) + goto nomem; + + /* if there's no backing block, we need to consult the disk */ + if (!record->block) { + _debug("lookup block"); + + /* walk the indirection tree to see if there's a block on disc + * holding the data */ + ret = cachefs_indr_io_locate_block(inode, record, NULL); + if (ret < 0) + goto locate_error; + + if (record->pagestate != CACHEFS_PAGESTATE_EXTANT) { + /* there wasn't any data to be read */ + _debug("no data"); + + /* pin page record for the netfs */ + if (test_and_set_bit(CACHEFS_PAGEREC_NETFS_HOLDING, + &record->flags)) + cachefs_pagerec_put(inode, record); - block = pageio->mapped_block; - if (block && 
!fscache_is_cache_withdrawn(&block->super->cache)) - goto available_on_disc; /* already mapped */ - - read_unlock(&pageio->lock); - block = NULL; - - /* we don't know of a backing page, but there may be one recorded on - * disc... and if there isn't we'll request that one be allocated */ - _debug("get block"); - down(&inode->vfs_inode.i_sem); + cachefs_reserve_space(inode, record); - /* walk the indirection tree to see if there's a block on disc - * holding the data and if not, attempt to allocate one */ - ret = cachefs_indr_io_get_block(&inode->vfs_inode, page, pageio, 1); - if (ret < 0) - goto error_i; - - if (!test_and_clear_bit(FSCACHE_PAGE_NEW, &pageio->flags)) { - /* there was data - pin the block underlying it and read */ - read_lock(&pageio->lock); - - block = pageio->mapped_block; - if (block && - !fscache_is_cache_withdrawn(&block->super->cache)) - goto available_on_disc_i; - - /* it went out of service for some reason */ - read_unlock(&pageio->lock); - block = NULL; - ret = -ENOBUFS; - goto error_i; + _leave(" = -ENODATA"); + return -ENODATA; + } } - /* we allocated a new block, but didn't assign any data to it */ - up(&inode->vfs_inode.i_sem); - - /* point the mapped block at its referencer */ - write_lock(&cachefs_mapped_block(pageio)->ref_lock); - cachefs_mapped_block(pageio)->ref = pageio; - write_unlock(&cachefs_mapped_block(pageio)->ref_lock); - - _debug("no data [bix=%u ref=%p]", cachefs_mapped_bix(pageio), pageio); - - /* tell the caller we've allocated a block, but we don't have any data - * for them */ - _leave(" = -ENODATA"); - return -ENODATA; - - /* load the contents of the block into the specified page - we don't - * need the inode any more as we have a representation of the block */ - available_on_disc_i: - _debug("available_i"); - up(&inode->vfs_inode.i_sem); + /* there's a block available on disk, so we need to allocate an + * operation record and dispatch a BIO + */ + _debug("data available"); - available_on_disc: - _debug("available"); 
+ data_op = kmalloc(sizeof(*data_op), gfp); + if (!data_op) + goto nomem_r; + + data_op->callback = callback; + data_op->callback_data = callback_data; + data_op->cookie_data = node->cookie->netfs_data; + data_op->netfs_page = page; + data_op->inode = inode; + data_op->record = record; - /* pin the block whilst there's a BIO running on it */ - cachefs_block_get(block); - set_bit(CACHEFS_BLOCK_NETFSBUSY, &block->flags); - - read_unlock(&pageio->lock); - - /* record the netfs's callback */ - ret = -ENOMEM; - end_io = kmalloc(sizeof(*end_io), gfp); - if (!end_io) - goto error_nb; - - end_io->func = end_io_func; - end_io->data = end_io_data; - end_io->cookie_data = node->cookie->netfs_data; - end_io->block = block; + INIT_LIST_HEAD(&data_op->link); + atomic_set(&data_op->usage, 1); /* dispatch an operation to the block device */ - ret = -ENOMEM; bio = bio_alloc(gfp, 1); if (!bio) - goto error_nb; + goto nomem_dop; + + set_bit(CACHEFS_BLOCK_NETFSBUSY, &record->block->flags); + + super = inode->vfs_inode.i_sb->s_fs_info; - bio->bi_bdev = block->super->sb->s_bdev; - bio->bi_private = end_io; + bio->bi_bdev = super->sb->s_bdev; + bio->bi_private = data_op; bio->bi_end_io = cachefs_page_read_endio; - bio->bi_sector = block->bix; - bio->bi_sector <<= PAGE_SHIFT - block->super->sb->s_blocksize_bits; + bio->bi_sector = record->block->bix; + bio->bi_sector <<= PAGE_SHIFT - super->sb->s_blocksize_bits; if (!bio_add_page(bio, page, PAGE_SIZE, 0)) BUG(); - submit_bio(READ, bio); + /* pin for the netfs */ + if (!test_and_set_bit(CACHEFS_PAGEREC_NETFS_HOLDING, &record->flags)) + atomic_inc(&record->usage); - _debug("done"); + submit_bio(READ, bio); - /* point the mapped block at its referencer */ - write_lock(&block->ref_lock); - block->ref = pageio; - write_unlock(&block->ref_lock); + _debug("submitted"); /* tell the caller that there's a read operation in progress */ _leave(" = 0"); return 0; - error_nb: - clear_bit(CACHEFS_BLOCK_NETFSBUSY, &block->flags); - 
wake_up(&block->writewq); - goto error; - error_i: - _debug("error_i"); - up(&inode->vfs_inode.i_sem); - error: - _debug("error"); - cachefs_block_put(block); - if (bio) - bio_put(bio); - if (end_io) { - dbgfree(end_io); - kfree(end_io); +nomem_dop: + cachefs_data_op_put(data_op); +nomem_r: + cachefs_pagerec_put(inode, record); +nomem: + _leave(" = -ENOMEM"); + return -ENOMEM; + +locate_error: + _debug("locate error"); + if (ret == -ENOMEM) + goto nomem_r; + + cachefs_pagerec_put(inode, record); + + if (ret == -EINTR) { + _leave(" = -EINTR"); + return -EINTR; } - _leave(" = %d", ret); - return ret; + + _leave(" = -ENOBUFS [error %d]", ret); + return -ENOBUFS; } /* end cachefs_read_or_alloc_page() */ @@ -330,8 +415,7 @@ static int cachefs_page_written(struct bio *bio, unsigned int bytes_done, int error) { - struct cachefs_io_end *end_io = bio->bi_private; - struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; + struct cachefs_data_op *data_op = bio->bi_private; _enter("%p{%u},%u,%d", bio, bio->bi_size, bytes_done, error); @@ -341,27 +425,25 @@ return 1; } - /* let the netfs know that all writes are now complete */ - for (; bvec >= bio->bi_io_vec; bvec--) - end_io->func(end_io->cookie_data, bvec->bv_page, end_io->data, - error); - - /* update the block validity journal with the new block state */ - if (end_io->block->vjentry) { - if (error == 0) - cachefs_vj_write_complete(end_io->block); - else - cachefs_vj_cancel(end_io->block); + bio_put(bio); + + /* let the netfs know that the data is now safely written or that we've + * failed utterly */ + data_op->callback(data_op->cookie_data, + data_op->netfs_page, + data_op->callback_data, + error); + + if (error) { + data_op->record->pagestate = CACHEFS_PAGESTATE_ERROR; + set_bit(CACHEFS_BLOCK_ERROR, &data_op->record->block->flags); } /* wake up anyone waiting to juggle that block on disc */ - clear_bit(CACHEFS_BLOCK_NETFSBUSY, &end_io->block->flags); - wake_up(&end_io->block->writewq); - 
cachefs_block_put(end_io->block); + clear_bit(CACHEFS_BLOCK_NETFSBUSY, &data_op->record->block->flags); + wake_up(&data_op->record->block->writewq); + cachefs_data_op_put(data_op); - dbgfree(end_io); - kfree(end_io); - bio_put(bio); _leave(" = 0"); return 0; @@ -370,71 +452,112 @@ /*****************************************************************************/ /* * request a page be stored in the cache + * - cache withdrawal is prevented by the caller * - this request may be ignored if no cache block is currently attached, in * which case it returns -ENOBUFS * - if a cache block was already allocated: * - the page cookie will be updated to reflect the block selected - * - a BIO will have been dispatched to write the page - the BIO's bi_end_io - * routine will call end_io_func on completion - * - end_io_func can be NULL, in which case a default function will just - * clear the writeback bit - * - if there's a v-journal entry associated with the page, that entry will - * be erased + * - a BIO will have been dispatched to write the page - the BIO's bi_end_io + * routine will call callback_func on completion * - returns 0 */ static int cachefs_write_page(struct fscache_node *node, struct page *page, - struct fscache_page *pageio, - fscache_rw_complete_t end_io_func, - void *end_io_data, + fscache_rw_complete_t callback_func, + void *callback_data, unsigned long gfp) { - struct cachefs_io_end *end_io = NULL; - struct cachefs_block *block; - struct bio *bio = NULL; - int ret; + struct cachefs_page_record *record; + struct cachefs_data_op *data_op; + struct cachefs_super *super; + struct cachefs_inode *inode; + struct bio *bio; - _enter(""); + kenter(""); + + inode = container_of(node, struct cachefs_inode, node); - read_lock(&pageio->lock); + /* we're going to want to record the operation */ + data_op = kmalloc(sizeof(*data_op), gfp); + if (!data_op) + goto nomem; + + data_op->callback = callback_func; + data_op->callback_data = callback_data; + data_op->cookie_data = 
node->cookie->netfs_data; + data_op->netfs_page = page; + + if (cachefs_igrab(inode)) + goto nobufs; + + data_op->inode = inode; + + /* get the page record for this page */ + record = cachefs_pagerec_get(inode, page->index, gfp); + if (!record) + goto nomem_i; - /* only write if there's somewhere to write to */ - block = cachefs_mapped_block(pageio); - if (!block || fscache_is_cache_withdrawn(&block->super->cache)) - goto no_block; - - /* pin the block and drop the lock */ - _debug("write [bix=%u ref=%p]", block->bix, pageio); - cachefs_block_get(block); - set_bit(CACHEFS_BLOCK_NETFSBUSY, &block->flags); - - read_unlock(&pageio->lock); - - /* record the netfs's callback */ - ret = -ENOMEM; - end_io = kmalloc(sizeof(*end_io), gfp); - if (!end_io) - goto error; - - end_io->func = end_io_func; - end_io->data = end_io_data; - end_io->cookie_data = node->cookie->netfs_data; - end_io->block = block; + if (!test_bit(CACHEFS_PAGEREC_NETFS_HOLDING, &record->flags)) { + printk(KERN_ERR "CacheFS: Page not prepared by the netfs\n"); + BUG(); + } - /* dispatch an operation to the block device */ - ret = -ENOMEM; + data_op->record = record; + + INIT_LIST_HEAD(&data_op->link); + atomic_set(&data_op->usage, 1); + + /* we ignore pages with erroneous backing information */ + if (record->pagestate == CACHEFS_PAGESTATE_ERROR) { + cachefs_data_op_put(data_op); + kleave(" = -ENOBUFS [error state]"); + return -ENOBUFS; + } + + /* if there's no extant block, but a reservation for allocation space + * has been put in, then pass to kcachefsd to deal with in the + * background + */ + if (record->pagestate == CACHEFS_PAGESTATE_ALLOCRESV) { + struct cachefs_super *super; + + kdebug("queue for background alloc+write"); + + super = inode->vfs_inode.i_sb->s_fs_info; + spin_lock(&super->alloc_lock); + list_add_tail(&data_op->link, &super->alloc_allocq); + spin_unlock(&super->alloc_lock); + + wake_up(&super->dmn_sleepq); + kleave(" = 0 [alloc+write]"); + return 0; + } + + /* object to pages that 
are in odd states */ + BUG_ON(record->pagestate != CACHEFS_PAGESTATE_EXTANT); + + /* this page has an extant block already */ + BUG_ON(!record->block); + + _debug("write [bix=%u]", record->block->bix); + + /* dispatch a write directly to the block device */ bio = bio_alloc(gfp, 1); if (!bio) - goto error; + goto nomem_r; + + set_bit(CACHEFS_BLOCK_NETFSBUSY, &record->block->flags); + + super = inode->vfs_inode.i_sb->s_fs_info; - bio->bi_bdev = block->super->sb->s_bdev; - bio->bi_private = end_io; + bio->bi_bdev = super->sb->s_bdev; + bio->bi_private = data_op; bio->bi_end_io = cachefs_page_written; - bio->bi_sector = block->bix; - bio->bi_sector <<= PAGE_SHIFT - block->super->sb->s_blocksize_bits; + bio->bi_sector = record->block->bix; + bio->bi_sector <<= PAGE_SHIFT - super->sb->s_blocksize_bits; _debug("%u,%u,%llu", - block->bix, block->super->sb->s_blocksize_bits, bio->bi_sector); + record->block->bix, super->sb->s_blocksize_bits, bio->bi_sector); if (!bio_add_page(bio, page, PAGE_SIZE, 0)) BUG(); @@ -443,73 +566,46 @@ submit_bio(WRITE, bio); /* tell the caller it's in progress */ - _leave(" = 0"); + _leave(" = 0 [direct write]"); return 0; - error: - _debug("error"); - clear_bit(CACHEFS_BLOCK_NETFSBUSY, &block->flags); - wake_up(&block->writewq); - cachefs_block_put(block); - if (bio) - bio_put(bio); - if (end_io) { - dbgfree(end_io); - kfree(end_io); - } - _leave(" = %d", ret); - return ret; - - /* tell the caller there wasn't a block to write into */ - no_block: - read_unlock(&pageio->lock); - _leave(" = -ENOBUFS"); +nobufs: + kfree(data_op); + kleave(" = -ENOBUFS"); return -ENOBUFS; +nomem_r: + cachefs_pagerec_put(inode, record); +nomem_i: + cachefs_iput(inode); + kfree(data_op); +nomem: + kleave(" = -ENOMEM"); + return -ENOMEM; + } /* end cachefs_write_page() */ /*****************************************************************************/ /* * detach a backing block from a page - * - if the block backing the page still has a vjentry then the block will be - * 
recycled + * - cache withdrawal is prevented by the caller */ static void cachefs_uncache_page(struct fscache_node *node, - struct fscache_page *pageio) + struct page *page) { - struct cachefs_block *block, *xblock; + struct cachefs_page_record *record; + struct cachefs_inode *inode; _enter(""); - /* un-cross-link the page cookie and the block */ - xblock = NULL; - write_lock(&pageio->lock); - - block = pageio->mapped_block; - if (block) { - pageio->mapped_block = NULL; /* pin the block */ - pageio->flags = 0; - write_unlock(&pageio->lock); - - if (unlikely(block->ref != pageio)) { - printk("%p != %p", block->ref, pageio); - BUG(); - } - - /* locking order needs to be reversed */ - write_lock(&block->ref_lock); - write_lock(&pageio->lock); - block->ref = NULL; - write_unlock(&block->ref_lock); - } + inode = container_of(node, struct cachefs_inode, node); + record = cachefs_pagerec_find(inode, page->index); - write_unlock(&pageio->lock); - - /* if the block was marked as in the process of receiving data then - * cancel the mark in the validity journal */ - if (block) { - cachefs_vj_cancel(block); - cachefs_block_put(block); + if (record) { + if (test_and_clear_bit(CACHEFS_PAGEREC_NETFS_HOLDING, + &record->flags)) + cachefs_pagerec_put(inode, record); + cachefs_pagerec_put(inode, record); } _leave(""); @@ -518,6 +614,11 @@ struct fscache_cache_ops cachefs_cache_ops = { .name = "cachefs", + .srch_alloc = cachefs_srch_alloc, + .srch_free = cachefs_srch_free, + .srch_negate = cachefs_srch_negate, + .is_srch_positive = cachefs_is_srch_positive, + .srch_id = cachefs_srch_id, .lookup_node = cachefs_lookup_node, .grab_node = cachefs_grab_node, .lock_node = cachefs_lock_node, diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/journal.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/journal.c --- linux-2.6.12-rc2-mm3/fs/cachefs/journal.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/journal.c 2005-04-20 20:15:49.000000000 +0100 @@ -1,6 +1,6 @@ /* journal.c: 
general filesystem cache: journalling * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -226,11 +226,6 @@ if (trans->jblock) cachefs_block_put(xchg(&trans->jblock, NULL)); - /* if this transaction touched the validity journal, release - * the entry there too if we didn't mark the journal */ - if (trans->vjentry && trans->phase == CACHEFS_TRANS_PREPARING) - cachefs_vj_release(trans->super, trans->vjentry); - /* final clean up */ if (trans->jentry) { dbgfree(trans->jentry); @@ -277,7 +272,6 @@ */ int cachefs_trans_mark(struct cachefs_transaction *trans) { - struct cachefs_ondisc_validity_journal *vjentry; struct cachefs_trans_effect *effect; struct cachefs_super *super = trans->super; cachefs_blockix_t bix; @@ -364,8 +358,7 @@ bix = index >> (PAGE_SHIFT - super->sb->s_blocksize_bits); bix += super->layout->bix_ujournal; - ret = cachefs_block_read(super, NULL, bix, 1, - &trans->jblock, &trans->jpage); + ret = cachefs_block_read(super, bix, 1, &trans->jblock, &trans->jpage); if (ret < 0) goto error_release_journal; @@ -462,20 +455,6 @@ atomic_inc(&super->cnt_ujnl_mkgr); - /* record a mark in the validity journal if we need to */ - if (trans->vjentry) { - /* wait for read completion and deal with C-O-W */ - wait_on_page_locked(trans->vjentry->vpage); - cachefs_block_modify(super, trans->vjentry->vblock, - &trans->vjentry->vpage); - - vjentry = kmap_atomic(trans->vjentry->vpage, KM_USER0) + - trans->vjentry->ventry; - vjentry->ino = trans->vjentry->ino; - vjentry->pgnum = trans->vjentry->pgnum; - kunmap_atomic(vjentry, KM_USER0); - } - /* mark made */ _leave(" = 0"); return 0; @@ -1203,7 +1182,7 @@ try_again: /* try accessing the journal block - we don't read from disc, but * rather start with a clean page */ - ret = cachefs_block_read(super, NULL, bix, 1, &jblock, &jpage); + ret = 
cachefs_block_read(super, bix, 1, &jblock, &jpage); if (ret < 0) goto cant_set_up_block; @@ -1282,7 +1261,7 @@ try_again: /* try accessing the journal block - we don't read from disc, but * rather start with a clean page */ - ret = cachefs_block_read(super, NULL, bix, 1, &jblock, &jpage); + ret = cachefs_block_read(super, bix, 1, &jblock, &jpage); if (ret < 0) goto cant_set_up_block; @@ -1321,7 +1300,7 @@ /* wait for I/O completion */ set_current_state(TASK_UNINTERRUPTIBLE); - add_wait_queue(&jblock->writewq,&myself); + add_wait_queue(&jblock->writewq, &myself); while (test_bit(CACHEFS_BLOCK_WRITEBACK, &jblock->flags)) { schedule(); diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/kcachefsd.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/kcachefsd.c --- linux-2.6.12-rc2-mm3/fs/cachefs/kcachefsd.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/kcachefsd.c 2005-04-20 18:01:09.000000000 +0100 @@ -1,6 +1,6 @@ /* kcachefsd.c: CacheFS management daemon * - * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2003-5 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -61,10 +61,7 @@ )) break; - if (test_bit(CACHEFS_SUPER_BATCH_TIMER, &super->flags) || - !list_empty(&super->vjnl_unallocq) || - !list_empty(&super->vjnl_writtenq) - ) + if (test_bit(CACHEFS_SUPER_BATCH_TIMER, &super->flags)) break; /* deal with the server being asked to die */ @@ -92,6 +89,8 @@ */ static void kcachefsd_work(struct cachefs_super *super) { + unsigned want; + _debug("@@@ Begin Cache Management"); if (super->dmn_die) @@ -119,14 +118,40 @@ if (!super->alloc_node) cachefs_recycle_transfer_stack(super); - dying: - /* deal with validity journal changes */ - if (!list_empty(&super->vjnl_unallocq)) - cachefs_recycle_unallocate_data_block(super); + /* cull the old inodes if we don't have enough spare blocks available for + * allocation, but only if we wouldn't immediately thrash the cache + */ + if (test_bit(CACHEFS_SUPER_CULL_DISABLED, &super->flags)) { + /* if the cull is disabled, check to see if there're enough + * unpinned blocks to reenable it */ + if (super->space_unpinned > super->cull_hiwater) { + clear_bit(CACHEFS_SUPER_CULL_DISABLED, &super->flags); + printk(KERN_INFO "CacheFS: enabling culling"); + } + } + else { + if (super->space_unpinned > super->cull_lowater) { + set_bit(CACHEFS_SUPER_CULL_DISABLED, &super->flags); + printk(KERN_INFO "CacheFS: disabling culling"); + } + else { + want = super->space_slack; + want += atomic_read(&super->space_reserve); + if (want < super->alloc_cur_n + super->recycle_cur_n) + set_bit(CACHEFS_SUPER_DO_CULL, &super->flags); + } + } - if (!list_empty(&super->vjnl_writtenq)) - cachefs_vj_note_write_completion(super); + /* run the reaper/culler if necessary */ + if (test_and_clear_bit(CACHEFS_SUPER_DO_REAP, &super->flags) || + test_and_clear_bit(CACHEFS_SUPER_DO_CULL, &super->flags)) + cachefs_reaper(super); + + /* invoke the allocator if there are things to allocate */ + if 
(!list_empty(&super->alloc_allocq)) + cachefs_allocator(super); + dying: /* write a batch of metadata if it's time to do so */ if (test_bit(CACHEFS_SUPER_BATCH_TIMER, &super->flags)) cachefs_trans_batch_write(super); diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/linear-io.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/linear-io.c --- linux-2.6.12-rc2-mm3/fs/cachefs/linear-io.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/linear-io.c 2005-04-20 19:22:54.000000000 +0100 @@ -32,6 +32,7 @@ struct address_space *mapping, struct list_head *pages, unsigned nr_pages); +static int cachefs_linear_io_releasepage(struct page *page, int gfp_flags); struct address_space_operations cachefs_linear_io_addrspace_operations = { .readpage = cachefs_linear_io_readpage, @@ -43,7 +44,7 @@ .set_page_dirty = cachefs_no_set_page_dirty, .sync_page = cachefs_sync_page, .invalidatepage = cachefs_invalidatepage, - .releasepage = cachefs_releasepage, + .releasepage = cachefs_linear_io_releasepage, }; /*****************************************************************************/ @@ -57,19 +58,11 @@ cachefs_blockix_t *last_block_in_bio) { struct cachefs_block *block; - struct fscache_page *pageio; struct inode *inode = page->mapping->host; int ret; _enter(""); - /* get the page mapping cookie */ - pageio = fscache_page_get_private(page, GFP_KERNEL); - if (IS_ERR(pageio)) { - ret = PTR_ERR(pageio); - goto error; - } - /* install the block into the superblock's lookup tree */ block = cachefs_block_insert(inode->i_sb->s_fs_info, page->index); if (IS_ERR(block)) { @@ -77,7 +70,8 @@ goto error; } - pageio->mapped_block = block; + SetPagePrivate(page); + page->private = (unsigned long) block; /* dispatch the outstanding BIO if the pages are not adjacent */ if (*_bio && *last_block_in_bio != page->index - 1) { @@ -176,20 +170,12 @@ int cachefs_linear_io_readpage(struct file *file, struct page *page) { struct cachefs_block *block; - struct fscache_page *pageio; struct inode 
*inode = page->mapping->host; struct bio *bio; int ret; _enter(",{%lu}", page->index); - /* get the page mapping cookie */ - pageio = fscache_page_get_private(page, GFP_KERNEL); - if (IS_ERR(pageio)) { - _leave(" = %ld [pgp]", PTR_ERR(pageio)); - return PTR_ERR(pageio); - } - /* install the block into the superblock's lookup tree */ block = cachefs_block_insert(inode->i_sb->s_fs_info, page->index); if (IS_ERR(block)) { @@ -197,7 +183,8 @@ return PTR_ERR(block); } - pageio->mapped_block = block; + SetPagePrivate(page); + page->private = (unsigned long) block; /* dispatch a call to perform the read */ ret = -ENOMEM; @@ -220,3 +207,64 @@ return ret; } /* end cachefs_linear_io_readpage() */ + +/*****************************************************************************/ +/* + * release a page and clean up its private data + */ +static int cachefs_linear_io_releasepage(struct page *page, int gfp_flags) +{ + struct cachefs_block *block; + + _enter("{%lu},%x", page->index, gfp_flags); + + if (PagePrivate(page)) { + /* detach the block from this page */ + block = (struct cachefs_block *) page->private; + page->private = 0; + ClearPagePrivate(page); + +#ifdef CONFIG_DEBUG_SLAB + if (block) { + int usage = atomic_read(&block->usage); + + if ((usage & 0xffffff00) == 0x6b6b6b00) { + printk("BLOCK PUT ERROR" + " pg=%p{ix=%lu} blk=%p{u=%x}\n", + page, page->index, block, usage); + BUG(); + } + } +#endif + + /* get the page alloc lock for this block */ + if (test_and_set_bit(CACHEFS_BLOCK_ALLOC, &block->flags)) { + DECLARE_WAITQUEUE(myself, current); + + set_current_state(TASK_UNINTERRUPTIBLE); + add_wait_queue(&block->writewq, &myself); + + while (test_and_set_bit(CACHEFS_BLOCK_ALLOC, + &block->flags)) { + schedule(); + set_current_state(TASK_UNINTERRUPTIBLE); + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(&block->writewq, &myself); + } + + /* stop the block pointing at this page */ + if (block->page == page) + block->page = NULL; + + /* drop the page alloc lock 
*/ + clear_bit(CACHEFS_BLOCK_ALLOC, &block->flags); + wake_up_all(&block->writewq); + cachefs_block_put(block); + } + + _leave(" = 0"); + return 0; + +} /* end cachefs_linear_io_releasepage() */ diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/main.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/main.c --- linux-2.6.12-rc2-mm3/fs/cachefs/main.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/main.c 2005-04-20 18:00:49.000000000 +0100 @@ -1,6 +1,6 @@ /* main.c: general filesystem caching manager * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -51,20 +51,37 @@ goto error; } + /* create a page record jar */ + cachefs_pagerec_jar = + kmem_cache_create("cachefs_pagerec_jar", + sizeof(struct cachefs_page_record), + 0, + SLAB_HWCACHE_ALIGN, + NULL, + NULL); + if (!cachefs_pagerec_jar) { + printk(KERN_NOTICE + "CacheFS: Failed to allocate a page record jar\n"); + goto error_block_jar; + } + /* initialise the filesystem */ ret = cachefs_fs_init(); if (ret < 0) - goto error_block_jar; + goto error_pagerec_jar; - printk(KERN_INFO "CacheFS: general fs caching v0.1 registered\n"); + printk(KERN_INFO "CacheFS: registered\n"); return ret; + error_pagerec_jar: + kmem_cache_destroy(cachefs_pagerec_jar); error_block_jar: kmem_cache_destroy(cachefs_block_jar); error: printk(KERN_ERR "CacheFS: failed to register: %d\n", ret); return ret; + } /* end cachefs_init() */ /*****************************************************************************/ @@ -73,9 +90,10 @@ */ static void __exit cachefs_exit(void) { - printk(KERN_INFO "CacheFS: general fs caching v0.1 unregistering\n"); + printk(KERN_INFO "CacheFS: unregistering\n"); cachefs_fs_exit(); + kmem_cache_destroy(cachefs_pagerec_jar); kmem_cache_destroy(cachefs_block_jar); } /* end cachefs_exit() */ diff -uNr 
linux-2.6.12-rc2-mm3/fs/cachefs/Makefile linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/Makefile --- linux-2.6.12-rc2-mm3/fs/cachefs/Makefile 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/Makefile 2005-04-19 14:59:16.000000000 +0100 @@ -5,6 +5,7 @@ #CFLAGS += -finstrument-functions cachefs-objs := \ + allocator.o \ block.o \ index.o \ indirection-io.o \ @@ -16,11 +17,11 @@ main.o \ misc.o \ nowrite.o \ + page-record.o \ recycling.o \ replay.o \ rootdir.o \ status.o \ - super.o \ - vjournal.o + super.o obj-$(CONFIG_CACHEFS) := cachefs.o diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/misc.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/misc.c --- linux-2.6.12-rc2-mm3/fs/cachefs/misc.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/misc.c 2005-04-20 19:20:32.000000000 +0100 @@ -27,33 +27,6 @@ /*****************************************************************************/ /* - * get a page caching token from for a page, allocating it and attaching it to - * the page's private pointer if it doesn't exist - */ -struct fscache_page * __cachefs_page_get_private(struct page *page, - unsigned gfp_flags) -{ - struct fscache_page *pageio = (struct fscache_page *) page->private; - - if (!pageio) { - pageio = kmalloc(sizeof(*pageio), gfp_flags); - if (!pageio) - return ERR_PTR(-ENOMEM); - - memset(pageio,0, sizeof(*pageio)); - rwlock_init(&pageio->lock); - - page->private = (unsigned long) pageio; - SetPagePrivate(page); - } - - return pageio; -} /* end __cachefs_page_get_private() */ - -EXPORT_SYMBOL(__cachefs_page_get_private); - -/*****************************************************************************/ -/* * handle the completion of a BIO that read a bundle of pages */ int cachefs_io_pages_read(struct bio *bio, unsigned int bytes_done, int err) @@ -145,7 +118,6 @@ */ int cachefs_invalidatepage(struct page *page, unsigned long offset) { - struct fscache_page *pageio; int ret = 1; _enter("{%lu},%lu", page->index, offset); 
@@ -153,9 +125,6 @@ BUG_ON(!PageLocked(page)); if (PagePrivate(page)) { - pageio = (struct fscache_page *) page->private; - pageio->flags = 0; - /* we release page attachments only if the entire page is being * invalidated - in that case, the block mapping has been * unconditionally invalidated, so real IO is not possible @@ -174,48 +143,6 @@ /*****************************************************************************/ /* - * release a page and cleanup its private data - */ -int cachefs_releasepage(struct page *page, int gfp_flags) -{ - struct cachefs_block *block; - struct fscache_page *pageio; - - _enter("{%lu},%x", page->index, gfp_flags); - - /* detach the page mapping cookie and mapped block */ - if (PagePrivate(page)) { - /* detach the mapped block from the page if there is one */ - pageio = (struct fscache_page *) page->private; - page->private = 0; - ClearPagePrivate(page); - - block = xchg(&pageio->mapped_block, NULL); -#ifdef CONFIG_DEBUG_SLAB - if (block) { - int usage = atomic_read(&block->usage); - - if ((usage & 0xffffff00) == 0x6b6b6b00) { - printk("BLOCK PUT ERROR" - " pg=%p{ix=%lu} blk=%p{u=%x}\n", - page, page->index, block, usage); - BUG(); - } - } -#endif - - cachefs_block_put(block); - dbgfree(pageio); - kfree(pageio); - } - - _leave(" = 0"); - return 0; - -} /* end cachefs_releasepage() */ - -/*****************************************************************************/ -/* * read a page from a cachefs file into the page cache */ struct page *cachefs_get_page(struct cachefs_inode *inode, unsigned index) diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/page-record.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/page-record.c --- linux-2.6.12-rc2-mm3/fs/cachefs/page-record.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/page-record.c 2005-04-21 10:19:44.000000000 +0100 @@ -0,0 +1,661 @@ +/* page-record.c: CacheFS's known page records handling + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include "cachefs-int.h" + +kmem_cache_t *cachefs_pagerec_jar; + +static struct cachefs_page_record *__cachefs_pagerec_get(struct cachefs_inode *inode, + unsigned long index, + enum cachefs_pagetype type, + unsigned long gfp); + +static void __cachefs_pagerec_put(struct cachefs_inode *inode, + struct cachefs_page_record *record); + +struct cachefs_pagerec_lock_waiter { + struct list_head link; + struct task_struct *task; + struct cachefs_page_record *record; + int result; +}; + +/*****************************************************************************/ +/* + * get the parent indirection record of a page or indirection block + * - also sets boundary and parent pointer offset for this record + */ +static inline +void cachefs_pagerec_calc_parent(struct cachefs_inode *inode, + struct cachefs_page_record *record, + enum cachefs_pagetype type, + unsigned long index, + enum cachefs_pagetype *_ptype, + unsigned long *_pindex) +{ + enum cachefs_pagetype ptype; + struct cachefs_super *super; + unsigned long ptrperblk, ndirect, pindex, ptr_ix; + unsigned long notboundary = 1; + + super = inode->vfs_inode.i_sb->s_fs_info; + ptrperblk = PAGE_SIZE / sizeof(cachefs_blockix_t); + + /* find the parent indirection */ + switch (type) { + case CACHEFS_PAGETYPE_DATA: + ndirect = super->layout->metadata_size; + ndirect -= sizeof(struct cachefs_ondisc_metadata); + ndirect /= sizeof(cachefs_blockix_t); + + if (index < ndirect) { + ptype = CACHEFS_PAGETYPE_INODE; + pindex = 0; + ptr_ix = index; + notboundary = ndirect - (index + 1); + } + else if (index -= ndirect, index < ptrperblk) { + ptype = CACHEFS_PAGETYPE_INDR_SINGLE; + pindex = 0; + 
ptr_ix = index; + notboundary = (index + 1) & (ptrperblk - 1); + } + else if (index -= ptrperblk, index < ptrperblk * ptrperblk) { + ptype = CACHEFS_PAGETYPE_INDR_DOUBLE_SINGLE; + pindex = index / ptrperblk; + ptr_ix = index % ptrperblk; + notboundary = (index + 1) & (ptrperblk - 1); + } + else if (index -= ptrperblk * ptrperblk, + index < ptrperblk * ptrperblk * ptrperblk + ) { + ptype = CACHEFS_PAGETYPE_INDR_TRIPLE_DOUBLE_SINGLE; + pindex = index / (ptrperblk * ptrperblk); + ptr_ix = index % (ptrperblk * ptrperblk); + notboundary = (index + 1) & (ptrperblk - 1); + } + else { + ptype = 0; + pindex = 0; + ptr_ix = 0; + BUG(); + } + break; + + case CACHEFS_PAGETYPE_INODE: + *_ptype = 0; + *_pindex = 0; + record->ptr_ix = 0; + return; + + /* top-level indirection blocks */ + case CACHEFS_PAGETYPE_INDR_SINGLE: + ptype = CACHEFS_PAGETYPE_INODE; + pindex = 0; + ptr_ix = inode->metadata_offset; + ptr_ix += offsetof(struct cachefs_ondisc_metadata, + single_indirect); + ptr_ix /= sizeof(cachefs_blockix_t); + break; + + case CACHEFS_PAGETYPE_INDR_DOUBLE: + ptype = CACHEFS_PAGETYPE_INODE; + pindex = 0; + ptr_ix = inode->metadata_offset; + ptr_ix += offsetof(struct cachefs_ondisc_metadata, + double_indirect); + ptr_ix /= sizeof(cachefs_blockix_t); + break; + + case CACHEFS_PAGETYPE_INDR_TRIPLE: + ptype = CACHEFS_PAGETYPE_INODE; + pindex = 0; + ptr_ix = inode->metadata_offset; + ptr_ix += offsetof(struct cachefs_ondisc_metadata, + triple_indirect); + ptr_ix /= sizeof(cachefs_blockix_t); + break; + + /* second-level indirection blocks */ + case CACHEFS_PAGETYPE_INDR_DOUBLE_SINGLE: + case CACHEFS_PAGETYPE_INDR_TRIPLE_DOUBLE: + ptype = type - 1; + pindex = 0; + ptr_ix = index % ptrperblk; + break; + + /* third-level indirection blocks */ + case CACHEFS_PAGETYPE_INDR_TRIPLE_DOUBLE_SINGLE: + ptype = type - 1; + pindex = index / ptrperblk; + ptr_ix = index % ptrperblk; + break; + + default: + ptype = 0; + pindex = 0; + ptr_ix = 0; + BUG(); + } + + record->ptr_ix = ptr_ix; + if 
(!notboundary) + set_bit(CACHEFS_PAGEREC_BOUNDARY, &record->flags); + + *_ptype = ptype; + *_pindex = pindex; + +} /* end cachefs_pagerec_calc_parent() */ + +/*****************************************************************************/ +/* + * get an element of the indirection chain of a page represented by the inode + * - the caller must hold the inode semaphore + */ +static +struct cachefs_page_record *__cachefs_pagerec_get(struct cachefs_inode *inode, + unsigned long index, + enum cachefs_pagetype type, + unsigned long gfp) +{ + struct cachefs_page_record *record, *parent; + struct rb_node *p, **pp; + + kenter("{%lu},%lu,%u,", + (unsigned long) inode->vfs_inode.i_ino, index, type); + + /* see if the record is already present */ + p = NULL; + pp = &inode->page_records.rb_node; + + while (*pp) { + p = *pp; + record = rb_entry(p, struct cachefs_page_record, lookup); + + if (record->index > index) { + pp = &(*pp)->rb_left; + continue; + } + + if (record->index < index) { + pp = &(*pp)->rb_right; + continue; + } + + if (record->pagetype > type) { + pp = &(*pp)->rb_left; + continue; + } + + if (record->pagetype < type) { + pp = &(*pp)->rb_right; + continue; + } + + /* we found what we were looking for */ + atomic_inc(&record->usage); + kleave(" = %p [found]", record); + return record; + } + + /* there's no page record yet - need to allocate one */ + record = kmem_cache_alloc(cachefs_pagerec_jar, gfp); + if (!record) { + kleave(" [ENOMEM]"); + return NULL; + } + + record->index = index; + record->pagetype = type; + record->pagestate = CACHEFS_PAGESTATE_UNKNOWN; + atomic_set(&record->usage, 1); + + /* find and pin the parent */ + parent = NULL; + if (type != CACHEFS_PAGETYPE_INODE) { + enum cachefs_pagetype ptype; + unsigned long pindex; + + cachefs_pagerec_calc_parent(inode, record, type, index, + &ptype, &pindex); + + kdebug("- parent: ty=%u ix=%lu b=%u", + ptype, pindex, + test_bit(CACHEFS_PAGEREC_BOUNDARY, &record->flags)); + + parent = 
__cachefs_pagerec_get(inode, pindex, ptype, gfp); + if (!parent) { + kmem_cache_free(cachefs_pagerec_jar, record); + kleave(" [ENOMEM]"); + return NULL; + } + } + + record->parent = parent; + + /* insert */ + rb_link_node(&record->lookup, p, pp); + rb_insert_color(&record->lookup, &inode->page_records); + + kleave(" = %p", record); + return record; + +} /* end __cachefs_pagerec_get() */ + +/*****************************************************************************/ +/* + * get a record of an inode's page and its indirection chain + */ +struct cachefs_page_record *cachefs_pagerec_get(struct cachefs_inode *inode, + unsigned long index, + unsigned long gfp) +{ + struct cachefs_page_record *record; + + kenter("{%lu},%lu,", (unsigned long) inode->vfs_inode.i_ino, index); + + down(&inode->vfs_inode.i_sem); + record = __cachefs_pagerec_get(inode, index, CACHEFS_PAGETYPE_DATA, + gfp); + up(&inode->vfs_inode.i_sem); + + kleave(" = %p", record); + return record; + +} /* end cachefs_pagerec_get() */ + +/*****************************************************************************/ +/* + * find the record of an inode's page if it exists + */ +struct cachefs_page_record *cachefs_pagerec_find(struct cachefs_inode *inode, + unsigned long index) +{ + struct cachefs_page_record *record; + enum cachefs_pagetype type; + struct rb_node **pp; + + kenter("{%lu},%lu", (unsigned long) inode->vfs_inode.i_ino, index); + + type = CACHEFS_PAGETYPE_DATA; + + down(&inode->vfs_inode.i_sem); + + pp = &inode->page_records.rb_node; + + while (*pp) { + record = rb_entry(*pp, struct cachefs_page_record, lookup); + + if (record->index > index) { + pp = &(*pp)->rb_left; + continue; + } + + if (record->index < index) { + pp = &(*pp)->rb_right; + continue; + } + + if (record->pagetype > type) { + pp = &(*pp)->rb_left; + continue; + } + + if (record->pagetype < type) { + pp = &(*pp)->rb_right; + continue; + } + + /* we found what we were looking for */ + atomic_inc(&record->usage); + 
up(&inode->vfs_inode.i_sem); + kleave(" = %p", record); + return record; + } + + up(&inode->vfs_inode.i_sem); + kleave(" = NULL"); + return NULL; + +} /* end cachefs_pagerec_find() */ + +/*****************************************************************************/ +/* + * dispose of an indirection chain record set, releasing its space reservations + */ +static void __cachefs_pagerec_put(struct cachefs_inode *inode, + struct cachefs_page_record *record) +{ + struct cachefs_page_record *parent; + struct cachefs_super *super; + int unresv = 0; + + for (;;) { + kdebug("- zap %p", record); + + parent = record->parent; + + if (record->pagestate == CACHEFS_PAGESTATE_ALLOCRESV) + unresv++; + + cachefs_block_put(record->block); + rb_erase(&record->lookup, &inode->page_records); + kmem_cache_free(cachefs_pagerec_jar, record); + + if (!parent) + break; + + record = parent; + if (!atomic_dec_and_test(&record->usage)) + break; + } + + super = inode->vfs_inode.i_sb->s_fs_info; + atomic_sub(unresv, &super->space_reserve); + +} /* end __cachefs_pagerec_put() */ + +/*****************************************************************************/ +/* + * dispose of an indirection chain record set + */ +void _cachefs_pagerec_put(struct cachefs_inode *inode, + struct cachefs_page_record *record) +{ + kenter("{%lu},{%u,%lu}", + (unsigned long) inode->vfs_inode.i_ino, + record->pagetype, record->index); + + down(&inode->vfs_inode.i_sem); + + if (atomic_read(&record->usage) == 0) + __cachefs_pagerec_put(inode, record); + + up(&inode->vfs_inode.i_sem); + kleave(""); + +} /* end _cachefs_pagerec_put() */ + +/*****************************************************************************/ +/* + * begin lookup of a page record on disk, excluding other attempts to do so + * - return 0 if this process should do the lookup + * - return 1 if record is no longer in UNKNOWN or LOOKUP states + * - return -EINTR if interrupted whilst waiting + */ +int cachefs_pagerec_begin_lookup(struct cachefs_inode *inode, + struct 
cachefs_page_record *record) +{ + struct cachefs_pagerec_lock_waiter waiter; + unsigned long flags; + + kenter("{%lu},{%u,%lu}", + (unsigned long) inode->vfs_inode.i_ino, + record->pagetype, record->index); + + /* no - we're going to need to add ourselves to the wait queue */ + waiter.task = current; + waiter.record = record; + waiter.result = 0; + + spin_lock_irqsave(&inode->pagerec_lock, flags); + + /* check the state, lest someone changed it whilst we were awaiting the + * spinlock */ + if (record->pagestate != CACHEFS_PAGESTATE_UNKNOWN && + record->pagestate != CACHEFS_PAGESTATE_LOOKUP + ) { + spin_unlock_irqrestore(&inode->pagerec_lock, flags); + goto complete; + } + + /* see if this caller gets to do the lookup */ + if (record->pagestate == CACHEFS_PAGESTATE_UNKNOWN) { + record->pagestate = CACHEFS_PAGESTATE_LOOKUP; + spin_unlock_irqrestore(&inode->pagerec_lock, flags); + kleave(" = 0 [do lookup]"); + return 0; + } + + /* queue up until lookup completed */ + get_task_struct(waiter.task); + list_add_tail(&waiter.link, &inode->pagerec_lookupwq); + spin_unlock_irqrestore(&inode->pagerec_lock, flags); + + /* and wait for the state to change */ + set_current_state(TASK_INTERRUPTIBLE); + while (!waiter.result) { + if (signal_pending(current)) + goto interrupt; + + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + } + + set_current_state(TASK_RUNNING); + if (waiter.result == 2) + goto passed_along; + +complete: + kleave(" = 1 [complete]"); + return 1; + +passed_along: + /* passed along due to error */ + kleave(" = 0 [passed do lookup]"); + return 0; + +interrupt: + spin_lock_irqsave(&inode->pagerec_lock, flags); + list_del_init(&waiter.link); + put_task_struct(waiter.task); + spin_unlock_irqrestore(&inode->pagerec_lock, flags); + + if (waiter.result == 2) + goto passed_along; + + kleave(" = -EINTR"); + return -EINTR; + +} /* end cachefs_pagerec_begin_lookup() */ + +/*****************************************************************************/ +/* + * notify 
failure or completion of lookup of a page record on disk + * - state must have been changed by caller + */ +void cachefs_pagerec_end_lookup(struct cachefs_inode *inode, + struct cachefs_page_record *record, + int pass_along) +{ + struct cachefs_pagerec_lock_waiter *waiter, *next; + struct task_struct *task; + unsigned long flags; + int result; + + kenter("{%lu},{%u,%lu},%d", + (unsigned long) inode->vfs_inode.i_ino, + record->pagetype, record->index, + pass_along); + + BUG_ON(record->pagestate == CACHEFS_PAGESTATE_UNKNOWN); + BUG_ON(record->pagestate == CACHEFS_PAGESTATE_LOOKUP); + + result = pass_along ? 2 : 1; + + /* look for someone to give the lock to */ + spin_lock_irqsave(&inode->pagerec_lock, flags); + + list_for_each_entry_safe(waiter, next, &inode->pagerec_lookupwq, link) { + if (waiter->record == record) { + /* found someone to give the lock to directly */ + list_del_init(&waiter->link); + task = waiter->task; + mb(); + waiter->result = result; + wake_up_process(task); + put_task_struct(task); + if (result == 2) + goto passed_along; + } + } + + /* if nothing to pass along to, then return to unknown state */ + if (result == 2) + record->pagestate = CACHEFS_PAGESTATE_UNKNOWN; + +passed_along: + spin_unlock_irqrestore(&inode->pagerec_lock, flags); + kleave(""); + +} /* end cachefs_pagerec_end_lookup() */ + +/*****************************************************************************/ +/* + * lock a page record's access lock + * - this controls access to the metadata lookup + */ +int cachefs_pagerec_lock(struct cachefs_inode *inode, + struct cachefs_page_record *record) +{ + struct cachefs_pagerec_lock_waiter waiter; + unsigned long flags; + int ret; + + /* see if we can get the lock immediately */ + if (!test_and_set_bit(CACHEFS_PAGEREC_LOCK, &record->flags)) + return 0; + + /* no - we're going to need to add ourselves to the wait queue */ + waiter.task = current; + waiter.record = record; + waiter.result = 0; + + spin_lock_irqsave(&inode->pagerec_lock, 
flags); + + /* check again, lest someone returned it whilst we were awaiting the + * spinlock */ + if (!test_and_set_bit(CACHEFS_PAGEREC_LOCK, &record->flags)) { + spin_unlock_irqrestore(&inode->pagerec_lock, flags); + return 0; + } + + /* queue up */ + get_task_struct(waiter.task); + list_add_tail(&waiter.link, &inode->pagerec_accwq); + spin_unlock_irqrestore(&inode->pagerec_lock, flags); + + /* and wait to be given it */ + set_current_state(TASK_INTERRUPTIBLE); + while (!waiter.result && !signal_pending(current)) { + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + } + + set_current_state(TASK_RUNNING); + + if (waiter.result) + return 0; + + /* must've been interrupted then */ + BUG_ON(!signal_pending(current)); + + spin_lock_irqsave(&inode->pagerec_lock, flags); + + /* dequeue if not given it whilst waiting for the spinlock */ + ret = 0; + if (waiter.result) { + list_del_init(&waiter.link); + put_task_struct(waiter.task); + ret = -EINTR; + } + + spin_unlock_irqrestore(&inode->pagerec_lock, flags); + return ret; + +} /* end cachefs_pagerec_lock() */ + +/*****************************************************************************/ +/* + * unlock a page record's access lock + */ +void cachefs_pagerec_unlock(struct cachefs_inode *inode, + struct cachefs_page_record *record) +{ + struct cachefs_pagerec_lock_waiter *waiter; + struct task_struct *task; + unsigned long flags; + + /* look for someone to give the lock to */ + spin_lock_irqsave(&inode->pagerec_lock, flags); + + list_for_each_entry(waiter, &inode->pagerec_accwq, link) { + if (waiter->record == record) { + /* found someone to give the lock to directly */ + list_del_init(&waiter->link); + task = waiter->task; + mb(); + waiter->result = 1; + wake_up_process(task); + put_task_struct(task); + goto woken; + } + } + + /* no one wanted this lock */ + clear_bit(CACHEFS_PAGEREC_LOCK, &record->flags); + +woken: + spin_unlock_irqrestore(&inode->pagerec_lock, flags); + +} /* end cachefs_pagerec_unlock() */ + 
+/*****************************************************************************/ +/* + * record an indirection chain for a transaction + * - some blocks will have been allocated and some will have been changed to + * point to the new blocks + * - the walk starts at the page record held in page->private and follows + * record->parent until it runs out, storing each record's block index and + * pointer index in chain->block[] and chain->ptr_ix[] at successive steps + * - a record flagged CACHEFS_PAGEREC_NEW sets the bit for its own step in + * allocmap and the bit for the following step in changemap + * - NOTE(review): step is not bounds-checked against the chain arrays and the + * bitmaps are uint8_t, so bits for steps beyond the eighth would be lost; + * presumably the chain depth is limited elsewhere - confirm + * - NOTE(review): page->private is dereferenced unconditionally, so callers + * are assumed to pass a page with a record attached - confirm + */ +void cachefs_trans_record_chain(struct cachefs_ondisc_ujnl_chain *chain, + struct page *page) +{ + struct cachefs_page_record *record; + unsigned step; + uint8_t allocmap, changemap, bit, nextbit; + + record = (struct cachefs_page_record *) page->private; + + allocmap = changemap = 0; + bit = 1; + step = 0; + do { + nextbit = bit << 1; + + chain->block[step] = record->block->bix; + chain->ptr_ix[step] = record->ptr_ix; + + if (test_bit(CACHEFS_PAGEREC_NEW, &record->flags)) { + allocmap |= bit; + changemap |= nextbit; + } + + bit = nextbit; + step++; + record = record->parent; + } while(record); + + chain->allocmap = allocmap; + chain->changemap = changemap; + +} /* end cachefs_trans_record_chain() */ diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/reaper.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/reaper.c --- linux-2.6.12-rc2-mm3/fs/cachefs/reaper.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/reaper.c 2005-04-21 11:22:41.000000000 +0100 @@ -0,0 +1,17 @@ +/* reaper.c: CacheFS dead file reaper + * + * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include "cachefs-int.h" diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/recycling.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/recycling.c --- linux-2.6.12-rc2-mm3/fs/cachefs/recycling.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/recycling.c 2005-04-21 11:59:22.000000000 +0100 @@ -23,6 +23,7 @@ #include #include #include +#include #include "cachefs-int.h" /*****************************************************************************/ @@ -54,7 +55,7 @@ _debug("begin new recycling on block %x", bix); /* mirror the block in memory */ - ret = cachefs_block_read(super, NULL, bix, 1, &block, &page); + ret = cachefs_block_read(super, bix, 1, &block, &page); if (ret < 0) { printk("kcachefsd: Failed to get page: %d\n", ret); _leave(" = %d", ret); @@ -77,12 +78,12 @@ cachefs_trans_affects_super(trans); } - trans->jentry->mark = CACHEFS_ONDISC_UJNL_RECYC_BEGIN_NEW; - trans->jentry->index = super->recycle_cur; - trans->jentry->ixentry = super->recycle_cur_n; - trans->jentry->block = bix; + trans->jentry->mark = CACHEFS_ONDISC_UJNL_RECYC_BEGIN_NEW; + trans->jentry->u.begin_new.front = bix; + trans->jentry->u.begin_new.second = super->recycle_cur; + trans->jentry->u.begin_new.seccnt = super->recycle_cur_n; - trans->jentry->pgnum = + trans->jentry->u.begin_new.unready = super->layout->bix_unready + used_unready_node; trans->changed |= CACHEFS_TRANS_CHANGED_RECYCLE; @@ -97,8 +98,8 @@ cachefs_block_modify(super, block, &page); node = kmap_atomic(page, KM_USER0); - node->next = trans->jentry->index; - node->count = trans->jentry->ixentry; + node->next = trans->jentry->u.begin_new.second; + node->count = trans->jentry->u.begin_new.seccnt; kunmap_atomic(node, KM_USER0); super->recycle_room = CACHEFS_ONDISC_LEAVES_PER_FREE_NODE; @@ -167,12 +168,12 @@ cachefs_trans_affects_block(trans, super->recycle_block, 0, PAGE_SIZE); cachefs_trans_affects_super(trans); - trans->jentry->mark = 
CACHEFS_ONDISC_UJNL_RECYC_MAKEREADY; - trans->jentry->block = super->recycle_cur; - trans->jentry->entry = pos; - trans->jentry->auxblock = super->layout->bix_unready; - trans->jentry->pgnum = super->layout->bix_unready + qty; - trans->jentry->count = qty; + trans->jentry->mark = CACHEFS_ONDISC_UJNL_RECYC_MAKEREADY; + trans->jentry->u.make_ready.dst = super->recycle_cur; + trans->jentry->u.make_ready.dstix = pos; + trans->jentry->u.make_ready.first = super->layout->bix_unready; + trans->jentry->u.make_ready.unready = super->layout->bix_unready + qty; + trans->jentry->u.make_ready.count = qty; /* record the transaction in the journal */ ret = cachefs_trans_mark(trans); @@ -253,9 +254,9 @@ return; } - trans->jentry->mark = CACHEFS_ONDISC_UJNL_RECYC_TRANSFER; - trans->jentry->block = allocTOS; - + trans->jentry->mark = CACHEFS_ONDISC_UJNL_RECYC_TRANSFER; + trans->jentry->u.recycle_transfer.first = allocTOS; + trans->jentry->u.recycle_transfer.front = 0; trans->jentry->recycle_cur = 0; trans->jentry->alloc_cur = allocTOS; trans->jentry->alloc_leaf = 0; @@ -293,8 +294,7 @@ } /* read the TOS as that contains a pointer to the 2OS */ - ret = cachefs_block_read(super, NULL, allocTOS, 0, - &block, &page); + ret = cachefs_block_read(super, allocTOS, 0, &block, &page); if (ret < 0) { printk("CacheFS: failed to read page: %d\n", ret); _leave(" [error %d]", ret); @@ -314,10 +314,9 @@ cachefs_trans_affects_block(trans, super->recycle_block, 0, PAGE_SIZE); - trans->jentry->mark = CACHEFS_ONDISC_UJNL_RECYC_TRANSFER; - trans->jentry->block = allocTOS; - trans->jentry->upblock = super->recycle_cur; - + trans->jentry->mark = CACHEFS_ONDISC_UJNL_RECYC_TRANSFER; + trans->jentry->u.recycle_transfer.first = allocTOS; + trans->jentry->u.recycle_transfer.front = super->recycle_cur; trans->jentry->alloc_cur = allocTOS; trans->jentry->alloc_leaf = 0; trans->changed |= CACHEFS_TRANS_CHANGED_ALLOC; @@ -354,7 +353,7 @@ /* force a batch to be written immediately */ 
set_bit(CACHEFS_SUPER_BATCH_TIMER, &super->flags); - wake_up(&super->alloc_wq); + //wake_up(&super->alloc_wq); _leave(""); @@ -382,7 +381,8 @@ super->rcm_ptrstop); max = super->sb->s_blocksize; - max -= sizeof(struct cachefs_ondisc_update_journal); + max -= offsetof(struct cachefs_ondisc_update_journal, + u.recycle_scavenge.ptrs); max /= sizeof(cachefs_blockix_t); /* wait for the page to finish being read */ @@ -403,9 +403,9 @@ /* make sure there's a recycling node with space available */ if (super->recycle_room == 0) { - trans->jentry->upblock = super->rcm_block; - trans->jentry->upentry = src; - trans->jentry->rcm_ptrnext = src + 1; + trans->jentry->u.begin_new.ptr = super->rcm_block; + trans->jentry->u.begin_new.ptrix = src; + trans->jentry->rcm_ptrnext = src + 1; ret = cachefs_recycle_begin_new_node(super, indirect[src], trans); @@ -418,14 +418,15 @@ limit = min(super->recycle_room, max); dst = CACHEFS_ONDISC_LEAVES_PER_FREE_NODE - super->recycle_room; - trans->jentry->mark = CACHEFS_ONDISC_UJNL_RECYC_SCAVENGE; - trans->jentry->block = super->rcm_block; - trans->jentry->entry = src; - trans->jentry->auxblock = super->recycle_cur; - trans->jentry->auxentry = dst; + trans->jentry->mark = CACHEFS_ONDISC_UJNL_RECYC_SCAVENGE; + trans->jentry->u.recycle_scavenge.ino = super->rcm_ino; + trans->jentry->u.recycle_scavenge.src = super->rcm_block; + trans->jentry->u.recycle_scavenge.srcix = src; + trans->jentry->u.recycle_scavenge.dst = super->recycle_cur; + trans->jentry->u.recycle_scavenge.dstix = dst; /* transfer the pointers into the journal entry first */ - jeptr = trans->jentry->u.rcyptrs; + jeptr = trans->jentry->u.recycle_scavenge.ptrs; count = 0; while (count < limit && src < super->rcm_ptrstop) { if (indirect[src]) { @@ -443,7 +444,7 @@ src++; } - trans->jentry->count = count; + trans->jentry->u.recycle_scavenge.count = count; trans->jentry->rcm_ptrnext = src; /* write the transaction to the journal */ @@ -468,7 +469,7 @@ node = kmap_atomic(super->recycle_node, 
KM_USER0); memcpy(&node->leaves[dst], - trans->jentry->u.rcyptrs, + trans->jentry->u.recycle_scavenge.ptrs, count * sizeof(cachefs_blockix_t)); kunmap_atomic(node, KM_USER0); @@ -490,12 +491,12 @@ * - metadata record of metadata inode (we've got a new free inode) * - index entry pointing to inode being reclaimed * - metadata record of index (we've got a new free index entry) + * - there's no index record if the index has alreday been reclaimed */ static int cachefs_recycle_reclaim_inode_metadata(struct cachefs_super *super, struct cachefs_transaction **_trans) { struct cachefs_ondisc_index_entry *xent; - struct cachefs_ondisc_ujnl_index *jindex; struct cachefs_ondisc_metadata *metadata; struct cachefs_transaction *trans; struct cachefs_inode *iinode = NULL; @@ -511,8 +512,7 @@ ixentry = metadata->pindex_entry; cachefs_metadata_postread(super->rcm_inode, metadata); - /* open up the parent index inode if there is one and get the page it - * references */ + /* open up the parent index inode and get the page it references */ if (iino) { iinode = cachefs_iget(super, iino); if (IS_ERR(iinode)) { @@ -531,50 +531,24 @@ offset = (ixentry % iinode->index_epp) * iinode->index_esize; } else { - ixentry = 0; - offset = 0; + offset = 0; } /* we record the event in the journal */ trans = *_trans; - jindex = &trans->jentry->u.ixdata[0]; - jindex->next_ino = UINT_MAX; - jindex->next_index = UINT_MAX; - - trans->jentry->mark = CACHEFS_ONDISC_UJNL_INODE_DELETING; - - if (iinode) { - trans->jentry->index = iino; - trans->jentry->ixentry = ixentry; - trans->jentry->pgnum = ixpage->index; - trans->jentry->block = - __cachefs_get_page_block(ixpage)->bix; - trans->jentry->entry = offset; - trans->jentry->upblock = iinode->metadata->bix; - trans->jentry->upentry = iinode->metadata_offset; - trans->jentry->size = i_size_read(&iinode->vfs_inode); - trans->jentry->count = iinode->index_dsize; - - metadata = cachefs_metadata_preread(iinode); - jindex->next_index = metadata->freelink; - 
cachefs_metadata_postread(iinode, metadata); - - cachefs_trans_affects_page(trans, - fscache_page_grab_private(ixpage), - trans->jentry->entry, - trans->jentry->count); - - cachefs_trans_affects_inode(trans, iinode); - } + trans->jentry->mark = CACHEFS_ONDISC_UJNL_RECYC_INODE; + trans->jentry->u.recyc_inode.ino = super->rcm_inode->vfs_inode.i_ino; + trans->jentry->u.recyc_inode.ino_block = super->rcm_inode->metadata->bix; + trans->jentry->u.recyc_inode.ix_block = cachefs_get_ixpage_bix(ixpage); + trans->jentry->u.recyc_inode.ix_ino_block = iinode->metadata->bix; + trans->jentry->u.recyc_inode.ix_ino = iino; + trans->jentry->u.recyc_inode.ix_entry = ixentry; metadata = cachefs_metadata_preread(super->imetadata); - jindex->next_ino = metadata->freelink; + trans->jentry->u.recyc_inode.ino_next = metadata->freelink; cachefs_metadata_postread(super->imetadata, metadata); - trans->jentry->auxblock = super->rcm_inode->metadata->bix; - trans->jentry->auxentry = super->rcm_inode->metadata_offset; - cachefs_trans_affects_inode(trans, super->rcm_inode); cachefs_trans_affects_inode(trans, super->imetadata); @@ -584,6 +558,22 @@ trans->jentry->rcm_ptrnext = 0; trans->jentry->rcm_ptrstop = 0; + /* record the parent index info if there was one */ + if (iinode) { + trans->jentry->u.recyc_inode.ix_dsize = iinode->index_dsize; + trans->jentry->u.recyc_inode.ix_esize = iinode->index_esize; + trans->jentry->u.recyc_inode.ix_epp = iinode->index_epp; + + metadata = cachefs_metadata_preread(iinode); + trans->jentry->u.recyc_inode.ix_next = metadata->freelink; + cachefs_metadata_postread(iinode, metadata); + + cachefs_trans_affects_ixpage(trans, ixpage, offset, + iinode->index_esize); + + cachefs_trans_affects_inode(trans, iinode); + } + /* write the transaction to the journal */ ret = cachefs_trans_mark(trans); if (ret < 0) @@ -592,37 +582,36 @@ *_trans = NULL; /* we can now make the changes in memory - * we start by freeing up the parent index entry */ + * - we start by freeing up the 
parent index entry */ if (iinode) { - cachefs_page_modify(super, &ixpage); + cachefs_ixpage_modify(super, &ixpage); xent = kmap_atomic(ixpage, KM_USER0) + offset; xent->state = CACHEFS_ONDISC_INDEX_FREE; xent->type = 0; xent->ino = 0; - xent->u.freelink[0] = jindex->next_index; + xent->u.freelink[0] = trans->jentry->u.recyc_inode.ix_next; memset(xent->u.data, 0, iinode->index_dsize); kunmap_atomic(xent, KM_USER0); /* modify the index inode metadata entry */ metadata = cachefs_metadata_prewrite(iinode); - metadata->freelink = ixentry; + metadata->freelink = ixentry; cachefs_metadata_postwrite(iinode, metadata); } - /* clear the index's metadata definition */ + /* clear the inode's metadata definition */ metadata = cachefs_metadata_prewrite(super->rcm_inode); memset(metadata, 0, sizeof(super->imetadata->index_esize)); metadata->header.state = CACHEFS_ONDISC_INDEX_FREE; - metadata->freelink = jindex->next_ino; - metadata->mtime = CURRENT_TIME.tv_sec; + metadata->freelink = trans->jentry->u.recyc_inode.ino_next; metadata->atime = CURRENT_TIME.tv_sec; cachefs_metadata_postwrite(super->rcm_inode, metadata); /* modify the metadata inode metadata entry */ metadata = cachefs_metadata_prewrite(super->imetadata); - metadata->freelink = trans->jentry->ino; + metadata->freelink = trans->jentry->u.recyc_inode.ino; cachefs_metadata_postwrite(super->imetadata, metadata); /* do the writing */ @@ -639,6 +628,146 @@ /*****************************************************************************/ /* + * mark as obsolete the next inode pinned by an entry in the index currently + * being reclaimed + * - use super->rcm_block to iterate through the index entry numbers + */ +int cachefs_recycle_one_index_entry(struct cachefs_super *super, + struct cachefs_transaction **_trans) +{ + struct cachefs_ondisc_index_entry *xent; + struct cachefs_ondisc_metadata *metadata; + struct cachefs_transaction *trans; + struct cachefs_inode *inode = NULL; + unsigned long flags; + struct page *page = NULL; + 
unsigned pgnum, offset, ino; + int ret; + + _enter("{%x,%x}", super->rcm_ino, super->rcm_block); + + try_next_block: + /* find the next block we're going to scan */ + pgnum = super->rcm_block / super->rcm_inode->index_epp; + offset = super->rcm_block % super->rcm_inode->index_epp; + offset *= super->rcm_inode->index_esize; + + if (pgnum >= (i_size_read(&super->rcm_inode->vfs_inode) >> PAGE_SHIFT)) { + /* we've done this index entirely */ + _leave(" = 0"); + return 0; + } + + /* get the page holding the next index entry and extract the inode + * number from it */ + page = cachefs_get_page(super->rcm_inode, pgnum); + if (IS_ERR(page)) { + if (PTR_ERR(page) == -EIO) { + /* forget about this block - it's buggy */ + super->rcm_block = + (pgnum + 1) * super->rcm_inode->index_epp; + } + + _leave(" = %ld", PTR_ERR(page)); + return PTR_ERR(page); + } + + try_next_entry: + xent = kmap_atomic(page, KM_USER0) + offset; + ino = xent->ino; + BUG_ON(ino == 0 && xent->state != CACHEFS_ONDISC_INDEX_FREE); + BUG_ON(ino != 0 && xent->state == CACHEFS_ONDISC_INDEX_FREE); + kunmap_atomic(xent, KM_USER0); + + if (!ino) { + _debug("skip slot %u", super->rcm_block); + super->rcm_block++; + + offset += super->rcm_inode->index_esize; + if (offset + super->rcm_inode->index_esize <= PAGE_SIZE) + goto try_next_entry; + + cachefs_put_page(page); + page = NULL; + goto try_next_block; + } + + inode = cachefs_iget(super, ino); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + if (ret == -EIO) + super->rcm_block++; + goto error_noinode; + } + + /* use the pre-created a transaction to record the change of state */ + trans = *_trans; + + trans->jentry->mark = CACHEFS_ONDISC_UJNL_DELETE_INODE; + trans->jentry->u.delete_inode.ino = ino; + trans->jentry->u.delete_inode.ix_ino = super->rcm_ino; + trans->jentry->u.delete_inode.ix_block = cachefs_get_ixpage_bix(page); + trans->jentry->u.delete_inode.ix_entry = super->rcm_block; + trans->jentry->u.delete_inode.ino_block = inode->metadata->bix; + 
trans->jentry->u.update_index.ix_esize = super->rcm_inode->index_esize; + trans->jentry->u.update_index.ix_epp = super->rcm_inode->index_epp; + + cachefs_trans_affects_inode(trans, inode); + + trans->jentry->rcm_block = super->rcm_block + 1; + + /* record the transaction in the journal */ + ret = cachefs_trans_mark(trans); + if (ret < 0) + goto error; + + *_trans = NULL; + + /* modify the inode metadata entry */ + metadata = cachefs_metadata_prewrite(inode); + metadata->header.state = CACHEFS_ONDISC_INDEX_RECYCLE; + metadata->pindex = 0; + metadata->pindex_entry = 0; + cachefs_metadata_postwrite(inode, metadata); + + /* commit the changes to disc */ + cachefs_trans_commit(trans); + + /* attempt to schedule the inode we've just marked for immediate + * reclamation */ + spin_lock_irqsave(&super->reap_lock, flags); + + if (CIRC_SPACE(super->reap_head, + super->reap_tail, + CACHEFS_REAP_BUFSIZE) > 0 + ) { + super->reap_buf[super->reap_head] = + inode->vfs_inode.i_ino; + super->reap_head = + (super->reap_head + 1) & + (CACHEFS_REAP_BUFSIZE - 1); + } + else { + set_bit(CACHEFS_SUPER_DO_REAP, &super->flags); + } + + spin_unlock_irqrestore(&super->reap_lock, flags); + + /* there may be more to do on this index */ + ret = -EAGAIN; + + error: + cachefs_iput(inode); + error_noinode: + cachefs_put_page(page); + + _leave(" = %d [%u]", ret, super->rcm_block); + return ret; + +} /* end cachefs_recycle_one_index_entry() */ + +/*****************************************************************************/ +/* * do next step in reclamation of a file * - need to dispose of: * (1) index entries in file content (if it's an index file) @@ -686,7 +815,6 @@ return; } - trans->jentry->ino = super->rcm_ino; trans->jentry->rcm_ino = super->rcm_ino; trans->jentry->rcm_indirect = super->rcm_indirect; trans->jentry->rcm_block = super->rcm_block; @@ -741,7 +869,7 @@ } /* read the double-indirection block from disc */ - ret = cachefs_block_read(super, NULL, dblbix, 0, NULL, &dpage); + ret = 
cachefs_block_read(super, dblbix, 0, NULL, &dpage); if (ret < 0) goto error; @@ -769,7 +897,7 @@ /* start processing the double indirect block */ super->rcm_curpage = dpage; - super->rcm_block = __cachefs_get_page_block(dpage)->bix; + super->rcm_block = dblbix; goto process_pointer_array; } @@ -807,7 +935,7 @@ /* read an array of block pointers into the page cache */ start_pointer_array: - ret = cachefs_block_read(super, NULL, trans->jentry->rcm_block, 0, + ret = cachefs_block_read(super, trans->jentry->rcm_block, 0, NULL, &super->rcm_curpage); if (ret < 0) goto error; @@ -882,9 +1010,9 @@ } /* see if there's an inode we can start reclaiming */ - if (super->rcm_imm_head != super->rcm_imm_tail) { + if (super->reap_head != super->reap_tail) { _debug("begin reclaim {%u-%u}", - super->rcm_imm_tail, super->rcm_imm_head); + super->reap_tail, super->reap_head); /* allocate a transaction to record the event */ trans = cachefs_trans_alloc(super, GFP_KERNEL); @@ -893,11 +1021,10 @@ goto done; } - /* we can now make the changes in memory */ - trans->jentry->ino = super->rcm_imm_buf[super->rcm_imm_tail]; - trans->jentry->mark = CACHEFS_ONDISC_UJNL_INODE_RECLAIMING; + trans->jentry->mark = CACHEFS_ONDISC_UJNL_REAP_INODE; + trans->jentry->u.reap_inode.ino = super->reap_buf[super->reap_tail]; - trans->jentry->rcm_ino = trans->jentry->ino; + trans->jentry->rcm_ino = trans->jentry->u.reap_inode.ino; trans->jentry->rcm_indirect = 0; trans->jentry->rcm_block = 0; trans->jentry->rcm_ptrnext = 0; @@ -919,9 +1046,8 @@ cachefs_trans_commit(trans); /* remove the inode from the reclamation ring buffer */ - super->rcm_imm_tail = - (super->rcm_imm_tail + 1) & - (CACHEFS_RCM_IMM_BUFSIZE - 1); + super->reap_tail = + (super->reap_tail + 1) & (CACHEFS_REAP_BUFSIZE - 1); goto done; } @@ -929,162 +1055,9 @@ /* if we haven't finished digesting the current inode, or there are * more to eat, set a flag to call us back later */ if (super->rcm_ino || - super->rcm_imm_head != super->rcm_imm_tail + 
super->reap_head != super->reap_tail ) set_bit(CACHEFS_SUPER_DO_RECLAIM, &super->flags); _leave(""); } /* end cachefs_recycle_reclaim() */ - -/*****************************************************************************/ -/* - * unallocate and recycle a single data metadata block that's marked as invalid - * in the validity journal - */ -void cachefs_recycle_unallocate_data_block(struct cachefs_super *super) -{ - struct cachefs_ondisc_free_node *node; - struct cachefs_transaction *trans; - struct cachefs_vj_entry *vjentry; - struct cachefs_block *rcyblock = NULL, *upblock = NULL, *block; - struct page *rcypage = NULL, *uppage = NULL; - void *ptr; - int ret; - - _enter(""); - - BUG_ON(list_empty(&super->vjnl_unallocq)); - - /* we can access the next pointer without a lock because we know we're - * the only ones going to change it now */ - vjentry = list_entry(super->vjnl_unallocq.next, - struct cachefs_vj_entry, - link); - - /* allocate a transaction to record the event */ - ret = -ENOMEM; - trans = cachefs_trans_alloc(super, GFP_KERNEL); - if (!trans) - goto error; - - trans->jentry->mark = CACHEFS_ONDISC_UJNL_DATA_UNALLOCING; - trans->jentry->index = super->recycle_cur; - trans->jentry->ixentry = super->recycle_cur_n; - trans->jentry->ino = vjentry->ino; - trans->jentry->auxmark = vjentry->vslot; - trans->jentry->block = vjentry->bix; - trans->jentry->upblock = vjentry->upblock; - trans->jentry->upentry = vjentry->upentry; - trans->jentry->auxblock = super->recycle_cur; - trans->jentry->auxentry = ~0; - - ret = cachefs_block_read(super, NULL, vjentry->upblock, 1, - &upblock, &uppage); - if (ret < 0) - goto error_free; - - cachefs_trans_affects_block(trans, upblock, vjentry->upentry, - sizeof(cachefs_blockix_t)); - - cachefs_trans_affects_block( - trans, vjentry->vblock, vjentry->ventry, - sizeof(struct cachefs_ondisc_validity_journal)); - - block = cachefs_block_find(super, vjentry->bix); - if (!IS_ERR(block)) { - clear_bit(CACHEFS_BLOCK_NETFSDATA, 
&block->flags); - cachefs_block_put(block); - } - - /* determine how we're going to deal with this newly freed block */ - if (super->recycle_room == 0) { - /* incorporate it as a new recycling node */ - ret = cachefs_block_read(super, NULL, vjentry->bix, 1, - &rcyblock, &rcypage); - if (ret < 0) - goto error_free; - - cachefs_trans_affects_block(trans, rcyblock, 0, PAGE_SIZE); - - trans->jentry->recycle_cur = vjentry->bix; - trans->changed |= CACHEFS_TRANS_CHANGED_RECYCLE; - } - else { - /* we can add into an existing recycling node */ - cachefs_trans_affects_block(trans, super->recycle_block, 0, - PAGE_SIZE); - - trans->jentry->auxentry = - CACHEFS_ONDISC_LEAVES_PER_FREE_NODE - - super->recycle_room; - } - - /* write the transaction to the journal */ - ret = cachefs_trans_mark(trans); - if (ret < 0) - goto error_free; - - /* we can now make the changes in memory */ - cachefs_block_modify(super, vjentry->vblock, &vjentry->vpage); - ptr = kmap_atomic(vjentry->vpage, KM_USER0) + vjentry->ventry; - memset(ptr, 0, sizeof(struct cachefs_ondisc_validity_journal)); - kunmap_atomic(ptr, KM_USER0); - - cachefs_block_modify(super, upblock, &uppage); - ptr = kmap_atomic(uppage, KM_USER0) + vjentry->upentry; - memset(ptr, 0, sizeof(cachefs_blockix_t)); - kunmap_atomic(ptr, KM_USER0); - - if (trans->changed & CACHEFS_TRANS_CHANGED_RECYCLE) { - /* turn into a new node in the recycling stack */ - node = kmap_atomic(rcypage, KM_USER0); - node->next = trans->jentry->index; - node->count = trans->jentry->ixentry; - kunmap_atomic(node, KM_USER0); - - super->recycle_room = CACHEFS_ONDISC_LEAVES_PER_FREE_NODE; - rcypage = xchg(&super->recycle_node, rcypage); - rcyblock = xchg(&super->recycle_block, rcyblock); - } - else { - /* add to existing recycling node */ - cachefs_block_modify(super, super->recycle_block, - &super->recycle_node); - - node = kmap_atomic(super->recycle_node, KM_USER0); - node->leaves[trans->jentry->auxentry] = vjentry->bix; - kunmap_atomic(node, KM_USER0); - - 
super->recycle_room--; - super->recycle_cur_n++; - if (super->recycle_room == 0) - super->recycle_cur_n++; - } - - /* queue the transaction to be written to disc */ - cachefs_trans_commit(trans); - - /* we've done that entry */ - spin_lock_irq(&super->vjnl_lock); - list_del_init(&vjentry->link); - spin_unlock_irq(&super->vjnl_lock); - - /* done */ - cachefs_vj_release(super, vjentry); - cachefs_put_page(uppage); - cachefs_put_page(rcypage); - cachefs_block_put(upblock); - cachefs_block_put(rcyblock); - _leave(""); - return; - - error_free: - cachefs_put_page(uppage); - cachefs_put_page(rcypage); - cachefs_block_put(upblock); - cachefs_block_put(rcyblock); - cachefs_trans_put(trans); - error: - _leave(" [error %d]", ret); - -} /* end cachefs_recycle_unallocate_data_block() */ diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/replay.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/replay.c --- linux-2.6.12-rc2-mm3/fs/cachefs/replay.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/replay.c 2005-04-21 13:52:15.000000000 +0100 @@ -18,11 +18,12 @@ #include "cachefs-int.h" #define UJNL_WRAP(X) ((X) & (CACHEFS_ONDISC_UJNL_NUMENTS - 1)) -#define BLOCK_VALID(BLOCK,PAGE,JENTRY) \ - ((BLOCK) && \ - (uint16_t) (unsigned long) (BLOCK)->ref <= (JENTRY)->serial && \ - (wait_on_page_locked((PAGE)), 1) \ - ) +#define BLOCK_VALID(BLOCK, PAGE, JENTRY) \ +({ \ + (BLOCK) && \ + (BLOCK)->serial <= (JENTRY)->serial && \ + (wait_on_page_locked((PAGE)), 1); \ +}) struct cachefs_replay_find_batch_desc { read_descriptor_t desc; @@ -49,59 +50,38 @@ static int cachefs_replay_ujnl_recyc_makeready(struct cachefs_super *super, struct cachefs_ondisc_update_journal *jentry, struct cachefs_transaction *trans); +static int cachefs_replay_ujnl_recyc_inode(struct cachefs_super *super, + struct cachefs_ondisc_update_journal *jentry, + struct cachefs_transaction *trans); +static int cachefs_replay_ujnl_reap_inode(struct cachefs_super *super, + struct cachefs_ondisc_update_journal 
*jentry, + struct cachefs_transaction *trans); -static int cachefs_replay_ujnl_inode_creating(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans); -static int cachefs_replay_ujnl_inode_updating(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans); -static int cachefs_replay_ujnl_inode_deleting(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans); -static int cachefs_replay_ujnl_inode_mark_reclaim(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans); -static int cachefs_replay_ujnl_inode_reclaiming(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans); -static int cachefs_replay_ujnl_index_extending(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans); -static int cachefs_replay_ujnl_index_modifying(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans); - -static int cachefs_replay_ujnl_data_allocing(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans); -static int cachefs_replay_ujnl_data_written(struct cachefs_super *super, +static int cachefs_replay_ujnl_create_inode(struct cachefs_super *super, struct cachefs_ondisc_update_journal *jentry, struct cachefs_transaction *trans); -static int cachefs_replay_ujnl_data_unallocing(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans); -static int cachefs_replay_ujnl_indirect_allocing(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans); +static int cachefs_replay_ujnl_delete_inode(struct cachefs_super *super, + struct 
cachefs_ondisc_update_journal *jentry, + struct cachefs_transaction *trans); +static int cachefs_replay_ujnl_update_index(struct cachefs_super *super, + struct cachefs_ondisc_update_journal *jentry, + struct cachefs_transaction *trans); +static int cachefs_replay_ujnl_write_data(struct cachefs_super *super, + struct cachefs_ondisc_update_journal *jentry, + struct cachefs_transaction *trans); static const cachefs_ujnl_replay_func_t cachefs_ujnl_replay_tbl[CACHEFS_ONDISC_UJNL__LAST] = { [CACHEFS_ONDISC_UJNL_RECYC_BEGIN_NEW] = cachefs_replay_ujnl_recyc_begin_new, [CACHEFS_ONDISC_UJNL_RECYC_TRANSFER] = cachefs_replay_ujnl_recyc_transfer, [CACHEFS_ONDISC_UJNL_RECYC_SCAVENGE] = cachefs_replay_ujnl_recyc_scavenge, [CACHEFS_ONDISC_UJNL_RECYC_MAKEREADY] = cachefs_replay_ujnl_recyc_makeready, - [CACHEFS_ONDISC_UJNL_INODE_CREATING] = cachefs_replay_ujnl_inode_creating, - [CACHEFS_ONDISC_UJNL_INODE_UPDATING] = cachefs_replay_ujnl_inode_updating, - [CACHEFS_ONDISC_UJNL_INODE_DELETING] = cachefs_replay_ujnl_inode_deleting, - [CACHEFS_ONDISC_UJNL_INODE_MARK_RECLAIM]= cachefs_replay_ujnl_inode_mark_reclaim, - [CACHEFS_ONDISC_UJNL_INODE_RECLAIMING] = cachefs_replay_ujnl_inode_reclaiming, - [CACHEFS_ONDISC_UJNL_DATA_ALLOCING] = cachefs_replay_ujnl_data_allocing, - [CACHEFS_ONDISC_UJNL_DATA_WRITTEN] = cachefs_replay_ujnl_data_written, - [CACHEFS_ONDISC_UJNL_DATA_UNALLOCING] = cachefs_replay_ujnl_data_unallocing, - [CACHEFS_ONDISC_UJNL_INDIRECT_ALLOCING] = cachefs_replay_ujnl_indirect_allocing, - [CACHEFS_ONDISC_UJNL_INDEX_EXTENDING] = cachefs_replay_ujnl_index_extending, - [CACHEFS_ONDISC_UJNL_INDEX_CREATING] = cachefs_replay_ujnl_inode_creating, - [CACHEFS_ONDISC_UJNL_INDEX_UPDATING] = cachefs_replay_ujnl_index_modifying + [CACHEFS_ONDISC_UJNL_RECYC_INODE] = cachefs_replay_ujnl_recyc_inode, + [CACHEFS_ONDISC_UJNL_REAP_INODE] = cachefs_replay_ujnl_reap_inode, + [CACHEFS_ONDISC_UJNL_CREATE_INDEX] = cachefs_replay_ujnl_create_inode, + [CACHEFS_ONDISC_UJNL_CREATE_FILE] = 
cachefs_replay_ujnl_create_inode, + [CACHEFS_ONDISC_UJNL_DELETE_INODE] = cachefs_replay_ujnl_delete_inode, + [CACHEFS_ONDISC_UJNL_UPDATE_INDEX] = cachefs_replay_ujnl_update_index, + [CACHEFS_ONDISC_UJNL_WRITE_DATA] = cachefs_replay_ujnl_write_data, }; /*****************************************************************************/ @@ -282,6 +262,45 @@ /*****************************************************************************/ /* + * add a chain allocation to the overlap data + */ +static int cachefs_ujnl_determine_overlap_chain(struct cachefs_super *super, + struct cachefs_ondisc_update_journal *jentry, + struct cachefs_ondisc_ujnl_chain *chain) +{ + struct cachefs_block *block; + unsigned step; + uint8_t allocmap; + + allocmap = chain->allocmap; + + for (step = 0; + step < CACHEFS_ONDISC_UJNL_ALLOC_SLOTS; + step++, allocmap >>= 1 + ) { + if (!(allocmap & 1)) + continue; + + /* record the affected block in the lookup tree */ + block = cachefs_block_insert(super, chain->block[step]); + if (IS_ERR(block)) + return PTR_ERR(block); + + /* link to the block replaying queue */ + block->serial = jentry->serial; + if (list_empty(&block->batch_link)) + list_add_tail(&block->batch_link, + &super->ujnl_replayq); + else + cachefs_block_put(block); + } + + return 0; + +} /* end cachefs_ujnl_determine_overlap_chain() */ + +/*****************************************************************************/ +/* * determine the earliest journal entry applicable to each block mentioned in * the journal (if a block changes content type halfway through the journal, * older transactions may not need to be reapplied) @@ -296,6 +315,7 @@ struct cachefs_block *block; unsigned long stop; void *data; + int ret; _enter("{%zx},{%lu},%lu,%lu", desc->count, page->index, offset, size); @@ -314,24 +334,52 @@ jentry = (struct cachefs_ondisc_update_journal *) data; switch (jentry->mark) { - /* all these transactions have jentry->block indicating - * the block being allocated */ + /* recyc-begin-new 
nominates a block to use as an array + * of free block pointers */ case CACHEFS_ONDISC_UJNL_RECYC_BEGIN_NEW: - case CACHEFS_ONDISC_UJNL_DATA_ALLOCING: - case CACHEFS_ONDISC_UJNL_INDIRECT_ALLOCING: - case CACHEFS_ONDISC_UJNL_INDEX_EXTENDING: /* record the affected block in the lookup tree */ - block = cachefs_block_insert(super, jentry->block); + block = cachefs_block_insert(super, + jentry->u.begin_new.front); if (IS_ERR(block)) { desc->error = PTR_ERR(block); goto error; } /* link to the block replaying queue */ - block->ref = (void *) (unsigned long) jentry->serial; + block->serial = jentry->serial; if (list_empty(&block->batch_link)) list_add_tail(&block->batch_link, &super->ujnl_replayq); + else + cachefs_block_put(block); + break; + + /* inode creation populates two chains */ + case CACHEFS_ONDISC_UJNL_CREATE_INDEX: + case CACHEFS_ONDISC_UJNL_CREATE_FILE: + ret = cachefs_ujnl_determine_overlap_chain( + super, jentry, &jentry->u.create_inode.ino_chain); + if (ret < 0) { + desc->error = ret; + goto error; + } + + ret = cachefs_ujnl_determine_overlap_chain( + super, jentry, &jentry->u.create_inode.ix_chain); + if (ret < 0) { + desc->error = ret; + goto error; + } + break; + + /* data write populates one chain */ + case CACHEFS_ONDISC_UJNL_WRITE_DATA: + ret = cachefs_ujnl_determine_overlap_chain( + super, jentry, &jentry->u.write_data.chain); + if (ret < 0) { + desc->error = ret; + goto error; + } break; default: @@ -523,7 +571,7 @@ /* first of all scan to determine the bounds of the latest batch of * u-journal entries */ memset(&find_batch, 0, sizeof(find_batch)); - find_batch.desc.count = super->layout->bix_vjournal; + find_batch.desc.count = super->layout->bix_wbjournal; find_batch.desc.count -= super->layout->bix_ujournal; find_batch.desc.count *= super->layout->bsize; find_batch.desc.arg.buf = (char *) super; @@ -597,7 +645,7 @@ struct cachefs_block, batch_link); list_del_init(&block->batch_link); - block->ref = NULL; + block->serial = 0;
cachefs_block_put(block); } @@ -640,7 +688,6 @@ if (super->alloc_cur) { /* read the alloc stack TOS */ ret = cachefs_block_read(super, - CACHEFS_FS_I(super->imisc), super->alloc_cur, 0, &super->alloc_block, @@ -671,7 +718,6 @@ if (super->recycle_cur) { /* read the recycling stack TOS */ ret = cachefs_block_read(super, - CACHEFS_FS_I(super->imisc), super->recycle_cur, 0, &super->recycle_block, @@ -734,7 +780,8 @@ /*****************************************************************************/ /* - * replay inauguration of a node as the TOS node of the recycling stack + * replay CACHEFS_ONDISC_UJNL_RECYC_BEGIN_NEW + * - inauguration of a node as the TOS node of the recycling stack */ static int cachefs_replay_ujnl_recyc_begin_new(struct cachefs_super *super, struct cachefs_ondisc_update_journal *jentry, @@ -747,7 +794,8 @@ _enter(",{%hd.%hu}", jentry->batch, jentry->serial); - ret = cachefs_block_read(super, NULL, jentry->block, 0, &block, &page); + ret = cachefs_block_read(super, jentry->u.begin_new.front, 0, + &block, &page); if (ret < 0) goto error; @@ -756,8 +804,8 @@ if (BLOCK_VALID(block, page, jentry)) { node = kmap_atomic(page, KM_USER0); clear_page(node); - node->next = jentry->index; - node->count = jentry->ixentry; + node->next = jentry->u.begin_new.second; + node->count = jentry->u.begin_new.seccnt; cachefs_trans_replays_effect(trans, block, "rcynode"); kunmap_atomic(node, KM_USER0); } @@ -773,8 +821,9 @@ /*****************************************************************************/ /* - * replay the transferal of the recycling stack to the allocation stack, either - * completely or from the 2OS down + * replay CACHEFS_ONDISC_UJNL_RECYC_TRANSFER + * - transferal of the recycling stack to the allocation stack, either + * completely or from the 2OS down */ static int cachefs_replay_ujnl_recyc_transfer(struct cachefs_super *super, struct cachefs_ondisc_update_journal *jentry, @@ -790,8 +839,9 @@ /* we need to break the link from the recycling stack TOS to 
the new * alloc stack TOS if we only moved part of the stack */ - if (jentry->upblock) { - ret = cachefs_block_read(super, NULL, jentry->upblock, 0, + if (jentry->u.recycle_transfer.front) { + ret = cachefs_block_read(super, + jentry->u.recycle_transfer.front, 0, &block, &page); if (ret < 0) goto error; @@ -820,8 +870,9 @@ /*****************************************************************************/ /* - * replay the transferal of block pointers from source to recycling stack - * made during pointer block recycling + * replay CACHEFS_ONDISC_UJNL_RECYC_SCAVENGE + * - transferal of block pointers from source to recycling stack made during + * pointer block recycling */ static int cachefs_replay_ujnl_recyc_scavenge(struct cachefs_super *super, struct cachefs_ondisc_update_journal *jentry, @@ -830,21 +881,25 @@ struct cachefs_ondisc_free_node *node; struct cachefs_block *block; struct page *page; + unsigned dstix, count; int ret; _enter(",{%hd.%hu}", jentry->batch, jentry->serial); /* we need to copy the pointers cached in the journal entry into the * recycling block */ - ret = cachefs_block_read(super, NULL, jentry->auxblock, 0, + ret = cachefs_block_read(super, jentry->u.recycle_scavenge.dst, 0, &block, &page); if (ret < 0) goto error; + dstix = jentry->u.recycle_scavenge.dstix; + count = jentry->u.recycle_scavenge.count; + ret = -EINVAL; - if (jentry->auxentry >= CACHEFS_ONDISC_LEAVES_PER_FREE_NODE || - jentry->auxentry + jentry->count >= - CACHEFS_ONDISC_LEAVES_PER_FREE_NODE) { + if (dstix >= CACHEFS_ONDISC_LEAVES_PER_FREE_NODE || + dstix + count >= CACHEFS_ONDISC_LEAVES_PER_FREE_NODE + ) { printk("CacheFS:" " UJNL Scavenge entry specifies out-of-range window\n"); goto error2; @@ -853,13 +908,13 @@ if (BLOCK_VALID(block, page, jentry)) { node = kmap_atomic(page, KM_USER0); - if (memcmp(&node->leaves[jentry->auxblock], - &jentry->u.rcyptrs[0], - jentry->count * sizeof(cachefs_blockix_t) + if (memcmp(&node->leaves[dstix], + jentry->u.recycle_scavenge.ptrs, + count 
* sizeof(cachefs_blockix_t) ) != 0) { - memcpy(&node->leaves[jentry->auxblock], - &jentry->u.rcyptrs[0], - jentry->count * sizeof(cachefs_blockix_t)); + memcpy(&node->leaves[dstix], + jentry->u.recycle_scavenge.ptrs, + count * sizeof(cachefs_blockix_t)); cachefs_trans_replays_effect(trans, block, "rcyptrs"); } kunmap_atomic(node, KM_USER0); @@ -879,7 +934,8 @@ /*****************************************************************************/ /* - * replay the addition of spare space onto the recycling stack + * replay CACHEFS_ONDISC_UJNL_RECYC_MAKEREADY + * - the addition of spare space onto the recycling stack */ static int cachefs_replay_ujnl_recyc_makeready(struct cachefs_super *super, struct cachefs_ondisc_update_journal *jentry, @@ -890,22 +946,26 @@ struct cachefs_block *block, *superblock; cachefs_blockix_t bix; struct page *page, *spage; - int loop, changed, ret; + unsigned dstix, count; + int changed, ret; _enter(",{%hd.%hu}", jentry->batch, jentry->serial); - ret = cachefs_block_read(super, NULL, jentry->block, 0, + ret = cachefs_block_read(super, jentry->u.make_ready.dst, 0, &block, &page); if (ret < 0) goto error; - ret = cachefs_block_read(super, NULL, 0, 0, &superblock, &spage); + ret = cachefs_block_read(super, 0, 0, &superblock, &spage); if (ret < 0) goto error2; + dstix = jentry->u.make_ready.dstix; + count = jentry->u.make_ready.count; + ret = -EINVAL; - if (jentry->entry >= CACHEFS_ONDISC_LEAVES_PER_FREE_NODE || - jentry->entry + jentry->count > CACHEFS_ONDISC_LEAVES_PER_FREE_NODE + if (dstix >= CACHEFS_ONDISC_LEAVES_PER_FREE_NODE || + dstix + count > CACHEFS_ONDISC_LEAVES_PER_FREE_NODE ) { printk("CacheFS:" " UJNL MakeReady entry specifies out-of-range" @@ -916,8 +976,8 @@ /* we need to make sure the superblock keeps track of the top of the * ready area */ layout = kmap_atomic(spage, KM_USER0); - if (layout->bix_unready < jentry->pgnum) { - layout->bix_unready = jentry->pgnum; + if (layout->bix_unready < jentry->u.make_ready.unready) { + 
layout->bix_unready = jentry->u.make_ready.unready; cachefs_trans_replays_effect(trans, superblock, "unready"); } kunmap_atomic(layout, KM_USER0); @@ -927,11 +987,12 @@ node = kmap_atomic(page, KM_USER0); changed = 0; - bix = jentry->auxblock; - for (loop = 0; loop < jentry->count; loop++, bix++) { - if (node->leaves[loop] != bix) { + count += dstix; + bix = jentry->u.make_ready.first; + for (; dstix < count; dstix++, bix++) { + if (node->leaves[dstix] != bix) { changed = 1; - node->leaves[loop] = bix; + node->leaves[dstix] = bix; } } @@ -956,53 +1017,66 @@ /*****************************************************************************/ /* - * replay the creation of a new data or index inode + * replay CACHEFS_ONDISC_UJNL_RECYC_INODE + * - recycling of an inode and its associated index entry */ -static int cachefs_replay_ujnl_inode_creating(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans) +static int cachefs_replay_ujnl_recyc_inode(struct cachefs_super *super, + struct cachefs_ondisc_update_journal *jentry, + struct cachefs_transaction *trans) { struct cachefs_ondisc_index_entry *xent; - struct cachefs_ondisc_ujnl_index *jindex; struct cachefs_ondisc_metadata *metadata; - struct cachefs_block *metameta, *inometa, *indexmeta, *indexdata; - struct page *metapage, *inompage, *ixmpage, *ixdatapage; - uint32_t type; + struct cachefs_block *metameta, *inometa; + struct cachefs_block *indexmeta = NULL, *indexdata = NULL; + struct page *metapage, *inompage, *ixmpage = NULL, *ixdatapage = NULL; + unsigned inomoff, ixmoff, ixdataoff, tmp, shift; + uint32_t next_ino, next_index; int ret; _enter(",{%hd.%hu}", jentry->batch, jentry->serial); - jindex = &jentry->u.ixdata[0]; - - ret = cachefs_block_read(super, NULL, 1, 0, &metameta, &metapage); + ret = cachefs_block_read(super, 1, 0, &metameta, &metapage); if (ret < 0) goto error; - ret = cachefs_block_read(super, NULL, jentry->auxblock, 0, + ret = 
cachefs_block_read(super, jentry->u.recyc_inode.ino_block, 0, &inometa, &inompage); if (ret < 0) goto error2; - ret = cachefs_block_read(super, NULL, jentry->upblock, 0, - &indexmeta, &ixmpage); - if (ret < 0) - goto error3; + ixdataoff = 0; + if (jentry->u.recyc_inode.ix_ino) { + ret = cachefs_block_read(super, + jentry->u.recyc_inode.ix_ino_block, 0, + &indexmeta, &ixmpage); + if (ret < 0) + goto error3; - ret = cachefs_block_read(super, NULL, jentry->block, 0, - &indexdata, &ixdatapage); - if (ret < 0) - goto error4; + ret = cachefs_block_read(super, + jentry->u.recyc_inode.ix_block, 0, + &indexdata, &ixdatapage); + if (ret < 0) + goto error4; - type = CACHEFS_ONDISC_INDEX_DATAFILE; - if (jentry->mark == CACHEFS_ONDISC_UJNL_INDEX_CREATING) - type = CACHEFS_ONDISC_INDEX_INDEXFILE; + ixdataoff = jentry->u.recyc_inode.ix_entry; + ixdataoff %= jentry->u.recyc_inode.ix_epp ?: 1; + ixdataoff *= jentry->u.recyc_inode.ix_esize; + } + + shift = super->layout->metadata_bits; + tmp = (1 << (PAGE_SHIFT - shift)) - 1; + + inomoff = (jentry->u.recyc_inode.ino & tmp) << shift; + ixmoff = (jentry->u.recyc_inode.ix_ino & tmp) << shift; + next_ino = jentry->u.recyc_inode.ino_next; + next_index = jentry->u.recyc_inode.ix_next; /* make sure the metadata file's freelink pointer is correct */ if (BLOCK_VALID(metameta, metapage, jentry)) { metadata = kmap_atomic(metapage, KM_USER0) + super->layout->metadata_size * CACHEFS_INO_METADATA; - if (metadata->freelink != jindex->next_ino) { - metadata->freelink = jindex->next_ino; + if (metadata->freelink != next_ino) { + metadata->freelink = next_ino; cachefs_trans_replays_effect(trans, metameta, "meta.freelink"); } @@ -1011,41 +1085,34 @@ /* make sure the index file's freelink pointer is correct */ if (BLOCK_VALID(indexmeta, ixmpage, jentry)) { - metadata = kmap_atomic(ixmpage, KM_USER0) + jentry->upentry; - if (metadata->freelink != jindex->next_index || - metadata->size != jentry->size) { - metadata->freelink = jindex->next_index; - 
metadata->size = jentry->size; + metadata = kmap_atomic(ixmpage, KM_USER0) + ixmoff; + if (metadata->freelink != next_index + ) { + metadata->freelink = next_index; cachefs_trans_replays_effect(trans, indexmeta, "ix.freelink"); } kunmap_atomic(metadata, KM_USER0); } - /* make sure the new inode's metadata contains the right data */ + /* make sure the recycled inode's metadata contains the right data */ if (BLOCK_VALID(inometa, inompage, jentry)) { - metadata = kmap_atomic(inompage, KM_USER0) + jentry->auxentry; - if (metadata->header.state != CACHEFS_ONDISC_INDEX_ACTIVE || - metadata->header.type != type || - metadata->header.ino != 0xfefdfc || - metadata->freelink != UINT_MAX || - metadata->pindex != jentry->index || - metadata->pindex_entry != jentry->ixentry || - memcmp(&metadata->index, - &jindex->def, - sizeof(metadata->index)) != 0 + metadata = kmap_atomic(inompage, KM_USER0) + inomoff; + if (metadata->header.state != CACHEFS_ONDISC_INDEX_FREE || + metadata->header.type != 0 || + metadata->header.ino != 0xfefdfc || + metadata->freelink != next_ino || + metadata->pindex != 0 || + metadata->pindex_entry != 0 ) { - metadata->header.state = CACHEFS_ONDISC_INDEX_ACTIVE; - metadata->header.type = type; + memset(metadata, 0, super->layout->metadata_size); + + metadata->header.state = CACHEFS_ONDISC_INDEX_FREE; metadata->header.ino = 0xfefdfc; - metadata->freelink = UINT_MAX; + metadata->freelink = next_ino; metadata->atime = CURRENT_TIME.tv_sec; - metadata->pindex = jentry->index; - metadata->pindex_entry = jentry->ixentry; - - memcpy(&metadata->index, - &jindex->def, - sizeof(metadata->index)); + metadata->pindex = 0; + metadata->pindex_entry = 0; cachefs_trans_replays_effect(trans, inometa, "ino.meta"); @@ -1055,17 +1122,16 @@ /* make sure the index data is written into the index entry */ if (BLOCK_VALID(indexdata, ixdatapage, jentry)) { - xent = kmap_atomic(ixdatapage, KM_USER0) + jentry->entry; - if (xent->state != CACHEFS_ONDISC_INDEX_ACTIVE || - xent->type != 
type || - xent->ino != jentry->ino || - memcmp(xent->u.data, jindex->data, jentry->count) != 0 + xent = kmap_atomic(ixdatapage, KM_USER0) + ixdataoff; + if (xent->state != CACHEFS_ONDISC_INDEX_FREE || + xent->type != 0 || + xent->ino != 0 || + xent->u.freelink[0] != next_index ) { - xent->state = CACHEFS_ONDISC_INDEX_ACTIVE; - xent->type = type; - xent->ino = jentry->ino; - - memcpy(xent->u.data, jindex->data, jentry->count); + xent->state = CACHEFS_ONDISC_INDEX_FREE; + xent->type = 0; + xent->ino = 0; + xent->u.freelink[0] = next_index; cachefs_trans_replays_effect(trans, indexdata, "ix.entry"); @@ -1088,126 +1154,130 @@ _leave(" = %d", ret); return ret; -} /* end cachefs_replay_ujnl_inode_creating() */ +} /* end cachefs_replay_ujnl_recyc_inode() */ /*****************************************************************************/ /* - * replay the updating of the information stored in an inode + * replay CACHEFS_ONDISC_UJNL_REAP_INODE + * - initiation of inode reap */ -static int cachefs_replay_ujnl_inode_updating(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans) +static int cachefs_replay_ujnl_reap_inode(struct cachefs_super *super, + struct cachefs_ondisc_update_journal *jentry, + struct cachefs_transaction *trans) { _enter(",{%hd.%hu}", jentry->batch, jentry->serial); - /* we don't do this yet */ - return -EINVAL; + /* nothing needs to be done here - it's all handled implicitly by the + * caller */ + return 0; -} /* end cachefs_replay_ujnl_inode_updating() */ +} /* end cachefs_replay_ujnl_reap_inode() */ /*****************************************************************************/ /* - * replay the deletion of an inode and its associated index entry + * replay CACHEFS_ONDISC_UJNL_DELETE_INODE + * - mark an inode for reclamation */ -static int cachefs_replay_ujnl_inode_deleting(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans) +static 
int cachefs_replay_ujnl_delete_inode(struct cachefs_super *super, + struct cachefs_ondisc_update_journal *jentry, + struct cachefs_transaction *trans) { struct cachefs_ondisc_index_entry *xent; - struct cachefs_ondisc_ujnl_index *jindex; struct cachefs_ondisc_metadata *metadata; - struct cachefs_block *metameta, *inometa; - struct cachefs_block *indexmeta = NULL, *indexdata = NULL; - struct page *metapage, *inompage, *ixmpage = NULL, *ixdatapage = NULL; + struct cachefs_block *inometa, *indexdata; + struct page *inompage, *ixdatapage; + unsigned inomoff, ixdataoff; int ret; _enter(",{%hd.%hu}", jentry->batch, jentry->serial); - jindex = &jentry->u.ixdata[0]; - - ret = cachefs_block_read(super, NULL, 1, 0, &metameta, &metapage); + ret = cachefs_block_read(super, jentry->u.delete_inode.ino_block, 0, + &inometa, &inompage); if (ret < 0) goto error; - ret = cachefs_block_read(super, NULL, jentry->auxblock, 0, - &inometa, &inompage); + ret = cachefs_block_read(super, jentry->u.delete_inode.ix_block, 0, + &indexdata, &ixdatapage); if (ret < 0) goto error2; - if (jentry->index) { - ret = cachefs_block_read(super, NULL, jentry->upblock, 0, - &indexmeta, &ixmpage); - if (ret < 0) - goto error3; + ixdataoff = jentry->u.delete_inode.ix_entry; + ixdataoff %= jentry->u.delete_inode.ix_epp ?: 1; + ixdataoff *= jentry->u.delete_inode.ix_esize; + + inomoff = jentry->u.delete_inode.ino; + inomoff &= (1 << (PAGE_SHIFT - super->layout->metadata_bits)) - 1; + inomoff <<= super->layout->metadata_bits; - ret = cachefs_block_read(super, NULL, jentry->block, 0, - &indexdata, &ixdatapage); - if (ret < 0) - goto error4; - } + /* make sure the inode's metadata is set to the right state */ + if (BLOCK_VALID(inometa, inompage, jentry)) { + metadata = kmap_atomic(inompage, KM_USER0) + inomoff; + if (metadata->header.state != CACHEFS_ONDISC_INDEX_RECYCLE) { + metadata->header.state = CACHEFS_ONDISC_INDEX_FREE; - /* make sure the metadata file's freelink pointer is correct */ - if 
(BLOCK_VALID(metameta, metapage, jentry)) { - metadata = kmap_atomic(metapage, KM_USER0) + - super->layout->metadata_size * CACHEFS_INO_METADATA; - if (metadata->freelink != jindex->next_ino) { - metadata->freelink = jindex->next_ino; - cachefs_trans_replays_effect(trans, metameta, - "meta.freelink"); + cachefs_trans_replays_effect(trans, inometa, + "ino.meta"); } kunmap_atomic(metadata, KM_USER0); } - /* make sure the index file's freelink pointer is correct */ - if (BLOCK_VALID(indexmeta, ixmpage, jentry)) { - metadata = kmap_atomic(ixmpage, KM_USER0) + jentry->upentry;; - if (metadata->freelink != jindex->next_index || - metadata->size != jentry->size) { - metadata->freelink = jindex->next_index; - metadata->size = jentry->size; - cachefs_trans_replays_effect(trans, indexmeta, - "ix.freelink"); + /* make sure the index entry is also set to the right state */ + if (BLOCK_VALID(indexdata, ixdatapage, jentry)) { + xent = kmap_atomic(ixdatapage, KM_USER0) + ixdataoff; + if (xent->state != CACHEFS_ONDISC_INDEX_FREE) { + xent->state = CACHEFS_ONDISC_INDEX_FREE; + + cachefs_trans_replays_effect(trans, indexdata, + "ix.entry"); } - kunmap_atomic(metadata, KM_USER0); + kunmap_atomic(xent, KM_USER0); } - /* make sure the deleted inode's metadata contains the right data */ - if (BLOCK_VALID(inometa, inompage, jentry)) { - metadata = kmap_atomic(inompage, KM_USER0) + jentry->auxentry; - if (metadata->header.state != CACHEFS_ONDISC_INDEX_FREE || - metadata->header.type != 0 || - metadata->header.ino != 0xfefdfc || - metadata->freelink != jindex->next_ino || - metadata->pindex != 0 || - metadata->pindex_entry != 0 - ) { - memset(metadata, 0, super->layout->metadata_size); + cachefs_block_put(indexdata); + cachefs_put_page(ixdatapage); + error2: + cachefs_block_put(inometa); + cachefs_put_page(inompage); + error: + _leave(" = %d", ret); + return ret; - metadata->header.state = CACHEFS_ONDISC_INDEX_FREE; - metadata->header.ino = 0xfefdfc; - metadata->freelink = 
jindex->next_ino; - metadata->atime = CURRENT_TIME.tv_sec; - metadata->pindex = 0; - metadata->pindex_entry = 0; +} /* end cachefs_replay_ujnl_delete_inode() */ - cachefs_trans_replays_effect(trans, inometa, - "ino.meta"); - } - kunmap_atomic(metadata, KM_USER0); - } +/*****************************************************************************/ +/* + * replay CACHEFS_ONDISC_UJNL_UPDATE_INDEX + * - update an index entry + */ +static int cachefs_replay_ujnl_update_index(struct cachefs_super *super, + struct cachefs_ondisc_update_journal *jentry, + struct cachefs_transaction *trans) +{ + struct cachefs_ondisc_index_entry *xent; + struct cachefs_block *indexdata; + struct page *ixdatapage; + unsigned ixdataoff, ixdsize; + void *jdata; + int ret; + + _enter(",{%hd.%hu}", jentry->batch, jentry->serial); + + ret = cachefs_block_read(super, jentry->u.update_index.ix_block, 0, + &indexdata, &ixdatapage); + if (ret < 0) + goto error; + + ixdataoff = jentry->u.update_index.ix_entry; + ixdataoff %= jentry->u.update_index.ix_epp ?: 1; + ixdataoff *= jentry->u.update_index.ix_esize; + ixdsize = jentry->u.update_index.ix_dsize; + jdata = jentry->u.update_index.data; /* make sure the index data is written into the index entry */ if (BLOCK_VALID(indexdata, ixdatapage, jentry)) { - xent = kmap_atomic(ixdatapage, KM_USER0) + jentry->entry; - if (xent->state != CACHEFS_ONDISC_INDEX_FREE || - xent->type != 0 || - xent->ino != 0 || - xent->u.freelink[0] != jindex->next_index - ) { - xent->state = CACHEFS_ONDISC_INDEX_FREE; - xent->type = 0; - xent->ino = 0; - xent->u.freelink[0] = jindex->next_index; + xent = kmap_atomic(ixdatapage, KM_USER0) + ixdataoff; + if (memcmp(xent->u.data, jdata, ixdsize) != 0) { + memcpy(xent->u.data, jdata, ixdsize); cachefs_trans_replays_effect(trans, indexdata, "ix.entry"); @@ -1217,55 +1287,106 @@ cachefs_block_put(indexdata); cachefs_put_page(ixdatapage); - error4: - cachefs_block_put(indexmeta); - cachefs_put_page(ixmpage); - error3: - 
cachefs_block_put(inometa); - cachefs_put_page(inompage); - error2: - cachefs_block_put(metameta); - cachefs_put_page(metapage); error: _leave(" = %d", ret); return ret; -} /* end cachefs_replay_ujnl_inode_deleting() */ +} /* end cachefs_replay_ujnl_update_index() */ /*****************************************************************************/ /* - * replay the marking of an inode for reclamation + * replay CACHEFS_ONDISC_UJNL_CREATE_INDEX + * replay CACHEFS_ONDISC_UJNL_CREATE_FILE + * - creation of a new data or index inode */ -static int cachefs_replay_ujnl_inode_mark_reclaim(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans) +static int cachefs_replay_ujnl_create_inode(struct cachefs_super *super, + struct cachefs_ondisc_update_journal *jentry, + struct cachefs_transaction *trans) { struct cachefs_ondisc_index_entry *xent; struct cachefs_ondisc_ujnl_index *jindex; struct cachefs_ondisc_metadata *metadata; - struct cachefs_block *inometa, *indexdata; - struct page *inompage, *ixdatapage; + struct cachefs_block *metameta, *inometa, *indexmeta, *indexdata; + struct page *metapage, *inompage, *ixmpage, *ixdatapage; + uint32_t type; int ret; _enter(",{%hd.%hu}", jentry->batch, jentry->serial); jindex = &jentry->u.ixdata[0]; - ret = cachefs_block_read(super, NULL, jentry->auxblock, 0, - &inometa, &inompage); + ret = cachefs_block_read(super, 1, 0, &metameta, &metapage); if (ret < 0) goto error; - ret = cachefs_block_read(super, NULL, jentry->block, 0, - &indexdata, &ixdatapage); + ret = cachefs_block_read(super, jentry->auxblock, 0, + &inometa, &inompage); if (ret < 0) goto error2; - /* make sure the inode's metadata is set to the right state */ + ret = cachefs_block_read(super, jentry->upblock, 0, + &indexmeta, &ixmpage); + if (ret < 0) + goto error3; + + ret = cachefs_block_read(super, jentry->block, 0, + &indexdata, &ixdatapage); + if (ret < 0) + goto error4; + + type = CACHEFS_ONDISC_INDEX_DATAFILE; 
+ if (jentry->mark == CACHEFS_ONDISC_UJNL_CREATE_INDEX) + type = CACHEFS_ONDISC_INDEX_INDEXFILE; + + /* make sure the metadata file's freelink pointer is correct */ + if (BLOCK_VALID(metameta, metapage, jentry)) { + metadata = kmap_atomic(metapage, KM_USER0) + + super->layout->metadata_size * CACHEFS_INO_METADATA; + if (metadata->freelink != jindex->next_ino) { + metadata->freelink = jindex->next_ino; + cachefs_trans_replays_effect(trans, metameta, + "meta.freelink"); + } + kunmap_atomic(metadata, KM_USER0); + } + + /* make sure the index file's freelink pointer is correct */ + if (BLOCK_VALID(indexmeta, ixmpage, jentry)) { + metadata = kmap_atomic(ixmpage, KM_USER0) + jentry->upentry; + if (metadata->freelink != jindex->next_index || + metadata->size != jentry->size) { + metadata->freelink = jindex->next_index; + metadata->size = jentry->size; + cachefs_trans_replays_effect(trans, indexmeta, + "ix.freelink"); + } + kunmap_atomic(metadata, KM_USER0); + } + + /* make sure the new inode's metadata contains the right data */ if (BLOCK_VALID(inometa, inompage, jentry)) { metadata = kmap_atomic(inompage, KM_USER0) + jentry->auxentry; - if (metadata->header.state != CACHEFS_ONDISC_INDEX_RECYCLE) { - metadata->header.state = CACHEFS_ONDISC_INDEX_FREE; + if (metadata->header.state != CACHEFS_ONDISC_INDEX_ACTIVE || + metadata->header.type != type || + metadata->header.ino != 0xfefdfc || + metadata->freelink != UINT_MAX || + metadata->pindex != jentry->index || + metadata->pindex_entry != jentry->ixentry || + memcmp(&metadata->index, + &jindex->def, + sizeof(metadata->index)) != 0 + ) { + metadata->header.state = CACHEFS_ONDISC_INDEX_ACTIVE; + metadata->header.type = type; + metadata->header.ino = 0xfefdfc; + metadata->freelink = UINT_MAX; + metadata->atime = CURRENT_TIME.tv_sec; + metadata->pindex = jentry->index; + metadata->pindex_entry = jentry->ixentry; + + memcpy(&metadata->index, + &jindex->def, + sizeof(metadata->index)); cachefs_trans_replays_effect(trans, inometa, 
"ino.meta"); @@ -1273,11 +1394,19 @@ kunmap_atomic(metadata, KM_USER0); } - /* make sure the index entry is also set to the right state */ + /* make sure the index data is written into the index entry */ if (BLOCK_VALID(indexdata, ixdatapage, jentry)) { xent = kmap_atomic(ixdatapage, KM_USER0) + jentry->entry; - if (xent->state != CACHEFS_ONDISC_INDEX_FREE) { - xent->state = CACHEFS_ONDISC_INDEX_FREE; + if (xent->state != CACHEFS_ONDISC_INDEX_ACTIVE || + xent->type != type || + xent->ino != jentry->ino || + memcmp(xent->u.data, jindex->data, jentry->count) != 0 + ) { + xent->state = CACHEFS_ONDISC_INDEX_ACTIVE; + xent->type = type; + xent->ino = jentry->ino; + + memcpy(xent->u.data, jindex->data, jentry->count); cachefs_trans_replays_effect(trans, indexdata, "ix.entry"); @@ -1287,34 +1416,26 @@ cachefs_block_put(indexdata); cachefs_put_page(ixdatapage); - error2: + error4: + cachefs_block_put(indexmeta); + cachefs_put_page(ixmpage); + error3: cachefs_block_put(inometa); cachefs_put_page(inompage); + error2: + cachefs_block_put(metameta); + cachefs_put_page(metapage); error: _leave(" = %d", ret); return ret; -} /* end cachefs_replay_ujnl_inode_mark_reclaim() */ - -/*****************************************************************************/ -/* - * replay the initiation of inode reclamation - */ -static int cachefs_replay_ujnl_inode_reclaiming(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans) -{ - _enter(",{%hd.%hu}", jentry->batch, jentry->serial); - - /* nothing needs to be done here - it's all handled implicitly by the - * caller */ - return 0; - -} /* end cachefs_replay_ujnl_inode_reclaiming() */ +} /* end cachefs_replay_ujnl_create_inode() */ +#if 0 /*****************************************************************************/ /* - * replay the extension of an index file + * replay CACHEFS_ONDISC_UJNL_INDEX_EXTENDING + * - extension of an index file */ static int 
cachefs_replay_ujnl_index_extending(struct cachefs_super *super, struct cachefs_ondisc_update_journal *jentry, @@ -1329,17 +1450,17 @@ _enter(",{%hd.%hu}", jentry->batch, jentry->serial); - ret = cachefs_block_read(super, NULL, jentry->auxblock, 0, + ret = cachefs_block_read(super, jentry->auxblock, 0, &indexmeta, &metapage); if (ret < 0) goto error; - ret = cachefs_block_read(super, NULL, jentry->upblock, 0, + ret = cachefs_block_read(super, jentry->upblock, 0, &indexptr, &ptrpage); if (ret < 0) goto error2; - ret = cachefs_block_read(super, NULL, jentry->block, 0, + ret = cachefs_block_read(super, jentry->block, 0, &indexdata, &datapage); if (ret < 0) goto error3; @@ -1408,50 +1529,8 @@ /*****************************************************************************/ /* - * replay the modification of the data in an index entry - */ -static int cachefs_replay_ujnl_index_modifying(struct cachefs_super *super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans) -{ - struct cachefs_ondisc_index_entry *xent; - struct cachefs_ondisc_ujnl_index *jindex; - struct cachefs_block *indexdata; - struct page *ixdatapage; - int ret; - - _enter(",{%hd.%hu}", jentry->batch, jentry->serial); - - jindex = &jentry->u.ixdata[0]; - - ret = cachefs_block_read(super, NULL, jentry->block, 0, - &indexdata, &ixdatapage); - if (ret < 0) - goto error; - - /* make sure the index data is written into the index entry */ - if (BLOCK_VALID(indexdata, ixdatapage, jentry)) { - xent = kmap_atomic(ixdatapage, KM_USER0) + jentry->entry; - if (memcmp(xent->u.data, jindex->data, jindex->def.dsize) != 0) { - memcpy(xent->u.data, jindex->data, jindex->def.dsize); - - cachefs_trans_replays_effect(trans, indexdata, - "ix.entry"); - } - kunmap_atomic(xent, KM_USER0); - } - - cachefs_block_put(indexdata); - cachefs_put_page(ixdatapage); - error: - _leave(" = %d", ret); - return ret; - -} /* end cachefs_replay_ujnl_index_modifying() */ - 
-/*****************************************************************************/ -/* - * replay data block allocation and v-journal entry marking + * replay CACHEFS_ONDISC_UJNL_DATA_ALLOCING + * - data block allocation and v-journal entry marking */ static int cachefs_replay_ujnl_data_allocing(struct cachefs_super *super, struct cachefs_ondisc_update_journal *jentry, @@ -1466,12 +1545,12 @@ _enter(",{%hd.%hu}", jentry->batch, jentry->serial); - ret = cachefs_block_read(super, NULL, jentry->auxblock, 0, + ret = cachefs_block_read(super, jentry->auxblock, 0, &metablock, &metapage); if (ret < 0) goto error; - ret = cachefs_block_read(super, NULL, jentry->upblock, 0, + ret = cachefs_block_read(super, jentry->upblock, 0, &ptrblock, &ptrpage); if (ret < 0) goto error2; @@ -1480,8 +1559,7 @@ vjbix += (jentry->auxmark / CACHEFS_ONDISC_VJNL_ENTPERPAGE) * sizeof(struct cachefs_ondisc_validity_journal); - ret = cachefs_block_read(super, NULL, vjbix, 0, - &vjblock, &vjpage); + ret = cachefs_block_read(super, vjbix, 0, &vjblock, &vjpage); if (ret < 0) goto error3; @@ -1538,7 +1616,8 @@ /*****************************************************************************/ /* - * replay data write and v-journal clear + * replay CACHEFS_ONDISC_UJNL_DATA_WRITTEN + * - data write and v-journal clear */ static int cachefs_replay_ujnl_data_written(struct cachefs_super *super, struct cachefs_ondisc_update_journal *jentry, @@ -1556,8 +1635,7 @@ vjbix += (jentry->auxmark / CACHEFS_ONDISC_VJNL_ENTPERPAGE) * sizeof(struct cachefs_ondisc_validity_journal); - ret = cachefs_block_read(super, NULL, vjbix, 0, - &vjblock, &vjpage); + ret = cachefs_block_read(super, vjbix, 0, &vjblock, &vjpage); if (ret < 0) goto error; @@ -1695,59 +1773,4 @@ return ret; } /* end cachefs_replay_ujnl_data_unallocing() */ - -/*****************************************************************************/ -/* - * replay indirection block allocation - */ -static int cachefs_replay_ujnl_indirect_allocing(struct cachefs_super 
*super, - struct cachefs_ondisc_update_journal *jentry, - struct cachefs_transaction *trans) -{ - struct cachefs_block *ptrblock, *indblock; - cachefs_blockix_t *ptr; - struct page *ptrpage, *indpage; - void *content; - int ret; - - _enter(",{%hd.%hu}", jentry->batch, jentry->serial); - - ret = cachefs_block_read(super, NULL, jentry->upblock, 0, - &ptrblock, &ptrpage); - if (ret < 0) - goto error; - - ret = cachefs_block_read(super, NULL, jentry->block, 0, - &indblock, &indpage); - if (ret < 0) - goto error2; - - /* make sure the new block is pointed to by the appropriate pointer */ - if (BLOCK_VALID(ptrblock, ptrpage, jentry)) { - ptr = kmap_atomic(ptrpage, KM_USER0) + jentry->upentry; - if (*ptr != jentry->block) { - *ptr = jentry->block; - - cachefs_trans_replays_effect(trans, ptrblock, "ptr"); - } - kunmap_atomic(ptr, KM_USER0); - } - - /* make sure the indirection block is cleared */ - if (BLOCK_VALID(indblock, indpage, jentry)) { - content = kmap_atomic(indpage, KM_USER0); - clear_page(content); - cachefs_trans_replays_effect(trans, indblock, "indir"); - kunmap_atomic(content, KM_USER0); - } - - cachefs_block_put(indblock); - cachefs_put_page(indpage); - error2: - cachefs_block_put(ptrblock); - cachefs_put_page(ptrpage); - error: - _leave(" = %d", ret); - return ret; - -} /* end cachefs_replay_ujnl_indirect_allocing() */ +#endif diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/rootdir.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/rootdir.c --- linux-2.6.12-rc2-mm3/fs/cachefs/rootdir.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/rootdir.c 2005-04-18 22:24:56.000000000 +0100 @@ -862,7 +862,7 @@ (CACHEFS_RCM_IMM_BUFSIZE - 1); } else { - set_bit(CACHEFS_SUPER_RCM_IMM_SCAN, &super->flags); + set_bit(CACHEFS_SUPER_DO_REAP, &super->flags); } spin_unlock_irqrestore(&super->rcm_lock, flags); diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/super.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/super.c --- linux-2.6.12-rc2-mm3/fs/cachefs/super.c 2005-04-12 
15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/super.c 2005-04-19 21:24:58.000000000 +0100 @@ -229,6 +229,7 @@ */ static int cachefs_fill_super(struct super_block *sb, void *_data, int silent) { + struct fscache_search_result *srch = NULL; struct cachefs_super *super = NULL; struct cachefs_inode *inode = NULL, *inode2; struct dentry *root = NULL; @@ -273,6 +274,12 @@ super->vjnl_count = CACHEFS_ONDISC_VJNL_ENTS; + srch = cachefs_srch_alloc(&super->cache, GFP_KERNEL); + if (!srch) + goto error; + + cachefs_srch_set(srch, CACHEFS_INO_FSDEF_CATALOGUE); + /* initialise the superblock */ sb->s_magic = CACHEFS_FS_MAGIC; sb->s_op = &cachefs_super_ops; @@ -282,7 +289,6 @@ fscache_init_cache(&super->cache, &cachefs_cache_ops, - CACHEFS_INO_FSDEF_CATALOGUE, "%02x:%02x", MAJOR(sb->s_dev), MINOR(sb->s_dev) @@ -343,7 +349,7 @@ * kmapping it */ asflags = super->imisc->i_mapping->flags; super->imisc->i_mapping->flags = asflags & ~__GFP_HIGHMEM; - ret = cachefs_block_read(super, NULL, 0, 0, NULL, &page); + ret = cachefs_block_read(super, 0, 0, NULL, &page); super->imisc->i_mapping->flags = asflags; if (ret < 0) goto error; @@ -460,12 +466,15 @@ goto error; } - fscache_add_cache(&super->cache); + fscache_add_cache(&super->cache, srch); _leave(" = 0 [super=%p]", super); return 0; error: + if (srch) + cachefs_srch_free(srch); + if (super) { if (super->dmn_task) { super->dmn_die = 1; @@ -905,6 +914,7 @@ inode_init_once(&inode->vfs_inode); init_rwsem(&inode->metadata_sem); fscache_node_init(&inode->node); + inode->indirections = RB_ROOT; } } /* end cachefs_i_init_once() */ diff -uNr linux-2.6.12-rc2-mm3/fs/cachefs/vjournal.c linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/vjournal.c --- linux-2.6.12-rc2-mm3/fs/cachefs/vjournal.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/cachefs/vjournal.c 1970-01-01 01:00:00.000000000 +0100 @@ -1,656 +0,0 @@ -/* vjournal.c: validity journal management - * - * Copyright (C) 2003 Red Hat, Inc. 
All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include "cachefs-int.h" - -static int cachefs_vj_replay_actor(read_descriptor_t *desc, - struct page *page, - unsigned long offset, - unsigned long size); - -static int cachefs_vj_replay_entry(struct cachefs_super *super, - struct cachefs_vj_entry *vjentry); - -static int cachefs_vj_walk_indirection_chain(struct cachefs_super *super, - struct cachefs_inode *inode, - struct cachefs_vj_entry *vjentry); - -struct cachefs_vjio_block_path { - struct page *page; - cachefs_blockix_t bix; /* block number for this level */ - unsigned offset; /* offset into parent pointer block */ -}; - -/*****************************************************************************/ -/* - * allocate an entry in the block validity tracking journal - * - returned attached to trans->vjentry - */ -int cachefs_vj_alloc(struct cachefs_transaction *trans, struct cachefs_inode *inode) -{ - struct cachefs_vj_entry *vjentry; - struct cachefs_super *super; - cachefs_blockix_t bix; - int slot, ret; - - DECLARE_WAITQUEUE(myself,current); - - _enter(""); - - super = trans->super; - - /* allocate and initialise the token */ - vjentry = kmalloc(sizeof(*vjentry), GFP_KERNEL); - if (!vjentry) { - _leave(" = -ENOMEM"); - return -ENOMEM; - } - - memset(vjentry, 0, sizeof(vjentry)); - INIT_LIST_HEAD(&vjentry->link); - - vjentry->ino = inode->vfs_inode.i_ino; - - /* now allocate a slot in the validity journal when one - * becomes available */ - spin_lock_irq(&super->vjnl_lock); - - if (super->vjnl_count == 0) { - set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&super->vjnl_alloc_wq, &myself); - - while (super->vjnl_count 
== 0 && !signal_pending(current)) { - spin_unlock_irq(&super->vjnl_lock); - schedule(); - spin_lock_irq(&super->vjnl_lock); - set_current_state(TASK_INTERRUPTIBLE); - } - - set_current_state(TASK_RUNNING); - remove_wait_queue(&super->vjnl_alloc_wq, &myself); - - ret = -EINTR; - if (signal_pending(current)) - goto error_free; - } - - slot = find_first_zero_bit(super->vjnl_map, CACHEFS_ONDISC_VJNL_ENTS); - if (slot < 0 || slot >= CACHEFS_ONDISC_VJNL_ENTS) { - printk("CacheFS: vjnl_count=%u slot=%d\n", - super->vjnl_count, slot); - BUG(); - } - - set_bit(slot, super->vjnl_map); - super->vjnl_count--; - - spin_unlock_irq(&super->vjnl_lock); - - /* got a slot - now read the block holding it into memory */ - _debug("VJ slot %d", slot); - - vjentry->vslot = slot; - vjentry->ventry = slot % CACHEFS_ONDISC_VJNL_ENTPERPAGE; - vjentry->ventry *= sizeof(struct cachefs_ondisc_validity_journal); - - bix = slot / CACHEFS_ONDISC_VJNL_ENTPERPAGE; - bix += super->layout->bix_vjournal; - - ret = cachefs_block_read(super, NULL, bix, 0, - &vjentry->vblock, &vjentry->vpage); - if (ret < 0) - goto error_clearbit; - - /* record the fact that this transaction modifies it */ - trans->vjentry = vjentry; - - cachefs_trans_affects_block(trans, vjentry->vblock, vjentry->ventry, - sizeof(struct cachefs_ondisc_validity_journal)); - - _leave(" = 0"); - return 0; - - error_clearbit: - spin_lock_irq(&super->vjnl_lock); - clear_bit(slot, super->vjnl_map); - super->vjnl_count++; - wake_up(&super->vjnl_alloc_wq); - - error_free: - spin_unlock_irq(&super->vjnl_lock); - dbgfree(vjentry); - kfree(vjentry); - _leave(" = %d", ret); - return ret; - -} /* end cachefs_vj_alloc() */ - -/*****************************************************************************/ -/* - * release a v-journal entry - * - clear the allocation map bit and wake up anyone trying to allocate - */ -void cachefs_vj_release(struct cachefs_super *super, struct cachefs_vj_entry *vjentry) -{ - unsigned long flags; - - _enter(""); - - /* 
free up the block to those that might be waiting for it and wake them up */ - spin_lock_irqsave(&super->vjnl_lock, flags); - clear_bit(vjentry->vslot, super->vjnl_map); - super->vjnl_count++; - spin_unlock_irqrestore(&super->vjnl_lock, flags); - - wake_up(&super->vjnl_alloc_wq); - - /* unpin the block and release the memory */ - cachefs_put_page(vjentry->vpage); - cachefs_block_put(vjentry->vblock); - dbgfree(vjentry); - kfree(vjentry); - - _leave(""); - -} /* end cachefs_vj_release() */ - -/*****************************************************************************/ -/* - * clear a v-journal entry due to the target block having been written - */ -void cachefs_vj_write_complete(struct cachefs_block *block) -{ - struct cachefs_vj_entry *vjentry = block->vjentry; - struct cachefs_super *super; - unsigned long flags; - - _enter("{vs=%u pg={%x}+%x up={%x}+%x", - vjentry->vslot, vjentry->ino, vjentry->pgnum, - vjentry->upblock, vjentry->upentry); - - block->vjentry = NULL; - super = block->super; - - /* move the journal mark to the written queue for kcachefsd to deal - * with */ - spin_lock_irqsave(&super->vjnl_lock, flags); - list_move_tail(&vjentry->link, &super->vjnl_writtenq); - vjentry->written = 1; - spin_unlock_irqrestore(&super->vjnl_lock, flags); - - wake_up(&super->dmn_sleepq); - - _leave(""); - -} /* end cachefs_vj_write_complete() */ - -/*****************************************************************************/ -/* - * queue an invalid block for detachment and recycling - */ -static void __cachefs_vj_cancel(struct cachefs_super *super, - struct cachefs_vj_entry *vjentry) -{ - spin_lock_irq(&super->vjnl_lock); - - _enter(",{vs=%u pg={%x}+%x up={%x}+%x vjp=%p w=%d}", - vjentry->vslot, vjentry->ino, vjentry->pgnum, - vjentry->upblock, vjentry->upentry, vjentry->vpage, - vjentry->written); - - /* move the journal mark to the unallocation queue for kcachefsd to - * deal with */ - if (!vjentry->written) - list_move_tail(&vjentry->link, 
&super->vjnl_unallocq); - - spin_unlock_irq(&super->vjnl_lock); - - /* wake up kcachefsd */ - wake_up(&super->dmn_sleepq); - - _leave(""); - -} /* end __cachefs_vj_cancel() */ - -/*****************************************************************************/ -/* - * queue an invalid block for detachment and recycling - * - guard against the block being written and the vjentry being discarded - */ -void cachefs_vj_cancel(struct cachefs_block *block) -{ - struct cachefs_vj_entry *vjentry; - struct cachefs_super *super = NULL; - - spin_lock_irq(&block->super->vjnl_lock); - - vjentry = block->vjentry; - if (vjentry) { - _enter("{vs=%u pg={%x}+%x up={%x}+%x vjp=%p w=%d}", - vjentry->vslot, vjentry->ino, vjentry->pgnum, - vjentry->upblock, vjentry->upentry, vjentry->vpage, - vjentry->written); - - /* move the journal mark to the unallocation queue for - * kcachefsd to deal with */ - if (!vjentry->written) - list_move_tail(&vjentry->link, &super->vjnl_unallocq); - - _leave(""); - - /* wake up kcachefsd */ - wake_up(&block->super->dmn_sleepq); - } - - spin_unlock_irq(&block->super->vjnl_lock); - -} /* end cachefs_vj_cancel() */ - -/*****************************************************************************/ -/* - * clear an entry in the vjournal once the corresponding block has been written - * to by the netfs - */ -void cachefs_vj_note_write_completion(struct cachefs_super *super) -{ - struct cachefs_transaction *trans; - struct cachefs_vj_entry *vjentry; - void *ptr; - int ret; - - _enter(""); - - BUG_ON(list_empty(&super->vjnl_writtenq)); - - /* we can access the next pointer without a lock because we know we're - * the only ones going to change it now */ - vjentry = list_entry(super->vjnl_writtenq.next, - struct cachefs_vj_entry, - link); - - /* allocate a transaction to record the completion */ - ret = -ENOMEM; - trans = cachefs_trans_alloc(super, GFP_KERNEL); - if (!trans) - goto error; - - trans->jentry->mark = CACHEFS_ONDISC_UJNL_DATA_WRITTEN; - 
trans->jentry->ino = vjentry->ino; - trans->jentry->auxmark = vjentry->vslot; - trans->jentry->block = vjentry->bix; - - cachefs_trans_affects_block( - trans, vjentry->vblock, vjentry->ventry, - sizeof(struct cachefs_ondisc_validity_journal)); - - /* write the transaction mark to the journal */ - ret = cachefs_trans_mark(trans); - if (ret < 0) - goto error_free; - - /* we can now modify the data in memory */ - wait_on_page_locked(vjentry->vpage); - cachefs_block_modify(super, vjentry->vblock, &vjentry->vpage); - ptr = kmap_atomic(vjentry->vpage, KM_USER0); - memset(ptr + vjentry->ventry, 0, - sizeof(struct cachefs_ondisc_validity_journal)); - kunmap_atomic(ptr, KM_USER0); - - /* queue the transaction to be written to disc */ - cachefs_trans_commit(trans); - - /* remove from the written marks queue */ - spin_lock_irq(&super->vjnl_lock); - list_del_init(&vjentry->link); - spin_unlock_irq(&super->vjnl_lock); - - cachefs_vj_release(super, vjentry); - _leave(""); - return; - - error_free: - cachefs_trans_put(trans); - error: - _leave(" [error %d]", ret); - -} /* end cachefs_vj_note_write_completion() */ - -/*****************************************************************************/ -/* - * replay the validity journal - * - this involves unallocating every block mentioned - */ -int cachefs_vj_replay(struct cachefs_super *super) -{ - struct file_ra_state ra; - read_descriptor_t desc; - loff_t ppos; - - _enter(""); - - printk("CacheFS: Replaying the validity journal...\n"); - - /* read through the page cache to get readahead */ - memset(&ra, 0, sizeof(ra)); - file_ra_state_init(&ra, super->imisc->i_mapping); - - memset(&desc, 0, sizeof(desc)); - desc.count = super->layout->bix_wbjournal; - desc.count -= super->layout->bix_vjournal; - desc.count *= super->layout->bsize; - desc.arg.buf = (char *) super; - - ppos = super->layout->bix_vjournal; - ppos *= super->layout->bsize; - - do_generic_mapping_read(super->imisc->i_mapping, &ra, NULL, &ppos, - &desc, 
cachefs_vj_replay_actor); - if (desc.error < 0) - goto error; - return 0; - - error: - printk("CacheFS: failed to replay vjournal: %d\n", desc.error); - return desc.error; - -} /* end cachefs_vj_replay() */ - -/*****************************************************************************/ -/* - * replay a segment of the validity journal - */ -static int cachefs_vj_replay_actor(read_descriptor_t *desc, - struct page *page, - unsigned long offset, - unsigned long size) -{ - struct cachefs_ondisc_validity_journal *vjmark; - struct cachefs_vj_entry *vjentry; - struct cachefs_super *super = (struct cachefs_super *) desc->arg.buf; - struct fscache_page *pageio; - unsigned long stop; - void *data; - int ret; - - _enter("{%zx},{%lu},%lu,%lu", desc->count, page->index, offset, size); - - if (size > desc->count) - size = desc->count; - - BUG_ON(offset % sizeof(*vjmark)); - BUG_ON(size % sizeof(*vjmark)); - - stop = offset + size; - - pageio = fscache_page_grab_private(page); - cachefs_block_set(super, pageio->mapped_block, page, pageio); - - data = kmap(page); - - /* deal with all the entries in this block */ - for (; - offset < stop; - offset += sizeof(struct cachefs_ondisc_validity_journal) - ) { - vjmark = data + offset; - - /* look for valid marks indicating an incomplete write */ - if (vjmark->ino == 0 && vjmark->pgnum == 0) - continue; - - if (vjmark->ino < CACHEFS_INO__FIRST_FILE || - vjmark->ino >= CACHEFS_INO_MISC) { - printk("CacheFS: Impossible ino recorded in vjnl (%x)\n", - vjmark->ino); - desc->error = -EINVAL; - break; - } - - /* construct a record of an incomplete write */ - vjentry = kmalloc(sizeof(*vjentry), GFP_KERNEL); - if (!vjentry) { - desc->error = -ENOMEM; - break; - } - - memset(vjentry, 0, sizeof(vjentry)); - INIT_LIST_HEAD(&vjentry->link); - - vjentry->vslot = page->index - super->layout->bix_vjournal; - vjentry->vslot *= CACHEFS_ONDISC_VJNL_ENTPERPAGE; - vjentry->vslot += offset / sizeof(vjentry); - - vjentry->ino = vjmark->ino; - vjentry->pgnum 
= vjmark->pgnum; - vjentry->ventry = offset; - vjentry->vpage = page; - vjentry->vblock = __cachefs_get_page_block(page); - - cachefs_block_get(vjentry->vblock); - get_page(vjentry->vpage); - - /* revert the metadata */ - ret = cachefs_vj_replay_entry(super, vjentry); - if (ret < 0) { - desc->error = ret; - cachefs_put_page(vjentry->vpage); - cachefs_block_put(vjentry->vblock); - dbgfree(vjentry); - kfree(vjentry); - break; - } - } - - kunmap(page); - - desc->count -= size; - desc->written += size; - return size; - -} /* end cachefs_vj_replay_actor() */ - -/*****************************************************************************/ -/* - * replay an entry from the validity journal - */ -static int cachefs_vj_replay_entry(struct cachefs_super *super, - struct cachefs_vj_entry *vjentry) -{ - struct cachefs_inode *inode; - int ret; - - _enter(",{ino=%x pg=%x}", vjentry->ino, vjentry->pgnum); - - /* get the inode to which the mark took place */ - inode = cachefs_iget(super, vjentry->ino); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - goto error; - } - - /* validate it */ - ret = -EINVAL; - if (inode->node.flags & FSCACHE_NODE_ISINDEX) { - printk("CacheFS: Index inode %x has block in v-journal\n", - vjentry->ino); - goto error2; - } - - /* get the position of the pointer on disc */ - ret = cachefs_vj_walk_indirection_chain(super, inode, vjentry); - if (ret < 0) { - printk("CacheFS:" - " Inode %x has non-existent block in v-journal\n", - vjentry->ino); - goto error2; - } - - /* cancel the write */ - local_irq_disable(); - __cachefs_vj_cancel(super, vjentry); - local_irq_enable(); - - error2: - cachefs_iput(inode); - error: - _leave(" = %d", ret); - return ret; - -} /* end cachefs_vj_replay_entry() */ - -/*****************************************************************************/ -/* - * walk the indirection chain to a block, looking for the ptr to it - */ -static int cachefs_vj_walk_indirection_chain(struct cachefs_super *super, - struct cachefs_inode *inode, - 
struct cachefs_vj_entry *vjentry) -{ - struct cachefs_vjio_block_path path[4]; - const size_t ptrperblk = PAGE_SIZE / sizeof(cachefs_blockix_t); - sector_t iblock; - size_t ptrqty, notboundary = 1; - int pix, ret; - - _enter(",%lx,{%x}", inode->vfs_inode.i_ino, vjentry->pgnum); - - if (vjentry->pgnum / ptrperblk >= ptrperblk) { - _leave(" = -EIO [range]"); - return -EIO; - } - - memset(path, 0, sizeof(path)); - - /* is it inside direct range? */ - iblock = vjentry->pgnum; - ptrqty = super->sb->s_blocksize; - ptrqty -= sizeof(struct cachefs_ondisc_metadata); - ptrqty /= sizeof(cachefs_blockix_t); - if (iblock < ptrqty) { - _debug("direct (%llx/%x)", iblock, ptrqty); - notboundary = ptrqty - iblock + 1; - - path[0].offset = iblock * sizeof(cachefs_blockix_t); - path[0].offset += offsetof(struct cachefs_ondisc_metadata, - direct); - path[1].page = inode->metadata_page; - pix = 0; - goto process; - } - iblock -= ptrqty; - - /* is it inside single-indirect range? */ - ptrqty = ptrperblk; - if (iblock < ptrqty) { - _debug("indirect (%llx/%x)", iblock, ptrqty); - notboundary = (iblock + 1) & (ptrperblk - 1); - - path[0].offset = iblock * sizeof(cachefs_blockix_t); - path[1].offset = offsetof(struct cachefs_ondisc_metadata, - single_indirect); - path[2].page = inode->metadata_page; - pix = 1; - goto process; - } - iblock -= ptrqty; - - /* is it inside double-indirect range? 
*/ - ptrqty *= ptrqty; - if (iblock < ptrqty) { - _debug("double indirect (%llx/%x)", iblock, ptrqty); - notboundary = (iblock + 1) & (ptrperblk - 1); - - path[0].offset = - sector_div(iblock, - PAGE_SIZE / sizeof(cachefs_blockix_t)); - path[0].offset *= sizeof(cachefs_blockix_t); - path[1].offset = iblock * sizeof(cachefs_blockix_t); - path[2].offset = offsetof(struct cachefs_ondisc_metadata, - double_indirect); - path[3].page = inode->metadata_page; - pix = 2; - goto process; - } - - /* it seems to be inside triple-indirect range, which isn't supported - * yet (TODO) */ - BUG(); - pix = 3; - - /* walk the path to the pointer */ - process: - page_cache_get(path[pix + 1].page); - - path[pix].offset += inode->metadata_offset; - path[pix + 1].bix = __cachefs_get_page_block(path[pix + 1].page)->bix; - - ret = 0; - for (; pix >= 0; pix--) { - struct cachefs_vjio_block_path *step = &path[pix]; - - _debug("step level %u { ptr={%lx}+%x / bix=%x }", - pix, step[1].page->index, step->offset, step[1].bix); - - /* get the block number for this level */ - if (!step->bix) { - u8 *data = kmap_atomic(step[1].page, KM_USER0); - step->bix = - *(cachefs_blockix_t *)(data + step->offset); - kunmap_atomic(data, KM_USER0); - } - - /* allocate this block if necessary */ - if (!step->bix) { - _debug("path incomplete at level %d", pix); - ret = -ENODATA; - break; - } - - /* if we're at the leaf, we don't need to actually access the - * block */ - if (pix <= 0) - break; - - /* read the pointer block here */ - _debug("reading level %d block %x", pix, step->bix); - - ret = cachefs_block_read(super, NULL, step->bix, 0, NULL, - &step->page); - if (ret < 0) { - printk("CacheFS: " - "read I/O error on level %d block %x: %d\n", - pix, step->bix, ret); - break; - } - - wait_on_page_locked(step->page); - } - - /* record the position of the pointer we need to invalidate */ - vjentry->bix = path[0].bix; - vjentry->upblock = __cachefs_get_page_block(path[1].page)->bix; - vjentry->upentry = 
path[0].offset; - - /* release the pages used to walk the path */ - for (pix = sizeof(path) / sizeof(path[0]) - 1; pix > 0; pix--) - cachefs_put_page(path[pix].page); - - _leave(" = %d [bix=%x up={%x}+%x]", - ret, vjentry->bix, vjentry->upblock, vjentry->upentry); - - return ret; - -} /* end cachefs_vj_walk_indirection_chain() */ diff -uNr linux-2.6.12-rc2-mm3/fs/direct-io.c linux-2.6.12-rc2-mm3-cachefs/fs/direct-io.c --- linux-2.6.12-rc2-mm3/fs/direct-io.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/direct-io.c 2005-04-12 15:52:45.000000000 +0100 @@ -790,7 +790,7 @@ struct page *page; unsigned block_in_page; struct buffer_head *map_bh = &dio->map_bh; - int ret = 0; + int ret = 0, sent = 0; /* The I/O can start at any block offset within the first page */ block_in_page = dio->first_block_in_page; @@ -861,6 +861,14 @@ page_cache_release(page); return -ENOTBLK; } + else if (dio->iocb->ki_filp->f_flags & + O_NOREADHOLE + ) { + page_cache_release(page); + if (sent) + return 0; + return -ENODATA; + } if (dio->block_in_file >= i_size_read(dio->inode)>>blkbits) { @@ -907,6 +915,7 @@ page_cache_release(page); goto out; } + sent = 1; dio->next_block_for_io += this_chunk_blocks; dio->block_in_file += this_chunk_blocks; diff -uNr linux-2.6.12-rc2-mm3/fs/fscache/cookie.c linux-2.6.12-rc2-mm3-cachefs/fs/fscache/cookie.c --- linux-2.6.12-rc2-mm3/fs/fscache/cookie.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/fscache/cookie.c 2005-04-21 17:00:42.761924790 +0100 @@ -1,6 +1,6 @@ /* cookie.c: general filesystem cache cookie management * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -123,7 +123,6 @@ */ void fscache_init_cache(struct fscache_cache *cache, struct fscache_cache_ops *ops, - unsigned fsdef_ino, const char *idfmt, ...) 
{ @@ -140,10 +139,7 @@ INIT_LIST_HEAD(&cache->link); INIT_LIST_HEAD(&cache->node_list); spin_lock_init(&cache->node_list_lock); - - INIT_LIST_HEAD(&cache->fsdef_srch.link); - cache->fsdef_srch.cache = cache; - cache->fsdef_srch.ino = fsdef_ino; + init_rwsem(&cache->withdrawal_sem); } /* end fscache_init_cache() */ @@ -153,7 +149,8 @@ /* * declare a mounted cache as being open for business */ -void fscache_add_cache(struct fscache_cache *cache) +void fscache_add_cache(struct fscache_cache *cache, + struct fscache_search_result *fsdef_srch) { struct fscache_node *ifsdef; @@ -162,7 +159,8 @@ _enter("{%s.%s}", cache->ops->name, cache->identifier); /* prepare an active-node record for the FSDEF index of this cache */ - ifsdef = cache->ops->lookup_node(cache, cache->fsdef_srch.ino); + ifsdef = cache->ops->lookup_node(cache, &fscache_fsdef_index, + fsdef_srch); BUG_ON(IS_ERR(ifsdef)); /* there shouldn't be an error as FSDEF is the * root dir of the FS and so should already be * in core */ @@ -187,7 +185,7 @@ * cookie as a known backing node */ down_write(&fscache_fsdef_index.sem); - list_add_tail(&cache->fsdef_srch.link, + list_add_tail(&fsdef_srch->link, &fscache_fsdef_index.search_results); list_add_tail(&ifsdef->cookie_link, &fscache_fsdef_index.backing_nodes); @@ -214,7 +212,7 @@ _enter(""); /* make the cache unavailable for cookie acquisition */ - set_bit(FSCACHE_CACHE_WITHDRAWN, &cache->flags); + down_write(&cache->withdrawal_sem); down_write(&fscache_addremove_sem); list_del_init(&cache->link); @@ -263,7 +261,7 @@ /*****************************************************************************/ /* - * withdraw a node from active service + * withdraw a node from active service at the behest of the cache * - need break the links to a cached object cookie * - called under two situations: * (1) recycler decides to reclaim an in-use node @@ -313,11 +311,7 @@ found_record: list_del_init(&srch->link); - - if (srch != &cache->fsdef_srch) { - dbgfree(srch); - kfree(srch); - } 
+ cache->ops->srch_free(srch); up_write(&cookie->sem); } @@ -372,15 +366,16 @@ read_lock(&cookie->lock); list_for_each_entry(srch, &cookie->search_results, link) { - _debug("check entry %p x %p [ino %u]", - cookie, cache, srch->ino); + _debug("check entry %p x %p [srch %llx]", + cookie, cache, cache->ops->srch_id(srch)); if (srch->cache == cache) { read_unlock(&cookie->lock); _debug("found entry"); - if (srch->ino) { - _leave(" = 0 [found ino %u]", srch->ino); + if (cache->ops->is_srch_positive(srch)) { + _leave(" = 0 [found srch %llx]", + cache->ops->srch_id(srch)); return 0; } @@ -395,16 +390,12 @@ /* allocate an initially negative entry for this object */ _debug("alloc entry %p x %p", cookie, cache); - srch = kmalloc(sizeof(*srch), GFP_KERNEL); + srch = cache->ops->srch_alloc(cache, GFP_KERNEL); if (!srch) { _leave(" = -ENOMEM"); return -ENOMEM; } - srch->cache = cache; - srch->ino = 0; - INIT_LIST_HEAD(&srch->link); - /* we need see if there's an entry for this cache in this object's * parent index, so the first thing to do is to see if the parent index * is represented on disc @@ -430,7 +421,7 @@ BUG(); - found_parent_entry: +found_parent_entry: read_unlock(&iparent->lock); _debug("found_parent_entry"); @@ -443,7 +434,7 @@ case 0: /* found - allocate a node */ - node = cache->ops->lookup_node(cache, srch->ino); + node = cache->ops->lookup_node(cache, cookie, srch); if (IS_ERR(node)) { ret = PTR_ERR(node); goto error; @@ -468,10 +459,10 @@ node->cookie = cookie; atomic_inc(&cookie->usage); - write_lock(&iparent->lock); + write_lock(&cookie->lock); list_add_tail(&srch->link, &cookie->search_results); list_add_tail(&node->cookie_link, &cookie->backing_nodes); - write_unlock(&iparent->lock); + write_unlock(&cookie->lock); cache->ops->unlock_node(node); break; @@ -483,19 +474,18 @@ break; } - done: +done: up_read(&iparent->sem); _leave(" = %d", ret); return ret; - igrab_failed_upput: +igrab_failed_upput: cache->ops->unlock_node(node); cache->ops->put_node(node); 
ret = -ENOENT; - error: +error: up_read(&iparent->sem); - dbgfree(srch); - kfree(srch); + cache->ops->srch_free(srch); _leave(" = %d", ret); return ret; @@ -533,10 +523,10 @@ BUG(); - found_search_result: - if (srch->ino) { +found_search_result: + if (cache->ops->is_srch_positive(srch)) { /* it was instantiated already */ - _leave(" = 0 [found ino %u]", srch->ino); + _leave(" = 0 [found ino %llx]", cache->ops->srch_id(srch)); return 0; } @@ -558,7 +548,7 @@ BUG(); - found_parent_node: +found_parent_node: _debug("found_parent_node: node=%p", ipnode); BUG_ON(ipnode->cookie != iparent); @@ -569,7 +559,7 @@ goto error; /* we're going to need an in-memory reflection of the node too */ - node = cache->ops->lookup_node(cache, srch->ino); + node = cache->ops->lookup_node(cache, cookie, srch); if (IS_ERR(node)) { ret = PTR_ERR(node); goto error_x; /* uh-oh... our search record is now wrong */ @@ -603,16 +593,16 @@ /* if we get an error after having instantiated a node on disc, just * discard the search record so we find it next time */ - error_xi: +error_xi: cache->ops->unlock_node(node); cache->ops->put_node(node); + node = NULL; ret = -ENOENT; - error_x: +error_x: list_del(&srch->link); - dbgfree(srch); - kfree(srch); + cache->ops->srch_free(srch); srch = NULL; - error: +error: up_write(&iparent->sem); _leave(" = %d", ret); return ret; @@ -743,7 +733,7 @@ if (list_empty(&fscache_cache_list)) { up_read(&fscache_addremove_sem); - _leave(" [no caches]"); + _leave(" = %p [no caches]", cookie); return cookie; } @@ -765,38 +755,41 @@ } } - /* if the object is a cookie then we need do nothing more here - we + /* if the object is an index then we need do nothing more here - we * create indexes on disc when we need them as an index may exist in * multiple caches */ - if (cookie->idef) - goto done; + if (!cookie->idef) { + /* the object is a file - we need to select a cache in which to + * store it */ + cache = fscache_select_cache_for_file(); + if (!cache) + goto no_cache; /* 
couldn't decide on a cache */ + + /* create a file index entry on disc, along with all the + * indexes required to find it again later */ + ret = fscache_instantiate_object(cookie, cache); + if (ret < 0) + goto error; + } - /* the object is a file - we need to select a cache in which to store - * it */ - ret = -ENOMEDIUM; - cache = fscache_select_cache_for_file(); - if (!cache) - goto error; /* couldn't decide on a cache */ - - /* create a file index entry on disc, along with all the indexes - * required to find it again later */ - ret = fscache_instantiate_object(cookie, cache); - if (ret == 0) - goto done; + up_write(&cookie->sem); +out: + up_read(&fscache_addremove_sem); + _leave(" = %p", cookie); + return cookie; - error: - printk("FS-Cache: error from cache fs: %d\n", ret); +no_cache: + ret = -ENOMEDIUM; +error: + printk("FS-Cache: error from cache: %d\n", ret); if (cookie) { + up_write(&cookie->sem); __fscache_cookie_put(cookie); cookie = FSCACHE_NEGATIVE_COOKIE; atomic_dec(&iparent->children); } - done: - up_write(&cookie->sem); - up_read(&fscache_addremove_sem); - _leave(" = %p", cookie); - return cookie; + goto out; } /* end __fscache_acquire_cookie() */ @@ -941,7 +934,7 @@ link); list_del(&srch->link); - kfree(srch); + srch->cache->ops->srch_free(srch); } BUG_ON(!list_empty(&cookie->search_results)); diff -uNr linux-2.6.12-rc2-mm3/fs/fscache/page.c linux-2.6.12-rc2-mm3-cachefs/fs/fscache/page.c --- linux-2.6.12-rc2-mm3/fs/fscache/page.c 2005-04-12 15:49:02.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/fscache/page.c 2005-04-21 17:01:46.741608673 +0100 @@ -1,6 +1,6 @@ /* page.c: general filesystem cache cookie management * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -19,6 +19,7 @@ * read a page from the cache or allocate a block in which to store it * - we return: * -ENOMEM - out of memory, nothing done + * -EINTR - interrupted * -ENOBUFS - no backing node available in which to cache the block * -ENODATA - no data available in the backing node for this block * 0 - dispatched a read - it'll call end_io_func() when finished @@ -30,7 +31,6 @@ unsigned long gfp) { struct fscache_node *node; - struct fscache_page *pageio; int ret; _enter("%p,{%lu},", cookie, page->index); @@ -47,13 +47,6 @@ BUG_ON(cookie->idef); /* not supposed to use this for indexes */ - /* get the cache-cookie for this page */ - pageio = cookie->netfs->ops->get_page_token(page); - if (IS_ERR(pageio)) { - _leave(" = %ld", PTR_ERR(pageio)); - return PTR_ERR(pageio); - } - /* prevent the file from being uncached whilst we access it */ down_read(&cookie->sem); @@ -64,19 +57,24 @@ struct fscache_node, cookie_link); - if (node->cache->ops->grab_node(node)) { - /* ask the cache to honour the operation */ - ret = node->cache->ops->read_or_alloc_page(node, - page, - pageio, - end_io_func, - end_io_data, - gfp); + /* prevent the cache from being withdrawn */ + if (down_read_trylock(&node->cache->withdrawal_sem)) { + if (node->cache->ops->grab_node(node)) { + /* ask the cache to honour the operation */ + ret = node->cache->ops->read_or_alloc_page( + node, + page, + end_io_func, + end_io_data, + gfp); - node->cache->ops->put_node(node); - } + node->cache->ops->put_node(node); + } + up_read(&node->cache->withdrawal_sem); + } } + up_read(&cookie->sem); _leave(" = %d", ret); return ret; @@ -90,6 +88,7 @@ * request a page be stored in the cache * - returns: * -ENOMEM - out of memory, nothing done + * -EINTR - interrupted * -ENOBUFS - no backing node available in which to cache the page * 0 - dispatched a write - it'll call end_io_func() when finished */ @@ -99,7 
+98,6 @@ void *end_io_data, unsigned long gfp) { - struct fscache_page *pageio; struct fscache_node *node; int ret; @@ -112,29 +110,25 @@ BUG_ON(cookie->idef); /* not supposed to use this for indexes */ - /* get the cache-cookie for this page */ - pageio = cookie->netfs->ops->get_page_token(page); - if (IS_ERR(pageio)) { - _leave(" = %ld", PTR_ERR(pageio)); - return PTR_ERR(pageio); - } - /* prevent the file from been uncached whilst we deal with it */ down_read(&cookie->sem); ret = -ENOBUFS; - if (!list_empty(&cookie->backing_nodes) && pageio->mapped_block) { + if (!list_empty(&cookie->backing_nodes)) { node = list_entry(cookie->backing_nodes.next, struct fscache_node, cookie_link); - /* ask the cache to honour the operation */ - ret = node->cache->ops->write_page(node, - page, - pageio, - end_io_func, - end_io_data, - gfp); + /* prevent the cache from being withdrawn */ + if (down_read_trylock(&node->cache->withdrawal_sem)) { + /* ask the cache to honour the operation */ + ret = node->cache->ops->write_page(node, + page, + end_io_func, + end_io_data, + gfp); + up_read(&node->cache->withdrawal_sem); + } } up_read(&cookie->sem); @@ -153,7 +147,6 @@ */ void __fscache_uncache_page(struct fscache_cookie *cookie, struct page *page) { - struct fscache_page *pageio; struct fscache_node *node; _enter(",{%lu}", page->index); @@ -165,33 +158,24 @@ BUG_ON(cookie->idef); /* not supposed to use this for indexes */ - /* get the cache-cookie for this page */ - pageio = cookie->netfs->ops->get_page_token(page); - if (IS_ERR(pageio)) { - _leave(" [get_page_cookie() = %ld]", PTR_ERR(pageio)); - return; - } - if (list_empty(&cookie->backing_nodes)) { - BUG_ON(pageio->mapped_block); _leave(" [no backing]"); return; } - if (!pageio->mapped_block) { - _leave(" [no mapping]"); - return; - } - /* ask the cache to honour the operation */ down_read(&cookie->sem); - if (!list_empty(&cookie->backing_nodes) && pageio->mapped_block) { + if (!list_empty(&cookie->backing_nodes)) { node = 
list_entry(cookie->backing_nodes.next, struct fscache_node, cookie_link); - node->cache->ops->uncache_page(node, pageio); + /* prevent the cache from being withdrawn */ + if (down_read_trylock(&node->cache->withdrawal_sem)) { + node->cache->ops->uncache_page(node, page); + up_read(&node->cache->withdrawal_sem); + } } up_read(&cookie->sem); @@ -202,30 +186,3 @@ } /* end __fscache_uncache_page() */ EXPORT_SYMBOL(__fscache_uncache_page); - -/*****************************************************************************/ -/* - * get a page caching token from for a page, allocating it and attaching it to - * the page's private pointer if it doesn't exist - */ -struct fscache_page * __fscache_page_get_private(struct page *page, - unsigned gfp_flags) -{ - struct fscache_page *pageio = (struct fscache_page *) page->private; - - if (!pageio) { - pageio = kmalloc(sizeof(*pageio), gfp_flags); - if (!pageio) - return ERR_PTR(-ENOMEM); - - memset(pageio, 0, sizeof(*pageio)); - rwlock_init(&pageio->lock); - - page->private = (unsigned long) pageio; - SetPagePrivate(page); - } - - return pageio; -} /* end __fscache_page_get_private() */ - -EXPORT_SYMBOL(__fscache_page_get_private); diff -uNr linux-2.6.12-rc2-mm3/fs/Kconfig linux-2.6.12-rc2-mm3-cachefs/fs/Kconfig --- linux-2.6.12-rc2-mm3/fs/Kconfig 2005-04-12 15:49:03.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/Kconfig 2005-04-14 18:44:05.000000000 +0100 @@ -462,8 +462,8 @@ depends on FSCACHE help This filesystem acts as a cache for other filesystems - primarily - networking filesystems - rather than thus allowing fast local disc to - enhance the speed of slower devices. + networking filesystems - thus allowing fast local disc to enhance the + speed of slower devices. It is a filesystem so that raw block devices can be made use of more efficiently, without suffering any overhead from intermediary @@ -476,6 +476,18 @@ See Documentation/filesystems/caching/cachefs.txt for more information. 
+config CACHEFILES + tristate "Filesystem caching on cache files" + depends on FSCACHE + help + This provides a facility by which one or more directories can be + nominated for use as caches for other filesystems - primarily + networking filesystems - thus allowing fast local disc to enhance the + speed of slower devices. + + See Documentation/filesystems/caching/cachefiles.txt for more + information. + endmenu config FUSE_FS diff -uNr linux-2.6.12-rc2-mm3/fs/Makefile linux-2.6.12-rc2-mm3-cachefs/fs/Makefile --- linux-2.6.12-rc2-mm3/fs/Makefile 2005-04-12 15:49:03.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/fs/Makefile 2005-04-14 18:44:41.000000000 +0100 @@ -101,5 +101,6 @@ obj-$(CONFIG_HOSTFS) += hostfs/ obj-$(CONFIG_HPPFS) += hppfs/ obj-$(CONFIG_CACHEFS) += cachefs/ +obj-$(CONFIG_CACHEFILES) += cachefiles/ obj-$(CONFIG_DEBUG_FS) += debugfs/ obj-$(CONFIG_RELAYFS_FS) += relayfs/ diff -uNr linux-2.6.12-rc2-mm3/include/asm-i386/fcntl.h linux-2.6.12-rc2-mm3-cachefs/include/asm-i386/fcntl.h --- linux-2.6.12-rc2-mm3/include/asm-i386/fcntl.h 2004-09-16 12:06:17.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/include/asm-i386/fcntl.h 2005-04-12 15:52:45.000000000 +0100 @@ -21,6 +21,7 @@ #define O_DIRECTORY 0200000 /* must be a directory */ #define O_NOFOLLOW 0400000 /* don't follow links */ #define O_NOATIME 01000000 +#define O_NOREADHOLE 02000000 /* give short read or ENODATA on a hole */ #define F_DUPFD 0 /* dup */ #define F_GETFD 1 /* get close_on_exec */ diff -uNr linux-2.6.12-rc2-mm3/include/linux/fscache-cache.h linux-2.6.12-rc2-mm3-cachefs/include/linux/fscache-cache.h --- linux-2.6.12-rc2-mm3/include/linux/fscache-cache.h 2005-04-12 15:49:11.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/include/linux/fscache-cache.h 2005-04-20 17:39:00.000000000 +0100 @@ -1,6 +1,6 @@ /* fscache-cache.h: general filesystem caching backing cache interface * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -19,42 +19,48 @@ struct fscache_node; struct fscache_search_result; +/* + * search result + * - embed in cache-specific search result record + */ struct fscache_search_result { struct list_head link; /* link in search_results */ struct fscache_cache *cache; /* cache searched */ - unsigned ino; /* node ID (or 0 if negative) */ }; +static inline +void fscache_init_search_result(struct fscache_cache *cache, + struct fscache_search_result *srch) +{ + srch->cache = cache; + INIT_LIST_HEAD(&srch->link); +} + +/* + * cache definition + */ struct fscache_cache { struct fscache_cache_ops *ops; struct list_head link; /* link in list of caches */ + struct rw_semaphore withdrawal_sem; /* withdrawal control sem */ size_t max_index_size; /* maximum size of index data */ - unsigned long flags; -#define FSCACHE_CACHE_WITHDRAWN 0 /* T if cache has been withdrawn */ - char identifier[32]; /* cache label */ /* node management */ struct list_head node_list; /* list of data/index nodes */ spinlock_t node_list_lock; - struct fscache_search_result fsdef_srch; /* search result for the fsdef index */ + struct fscache_search_result *fsdef_srch; /* search result for the fsdef index */ }; extern void fscache_init_cache(struct fscache_cache *cache, struct fscache_cache_ops *ops, - unsigned fsdef_ino, const char *idfmt, - ...) __attribute__ ((format (printf,4,5))); + ...) 
__attribute__ ((format (printf,3,4))); -extern void fscache_add_cache(struct fscache_cache *cache); +extern void fscache_add_cache(struct fscache_cache *cache, + struct fscache_search_result *fsdef_srch); extern void fscache_withdraw_cache(struct fscache_cache *cache); -/* see if a cache has been withdrawn */ -static inline int fscache_is_cache_withdrawn(struct fscache_cache *cache) -{ - return test_bit(FSCACHE_CACHE_WITHDRAWN, &cache->flags); -} - /*****************************************************************************/ /* * cache operations @@ -63,8 +69,26 @@ /* name of cache provider */ const char *name; + /* allocate a negative search result record */ + struct fscache_search_result *(*srch_alloc)(struct fscache_cache *cache, + unsigned long gfp); + + /* free a search result record */ + void (*srch_free)(struct fscache_search_result *srch); + + /* set a search result record to negative */ + void (*srch_negate)(struct fscache_search_result *srch); + + /* see if search result is positive */ + int (*is_srch_positive)(struct fscache_search_result *srch); + + /* represent a search result for printing */ + unsigned long long (*srch_id)(struct fscache_search_result *srch); + /* look up the nominated node for this cache */ - struct fscache_node *(*lookup_node)(struct fscache_cache *cache, unsigned ino); + struct fscache_node *(*lookup_node)(struct fscache_cache *cache, + struct fscache_cookie *index, + struct fscache_search_result *srch); /* increment the usage count on this inode (may fail if unmounting) */ struct fscache_node *(*grab_node)(struct fscache_node *node); @@ -79,13 +103,13 @@ void (*put_node)(struct fscache_node *node); /* search an index for an inode to back a cookie - * - the "inode number" should be set in result->ino + * - the "search result" should be updated to refer to the entry found */ int (*index_search)(struct fscache_node *node, struct fscache_cookie *cookie, struct fscache_search_result *result); /* create a new file or inode, with an 
entry in the named index - * - the "inode number" should be set in result->ino + * - the "search result" should be updated to refer to the new entry */ int (*index_add)(struct fscache_node *node, struct fscache_cookie *cookie, struct fscache_search_result *result); @@ -106,7 +130,6 @@ * cache */ int (*read_or_alloc_page)(struct fscache_node *node, struct page *page, - struct fscache_page *pageio, fscache_rw_complete_t end_io_func, void *end_io_data, unsigned long gfp); @@ -114,14 +137,13 @@ /* write a page to its backing block in the cache */ int (*write_page)(struct fscache_node *node, struct page *page, - struct fscache_page *pageio, fscache_rw_complete_t end_io_func, void *end_io_data, unsigned long gfp); /* detach a backing block from a page */ void (*uncache_page)(struct fscache_node *node, - struct fscache_page *pageio); + struct page *page); }; /*****************************************************************************/ diff -uNr linux-2.6.12-rc2-mm3/include/linux/fscache.h linux-2.6.12-rc2-mm3-cachefs/include/linux/fscache.h --- linux-2.6.12-rc2-mm3/include/linux/fscache.h 2005-04-12 15:49:11.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/include/linux/fscache.h 2005-04-20 17:39:24.000000000 +0100 @@ -1,6 +1,6 @@ /* fscache.h: general filesystem caching interface * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -24,7 +24,6 @@ struct fscache_cookie; struct fscache_netfs; struct fscache_netfs_operations; -struct fscache_page; #define FSCACHE_NEGATIVE_COOKIE NULL @@ -156,12 +155,6 @@ struct fscache_netfs_operations { - /* get page-to-block mapping token for a page - * - one should be allocated if it doesn't exist - * - returning -ENODATA will cause this page to be ignored - * - typically, the struct will be attached to page->private - */ - struct fscache_page *(*get_page_token)(struct page *page); }; #ifdef CONFIG_FSCACHE @@ -189,26 +182,9 @@ #endif } -/*****************************************************************************/ -/* - * page mapping cookie - * - stores the mapping of a page to a block in the cache (may also be null) - * - note that the mapping may be removed without notice if a cache is removed - */ -struct fscache_page -{ - void *mapped_block; /* block mirroring this page */ - rwlock_t lock; - - unsigned long flags; -#define FSCACHE_PAGE_BOUNDARY 0 /* next block has a different - * indirection chain */ -#define FSCACHE_PAGE_NEW 1 /* this is a newly allocated block */ -}; - /* * read a page from the cache or allocate a block in which to store it - * - if the cookie is not backed by a file: + * - if the page is not backed by a file: * - -ENOBUFS will be returned and nothing more will be done * - else if the page is backed by a block in the cache: * - a read will be started which will call end_io_func on completion @@ -321,37 +297,4 @@ unsigned short to); #endif -/* - * convenience routines for mapping page->private directly to a struct - * fscache_page - */ -static inline -struct fscache_page *__fscache_page_grab_private(struct page *page) -{ - return (struct fscache_page *) (PagePrivate(page) ? 
page->private : 0); -} - -#define fscache_page_grab_private(X) \ -({ \ - BUG_ON(!PagePrivate(X)); \ - __fscache_page_grab_private(X); \ -}) - - -#ifdef CONFIG_FSCACHE -extern struct fscache_page *__fscache_page_get_private(struct page *page, - unsigned gfp); -#endif - -static inline -struct fscache_page *fscache_page_get_private(struct page *page, - unsigned gfp) -{ -#ifdef CONFIG_FSCACHE - return __fscache_page_get_private(page, gfp); -#else - return ERR_PTR(-EIO); -#endif -} - #endif /* _LINUX_FSCACHE_H */ diff -uNr linux-2.6.12-rc2-mm3/kernel.config linux-2.6.12-rc2-mm3-cachefs/kernel.config --- linux-2.6.12-rc2-mm3/kernel.config 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.12-rc2-mm3-cachefs/kernel.config 2005-04-21 16:57:19.000000000 +0100 @@ -0,0 +1,1075 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.12-rc2-mm3 +# Mon Apr 18 13:41:27 2005 +# +CONFIG_X86=y +CONFIG_MMU=y +CONFIG_UID16=y +CONFIG_GENERIC_ISA_DMA=y +CONFIG_GENERIC_IOMAP=y + +# +# Code maturity level options +# +CONFIG_EXPERIMENTAL=y +CONFIG_CLEAN_COMPILE=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 + +# +# General setup +# +CONFIG_LOCALVERSION="" +# CONFIG_LOCALVERSION_AUTO is not set +CONFIG_SWAP=y +CONFIG_SYSVIPC=y +# CONFIG_POSIX_MQUEUE is not set +# CONFIG_BSD_PROCESS_ACCT is not set +CONFIG_SYSCTL=y +# CONFIG_AUDIT is not set +CONFIG_HOTPLUG=y +CONFIG_KOBJECT_UEVENT=y +# CONFIG_IKCONFIG is not set +# CONFIG_CPUSETS is not set +# CONFIG_EMBEDDED is not set +CONFIG_KALLSYMS=y +# CONFIG_KALLSYMS_ALL is not set +# CONFIG_KALLSYMS_EXTRA_PASS is not set +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SHMEM=y +CONFIG_CC_ALIGN_FUNCTIONS=0 +CONFIG_CC_ALIGN_LABELS=0 +CONFIG_CC_ALIGN_LOOPS=0 +CONFIG_CC_ALIGN_JUMPS=0 +# CONFIG_TINY_SHMEM is not set +CONFIG_BASE_SMALL=0 + +# +# Loadable module support +# +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set 
+CONFIG_OBSOLETE_MODPARM=y +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_KMOD=y +CONFIG_STOP_MACHINE=y + +# +# Processor type and features +# +CONFIG_X86_PC=y +# CONFIG_X86_ELAN is not set +# CONFIG_X86_VOYAGER is not set +# CONFIG_X86_NUMAQ is not set +# CONFIG_X86_SUMMIT is not set +# CONFIG_X86_BIGSMP is not set +# CONFIG_X86_VISWS is not set +# CONFIG_X86_GENERICARCH is not set +# CONFIG_X86_ES7000 is not set +# CONFIG_M386 is not set +# CONFIG_M486 is not set +# CONFIG_M586 is not set +# CONFIG_M586TSC is not set +# CONFIG_M586MMX is not set +CONFIG_M686=y +# CONFIG_MPENTIUMII is not set +# CONFIG_MPENTIUMIII is not set +# CONFIG_MPENTIUMM is not set +# CONFIG_MPENTIUM4 is not set +# CONFIG_MK6 is not set +# CONFIG_MK7 is not set +# CONFIG_MK8 is not set +# CONFIG_MCRUSOE is not set +# CONFIG_MEFFICEON is not set +# CONFIG_MWINCHIPC6 is not set +# CONFIG_MWINCHIP2 is not set +# CONFIG_MWINCHIP3D is not set +# CONFIG_MGEODEGX1 is not set +# CONFIG_MCYRIXIII is not set +# CONFIG_MVIAC3_2 is not set +# CONFIG_X86_GENERIC is not set +CONFIG_X86_CMPXCHG=y +CONFIG_X86_XADD=y +CONFIG_X86_L1_CACHE_SHIFT=5 +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_X86_PPRO_FENCE=y +CONFIG_X86_WP_WORKS_OK=y +CONFIG_X86_INVLPG=y +CONFIG_X86_BSWAP=y +CONFIG_X86_POPAD_OK=y +CONFIG_X86_GOOD_APIC=y +CONFIG_X86_USE_PPRO_CHECKSUM=y +# CONFIG_HPET_TIMER is not set +CONFIG_SMP=y +CONFIG_NR_CPUS=2 +# CONFIG_SCHED_SMT is not set +# CONFIG_PREEMPT is not set +CONFIG_X86_LOCAL_APIC=y +CONFIG_X86_IO_APIC=y +CONFIG_X86_TSC=y +# CONFIG_X86_MCE is not set +# CONFIG_TOSHIBA is not set +# CONFIG_I8K is not set +# CONFIG_MICROCODE is not set +CONFIG_X86_MSR=y +CONFIG_X86_CPUID=y + +# +# Firmware Drivers +# +# CONFIG_EDD is not set +CONFIG_NOHIGHMEM=y +# CONFIG_HIGHMEM4G is not set +# CONFIG_HIGHMEM64G is not set +CONFIG_FLATMEM=y +# CONFIG_DISCONTIGMEM is not set +# CONFIG_MATH_EMULATION is not set +# CONFIG_MTRR is not set +# 
CONFIG_EFI is not set +CONFIG_IRQBALANCE=y +CONFIG_HAVE_DEC_LOCK=y +# CONFIG_REGPARM is not set +CONFIG_SECCOMP=y + +# +# Performance-monitoring counters support +# +# CONFIG_PERFCTR is not set +CONFIG_PHYSICAL_START=0x100000 +# CONFIG_KEXEC is not set + +# +# Power management options (ACPI, APM) +# +CONFIG_PM=y +# CONFIG_PM_DEBUG is not set +# CONFIG_SOFTWARE_SUSPEND is not set + +# +# ACPI (Advanced Configuration and Power Interface) Support +# +CONFIG_ACPI=y +CONFIG_ACPI_BOOT=y +CONFIG_ACPI_INTERPRETER=y +# CONFIG_ACPI_SLEEP is not set +# CONFIG_ACPI_AC is not set +# CONFIG_ACPI_BATTERY is not set +# CONFIG_ACPI_BUTTON is not set +# CONFIG_ACPI_VIDEO is not set +# CONFIG_ACPI_HOTKEY is not set +# CONFIG_ACPI_FAN is not set +CONFIG_ACPI_PROCESSOR=y +CONFIG_ACPI_THERMAL=y +# CONFIG_ACPI_ASUS is not set +# CONFIG_ACPI_IBM is not set +# CONFIG_ACPI_TOSHIBA is not set +CONFIG_ACPI_BLACKLIST_YEAR=0 +# CONFIG_ACPI_DEBUG is not set +CONFIG_ACPI_BUS=y +CONFIG_ACPI_EC=y +CONFIG_ACPI_POWER=y +CONFIG_ACPI_PCI=y +CONFIG_ACPI_SYSTEM=y +# CONFIG_X86_PM_TIMER is not set +# CONFIG_ACPI_CONTAINER is not set + +# +# APM (Advanced Power Management) BIOS Support +# +# CONFIG_APM is not set + +# +# CPU Frequency scaling +# +# CONFIG_CPU_FREQ is not set + +# +# Bus options (PCI, PCMCIA, EISA, MCA, ISA) +# +CONFIG_PCI=y +# CONFIG_PCI_GOBIOS is not set +# CONFIG_PCI_GOMMCONFIG is not set +# CONFIG_PCI_GODIRECT is not set +CONFIG_PCI_GOANY=y +CONFIG_PCI_BIOS=y +CONFIG_PCI_DIRECT=y +CONFIG_PCI_MMCONFIG=y +# CONFIG_PCIEPORTBUS is not set +# CONFIG_PCI_MSI is not set +# CONFIG_PCI_LEGACY_PROC is not set +CONFIG_PCI_NAMES=y +# CONFIG_PCI_DEBUG is not set +CONFIG_ISA=y +# CONFIG_EISA is not set +# CONFIG_MCA is not set +# CONFIG_SCx200 is not set +# CONFIG_HOTPLUG_CPU is not set + +# +# PCCARD (PCMCIA/CardBus) support +# +# CONFIG_PCCARD is not set + +# +# PCI Hotplug Support +# +# CONFIG_HOTPLUG_PCI is not set + +# +# Executable file formats +# +CONFIG_BINFMT_ELF=y +# CONFIG_BINFMT_AOUT is 
not set +CONFIG_BINFMT_MISC=y + +# +# Networking +# +CONFIG_NET=y + +# +# Networking protocols +# +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IP_ADVANCED_ROUTER is not set +# CONFIG_IP_PNP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_IP_MROUTE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# CONFIG_INET_IPCOMP is not set +# CONFIG_INET_TUNNEL is not set +CONFIG_IP_TCPDIAG=y +# CONFIG_IP_TCPDIAG_IPV6 is not set +# CONFIG_IPV6 is not set +# CONFIG_IP_SCTP is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set + +# +# Network packet filtering +# +# CONFIG_NETFILTER is not set + +# +# QoS and/or fair queueing +# +# CONFIG_NET_SCHED is not set +# CONFIG_NET_CLS_ROUTE is not set +# CONFIG_NET_KEY is not set +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +# CONFIG_BRIDGE is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_WAN_ROUTER is not set + +# +# Amateur Radio support +# +# CONFIG_HAMRADIO is not set + +# +# IrDA (infrared) subsystem support +# +# CONFIG_IRDA is not set + +# +# Bluetooth subsystem support +# +# CONFIG_BT is not set +# CONFIG_IEEE80211 is not set + +# +# Asynchronous Transfer Mode (ATM) +# +# CONFIG_ATM is not set + +# +# Network testing +# +# CONFIG_NET_DIVERT is not set +# CONFIG_NET_PKTGEN is not set +# CONFIG_KGDBOE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NETPOLL_RX is not set +# CONFIG_NETPOLL_TRAP is not set +# CONFIG_NET_POLL_CONTROLLER is not set + +# +# Device Drivers +# + +# +# Generic Driver Options +# +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +# CONFIG_FW_LOADER is not set +# CONFIG_DEBUG_DRIVER is not set + +# +# Memory Technology Devices (MTD) +# +# CONFIG_MTD is not set + +# +# Parallel port support +# +CONFIG_PARPORT=y 
+CONFIG_PARPORT_PC=y +# CONFIG_PARPORT_SERIAL is not set +# CONFIG_PARPORT_PC_FIFO is not set +# CONFIG_PARPORT_PC_SUPERIO is not set +# CONFIG_PARPORT_GSC is not set +# CONFIG_PARPORT_1284 is not set + +# +# Plug and Play support +# +# CONFIG_PNP is not set + +# +# Block devices +# +CONFIG_BLK_DEV_FD=y +# CONFIG_BLK_DEV_XD is not set +# CONFIG_PARIDE is not set +# CONFIG_BLK_CPQ_DA is not set +# CONFIG_BLK_CPQ_CISS_DA is not set +# CONFIG_BLK_DEV_DAC960 is not set +# CONFIG_BLK_DEV_UMEM is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +CONFIG_BLK_DEV_LOOP=y +# CONFIG_BLK_DEV_CRYPTOLOOP is not set +# CONFIG_BLK_DEV_NBD is not set +# CONFIG_BLK_DEV_SX8 is not set +# CONFIG_BLK_DEV_RAM is not set +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_INITRAMFS_SOURCE="" +CONFIG_LBD=y +# CONFIG_CDROM_PKTCDVD is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +# CONFIG_ATA_OVER_ETH is not set + +# +# ATA/ATAPI/MFM/RLL support +# +CONFIG_IDE=y +CONFIG_BLK_DEV_IDE=y + +# +# Please see Documentation/ide.txt for help/info on IDE drives +# +# CONFIG_BLK_DEV_IDE_SATA is not set +# CONFIG_BLK_DEV_HD_IDE is not set +CONFIG_BLK_DEV_IDEDISK=y +# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_BLK_DEV_IDECD=y +# CONFIG_BLK_DEV_IDETAPE is not set +# CONFIG_BLK_DEV_IDEFLOPPY is not set +# CONFIG_IDE_TASK_IOCTL is not set + +# +# IDE chipset support/bugfixes +# +CONFIG_IDE_GENERIC=y +# CONFIG_BLK_DEV_CMD640 is not set +CONFIG_BLK_DEV_IDEPCI=y +CONFIG_IDEPCI_SHARE_IRQ=y +# CONFIG_BLK_DEV_OFFBOARD is not set +# CONFIG_BLK_DEV_GENERIC is not set +# CONFIG_BLK_DEV_OPTI621 is not set +# CONFIG_BLK_DEV_RZ1000 is not set +CONFIG_BLK_DEV_IDEDMA_PCI=y +# CONFIG_BLK_DEV_IDEDMA_FORCED is not set +CONFIG_IDEDMA_PCI_AUTO=y +# CONFIG_IDEDMA_ONLYDISK is not set +# CONFIG_BLK_DEV_AEC62XX is not set +# CONFIG_BLK_DEV_ALI15X3 is not set +# CONFIG_BLK_DEV_AMD74XX is not set +# CONFIG_BLK_DEV_ATIIXP is not set +# CONFIG_BLK_DEV_CMD64X is not 
set +# CONFIG_BLK_DEV_TRIFLEX is not set +# CONFIG_BLK_DEV_CY82C693 is not set +# CONFIG_BLK_DEV_CS5520 is not set +# CONFIG_BLK_DEV_CS5530 is not set +# CONFIG_BLK_DEV_HPT34X is not set +# CONFIG_BLK_DEV_HPT366 is not set +# CONFIG_BLK_DEV_SC1200 is not set +CONFIG_BLK_DEV_PIIX=y +# CONFIG_BLK_DEV_NS87415 is not set +# CONFIG_BLK_DEV_PDC202XX_OLD is not set +# CONFIG_BLK_DEV_PDC202XX_NEW is not set +# CONFIG_BLK_DEV_SVWKS is not set +# CONFIG_BLK_DEV_SIIMAGE is not set +# CONFIG_BLK_DEV_SIS5513 is not set +# CONFIG_BLK_DEV_SLC90E66 is not set +# CONFIG_BLK_DEV_TRM290 is not set +# CONFIG_BLK_DEV_VIA82CXXX is not set +# CONFIG_IDE_ARM is not set +# CONFIG_IDE_CHIPSETS is not set +CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDEDMA_IVB is not set +CONFIG_IDEDMA_AUTO=y +# CONFIG_BLK_DEV_HD is not set + +# +# SCSI device support +# +# CONFIG_SCSI is not set + +# +# Old CD-ROM drivers (not SCSI, not IDE) +# +# CONFIG_CD_NO_IDESCSI is not set + +# +# Multi-device support (RAID and LVM) +# +# CONFIG_MD is not set + +# +# Fusion MPT device support +# + +# +# IEEE 1394 (FireWire) support +# +# CONFIG_IEEE1394 is not set + +# +# I2O device support +# +# CONFIG_I2O is not set + +# +# Network device support +# +CONFIG_NETDEVICES=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set + +# +# ARCnet devices +# +# CONFIG_ARCNET is not set + +# +# Ethernet (10 or 100Mbit) +# +CONFIG_NET_ETHERNET=y +CONFIG_MII=y +# CONFIG_HAPPYMEAL is not set +# CONFIG_SUNGEM is not set +CONFIG_NET_VENDOR_3COM=y +# CONFIG_EL1 is not set +# CONFIG_EL2 is not set +# CONFIG_ELPLUS is not set +# CONFIG_EL16 is not set +# CONFIG_EL3 is not set +# CONFIG_3C515 is not set +CONFIG_VORTEX=y +# CONFIG_TYPHOON is not set +# CONFIG_LANCE is not set +# CONFIG_NET_VENDOR_SMC is not set +# CONFIG_NET_VENDOR_RACAL is not set + +# +# Tulip family network device support +# +# CONFIG_NET_TULIP is not set +# CONFIG_AT1700 is not set +# CONFIG_DEPCA is not set +# 
CONFIG_HP100 is not set +# CONFIG_NET_ISA is not set +CONFIG_NET_PCI=y +# CONFIG_PCNET32 is not set +# CONFIG_AMD8111_ETH is not set +# CONFIG_ADAPTEC_STARFIRE is not set +# CONFIG_AC3200 is not set +# CONFIG_APRICOT is not set +# CONFIG_B44 is not set +# CONFIG_FORCEDETH is not set +# CONFIG_CS89x0 is not set +# CONFIG_DGRS is not set +CONFIG_EEPRO100=y +# CONFIG_E100 is not set +# CONFIG_FEALNX is not set +# CONFIG_NATSEMI is not set +# CONFIG_NE2K_PCI is not set +# CONFIG_8139CP is not set +# CONFIG_8139TOO is not set +# CONFIG_SIS900 is not set +# CONFIG_EPIC100 is not set +# CONFIG_SUNDANCE is not set +# CONFIG_TLAN is not set +# CONFIG_VIA_RHINE is not set +# CONFIG_NET_POCKET is not set + +# +# Ethernet (1000 Mbit) +# +# CONFIG_ACENIC is not set +# CONFIG_DL2K is not set +# CONFIG_E1000 is not set +# CONFIG_NS83820 is not set +# CONFIG_HAMACHI is not set +# CONFIG_YELLOWFIN is not set +# CONFIG_R8169 is not set +# CONFIG_SKGE is not set +# CONFIG_SK98LIN is not set +# CONFIG_VIA_VELOCITY is not set +# CONFIG_TIGON3 is not set + +# +# Ethernet (10000 Mbit) +# +# CONFIG_CHELSIO_T1 is not set +# CONFIG_IXGB is not set +# CONFIG_S2IO is not set + +# +# Token Ring devices +# +# CONFIG_TR is not set + +# +# Wireless LAN (non-hamradio) +# +# CONFIG_NET_RADIO is not set + +# +# Wan interfaces +# +# CONFIG_WAN is not set +# CONFIG_FDDI is not set +# CONFIG_HIPPI is not set +# CONFIG_PLIP is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_SHAPER is not set +# CONFIG_NETCONSOLE is not set + +# +# ISDN subsystem +# +# CONFIG_ISDN is not set + +# +# Telephony Support +# +# CONFIG_PHONE is not set + +# +# Input device support +# +CONFIG_INPUT=y + +# +# Userland interfaces +# +CONFIG_INPUT_MOUSEDEV=y +CONFIG_INPUT_MOUSEDEV_PSAUX=y +CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 +CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 +# CONFIG_INPUT_JOYDEV is not set +# CONFIG_INPUT_TSDEV is not set +# CONFIG_INPUT_EVDEV is not set +# CONFIG_INPUT_EVBUG is not set + +# +# Input 
Device Drivers +# +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +# CONFIG_KEYBOARD_SUNKBD is not set +# CONFIG_KEYBOARD_LKKBD is not set +# CONFIG_KEYBOARD_XTKBD is not set +# CONFIG_KEYBOARD_NEWTON is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_INPUT_JOYSTICK is not set +# CONFIG_INPUT_TOUCHSCREEN is not set +# CONFIG_INPUT_MISC is not set + +# +# Hardware I/O ports +# +CONFIG_SERIO=y +CONFIG_SERIO_I8042=y +# CONFIG_SERIO_SERPORT is not set +# CONFIG_SERIO_CT82C710 is not set +# CONFIG_SERIO_PARKBD is not set +# CONFIG_SERIO_PCIPS2 is not set +CONFIG_SERIO_LIBPS2=y +# CONFIG_SERIO_RAW is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +CONFIG_VT=y +CONFIG_VT_CONSOLE=y +CONFIG_HW_CONSOLE=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +# CONFIG_SERIAL_8250_ACPI is not set +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_SERIAL_8250_DETECT_IRQ is not set +# CONFIG_SERIAL_8250_MULTIPORT is not set +# CONFIG_SERIAL_8250_RSA is not set + +# +# Non-8250 serial port support +# +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +# CONFIG_SERIAL_JSM is not set +CONFIG_UNIX98_PTYS=y +CONFIG_LEGACY_PTYS=y +CONFIG_LEGACY_PTY_COUNT=256 +# CONFIG_PRINTER is not set +# CONFIG_PPDEV is not set +# CONFIG_TIPAR is not set + +# +# IPMI +# +# CONFIG_IPMI_HANDLER is not set + +# +# Watchdog Cards +# +# CONFIG_WATCHDOG is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_NVRAM is not set +# CONFIG_RTC is not set +# CONFIG_GEN_RTC is not set +# CONFIG_DTLK is not set +# CONFIG_R3964 is not set +# CONFIG_APPLICOM is not set +# CONFIG_SONYPI is not set + +# +# Ftape, the floppy tape device driver +# +# CONFIG_AGP is not set +# CONFIG_DRM is not set +# CONFIG_MWAVE is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_HPET is not set +# CONFIG_HANGCHECK_TIMER is not set + +# +# TPM devices +# +# 
CONFIG_TCG_TPM is not set + +# +# I2C support +# +CONFIG_I2C=y +# CONFIG_I2C_CHARDEV is not set + +# +# I2C Algorithms +# +CONFIG_I2C_ALGOBIT=y +# CONFIG_I2C_ALGOPCF is not set +# CONFIG_I2C_ALGOPCA is not set + +# +# I2C Hardware Bus support +# +# CONFIG_I2C_ALI1535 is not set +# CONFIG_I2C_ALI1563 is not set +# CONFIG_I2C_ALI15X3 is not set +# CONFIG_I2C_AMD756 is not set +# CONFIG_I2C_AMD8111 is not set +# CONFIG_I2C_I801 is not set +# CONFIG_I2C_I810 is not set +# CONFIG_I2C_PIIX4 is not set +# CONFIG_I2C_ISA is not set +# CONFIG_I2C_NFORCE2 is not set +# CONFIG_I2C_PARPORT is not set +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_PROSAVAGE is not set +# CONFIG_I2C_SAVAGE4 is not set +# CONFIG_SCx200_ACB is not set +# CONFIG_I2C_SIS5595 is not set +# CONFIG_I2C_SIS630 is not set +# CONFIG_I2C_SIS96X is not set +# CONFIG_I2C_STUB is not set +# CONFIG_I2C_VIA is not set +# CONFIG_I2C_VIAPRO is not set +# CONFIG_I2C_VOODOO3 is not set +# CONFIG_I2C_PCA_ISA is not set + +# +# Hardware Sensors Chip support +# +# CONFIG_I2C_SENSOR is not set +# CONFIG_SENSORS_ADM1021 is not set +# CONFIG_SENSORS_ADM1025 is not set +# CONFIG_SENSORS_ADM1026 is not set +# CONFIG_SENSORS_ADM1031 is not set +# CONFIG_SENSORS_ASB100 is not set +# CONFIG_SENSORS_DS1621 is not set +# CONFIG_SENSORS_FSCHER is not set +# CONFIG_SENSORS_FSCPOS is not set +# CONFIG_SENSORS_GL518SM is not set +# CONFIG_SENSORS_GL520SM is not set +# CONFIG_SENSORS_IT87 is not set +# CONFIG_SENSORS_LM63 is not set +# CONFIG_SENSORS_LM75 is not set +# CONFIG_SENSORS_LM77 is not set +# CONFIG_SENSORS_LM78 is not set +# CONFIG_SENSORS_LM80 is not set +# CONFIG_SENSORS_LM83 is not set +# CONFIG_SENSORS_LM85 is not set +# CONFIG_SENSORS_LM87 is not set +# CONFIG_SENSORS_LM90 is not set +# CONFIG_SENSORS_LM92 is not set +# CONFIG_SENSORS_MAX1619 is not set +# CONFIG_SENSORS_PC87360 is not set +# CONFIG_SENSORS_SMSC47B397 is not set +# CONFIG_SENSORS_SIS5595 is not set +# CONFIG_SENSORS_SMSC47M1 is not set +# 
CONFIG_SENSORS_VIA686A is not set +# CONFIG_SENSORS_W83781D is not set +# CONFIG_SENSORS_W83L785TS is not set +# CONFIG_SENSORS_W83627HF is not set + +# +# Other I2C Chip support +# +# CONFIG_SENSORS_DS1337 is not set +# CONFIG_SENSORS_EEPROM is not set +# CONFIG_SENSORS_PCF8574 is not set +# CONFIG_SENSORS_PCF8591 is not set +# CONFIG_SENSORS_RTC8564 is not set +# CONFIG_I2C_DEBUG_CORE is not set +# CONFIG_I2C_DEBUG_ALGO is not set +# CONFIG_I2C_DEBUG_BUS is not set +# CONFIG_I2C_DEBUG_CHIP is not set + +# +# Dallas's 1-wire bus +# +# CONFIG_W1 is not set + +# +# Misc devices +# +# CONFIG_IBM_ASM is not set + +# +# Multimedia devices +# +# CONFIG_VIDEO_DEV is not set + +# +# Digital Video Broadcasting Devices +# +# CONFIG_DVB is not set + +# +# Graphics support +# +# CONFIG_FB is not set +# CONFIG_VIDEO_SELECT is not set + +# +# Console display driver support +# +CONFIG_VGA_CONSOLE=y +# CONFIG_MDA_CONSOLE is not set +CONFIG_DUMMY_CONSOLE=y + +# +# Sound +# +# CONFIG_SOUND is not set + +# +# USB support +# +CONFIG_USB_ARCH_HAS_HCD=y +CONFIG_USB_ARCH_HAS_OHCI=y +# CONFIG_USB is not set + +# +# USB Gadget Support +# +# CONFIG_USB_GADGET is not set + +# +# MMC/SD Card support +# +# CONFIG_MMC is not set + +# +# InfiniBand support +# +# CONFIG_INFINIBAND is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_XATTR=y +# CONFIG_EXT3_FS_POSIX_ACL is not set +# CONFIG_EXT3_FS_SECURITY is not set +CONFIG_JBD=y +# CONFIG_JBD_DEBUG is not set +CONFIG_FS_MBCACHE=y +# CONFIG_REISER4_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set + +# +# XFS support +# +# CONFIG_XFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_ROMFS_FS is not set +CONFIG_INOTIFY=y +# CONFIG_QUOTA is not set +CONFIG_DNOTIFY=y +# CONFIG_AUTOFS_FS is not set +CONFIG_AUTOFS4_FS=y + +# +# Caches +# +CONFIG_FSCACHE=m +CONFIG_CACHEFS=m +CONFIG_CACHEFILES=m +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems 
+# +CONFIG_ISO9660_FS=y +# CONFIG_JOLIET is not set +# CONFIG_ZISOFS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +CONFIG_FAT_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_FAT_DEFAULT_CODEPAGE=437 +CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1" +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_SYSFS=y +# CONFIG_DEVFS_FS is not set +# CONFIG_DEVPTS_FS_XATTR is not set +CONFIG_TMPFS=y +# CONFIG_TMPFS_XATTR is not set +# CONFIG_HUGETLBFS is not set +# CONFIG_HUGETLB_PAGE is not set +CONFIG_RAMFS=y +# CONFIG_RELAYFS_FS is not set + +# +# Miscellaneous filesystems +# +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +# CONFIG_CRAMFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set + +# +# Network File Systems +# +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +# CONFIG_NFS_V4 is not set +# CONFIG_NFS_DIRECTIO is not set +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +# CONFIG_NFSD_V3_ACL is not set +# CONFIG_NFSD_V4 is not set +# CONFIG_NFSD_TCP is not set +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_EXPORTFS=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +# CONFIG_RPCSEC_GSS_KRB5 is not set +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +CONFIG_AFS_FS=m +CONFIG_AFS_FSCACHE=y +CONFIG_RXRPC=m + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y + +# +# Native Language Support +# +CONFIG_NLS=y +CONFIG_NLS_DEFAULT="iso8859-1" +# CONFIG_NLS_CODEPAGE_437 is not set +# CONFIG_NLS_CODEPAGE_737 is not set +# CONFIG_NLS_CODEPAGE_775 is not set +# CONFIG_NLS_CODEPAGE_850 is not set +# CONFIG_NLS_CODEPAGE_852 
is not set +# CONFIG_NLS_CODEPAGE_855 is not set +# CONFIG_NLS_CODEPAGE_857 is not set +# CONFIG_NLS_CODEPAGE_860 is not set +# CONFIG_NLS_CODEPAGE_861 is not set +# CONFIG_NLS_CODEPAGE_862 is not set +# CONFIG_NLS_CODEPAGE_863 is not set +# CONFIG_NLS_CODEPAGE_864 is not set +# CONFIG_NLS_CODEPAGE_865 is not set +# CONFIG_NLS_CODEPAGE_866 is not set +# CONFIG_NLS_CODEPAGE_869 is not set +# CONFIG_NLS_CODEPAGE_936 is not set +# CONFIG_NLS_CODEPAGE_950 is not set +# CONFIG_NLS_CODEPAGE_932 is not set +# CONFIG_NLS_CODEPAGE_949 is not set +# CONFIG_NLS_CODEPAGE_874 is not set +# CONFIG_NLS_ISO8859_8 is not set +# CONFIG_NLS_CODEPAGE_1250 is not set +# CONFIG_NLS_CODEPAGE_1251 is not set +# CONFIG_NLS_ASCII is not set +# CONFIG_NLS_ISO8859_1 is not set +# CONFIG_NLS_ISO8859_2 is not set +# CONFIG_NLS_ISO8859_3 is not set +# CONFIG_NLS_ISO8859_4 is not set +# CONFIG_NLS_ISO8859_5 is not set +# CONFIG_NLS_ISO8859_6 is not set +# CONFIG_NLS_ISO8859_7 is not set +# CONFIG_NLS_ISO8859_9 is not set +# CONFIG_NLS_ISO8859_13 is not set +# CONFIG_NLS_ISO8859_14 is not set +# CONFIG_NLS_ISO8859_15 is not set +# CONFIG_NLS_KOI8_R is not set +# CONFIG_NLS_KOI8_U is not set +# CONFIG_NLS_UTF8 is not set + +# +# Profiling support +# +# CONFIG_PROFILING is not set + +# +# Kernel hacking +# +# CONFIG_PRINTK_TIME is not set +CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_LOG_BUF_SHIFT=15 +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_SCHEDSTATS is not set +CONFIG_DEBUG_SLAB=y +CONFIG_DEBUG_SPINLOCK=y +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_KOBJECT is not set +CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_INFO is not set +# CONFIG_PAGE_OWNER is not set +# CONFIG_DEBUG_FS is not set +# CONFIG_FRAME_POINTER is not set +CONFIG_EARLY_PRINTK=y +# CONFIG_DEBUG_STACKOVERFLOW is not set +# CONFIG_KPROBES is not set +# CONFIG_DEBUG_STACK_USAGE is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_4KSTACKS is not set +CONFIG_X86_FIND_SMP_CONFIG=y +CONFIG_X86_MPPARSE=y +# 
CONFIG_KGDB is not set + +# +# Security options +# +CONFIG_KEYS=y +CONFIG_KEYS_DEBUG_PROC_KEYS=y +# CONFIG_SECURITY is not set + +# +# Cryptographic options +# +# CONFIG_CRYPTO is not set + +# +# Hardware crypto devices +# + +# +# Library routines +# +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC32 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_GENERIC_HARDIRQS=y +CONFIG_GENERIC_IRQ_PROBE=y +CONFIG_X86_SMP=y +CONFIG_X86_HT=y +CONFIG_X86_BIOS_REBOOT=y +CONFIG_X86_TRAMPOLINE=y +CONFIG_PC=y