[PATCH 02/14] GFS: core fs Core file system functions. Signed-off-by: Ken Preslan Signed-off-by: David Teigland --- fs/gfs2/bmap.c | 1216 +++++++++++++++++++++++++++++++++ fs/gfs2/bmap.h | 43 + fs/gfs2/daemon.c | 215 +++++ fs/gfs2/daemon.h | 24 fs/gfs2/fixed_div64.h | 142 +++ fs/gfs2/format.h | 25 fs/gfs2/glops.c | 491 +++++++++++++ fs/gfs2/glops.h | 27 fs/gfs2/inode.c | 1808 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/inode.h | 77 ++ fs/gfs2/jdata.c | 387 ++++++++++ fs/gfs2/jdata.h | 56 + fs/gfs2/lops.c | 517 ++++++++++++++ fs/gfs2/lops.h | 100 ++ fs/gfs2/main.c | 126 +++ fs/gfs2/meta_io.c | 924 +++++++++++++++++++++++++ fs/gfs2/meta_io.h | 92 ++ fs/gfs2/ondisk.c | 32 fs/gfs2/ops_address.c | 553 +++++++++++++++ fs/gfs2/ops_address.h | 19 fs/gfs2/ops_dentry.c | 121 +++ fs/gfs2/ops_dentry.h | 19 fs/gfs2/ops_export.c | 309 ++++++++ fs/gfs2/ops_export.h | 19 fs/gfs2/ops_file.c | 1549 ++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/ops_file.h | 20 fs/gfs2/ops_fstype.c | 973 ++++++++++++++++++++++++++ fs/gfs2/ops_fstype.h | 22 fs/gfs2/ops_inode.c | 1442 +++++++++++++++++++++++++++++++++++++++ fs/gfs2/ops_inode.h | 22 fs/gfs2/ops_super.c | 411 +++++++++++ fs/gfs2/ops_super.h | 19 fs/gfs2/ops_vm.c | 236 ++++++ fs/gfs2/ops_vm.h | 20 fs/gfs2/page.c | 278 +++++++ fs/gfs2/page.h | 27 fs/gfs2/super.c | 965 ++++++++++++++++++++++++++ fs/gfs2/super.h | 59 + fs/gfs2/trans.c | 221 ++++++ fs/gfs2/trans.h | 44 + fs/gfs2/unlinked.c | 461 ++++++++++++ fs/gfs2/unlinked.h | 29 fs/gfs2/util.c | 600 ++++++++++++++++ fs/gfs2/util.h | 358 +++++++++ 44 files changed, 15098 insertions(+) --- a/fs/gfs2/bmap.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/bmap.c 2005-08-01 14:13:08.007808504 +0800 @@ -0,0 +1,1216 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "glock.h" +#include "inode.h" +#include "jdata.h" +#include "meta_io.h" +#include "page.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +struct metapath { + unsigned int mp_list[GFS2_MAX_META_HEIGHT]; +}; + +typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh, + struct buffer_head *bh, uint64_t *top, + uint64_t *bottom, unsigned int height, + void *data); + +struct strip_mine { + int sm_first; + unsigned int sm_height; +}; + +/** + * @gfs2_unstuffer_sync - Synchronously unstuff a dinode + * @ip: + * @dibh: + * @block: + * @private: + * + * Cheat and use a metadata buffer instead of a data page. + * + * Returns: errno + */ + +int gfs2_unstuffer_sync(struct gfs2_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private) +{ + struct buffer_head *bh; + int error; + + bh = gfs2_meta_new(ip->i_gl, block); + + gfs2_buffer_copy_tail(bh, 0, dibh, sizeof(struct gfs2_dinode)); + + set_buffer_dirty(bh); + error = sync_dirty_buffer(bh); + + brelse(bh); + + return error; +} + +/** + * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big + * @ip: The GFS2 inode to unstuff + * @unstuffer: the routine that handles unstuffing a non-zero length file + * @private: private data for the unstuffer + * + * This routine unstuffs a dinode and returns it to a "normal" state such + * that the height can be grown in the traditional way. + * + * Returns: errno + */ + +int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer, + void *private) +{ + struct buffer_head *bh, *dibh; + uint64_t block = 0; + int journaled = gfs2_is_jdata(ip); + int error; + + down_write(&ip->i_rw_mutex); + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out; + + if (ip->i_di.di_size) { + /* Get a free block, fill it with the stuffed data, + and write it out to disk */ + + if (journaled) { + block = gfs2_alloc_meta(ip); + + error = gfs2_jdata_get_buffer(ip, block, TRUE, &bh); + if (error) + goto out_brelse; + gfs2_buffer_copy_tail(bh, + sizeof(struct gfs2_meta_header), + dibh, sizeof(struct gfs2_dinode)); + brelse(bh); + } else { + block = gfs2_alloc_data(ip); + + error = unstuffer(ip, dibh, block, private); + if (error) + goto out_brelse; + } + } + + /* Set up the pointer to the new block */ + + gfs2_trans_add_bh(ip->i_gl, dibh); + + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + + if (ip->i_di.di_size) { + *(uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)) = cpu_to_gfs2_64(block); + ip->i_di.di_blocks++; + } + + ip->i_di.di_height = 1; + + gfs2_dinode_out(&ip->i_di, dibh->b_data); + + out_brelse: + brelse(dibh); + + out: + up_write(&ip->i_rw_mutex); + + return error; +} + +/** + * calc_tree_height - Calculate the height of a metadata tree + * @ip: The GFS2 inode + * @size: The proposed size of the file + * + * Work out how tall a metadata tree needs to be in order to accommodate a + * file of a particular size. If size is less than the current size of + * the inode, then the current size of the inode is used instead of the + * supplied one. + * + * Returns: the height the tree should be + */ + +static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + uint64_t *arr; + unsigned int max, height; + + if (ip->i_di.di_size > size) + size = ip->i_di.di_size; + + if (gfs2_is_jdata(ip)) { + arr = sdp->sd_jheightsize; + max = sdp->sd_max_jheight; + } else { + arr = sdp->sd_heightsize; + max = sdp->sd_max_height; + } + + for (height = 0; height < max; height++) + if (arr[height] >= size) + break; + + return height; +} + +/** + * build_height - Build a metadata tree of the requested height + * @ip: The GFS2 inode + * @height: The height to build to + * + * This routine makes sure that the metadata tree is tall enough to hold + * "size" bytes of data. + * + * Returns: errno + */ + +static int build_height(struct gfs2_inode *ip, int height) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *bh, *dibh; + uint64_t block = 0, *bp; + unsigned int x; + int new_block; + int error; + + while (ip->i_di.di_height < height) { + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + new_block = FALSE; + bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)); + for (x = 0; x < sdp->sd_diptrs; x++, bp++) + if (*bp) { + new_block = TRUE; + break; + } + + if (new_block) { + /* Get a new block, fill it with the old direct + pointers, and write it out */ + + block = gfs2_alloc_meta(ip); + + bh = gfs2_meta_new(ip->i_gl, block); + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_metatype_set(bh, + GFS2_METATYPE_IN, + GFS2_FORMAT_IN); + gfs2_buffer_copy_tail(bh, + sizeof(struct gfs2_meta_header), + dibh, sizeof(struct gfs2_dinode)); + + brelse(bh); + } + + /* Set up the new direct pointer and write it out to disk */ + + gfs2_trans_add_bh(ip->i_gl, dibh); + + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + + if (new_block) { + *(uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)) = cpu_to_gfs2_64(block); + ip->i_di.di_blocks++; + } + + ip->i_di.di_height++; + + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + return 0; +} + +/** + * find_metapath - Find path through the metadata tree + * @ip: The inode pointer + * @mp: The metapath to return the result in + * @block: The disk block to look up + * + * This routine returns a struct metapath structure that defines a path + * through the metadata of inode "ip" to get to block "block". + * + * Example: + * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a + * filesystem with a blocksize of 4096. + * + * find_metapath() would return a struct metapath structure set to: + * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48, + * and mp_list[2] = 165. + * + * That means that in order to get to the block containing the byte at + * offset 101342453, we would load the indirect block pointed to by pointer + * 0 in the dinode. We would then load the indirect block pointed to by + * pointer 48 in that indirect block. We would then load the data block + * pointed to by pointer 165 in that indirect block. + * + * ---------------------------------------- + * | Dinode | | + * | | 4| + * | |0 1 2 3 4 5 9| + * | | 6| + * ---------------------------------------- + * | + * | + * V + * ---------------------------------------- + * | Indirect Block | + * | 5| + * | 4 4 4 4 4 5 5 1| + * |0 5 6 7 8 9 0 1 2| + * ---------------------------------------- + * | + * | + * V + * ---------------------------------------- + * | Indirect Block | + * | 1 1 1 1 1 5| + * | 6 6 6 6 6 1| + * |0 3 4 5 6 7 2| + * ---------------------------------------- + * | + * | + * V + * ---------------------------------------- + * | Data block containing offset | + * | 101342453 | + * | | + * | | + * ---------------------------------------- + * + */ + +static struct metapath *find_metapath(struct gfs2_inode *ip, uint64_t block) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct metapath *mp; + uint64_t b = block; + unsigned int i; + + mp = kmalloc_nofail(sizeof(struct metapath), GFP_KERNEL); + memset(mp, 0, sizeof(struct metapath)); + + for (i = ip->i_di.di_height; i--;) + mp->mp_list[i] = do_div(b, sdp->sd_inptrs); + + return mp; +} + +/** + * metapointer - Return pointer to start of metadata in a buffer + * @bh: The buffer + * @height: The metadata height (0 = dinode) + * @mp: The metapath + * + * Return a pointer to the block number of the next height of the metadata + * tree given a buffer containing the pointer to the current height of the + * metadata tree. + */ + +static inline uint64_t *metapointer(struct buffer_head *bh, + unsigned int height, struct metapath *mp) +{ + unsigned int head_size = (height > 0) ? + sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); + + return ((uint64_t *)(bh->b_data + head_size)) + mp->mp_list[height]; +} + +/** + * lookup_block - Get the next metadata block in metadata tree + * @ip: The GFS2 inode + * @bh: Buffer containing the pointers to metadata blocks + * @height: The height of the tree (0 = dinode) + * @mp: The metapath + * @create: Non-zero if we may create a new meatdata block + * @new: Used to indicate if we did create a new metadata block + * @block: the returned disk block number + * + * Given a metatree, complete to a particular height, checks to see if the next + * height of the tree exists. If not the next height of the tree is created. + * The block number of the next height of the metadata tree is returned. + * + */ + +static void lookup_block(struct gfs2_inode *ip, struct buffer_head *bh, + unsigned int height, struct metapath *mp, int create, + int *new, uint64_t *block) +{ + uint64_t *ptr = metapointer(bh, height, mp); + + if (*ptr) { + *block = gfs2_64_to_cpu(*ptr); + return; + } + + *block = 0; + + if (!create) + return; + + if (height == ip->i_di.di_height - 1 && + !gfs2_is_jdata(ip)) + *block = gfs2_alloc_data(ip); + else + *block = gfs2_alloc_meta(ip); + + gfs2_trans_add_bh(ip->i_gl, bh); + + *ptr = cpu_to_gfs2_64(*block); + ip->i_di.di_blocks++; + + *new = 1; +} + +/** + * gfs2_block_map - Map a block from an inode to a disk block + * @ip: The GFS2 inode + * @lblock: The logical block number + * @new: Value/Result argument (1 = may create/did create new blocks) + * @dblock: the disk block number of the start of an extent + * @extlen: the size of the extent + * + * Find the block number on the current device which corresponds to an + * inode's block. If the block had to be created, "new" will be set. + * + * Returns: errno + */ + +int gfs2_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new, + uint64_t *dblock, uint32_t *extlen) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *bh; + struct metapath *mp; + int create = *new; + unsigned int bsize; + unsigned int height; + unsigned int end_of_metadata; + unsigned int x; + int error = 0; + + *new = 0; + *dblock = 0; + if (extlen) + *extlen = 0; + + if (create) + down_write(&ip->i_rw_mutex); + else + down_read(&ip->i_rw_mutex); + + if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) + goto out; + + bsize = (gfs2_is_jdata(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; + + height = calc_tree_height(ip, (lblock + 1) * bsize); + if (ip->i_di.di_height < height) { + if (!create) + goto out; + + error = build_height(ip, height); + if (error) + goto out; + } + + mp = find_metapath(ip, lblock); + end_of_metadata = ip->i_di.di_height - 1; + + error = gfs2_meta_inode_buffer(ip, &bh); + if (error) + goto out_kfree; + + for (x = 0; x < end_of_metadata; x++) { + lookup_block(ip, bh, x, mp, create, new, dblock); + brelse(bh); + if (!*dblock) + goto out_kfree; + + error = gfs2_meta_indirect_buffer(ip, x+1, *dblock, *new, &bh); + if (error) + goto out_kfree; + } + + lookup_block(ip, bh, end_of_metadata, mp, create, new, dblock); + + if (extlen && *dblock) { + *extlen = 1; + + if (!*new) { + uint64_t tmp_dblock; + int tmp_new; + unsigned int nptrs; + + nptrs = (end_of_metadata) ? sdp->sd_inptrs : + sdp->sd_diptrs; + + while (++mp->mp_list[end_of_metadata] < nptrs) { + lookup_block(ip, bh, end_of_metadata, mp, + FALSE, &tmp_new, &tmp_dblock); + + if (*dblock + *extlen != tmp_dblock) + break; + + (*extlen)++; + } + } + } + + brelse(bh); + + if (*new) { + error = gfs2_meta_inode_buffer(ip, &bh); + if (!error) { + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_dinode_out(&ip->i_di, bh->b_data); + brelse(bh); + } + } + + out_kfree: + kfree(mp); + + out: + if (create) + up_write(&ip->i_rw_mutex); + else + up_read(&ip->i_rw_mutex); + + return error; +} + +/** + * recursive_scan - recursively scan through the end of a file + * @ip: the inode + * @dibh: the dinode buffer + * @mp: the path through the metadata to the point to start + * @height: the height the recursion is at + * @block: the indirect block to look at + * @first: TRUE if this is the first block + * @bc: the call to make for each piece of metadata + * @data: data opaque to this function to pass to @bc + * + * When this is first called @height and @block should be zero and + * @first should be TRUE. + * + * Returns: errno + */ + +static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh, + struct metapath *mp, unsigned int height, + uint64_t block, int first, block_call_t bc, + void *data) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *bh = NULL; + uint64_t *top, *bottom; + uint64_t bn; + int error; + int mh_size = sizeof(struct gfs2_meta_header); + + if (!height) { + error = gfs2_meta_inode_buffer(ip, &bh); + if (error) + return error; + dibh = bh; + + top = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) + + mp->mp_list[0]; + bottom = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) + + sdp->sd_diptrs; + } else { + error = gfs2_meta_indirect_buffer(ip, height, block, FALSE, + &bh); + if (error) + return error; + + top = (uint64_t *)(bh->b_data + mh_size) + + ((first) ? mp->mp_list[height] : 0); + + bottom = (uint64_t *)(bh->b_data + mh_size) + sdp->sd_inptrs; + } + + error = bc(ip, dibh, bh, top, bottom, height, data); + if (error) + goto out; + + if (height < ip->i_di.di_height - 1) + for (; top < bottom; top++, first = FALSE) { + if (!*top) + continue; + + bn = gfs2_64_to_cpu(*top); + + error = recursive_scan(ip, dibh, mp, height + 1, bn, + first, bc, data); + if (error) + break; + } + + out: + brelse(bh); + + return error; +} + +/** + * do_strip - Look for a layer a particular layer of the file and strip it off + * @ip: the inode + * @dibh: the dinode buffer + * @bh: A buffer of pointers + * @top: The first pointer in the buffer + * @bottom: One more than the last pointer + * @height: the height this buffer is at + * @data: a pointer to a struct strip_mine + * + * Returns: errno + */ + +static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, + struct buffer_head *bh, uint64_t *top, uint64_t *bottom, + unsigned int height, void *data) +{ + struct strip_mine *sm = (struct strip_mine *)data; + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_rgrp_list rlist; + uint64_t bn, bstart; + uint32_t blen; + uint64_t *p; + unsigned int rg_blocks = 0; + int metadata; + unsigned int revokes = 0; + int x; + int error; + + if (!*top) + sm->sm_first = FALSE; + + if (height != sm->sm_height) + return 0; + + if (sm->sm_first) { + top++; + sm->sm_first = FALSE; + } + + metadata = (height != ip->i_di.di_height - 1) || gfs2_is_jdata(ip); + if (metadata) + revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; + + error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); + if (error) + return error; + + memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); + bstart = 0; + blen = 0; + + for (p = top; p < bottom; p++) { + if (!*p) + continue; + + bn = gfs2_64_to_cpu(*p); + + if (bstart + blen == bn) + blen++; + else { + if (bstart) + gfs2_rlist_add(sdp, &rlist, bstart); + + bstart = bn; + blen = 1; + } + } + + if (bstart) + gfs2_rlist_add(sdp, &rlist, bstart); + else + goto out; /* Nothing to do */ + + gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); + + for (x = 0; x < rlist.rl_rgrps; x++) { + struct gfs2_rgrpd *rgd; + rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl); + rg_blocks += rgd->rd_ri.ri_length; + } + + error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); + if (error) + goto out_rlist; + + error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + + RES_INDIRECT + RES_STATFS + RES_QUOTA, + revokes); + if (error) + goto out_rg_gunlock; + + down_write(&ip->i_rw_mutex); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_trans_add_bh(ip->i_gl, bh); + + bstart = 0; + blen = 0; + + for (p = top; p < bottom; p++) { + if (!*p) + continue; + + bn = gfs2_64_to_cpu(*p); + + if (bstart + blen == bn) + blen++; + else { + if (bstart) { + if (metadata) + gfs2_free_meta(ip, bstart, blen); + else + gfs2_free_data(ip, bstart, blen); + } + + bstart = bn; + blen = 1; + } + + *p = 0; + if (!ip->i_di.di_blocks) + gfs2_consist_inode(ip); + ip->i_di.di_blocks--; + } + if (bstart) { + if (metadata) + gfs2_free_meta(ip, bstart, blen); + else + gfs2_free_data(ip, bstart, blen); + } + + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs2_dinode_out(&ip->i_di, dibh->b_data); + + up_write(&ip->i_rw_mutex); + + gfs2_trans_end(sdp); + + out_rg_gunlock: + gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); + + out_rlist: + gfs2_rlist_free(&rlist); + + out: + gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); + + return error; +} + +/** + * do_grow - Make a file look bigger than it is + * @ip: the inode + * @size: the size to set the file to + * + * Called with an exclusive lock on @ip. + * + * Returns: errno + */ + +static int do_grow(struct gfs2_inode *ip, uint64_t size) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_alloc *al; + struct buffer_head *dibh; + unsigned int h; + int error; + + al = gfs2_alloc_get(ip); + + error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out; + + error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto out_gunlock_q; + + al->al_requested = sdp->sd_max_height + RES_DATA; + + error = gfs2_inplace_reserve(ip); + if (error) + goto out_gunlock_q; + + error = gfs2_trans_begin(sdp, + sdp->sd_max_height + al->al_rgd->rd_ri.ri_length + + RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0); + if (error) + goto out_ipres; + + if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { + if (gfs2_is_stuffed(ip)) { + error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, + NULL); + if (error) + goto out_end_trans; + } + + h = calc_tree_height(ip, size); + if (ip->i_di.di_height < h) { + down_write(&ip->i_rw_mutex); + error = build_height(ip, h); + up_write(&ip->i_rw_mutex); + if (error) + goto out_end_trans; + } + } + + ip->i_di.di_size = size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out_end_trans; + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + out_end_trans: + gfs2_trans_end(sdp); + + out_ipres: + gfs2_inplace_release(ip); + + out_gunlock_q: + gfs2_quota_unlock(ip); + + out: + gfs2_alloc_put(ip); + + return error; +} + +static int truncator_journaled(struct gfs2_inode *ip, uint64_t size) +{ + uint64_t lbn, dbn; + uint32_t off; + struct buffer_head *bh; + int new = FALSE; + int error; + + lbn = size; + off = do_div(lbn, ip->i_sbd->sd_jbsize); + + error = gfs2_block_map(ip, lbn, &new, &dbn, NULL); + if (error || !dbn) + return error; + + error = gfs2_jdata_get_buffer(ip, dbn, FALSE, &bh); + if (error) + return error; + + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header) + off); + + brelse(bh); + + return 0; +} + +static int trunc_start(struct gfs2_inode *ip, uint64_t size, + gfs2_truncator_t truncator) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *dibh; + int journaled = gfs2_is_jdata(ip); + int error; + + error = gfs2_trans_begin(sdp, + RES_DINODE + ((journaled) ? RES_JDATA : 0), 0); + if (error) + return error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out; + + if (gfs2_is_stuffed(ip)) { + ip->i_di.di_size = size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); + error = 1; + + } else { + if (journaled) { + if (do_mod(size, sdp->sd_jbsize)) + error = truncator_journaled(ip, size); + } else if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1)) + error = truncator(ip, size); + + if (!error) { + ip->i_di.di_size = size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + } + } + + brelse(dibh); + + out: + gfs2_trans_end(sdp); + + return error; +} + +static int trunc_dealloc(struct gfs2_inode *ip, uint64_t size) +{ + unsigned int height = ip->i_di.di_height; + uint64_t lblock; + struct metapath *mp; + int error; + + if (!size) + lblock = 0; + else if (gfs2_is_jdata(ip)) { + lblock = size - 1; + do_div(lblock, ip->i_sbd->sd_jbsize); + } else + lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift; + + mp = find_metapath(ip, lblock); + gfs2_alloc_get(ip); + + error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out; + + while (height--) { + struct strip_mine sm; + sm.sm_first = !!size; + sm.sm_height = height; + + error = recursive_scan(ip, NULL, mp, 0, 0, TRUE, do_strip, &sm); + if (error) + break; + } + + gfs2_quota_unhold(ip); + + out: + gfs2_alloc_put(ip); + kfree(mp); + + return error; +} + +static int trunc_end(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *dibh; + int error; + + error = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (error) + return error; + + down_write(&ip->i_rw_mutex); + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out; + + if (!ip->i_di.di_size) { + ip->i_di.di_height = 0; + ip->i_di.di_goal_meta = + ip->i_di.di_goal_data = + ip->i_num.no_addr; + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + } + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + out: + up_write(&ip->i_rw_mutex); + + gfs2_trans_end(sdp); + + return error; +} + +/** + * do_shrink - make a file smaller + * @ip: the inode + * @size: the size to make the file + * @truncator: function to truncate the last partial block + * + * Called with an exclusive lock on @ip. + * + * Returns: errno + */ + +static int do_shrink(struct gfs2_inode *ip, uint64_t size, + gfs2_truncator_t truncator) +{ + int error; + + error = trunc_start(ip, size, truncator); + if (error < 0) + return error; + if (error > 0) + return 0; + + error = trunc_dealloc(ip, size); + if (!error) + error = trunc_end(ip); + + return error; +} + +/** + * gfs2_truncatei - make a file a give size + * @ip: the inode + * @size: the size to make the file + * @truncator: function to truncate the last partial block + * + * The file size can grow, shrink, or stay the same size. + * + * Returns: errno + */ + +int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size, + gfs2_truncator_t truncator) +{ + int error; + + if (gfs2_assert_warn(ip->i_sbd, S_ISREG(ip->i_di.di_mode))) + return -EINVAL; + + if (size > ip->i_di.di_size) + error = do_grow(ip, size); + else + error = do_shrink(ip, size, truncator); + + return error; +} + +int gfs2_truncatei_resume(struct gfs2_inode *ip) +{ + int error; + error = trunc_dealloc(ip, ip->i_di.di_size); + if (!error) + error = trunc_end(ip); + return error; +} + +int gfs2_file_dealloc(struct gfs2_inode *ip) +{ + return trunc_dealloc(ip, 0); +} + +/** + * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file + * @ip: the file + * @len: the number of bytes to be written to the file + * @data_blocks: returns the number of data blocks required + * @ind_blocks: returns the number of indirect blocks required + * + */ + +void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len, + unsigned int *data_blocks, unsigned int *ind_blocks) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + unsigned int tmp; + + if (gfs2_is_jdata(ip)) { + *data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2; + *ind_blocks = 3 * (sdp->sd_max_jheight - 1); + } else { + *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3; + *ind_blocks = 3 * (sdp->sd_max_height - 1); + } + + for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) { + tmp = DIV_RU(tmp, sdp->sd_inptrs); + *ind_blocks += tmp; + } +} + +/** + * gfs2_write_alloc_required - figure out if a write will require an allocation + * @ip: the file being written to + * @offset: the offset to write to + * @len: the number of bytes being written + * @alloc_required: set to TRUE if an alloc is required, FALSE otherwise + * + * Returns: errno + */ + +int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset, + unsigned int len, int *alloc_required) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + uint64_t lblock, lblock_stop, dblock; + uint32_t extlen; + int new = FALSE; + int error = 0; + + *alloc_required = FALSE; + + if (!len) + return 0; + + if (gfs2_is_stuffed(ip)) { + if (offset + len > + sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) + *alloc_required = TRUE; + return 0; + } + + if (gfs2_is_jdata(ip)) { + unsigned int bsize = sdp->sd_jbsize; + lblock = offset; + do_div(lblock, bsize); + lblock_stop = offset + len + bsize - 1; + do_div(lblock_stop, bsize); + } else { + unsigned int shift = sdp->sd_sb.sb_bsize_shift; + lblock = offset >> shift; + lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; + } + + for (; lblock < lblock_stop; lblock += extlen) { + error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen); + if (error) + return error; + + if (!dblock) { + *alloc_required = TRUE; + return 0; + } + } + + return 0; +} + +/** + * do_gfm - Copy out the dinode/indirect blocks of a file + * @ip: the file + * @dibh: the dinode buffer + * @bh: the indirect buffer we're looking at + * @top: the first pointer in the block + * @bottom: one more than the last pointer in the block + * @height: the height the block is at + * @data: a pointer to a struct gfs2_user_buffer structure + * + * If this is a journaled file, copy out the data too. + * + * Returns: errno + */ + +static int do_gfm(struct gfs2_inode *ip, struct buffer_head *dibh, + struct buffer_head *bh, uint64_t *top, uint64_t *bottom, + unsigned int height, void *data) +{ + struct gfs2_user_buffer *ub = (struct gfs2_user_buffer *)data; + int error; + + error = gfs2_add_bh_to_ub(ub, bh); + if (error) + return error; + + if (!S_ISDIR(ip->i_di.di_mode) || + height + 1 != ip->i_di.di_height) + return 0; + + for (; top < bottom; top++) + if (*top) { + struct buffer_head *data_bh; + + error = gfs2_meta_read(ip->i_gl, gfs2_64_to_cpu(*top), + DIO_START | DIO_WAIT, + &data_bh); + if (error) + return error; + + error = gfs2_add_bh_to_ub(ub, data_bh); + + brelse(data_bh); + + if (error) + return error; + } + + return 0; +} + +/** + * gfs2_get_file_meta - return all the metadata for a file + * @ip: the file + * @ub: the structure representing the meta + * + * Returns: errno + */ + +int gfs2_get_file_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub) +{ + int error; + + if (gfs2_is_stuffed(ip)) { + struct buffer_head *dibh; + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + error = gfs2_add_bh_to_ub(ub, dibh); + brelse(dibh); + } + } else { + struct metapath *mp = find_metapath(ip, 0); + error = recursive_scan(ip, NULL, mp, 0, 0, TRUE, do_gfm, ub); + kfree(mp); + } + + return error; +} + --- a/fs/gfs2/bmap.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/bmap.h 2005-08-01 14:13:08.007808504 +0800 @@ -0,0 +1,43 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __BMAP_DOT_H__ +#define __BMAP_DOT_H__ + +typedef int (*gfs2_unstuffer_t) (struct gfs2_inode * ip, + struct buffer_head * dibh, uint64_t block, + void *private); +int gfs2_unstuffer_sync(struct gfs2_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private); +int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer, + void *private); + +int gfs2_block_map(struct gfs2_inode *ip, + uint64_t lblock, int *new, + uint64_t *dblock, uint32_t *extlen); + +typedef int (*gfs2_truncator_t) (struct gfs2_inode * ip, uint64_t size); +int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size, + gfs2_truncator_t truncator); +int gfs2_truncatei_resume(struct gfs2_inode *ip); +int gfs2_file_dealloc(struct gfs2_inode *ip); + +void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len, + unsigned int *data_blocks, + unsigned int *ind_blocks); +int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset, + unsigned int len, int *alloc_required); + +int gfs2_get_file_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub); + +#endif /* __BMAP_DOT_H__ */ --- a/fs/gfs2/daemon.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/daemon.c 2005-08-01 14:13:08.007808504 +0800 @@ -0,0 +1,215 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "daemon.h" +#include "glock.h" +#include "log.h" +#include "quota.h" +#include "recovery.h" +#include "super.h" +#include "unlinked.h" + +/** + * gfs2_scand - Look for cached glocks and inodes to toss from memory + * @sdp: Pointer to GFS2 superblock + * + * One of these daemons runs, finding candidates to add to sd_reclaim_list. + * See gfs2_glockd() + */ + +int gfs2_scand(void *data) +{ + struct gfs2_sbd *sdp = (struct gfs2_sbd *)data; + + while (!kthread_should_stop()) { + gfs2_scand_internal(sdp); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(gfs2_tune_get(sdp, gt_scand_secs) * HZ); + } + + return 0; +} + +/** + * gfs2_glockd - Reclaim unused glock structures + * @sdp: Pointer to GFS2 superblock + * + * One or more of these daemons run, reclaiming glocks on sd_reclaim_list. + * Number of daemons can be set by user, with num_glockd mount option. + */ + +int gfs2_glockd(void *data) +{ + struct gfs2_sbd *sdp = (struct gfs2_sbd *)data; + DECLARE_WAITQUEUE(wait_chan, current); + + while (!kthread_should_stop()) { + while (atomic_read(&sdp->sd_reclaim_count)) + gfs2_reclaim_glock(sdp); + + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&sdp->sd_reclaim_wq, &wait_chan); + if (!atomic_read(&sdp->sd_reclaim_count) && + !kthread_should_stop()) + schedule(); + remove_wait_queue(&sdp->sd_reclaim_wq, &wait_chan); + set_current_state(TASK_RUNNING); + } + + return 0; +} + +/** + * gfs2_recoverd - Recover dead machine's journals + * @sdp: Pointer to GFS2 superblock + * + */ + +int gfs2_recoverd(void *data) +{ + struct gfs2_sbd *sdp = (struct gfs2_sbd *)data; + + while (!kthread_should_stop()) { + gfs2_check_journals(sdp); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(gfs2_tune_get(sdp, gt_recoverd_secs) * HZ); + } + + return 0; +} + +/** + * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks + * @sdp: Pointer to GFS2 superblock + * + * Also, periodically check to make sure that we're using the most recent + * journal index. + */ + +int gfs2_logd(void *data) +{ + struct gfs2_sbd *sdp = (struct gfs2_sbd *)data; + struct gfs2_holder ji_gh; + + while (!kthread_should_stop()) { + /* Advance the log tail */ + gfs2_ail1_empty(sdp, DIO_ALL); + if (time_after_eq(jiffies, + sdp->sd_log_flush_time + + gfs2_tune_get(sdp, gt_log_flush_secs) * HZ)) { + gfs2_log_flush(sdp); + sdp->sd_log_flush_time = jiffies; + } + + /* Check for latest journal index */ + if (time_after_eq(jiffies, + sdp->sd_jindex_refresh_time + + gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ)) { + if (!gfs2_jindex_hold(sdp, &ji_gh)) + gfs2_glock_dq_uninit(&ji_gh); + sdp->sd_jindex_refresh_time = jiffies; + } + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(gfs2_tune_get(sdp, gt_logd_secs) * HZ); + } + + return 0; +} + +/** + * gfs2_quotad - Write cached quota changes into the quota file + * @sdp: Pointer to GFS2 superblock + * + */ + +int gfs2_quotad(void *data) +{ + struct gfs2_sbd *sdp = (struct gfs2_sbd *)data; + int error; + + while (!kthread_should_stop()) { + /* Update the master statfs file */ + if (time_after_eq(jiffies, + sdp->sd_statfs_sync_time + + gfs2_tune_get(sdp, gt_statfs_quantum) * HZ)) { + error = gfs2_statfs_sync(sdp); + if (error && + error != -EROFS && + !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + printk("GFS2: fsid=%s: quotad: (1) error=%d\n", + sdp->sd_fsname, error); + sdp->sd_statfs_sync_time = jiffies; + } + + /* Update quota file */ + if (time_after_eq(jiffies, + sdp->sd_quota_sync_time + + gfs2_tune_get(sdp, gt_quota_quantum) * HZ)) { + error = gfs2_quota_sync(sdp); + if (error && + error != -EROFS && + !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + printk("GFS2: fsid=%s: quotad: (2) error=%d\n", + sdp->sd_fsname, error); + sdp->sd_quota_sync_time = jiffies; + } + + gfs2_quota_scan(sdp); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(gfs2_tune_get(sdp, gt_quotad_secs) * HZ); + } + + return 0; +} + +/** + * gfs2_inoded - Deallocate unlinked inodes + * @sdp: Pointer to GFS2 superblock + * + */ + +int gfs2_inoded(void *data) +{ + struct gfs2_sbd *sdp = (struct gfs2_sbd *)data; + int error; + + while (!kthread_should_stop()) { + error = gfs2_unlinked_dealloc(sdp); + if (error && + error != -EROFS && + !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + printk("GFS2: fsid=%s: inoded: error = %d\n", + sdp->sd_fsname, error); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(gfs2_tune_get(sdp, gt_inoded_secs) * HZ); + } + + return 0; +} + --- a/fs/gfs2/daemon.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/daemon.h 2005-08-01 14:13:08.007808504 +0800 @@ -0,0 +1,24 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __DAEMON_DOT_H__ +#define __DAEMON_DOT_H__ + +int gfs2_scand(void *data); +int gfs2_glockd(void *data); +int gfs2_recoverd(void *data); +int gfs2_logd(void *data); +int gfs2_quotad(void *data); +int gfs2_inoded(void *data); + +#endif /* __DAEMON_DOT_H__ */ --- a/fs/gfs2/fixed_div64.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/fixed_div64.h 2005-08-01 14:13:08.009808200 +0800 @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + * Further, this software is distributed without any warranty that it is + * free of the rightful claim of any third person regarding infringement + * or the like. Any license provided herein, whether implied or + * otherwise, applies only to this software file. Patent licenses, if + * any, provided herein do not apply to combinations of this program with + * other software, or any other product whatsoever. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston MA 02111-1307, USA. + * + * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy, + * Mountain View, CA 94043, or: + * + * http://www.sgi.com + * + * For further information regarding this notice, see: + * + * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/ + * + * Additional munging: + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + */ + +#ifndef __FIXED_DIV64_DOT_H__ +#define __FIXED_DIV64_DOT_H__ + +#include + +#if defined __i386__ +/* For ia32 we need to pull some tricks to get past various versions + * of the compiler which do not like us using do_div in the middle + * of large functions. + */ +static inline __u32 fixed_div64_do_div(void *a, __u32 b, int n) +{ + __u32 mod; + + switch (n) { + case 4: + mod = *(__u32 *)a % b; + *(__u32 *)a = *(__u32 *)a / b; + return mod; + case 8: + { + unsigned long __upper, __low, __high, __mod; + __u64 c = *(__u64 *)a; + __upper = __high = c >> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + *(__u64 *)a = c; + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 fixed_div64_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + unsigned long __upper, __low, __high, __mod; + __u64 c = *(__u64 *)a; + __upper = __high = c >> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} +#else +static inline __u32 fixed_div64_do_div(void *a, __u32 b, int n) +{ + __u32 mod; + + switch (n) { + case 4: + mod = *(__u32 *)a % b; + *(__u32 *)a = *(__u32 *)a / b; + return mod; + case 8: + mod = do_div(*(__u64 *)a, b); + return mod; + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 fixed_div64_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + __u64 c = *(__u64 *)a; + return do_div(c, b); + } + } + + /* NOTREACHED */ + return 0; +} +#endif + +#undef do_div +#define do_div(a, b) fixed_div64_do_div(&(a), (b), sizeof(a)) +#define do_mod(a, b) fixed_div64_do_mod(&(a), (b), sizeof(a)) + +#endif /* __FIXED_DIV64_DOT_H__ */ --- a/fs/gfs2/format.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/format.h 2005-08-01 14:13:08.009808200 +0800 @@ -0,0 +1,25 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __FORMAT_DOT_H__ +#define __FORMAT_DOT_H__ + +static const uint32_t gfs2_old_fs_formats[] = { + 0 +}; + +static const uint32_t gfs2_old_multihost_formats[] = { + 0 +}; + +#endif /* __FORMAT_DOT_H__ */ --- a/fs/gfs2/glops.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/glops.c 2005-08-01 14:13:08.009808200 +0800 @@ -0,0 +1,491 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "log.h" +#include "meta_io.h" +#include "page.h" +#include "recovery.h" +#include "rgrp.h" + +/** + * meta_go_sync - sync out the metadata for this glock + * @gl: the glock + * @flags: DIO_* + * + * Called when demoting or unlocking an EX glock. We must flush + * to disk all dirty buffers/pages relating to this glock, and must not + * not return to caller to demote/unlock the glock until I/O is complete. + */ + +static void meta_go_sync(struct gfs2_glock *gl, int flags) +{ + if (!(flags & DIO_METADATA)) + return; + + if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { + gfs2_log_flush_glock(gl); + gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT); + if (flags & DIO_RELEASE) + gfs2_ail_empty_gl(gl); + } + + clear_bit(GLF_SYNC, &gl->gl_flags); +} + +/** + * meta_go_inval - invalidate the metadata for this glock + * @gl: the glock + * @flags: + * + */ + +static void meta_go_inval(struct gfs2_glock *gl, int flags) +{ + if (!(flags & DIO_METADATA)) + return; + + gfs2_meta_inval(gl); + gl->gl_vn++; +} + +/** + * meta_go_demote_ok - Check to see if it's ok to unlock a glock + * @gl: the glock + * + * Returns: TRUE if we have no cached data; ok to demote meta glock + */ + +static int meta_go_demote_ok(struct gfs2_glock *gl) +{ + return !gl->gl_aspace->i_mapping->nrpages; +} + +/** + * inode_go_xmote_th - promote/demote a glock + * @gl: the glock + * @state: the requested state + * @flags: + * + */ + +static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state, + int flags) +{ + if (gl->gl_state != LM_ST_UNLOCKED) + gfs2_pte_inval(gl); + gfs2_glock_xmote_th(gl, state, flags); +} + +/** + * inode_go_xmote_bh - After promoting/demoting a glock + * @gl: the glock + * + */ + +static void inode_go_xmote_bh(struct gfs2_glock *gl) +{ + struct gfs2_holder *gh = gl->gl_req_gh; + struct buffer_head *bh; + int error; + + if (gl->gl_state != LM_ST_UNLOCKED && + (!gh || !(gh->gh_flags & GL_SKIP))) { + error = gfs2_meta_read(gl, gl->gl_name.ln_number, DIO_START, + &bh); + if (!error) + brelse(bh); + } +} + +/** + * inode_go_drop_th - unlock a glock + * @gl: the glock + * + * Invoked from rq_demote(). + * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long) + * is being purged from our node's glock cache; we're dropping lock. + */ + +static void inode_go_drop_th(struct gfs2_glock *gl) +{ + gfs2_pte_inval(gl); + gfs2_glock_drop_th(gl); +} + +/** + * inode_go_sync - Sync the dirty data and/or metadata for an inode glock + * @gl: the glock protecting the inode + * @flags: + * + */ + +static void inode_go_sync(struct gfs2_glock *gl, int flags) +{ + int meta = (flags & DIO_METADATA); + int data = (flags & DIO_DATA); + + if (test_bit(GLF_DIRTY, &gl->gl_flags)) { + if (meta && data) { + gfs2_page_sync(gl, flags | DIO_START); + gfs2_log_flush_glock(gl); + gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT); + gfs2_page_sync(gl, flags | DIO_WAIT); + clear_bit(GLF_DIRTY, &gl->gl_flags); + } else if (meta) { + gfs2_log_flush_glock(gl); + gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT); + } else if (data) + gfs2_page_sync(gl, flags | DIO_START | DIO_WAIT); + if (flags & DIO_RELEASE) + gfs2_ail_empty_gl(gl); + } + + clear_bit(GLF_SYNC, &gl->gl_flags); +} + +/** + * inode_go_inval - prepare a inode glock to be released + * @gl: the glock + * @flags: + * + */ + +static void inode_go_inval(struct gfs2_glock *gl, int flags) +{ + int meta = (flags & DIO_METADATA); + int data = (flags & DIO_DATA); + + if (meta) { + gfs2_meta_inval(gl); + gl->gl_vn++; + } + if (data) + gfs2_page_inval(gl); +} + +/** + * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock + * @gl: the glock + * + * Returns: TRUE if it's ok + */ + +static int inode_go_demote_ok(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + int demote = FALSE; + + if (!get_gl2ip(gl) && !gl->gl_aspace->i_mapping->nrpages) + demote = TRUE; + else if (!sdp->sd_args.ar_localcaching && + time_after_eq(jiffies, gl->gl_stamp + gfs2_tune_get(sdp, gt_demote_secs) * HZ)) + demote = TRUE; + + return demote; +} + +/** + * inode_go_lock - operation done after an inode lock is locked by a process + * @gl: the glock + * @flags: + * + * Returns: errno + */ + +static int inode_go_lock(struct gfs2_holder *gh) +{ + struct gfs2_glock *gl = gh->gh_gl; + struct gfs2_inode *ip = get_gl2ip(gl); + int error = 0; + + if (!ip) + return 0; + + if (ip->i_vn != gl->gl_vn) { + error = gfs2_inode_refresh(ip); + if (error) + return error; + gfs2_inode_attr_in(ip); + } + + if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) && + (gl->gl_state == LM_ST_EXCLUSIVE) && + (gh->gh_flags & GL_LOCAL_EXCL)) + error = gfs2_truncatei_resume(ip); + + return error; +} + +/** + * inode_go_unlock - operation done before an inode lock is unlocked by a + * process + * @gl: the glock + * @flags: + * + */ + +static void inode_go_unlock(struct gfs2_holder *gh) +{ + struct gfs2_glock *gl = gh->gh_gl; + struct gfs2_inode *ip = get_gl2ip(gl); + + if (ip && test_bit(GLF_DIRTY, &gl->gl_flags)) + gfs2_inode_attr_in(ip); + + if (ip) + gfs2_meta_cache_flush(ip); +} + +/** + * inode_greedy - + * @gl: the glock + * + */ + +static void inode_greedy(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_inode *ip = get_gl2ip(gl); + unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum); + unsigned int max = gfs2_tune_get(sdp, gt_greedy_max); + unsigned int new_time; + + spin_lock(&ip->i_spin); + + if (time_after(ip->i_last_pfault + quantum, jiffies)) { + new_time = ip->i_greedy + quantum; + if (new_time > max) + new_time = max; + } else { + new_time = ip->i_greedy - quantum; + if (!new_time || new_time > max) + new_time = 1; + } + + ip->i_greedy = new_time; + + spin_unlock(&ip->i_spin); + + gfs2_inode_put(ip); +} + +/** + * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock + * @gl: the glock + * + * Returns: TRUE if it's ok + */ + +static int rgrp_go_demote_ok(struct gfs2_glock *gl) +{ + return !gl->gl_aspace->i_mapping->nrpages; +} + +/** + * rgrp_go_lock - operation done after an rgrp lock is locked by + * a first holder on this node. + * @gl: the glock + * @flags: + * + * Returns: errno + */ + +static int rgrp_go_lock(struct gfs2_holder *gh) +{ + return gfs2_rgrp_bh_get(get_gl2rgd(gh->gh_gl)); +} + +/** + * rgrp_go_unlock - operation done before an rgrp lock is unlocked by + * a last holder on this node. + * @gl: the glock + * @flags: + * + */ + +static void rgrp_go_unlock(struct gfs2_holder *gh) +{ + gfs2_rgrp_bh_put(get_gl2rgd(gh->gh_gl)); +} + +/** + * trans_go_xmote_th - promote/demote the transaction glock + * @gl: the glock + * @state: the requested state + * @flags: + * + */ + +static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state, + int flags) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + + if (gl->gl_state != LM_ST_UNLOCKED && + test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + gfs2_meta_syncfs(sdp); + gfs2_log_shutdown(sdp); + } + + gfs2_glock_xmote_th(gl, state, flags); +} + +/** + * trans_go_xmote_bh - After promoting/demoting the transaction glock + * @gl: the glock + * + */ + +static void trans_go_xmote_bh(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_glock *j_gl = sdp->sd_jdesc->jd_inode->i_gl; + struct gfs2_log_header head; + int error; + + if (gl->gl_state != LM_ST_UNLOCKED && + test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + gfs2_meta_cache_flush(sdp->sd_jdesc->jd_inode); + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA); + + error = gfs2_find_jhead(sdp->sd_jdesc, &head); + if (error) + gfs2_consist(sdp); + if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) + gfs2_consist(sdp); + + /* Initialize some head of the log stuff */ + if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { + sdp->sd_log_sequence = head.lh_sequence + 1; + gfs2_log_pointers_init(sdp, head.lh_blkno); + } + } +} + +/** + * trans_go_drop_th - unlock the transaction glock + * @gl: the glock + * + * We want to sync the device even with localcaching. Remember + * that localcaching journal replay only marks buffers dirty. + */ + +static void trans_go_drop_th(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + + if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + gfs2_meta_syncfs(sdp); + gfs2_log_shutdown(sdp); + } + + gfs2_glock_drop_th(gl); +} + +/** + * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock + * @gl: the glock + * + * Returns: TRUE if it's ok + */ + +static int quota_go_demote_ok(struct gfs2_glock *gl) +{ + return !atomic_read(&gl->gl_lvb_count); +} + +struct gfs2_glock_operations gfs2_meta_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_sync = meta_go_sync, + .go_inval = meta_go_inval, + .go_demote_ok = meta_go_demote_ok, + .go_type = LM_TYPE_META +}; + +struct gfs2_glock_operations gfs2_inode_glops = { + .go_xmote_th = inode_go_xmote_th, + .go_xmote_bh = inode_go_xmote_bh, + .go_drop_th = inode_go_drop_th, + .go_sync = inode_go_sync, + .go_inval = inode_go_inval, + .go_demote_ok = inode_go_demote_ok, + .go_lock = inode_go_lock, + .go_unlock = inode_go_unlock, + .go_greedy = inode_greedy, + .go_type = LM_TYPE_INODE +}; + +struct gfs2_glock_operations gfs2_rgrp_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_sync = meta_go_sync, + .go_inval = meta_go_inval, + .go_demote_ok = rgrp_go_demote_ok, + .go_lock = rgrp_go_lock, + .go_unlock = rgrp_go_unlock, + .go_type = LM_TYPE_RGRP +}; + +struct gfs2_glock_operations gfs2_trans_glops = { + .go_xmote_th = trans_go_xmote_th, + .go_xmote_bh = trans_go_xmote_bh, + .go_drop_th = trans_go_drop_th, + .go_type = LM_TYPE_NONDISK +}; + +struct gfs2_glock_operations gfs2_iopen_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_callback = gfs2_iopen_go_callback, + .go_type = LM_TYPE_IOPEN +}; + +struct gfs2_glock_operations gfs2_flock_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_type = LM_TYPE_FLOCK +}; + +struct gfs2_glock_operations gfs2_nondisk_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_type = LM_TYPE_NONDISK +}; + +struct gfs2_glock_operations gfs2_quota_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_demote_ok = quota_go_demote_ok, + .go_type = LM_TYPE_QUOTA +}; + +struct gfs2_glock_operations gfs2_journal_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_type = LM_TYPE_JOURNAL +}; + --- a/fs/gfs2/glops.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/glops.h 2005-08-01 14:13:08.010808048 +0800 @@ -0,0 +1,27 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __GLOPS_DOT_H__ +#define __GLOPS_DOT_H__ + +extern struct gfs2_glock_operations gfs2_meta_glops; +extern struct gfs2_glock_operations gfs2_inode_glops; +extern struct gfs2_glock_operations gfs2_rgrp_glops; +extern struct gfs2_glock_operations gfs2_trans_glops; +extern struct gfs2_glock_operations gfs2_iopen_glops; +extern struct gfs2_glock_operations gfs2_flock_glops; +extern struct gfs2_glock_operations gfs2_nondisk_glops; +extern struct gfs2_glock_operations gfs2_quota_glops; +extern struct gfs2_glock_operations gfs2_journal_glops; + +#endif /* __GLOPS_DOT_H__ */ --- a/fs/gfs2/inode.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/inode.c 2005-08-01 14:13:08.010808048 +0800 @@ -0,0 +1,1808 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "acl.h" +#include "bmap.h" +#include "dir.h" +#include "eattr.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "log.h" +#include "meta_io.h" +#include "ops_address.h" +#include "ops_file.h" +#include "ops_inode.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" +#include "unlinked.h" + +/** + * inode_attr_in - Copy attributes from the dinode into the VFS inode + * @ip: The GFS2 inode (with embedded disk inode data) + * @inode: The Linux VFS inode + * + */ + +static void inode_attr_in(struct gfs2_inode *ip, struct inode *inode) +{ + inode->i_ino = ip->i_num.no_formal_ino; + + switch (ip->i_di.di_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + inode->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor); + break; + default: + inode->i_rdev = 0; + break; + }; + + inode->i_mode = ip->i_di.di_mode; + inode->i_nlink = ip->i_di.di_nlink; + inode->i_uid = ip->i_di.di_uid; + inode->i_gid = ip->i_di.di_gid; + i_size_write(inode, ip->i_di.di_size); + inode->i_atime.tv_sec = ip->i_di.di_atime; + inode->i_mtime.tv_sec = ip->i_di.di_mtime; + inode->i_ctime.tv_sec = ip->i_di.di_ctime; + inode->i_atime.tv_nsec = 0; + inode->i_mtime.tv_nsec = 0; + inode->i_ctime.tv_nsec = 0; + inode->i_blksize = PAGE_SIZE; + inode->i_blocks = ip->i_di.di_blocks << + (ip->i_sbd->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); + + if (ip->i_di.di_flags & GFS2_DIF_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + + if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; +} + +/** + * gfs2_inode_attr_in - Copy attributes from the dinode into the VFS inode + * @ip: The GFS2 inode (with embedded disk inode data) + * + */ + +void gfs2_inode_attr_in(struct gfs2_inode *ip) +{ + struct inode *inode; + + inode = gfs2_ip2v(ip, NO_CREATE); + if (inode) { + inode_attr_in(ip, inode); + iput(inode); + } +} + +/** + * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode + * @ip: The GFS2 inode + * + * Only copy out the attributes that we want the VFS layer + * to be able to modify. + */ + +void gfs2_inode_attr_out(struct gfs2_inode *ip) +{ + struct inode *inode = ip->i_vnode; + + gfs2_assert_withdraw(ip->i_sbd, + (ip->i_di.di_mode & S_IFMT) == (inode->i_mode & S_IFMT)); + ip->i_di.di_mode = inode->i_mode; + ip->i_di.di_uid = inode->i_uid; + ip->i_di.di_gid = inode->i_gid; + ip->i_di.di_atime = inode->i_atime.tv_sec; + ip->i_di.di_mtime = inode->i_mtime.tv_sec; + ip->i_di.di_ctime = inode->i_ctime.tv_sec; +} + +/** + * gfs2_ip2v - Get/Create a struct inode for a struct gfs2_inode + * @ip: the struct gfs2_inode to get the struct inode for + * @create: create a new struct inode if one does not already exist + * + * Returns: A VFS inode, or NULL if NO_CREATE and none in existance + */ + +struct inode *gfs2_ip2v(struct gfs2_inode *ip, int create) +{ + struct inode *inode = NULL, *tmp; + + gfs2_assert_warn(ip->i_sbd, + test_bit(GIF_MIN_INIT, &ip->i_flags)); + + spin_lock(&ip->i_spin); + if (ip->i_vnode) + inode = igrab(ip->i_vnode); + spin_unlock(&ip->i_spin); + + if (inode || !create) + return inode; + + tmp = new_inode(ip->i_sbd->sd_vfs); + if (!tmp) + return NULL; + + inode_attr_in(ip, tmp); + + if (S_ISREG(ip->i_di.di_mode)) { + tmp->i_op = &gfs2_file_iops; + tmp->i_fop = &gfs2_file_fops; + tmp->i_mapping->a_ops = &gfs2_file_aops; + } else if (S_ISDIR(ip->i_di.di_mode)) { + tmp->i_op = &gfs2_dir_iops; + tmp->i_fop = &gfs2_dir_fops; + } else if (S_ISLNK(ip->i_di.di_mode)) { + tmp->i_op = &gfs2_symlink_iops; + } else { + tmp->i_op = &gfs2_dev_iops; + init_special_inode(tmp, tmp->i_mode, tmp->i_rdev); + } + + set_v2ip(tmp, NULL); + + for (;;) { + spin_lock(&ip->i_spin); + if (!ip->i_vnode) + break; + inode = igrab(ip->i_vnode); + spin_unlock(&ip->i_spin); + + if (inode) { + iput(tmp); + return inode; + } + yield(); + } + + inode = tmp; + + gfs2_inode_hold(ip); + ip->i_vnode = inode; + set_v2ip(inode, ip); + + spin_unlock(&ip->i_spin); + + insert_inode_hash(inode); + + return inode; +} + +static int iget_test(struct inode *inode, void *opaque) +{ + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_inum *inum = (struct gfs2_inum *)opaque; + + if (ip && ip->i_num.no_addr == inum->no_addr) + return 1; + + return 0; +} + +struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum) +{ + return ilookup5(sb, (unsigned long)inum->no_formal_ino, + iget_test, inum); +} + +void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type) +{ + spin_lock(&ip->i_spin); + if (!test_and_set_bit(GIF_MIN_INIT, &ip->i_flags)) { + ip->i_di.di_nlink = 1; + ip->i_di.di_mode = DT2IF(type); + } + spin_unlock(&ip->i_spin); +} + +/** + * gfs2_inode_refresh - Refresh the incore copy of the dinode + * @ip: The GFS2 inode + * + * Returns: errno + */ + +int gfs2_inode_refresh(struct gfs2_inode *ip) +{ + struct buffer_head *dibh; + int error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + if (gfs2_metatype_check(ip->i_sbd, dibh, GFS2_METATYPE_DI)) { + brelse(dibh); + return -EIO; + } + + spin_lock(&ip->i_spin); + gfs2_dinode_in(&ip->i_di, dibh->b_data); + set_bit(GIF_MIN_INIT, &ip->i_flags); + spin_unlock(&ip->i_spin); + + brelse(dibh); + + if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(&ip->i_di); + return -EIO; + } + if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino) + return -ESTALE; + + ip->i_vn = ip->i_gl->gl_vn; + + return 0; +} + +/** + * inode_create - create a struct gfs2_inode + * @i_gl: The glock covering the inode + * @inum: The inode number + * @io_gl: the iopen glock to acquire/hold (using holder in new gfs2_inode) + * @io_state: the state the iopen glock should be acquired in + * @ipp: pointer to put the returned inode in + * + * Returns: errno + */ + +static int inode_create(struct gfs2_glock *i_gl, struct gfs2_inum *inum, + struct gfs2_glock *io_gl, unsigned int io_state, + struct gfs2_inode **ipp) +{ + struct gfs2_sbd *sdp = i_gl->gl_sbd; + struct gfs2_inode *ip; + int error = 0; + + RETRY_MALLOC(ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL), ip); + gfs2_memory_add(ip); + memset(ip, 0, sizeof(struct gfs2_inode)); + + ip->i_num = *inum; + + atomic_set(&ip->i_count, 1); + + ip->i_vn = i_gl->gl_vn - 1; + + ip->i_gl = i_gl; + ip->i_sbd = sdp; + + spin_lock_init(&ip->i_spin); + init_rwsem(&ip->i_rw_mutex); + + ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default); + + error = gfs2_glock_nq_init(io_gl, + io_state, GL_LOCAL_EXCL | GL_EXACT, + &ip->i_iopen_gh); + if (error) + goto fail; + ip->i_iopen_gh.gh_owner = NULL; + + spin_lock(&io_gl->gl_spin); + gfs2_glock_hold(i_gl); + set_gl2gl(io_gl, i_gl); + spin_unlock(&io_gl->gl_spin); + + gfs2_glock_hold(i_gl); + set_gl2ip(i_gl, ip); + + atomic_inc(&sdp->sd_inode_count); + + *ipp = ip; + + return 0; + + fail: + gfs2_meta_cache_flush(ip); + gfs2_memory_rm(ip); + kmem_cache_free(gfs2_inode_cachep, ip); + *ipp = NULL; + + return error; +} + +/** + * gfs2_inode_get - Create or get a reference on an inode + * @i_gl: The glock covering the inode + * @inum: The inode number + * @create: + * @ipp: pointer to put the returned inode in + * + * Returns: errno + */ + +int gfs2_inode_get(struct gfs2_glock *i_gl, struct gfs2_inum *inum, int create, + struct gfs2_inode **ipp) +{ + struct gfs2_sbd *sdp = i_gl->gl_sbd; + struct gfs2_glock *io_gl; + int error = 0; + + gfs2_glmutex_lock(i_gl); + + *ipp = get_gl2ip(i_gl); + if (*ipp) { + error = -ESTALE; + if ((*ipp)->i_num.no_formal_ino != inum->no_formal_ino) + goto out; + atomic_inc(&(*ipp)->i_count); + error = 0; + goto out; + } + + if (!create) + goto out; + + error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, + CREATE, &io_gl); + if (!error) { + error = inode_create(i_gl, inum, io_gl, LM_ST_SHARED, ipp); + gfs2_glock_put(io_gl); + } + + out: + gfs2_glmutex_unlock(i_gl); + + return error; +} + +/** + * gfs2_inode_hold - hold a struct gfs2_inode structure + * @ip: The GFS2 inode + * + */ + +void gfs2_inode_hold(struct gfs2_inode *ip) +{ + gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0,); + atomic_inc(&ip->i_count); +} + +/** + * gfs2_inode_put - put a struct gfs2_inode structure + * @ip: The GFS2 inode + * + */ + +void gfs2_inode_put(struct gfs2_inode *ip) +{ + gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0,); + atomic_dec(&ip->i_count); +} + +/** + * gfs2_inode_destroy - Destroy a GFS2 inode structure with no references on it + * @ip: The GFS2 inode + * + */ + +void gfs2_inode_destroy(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl; + struct gfs2_glock *i_gl = ip->i_gl; + + gfs2_assert_warn(sdp, !atomic_read(&ip->i_count)); + gfs2_assert(sdp, get_gl2gl(io_gl) == i_gl,); + + spin_lock(&io_gl->gl_spin); + set_gl2gl(io_gl, NULL); + gfs2_glock_put(i_gl); + spin_unlock(&io_gl->gl_spin); + + gfs2_glock_dq_uninit(&ip->i_iopen_gh); + + gfs2_meta_cache_flush(ip); + gfs2_memory_rm(ip); + kmem_cache_free(gfs2_inode_cachep, ip); + + set_gl2ip(i_gl, NULL); + gfs2_glock_put(i_gl); + + atomic_dec(&sdp->sd_inode_count); +} + +/** + * dinode_dealloc - Put deallocate a dinode + * @ip: The GFS2 inode + * + * Returns: errno + */ + +static int dinode_dealloc(struct gfs2_inode *ip, struct gfs2_unlinked *ul) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_alloc *al; + struct gfs2_rgrpd *rgd; + int error; + + if (ip->i_di.di_blocks != 1) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(&ip->i_di); + return -EIO; + } + + al = gfs2_alloc_get(ip); + + error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out; + + error = gfs2_rindex_hold(sdp, &al->al_ri_gh); + if (error) + goto out_qs; + + rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); + if (!rgd) { + gfs2_consist_inode(ip); + error = -EIO; + goto out_rindex_relse; + } + + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, + &al->al_rgd_gh); + if (error) + goto out_rindex_relse; + + error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED + + RES_STATFS + RES_QUOTA, 1); + if (error) + goto out_rg_gunlock; + + gfs2_trans_add_gl(ip->i_gl); + + gfs2_free_di(rgd, ip); + + error = gfs2_unlinked_ondisk_rm(sdp, ul); + + gfs2_trans_end(sdp); + clear_bit(GLF_STICKY, &ip->i_gl->gl_flags); + + out_rg_gunlock: + gfs2_glock_dq_uninit(&al->al_rgd_gh); + + out_rindex_relse: + gfs2_glock_dq_uninit(&al->al_ri_gh); + + out_qs: + gfs2_quota_unhold(ip); + + out: + gfs2_alloc_put(ip); + + return error; +} + +/** + * inode_dealloc - Deallocate all on-disk blocks for an inode (dinode) + * @sdp: the filesystem + * @inum: the inode number to deallocate + * @io_gh: a holder for the iopen glock for this inode + * + * Returns: errno + */ + +static int inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul, + struct gfs2_holder *io_gh) +{ + struct gfs2_inode *ip; + struct gfs2_holder i_gh; + int error; + + error = gfs2_glock_nq_num(sdp, + ul->ul_ut.ut_inum.no_addr, &gfs2_inode_glops, + LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + return error; + + /* We reacquire the iopen lock here to avoid a race with the NFS server + calling gfs2_read_inode() with the inode number of a inode we're in + the process of deallocating. And we can't keep our hold on the lock + from inode_dealloc_init() for deadlock reasons. */ + + gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY, io_gh); + error = gfs2_glock_nq(io_gh); + switch (error) { + case 0: + break; + case GLR_TRYFAILED: + error = 1; + default: + goto out; + } + + gfs2_assert_warn(sdp, !get_gl2ip(i_gh.gh_gl)); + error = inode_create(i_gh.gh_gl, &ul->ul_ut.ut_inum, io_gh->gh_gl, + LM_ST_EXCLUSIVE, &ip); + + gfs2_glock_dq(io_gh); + + if (error) + goto out; + + error = gfs2_inode_refresh(ip); + if (error) + goto out_iput; + + if (ip->i_di.di_nlink) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(&ip->i_di); + error = -EIO; + goto out_iput; + } + + if (S_ISDIR(ip->i_di.di_mode) && + (ip->i_di.di_flags & GFS2_DIF_EXHASH)) { + error = gfs2_dir_exhash_dealloc(ip); + if (error) + goto out_iput; + } + + if (ip->i_di.di_eattr) { + error = gfs2_ea_dealloc(ip); + if (error) + goto out_iput; + } + + if (!gfs2_is_stuffed(ip)) { + error = gfs2_file_dealloc(ip); + if (error) + goto out_iput; + } + + error = dinode_dealloc(ip, ul); + if (error) + goto out_iput; + + out_iput: + gfs2_glmutex_lock(i_gh.gh_gl); + gfs2_inode_put(ip); + gfs2_inode_destroy(ip); + gfs2_glmutex_unlock(i_gh.gh_gl); + + out: + gfs2_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * try_inode_dealloc - Try to deallocate an inode and all its blocks + * @sdp: the filesystem + * + * Returns: 0 on success, -errno on error, 1 on busy (inode open) + */ + +int try_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + struct gfs2_holder io_gh; + int error = 0; + + gfs2_try_toss_inode(sdp, &ul->ul_ut.ut_inum); + + error = gfs2_glock_nq_num(sdp, + ul->ul_ut.ut_inum.no_addr, &gfs2_iopen_glops, + LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &io_gh); + switch (error) { + case 0: + break; + case GLR_TRYFAILED: + return 1; + default: + return error; + } + + gfs2_glock_dq(&io_gh); + error = inode_dealloc(sdp, ul, &io_gh); + gfs2_holder_uninit(&io_gh); + + return error; +} + +int inode_dealloc_uninit(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + struct gfs2_rgrpd *rgd; + struct gfs2_holder ri_gh, rgd_gh; + int error; + + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + return error; + + rgd = gfs2_blk2rgrpd(sdp, ul->ul_ut.ut_inum.no_addr); + if (!rgd) { + gfs2_consist(sdp); + error = -EIO; + goto out; + } + + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh); + if (error) + goto out; + + error = gfs2_trans_begin(sdp, + RES_RG_BIT + RES_UNLINKED + RES_STATFS, + 0); + if (error) + goto out_gunlock; + + gfs2_free_uninit_di(rgd, ul->ul_ut.ut_inum.no_addr); + gfs2_unlinked_ondisk_rm(sdp, ul); + + gfs2_trans_end(sdp); + + out_gunlock: + gfs2_glock_dq_uninit(&rgd_gh); + out: + gfs2_glock_dq_uninit(&ri_gh); + + return error; +} + +int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + if (ul->ul_ut.ut_flags & GFS2_UTF_UNINIT) + return inode_dealloc_uninit(sdp, ul); + else + return try_inode_dealloc(sdp, ul); +} + +/** + * gfs2_change_nlink - Change nlink count on inode + * @ip: The GFS2 inode + * @diff: The change in the nlink count required + * + * Returns: errno + */ + +int gfs2_change_nlink(struct gfs2_inode *ip, int diff) +{ + struct buffer_head *dibh; + uint32_t nlink; + int error; + + nlink = ip->i_di.di_nlink + diff; + + /* If we are reducing the nlink count, but the new value ends up being + bigger than the old one, we must have underflowed. */ + if (diff < 0 && nlink > ip->i_di.di_nlink) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(&ip->i_di); + return -EIO; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + ip->i_di.di_nlink = nlink; + ip->i_di.di_ctime = get_seconds(); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + return 0; +} + +/** + * gfs2_lookupi - Look up a filename in a directory and return its inode + * @d_gh: An initialized holder for the directory glock + * @name: The name of the inode to look for + * @is_root: If TRUE, ignore the caller's permissions + * @i_gh: An uninitialized holder for the new inode glock + * + * There will always be a vnode (Linux VFS inode) for the d_gh inode unless + * @is_root is true. + * + * Returns: errno + */ + +int gfs2_lookupi(struct gfs2_inode *dip, struct qstr *name, int is_root, + struct gfs2_inode **ipp) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_holder d_gh; + struct gfs2_inum inum; + unsigned int type; + struct gfs2_glock *gl; + int error; + + if (!name->len || name->len > GFS2_FNAMESIZE) + return -ENAMETOOLONG; + + if (gfs2_filecmp(name, ".", 1) || + (gfs2_filecmp(name, "..", 2) && dip == sdp->sd_root_dir)) { + gfs2_inode_hold(dip); + *ipp = dip; + return 0; + } + + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + if (error) + return error; + + if (!is_root) { + error = gfs2_repermission(dip->i_vnode, MAY_EXEC, NULL); + if (error) + goto out; + } + + error = gfs2_dir_search(dip, name, &inum, &type); + if (error) + goto out; + + error = gfs2_glock_get(sdp, inum.no_addr, &gfs2_inode_glops, + CREATE, &gl); + if (error) + goto out; + + error = gfs2_inode_get(gl, &inum, CREATE, ipp); + if (!error) + gfs2_inode_min_init(*ipp, type); + + gfs2_glock_put(gl); + + out: + gfs2_glock_dq_uninit(&d_gh); + + return error; +} + +static int pick_formal_ino_1(struct gfs2_sbd *sdp, uint64_t *formal_ino) +{ + struct gfs2_inode *ip = sdp->sd_ir_inode; + struct buffer_head *bh; + struct gfs2_inum_range ir; + int error; + + error = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (error) + return error; + down(&sdp->sd_inum_mutex); + + error = gfs2_meta_inode_buffer(ip, &bh); + if (error) { + up(&sdp->sd_inum_mutex); + gfs2_trans_end(sdp); + return error; + } + + gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); + + if (ir.ir_length) { + *formal_ino = ir.ir_start++; + ir.ir_length--; + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_inum_range_out(&ir, + bh->b_data + sizeof(struct gfs2_dinode)); + brelse(bh); + up(&sdp->sd_inum_mutex); + gfs2_trans_end(sdp); + return 0; + } + + brelse(bh); + + up(&sdp->sd_inum_mutex); + gfs2_trans_end(sdp); + + return 1; +} + +static int pick_formal_ino_2(struct gfs2_sbd *sdp, uint64_t *formal_ino) +{ + struct gfs2_inode *ip = sdp->sd_ir_inode; + struct gfs2_inode *m_ip = sdp->sd_inum_inode; + struct gfs2_holder gh; + struct buffer_head *bh; + struct gfs2_inum_range ir; + int error; + + error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (error) + return error; + + error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0); + if (error) + goto out; + down(&sdp->sd_inum_mutex); + + error = gfs2_meta_inode_buffer(ip, &bh); + if (error) + goto out_end_trans; + + gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); + + if (!ir.ir_length) { + struct buffer_head *m_bh; + uint64_t x, y; + + error = gfs2_meta_inode_buffer(m_ip, &m_bh); + if (error) + goto out_brelse; + + x = *(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode)); + x = y = gfs2_64_to_cpu(x); + ir.ir_start = x; + ir.ir_length = GFS2_INUM_QUANTUM; + x += GFS2_INUM_QUANTUM; + if (x < y) + gfs2_consist_inode(m_ip); + x = cpu_to_gfs2_64(x); + gfs2_trans_add_bh(m_ip->i_gl, m_bh); + *(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x; + + brelse(m_bh); + } + + *formal_ino = ir.ir_start++; + ir.ir_length--; + + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode)); + + out_brelse: + brelse(bh); + + out_end_trans: + up(&sdp->sd_inum_mutex); + gfs2_trans_end(sdp); + + out: + gfs2_glock_dq_uninit(&gh); + + return error; +} + +static int pick_formal_ino(struct gfs2_sbd *sdp, uint64_t *inum) +{ + int error; + + error = pick_formal_ino_1(sdp, inum); + if (error <= 0) + return error; + + error = pick_formal_ino_2(sdp, inum); + + return error; +} + +/** + * create_ok - OK to create a new on-disk inode here? + * @dip: Directory in which dinode is to be created + * @name: Name of new dinode + * @mode: + * + * Returns: errno + */ + +static int create_ok(struct gfs2_inode *dip, struct qstr *name, + unsigned int mode) +{ + int error; + + error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL); + if (error) + return error; + + /* Don't create entries in an unlinked directory */ + if (!dip->i_di.di_nlink) + return -EPERM; + + error = gfs2_dir_search(dip, name, NULL, NULL); + switch (error) { + case -ENOENT: + error = 0; + break; + case 0: + return -EEXIST; + default: + return error; + } + + if (dip->i_di.di_entries == (uint32_t)-1) + return -EFBIG; + if (S_ISDIR(mode) && dip->i_di.di_nlink == (uint32_t)-1) + return -EMLINK; + + return 0; +} + +static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, + unsigned int *uid, unsigned int *gid) +{ + if (dip->i_sbd->sd_args.ar_suiddir && + (dip->i_di.di_mode & S_ISUID) && + dip->i_di.di_uid) { + if (S_ISDIR(*mode)) + *mode |= S_ISUID; + else if (dip->i_di.di_uid != current->fsuid) + *mode &= ~07111; + *uid = dip->i_di.di_uid; + } else + *uid = current->fsuid; + + if (dip->i_di.di_mode & S_ISGID) { + if (S_ISDIR(*mode)) + *mode |= S_ISGID; + *gid = dip->i_di.di_gid; + } else + *gid = current->fsgid; +} + +static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_unlinked *ul) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + int error; + + gfs2_alloc_get(dip); + + dip->i_alloc->al_requested = RES_DINODE; + error = gfs2_inplace_reserve(dip); + if (error) + goto out; + + error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED + + RES_STATFS, 0); + if (error) + goto out_ipreserv; + + ul->ul_ut.ut_inum.no_addr = gfs2_alloc_di(dip); + + ul->ul_ut.ut_flags = GFS2_UTF_UNINIT; + error = gfs2_unlinked_ondisk_add(sdp, ul); + + gfs2_trans_end(sdp); + + out_ipreserv: + gfs2_inplace_release(dip); + + out: + gfs2_alloc_put(dip); + + return error; +} + +/** + * init_dinode - Fill in a new dinode structure + * @dip: the directory this inode is being created in + * @gl: The glock covering the new inode + * @inum: the inode number + * @mode: the file permissions + * @uid: + * @gid: + * + */ + +static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, + struct gfs2_inum *inum, unsigned int mode, + unsigned int uid, unsigned int gid) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_dinode di; + struct buffer_head *dibh; + + dibh = gfs2_meta_new(gl, inum->no_addr); + gfs2_trans_add_bh(gl, dibh); + gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + + memset(&di, 0, sizeof(struct gfs2_dinode)); + gfs2_meta_header_in(&di.di_header, dibh->b_data); + di.di_num = *inum; + di.di_mode = mode; + di.di_uid = uid; + di.di_gid = gid; + di.di_blocks = 1; + di.di_atime = di.di_mtime = di.di_ctime = get_seconds(); + di.di_goal_meta = di.di_goal_data = inum->no_addr; + + if (S_ISREG(mode)) { + if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) || + gfs2_tune_get(sdp, gt_new_files_jdata)) + di.di_flags |= GFS2_DIF_JDATA; + if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) || + gfs2_tune_get(sdp, gt_new_files_directio)) + di.di_flags |= GFS2_DIF_DIRECTIO; + } else if (S_ISDIR(mode)) { + di.di_flags |= (dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO); + di.di_flags |= (dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA); + } + + gfs2_dinode_out(&di, dibh->b_data); + brelse(dibh); +} + +static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, + unsigned int mode, struct gfs2_unlinked *ul) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + unsigned int uid, gid; + int error; + + munge_mode_uid_gid(dip, &mode, &uid, &gid); + + gfs2_alloc_get(dip); + + error = gfs2_quota_lock(dip, uid, gid); + if (error) + goto out; + + error = gfs2_quota_check(dip, uid, gid); + if (error) + goto out_quota; + + error = gfs2_trans_begin(sdp, RES_DINODE + RES_UNLINKED + + RES_QUOTA, 0); + if (error) + goto out_quota; + + ul->ul_ut.ut_flags = 0; + error = gfs2_unlinked_ondisk_munge(sdp, ul); + + init_dinode(dip, gl, &ul->ul_ut.ut_inum, + mode, uid, gid); + + gfs2_quota_change(dip, +1, uid, gid); + + gfs2_trans_end(sdp); + + out_quota: + gfs2_quota_unlock(dip); + + out: + gfs2_alloc_put(dip); + + return error; +} + +static int link_dinode(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip, struct gfs2_unlinked *ul) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_alloc *al; + int alloc_required; + struct buffer_head *dibh; + int error; + + al = gfs2_alloc_get(dip); + + error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto fail; + + error = gfs2_diradd_alloc_required(dip, name, &alloc_required); + if (alloc_required) { + error = gfs2_quota_check(dip, dip->i_di.di_uid, + dip->i_di.di_gid); + if (error) + goto fail_quota_locks; + + al->al_requested = sdp->sd_max_dirres; + + error = gfs2_inplace_reserve(dip); + if (error) + goto fail_quota_locks; + + error = gfs2_trans_begin(sdp, + sdp->sd_max_dirres + + al->al_rgd->rd_ri.ri_length + + 2 * RES_DINODE + RES_UNLINKED + + RES_STATFS + RES_QUOTA, 0); + if (error) + goto fail_ipreserv; + } else { + error = gfs2_trans_begin(sdp, + RES_LEAF + + 2 * RES_DINODE + + RES_UNLINKED, 0); + if (error) + goto fail_quota_locks; + } + + error = gfs2_dir_add(dip, name, &ip->i_num, IF2DT(ip->i_di.di_mode)); + if (error) + goto fail_end_trans; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + ip->i_di.di_nlink = 1; + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + error = gfs2_unlinked_ondisk_rm(sdp, ul); + if (error) + goto fail_end_trans; + + return 0; + + fail_end_trans: + gfs2_trans_end(sdp); + + fail_ipreserv: + if (dip->i_alloc->al_rgd) + gfs2_inplace_release(dip); + + fail_quota_locks: + gfs2_quota_unlock(dip); + + fail: + gfs2_alloc_put(dip); + + return error; +} + +/** + * gfs2_createi - Create a new inode + * @ghs: An array of two holders + * @name: The name of the new file + * @mode: the permissions on the new inode + * + * @ghs[0] is an initialized holder for the directory + * @ghs[1] is the holder for the inode lock + * + * If the return value is 0, the glocks on both the directory and the new + * file are held. A transaction has been started and an inplace reservation + * is held, as well. + * + * Returns: errno + */ + +int gfs2_createi(struct gfs2_holder *ghs, struct qstr *name, unsigned int mode) +{ + struct gfs2_inode *dip = get_gl2ip(ghs->gh_gl); + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_unlinked *ul; + struct gfs2_inode *ip; + int error; + + if (!name->len || name->len > GFS2_FNAMESIZE) + return -ENAMETOOLONG; + + error = gfs2_unlinked_get(sdp, &ul); + if (error) + return error; + + gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); + error = gfs2_glock_nq(ghs); + if (error) + goto fail; + + error = create_ok(dip, name, mode); + if (error) + goto fail_gunlock; + + error = pick_formal_ino(sdp, &ul->ul_ut.ut_inum.no_formal_ino); + if (error) + goto fail_gunlock; + + error = alloc_dinode(dip, ul); + if (error) + goto fail_gunlock; + + if (ul->ul_ut.ut_inum.no_addr < dip->i_num.no_addr) { + gfs2_glock_dq(ghs); + + error = gfs2_glock_nq_num(sdp, + ul->ul_ut.ut_inum.no_addr, + &gfs2_inode_glops, + LM_ST_EXCLUSIVE, GL_SKIP, + ghs + 1); + if (error) { + gfs2_unlinked_put(sdp, ul); + return error; + } + + gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); + error = gfs2_glock_nq(ghs); + if (error) { + gfs2_glock_dq_uninit(ghs + 1); + gfs2_unlinked_put(sdp, ul); + return error; + } + + error = create_ok(dip, name, mode); + if (error) + goto fail_gunlock2; + } else { + error = gfs2_glock_nq_num(sdp, + ul->ul_ut.ut_inum.no_addr, + &gfs2_inode_glops, + LM_ST_EXCLUSIVE, GL_SKIP, + ghs + 1); + if (error) + goto fail_gunlock; + } + + error = make_dinode(dip, ghs[1].gh_gl, mode, ul); + if (error) + goto fail_gunlock2; + + error = gfs2_inode_get(ghs[1].gh_gl, &ul->ul_ut.ut_inum, CREATE, &ip); + if (error) + goto fail_gunlock2; + + error = gfs2_inode_refresh(ip); + if (error) + goto fail_iput; + + error = gfs2_acl_create(dip, ip); + if (error) + goto fail_iput; + + error = link_dinode(dip, name, ip, ul); + if (error) + goto fail_iput; + + gfs2_unlinked_put(sdp, ul); + + return 0; + + fail_iput: + gfs2_inode_put(ip); + + fail_gunlock2: + gfs2_glock_dq_uninit(ghs + 1); + + fail_gunlock: + gfs2_glock_dq(ghs); + + fail: + gfs2_unlinked_put(sdp, ul); + + return error; +} + +/** + * gfs2_unlinki - Unlink a file + * @dip: The inode of the directory + * @name: The name of the file to be unlinked + * @ip: The inode of the file to be removed + * + * Assumes Glocks on both dip and ip are held. + * + * Returns: errno + */ + +int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip, struct gfs2_unlinked *ul) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + int error; + + error = gfs2_dir_del(dip, name); + if (error) + return error; + + error = gfs2_change_nlink(ip, -1); + if (error) + return error; + + /* If this inode is being unlinked from the directory structure, + we need to mark that in the log so that it isn't lost during + a crash. */ + + if (!ip->i_di.di_nlink) { + ul->ul_ut.ut_inum = ip->i_num; + error = gfs2_unlinked_ondisk_add(sdp, ul); + if (!error) + set_bit(GLF_STICKY, &ip->i_gl->gl_flags); + } + + return error; +} + +/** + * gfs2_rmdiri - Remove a directory + * @dip: The parent directory of the directory to be removed + * @name: The name of the directory to be removed + * @ip: The GFS2 inode of the directory to be removed + * + * Assumes Glocks on dip and ip are held + * + * Returns: errno + */ + +int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip, struct gfs2_unlinked *ul) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct qstr dotname; + int error; + + if (ip->i_di.di_entries != 2) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(&ip->i_di); + return -EIO; + } + + error = gfs2_dir_del(dip, name); + if (error) + return error; + + error = gfs2_change_nlink(dip, -1); + if (error) + return error; + + dotname.len = 1; + dotname.name = "."; + error = gfs2_dir_del(ip, &dotname); + if (error) + return error; + + dotname.len = 2; + dotname.name = ".."; + error = gfs2_dir_del(ip, &dotname); + if (error) + return error; + + error = gfs2_change_nlink(ip, -2); + if (error) + return error; + + /* This inode is being unlinked from the directory structure and + we need to mark that in the log so that it isn't lost during + a crash. */ + + ul->ul_ut.ut_inum = ip->i_num; + error = gfs2_unlinked_ondisk_add(sdp, ul); + if (!error) + set_bit(GLF_STICKY, &ip->i_gl->gl_flags); + + return error; +} + +/* + * gfs2_unlink_ok - check to see that a inode is still in a directory + * @dip: the directory + * @name: the name of the file + * @ip: the inode + * + * Assumes that the lock on (at least) @dip is held. + * + * Returns: 0 if the parent/child relationship is correct, errno if it isn't + */ + +int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip) +{ + struct gfs2_inum inum; + unsigned int type; + int error; + + if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode)) + return -EPERM; + + if ((dip->i_di.di_mode & S_ISVTX) && + dip->i_di.di_uid != current->fsuid && + ip->i_di.di_uid != current->fsuid && + !capable(CAP_FOWNER)) + return -EPERM; + + if (IS_APPEND(dip->i_vnode)) + return -EPERM; + + error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL); + if (error) + return error; + + error = gfs2_dir_search(dip, name, &inum, &type); + if (error) + return error; + + if (!gfs2_inum_equal(&inum, &ip->i_num)) + return -ENOENT; + + if (IF2DT(ip->i_di.di_mode) != type) { + gfs2_consist_inode(dip); + return -EIO; + } + + return 0; +} + +/* + * gfs2_ok_to_move - check if it's ok to move a directory to another directory + * @this: move this + * @to: to here + * + * Follow @to back to the root and make sure we don't encounter @this + * Assumes we already hold the rename lock. + * + * Returns: errno + */ + +int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) +{ + struct gfs2_sbd *sdp = this->i_sbd; + struct gfs2_inode *tmp; + struct qstr dotdot; + int error = 0; + + memset(&dotdot, 0, sizeof(struct qstr)); + dotdot.name = ".."; + dotdot.len = 2; + + gfs2_inode_hold(to); + + for (;;) { + if (to == this) { + error = -EINVAL; + break; + } + if (to == sdp->sd_root_dir) { + error = 0; + break; + } + + error = gfs2_lookupi(to, &dotdot, TRUE, &tmp); + if (error) + break; + + gfs2_inode_put(to); + to = tmp; + } + + gfs2_inode_put(to); + + return error; +} + +/** + * gfs2_readlinki - return the contents of a symlink + * @ip: the symlink's inode + * @buf: a pointer to the buffer to be filled + * @len: a pointer to the length of @buf + * + * If @buf is too small, a piece of memory is kmalloc()ed and needs + * to be freed by the caller. + * + * Returns: errno + */ + +int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) +{ + struct gfs2_holder i_gh; + struct buffer_head *dibh; + unsigned int x; + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); + error = gfs2_glock_nq_atime(&i_gh); + if (error) { + gfs2_holder_uninit(&i_gh); + return error; + } + + if (!ip->i_di.di_size) { + gfs2_consist_inode(ip); + error = -EIO; + goto out; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out; + + x = ip->i_di.di_size + 1; + if (x > *len) { + *buf = kmalloc(x, GFP_KERNEL); + if (!*buf) { + error = -ENOMEM; + goto out_brelse; + } + } + + memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); + *len = x; + + out_brelse: + brelse(dibh); + + out: + gfs2_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and + * conditionally update the inode's atime + * @gh: the holder to acquire + * + * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap + * Update if the difference between the current time and the inode's current + * atime is greater than an interval specified at mount. + * + * Returns: errno + */ + +int gfs2_glock_nq_atime(struct gfs2_holder *gh) +{ + struct gfs2_glock *gl = gh->gh_gl; + struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_inode *ip = get_gl2ip(gl); + int64_t curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum); + unsigned int state; + int flags; + int error; + + if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) || + gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) || + gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops)) + return -EINVAL; + + state = gh->gh_state; + flags = gh->gh_flags; + + error = gfs2_glock_nq(gh); + if (error) + return error; + + if (test_bit(SDF_NOATIME, &sdp->sd_flags) || + (sdp->sd_vfs->s_flags & MS_RDONLY)) + return 0; + + curtime = get_seconds(); + if (curtime - ip->i_di.di_atime >= quantum) { + gfs2_glock_dq(gh); + gfs2_holder_reinit(LM_ST_EXCLUSIVE, + gh->gh_flags & ~LM_FLAG_ANY, + gh); + error = gfs2_glock_nq(gh); + if (error) + return error; + + /* Verify that atime hasn't been updated while we were + trying to get exclusive lock. */ + + curtime = get_seconds(); + if (curtime - ip->i_di.di_atime >= quantum) { + struct buffer_head *dibh; + + error = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (error == -EROFS) + return 0; + if (error) + goto fail; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + ip->i_di.di_atime = curtime; + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + gfs2_trans_end(sdp); + } + + /* If someone else has asked for the glock, + unlock and let them have it. Then reacquire + in the original state. */ + if (gfs2_glock_is_blocking(gl)) { + gfs2_glock_dq(gh); + gfs2_holder_reinit(state, flags, gh); + return gfs2_glock_nq(gh); + } + } + + return 0; + + fail_end_trans: + gfs2_trans_end(sdp); + + fail: + gfs2_glock_dq(gh); + + return error; +} + +/** + * glock_compare_atime - Compare two struct gfs2_glock structures for gfs2_sort + * @arg_a: the first structure + * @arg_b: the second structure + * + * Returns: 1 if A > B + * -1 if A < B + * 0 if A = B + */ + +static int glock_compare_atime(const void *arg_a, const void *arg_b) +{ + struct gfs2_holder *gh_a = *(struct gfs2_holder **)arg_a; + struct gfs2_holder *gh_b = *(struct gfs2_holder **)arg_b; + struct lm_lockname *a = &gh_a->gh_gl->gl_name; + struct lm_lockname *b = &gh_b->gh_gl->gl_name; + int ret = 0; + + if (a->ln_number > b->ln_number) + ret = 1; + else if (a->ln_number < b->ln_number) + ret = -1; + else { + if (gh_a->gh_state == LM_ST_SHARED && + gh_b->gh_state == LM_ST_EXCLUSIVE) + ret = 1; + else if (gh_a->gh_state == LM_ST_SHARED && + (gh_b->gh_flags & GL_ATIME)) + ret = 1; + } + + return ret; +} + +/** + * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an + * atime update + * @num_gh: the number of structures + * @ghs: an array of struct gfs2_holder structures + * + * Returns: 0 on success (all glocks acquired), + * errno on failure (no glocks acquired) + */ + +int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs) +{ + struct gfs2_holder **p; + unsigned int x; + int error = 0; + + if (!num_gh) + return 0; + + if (num_gh == 1) { + ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); + if (ghs->gh_flags & GL_ATIME) + error = gfs2_glock_nq_atime(ghs); + else + error = gfs2_glock_nq(ghs); + return error; + } + + p = kmalloc(num_gh * sizeof(struct gfs2_holder *), GFP_KERNEL); + if (!p) + return -ENOMEM; + + for (x = 0; x < num_gh; x++) + p[x] = &ghs[x]; + + gfs2_sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime); + + for (x = 0; x < num_gh; x++) { + p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); + + if (p[x]->gh_flags & GL_ATIME) + error = gfs2_glock_nq_atime(p[x]); + else + error = gfs2_glock_nq(p[x]); + + if (error) { + while (x--) + gfs2_glock_dq(p[x]); + break; + } + } + + kfree(p); + + return error; +} + +/** + * gfs2_try_toss_vnode - See if we can toss a vnode from memory + * @ip: the inode + * + * Returns: TRUE if the vnode was tossed + */ + +void gfs2_try_toss_vnode(struct gfs2_inode *ip) +{ + struct inode *inode; + + inode = gfs2_ip2v(ip, NO_CREATE); + if (!inode) + return; + + d_prune_aliases(inode); + + if (S_ISDIR(ip->i_di.di_mode)) { + struct list_head *head = &inode->i_dentry; + struct dentry *d = NULL; + + spin_lock(&dcache_lock); + if (list_empty(head)) + spin_unlock(&dcache_lock); + else { + d = list_entry(head->next, struct dentry, d_alias); + dget_locked(d); + spin_unlock(&dcache_lock); + + if (have_submounts(d)) + dput(d); + else { + shrink_dcache_parent(d); + dput(d); + d_prune_aliases(inode); + } + } + } + + inode->i_nlink = 0; + iput(inode); +} + +/** + * gfs2_setattr_simple - + * @ip: + * @attr: + * + * Called with a reference on the vnode. + * + * Returns: errno + */ + +int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) +{ + struct buffer_head *dibh; + int error; + + error = gfs2_trans_begin(ip->i_sbd, RES_DINODE, 0); + if (error) + return error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + inode_setattr(ip->i_vnode, attr); + gfs2_inode_attr_out(ip); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + gfs2_trans_end(ip->i_sbd); + + return error; +} + +int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd) +{ + return permission(inode, mask, nd); +} + --- a/fs/gfs2/inode.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/inode.h 2005-08-01 14:13:08.010808048 +0800 @@ -0,0 +1,77 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __INODE_DOT_H__ +#define __INODE_DOT_H__ + +static inline int gfs2_is_stuffed(struct gfs2_inode *ip) +{ + return !ip->i_di.di_height; +} + +static inline int gfs2_is_jdata(struct gfs2_inode *ip) +{ + return ip->i_di.di_flags & GFS2_DIF_JDATA; +} + +void gfs2_inode_attr_in(struct gfs2_inode *ip); +void gfs2_inode_attr_out(struct gfs2_inode *ip); +struct inode *gfs2_ip2v(struct gfs2_inode *ip, int create); +struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum); + +void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type); +int gfs2_inode_refresh(struct gfs2_inode *ip); + +int gfs2_inode_get(struct gfs2_glock *i_gl, + struct gfs2_inum *inum, int create, + struct gfs2_inode **ipp); +void gfs2_inode_hold(struct gfs2_inode *ip); +void gfs2_inode_put(struct gfs2_inode *ip); +void gfs2_inode_destroy(struct gfs2_inode *ip); + +int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul); + +int gfs2_change_nlink(struct gfs2_inode *ip, int diff); +int gfs2_lookupi(struct gfs2_inode *dip, struct qstr *name, int is_root, + struct gfs2_inode **ipp); +int gfs2_createi(struct gfs2_holder *ghs, struct qstr *name, unsigned int mode); +int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip, struct gfs2_unlinked *ul); +int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip, struct gfs2_unlinked *ul); +int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip); +int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); +int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); + +int gfs2_glock_nq_atime(struct gfs2_holder *gh); +int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs); + +void gfs2_try_toss_vnode(struct gfs2_inode *ip); + +int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); + +int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd); + +static inline int gfs2_lookup_simple(struct gfs2_inode *dip, char *name, + struct gfs2_inode **ipp) +{ + struct qstr qstr; + memset(&qstr, 0, sizeof(struct qstr)); + qstr.name = name; + qstr.len = strlen(name); + return gfs2_lookupi(dip, &qstr, TRUE, ipp); +} + +#endif /* __INODE_DOT_H__ */ + --- a/fs/gfs2/jdata.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/jdata.c 2005-08-01 14:13:08.010808048 +0800 @@ -0,0 +1,387 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "inode.h" +#include "jdata.h" +#include "meta_io.h" +#include "trans.h" + +int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, + struct buffer_head **bhp) +{ + struct buffer_head *bh; + int error = 0; + + if (new) { + bh = gfs2_meta_new(ip->i_gl, block); + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); + gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); + } else { + error = gfs2_meta_read(ip->i_gl, block, + DIO_START | DIO_WAIT, &bh); + if (error) + return error; + if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) { + brelse(bh); + return -EIO; + } + } + + *bhp = bh; + + return 0; +} + +/** + * gfs2_copy2mem - Trivial copy function for gfs2_jdata_read() + * @bh: The buffer to copy from, or NULL meaning zero the buffer + * @buf: The buffer to copy/zero + * @offset: The offset in the buffer to copy from + * @size: The amount of data to copy/zero + * + * Returns: errno + */ + +int gfs2_copy2mem(struct buffer_head *bh, char **buf, unsigned int offset, + unsigned int size) +{ + if (bh) + memcpy(*buf, bh->b_data + offset, size); + else + memset(*buf, 0, size); + *buf += size; + return 0; +} + +/** + * gfs2_copy2user - Copy bytes to user space for gfs2_jdata_read() + * @bh: The buffer + * @buf: The destination of the data + * @offset: The offset into the buffer + * @size: The amount of data to copy + * + * Returns: errno + */ + +int gfs2_copy2user(struct buffer_head *bh, char **buf, unsigned int offset, + unsigned int size) +{ + int error; + + if (bh) + error = copy_to_user(*buf, bh->b_data + offset, size); + else + error = clear_user(*buf, size); + + if (error) + error = -EFAULT; + else + *buf += size; + + return error; +} + +static int jdata_read_stuffed(struct gfs2_inode *ip, char *buf, + unsigned int offset, unsigned int size, + read_copy_fn_t copy_fn) +{ + struct buffer_head *dibh; + int error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + error = copy_fn(dibh, &buf, + offset + sizeof(struct gfs2_dinode), size); + brelse(dibh); + } + + return (error) ? error : size; +} + +/** + * gfs2_jdata_read - Read a jdata file + * @ip: The GFS2 Inode + * @buf: The buffer to place result into + * @offset: File offset to begin jdata_readng from + * @size: Amount of data to transfer + * @copy_fn: Function to actually perform the copy + * + * The @copy_fn only copies a maximum of a single block at once so + * we are safe calling it with int arguments. It is done so that + * we don't needlessly put 64bit arguments on the stack and it + * also makes the code in the @copy_fn nicer too. + * + * Returns: The amount of data actually copied or the error + */ + +int gfs2_jdata_read(struct gfs2_inode *ip, char *buf, uint64_t offset, + unsigned int size, read_copy_fn_t copy_fn) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + uint64_t lblock, dblock; + uint32_t extlen = 0; + unsigned int o; + int copied = 0; + int error = 0; + + if (offset >= ip->i_di.di_size) + return 0; + + if ((offset + size) > ip->i_di.di_size) + size = ip->i_di.di_size - offset; + + if (!size) + return 0; + + if (gfs2_is_stuffed(ip)) + return jdata_read_stuffed(ip, buf, (unsigned int)offset, size, + copy_fn); + + if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) + return -EINVAL; + + lblock = offset; + o = do_div(lblock, sdp->sd_jbsize) + + sizeof(struct gfs2_meta_header); + + while (copied < size) { + unsigned int amount; + struct buffer_head *bh; + int new; + + amount = size - copied; + if (amount > sdp->sd_sb.sb_bsize - o) + amount = sdp->sd_sb.sb_bsize - o; + + if (!extlen) { + new = FALSE; + error = gfs2_block_map(ip, lblock, &new, + &dblock, &extlen); + if (error) + goto fail; + } + + if (extlen > 1) + gfs2_meta_ra(ip->i_gl, dblock, extlen); + + if (dblock) { + error = gfs2_jdata_get_buffer(ip, dblock, new, &bh); + if (error) + goto fail; + dblock++; + extlen--; + } else + bh = NULL; + + error = copy_fn(bh, &buf, o, amount); + brelse(bh); + if (error) + goto fail; + + copied += amount; + lblock++; + + o = sizeof(struct gfs2_meta_header); + } + + return copied; + + fail: + return (copied) ? copied : error; +} + +/** + * gfs2_copy_from_mem - Trivial copy function for gfs2_jdata_write() + * @bh: The buffer to copy to or clear + * @buf: The buffer to copy from + * @offset: The offset in the buffer to write to + * @size: The amount of data to write + * + * Returns: errno + */ + +int gfs2_copy_from_mem(struct gfs2_inode *ip, struct buffer_head *bh, + char **buf, unsigned int offset, unsigned int size) +{ + gfs2_trans_add_bh(ip->i_gl, bh); + memcpy(bh->b_data + offset, *buf, size); + + *buf += size; + + return 0; +} + +/** + * gfs2_copy_from_user - Copy bytes from user space for gfs2_jdata_write() + * @bh: The buffer to copy to or clear + * @buf: The buffer to copy from + * @offset: The offset in the buffer to write to + * @size: The amount of data to write + * + * Returns: errno + */ + +int gfs2_copy_from_user(struct gfs2_inode *ip, struct buffer_head *bh, + char **buf, unsigned int offset, unsigned int size) +{ + int error = 0; + + gfs2_trans_add_bh(ip->i_gl, bh); + if (copy_from_user(bh->b_data + offset, *buf, size)) + error = -EFAULT; + else + *buf += size; + + return error; +} + +static int jdata_write_stuffed(struct gfs2_inode *ip, char *buf, + unsigned int offset, unsigned int size, + write_copy_fn_t copy_fn) +{ + struct buffer_head *dibh; + int error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + error = copy_fn(ip, + dibh, &buf, + offset + sizeof(struct gfs2_dinode), size); + if (!error) { + if (ip->i_di.di_size < offset + size) + ip->i_di.di_size = offset + size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + } + + brelse(dibh); + + return (error) ? error : size; +} + +/** + * gfs2_jdata_write - Write bytes to a file + * @ip: The GFS2 inode + * @buf: The buffer containing information to be written + * @offset: The file offset to start writing at + * @size: The amount of data to write + * @copy_fn: Function to do the actual copying + * + * Returns: The number of bytes correctly written or error code + */ + +int gfs2_jdata_write(struct gfs2_inode *ip, char *buf, uint64_t offset, + unsigned int size, write_copy_fn_t copy_fn) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *dibh; + uint64_t lblock, dblock; + uint32_t extlen = 0; + unsigned int o; + int copied = 0; + int error = 0; + + if (!size) + return 0; + + if (gfs2_is_stuffed(ip) && + offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) + return jdata_write_stuffed(ip, buf, (unsigned int)offset, size, + copy_fn); + + if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) + return -EINVAL; + + if (gfs2_is_stuffed(ip)) { + error = gfs2_unstuff_dinode(ip, NULL, NULL); + if (error) + return error; + } + + lblock = offset; + o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); + + while (copied < size) { + unsigned int amount; + struct buffer_head *bh; + int new; + + amount = size - copied; + if (amount > sdp->sd_sb.sb_bsize - o) + amount = sdp->sd_sb.sb_bsize - o; + + if (!extlen) { + new = TRUE; + error = gfs2_block_map(ip, lblock, &new, + &dblock, &extlen); + if (error) + goto fail; + error = -EIO; + if (gfs2_assert_withdraw(sdp, dblock)) + goto fail; + } + + error = gfs2_jdata_get_buffer(ip, dblock, + (amount == sdp->sd_jbsize) ? TRUE : new, + &bh); + if (error) + goto fail; + + error = copy_fn(ip, bh, &buf, o, amount); + brelse(bh); + if (error) + goto fail; + + copied += amount; + lblock++; + dblock++; + extlen--; + + o = sizeof(struct gfs2_meta_header); + } + + out: + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + if (ip->i_di.di_size < offset + copied) + ip->i_di.di_size = offset + copied; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + return copied; + + fail: + if (copied) + goto out; + return error; +} + --- a/fs/gfs2/jdata.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/jdata.h 2005-08-01 14:13:08.010808048 +0800 @@ -0,0 +1,56 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __FILE_DOT_H__ +#define __FILE_DOT_H__ + +int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, + struct buffer_head **bhp); + +typedef int (*read_copy_fn_t) (struct buffer_head *bh, char **buf, + unsigned int offset, unsigned int size); +typedef int (*write_copy_fn_t) (struct gfs2_inode *ip, + struct buffer_head *bh, char **buf, + unsigned int offset, unsigned int size); + +int gfs2_copy2mem(struct buffer_head *bh, char **buf, + unsigned int offset, unsigned int size); +int gfs2_copy2user(struct buffer_head *bh, char **buf, + unsigned int offset, unsigned int size); +int gfs2_jdata_read(struct gfs2_inode *ip, char *buf, + uint64_t offset, unsigned int size, + read_copy_fn_t copy_fn); + +int gfs2_copy_from_mem(struct gfs2_inode *ip, + struct buffer_head *bh, char **buf, + unsigned int offset, unsigned int size); +int gfs2_copy_from_user(struct gfs2_inode *ip, + struct buffer_head *bh, char **buf, + unsigned int offset, unsigned int size); +int gfs2_jdata_write(struct gfs2_inode *ip, char *buf, + uint64_t offset, unsigned int size, + write_copy_fn_t copy_fn); + +static inline int gfs2_jdata_read_mem(struct gfs2_inode *ip, char *buf, + uint64_t offset, unsigned int size) +{ + return gfs2_jdata_read(ip, buf, offset, size, gfs2_copy2mem); +} + +static inline int gfs2_jdata_write_mem(struct gfs2_inode *ip, char *buf, + uint64_t offset, unsigned int size) +{ + return gfs2_jdata_write(ip, buf, offset, size, gfs2_copy_from_mem); +} + +#endif /* __FILE_DOT_H__ */ --- a/fs/gfs2/lops.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/lops.c 2005-08-01 14:13:08.011807896 +0800 @@ -0,0 +1,517 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "glock.h" +#include "log.h" +#include "lops.h" +#include "meta_io.h" +#include "recovery.h" +#include "rgrp.h" +#include "trans.h" + +static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +{ + struct gfs2_glock *gl; + + get_transaction->tr_touched = TRUE; + + if (!list_empty(&le->le_list)) + return; + + gl = container_of(le, struct gfs2_glock, gl_le); + if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) + return; + gfs2_glock_hold(gl); + set_bit(GLF_DIRTY, &gl->gl_flags); + + gfs2_log_lock(sdp); + sdp->sd_log_num_gl++; + list_add(&le->le_list, &sdp->sd_log_le_gl); + gfs2_log_unlock(sdp); +} + +static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head = &sdp->sd_log_le_gl; + struct gfs2_glock *gl; + + while (!list_empty(head)) { + gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list); + list_del_init(&gl->gl_le.le_list); + sdp->sd_log_num_gl--; + + gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)); + gfs2_glock_put(gl); + } + gfs2_assert_warn(sdp, !sdp->sd_log_num_gl); +} + +static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +{ + struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); + struct gfs2_trans *tr; + + if (!list_empty(&bd->bd_list_tr)) + return; + + tr = get_transaction; + tr->tr_touched = TRUE; + tr->tr_num_buf++; + list_add(&bd->bd_list_tr, &tr->tr_list_buf); + + if (!list_empty(&le->le_list)) + return; + + gfs2_trans_add_gl(bd->bd_gl); + + gfs2_meta_check(sdp, bd->bd_bh); + gfs2_meta_pin(sdp, bd->bd_bh); + + gfs2_log_lock(sdp); + sdp->sd_log_num_buf++; + list_add(&le->le_list, &sdp->sd_log_le_buf); + gfs2_log_unlock(sdp); + + tr->tr_num_buf_new++; +} + +static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) +{ + struct list_head *head = &tr->tr_list_buf; + struct gfs2_bufdata *bd; + + while (!list_empty(head)) { + bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); + list_del_init(&bd->bd_list_tr); + tr->tr_num_buf--; + } + gfs2_assert_warn(sdp, !tr->tr_num_buf); +} + +static void buf_lo_before_commit(struct gfs2_sbd *sdp) +{ + struct buffer_head *bh; + struct gfs2_log_descriptor ld; + struct gfs2_bufdata *bd; + + if (!sdp->sd_log_num_buf) + return; + + bh = gfs2_log_get_buf(sdp); + memset(&ld, 0, sizeof(struct gfs2_log_descriptor)); + ld.ld_header.mh_magic = GFS2_MAGIC; + ld.ld_header.mh_type = GFS2_METATYPE_LD; + ld.ld_header.mh_format = GFS2_FORMAT_LD; + ld.ld_header.mh_blkno = bh->b_blocknr; + ld.ld_type = GFS2_LOG_DESC_METADATA; + ld.ld_length = sdp->sd_log_num_buf + 1; + ld.ld_data1 = sdp->sd_log_num_buf; + gfs2_log_descriptor_out(&ld, bh->b_data); + + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); + + list_for_each_entry(bd, &sdp->sd_log_le_buf, bd_le.le_list) { + bh = gfs2_log_fake_buf(sdp, bd->bd_bh); + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); + } +} + +static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head = &sdp->sd_log_le_buf; + struct gfs2_bufdata *bd; + + while (!list_empty(head)) { + bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); + list_del_init(&bd->bd_le.le_list); + sdp->sd_log_num_buf--; + + gfs2_meta_unpin(sdp, bd->bd_bh, ai); + } + gfs2_assert_warn(sdp, !sdp->sd_log_num_buf); +} + +static void buf_lo_before_scan(struct gfs2_jdesc *jd, + struct gfs2_log_header *head, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + + if (pass != 0) + return; + + sdp->sd_found_blocks = 0; + sdp->sd_replayed_blocks = 0; +} + +static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, + struct gfs2_log_descriptor *ld, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + struct gfs2_glock *gl = jd->jd_inode->i_gl; + unsigned int blks = ld->ld_data1; + struct buffer_head *bh_log, *bh_ip; + uint64_t blkno; + int error = 0; + + if (pass != 1 || ld->ld_type != GFS2_LOG_DESC_METADATA) + return 0; + + gfs2_replay_incr_blk(sdp, &start); + + for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { + error = gfs2_replay_read_block(jd, start, &bh_log); + if (error) + return error; + + blkno = ((struct gfs2_meta_header *)bh_log->b_data)->mh_blkno; + blkno = gfs2_64_to_cpu(blkno); + + sdp->sd_found_blocks++; + + if (gfs2_revoke_check(sdp, blkno, start)) { + brelse(bh_log); + continue; + } + + bh_ip = gfs2_meta_new(gl, blkno); + memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size); + + if (gfs2_meta_check(sdp, bh_ip)) + error = -EIO; + else + mark_buffer_dirty(bh_ip); + + brelse(bh_log); + brelse(bh_ip); + + if (error) + break; + + sdp->sd_replayed_blocks++; + } + + return error; +} + +static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + + if (error) { + gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT); + return; + } + if (pass != 1) + return; + + gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT); + + printk("GFS2: fsid=%s: jid=%u: Replayed %u of %u blocks\n", + sdp->sd_fsname, jd->jd_jid, + sdp->sd_replayed_blocks, + sdp->sd_found_blocks); +} + +static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +{ + struct gfs2_trans *tr; + + tr = get_transaction; + tr->tr_touched = TRUE; + tr->tr_num_revoke++; + + gfs2_log_lock(sdp); + sdp->sd_log_num_revoke++; + list_add(&le->le_list, &sdp->sd_log_le_revoke); + gfs2_log_unlock(sdp); +} + +static void revoke_lo_before_commit(struct gfs2_sbd *sdp) +{ + struct gfs2_log_descriptor ld; + struct gfs2_meta_header *mh = &ld.ld_header; + struct buffer_head *bh; + unsigned int offset; + struct list_head *head = &sdp->sd_log_le_revoke; + struct gfs2_revoke *rv; + + if (!sdp->sd_log_num_revoke) + return; + + bh = gfs2_log_get_buf(sdp); + memset(&ld, 0, sizeof(struct gfs2_log_descriptor)); + ld.ld_header.mh_magic = GFS2_MAGIC; + ld.ld_header.mh_type = GFS2_METATYPE_LD; + ld.ld_header.mh_format = GFS2_FORMAT_LD; + ld.ld_header.mh_blkno = bh->b_blocknr; + ld.ld_type = GFS2_LOG_DESC_REVOKE; + ld.ld_length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(uint64_t)); + ld.ld_data1 = sdp->sd_log_num_revoke; + gfs2_log_descriptor_out(&ld, bh->b_data); + offset = sizeof(struct gfs2_log_descriptor); + + while (!list_empty(head)) { + rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list); + list_del(&rv->rv_le.le_list); + sdp->sd_log_num_revoke--; + + if (offset + sizeof(uint64_t) > sdp->sd_sb.sb_bsize) { + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); + + bh = gfs2_log_get_buf(sdp); + mh->mh_type = GFS2_METATYPE_LB; + mh->mh_format = GFS2_FORMAT_LB; + mh->mh_blkno = bh->b_blocknr; + gfs2_meta_header_out(mh, bh->b_data); + offset = sizeof(struct gfs2_meta_header); + } + + *(uint64_t *)(bh->b_data + offset) = cpu_to_gfs2_64(rv->rv_blkno); + kfree(rv); + + offset += sizeof(uint64_t); + } + gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); + + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); +} + +static void revoke_lo_before_scan(struct gfs2_jdesc *jd, + struct gfs2_log_header *head, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + + if (pass != 0) + return; + + sdp->sd_found_revokes = 0; + sdp->sd_replay_tail = head->lh_tail; +} + +static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, + struct gfs2_log_descriptor *ld, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + unsigned int blks = ld->ld_length; + unsigned int revokes = ld->ld_data1; + struct buffer_head *bh; + unsigned int offset; + uint64_t blkno; + int first = TRUE; + int error; + + if (pass != 0 || ld->ld_type != GFS2_LOG_DESC_REVOKE) + return 0; + + offset = sizeof(struct gfs2_log_descriptor); + + for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { + error = gfs2_replay_read_block(jd, start, &bh); + if (error) + return error; + + if (!first) + gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB); + + while (offset + sizeof(uint64_t) <= sdp->sd_sb.sb_bsize) { + blkno = *(uint64_t *)(bh->b_data + offset); + blkno = gfs2_64_to_cpu(blkno); + + error = gfs2_revoke_add(sdp, blkno, start); + if (error < 0) + return error; + else if (error) + sdp->sd_found_revokes++; + + if (!--revokes) + break; + offset += sizeof(uint64_t); + } + + brelse(bh); + offset = sizeof(struct gfs2_meta_header); + first = FALSE; + } + + return 0; +} + +static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + + if (error) { + gfs2_revoke_clean(sdp); + return; + } + if (pass != 1) + return; + + printk("GFS2: fsid=%s: jid=%u: Found %u revoke tags\n", + sdp->sd_fsname, jd->jd_jid, + sdp->sd_found_revokes); + + gfs2_revoke_clean(sdp); +} + +static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +{ + struct gfs2_rgrpd *rgd; + + get_transaction->tr_touched = TRUE; + + if (!list_empty(&le->le_list)) + return; + + rgd = container_of(le, struct gfs2_rgrpd, rd_le); + gfs2_rgrp_bh_hold(rgd); + + gfs2_log_lock(sdp); + sdp->sd_log_num_rg++; + list_add(&le->le_list, &sdp->sd_log_le_rg); + gfs2_log_unlock(sdp); +} + +static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head = &sdp->sd_log_le_rg; + struct gfs2_rgrpd *rgd; + + while (!list_empty(head)) { + rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list); + list_del_init(&rgd->rd_le.le_list); + sdp->sd_log_num_rg--; + + gfs2_rgrp_repolish_clones(rgd); + gfs2_rgrp_bh_put(rgd); + } + gfs2_assert_warn(sdp, !sdp->sd_log_num_rg); +} + +static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +{ + get_transaction->tr_touched = TRUE; + + gfs2_log_lock(sdp); + sdp->sd_log_num_databuf++; + list_add(&le->le_list, &sdp->sd_log_le_databuf); + gfs2_log_unlock(sdp); +} + +static void databuf_lo_before_commit(struct gfs2_sbd *sdp) +{ + struct list_head *head = &sdp->sd_log_le_databuf; + LIST_HEAD(started); + struct gfs2_databuf *db; + struct buffer_head *bh; + + while (!list_empty(head)) { + db = list_entry(head->prev, struct gfs2_databuf, db_le.le_list); + list_move(&db->db_le.le_list, &started); + + gfs2_log_lock(sdp); + bh = db->db_bh; + if (bh) { + get_bh(bh); + gfs2_log_unlock(sdp); + if (buffer_dirty(bh)) { + wait_on_buffer(bh); + ll_rw_block(WRITE, 1, &bh); + } + brelse(bh); + } else + gfs2_log_unlock(sdp); + } + + while (!list_empty(&started)) { + db = list_entry(started.next, struct gfs2_databuf, + db_le.le_list); + list_del(&db->db_le.le_list); + sdp->sd_log_num_databuf--; + + gfs2_log_lock(sdp); + bh = db->db_bh; + if (bh) { + set_v2db(bh, NULL); + gfs2_log_unlock(sdp); + wait_on_buffer(bh); + brelse(bh); + } else + gfs2_log_unlock(sdp); + + kfree(db); + } + + gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); +} + +struct gfs2_log_operations gfs2_glock_lops = { + .lo_add = glock_lo_add, + .lo_after_commit = glock_lo_after_commit, + .lo_name = "glock" +}; + +struct gfs2_log_operations gfs2_buf_lops = { + .lo_add = buf_lo_add, + .lo_incore_commit = buf_lo_incore_commit, + .lo_before_commit = buf_lo_before_commit, + .lo_after_commit = buf_lo_after_commit, + .lo_before_scan = buf_lo_before_scan, + .lo_scan_elements = buf_lo_scan_elements, + .lo_after_scan = buf_lo_after_scan, + .lo_name = "buf" +}; + +struct gfs2_log_operations gfs2_revoke_lops = { + .lo_add = revoke_lo_add, + .lo_before_commit = revoke_lo_before_commit, + .lo_before_scan = revoke_lo_before_scan, + .lo_scan_elements = revoke_lo_scan_elements, + .lo_after_scan = revoke_lo_after_scan, + .lo_name = "revoke" +}; + +struct gfs2_log_operations gfs2_rg_lops = { + .lo_add = rg_lo_add, + .lo_after_commit = rg_lo_after_commit, + .lo_name = "rg" +}; + +struct gfs2_log_operations gfs2_databuf_lops = { + .lo_add = databuf_lo_add, + .lo_before_commit = databuf_lo_before_commit, + .lo_name = "databuf" +}; + +struct gfs2_log_operations *gfs2_log_ops[] = { + &gfs2_glock_lops, + &gfs2_buf_lops, + &gfs2_revoke_lops, + &gfs2_rg_lops, + &gfs2_databuf_lops, + NULL +}; + --- a/fs/gfs2/lops.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/lops.h 2005-08-01 14:13:08.011807896 +0800 @@ -0,0 +1,100 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __LOPS_DOT_H__ +#define __LOPS_DOT_H__ + +extern struct gfs2_log_operations gfs2_glock_lops; +extern struct gfs2_log_operations gfs2_buf_lops; +extern struct gfs2_log_operations gfs2_revoke_lops; +extern struct gfs2_log_operations gfs2_rg_lops; +extern struct gfs2_log_operations gfs2_databuf_lops; + +extern struct gfs2_log_operations *gfs2_log_ops[]; + +#define INIT_LE(le, lops) \ +do { \ + INIT_LIST_HEAD(&(le)->le_list); \ + (le)->le_ops = (lops); \ +} while (0) + +#define LO_ADD(sdp, le) \ +do { \ + if ((le)->le_ops->lo_add) \ + (le)->le_ops->lo_add((sdp), (le)); \ +} while (0) + +#define LO_INCORE_COMMIT(sdp, tr) \ +do { \ + int __lops_x; \ + for (__lops_x = 0; gfs2_log_ops[__lops_x]; __lops_x++) \ + if (gfs2_log_ops[__lops_x]->lo_incore_commit) \ + gfs2_log_ops[__lops_x]->lo_incore_commit((sdp), (tr)); \ +} while (0) + +#define LO_BEFORE_COMMIT(sdp) \ +do { \ + int __lops_x; \ + for (__lops_x = 0; gfs2_log_ops[__lops_x]; __lops_x++) \ + if (gfs2_log_ops[__lops_x]->lo_before_commit) \ + gfs2_log_ops[__lops_x]->lo_before_commit((sdp)); \ +} while (0) + +#define LO_AFTER_COMMIT(sdp, ai) \ +do { \ + int __lops_x; \ + for (__lops_x = 0; gfs2_log_ops[__lops_x]; __lops_x++) \ + if (gfs2_log_ops[__lops_x]->lo_after_commit) \ + gfs2_log_ops[__lops_x]->lo_after_commit((sdp), (ai)); \ +} while (0) + +#define LO_BEFORE_SCAN(jd, head, pass) \ +do \ +{ \ + int __lops_x; \ + for (__lops_x = 0; gfs2_log_ops[__lops_x]; __lops_x++) \ + if (gfs2_log_ops[__lops_x]->lo_before_scan) \ + gfs2_log_ops[__lops_x]->lo_before_scan((jd), (head), (pass)); \ +} \ +while (0) + +static inline int LO_SCAN_ELEMENTS(struct gfs2_jdesc *jd, unsigned int start, + struct gfs2_log_descriptor *ld, + unsigned int pass) +{ + unsigned int x; + int error; + + for (x = 0; gfs2_log_ops[x]; x++) + if (gfs2_log_ops[x]->lo_scan_elements) { + error = gfs2_log_ops[x]->lo_scan_elements(jd, start, + ld, pass); + if (error) + return error; + } + + return 0; +} + +#define LO_AFTER_SCAN(jd, error, pass) \ +do \ +{ \ + int __lops_x; \ + for (__lops_x = 0; gfs2_log_ops[__lops_x]; __lops_x++) \ + if (gfs2_log_ops[__lops_x]->lo_before_scan) \ + gfs2_log_ops[__lops_x]->lo_after_scan((jd), (error), (pass)); \ +} \ +while (0) + +#endif /* __LOPS_DOT_H__ */ + --- a/fs/gfs2/main.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/main.c 2005-08-01 14:13:08.011807896 +0800 @@ -0,0 +1,126 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "diaper.h" +#include "ops_fstype.h" +#include "proc.h" + +/** + * init_gfs2_fs - Register GFS2 as a filesystem + * + * Returns: 0 on success, error code on failure + */ + +int __init init_gfs2_fs(void) +{ + int error; + + gfs2_random_number = xtime.tv_nsec; + + gfs2_memory_init(); + + error = gfs2_proc_init(); + if (error) + goto fail_debug; + + error = gfs2_diaper_init(); + if (error) + goto fail_proc; + + error = -ENOMEM; + + gfs2_glock_cachep = kmem_cache_create("gfs2_glock", + sizeof(struct gfs2_glock), + 0, 0, NULL, NULL); + if (!gfs2_glock_cachep) + goto fail_diaper; + + gfs2_inode_cachep = kmem_cache_create("gfs2_inode", + sizeof(struct gfs2_inode), + 0, 0, NULL, NULL); + if (!gfs2_inode_cachep) + goto fail_diaper; + + gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata", + sizeof(struct gfs2_bufdata), + 0, 0, NULL, NULL); + if (!gfs2_bufdata_cachep) + goto fail_diaper; + + error = register_filesystem(&gfs2_fs_type); + if (error) + goto fail_diaper; + + printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); + + return 0; + + fail_diaper: + if (gfs2_bufdata_cachep) + kmem_cache_destroy(gfs2_bufdata_cachep); + + if (gfs2_inode_cachep) + kmem_cache_destroy(gfs2_inode_cachep); + + if (gfs2_glock_cachep) + kmem_cache_destroy(gfs2_glock_cachep); + + gfs2_diaper_uninit(); + + fail_proc: + gfs2_proc_uninit(); + + fail_debug: + gfs2_memory_uninit(); + + return error; +} + +/** + * exit_gfs2_fs - Unregister the file system + * + */ + +void __exit exit_gfs2_fs(void) +{ + unregister_filesystem(&gfs2_fs_type); + + kmem_cache_destroy(gfs2_bufdata_cachep); + kmem_cache_destroy(gfs2_inode_cachep); + kmem_cache_destroy(gfs2_glock_cachep); + + gfs2_diaper_uninit(); + gfs2_proc_uninit(); + + gfs2_memory_uninit(); +} + +MODULE_DESCRIPTION("Global File System"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +module_init(init_gfs2_fs); +module_exit(exit_gfs2_fs); + --- a/fs/gfs2/meta_io.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/meta_io.c 2005-08-01 14:13:08.011807896 +0800 @@ -0,0 +1,924 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "log.h" +#include "lops.h" +#include "meta_io.h" +#include "rgrp.h" +#include "trans.h" + +#define buffer_busy(bh) \ +((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned))) +#define buffer_in_io(bh) \ +((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock))) + +/** + * aspace_get_block - + * @inode: + * @lblock: + * @bh_result: + * @create: + * + * Returns: errno + */ + +static int aspace_get_block(struct inode *inode, sector_t lblock, + struct buffer_head *bh_result, int create) +{ + gfs2_assert_warn(get_v2sdp(inode->i_sb), FALSE); + return -EOPNOTSUPP; +} + +/** + * gfs2_aspace_writepage - write an aspace page + * @page: the page + * @wbc: + * + * Returns: errno + */ + +static int gfs2_aspace_writepage(struct page *page, + struct writeback_control *wbc) +{ + return block_write_full_page(page, aspace_get_block, wbc); +} + +/** + * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out. + * @bh: the buffer we're stuck on + * + */ + +static void stuck_releasepage(struct buffer_head *bh) +{ + struct gfs2_sbd *sdp = get_v2sdp(bh->b_page->mapping->host->i_sb); + struct gfs2_bufdata *bd = get_v2bd(bh); + + printk("GFS2: fsid=%s: stuck in gfs2_releasepage()\n", sdp->sd_fsname); + printk("GFS2: fsid=%s: blkno = %"PRIu64", bh->b_count = %d\n", + sdp->sd_fsname, + (uint64_t)bh->b_blocknr, + atomic_read(&bh->b_count)); + printk("GFS2: fsid=%s: pinned = %u\n", + sdp->sd_fsname, buffer_pinned(bh)); + printk("GFS2: fsid=%s: get_v2bd(bh) = %s\n", + sdp->sd_fsname, + (bd) ? "!NULL" : "NULL"); + + if (bd) { + struct gfs2_glock *gl = bd->bd_gl; + + printk("GFS2: fsid=%s: gl = (%u, %"PRIu64")\n", + sdp->sd_fsname, + gl->gl_name.ln_type, + gl->gl_name.ln_number); + printk("GFS2: fsid=%s: bd_list_tr = %s, bd_le.le_list = %s\n", + sdp->sd_fsname, + (list_empty(&bd->bd_list_tr)) ? "no" : "yes", + (list_empty(&bd->bd_le.le_list)) ? "no" : "yes"); + + if (gl->gl_ops == &gfs2_inode_glops) { + struct gfs2_inode *ip = get_gl2ip(gl); + + if (ip) { + unsigned int x; + + printk("GFS2: fsid=%s: " + "ip = %"PRIu64"/%"PRIu64"\n", + sdp->sd_fsname, + ip->i_num.no_formal_ino, + ip->i_num.no_addr); + printk("GFS2: fsid=%s: " + "ip->i_count = %d, ip->i_vnode = %s\n", + sdp->sd_fsname, + atomic_read(&ip->i_count), + (ip->i_vnode) ? "!NULL" : "NULL"); + + for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) + printk("GFS2: fsid=%s: " + "ip->i_cache[%u] = %s\n", + sdp->sd_fsname, x, + (ip->i_cache[x]) ? "!NULL" : + "NULL"); + } + } + } +} + +/** + * gfs2_aspace_releasepage - free the metadata associated with a page + * @page: the page that's being released + * @gfp_mask: passed from Linux VFS, ignored by us + * + * Call try_to_free_buffers() if the buffers in this page can be + * released. + * + * Returns: 0 + */ + +static int gfs2_aspace_releasepage(struct page *page, int gfp_mask) +{ + struct inode *aspace = page->mapping->host; + struct gfs2_sbd *sdp = get_v2sdp(aspace->i_sb); + struct buffer_head *bh, *head; + struct gfs2_bufdata *bd; + unsigned long t; + + if (!page_has_buffers(page)) + goto out; + + head = bh = page_buffers(page); + do { + t = jiffies; + + while (atomic_read(&bh->b_count)) { + if (atomic_read(&aspace->i_writecount)) { + if (time_after_eq(jiffies, t + + gfs2_tune_get(sdp, gt_stall_secs) * HZ)) { + stuck_releasepage(bh); + t = jiffies; + } + + yield(); + continue; + } + + return 0; + } + + gfs2_assert_warn(sdp, !buffer_pinned(bh)); + + bd = get_v2bd(bh); + if (bd) { + gfs2_assert_warn(sdp, bd->bd_bh == bh); + gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr)); + gfs2_assert_warn(sdp, list_empty(&bd->bd_le.le_list)); + gfs2_assert_warn(sdp, !bd->bd_ail); + gfs2_memory_rm(bd); + kmem_cache_free(gfs2_bufdata_cachep, bd); + atomic_dec(&sdp->sd_bufdata_count); + set_v2bd(bh, NULL); + } + + bh = bh->b_this_page; + } + while (bh != head); + + out: + return try_to_free_buffers(page); +} + +static struct address_space_operations aspace_aops = { + .writepage = gfs2_aspace_writepage, + .releasepage = gfs2_aspace_releasepage, +}; + +/** + * gfs2_aspace_get - Create and initialize a struct inode structure + * @sdp: the filesystem the aspace is in + * + * Right now a struct inode is just a struct inode. Maybe Linux + * will supply a more lightweight address space construct (that works) + * in the future. + * + * Make sure pages/buffers in this aspace aren't in high memory. + * + * Returns: the aspace + */ + +struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp) +{ + struct inode *aspace; + + aspace = new_inode(sdp->sd_vfs); + if (aspace) { + mapping_set_gfp_mask(aspace->i_mapping, GFP_KERNEL); + aspace->i_mapping->a_ops = &aspace_aops; + aspace->i_size = ~0ULL; + set_v2ip(aspace, NULL); + insert_inode_hash(aspace); + } + + return aspace; +} + +/** + * gfs2_aspace_put - get rid of an aspace + * @aspace: + * + */ + +void gfs2_aspace_put(struct inode *aspace) +{ + remove_inode_hash(aspace); + iput(aspace); +} + +/** + * gfs2_ail1_start_one - Start I/O on a part of the AIL + * @sdp: the filesystem + * @tr: the part of the AIL + * + */ + +void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head, *tmp, *prev; + struct gfs2_bufdata *bd; + struct buffer_head *bh; + int retry; + + do { + retry = FALSE; + + for (head = &ai->ai_ail1_list, tmp = head->prev, prev = tmp->prev; + tmp != head; + tmp = prev, prev = tmp->prev) { + bd = list_entry(tmp, struct gfs2_bufdata, bd_ail_st_list); + bh = bd->bd_bh; + + gfs2_assert(sdp, bd->bd_ail == ai,); + + if (!buffer_busy(bh)) { + if (!buffer_uptodate(bh)) + gfs2_io_error_bh(sdp, bh); + list_move(&bd->bd_ail_st_list, + &ai->ai_ail2_list); + continue; + } + + if (!buffer_dirty(bh)) + continue; + + list_move(&bd->bd_ail_st_list, head); + + gfs2_log_unlock(sdp); + wait_on_buffer(bh); + ll_rw_block(WRITE, 1, &bh); + gfs2_log_lock(sdp); + + retry = TRUE; + break; + } + } while (retry); +} + +/** + * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced + * @sdp: the filesystem + * @ai: the AIL entry + * + */ + +int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags) +{ + struct list_head *head, *tmp, *prev; + struct gfs2_bufdata *bd; + struct buffer_head *bh; + + for (head = &ai->ai_ail1_list, tmp = head->prev, prev = tmp->prev; + tmp != head; + tmp = prev, prev = tmp->prev) { + bd = list_entry(tmp, struct gfs2_bufdata, bd_ail_st_list); + bh = bd->bd_bh; + + gfs2_assert(sdp, bd->bd_ail == ai,); + + if (buffer_busy(bh)) { + if (flags & DIO_ALL) + continue; + else + break; + } + + if (!buffer_uptodate(bh)) + gfs2_io_error_bh(sdp, bh); + + list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); + } + + return list_empty(head); +} + +/** + * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced + * @sdp: the filesystem + * @ai: the AIL entry + * + */ + +void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head = &ai->ai_ail2_list; + struct gfs2_bufdata *bd; + + while (!list_empty(head)) { + bd = list_entry(head->prev, struct gfs2_bufdata, + bd_ail_st_list); + gfs2_assert(sdp, bd->bd_ail == ai,); + bd->bd_ail = NULL; + list_del(&bd->bd_ail_st_list); + list_del(&bd->bd_ail_gl_list); + atomic_dec(&bd->bd_gl->gl_ail_count); + brelse(bd->bd_bh); + } +} + +/** + * ail_empty_gl - remove all buffers for a given lock from the AIL + * @gl: the glock + * + * None of the buffers should be dirty, locked, or pinned. + */ + +void gfs2_ail_empty_gl(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + unsigned int blocks; + struct list_head *head = &gl->gl_ail_list; + struct gfs2_bufdata *bd; + struct buffer_head *bh; + uint64_t blkno; + int error; + + blocks = atomic_read(&gl->gl_ail_count); + if (!blocks) + return; + + error = gfs2_trans_begin(sdp, 0, blocks); + if (gfs2_assert_withdraw(sdp, !error)) + return; + + gfs2_log_lock(sdp); + while (!list_empty(head)) { + bd = list_entry(head->next, struct gfs2_bufdata, + bd_ail_gl_list); + bh = bd->bd_bh; + blkno = bh->b_blocknr; + gfs2_assert_withdraw(sdp, !buffer_busy(bh)); + + bd->bd_ail = NULL; + list_del(&bd->bd_ail_st_list); + list_del(&bd->bd_ail_gl_list); + atomic_dec(&gl->gl_ail_count); + brelse(bh); + gfs2_log_unlock(sdp); + + gfs2_trans_add_revoke(sdp, blkno); + + gfs2_log_lock(sdp); + } + gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); + gfs2_log_unlock(sdp); + + gfs2_trans_end(sdp); + gfs2_log_flush(sdp); +} + +/** + * gfs2_meta_inval - Invalidate all buffers associated with a glock + * @gl: the glock + * + */ + +void gfs2_meta_inval(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + struct inode *aspace = gl->gl_aspace; + struct address_space *mapping = gl->gl_aspace->i_mapping; + + gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); + + atomic_inc(&aspace->i_writecount); + truncate_inode_pages(mapping, 0); + atomic_dec(&aspace->i_writecount); + + gfs2_assert_withdraw(sdp, !mapping->nrpages); +} + +/** + * gfs2_meta_sync - Sync all buffers associated with a glock + * @gl: The glock + * @flags: DIO_START | DIO_WAIT + * + */ + +void gfs2_meta_sync(struct gfs2_glock *gl, int flags) +{ + struct address_space *mapping = gl->gl_aspace->i_mapping; + int error = 0; + + if (flags & DIO_START) + filemap_fdatawrite(mapping); + if (!error && (flags & DIO_WAIT)) + error = filemap_fdatawait(mapping); + + if (error) + gfs2_io_error(gl->gl_sbd); +} + +/** + * getbuf - Get a buffer with a given address space + * @sdp: the filesystem + * @aspace: the address space + * @blkno: the block number (filesystem scope) + * @create: TRUE if the buffer should be created + * + * Returns: the buffer + */ + +static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace, + uint64_t blkno, int create) +{ + struct page *page; + struct buffer_head *bh; + unsigned int shift; + unsigned long index; + unsigned int bufnum; + + shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift; + index = blkno >> shift; /* convert block to page */ + bufnum = blkno - (index << shift); /* block buf index within page */ + + if (create) { + RETRY_MALLOC(page = grab_cache_page(aspace->i_mapping, index), + page); + } else { + page = find_lock_page(aspace->i_mapping, index); + if (!page) + return NULL; + } + + if (!page_has_buffers(page)) + create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0); + + /* Locate header for our buffer within our page */ + for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page) + /* Do nothing */; + get_bh(bh); + + if (!buffer_mapped(bh)) + map_bh(bh, sdp->sd_vfs, blkno); + + unlock_page(page); + mark_page_accessed(page); + page_cache_release(page); + + return bh; +} + +void meta_prep_new(struct buffer_head *bh) +{ + struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; + + lock_buffer(bh); + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); + unlock_buffer(bh); + + mh->mh_magic = cpu_to_gfs2_32(GFS2_MAGIC); + mh->mh_blkno = cpu_to_gfs2_64(bh->b_blocknr); +} + +/** + * gfs2_meta_new - Get a block + * @gl: The glock associated with this block + * @blkno: The block number + * + * Returns: The buffer + */ + +struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno) +{ + struct buffer_head *bh; + bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE); + meta_prep_new(bh); + return bh; +} + +/** + * gfs2_meta_read - Read a block from disk + * @gl: The glock covering the block + * @blkno: The block number + * @flags: flags to gfs2_dreread() + * @bhp: the place where the buffer is returned (NULL on failure) + * + * Returns: errno + */ + +int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno, int flags, + struct buffer_head **bhp) +{ + int error; + + *bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE); + error = gfs2_meta_reread(gl->gl_sbd, *bhp, flags); + if (error) + brelse(*bhp); + + return error; +} + +/** + * gfs2_meta_reread - Reread a block from disk + * @sdp: the filesystem + * @bh: The block to read + * @flags: Flags that control the read + * + * Returns: errno + */ + +int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags) +{ + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + return -EIO; + + if (flags & DIO_FORCE) + clear_buffer_uptodate(bh); + + if ((flags & DIO_START) && !buffer_uptodate(bh)) + ll_rw_block(READ, 1, &bh); + + if (flags & DIO_WAIT) { + wait_on_buffer(bh); + + if (!buffer_uptodate(bh)) { + struct gfs2_trans *tr = get_transaction; + if (tr && tr->tr_touched) + gfs2_io_error_bh(sdp, bh); + return -EIO; + } + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + return -EIO; + } + + return 0; +} + +/** + * gfs2_meta_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer + * @gl: the glock the buffer belongs to + * @bh: The buffer to be attached to + * + */ + +void gfs2_meta_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh) +{ + struct gfs2_bufdata *bd; + + lock_page(bh->b_page); + + if (get_v2bd(bh)) { + unlock_page(bh->b_page); + return; + } + + RETRY_MALLOC(bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_KERNEL), + bd); + gfs2_memory_add(bd); + atomic_inc(&gl->gl_sbd->sd_bufdata_count); + + memset(bd, 0, sizeof(struct gfs2_bufdata)); + + bd->bd_bh = bh; + bd->bd_gl = gl; + + INIT_LIST_HEAD(&bd->bd_list_tr); + INIT_LE(&bd->bd_le, &gfs2_buf_lops); + + set_v2bd(bh, bd); + + unlock_page(bh->b_page); +} + +/** + * gfs2_meta_pin - Pin a metadata buffer in memory + * @sdp: the filesystem the buffer belongs to + * @bh: The buffer to be pinned + * + */ + +void gfs2_meta_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) +{ + struct gfs2_bufdata *bd = get_v2bd(bh); + + gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); + + if (test_set_buffer_pinned(bh)) + gfs2_assert_withdraw(sdp, FALSE); + + wait_on_buffer(bh); + + /* If this buffer is in the AIL and it has already been written + to in-place disk block, remove it from the AIL. */ + + gfs2_log_lock(sdp); + if (bd->bd_ail && !buffer_in_io(bh)) + list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); + gfs2_log_unlock(sdp); + + clear_buffer_dirty(bh); + wait_on_buffer(bh); + + if (!buffer_uptodate(bh)) + gfs2_io_error_bh(sdp, bh); + + get_bh(bh); +} + +/** + * gfs2_meta_unpin - Unpin a buffer + * @sdp: the filesystem the buffer belongs to + * @bh: The buffer to unpin + * @ai: + * + */ + +void gfs2_meta_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, + struct gfs2_ail *ai) +{ + struct gfs2_bufdata *bd = get_v2bd(bh); + + gfs2_assert_withdraw(sdp, buffer_uptodate(bh)); + + if (!buffer_pinned(bh)) + gfs2_assert_withdraw(sdp, FALSE); + + mark_buffer_dirty(bh); + clear_buffer_pinned(bh); + + gfs2_log_lock(sdp); + if (bd->bd_ail) { + list_del(&bd->bd_ail_st_list); + brelse(bh); + } else { + struct gfs2_glock *gl = bd->bd_gl; + list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list); + atomic_inc(&gl->gl_ail_count); + } + bd->bd_ail = ai; + list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); + gfs2_log_unlock(sdp); +} + +/** + * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore + * @ip: the inode who owns the buffers + * @bstart: the first buffer in the run + * @blen: the number of buffers in the run + * + */ + +void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct inode *aspace = ip->i_gl->gl_aspace; + struct buffer_head *bh; + + while (blen) { + bh = getbuf(sdp, aspace, bstart, NO_CREATE); + if (bh) { + struct gfs2_bufdata *bd = get_v2bd(bh); + + if (test_clear_buffer_pinned(bh)) { + gfs2_log_lock(sdp); + list_del_init(&bd->bd_le.le_list); + gfs2_assert_warn(sdp, sdp->sd_log_num_buf); + sdp->sd_log_num_buf--; + gfs2_log_unlock(sdp); + get_transaction->tr_num_buf_rm++; + brelse(bh); + } + if (bd) { + gfs2_log_lock(sdp); + if (bd->bd_ail) { + uint64_t blkno = bh->b_blocknr; + bd->bd_ail = NULL; + list_del(&bd->bd_ail_st_list); + list_del(&bd->bd_ail_gl_list); + atomic_dec(&bd->bd_gl->gl_ail_count); + brelse(bh); + gfs2_log_unlock(sdp); + gfs2_trans_add_revoke(sdp, blkno); + } else + gfs2_log_unlock(sdp); + } + + lock_buffer(bh); + clear_buffer_dirty(bh); + clear_buffer_uptodate(bh); + unlock_buffer(bh); + + brelse(bh); + } + + bstart++; + blen--; + } +} + +/** + * gfs2_meta_cache_flush - get rid of any references on buffers for this inode + * @ip: The GFS2 inode + * + * This releases buffers that are in the most-recently-used array of + * blocks used for indirect block addressing for this inode. + */ + +void gfs2_meta_cache_flush(struct gfs2_inode *ip) +{ + struct buffer_head **bh_slot; + unsigned int x; + + spin_lock(&ip->i_spin); + + for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) { + bh_slot = &ip->i_cache[x]; + if (!*bh_slot) + break; + brelse(*bh_slot); + *bh_slot = NULL; + } + + spin_unlock(&ip->i_spin); +} + +/** + * gfs2_meta_indirect_buffer - Get a metadata buffer + * @ip: The GFS2 inode + * @height: The level of this buf in the metadata (indir addr) tree (if any) + * @num: The block number (device relative) of the buffer + * @new: Non-zero if we may create a new buffer + * @bhp: the buffer is returned here + * + * Try to use the gfs2_inode's MRU metadata tree cache. + * + * Returns: errno + */ + +int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num, + int new, struct buffer_head **bhp) +{ + struct buffer_head *bh, **bh_slot = ip->i_cache + height; + int error; + + spin_lock(&ip->i_spin); + bh = *bh_slot; + if (bh) { + if (bh->b_blocknr == num) + get_bh(bh); + else + bh = NULL; + } + spin_unlock(&ip->i_spin); + + if (bh) { + if (new) + meta_prep_new(bh); + else { + error = gfs2_meta_reread(ip->i_sbd, bh, + DIO_START | DIO_WAIT); + if (error) { + brelse(bh); + return error; + } + } + } else { + if (new) + bh = gfs2_meta_new(ip->i_gl, num); + else { + error = gfs2_meta_read(ip->i_gl, num, + DIO_START | DIO_WAIT, &bh); + if (error) + return error; + } + + spin_lock(&ip->i_spin); + if (*bh_slot != bh) { + brelse(*bh_slot); + *bh_slot = bh; + get_bh(bh); + } + spin_unlock(&ip->i_spin); + } + + if (new) { + if (gfs2_assert_warn(ip->i_sbd, height)) { + brelse(bh); + return -EIO; + } + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); + gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); + + } else if (gfs2_metatype_check(ip->i_sbd, bh, + (height) ? GFS2_METATYPE_IN : GFS2_METATYPE_DI)) { + brelse(bh); + return -EIO; + } + + *bhp = bh; + + return 0; +} + +/** + * gfs2_meta_ra - start readahead on an extent of a file + * @gl: the glock the blocks belong to + * @dblock: the starting disk block + * @extlen: the number of blocks in the extent + * + */ + +void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + struct inode *aspace = gl->gl_aspace; + struct buffer_head *first_bh, *bh; + uint32_t max_ra = gfs2_tune_get(sdp, gt_max_readahead) >> sdp->sd_sb.sb_bsize_shift; + int error; + + if (!extlen || !max_ra) + return; + if (extlen > max_ra) + extlen = max_ra; + + first_bh = getbuf(sdp, aspace, dblock, CREATE); + + if (buffer_uptodate(first_bh)) + goto out; + if (!buffer_locked(first_bh)) { + error = gfs2_meta_reread(sdp, first_bh, DIO_START); + if (error) + goto out; + } + + dblock++; + extlen--; + + while (extlen) { + bh = getbuf(sdp, aspace, dblock, CREATE); + + if (!buffer_uptodate(bh) && !buffer_locked(bh)) { + error = gfs2_meta_reread(sdp, bh, DIO_START); + brelse(bh); + if (error) + goto out; + } else + brelse(bh); + + dblock++; + extlen--; + + if (buffer_uptodate(first_bh)) + break; + } + + out: + brelse(first_bh); +} + +/** + * gfs2_meta_syncfs - sync all the buffers in a filesystem + * @sdp: the filesystem + * + */ + +void gfs2_meta_syncfs(struct gfs2_sbd *sdp) +{ + gfs2_log_flush(sdp); + for (;;) { + gfs2_ail1_start(sdp, DIO_ALL); + if (gfs2_ail1_empty(sdp, DIO_ALL)) + break; + msleep(100); + } +} + --- a/fs/gfs2/meta_io.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/meta_io.h 2005-08-01 14:13:08.011807896 +0800 @@ -0,0 +1,92 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __DIO_DOT_H__ +#define __DIO_DOT_H__ + +static inline void gfs2_buffer_clear(struct buffer_head *bh) +{ + memset(bh->b_data, 0, bh->b_size); +} + +static inline void gfs2_buffer_clear_tail(struct buffer_head *bh, int head) +{ + memset(bh->b_data + head, 0, bh->b_size - head); +} + +static inline void gfs2_buffer_clear_ends(struct buffer_head *bh, int offset, + int amount, int journaled) +{ + int z_off1 = (journaled) ? sizeof(struct gfs2_meta_header) : 0; + int z_len1 = offset - z_off1; + int z_off2 = offset + amount; + int z_len2 = (bh)->b_size - z_off2; + + if (z_len1) + memset(bh->b_data + z_off1, 0, z_len1); + + if (z_len2) + memset(bh->b_data + z_off2, 0, z_len2); +} + +static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh, + int to_head, + struct buffer_head *from_bh, + int from_head) +{ + memcpy(to_bh->b_data + to_head, + from_bh->b_data + from_head, + from_bh->b_size - from_head); + memset(to_bh->b_data + to_bh->b_size + to_head - from_head, + 0, + from_head - to_head); +} + +struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp); +void gfs2_aspace_put(struct inode *aspace); + +void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai); +int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags); +void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai); +void gfs2_ail_empty_gl(struct gfs2_glock *gl); + +void gfs2_meta_inval(struct gfs2_glock *gl); +void gfs2_meta_sync(struct gfs2_glock *gl, int flags); + +struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno); +int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno, + int flags, struct buffer_head **bhp); +int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags); + +void gfs2_meta_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh); +void gfs2_meta_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); +void gfs2_meta_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, + struct gfs2_ail *ai); + +void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen); + +void gfs2_meta_cache_flush(struct gfs2_inode *ip); +int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num, + int new, struct buffer_head **bhp); + +static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, + struct buffer_head **bhp) +{ + return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, FALSE, bhp); +} + +void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen); +void gfs2_meta_syncfs(struct gfs2_sbd *sdp); + +#endif /* __DIO_DOT_H__ */ + --- a/fs/gfs2/ondisk.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ondisk.c 2005-08-01 14:13:08.012807744 +0800 @@ -0,0 +1,32 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" + +#define pv(struct, member, fmt) printk(" "#member" = "fmt"\n", struct->member); + +#define WANT_GFS2_CONVERSION_FUNCTIONS +#include + +#ifdef GFS2_ENDIAN_BIG +#warning Big endian is set. +#endif + --- a/fs/gfs2/ops_address.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_address.c 2005-08-01 14:13:08.012807744 +0800 @@ -0,0 +1,553 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "glock.h" +#include "inode.h" +#include "jdata.h" +#include "log.h" +#include "meta_io.h" +#include "ops_address.h" +#include "page.h" +#include "quota.h" +#include "trans.h" + +/** + * get_block - Fills in a buffer head with details about a block + * @inode: The inode + * @lblock: The block number to look up + * @bh_result: The buffer head to return the result in + * @create: Non-zero if we may add block to the file + * + * Returns: errno + */ + +static int get_block(struct inode *inode, sector_t lblock, + struct buffer_head *bh_result, int create) +{ + struct gfs2_inode *ip = get_v2ip(inode); + int new = create; + uint64_t dblock; + int error; + + error = gfs2_block_map(ip, lblock, &new, &dblock, NULL); + if (error) + return error; + + if (!dblock) + return 0; + + map_bh(bh_result, inode->i_sb, dblock); + if (new) + set_buffer_new(bh_result); + + return 0; +} + +/** + * get_block_noalloc - Fills in a buffer head with details about a block + * @inode: The inode + * @lblock: The block number to look up + * @bh_result: The buffer head to return the result in + * @create: Non-zero if we may add block to the file + * + * Returns: errno + */ + +static int get_block_noalloc(struct inode *inode, sector_t lblock, + struct buffer_head *bh_result, int create) +{ + struct gfs2_inode *ip = get_v2ip(inode); + int new = FALSE; + uint64_t dblock; + int error; + + error = gfs2_block_map(ip, lblock, &new, &dblock, NULL); + if (error) + return error; + + if (dblock) + map_bh(bh_result, inode->i_sb, dblock); + else if (gfs2_assert_withdraw(ip->i_sbd, !create)) + error = -EIO; + + return error; +} + +/** + * get_blocks - + * @inode: + * @lblock: + * @max_blocks: + * @bh_result: + * @create: + * + * Returns: errno + */ + +static int get_blocks(struct inode *inode, sector_t lblock, + unsigned long max_blocks, struct buffer_head *bh_result, + int create) +{ + struct gfs2_inode *ip = get_v2ip(inode); + int new = create; + uint64_t dblock; + uint32_t extlen; + int error; + + error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen); + if (error) + return error; + + if (!dblock) + return 0; + + map_bh(bh_result, inode->i_sb, dblock); + if (new) + set_buffer_new(bh_result); + + if (extlen > max_blocks) + extlen = max_blocks; + bh_result->b_size = extlen << inode->i_blkbits; + + return 0; +} + +/** + * get_blocks_noalloc - + * @inode: + * @lblock: + * @max_blocks: + * @bh_result: + * @create: + * + * Returns: errno + */ + +static int get_blocks_noalloc(struct inode *inode, sector_t lblock, + unsigned long max_blocks, + struct buffer_head *bh_result, int create) +{ + struct gfs2_inode *ip = get_v2ip(inode); + int new = FALSE; + uint64_t dblock; + uint32_t extlen; + int error; + + error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen); + if (error) + return error; + + if (dblock) { + map_bh(bh_result, inode->i_sb, dblock); + if (extlen > max_blocks) + extlen = max_blocks; + bh_result->b_size = extlen << inode->i_blkbits; + } else if (gfs2_assert_withdraw(ip->i_sbd, !create)) + error = -EIO; + + return error; +} + +/** + * gfs2_writepage - Write complete page + * @page: Page to write + * + * Returns: errno + * + * Use Linux VFS block_write_full_page() to write one page, + * using GFS2's get_block_noalloc to find which blocks to write. + */ + +static int gfs2_writepage(struct page *page, struct writeback_control *wbc) +{ + struct gfs2_inode *ip = get_v2ip(page->mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + int error; + + atomic_inc(&sdp->sd_ops_address); + + if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) { + unlock_page(page); + return -EIO; + } + if (get_transaction) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } + + error = block_write_full_page(page, get_block_noalloc, wbc); + + gfs2_meta_cache_flush(ip); + + return error; +} + +/** + * stuffed_readpage - Fill in a Linux page with stuffed file data + * @ip: the inode + * @page: the page + * + * Returns: errno + */ + +static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) +{ + struct buffer_head *dibh; + void *kaddr; + int error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + kaddr = kmap(page); + memcpy((char *)kaddr, + dibh->b_data + sizeof(struct gfs2_dinode), + ip->i_di.di_size); + memset((char *)kaddr + ip->i_di.di_size, + 0, + PAGE_CACHE_SIZE - ip->i_di.di_size); + kunmap(page); + + brelse(dibh); + + SetPageUptodate(page); + + return 0; +} + +static int zero_readpage(struct page *page) +{ + void *kaddr; + + kaddr = kmap(page); + memset(kaddr, 0, PAGE_CACHE_SIZE); + kunmap(page); + + SetPageUptodate(page); + unlock_page(page); + + return 0; +} + +/** + * jdata_readpage - readpage that goes through gfs2_jdata_read_mem() + * @ip: + * @page: The page to read + * + * Returns: errno + */ + +static int jdata_readpage(struct gfs2_inode *ip, struct page *page) +{ + void *kaddr; + int ret; + + kaddr = kmap(page); + + ret = gfs2_jdata_read_mem(ip, kaddr, + (uint64_t)page->index << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE); + if (ret >= 0) { + if (ret < PAGE_CACHE_SIZE) + memset(kaddr + ret, 0, PAGE_CACHE_SIZE - ret); + SetPageUptodate(page); + ret = 0; + } + + kunmap(page); + + unlock_page(page); + + return ret; +} + +/** + * gfs2_readpage - readpage with locking + * @file: The file to read a page for + * @page: The page to read + * + * Returns: errno + */ + +static int gfs2_readpage(struct file *file, struct page *page) +{ + struct gfs2_inode *ip = get_v2ip(page->mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + int error; + + atomic_inc(&sdp->sd_ops_address); + + if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) { + unlock_page(page); + return -EOPNOTSUPP; + } + + if (!gfs2_is_jdata(ip)) { + if (gfs2_is_stuffed(ip)) { + if (!page->index) { + error = stuffed_readpage(ip, page); + unlock_page(page); + } else + error = zero_readpage(page); + } else + error = block_read_full_page(page, get_block); + } else + error = jdata_readpage(ip, page); + + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + error = -EIO; + + return error; +} + +/** + * gfs2_prepare_write - Prepare to write a page to a file + * @file: The file to write to + * @page: The page which is to be prepared for writing + * @from: From (byte range within page) + * @to: To (byte range within page) + * + * Returns: errno + */ + +static int gfs2_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct gfs2_inode *ip = get_v2ip(page->mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + int error = 0; + + atomic_inc(&sdp->sd_ops_address); + + if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) + return -EOPNOTSUPP; + + if (gfs2_is_stuffed(ip)) { + uint64_t file_size; + file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to; + + if (file_size > sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode)) { + error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, + page); + if (!error) + error = block_prepare_write(page, from, to, + get_block); + } else if (!PageUptodate(page)) + error = stuffed_readpage(ip, page); + } else + error = block_prepare_write(page, from, to, get_block); + + return error; +} + +/** + * gfs2_commit_write - Commit write to a file + * @file: The file to write to + * @page: The page containing the data + * @from: From (byte range within page) + * @to: To (byte range within page) + * + * Returns: errno + */ + +static int gfs2_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_sbd *sdp = ip->i_sbd; + int error; + + atomic_inc(&sdp->sd_ops_address); + + if (gfs2_is_stuffed(ip)) { + struct buffer_head *dibh; + uint64_t file_size; + void *kaddr; + + file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail; + + gfs2_trans_add_bh(ip->i_gl, dibh); + + kaddr = kmap(page); + memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from, + (char *)kaddr + from, + to - from); + kunmap(page); + + brelse(dibh); + + SetPageUptodate(page); + + if (inode->i_size < file_size) + i_size_write(inode, file_size); + } else { + if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) + gfs2_page_add_databufs(sdp, page, from, to); + error = generic_commit_write(file, page, from, to); + if (error) + goto fail; + } + + return 0; + + fail: + ClearPageUptodate(page); + + return error; +} + +/** + * gfs2_bmap - Block map function + * @mapping: Address space info + * @lblock: The block to map + * + * Returns: The disk address for the block or 0 on hole or error + */ + +static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock) +{ + struct gfs2_inode *ip = get_v2ip(mapping->host); + struct gfs2_holder i_gh; + sector_t dblock = 0; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_address); + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return 0; + + if (!gfs2_is_stuffed(ip)) + dblock = generic_block_bmap(mapping, lblock, get_block); + + gfs2_glock_dq_uninit(&i_gh); + + return dblock; +} + +static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh) +{ + struct gfs2_databuf *db; + + gfs2_log_lock(sdp); + db = get_v2db(bh); + if (db) { + db->db_bh = NULL; + set_v2db(bh, NULL); + gfs2_log_unlock(sdp); + brelse(bh); + } else + gfs2_log_unlock(sdp); + + lock_buffer(bh); + clear_buffer_dirty(bh); + bh->b_bdev = NULL; + clear_buffer_mapped(bh); + clear_buffer_req(bh); + clear_buffer_new(bh); + clear_buffer_delay(bh); + unlock_buffer(bh); +} + +int gfs2_invalidatepage(struct page *page, unsigned long offset) +{ + struct gfs2_sbd *sdp = get_v2sdp(page->mapping->host->i_sb); + struct buffer_head *head, *bh, *next; + unsigned int curr_off = 0; + int ret = 1; + + BUG_ON(!PageLocked(page)); + if (!page_has_buffers(page)) + return 1; + + bh = head = page_buffers(page); + do { + unsigned int next_off = curr_off + bh->b_size; + next = bh->b_this_page; + + if (offset <= curr_off) + discard_buffer(sdp, bh); + + curr_off = next_off; + bh = next; + } while (bh != head); + + if (!offset) + ret = try_to_release_page(page, 0); + + return ret; +} + +/** + * gfs2_direct_IO - + * @rw: + * @iocb: + * @iov: + * @offset: + * @nr_segs: + * + * Returns: errno + */ + +static int gfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_sbd *sdp = ip->i_sbd; + get_blocks_t *gb = get_blocks; + + atomic_inc(&sdp->sd_ops_address); + + if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) || + gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) + return -EINVAL; + + if (rw == WRITE && !get_transaction) + gb = get_blocks_noalloc; + + return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, gb, NULL); +} + +struct address_space_operations gfs2_file_aops = { + .writepage = gfs2_writepage, + .readpage = gfs2_readpage, + .sync_page = block_sync_page, + .prepare_write = gfs2_prepare_write, + .commit_write = gfs2_commit_write, + .bmap = gfs2_bmap, + .invalidatepage = gfs2_invalidatepage, + .direct_IO = gfs2_direct_IO, +}; + --- a/fs/gfs2/ops_address.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_address.h 2005-08-01 14:13:08.012807744 +0800 @@ -0,0 +1,19 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_ADDRESS_DOT_H__ +#define __OPS_ADDRESS_DOT_H__ + +extern struct address_space_operations gfs2_file_aops; + +#endif /* __OPS_ADDRESS_DOT_H__ */ --- a/fs/gfs2/ops_dentry.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_dentry.c 2005-08-01 14:13:08.012807744 +0800 @@ -0,0 +1,121 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "dir.h" +#include "glock.h" +#include "ops_dentry.h" + +/** + * gfs2_drevalidate - Check directory lookup consistency + * @dentry: the mapping to check + * @nd: + * + * Check to make sure the lookup necessary to arrive at this inode from its + * parent is still good. + * + * Returns: 1 if the dentry is ok, 0 if it isn't + */ + +static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *parent = dget_parent(dentry); + struct gfs2_inode *dip = get_v2ip(parent->d_inode); + struct gfs2_sbd *sdp = dip->i_sbd; + struct inode *inode; + struct gfs2_holder d_gh; + struct gfs2_inode *ip; + struct gfs2_inum inum; + unsigned int type; + int error; + + lock_kernel(); + + atomic_inc(&sdp->sd_ops_dentry); + + inode = dentry->d_inode; + if (inode && is_bad_inode(inode)) + goto invalid; + + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + if (error) + goto fail; + + error = gfs2_dir_search(dip, &dentry->d_name, &inum, &type); + switch (error) { + case 0: + if (!inode) + goto invalid_gunlock; + break; + case -ENOENT: + if (!inode) + goto valid_gunlock; + goto invalid_gunlock; + default: + goto fail_gunlock; + } + + ip = get_v2ip(inode); + + if (!gfs2_inum_equal(&ip->i_num, &inum)) + goto invalid_gunlock; + + if (IF2DT(ip->i_di.di_mode) != type) { + gfs2_consist_inode(dip); + goto fail_gunlock; + } + + valid_gunlock: + gfs2_glock_dq_uninit(&d_gh); + + valid: + unlock_kernel(); + dput(parent); + return 1; + + invalid_gunlock: + gfs2_glock_dq_uninit(&d_gh); + + invalid: + if (inode && S_ISDIR(inode->i_mode)) { + if (have_submounts(dentry)) + goto valid; + shrink_dcache_parent(dentry); + } + d_drop(dentry); + + unlock_kernel(); + dput(parent); + return 0; + + fail_gunlock: + gfs2_glock_dq_uninit(&d_gh); + + fail: + unlock_kernel(); + dput(parent); + return 0; +} + +struct dentry_operations gfs2_dops = { + .d_revalidate = gfs2_drevalidate, +}; + --- a/fs/gfs2/ops_dentry.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_dentry.h 2005-08-01 14:13:08.012807744 +0800 @@ -0,0 +1,19 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_DENTRY_DOT_H__ +#define __OPS_DENTRY_DOT_H__ + +extern struct dentry_operations gfs2_dops; + +#endif /* __OPS_DENTRY_DOT_H__ */ --- a/fs/gfs2/ops_export.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_export.c 2005-08-01 14:13:08.012807744 +0800 @@ -0,0 +1,309 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "dir.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "ops_export.h" +#include "rgrp.h" + +struct dentry *gfs2_decode_fh(struct super_block *sb, __u32 *fh, int fh_len, + int fh_type, int (*acceptable)(void *context, struct dentry *dentry), + void *context) +{ + struct gfs2_inum this, parent; + + atomic_inc(&get_v2sdp(sb)->sd_ops_export); + + if (fh_type != fh_len) + return NULL; + + memset(&parent, 0, sizeof(struct gfs2_inum)); + + switch (fh_type) { + case 8: + parent.no_formal_ino = ((uint64_t)gfs2_32_to_cpu(fh[4])) << 32; + parent.no_formal_ino |= gfs2_32_to_cpu(fh[5]); + parent.no_addr = ((uint64_t)gfs2_32_to_cpu(fh[6])) << 32; + parent.no_addr |= gfs2_32_to_cpu(fh[7]); + case 4: + this.no_formal_ino = ((uint64_t)gfs2_32_to_cpu(fh[0])) << 32; + this.no_formal_ino |= gfs2_32_to_cpu(fh[1]); + this.no_addr = ((uint64_t)gfs2_32_to_cpu(fh[2])) << 32; + this.no_addr |= gfs2_32_to_cpu(fh[3]); + break; + default: + return NULL; + } + + return gfs2_export_ops.find_exported_dentry(sb, &this, &parent, + acceptable, context); +} + +int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len, int connectable) +{ + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_sbd *sdp = ip->i_sbd; + + atomic_inc(&sdp->sd_ops_export); + + if (*len < 4 || (connectable && *len < 8)) + return 255; + + fh[0] = ip->i_num.no_formal_ino >> 32; + fh[0] = cpu_to_gfs2_32(fh[0]); + fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF; + fh[1] = cpu_to_gfs2_32(fh[1]); + fh[2] = ip->i_num.no_addr >> 32; + fh[2] = cpu_to_gfs2_32(fh[2]); + fh[3] = ip->i_num.no_addr & 0xFFFFFFFF; + fh[3] = cpu_to_gfs2_32(fh[3]); + *len = 4; + + if (!connectable || ip == sdp->sd_root_dir) + return *len; + + spin_lock(&dentry->d_lock); + inode = dentry->d_parent->d_inode; + ip = get_v2ip(inode); + gfs2_inode_hold(ip); + spin_unlock(&dentry->d_lock); + + fh[4] = ip->i_num.no_formal_ino >> 32; + fh[4] = cpu_to_gfs2_32(fh[4]); + fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF; + fh[5] = cpu_to_gfs2_32(fh[5]); + fh[6] = ip->i_num.no_addr >> 32; + fh[6] = cpu_to_gfs2_32(fh[6]); + fh[7] = ip->i_num.no_addr & 0xFFFFFFFF; + fh[7] = cpu_to_gfs2_32(fh[7]); + *len = 8; + + gfs2_inode_put(ip); + + return *len; +} + +struct get_name_filldir { + struct gfs2_inum inum; + char *name; +}; + +static int get_name_filldir(void *opaque, const char *name, unsigned int length, + uint64_t offset, struct gfs2_inum *inum, + unsigned int type) +{ + struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque; + + if (!gfs2_inum_equal(inum, &gnfd->inum)) + return 0; + + memcpy(gnfd->name, name, length); + gnfd->name[length] = 0; + + return 1; +} + +int gfs2_get_name(struct dentry *parent, char *name, struct dentry *child) +{ + struct inode *dir = parent->d_inode; + struct inode *inode = child->d_inode; + struct gfs2_inode *dip, *ip; + struct get_name_filldir gnfd; + struct gfs2_holder gh; + uint64_t offset = 0; + int error; + + if (!dir) + return -EINVAL; + + atomic_inc(&get_v2sdp(dir->i_sb)->sd_ops_export); + + if (!S_ISDIR(dir->i_mode) || !inode) + return -EINVAL; + + dip = get_v2ip(dir); + ip = get_v2ip(inode); + + *name = 0; + gnfd.inum = ip->i_num; + gnfd.name = name; + + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); + if (error) + return error; + + error = gfs2_dir_read(dip, &offset, &gnfd, get_name_filldir); + + gfs2_glock_dq_uninit(&gh); + + if (!error && !*name) + error = -ENOENT; + + return error; +} + +struct dentry *gfs2_get_parent(struct dentry *child) +{ + struct gfs2_inode *dip = get_v2ip(child->d_inode); + struct qstr dotdot = { .name = "..", .len = 2 }; + struct gfs2_inode *ip; + struct inode *inode; + struct dentry *dentry; + int error; + + atomic_inc(&dip->i_sbd->sd_ops_export); + + error = gfs2_lookupi(dip, &dotdot, TRUE, &ip); + if (error) + return ERR_PTR(error); + + inode = gfs2_ip2v(ip, CREATE); + gfs2_inode_put(ip); + + if (!inode) + return ERR_PTR(-ENOMEM); + + dentry = d_alloc_anon(inode); + if (!dentry) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + + return dentry; +} + +struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_p) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + struct gfs2_inum *inum = (struct gfs2_inum *)inum_p; + struct gfs2_holder i_gh, ri_gh, rgd_gh; + struct gfs2_rgrpd *rgd; + struct gfs2_inode *ip; + struct inode *inode; + struct dentry *dentry; + int error; + + atomic_inc(&sdp->sd_ops_export); + + /* System files? */ + + inode = gfs2_iget(sb, inum); + if (inode) { + ip = get_v2ip(inode); + if (ip->i_num.no_formal_ino != inum->no_formal_ino) { + iput(inode); + return ERR_PTR(-ESTALE); + } + goto out_inode; + } + + error = gfs2_glock_nq_num(sdp, + inum->no_addr, &gfs2_inode_glops, + LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL, + &i_gh); + if (error) + return ERR_PTR(error); + + error = gfs2_inode_get(i_gh.gh_gl, inum, NO_CREATE, &ip); + if (error) + goto fail; + if (ip) + goto out_ip; + + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + goto fail; + + error = -EINVAL; + rgd = gfs2_blk2rgrpd(sdp, inum->no_addr); + if (!rgd) + goto fail_rindex; + + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); + if (error) + goto fail_rindex; + + error = -ESTALE; + if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE) + goto fail_rgd; + + gfs2_glock_dq_uninit(&rgd_gh); + gfs2_glock_dq_uninit(&ri_gh); + + error = gfs2_inode_get(i_gh.gh_gl, inum, CREATE, &ip); + if (error) + goto fail; + + error = gfs2_inode_refresh(ip); + if (error) { + gfs2_inode_put(ip); + goto fail; + } + + atomic_inc(&sdp->sd_fh2dentry_misses); + + out_ip: + error = -EIO; + if (ip->i_di.di_flags & GFS2_DIF_SYSTEM) { + gfs2_inode_put(ip); + goto fail; + } + + gfs2_glock_dq_uninit(&i_gh); + + inode = gfs2_ip2v(ip, CREATE); + gfs2_inode_put(ip); + + if (!inode) + return ERR_PTR(-ENOMEM); + + out_inode: + dentry = d_alloc_anon(inode); + if (!dentry) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + + return dentry; + + fail_rgd: + gfs2_glock_dq_uninit(&rgd_gh); + + fail_rindex: + gfs2_glock_dq_uninit(&ri_gh); + + fail: + gfs2_glock_dq_uninit(&i_gh); + return ERR_PTR(error); +} + +struct export_operations gfs2_export_ops = { + .decode_fh = gfs2_decode_fh, + .encode_fh = gfs2_encode_fh, + .get_name = gfs2_get_name, + .get_parent = gfs2_get_parent, + .get_dentry = gfs2_get_dentry, +}; + --- a/fs/gfs2/ops_export.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_export.h 2005-08-01 14:13:08.012807744 +0800 @@ -0,0 +1,19 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_EXPORT_DOT_H__ +#define __OPS_EXPORT_DOT_H__ + +extern struct export_operations gfs2_export_ops; + +#endif /* __OPS_EXPORT_DOT_H__ */ --- a/fs/gfs2/ops_file.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_file.c 2005-08-01 14:13:08.012807744 +0800 @@ -0,0 +1,1549 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "dir.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "ioctl.h" +#include "jdata.h" +#include "lm.h" +#include "log.h" +#include "meta_io.h" +#include "ops_file.h" +#include "ops_vm.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +/* "bad" is for NFS support */ +struct filldir_bad_entry { + char *fbe_name; + unsigned int fbe_length; + uint64_t fbe_offset; + struct gfs2_inum fbe_inum; + unsigned int fbe_type; +}; + +struct filldir_bad { + struct gfs2_sbd *fdb_sbd; + + struct filldir_bad_entry *fdb_entry; + unsigned int fdb_entry_num; + unsigned int fdb_entry_off; + + char *fdb_name; + unsigned int fdb_name_size; + unsigned int fdb_name_off; +}; + +/* For regular, non-NFS */ +struct filldir_reg { + struct gfs2_sbd *fdr_sbd; + int fdr_prefetch; + + filldir_t fdr_filldir; + void *fdr_opaque; +}; + +typedef ssize_t(*do_rw_t) (struct file *file, + char *buf, + size_t size, loff_t *offset, + unsigned int num_gh, struct gfs2_holder *ghs); + +/** + * gfs2_llseek - seek to a location in a file + * @file: the file + * @offset: the offset + * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END) + * + * SEEK_END requires the glock for the file because it references the + * file's size. + * + * Returns: The new offset, or errno + */ + +static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + struct gfs2_holder i_gh; + loff_t error; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + if (origin == 2) { + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, + &i_gh); + if (!error) { + error = remote_llseek(file, offset, origin); + gfs2_glock_dq_uninit(&i_gh); + } + } else + error = remote_llseek(file, offset, origin); + + return error; +} + +#define vma2state(vma) \ +((((vma)->vm_flags & (VM_MAYWRITE | VM_MAYSHARE)) == \ + (VM_MAYWRITE | VM_MAYSHARE)) ? \ + LM_ST_EXCLUSIVE : LM_ST_SHARED) \ + +static ssize_t walk_vm_hard(struct file *file, char *buf, size_t size, + loff_t *offset, do_rw_t operation) +{ + struct gfs2_holder *ghs; + unsigned int num_gh = 0; + ssize_t count; + + { + struct super_block *sb = file->f_dentry->d_inode->i_sb; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start = (unsigned long)buf; + unsigned long end = start + size; + int dumping = (current->flags & PF_DUMPCORE); + unsigned int x = 0; + + for (vma = find_vma(mm, start); vma; vma = vma->vm_next) { + if (end <= vma->vm_start) + break; + if (vma->vm_file && + vma->vm_file->f_dentry->d_inode->i_sb == sb) { + num_gh++; + } + } + + ghs = kmalloc((num_gh + 1) * sizeof(struct gfs2_holder), + GFP_KERNEL); + if (!ghs) { + if (!dumping) + up_read(&mm->mmap_sem); + return -ENOMEM; + } + + for (vma = find_vma(mm, start); vma; vma = vma->vm_next) { + if (end <= vma->vm_start) + break; + if (vma->vm_file) { + struct inode *inode; + inode = vma->vm_file->f_dentry->d_inode; + if (inode->i_sb == sb) + gfs2_holder_init(get_v2ip(inode)->i_gl, + vma2state(vma), + 0, &ghs[x++]); + } + } + + if (!dumping) + up_read(&mm->mmap_sem); + + gfs2_assert(get_v2sdp(sb), x == num_gh,); + } + + count = operation(file, buf, size, offset, num_gh, ghs); + + while (num_gh--) + gfs2_holder_uninit(&ghs[num_gh]); + kfree(ghs); + + return count; +} + +/** + * walk_vm - Walk the vmas associated with a buffer for read or write. + * If any of them are gfs2, pass the gfs2 inode down to the read/write + * worker function so that locks can be acquired in the correct order. + * @file: The file to read/write from/to + * @buf: The buffer to copy to/from + * @size: The amount of data requested + * @offset: The current file offset + * @operation: The read or write worker function + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, errno on failure + */ + +static ssize_t walk_vm(struct file *file, char *buf, size_t size, + loff_t *offset, do_rw_t operation) +{ + if (current->mm) { + struct super_block *sb = file->f_dentry->d_inode->i_sb; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start = (unsigned long)buf; + unsigned long end = start + size; + int dumping = (current->flags & PF_DUMPCORE); + + if (!dumping) + down_read(&mm->mmap_sem); + + for (vma = find_vma(mm, start); vma; vma = vma->vm_next) { + if (end <= vma->vm_start) + break; + if (vma->vm_file && + vma->vm_file->f_dentry->d_inode->i_sb == sb) + goto do_locks; + } + + if (!dumping) + up_read(&mm->mmap_sem); + } + + { + struct gfs2_holder gh; + return operation(file, buf, size, offset, 0, &gh); + } + + do_locks: + return walk_vm_hard(file, buf, size, offset, operation); +} + +static ssize_t do_jdata_read(struct file *file, char *buf, size_t size, + loff_t *offset) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + ssize_t count = 0; + + if (*offset < 0) + return -EINVAL; + if (!access_ok(VERIFY_WRITE, buf, size)) + return -EFAULT; + + if (!(file->f_flags & O_LARGEFILE)) { + if (*offset >= 0x7FFFFFFFull) + return -EFBIG; + if (*offset + size > 0x7FFFFFFFull) + size = 0x7FFFFFFFull - *offset; + } + + count = gfs2_jdata_read(ip, buf, *offset, size, gfs2_copy2user); + + if (count > 0) + *offset += count; + + return count; +} + +/** + * do_read_direct - Read bytes from a file + * @file: The file to read from + * @buf: The buffer to copy into + * @size: The amount of data requested + * @offset: The current file offset + * @num_gh: The number of other locks we need to do the read + * @ghs: the locks we need plus one for our lock + * + * Outputs: Offset - updated according to number of bytes read + * + * Returns: The number of bytes read, errno on failure + */ + +static ssize_t do_read_direct(struct file *file, char *buf, size_t size, + loff_t *offset, unsigned int num_gh, + struct gfs2_holder *ghs) +{ + struct inode *inode = file->f_mapping->host; + struct gfs2_inode *ip = get_v2ip(inode); + unsigned int state = LM_ST_DEFERRED; + int flags = 0; + unsigned int x; + ssize_t count = 0; + int error; + + for (x = 0; x < num_gh; x++) + if (ghs[x].gh_gl == ip->i_gl) { + state = LM_ST_SHARED; + flags |= GL_LOCAL_EXCL; + break; + } + + gfs2_holder_init(ip->i_gl, state, flags, &ghs[num_gh]); + + error = gfs2_glock_nq_m(num_gh + 1, ghs); + if (error) + goto out; + + error = -EINVAL; + if (gfs2_is_jdata(ip)) + goto out_gunlock; + + if (gfs2_is_stuffed(ip)) { + size_t mask = bdev_hardsect_size(inode->i_sb->s_bdev) - 1; + + if (((*offset) & mask) || (((unsigned long)buf) & mask)) + goto out_gunlock; + + count = do_jdata_read(file, buf, size & ~mask, offset); + } else + count = generic_file_read(file, buf, size, offset); + + error = 0; + + out_gunlock: + gfs2_glock_dq_m(num_gh + 1, ghs); + + out: + gfs2_holder_uninit(&ghs[num_gh]); + + return (count) ? count : error; +} + +/** + * do_read_buf - Read bytes from a file + * @file: The file to read from + * @buf: The buffer to copy into + * @size: The amount of data requested + * @offset: The current file offset + * @num_gh: The number of other locks we need to do the read + * @ghs: the locks we need plus one for our lock + * + * Outputs: Offset - updated according to number of bytes read + * + * Returns: The number of bytes read, errno on failure + */ + +static ssize_t do_read_buf(struct file *file, char *buf, size_t size, + loff_t *offset, unsigned int num_gh, + struct gfs2_holder *ghs) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + ssize_t count = 0; + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]); + + error = gfs2_glock_nq_m_atime(num_gh + 1, ghs); + if (error) + goto out; + + if (gfs2_is_jdata(ip) || + (gfs2_is_stuffed(ip) && !test_bit(GIF_PAGED, &ip->i_flags))) + count = do_jdata_read(file, buf, size, offset); + else + count = generic_file_read(file, buf, size, offset); + + gfs2_glock_dq_m(num_gh + 1, ghs); + + out: + gfs2_holder_uninit(&ghs[num_gh]); + + return (count) ? count : error; +} + +/** + * gfs2_read - Read bytes from a file + * @file: The file to read from + * @buf: The buffer to copy into + * @size: The amount of data requested + * @offset: The current file offset + * + * Outputs: Offset - updated according to number of bytes read + * + * Returns: The number of bytes read, errno on failure + */ + +static ssize_t gfs2_read(struct file *file, char *buf, size_t size, + loff_t *offset) +{ + atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file); + + if (file->f_flags & O_DIRECT) + return walk_vm(file, buf, size, offset, do_read_direct); + else + return walk_vm(file, buf, size, offset, do_read_buf); +} + +/** + * grope_mapping - feel up a mapping that needs to be written + * @buf: the start of the memory to be written + * @size: the size of the memory to be written + * + * We do this after acquiring the locks on the mapping, + * but before starting the write transaction. We need to make + * sure that we don't cause recursive transactions if blocks + * need to be allocated to the file backing the mapping. + * + * Returns: errno + */ + +static int grope_mapping(char *buf, size_t size) +{ + unsigned long start = (unsigned long)buf; + unsigned long stop = start + size; + char c; + + while (start < stop) { + if (copy_from_user(&c, (char *)start, 1)) + return -EFAULT; + start += PAGE_CACHE_SIZE; + start &= PAGE_CACHE_MASK; + } + + return 0; +} + +/** + * do_write_direct_alloc - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, errno on failure + */ + +static ssize_t do_write_direct_alloc(struct file *file, char *buf, size_t size, + loff_t *offset) +{ + struct inode *inode = file->f_mapping->host; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_alloc *al = NULL; + struct iovec local_iov = { .iov_base = buf, .iov_len = size }; + struct buffer_head *dibh; + unsigned int data_blocks, ind_blocks; + ssize_t count; + int error; + + gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks); + + al = gfs2_alloc_get(ip); + + error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto fail; + + error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto fail_gunlock_q; + + al->al_requested = data_blocks + ind_blocks; + + error = gfs2_inplace_reserve(ip); + if (error) + goto fail_gunlock_q; + + error = gfs2_trans_begin(sdp, + al->al_rgd->rd_ri.ri_length + ind_blocks + + RES_DINODE + RES_STATFS + RES_QUOTA, 0); + if (error) + goto fail_ipres; + + if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) { + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ? + (~(S_ISUID | S_ISGID)) : (~S_ISUID); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + if (gfs2_is_stuffed(ip)) { + error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_sync, NULL); + if (error) + goto fail_end_trans; + } + + count = generic_file_write_nolock(file, &local_iov, 1, offset); + if (count < 0) { + error = count; + goto fail_end_trans; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + if (ip->i_di.di_size < inode->i_size) + ip->i_di.di_size = inode->i_size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + gfs2_trans_end(sdp); + + if (file->f_flags & O_SYNC) + gfs2_log_flush_glock(ip->i_gl); + + gfs2_inplace_release(ip); + gfs2_quota_unlock(ip); + gfs2_alloc_put(ip); + + return count; + + fail_end_trans: + gfs2_trans_end(sdp); + + fail_ipres: + gfs2_inplace_release(ip); + + fail_gunlock_q: + gfs2_quota_unlock(ip); + + fail: + gfs2_alloc_put(ip); + + return error; +} + +/** + * do_write_direct - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * @num_gh: The number of other locks we need to do the read + * @gh: the locks we need plus one for our lock + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, errno on failure + */ + +static ssize_t do_write_direct(struct file *file, char *buf, size_t size, + loff_t *offset, unsigned int num_gh, + struct gfs2_holder *ghs) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_file *fp = get_v2fp(file); + unsigned int state = LM_ST_DEFERRED; + int alloc_required; + unsigned int x; + size_t s; + ssize_t count = 0; + int error; + + if (test_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags)) + state = LM_ST_EXCLUSIVE; + else + for (x = 0; x < num_gh; x++) + if (ghs[x].gh_gl == ip->i_gl) { + state = LM_ST_EXCLUSIVE; + break; + } + + restart: + gfs2_holder_init(ip->i_gl, state, 0, &ghs[num_gh]); + + error = gfs2_glock_nq_m(num_gh + 1, ghs); + if (error) + goto out; + + error = -EINVAL; + if (gfs2_is_jdata(ip)) + goto out_gunlock; + + if (num_gh) { + error = grope_mapping(buf, size); + if (error) + goto out_gunlock; + } + + if (file->f_flags & O_APPEND) + *offset = ip->i_di.di_size; + + if (!(file->f_flags & O_LARGEFILE)) { + error = -EFBIG; + if (*offset >= 0x7FFFFFFFull) + goto out_gunlock; + if (*offset + size > 0x7FFFFFFFull) + size = 0x7FFFFFFFull - *offset; + } + + if (gfs2_is_stuffed(ip) || + *offset + size > ip->i_di.di_size || + ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID))) + alloc_required = TRUE; + else { + error = gfs2_write_alloc_required(ip, *offset, size, + &alloc_required); + if (error) + goto out_gunlock; + } + + if (alloc_required && state != LM_ST_EXCLUSIVE) { + gfs2_glock_dq_m(num_gh + 1, ghs); + gfs2_holder_uninit(&ghs[num_gh]); + state = LM_ST_EXCLUSIVE; + goto restart; + } + + if (alloc_required) { + set_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags); + + /* split large writes into smaller atomic transactions */ + while (size) { + s = gfs2_tune_get(sdp, gt_max_atomic_write); + if (s > size) + s = size; + + error = do_write_direct_alloc(file, buf, s, offset); + if (error < 0) + goto out_gunlock; + + buf += error; + size -= error; + count += error; + } + } else { + struct iovec local_iov = { .iov_base = buf, .iov_len = size }; + struct gfs2_holder t_gh; + + clear_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags); + + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, + GL_NEVER_RECURSE, &t_gh); + if (error) + goto out_gunlock; + + count = generic_file_write_nolock(file, &local_iov, 1, offset); + + gfs2_glock_dq_uninit(&t_gh); + } + + error = 0; + + out_gunlock: + gfs2_glock_dq_m(num_gh + 1, ghs); + + out: + gfs2_holder_uninit(&ghs[num_gh]); + + return (count) ? count : error; +} + +/** + * do_do_write_buf - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, errno on failure + */ + +static ssize_t do_do_write_buf(struct file *file, char *buf, size_t size, + loff_t *offset) +{ + struct inode *inode = file->f_mapping->host; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_alloc *al = NULL; + struct buffer_head *dibh; + unsigned int data_blocks, ind_blocks; + int alloc_required, journaled; + ssize_t count; + int error; + + journaled = gfs2_is_jdata(ip); + + gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks); + + error = gfs2_write_alloc_required(ip, *offset, size, &alloc_required); + if (error) + return error; + + if (alloc_required) { + al = gfs2_alloc_get(ip); + + error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto fail; + + error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto fail_gunlock_q; + + al->al_requested = data_blocks + ind_blocks; + + error = gfs2_inplace_reserve(ip); + if (error) + goto fail_gunlock_q; + + error = gfs2_trans_begin(sdp, + al->al_rgd->rd_ri.ri_length + + ind_blocks + + ((journaled) ? data_blocks : 0) + + RES_DINODE + RES_STATFS + RES_QUOTA, + 0); + if (error) + goto fail_ipres; + } else { + error = gfs2_trans_begin(sdp, + ((journaled) ? data_blocks : 0) + + RES_DINODE, + 0); + if (error) + goto fail_ipres; + } + + if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) { + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ? + (~(S_ISUID | S_ISGID)) : (~S_ISUID); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + if (journaled || + (gfs2_is_stuffed(ip) && !test_bit(GIF_PAGED, &ip->i_flags) && + *offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))) { + + count = gfs2_jdata_write(ip, buf, *offset, size, + gfs2_copy_from_user); + if (count < 0) { + error = count; + goto fail_end_trans; + } + + *offset += count; + } else { + struct iovec local_iov = { .iov_base = buf, .iov_len = size }; + + count = generic_file_write_nolock(file, &local_iov, 1, offset); + if (count < 0) { + error = count; + goto fail_end_trans; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + if (ip->i_di.di_size < inode->i_size) + ip->i_di.di_size = inode->i_size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + gfs2_trans_end(sdp); + + if (file->f_flags & O_SYNC) + gfs2_log_flush_glock(ip->i_gl); + + if (alloc_required) { + gfs2_assert_warn(sdp, count != size || + al->al_alloced); + gfs2_inplace_release(ip); + gfs2_quota_unlock(ip); + gfs2_alloc_put(ip); + } + + return count; + + fail_end_trans: + gfs2_trans_end(sdp); + + fail_ipres: + if (alloc_required) + gfs2_inplace_release(ip); + + fail_gunlock_q: + if (alloc_required) + gfs2_quota_unlock(ip); + + fail: + if (alloc_required) + gfs2_alloc_put(ip); + + return error; +} + +/** + * do_write_buf - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * @num_gh: The number of other locks we need to do the read + * @gh: the locks we need plus one for our lock + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, errno on failure + */ + +static ssize_t do_write_buf(struct file *file, char *buf, size_t size, + loff_t *offset, unsigned int num_gh, + struct gfs2_holder *ghs) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + size_t s; + ssize_t count = 0; + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]); + + error = gfs2_glock_nq_m(num_gh + 1, ghs); + if (error) + goto out; + + if (num_gh) { + error = grope_mapping(buf, size); + if (error) + goto out_gunlock; + } + + if (file->f_flags & O_APPEND) + *offset = ip->i_di.di_size; + + if (!(file->f_flags & O_LARGEFILE)) { + error = -EFBIG; + if (*offset >= 0x7FFFFFFFull) + goto out_gunlock; + if (*offset + size > 0x7FFFFFFFull) + size = 0x7FFFFFFFull - *offset; + } + + /* split large writes into smaller atomic transactions */ + while (size) { + s = gfs2_tune_get(sdp, gt_max_atomic_write); + if (s > size) + s = size; + + error = do_do_write_buf(file, buf, s, offset); + if (error < 0) + goto out_gunlock; + + buf += error; + size -= error; + count += error; + } + + error = 0; + + out_gunlock: + gfs2_glock_dq_m(num_gh + 1, ghs); + + out: + gfs2_holder_uninit(&ghs[num_gh]); + + return (count) ? count : error; +} + +/** + * gfs2_write - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, errno on failure + */ + +static ssize_t gfs2_write(struct file *file, const char *buf, size_t size, + loff_t *offset) +{ + struct inode *inode = file->f_mapping->host; + ssize_t count; + + atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_file); + + if (*offset < 0) + return -EINVAL; + if (!access_ok(VERIFY_READ, buf, size)) + return -EFAULT; + + down(&inode->i_sem); + if (file->f_flags & O_DIRECT) + count = walk_vm(file, (char *)buf, size, offset, + do_write_direct); + else + count = walk_vm(file, (char *)buf, size, offset, do_write_buf); + up(&inode->i_sem); + + return count; +} + +/** + * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read() + * @opaque: opaque data used by the function + * @name: the name of the directory entry + * @length: the length of the name + * @offset: the entry's offset in the directory + * @inum: the inode number the entry points to + * @type: the type of inode the entry points to + * + * Returns: 0 on success, 1 if buffer full + */ + +static int filldir_reg_func(void *opaque, const char *name, unsigned int length, + uint64_t offset, struct gfs2_inum *inum, + unsigned int type) +{ + struct filldir_reg *fdr = (struct filldir_reg *)opaque; + struct gfs2_sbd *sdp = fdr->fdr_sbd; + int error; + + error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset, + inum->no_formal_ino, type); + if (error) + return 1; + + if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) { + gfs2_glock_prefetch_num(sdp, + inum->no_addr, &gfs2_inode_glops, + LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY); + gfs2_glock_prefetch_num(sdp, + inum->no_addr, &gfs2_iopen_glops, + LM_ST_SHARED, LM_FLAG_TRY); + } + + return 0; +} + +/** + * readdir_reg - Read directory entries from a directory + * @file: The directory to read from + * @dirent: Buffer for dirents + * @filldir: Function used to do the copying + * + * Returns: errno + */ + +static int readdir_reg(struct file *file, void *dirent, filldir_t filldir) +{ + struct gfs2_inode *dip = get_v2ip(file->f_mapping->host); + struct filldir_reg fdr; + struct gfs2_holder d_gh; + uint64_t offset = file->f_pos; + int error; + + fdr.fdr_sbd = dip->i_sbd; + fdr.fdr_prefetch = TRUE; + fdr.fdr_filldir = filldir; + fdr.fdr_opaque = dirent; + + gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); + error = gfs2_glock_nq_atime(&d_gh); + if (error) { + gfs2_holder_uninit(&d_gh); + return error; + } + + error = gfs2_dir_read(dip, &offset, &fdr, filldir_reg_func); + + gfs2_glock_dq_uninit(&d_gh); + + file->f_pos = offset; + + return error; +} + +/** + * filldir_bad_func - Report a directory entry to the caller of gfs2_dir_read() + * @opaque: opaque data used by the function + * @name: the name of the directory entry + * @length: the length of the name + * @offset: the entry's offset in the directory + * @inum: the inode number the entry points to + * @type: the type of inode the entry points to + * + * For supporting NFS. + * + * Returns: 0 on success, 1 if buffer full + */ + +static int filldir_bad_func(void *opaque, const char *name, unsigned int length, + uint64_t offset, struct gfs2_inum *inum, + unsigned int type) +{ + struct filldir_bad *fdb = (struct filldir_bad *)opaque; + struct gfs2_sbd *sdp = fdb->fdb_sbd; + struct filldir_bad_entry *fbe; + + if (fdb->fdb_entry_off == fdb->fdb_entry_num || + fdb->fdb_name_off + length > fdb->fdb_name_size) + return 1; + + fbe = &fdb->fdb_entry[fdb->fdb_entry_off]; + fbe->fbe_name = fdb->fdb_name + fdb->fdb_name_off; + memcpy(fbe->fbe_name, name, length); + fbe->fbe_length = length; + fbe->fbe_offset = offset; + fbe->fbe_inum = *inum; + fbe->fbe_type = type; + + fdb->fdb_entry_off++; + fdb->fdb_name_off += length; + + if (!(length == 1 && *name == '.')) { + gfs2_glock_prefetch_num(sdp, + inum->no_addr, &gfs2_inode_glops, + LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY); + gfs2_glock_prefetch_num(sdp, + inum->no_addr, &gfs2_iopen_glops, + LM_ST_SHARED, LM_FLAG_TRY); + } + + return 0; +} + +/** + * readdir_bad - Read directory entries from a directory + * @file: The directory to read from + * @dirent: Buffer for dirents + * @filldir: Function used to do the copying + * + * For supporting NFS. + * + * Returns: errno + */ + +static int readdir_bad(struct file *file, void *dirent, filldir_t filldir) +{ + struct gfs2_inode *dip = get_v2ip(file->f_mapping->host); + struct gfs2_sbd *sdp = dip->i_sbd; + struct filldir_reg fdr; + unsigned int entries, size; + struct filldir_bad *fdb; + struct gfs2_holder d_gh; + uint64_t offset = file->f_pos; + unsigned int x; + struct filldir_bad_entry *fbe; + int error; + + entries = gfs2_tune_get(sdp, gt_entries_per_readdir); + size = sizeof(struct filldir_bad) + + entries * (sizeof(struct filldir_bad_entry) + GFS2_FAST_NAME_SIZE); + + fdb = kmalloc(size, GFP_KERNEL); + if (!fdb) + return -ENOMEM; + memset(fdb, 0, size); + + fdb->fdb_sbd = sdp; + fdb->fdb_entry = (struct filldir_bad_entry *)(fdb + 1); + fdb->fdb_entry_num = entries; + fdb->fdb_name = ((char *)fdb) + sizeof(struct filldir_bad) + + entries * sizeof(struct filldir_bad_entry); + fdb->fdb_name_size = entries * GFS2_FAST_NAME_SIZE; + + gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); + error = gfs2_glock_nq_atime(&d_gh); + if (error) { + gfs2_holder_uninit(&d_gh); + goto out; + } + + error = gfs2_dir_read(dip, &offset, fdb, filldir_bad_func); + + gfs2_glock_dq_uninit(&d_gh); + + fdr.fdr_sbd = sdp; + fdr.fdr_prefetch = FALSE; + fdr.fdr_filldir = filldir; + fdr.fdr_opaque = dirent; + + for (x = 0; x < fdb->fdb_entry_off; x++) { + fbe = &fdb->fdb_entry[x]; + + error = filldir_reg_func(&fdr, + fbe->fbe_name, fbe->fbe_length, + fbe->fbe_offset, + &fbe->fbe_inum, fbe->fbe_type); + if (error) { + file->f_pos = fbe->fbe_offset; + error = 0; + goto out; + } + } + + file->f_pos = offset; + + out: + kfree(fdb); + + return error; +} + +/** + * gfs2_readdir - Read directory entries from a directory + * @file: The directory to read from + * @dirent: Buffer for dirents + * @filldir: Function used to do the copying + * + * Returns: errno + */ + +static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + int error; + + atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file); + + if (strcmp(current->comm, "nfsd") != 0) + error = readdir_reg(file, dirent, filldir); + else + error = readdir_bad(file, dirent, filldir); + + return error; +} + +/** + * gfs2_ioctl - do an ioctl on a file + * @inode: the inode + * @file: the file pointer + * @cmd: the ioctl command + * @arg: the argument + * + * Returns: errno + */ + +static int gfs2_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct gfs2_inode *ip = get_v2ip(inode); + + atomic_inc(&ip->i_sbd->sd_ops_file); + + switch (cmd) { + case GFS2_IOCTL_IDENTIFY: { + unsigned int x = GFS2_MAGIC; + if (copy_to_user((unsigned int *)arg, &x, sizeof(unsigned int))) + return -EFAULT; + return 0; + } + + case GFS2_IOCTL_SUPER: + return gfs2_ioctl_i(ip, (void *)arg); + + default: + return -ENOTTY; + } +} + +/** + * gfs2_mmap - + * @file: The file to map + * @vma: The VMA which described the mapping + * + * Returns: 0 or error code + */ + +static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + struct gfs2_holder i_gh; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); + error = gfs2_glock_nq_atime(&i_gh); + if (error) { + gfs2_holder_uninit(&i_gh); + return error; + } + + if (gfs2_is_jdata(ip)) { + if (vma->vm_flags & VM_MAYSHARE) + error = -EOPNOTSUPP; + else + vma->vm_ops = &gfs2_vm_ops_private; + } else { + /* This is VM_MAYWRITE instead of VM_WRITE because a call + to mprotect() can turn on VM_WRITE later. */ + + if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == + (VM_MAYSHARE | VM_MAYWRITE)) + vma->vm_ops = &gfs2_vm_ops_sharewrite; + else + vma->vm_ops = &gfs2_vm_ops_private; + } + + gfs2_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * gfs2_open - open a file + * @inode: the inode to open + * @file: the struct file for this opening + * + * Returns: errno + */ + +static int gfs2_open(struct inode *inode, struct file *file) +{ + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_holder i_gh; + struct gfs2_file *fp; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + fp = kmalloc(sizeof(struct gfs2_file), GFP_KERNEL); + if (!fp) + return -ENOMEM; + memset(fp, 0, sizeof(struct gfs2_file)); + + init_MUTEX(&fp->f_fl_mutex); + + fp->f_inode = ip; + fp->f_vfile = file; + + gfs2_assert_warn(ip->i_sbd, !get_v2fp(file)); + set_v2fp(file, fp); + + if (S_ISREG(ip->i_di.di_mode)) { + error = gfs2_glock_nq_init(ip->i_gl, + LM_ST_SHARED, LM_FLAG_ANY, + &i_gh); + if (error) + goto fail; + + if (!(file->f_flags & O_LARGEFILE) && + ip->i_di.di_size > 0x7FFFFFFFull) { + error = -EFBIG; + goto fail_gunlock; + } + + /* Listen to the Direct I/O flag */ + + if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO) + file->f_flags |= O_DIRECT; + + /* Don't let the user open O_DIRECT on a jdata file */ + + if ((file->f_flags & O_DIRECT) && gfs2_is_jdata(ip)) { + error = -EINVAL; + goto fail_gunlock; + } + + gfs2_glock_dq_uninit(&i_gh); + } + + return 0; + + fail_gunlock: + gfs2_glock_dq_uninit(&i_gh); + + fail: + set_v2fp(file, NULL); + kfree(fp); + + return error; +} + +/** + * gfs2_close - called to close a struct file + * @inode: the inode the struct file belongs to + * @file: the struct file being closed + * + * Returns: errno + */ + +static int gfs2_close(struct inode *inode, struct file *file) +{ + struct gfs2_sbd *sdp = get_v2sdp(inode->i_sb); + struct gfs2_file *fp; + + atomic_inc(&sdp->sd_ops_file); + + fp = get_v2fp(file); + set_v2fp(file, NULL); + + if (gfs2_assert_warn(sdp, fp)) + return -EIO; + + kfree(fp); + + return 0; +} + +/** + * gfs2_fsync - sync the dirty data for a file (across the cluster) + * @file: the file that points to the dentry (we ignore this) + * @dentry: the dentry that points to the inode to sync + * + * Returns: errno + */ + +static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct gfs2_inode *ip = get_v2ip(dentry->d_inode); + + atomic_inc(&ip->i_sbd->sd_ops_file); + gfs2_log_flush_glock(ip->i_gl); + + return 0; +} + +/** + * gfs2_lock - acquire/release a posix lock on a file + * @file: the file pointer + * @cmd: either modify or retrieve lock state, possibly wait + * @fl: type and range of lock + * + * Returns: errno + */ + +static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + struct lm_lockname name = + { .ln_number = ip->i_num.no_addr, + .ln_type = LM_TYPE_PLOCK }; + + atomic_inc(&sdp->sd_ops_file); + + if (!(fl->fl_flags & FL_POSIX)) + return -ENOLCK; + if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + return -ENOLCK; + + if (sdp->sd_args.ar_localflocks) { + if (IS_GETLK(cmd)) { + struct file_lock *tmp; + lock_kernel(); + tmp = posix_test_lock(file, fl); + fl->fl_type = F_UNLCK; + if (tmp) + memcpy(fl, tmp, sizeof(struct file_lock)); + unlock_kernel(); + return 0; + } else { + int error; + lock_kernel(); + error = posix_lock_file_wait(file, fl); + unlock_kernel(); + return error; + } + } + + if (IS_GETLK(cmd)) + return gfs2_lm_plock_get(sdp, &name, file, fl); + else if (fl->fl_type == F_UNLCK) + return gfs2_lm_punlock(sdp, &name, file, fl); + else + return gfs2_lm_plock(sdp, &name, file, cmd, fl); +} + +/** + * gfs2_sendfile - Send bytes to a file or socket + * @in_file: The file to read from + * @out_file: The file to write to + * @count: The amount of data + * @offset: The beginning file offset + * + * Outputs: offset - updated according to number of bytes read + * + * Returns: The number of bytes sent, errno on failure + */ + +static ssize_t gfs2_sendfile(struct file *in_file, loff_t *offset, size_t count, + read_actor_t actor, void __user *target) +{ + struct gfs2_inode *ip = get_v2ip(in_file->f_mapping->host); + struct gfs2_holder gh; + ssize_t retval; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); + + retval = gfs2_glock_nq_atime(&gh); + if (retval) + goto out; + + if (gfs2_is_jdata(ip)) + retval = -EOPNOTSUPP; + else + retval = generic_file_sendfile(in_file, offset, count, actor, + target); + + gfs2_glock_dq(&gh); + + out: + gfs2_holder_uninit(&gh); + + return retval; +} + +/** + * do_flock - Acquire a flock on a file + * @file: + * @cmd: + * @fl: + * + * Returns: errno + */ + +static int do_flock(struct file *file, int cmd, struct file_lock *fl) +{ + struct gfs2_file *fp = get_v2fp(file); + struct gfs2_holder *fl_gh = &fp->f_fl_gh; + struct gfs2_inode *ip = fp->f_inode; + struct gfs2_glock *gl; + unsigned int state; + int flags; + int error = 0; + + state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; + flags = ((IS_SETLKW(cmd)) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; + + down(&fp->f_fl_mutex); + + gl = fl_gh->gh_gl; + if (gl) { + if (fl_gh->gh_state == state) + goto out; + gfs2_glock_hold(gl); + flock_lock_file_wait(file, + &(struct file_lock){.fl_type = F_UNLCK}); + gfs2_glock_dq_uninit(fl_gh); + } else { + error = gfs2_glock_get(ip->i_sbd, + ip->i_num.no_addr, &gfs2_flock_glops, + CREATE, &gl); + if (error) + goto out; + } + + gfs2_holder_init(gl, state, flags, fl_gh); + gfs2_glock_put(gl); + + error = gfs2_glock_nq(fl_gh); + if (error) { + gfs2_holder_uninit(fl_gh); + if (error == GLR_TRYFAILED) + error = -EAGAIN; + } else { + error = flock_lock_file_wait(file, fl); + gfs2_assert_warn(ip->i_sbd, !error); + } + + out: + up(&fp->f_fl_mutex); + + return error; +} + +/** + * do_unflock - Release a flock on a file + * @file: the file + * @fl: + * + */ + +static void do_unflock(struct file *file, struct file_lock *fl) +{ + struct gfs2_file *fp = get_v2fp(file); + struct gfs2_holder *fl_gh = &fp->f_fl_gh; + + down(&fp->f_fl_mutex); + flock_lock_file_wait(file, fl); + if (fl_gh->gh_gl) + gfs2_glock_dq_uninit(fl_gh); + up(&fp->f_fl_mutex); +} + +/** + * gfs2_flock - acquire/release a flock lock on a file + * @file: the file pointer + * @cmd: either modify or retrieve lock state, possibly wait + * @fl: type and range of lock + * + * Returns: errno + */ + +static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + if (!(fl->fl_flags & FL_FLOCK)) + return -ENOLCK; + if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + return -ENOLCK; + + if (sdp->sd_args.ar_localflocks) + return flock_lock_file_wait(file, fl); + + if (fl->fl_type == F_UNLCK) { + do_unflock(file, fl); + return 0; + } else + return do_flock(file, cmd, fl); +} + +struct file_operations gfs2_file_fops = { + .llseek = gfs2_llseek, + .read = gfs2_read, + .write = gfs2_write, + .ioctl = gfs2_ioctl, + .mmap = gfs2_mmap, + .open = gfs2_open, + .release = gfs2_close, + .fsync = gfs2_fsync, + .lock = gfs2_lock, + .sendfile = gfs2_sendfile, + .flock = gfs2_flock, +}; + +struct file_operations gfs2_dir_fops = { + .readdir = gfs2_readdir, + .ioctl = gfs2_ioctl, + .open = gfs2_open, + .release = gfs2_close, + .fsync = gfs2_fsync, + .lock = gfs2_lock, + .flock = gfs2_flock, +}; + --- a/fs/gfs2/ops_file.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_file.h 2005-08-01 14:13:08.012807744 +0800 @@ -0,0 +1,20 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_FILE_DOT_H__ +#define __OPS_FILE_DOT_H__ + +extern struct file_operations gfs2_file_fops; +extern struct file_operations gfs2_dir_fops; + +#endif /* __OPS_FILE_DOT_H__ */ --- a/fs/gfs2/ops_fstype.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_fstype.c 2005-08-01 14:13:08.012807744 +0800 @@ -0,0 +1,973 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "daemon.h" +#include "diaper.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "lm.h" +#include "mount.h" +#include "ops_export.h" +#include "ops_fstype.h" +#include "ops_super.h" +#include "proc.h" +#include "recovery.h" +#include "rgrp.h" +#include "super.h" +#include "unlinked.h" + +#define DO FALSE +#define UNDO TRUE + +#undef NO_DIAPER + +static struct gfs2_sbd *init_sbd(struct super_block *sb) +{ + struct gfs2_sbd *sdp; + unsigned int x; + + sdp = vmalloc(sizeof(struct gfs2_sbd)); + if (!sdp) + return NULL; + + memset(sdp, 0, sizeof(struct gfs2_sbd)); + + set_v2sdp(sb, sdp); + sdp->sd_vfs = sb; + + gfs2_tune_init(&sdp->sd_tune); + + for (x = 0; x < GFS2_GL_HASH_SIZE; x++) { + sdp->sd_gl_hash[x].hb_lock = RW_LOCK_UNLOCKED; + INIT_LIST_HEAD(&sdp->sd_gl_hash[x].hb_list); + } + INIT_LIST_HEAD(&sdp->sd_reclaim_list); + spin_lock_init(&sdp->sd_reclaim_lock); + init_waitqueue_head(&sdp->sd_reclaim_wq); + + init_MUTEX(&sdp->sd_inum_mutex); + spin_lock_init(&sdp->sd_statfs_spin); + init_MUTEX(&sdp->sd_statfs_mutex); + + spin_lock_init(&sdp->sd_rindex_spin); + init_MUTEX(&sdp->sd_rindex_mutex); + INIT_LIST_HEAD(&sdp->sd_rindex_list); + INIT_LIST_HEAD(&sdp->sd_rindex_mru_list); + INIT_LIST_HEAD(&sdp->sd_rindex_recent_list); + + INIT_LIST_HEAD(&sdp->sd_jindex_list); + spin_lock_init(&sdp->sd_jindex_spin); + init_MUTEX(&sdp->sd_jindex_mutex); + + INIT_LIST_HEAD(&sdp->sd_unlinked_list); + spin_lock_init(&sdp->sd_unlinked_spin); + init_MUTEX(&sdp->sd_unlinked_mutex); + + INIT_LIST_HEAD(&sdp->sd_quota_list); + spin_lock_init(&sdp->sd_quota_spin); + init_MUTEX(&sdp->sd_quota_mutex); + + spin_lock_init(&sdp->sd_log_lock); + init_waitqueue_head(&sdp->sd_log_trans_wq); + init_waitqueue_head(&sdp->sd_log_flush_wq); + + INIT_LIST_HEAD(&sdp->sd_log_le_gl); + INIT_LIST_HEAD(&sdp->sd_log_le_buf); + INIT_LIST_HEAD(&sdp->sd_log_le_revoke); + INIT_LIST_HEAD(&sdp->sd_log_le_rg); + INIT_LIST_HEAD(&sdp->sd_log_le_databuf); + + INIT_LIST_HEAD(&sdp->sd_log_blks_list); + init_waitqueue_head(&sdp->sd_log_blks_wait); + + INIT_LIST_HEAD(&sdp->sd_ail1_list); + INIT_LIST_HEAD(&sdp->sd_ail2_list); + + init_MUTEX(&sdp->sd_log_flush_lock); + INIT_LIST_HEAD(&sdp->sd_log_flush_list); + + INIT_LIST_HEAD(&sdp->sd_revoke_list); + + init_MUTEX(&sdp->sd_freeze_lock); + + return sdp; +} + +static void init_vfs(struct gfs2_sbd *sdp) +{ + struct super_block *sb = sdp->sd_vfs; + + sb->s_magic = GFS2_MAGIC; + sb->s_op = &gfs2_super_ops; + sb->s_export_op = &gfs2_export_ops; + sb->s_maxbytes = MAX_LFS_FILESIZE; + + if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) + set_bit(SDF_NOATIME, &sdp->sd_flags); + + /* Don't let the VFS update atimes. GFS2 handles this itself. */ + sb->s_flags |= MS_NOATIME | MS_NODIRATIME; + + /* Set up the buffer cache and fill in some fake block size values + to allow us to read-in the on-disk superblock. */ + sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK); + sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits; + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT; + sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; +} + +static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, + int undo) +{ + struct task_struct *p; + int error = 0; + + if (undo) + goto fail_trans; + + p = kthread_run(gfs2_scand, sdp, "gfs2_scand"); + error = IS_ERR(p); + if (error) { + printk("GFS2: fsid=%s: can't start scand thread: %d\n", + sdp->sd_fsname, error); + return error; + } + sdp->sd_scand_process = p; + + for (sdp->sd_glockd_num = 0; + sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd; + sdp->sd_glockd_num++) { + p = kthread_run(gfs2_glockd, sdp, "gfs2_glockd"); + error = IS_ERR(p); + if (error) { + printk("GFS2: fsid=%s: can't start glockd thread: %d\n", + sdp->sd_fsname, error); + goto fail; + } + sdp->sd_glockd_process[sdp->sd_glockd_num] = p; + } + + error = gfs2_glock_nq_num(sdp, + GFS2_MOUNT_LOCK, &gfs2_nondisk_glops, + LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE, + mount_gh); + if (error) { + printk("GFS2: fsid=%s: can't acquire mount glock: %d\n", + sdp->sd_fsname, error); + goto fail; + } + + error = gfs2_glock_nq_num(sdp, + GFS2_LIVE_LOCK, &gfs2_nondisk_glops, + LM_ST_SHARED, + LM_FLAG_NOEXP | GL_EXACT | GL_NEVER_RECURSE, + &sdp->sd_live_gh); + if (error) { + printk("GFS2: fsid=%s: can't acquire live glock: %d\n", + sdp->sd_fsname, error); + goto fail_mount; + } + + error = gfs2_glock_get(sdp, GFS2_RENAME_LOCK, &gfs2_nondisk_glops, + CREATE, &sdp->sd_rename_gl); + if (error) { + printk("GFS2: fsid=%s: can't create rename glock: %d\n", + sdp->sd_fsname, error); + goto fail_live; + } + + error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops, + CREATE, &sdp->sd_trans_gl); + if (error) { + printk("GFS2: fsid=%s: can't create transaction glock: %d\n", + sdp->sd_fsname, error); + goto fail_rename; + } + set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags); + + return 0; + + fail_trans: + gfs2_glock_put(sdp->sd_trans_gl); + + fail_rename: + gfs2_glock_put(sdp->sd_rename_gl); + + fail_live: + gfs2_glock_dq_uninit(&sdp->sd_live_gh); + + fail_mount: + gfs2_glock_dq_uninit(mount_gh); + + fail: + while (sdp->sd_glockd_num--) + kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]); + + kthread_stop(sdp->sd_scand_process); + + return error; +} + +static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) +{ + struct super_block *sb = sdp->sd_vfs; + struct gfs2_holder sb_gh; + int error = 0; + + if (undo) { + gfs2_inode_put(sdp->sd_master_dir); + return 0; + } + + error = gfs2_glock_nq_num(sdp, + GFS2_SB_LOCK, &gfs2_meta_glops, + LM_ST_SHARED, 0, &sb_gh); + if (error) { + printk("GFS2: fsid=%s: can't acquire superblock glock: %d\n", + sdp->sd_fsname, error); + return error; + } + + error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); + if (error) { + printk("GFS2: fsid=%s: can't read superblock: %d\n", + sdp->sd_fsname, error); + goto out; + } + + /* Set up the buffer cache and SB for real */ + error = -EINVAL; + if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { + printk("GFS2: fsid=%s: FS block size (%u) is too small " + "for device block size (%u)\n", + sdp->sd_fsname, + sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); + goto out; + } + if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { + printk("GFS2: fsid=%s: FS block size (%u) is too big " + "for machine page size (%u)\n", + sdp->sd_fsname, + sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); + goto out; + } + + /* Get rid of buffers from the original block size */ + sb_gh.gh_gl->gl_ops->go_inval(sb_gh.gh_gl, DIO_METADATA | DIO_DATA); + sb_gh.gh_gl->gl_aspace->i_blkbits = sdp->sd_sb.sb_bsize_shift; + + sb_set_blocksize(sb, sdp->sd_sb.sb_bsize); +#ifndef NO_DIAPER + set_blocksize(gfs2_diaper_2real(sb->s_bdev), sdp->sd_sb.sb_bsize); +#endif + + error = gfs2_lookup_master_dir(sdp); + if (error) + printk("GFS2: fsid=%s: can't read in master directory: %d\n", + sdp->sd_fsname, error); + + out: + gfs2_glock_dq_uninit(&sb_gh); + + return error; +} + +static int init_journal(struct gfs2_sbd *sdp, int undo) +{ + struct gfs2_holder ji_gh; + struct task_struct *p; + int jindex = TRUE; + int error = 0; + + if (undo) { + jindex = FALSE; + goto fail_recoverd; + } + + error = gfs2_lookup_simple(sdp->sd_master_dir, "jindex", + &sdp->sd_jindex); + if (error) { + printk("GFS2: fsid=%s: can't lookup journal index: %d\n", + sdp->sd_fsname, error); + return error; + } + set_bit(GLF_STICKY, &sdp->sd_jindex->i_gl->gl_flags); + + /* Load in the journal index special file */ + + error = gfs2_jindex_hold(sdp, &ji_gh); + if (error) { + printk("GFS2: fsid=%s: can't read journal index: %d\n", + sdp->sd_fsname, error); + goto fail; + } + + error = -EINVAL; + if (!gfs2_jindex_size(sdp)) { + printk("GFS2: fsid=%s: no journals!\n", + sdp->sd_fsname); + goto fail_jindex; + } + + if (sdp->sd_args.ar_spectator) { + sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0); + sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; + } else { + if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) { + printk("GFS2: fsid=%s: can't mount journal #%u\n", + sdp->sd_fsname, sdp->sd_lockstruct.ls_jid); + printk("GFS2: fsid=%s: " + "there are only %u journals (0 - %u)\n", + sdp->sd_fsname, + gfs2_jindex_size(sdp), + gfs2_jindex_size(sdp) - 1); + goto fail_jindex; + } + sdp->sd_jdesc = gfs2_jdesc_find(sdp, sdp->sd_lockstruct.ls_jid); + + error = gfs2_glock_nq_num(sdp, + sdp->sd_lockstruct.ls_jid, + &gfs2_journal_glops, + LM_ST_EXCLUSIVE, LM_FLAG_NOEXP, + &sdp->sd_journal_gh); + if (error) { + printk("GFS2: fsid=%s: " + "can't acquire the journal glock: %d\n", + sdp->sd_fsname, error); + goto fail_jindex; + } + + error = gfs2_glock_nq_init(sdp->sd_jdesc->jd_inode->i_gl, + LM_ST_SHARED, + LM_FLAG_NOEXP | GL_EXACT, + &sdp->sd_jinode_gh); + if (error) { + printk("gfs2: fsid=%s: " + "can't acquire out journal inode glock: %d\n", + sdp->sd_fsname, error); + goto fail_journal_gh; + } + + error = gfs2_jdesc_check(sdp->sd_jdesc); + if (error) { + printk("GFS2: fsid=%s: my journal (%u) is bad: %d\n", + sdp->sd_fsname, sdp->sd_jdesc->jd_jid, error); + goto fail_jinode_gh; + } + sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; + } + + if (sdp->sd_lockstruct.ls_first) { + unsigned int x; + for (x = 0; x < sdp->sd_journals; x++) { + error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x), + WAIT); + if (error) { + printk("GFS2: fsid=%s: " + "error recovering journal %u: %d\n", + sdp->sd_fsname, x, error); + goto fail_jinode_gh; + } + } + + gfs2_lm_others_may_mount(sdp); + } else if (!sdp->sd_args.ar_spectator) { + error = gfs2_recover_journal(sdp->sd_jdesc, WAIT); + if (error) { + printk("GFS2: fsid=%s: " + "error recovering my journal: %d\n", + sdp->sd_fsname, error); + goto fail_jinode_gh; + } + } + + set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags); + gfs2_glock_dq_uninit(&ji_gh); + jindex = FALSE; + + /* Disown my Journal glock */ + + sdp->sd_journal_gh.gh_owner = NULL; + sdp->sd_jinode_gh.gh_owner = NULL; + + p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd"); + error = IS_ERR(p); + if (error) { + printk("GFS2: fsid=%s: can't start recoverd thread: %d\n", + sdp->sd_fsname, error); + goto fail_jinode_gh; + } + sdp->sd_recoverd_process = p; + + return 0; + + fail_recoverd: + kthread_stop(sdp->sd_recoverd_process); + + fail_jinode_gh: + if (!sdp->sd_args.ar_spectator) + gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); + + fail_journal_gh: + if (!sdp->sd_args.ar_spectator) + gfs2_glock_dq_uninit(&sdp->sd_journal_gh); + + fail_jindex: + gfs2_jindex_free(sdp); + if (jindex) + gfs2_glock_dq_uninit(&ji_gh); + + fail: + gfs2_inode_put(sdp->sd_jindex); + + return error; +} + +static int init_inodes(struct gfs2_sbd *sdp, int undo) +{ + struct inode *inode; + struct dentry **dentry = &sdp->sd_vfs->s_root; + int error = 0; + + if (undo) + goto fail_dput; + + /* Read in the master inode number inode */ + error = gfs2_lookup_simple(sdp->sd_master_dir, "inum", + &sdp->sd_inum_inode); + if (error) { + printk("GFS2: fsid=%s: can't read in inum inode: %d\n", + sdp->sd_fsname, error); + return error; + } + + /* Read in the master statfs inode */ + error = gfs2_lookup_simple(sdp->sd_master_dir, "statfs", + &sdp->sd_statfs_inode); + if (error) { + printk("GFS2: fsid=%s: can't read in statfs inode: %d\n", + sdp->sd_fsname, error); + goto fail; + } + + /* Read in the resource index inode */ + error = gfs2_lookup_simple(sdp->sd_master_dir, "rindex", + &sdp->sd_rindex); + if (error) { + printk("GFS2: fsid=%s: can't get resource index inode: %d\n", + sdp->sd_fsname, error); + goto fail_statfs; + } + set_bit(GLF_STICKY, &sdp->sd_rindex->i_gl->gl_flags); + sdp->sd_rindex_vn = sdp->sd_rindex->i_gl->gl_vn - 1; + + /* Read in the quota inode */ + error = gfs2_lookup_simple(sdp->sd_master_dir, "quota", + &sdp->sd_quota_inode); + if (error) { + printk("GFS2: fsid=%s: can't get quota file inode: %d\n", + sdp->sd_fsname, error); + goto fail_rindex; + } + + /* Get the root inode */ + error = gfs2_lookup_simple(sdp->sd_master_dir, "root", + &sdp->sd_root_dir); + if (error) { + printk("GFS2: fsid=%s: can't read in root inode: %d\n", + sdp->sd_fsname, error); + goto fail_qinode; + } + + /* Get the root inode/dentry */ + inode = gfs2_ip2v(sdp->sd_root_dir, CREATE); + if (!inode) { + printk("GFS2: fsid=%s: can't get root inode\n", sdp->sd_fsname); + error = -ENOMEM; + goto fail_rooti; + } + + *dentry = d_alloc_root(inode); + if (!*dentry) { + iput(inode); + printk("GFS2: fsid=%s: can't get root dentry\n", sdp->sd_fsname); + error = -ENOMEM; + goto fail_rooti; + } + + return 0; + + fail_dput: + dput(*dentry); + *dentry = NULL; + + fail_rooti: + gfs2_inode_put(sdp->sd_root_dir); + + fail_qinode: + gfs2_inode_put(sdp->sd_quota_inode); + + fail_rindex: + gfs2_clear_rgrpd(sdp); + gfs2_inode_put(sdp->sd_rindex); + + fail_statfs: + gfs2_inode_put(sdp->sd_statfs_inode); + + fail: + gfs2_inode_put(sdp->sd_inum_inode); + + return error; +} + +static int init_per_node(struct gfs2_sbd *sdp, int undo) +{ + struct gfs2_inode *pn = NULL; + char buf[30]; + int error = 0; + + if (sdp->sd_args.ar_spectator) + return 0; + + if (undo) + goto fail_qc_gh; + + error = gfs2_lookup_simple(sdp->sd_master_dir, "per_node", &pn); + if (error) { + printk("GFS2: fsid=%s: can't find per_node directory: %d\n", + sdp->sd_fsname, error); + return error; + } + + sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid); + error = gfs2_lookup_simple(pn, buf, &sdp->sd_ir_inode); + if (error) { + printk("GFS2: fsid=%s: can't find local \"ir\" file: %d\n", + sdp->sd_fsname, error); + goto fail; + } + + sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid); + error = gfs2_lookup_simple(pn, buf, &sdp->sd_sc_inode); + if (error) { + printk("GFS2: fsid=%s: can't find local \"sc\" file: %d\n", + sdp->sd_fsname, error); + goto fail_ir_i; + } + + sprintf(buf, "unlinked_tag%u", sdp->sd_jdesc->jd_jid); + error = gfs2_lookup_simple(pn, buf, &sdp->sd_ut_inode); + if (error) { + printk("GFS2: fsid=%s: can't find local \"ut\" file: %d\n", + sdp->sd_fsname, error); + goto fail_sc_i; + } + + sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid); + error = gfs2_lookup_simple(pn, buf, &sdp->sd_qc_inode); + if (error) { + printk("GFS2: fsid=%s: can't find local \"qc\" file: %d\n", + sdp->sd_fsname, error); + goto fail_ut_i; + } + + gfs2_inode_put(pn); + pn = NULL; + + error = gfs2_glock_nq_init(sdp->sd_ir_inode->i_gl, + LM_ST_EXCLUSIVE, GL_NEVER_RECURSE, + &sdp->sd_ir_gh); + if (error) { + printk("GFS2: fsid=%s: can't lock local \"ir\" file: %d\n", + sdp->sd_fsname, error); + goto fail_qc_i; + } + + error = gfs2_glock_nq_init(sdp->sd_sc_inode->i_gl, + LM_ST_EXCLUSIVE, GL_NEVER_RECURSE, + &sdp->sd_sc_gh); + if (error) { + printk("GFS2: fsid=%s: can't lock local \"sc\" file: %d\n", + sdp->sd_fsname, error); + goto fail_ir_gh; + } + + error = gfs2_glock_nq_init(sdp->sd_ut_inode->i_gl, + LM_ST_EXCLUSIVE, GL_NEVER_RECURSE, + &sdp->sd_ut_gh); + if (error) { + printk("GFS2: fsid=%s: can't lock local \"ut\" file: %d\n", + sdp->sd_fsname, error); + goto fail_sc_gh; + } + + error = gfs2_glock_nq_init(sdp->sd_qc_inode->i_gl, + LM_ST_EXCLUSIVE, GL_NEVER_RECURSE, + &sdp->sd_qc_gh); + if (error) { + printk("GFS2: fsid=%s: can't lock local \"qc\" file: %d\n", + sdp->sd_fsname, error); + goto fail_ut_gh; + } + + return 0; + + fail_qc_gh: + gfs2_glock_dq_uninit(&sdp->sd_qc_gh); + + fail_ut_gh: + gfs2_glock_dq_uninit(&sdp->sd_ut_gh); + + fail_sc_gh: + gfs2_glock_dq_uninit(&sdp->sd_sc_gh); + + fail_ir_gh: + gfs2_glock_dq_uninit(&sdp->sd_ir_gh); + + fail_qc_i: + gfs2_inode_put(sdp->sd_qc_inode); + + fail_ut_i: + gfs2_inode_put(sdp->sd_ut_inode); + + fail_sc_i: + gfs2_inode_put(sdp->sd_sc_inode); + + fail_ir_i: + gfs2_inode_put(sdp->sd_ir_inode); + + fail: + if (pn) + gfs2_inode_put(pn); + return error; +} + +static int init_threads(struct gfs2_sbd *sdp, int undo) +{ + struct task_struct *p; + int error = 0; + + if (undo) + goto fail_inoded; + + sdp->sd_log_flush_time = jiffies; + sdp->sd_jindex_refresh_time = jiffies; + + p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); + error = IS_ERR(p); + if (error) { + printk("GFS2: fsid=%s: can't start logd thread: %d\n", + sdp->sd_fsname, error); + return error; + } + sdp->sd_logd_process = p; + + sdp->sd_statfs_sync_time = jiffies; + sdp->sd_quota_sync_time = jiffies; + + p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); + error = IS_ERR(p); + if (error) { + printk("GFS2: fsid=%s: can't start quotad thread: %d\n", + sdp->sd_fsname, error); + goto fail; + } + sdp->sd_quotad_process = p; + + p = kthread_run(gfs2_inoded, sdp, "gfs2_inoded"); + error = IS_ERR(p); + if (error) { + printk("GFS2: fsid=%s: can't start inoded thread: %d\n", + sdp->sd_fsname, error); + goto fail_quotad; + } + sdp->sd_inoded_process = p; + + return 0; + + fail_inoded: + kthread_stop(sdp->sd_inoded_process); + + fail_quotad: + kthread_stop(sdp->sd_quotad_process); + + fail: + kthread_stop(sdp->sd_logd_process); + + return error; +} + +/** + * fill_super - Read in superblock + * @sb: The VFS superblock + * @data: Mount options + * @silent: Don't complain if it's not a GFS2 filesystem + * + * Returns: errno + */ + +static int fill_super(struct super_block *sb, void *data, int silent) +{ + struct gfs2_sbd *sdp; + struct gfs2_holder mount_gh; + int error; + + sdp = init_sbd(sb); + if (!sdp) { + printk("GFS2: can't alloc struct gfs2_sbd\n"); + return -ENOMEM; + } + +#ifndef NO_DIAPER + gfs2_diaper_register_sbd(sb->s_bdev, sdp); +#endif + + error = gfs2_mount_args(sdp, (char *)data, FALSE); + if (error) { + printk("GFS2: can't parse mount arguments\n"); + goto fail; + } + + init_vfs(sdp); + + /* Mount an inter-node lock module, check for local optimizations */ + error = gfs2_lm_mount(sdp, silent); + if (error) + goto fail; + + error = init_locking(sdp, &mount_gh, DO); + if (error) + goto fail_lm; + + error = init_sb(sdp, silent, DO); + if (error) + goto fail_locking; + + error = init_journal(sdp, DO); + if (error) + goto fail_sb; + + error = init_inodes(sdp, DO); + if (error) + goto fail_journals; + + error = init_per_node(sdp, DO); + if (error) + goto fail_inodes; + + error = gfs2_statfs_init(sdp); + if (error) { + printk("GFS2: fsid=%s: can't initialize statfs subsystem: %d\n", + sdp->sd_fsname, error); + goto fail_per_node; + } + + error = init_threads(sdp, DO); + if (error) + goto fail_per_node; + + gfs2_proc_fs_add(sdp); + + /* Make the FS read/write */ + if (!(sb->s_flags & MS_RDONLY)) { + error = gfs2_make_fs_rw(sdp); + if (error) { + printk("GFS2: fsid=%s: can't make FS RW: %d\n", + sdp->sd_fsname, error); + goto fail_proc; + } + } + + gfs2_glock_dq_uninit(&mount_gh); + + return 0; + + fail_proc: + gfs2_proc_fs_del(sdp); + init_threads(sdp, UNDO); + + fail_per_node: + init_per_node(sdp, UNDO); + + fail_inodes: + init_inodes(sdp, UNDO); + + fail_journals: + init_journal(sdp, UNDO); + + fail_sb: + init_sb(sdp, FALSE, UNDO); + + fail_locking: + init_locking(sdp, &mount_gh, UNDO); + + fail_lm: + gfs2_gl_hash_clear(sdp, WAIT); + gfs2_lm_unmount(sdp); + while (invalidate_inodes(sb)) + yield(); + + fail: + vfree(sdp); + set_v2sdp(sb, NULL); + + return error; +} + +/** + * gfs2_test_bdev_super - + * @sb: + * @data: + * + */ + +int gfs2_test_bdev_super(struct super_block *sb, void *data) +{ + return (void *)sb->s_bdev == data; +} + +/** + * gfs2_test_bdev_super - + * @sb: + * @data: + * + */ + +int gfs2_set_bdev_super(struct super_block *sb, void *data) +{ + sb->s_bdev = data; + sb->s_dev = sb->s_bdev->bd_dev; + return 0; +} + +#ifdef NO_DIAPER +struct super_block *gfs2_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, fill_super); +} +#else +/** + * gfs2_get_sb - + * @fs_type: + * @flags: + * @dev_name: + * @data: + * + * Rip off of get_sb_bdev(). + * + * Returns: the new superblock + */ + +struct super_block *gfs2_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + struct block_device *real, *diaper; + struct super_block *sb; + int error = 0; + + real = open_bdev_excl(dev_name, flags, fs_type); + if (IS_ERR(real)) + return (struct super_block *)real; + + diaper = gfs2_diaper_get(real, flags); + if (IS_ERR(diaper)) { + close_bdev_excl(real); + return (struct super_block *)diaper; + } + + down(&diaper->bd_mount_sem); + sb = sget(fs_type, gfs2_test_bdev_super, gfs2_set_bdev_super, diaper); + up(&diaper->bd_mount_sem); + if (IS_ERR(sb)) + goto out; + + if (sb->s_root) { + if ((flags ^ sb->s_flags) & MS_RDONLY) { + up_write(&sb->s_umount); + deactivate_super(sb); + sb = ERR_PTR(-EBUSY); + } + goto out; + } else { + char buf[BDEVNAME_SIZE]; + + sb->s_flags = flags; + strlcpy(sb->s_id, bdevname(real, buf), sizeof(sb->s_id)); + sb->s_old_blocksize = block_size(real); + sb_set_blocksize(sb, sb->s_old_blocksize); + set_blocksize(real, sb->s_old_blocksize); + error = fill_super(sb, data, (flags & MS_VERBOSE) ? 1 : 0); + if (error) { + up_write(&sb->s_umount); + deactivate_super(sb); + sb = ERR_PTR(error); + } else + sb->s_flags |= MS_ACTIVE; + } + + return sb; + + out: + gfs2_diaper_put(diaper); + close_bdev_excl(real); + return sb; +} + +/** + * gfs2_kill_sb - + * @sb: + * + * Rip off of kill_block_super(). + * + */ + +void gfs2_kill_sb(struct super_block *sb) +{ + struct block_device *diaper = sb->s_bdev; + struct block_device *real = gfs2_diaper_2real(diaper); + unsigned long bsize = sb->s_old_blocksize; + + generic_shutdown_super(sb); + set_blocksize(diaper, bsize); + set_blocksize(real, bsize); + gfs2_diaper_put(diaper); + close_bdev_excl(real); +} +#endif + +struct file_system_type gfs2_fs_type = { + .name = "gfs2", + .fs_flags = FS_REQUIRES_DEV, + .get_sb = gfs2_get_sb, +#ifdef NO_DIAPER + .kill_sb = kill_block_super, +#else + .kill_sb = gfs2_kill_sb, +#endif + .owner = THIS_MODULE, +}; + --- a/fs/gfs2/ops_fstype.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_fstype.h 2005-08-01 14:13:08.013807592 +0800 @@ -0,0 +1,22 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_FSTYPE_DOT_H__ +#define __OPS_FSTYPE_DOT_H__ + +int gfs2_test_bdev_super(struct super_block *sb, void *data); +int gfs2_set_bdev_super(struct super_block *sb, void *data); + +extern struct file_system_type gfs2_fs_type; + +#endif /* __OPS_FSTYPE_DOT_H__ */ --- a/fs/gfs2/ops_inode.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_inode.c 2005-08-01 14:13:08.013807592 +0800 @@ -0,0 +1,1442 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "acl.h" +#include "bmap.h" +#include "dir.h" +#include "eaops.h" +#include "eattr.h" +#include "glock.h" +#include "inode.h" +#include "meta_io.h" +#include "ops_dentry.h" +#include "ops_inode.h" +#include "page.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" +#include "unlinked.h" + +/** + * gfs2_create - Create a file + * @dir: The directory in which to create the file + * @dentry: The dentry of the new file + * @mode: The mode of the new file + * + * Returns: errno + */ + +static int gfs2_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + struct gfs2_inode *dip = get_v2ip(dir), *ip; + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_holder ghs[2]; + struct inode *inode; + int new = TRUE; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + gfs2_holder_init(dip->i_gl, 0, 0, ghs); + + for (;;) { + error = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode); + if (!error) { + ip = get_gl2ip(ghs[1].gh_gl); + gfs2_trans_end(sdp); + if (dip->i_alloc->al_rgd) + gfs2_inplace_release(dip); + gfs2_quota_unlock(dip); + gfs2_alloc_put(dip); + gfs2_glock_dq_uninit_m(2, ghs); + break; + } else if (error != -EEXIST || + (nd->intent.open.flags & O_EXCL)) { + gfs2_holder_uninit(ghs); + return error; + } + + error = gfs2_lookupi(dip, &dentry->d_name, FALSE, &ip); + if (!error) { + new = FALSE; + gfs2_holder_uninit(ghs); + break; + } else if (error != -ENOENT) { + gfs2_holder_uninit(ghs); + return error; + } + } + + inode = gfs2_ip2v(ip, CREATE); + gfs2_inode_put(ip); + + if (!inode) + return -ENOMEM; + + d_instantiate(dentry, inode); + if (new) + mark_inode_dirty(inode); + + return 0; +} + +/** + * lookup_cdpn_sub_at - Maybe lookup a Context Dependent Pathname + * @sdp: the filesystem + * @dentry: the original dentry to lookup + * @new_dentry: the new dentry, if this was a substitutable path. + * + * Returns: the new dentry, a ERR_PTR, or NULL + */ + +static struct dentry *lookup_cdpn_sub_at(struct gfs2_sbd *sdp, + struct dentry *dentry) +{ + struct dentry *parent, *new = NULL; + char *buf; + + buf = kmalloc(2 * __NEW_UTS_LEN + 2, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + parent = dget_parent(dentry); + + if (gfs2_filecmp(&dentry->d_name, "@hostname", 9)) + new = lookup_one_len(system_utsname.nodename, + parent, + strlen(system_utsname.nodename)); + else if (gfs2_filecmp(&dentry->d_name, "@mach", 5)) + new = lookup_one_len(system_utsname.machine, + parent, + strlen(system_utsname.machine)); + else if (gfs2_filecmp(&dentry->d_name, "@os", 3)) + new = lookup_one_len(system_utsname.sysname, + parent, + strlen(system_utsname.sysname)); + else if (gfs2_filecmp(&dentry->d_name, "@uid", 4)) + new = lookup_one_len(buf, + parent, + sprintf(buf, "%u", current->fsuid)); + else if (gfs2_filecmp(&dentry->d_name, "@gid", 4)) + new = lookup_one_len(buf, + parent, + sprintf(buf, "%u", current->fsgid)); + else if (gfs2_filecmp(&dentry->d_name, "@sys", 4)) + new = lookup_one_len(buf, + parent, + sprintf(buf, "%s_%s", + system_utsname.machine, + system_utsname.sysname)); + else if (gfs2_filecmp(&dentry->d_name, "@jid", 4)) + new = lookup_one_len(buf, + parent, + sprintf(buf, "%u", + sdp->sd_jdesc->jd_jid)); + + dput(parent); + kfree(buf); + + return new; +} + +/** + * lookup_cdpn_sub_brace - Maybe lookup a Context Dependent Pathname + * @sdp: the filesystem + * @dentry: the original dentry to lookup + * @new_dentry: the new dentry, if this was a substitutable path. + * + * Returns: the new dentry, a ERR_PTR, or NULL + */ + +static struct dentry *lookup_cdpn_sub_brace(struct gfs2_sbd *sdp, + struct dentry *dentry) +{ + struct dentry *parent, *new = NULL; + char *buf; + + buf = kmalloc(2 * __NEW_UTS_LEN + 2, GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + parent = dget_parent(dentry); + + if (gfs2_filecmp(&dentry->d_name, "{hostname}", 10)) + new = lookup_one_len(system_utsname.nodename, + parent, + strlen(system_utsname.nodename)); + else if (gfs2_filecmp(&dentry->d_name, "{mach}", 6)) + new = lookup_one_len(system_utsname.machine, + parent, + strlen(system_utsname.machine)); + else if (gfs2_filecmp(&dentry->d_name, "{os}", 4)) + new = lookup_one_len(system_utsname.sysname, + parent, + strlen(system_utsname.sysname)); + else if (gfs2_filecmp(&dentry->d_name, "{uid}", 5)) + new = lookup_one_len(buf, + parent, + sprintf(buf, "%u", current->fsuid)); + else if (gfs2_filecmp(&dentry->d_name, "{gid}", 5)) + new = lookup_one_len(buf, + parent, + sprintf(buf, "%u", current->fsgid)); + else if (gfs2_filecmp(&dentry->d_name, "{sys}", 5)) + new = lookup_one_len(buf, + parent, + sprintf(buf, "%s_%s", + system_utsname.machine, + system_utsname.sysname)); + else if (gfs2_filecmp(&dentry->d_name, "{jid}", 5)) + new = lookup_one_len(buf, + parent, + sprintf(buf, "%u", + sdp->sd_jdesc->jd_jid)); + + dput(parent); + kfree(buf); + + return new; +} + +/** + * gfs2_lookup - Look up a filename in a directory and return its inode + * @dir: The directory inode + * @dentry: The dentry of the new inode + * @nd: passed from Linux VFS, ignored by us + * + * Called by the VFS layer. Lock dir and call gfs2_lookupi() + * + * Returns: errno + */ + +static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct gfs2_inode *dip = get_v2ip(dir), *ip; + struct gfs2_sbd *sdp = dip->i_sbd; + struct inode *inode = NULL; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + /* Do Context Dependent Path Name expansion */ + + if (*dentry->d_name.name == '@' && dentry->d_name.len > 1) { + struct dentry *new_dentry; + new_dentry = lookup_cdpn_sub_at(sdp, dentry); + if (new_dentry) + return new_dentry; + } else if (*dentry->d_name.name == '{' && dentry->d_name.len > 2) { + struct dentry *new_dentry; + new_dentry = lookup_cdpn_sub_brace(sdp, dentry); + if (new_dentry) + return new_dentry; + } + + if (!sdp->sd_args.ar_localcaching) + dentry->d_op = &gfs2_dops; + + error = gfs2_lookupi(dip, &dentry->d_name, FALSE, &ip); + if (!error) { + inode = gfs2_ip2v(ip, CREATE); + gfs2_inode_put(ip); + if (!inode) + return ERR_PTR(-ENOMEM); + + } else if (error != -ENOENT) + return ERR_PTR(error); + + if (inode) + return d_splice_alias(inode, dentry); + d_add(dentry, inode); + + return NULL; +} + +/** + * gfs2_link - Link to a file + * @old_dentry: The inode to link + * @dir: Add link to this directory + * @dentry: The name of the link + * + * Link the inode in "old_dentry" into the directory "dir" with the + * name in "dentry". + * + * Returns: errno + */ + +static int gfs2_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct gfs2_inode *dip = get_v2ip(dir); + struct gfs2_sbd *sdp = dip->i_sbd; + struct inode *inode = old_dentry->d_inode; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_holder ghs[2]; + int alloc_required; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + if (S_ISDIR(ip->i_di.di_mode)) + return -EPERM; + + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + + error = gfs2_glock_nq_m(2, ghs); + if (error) + goto out; + + error = gfs2_repermission(dir, MAY_WRITE | MAY_EXEC, NULL); + if (error) + goto out_gunlock; + + error = gfs2_dir_search(dip, &dentry->d_name, NULL, NULL); + switch (error) { + case -ENOENT: + break; + case 0: + error = -EEXIST; + default: + goto out_gunlock; + } + + error = -EINVAL; + if (!dip->i_di.di_nlink) + goto out_gunlock; + error = -EFBIG; + if (dip->i_di.di_entries == (uint32_t)-1) + goto out_gunlock; + error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto out_gunlock; + error = -EINVAL; + if (!ip->i_di.di_nlink) + goto out_gunlock; + error = -EMLINK; + if (ip->i_di.di_nlink == (uint32_t)-1) + goto out_gunlock; + + error = gfs2_diradd_alloc_required(dip, &dentry->d_name, + &alloc_required); + if (error) + goto out_gunlock; + + if (alloc_required) { + struct gfs2_alloc *al = gfs2_alloc_get(dip); + + error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out_alloc; + + error = gfs2_quota_check(dip, dip->i_di.di_uid, + dip->i_di.di_gid); + if (error) + goto out_gunlock_q; + + al->al_requested = sdp->sd_max_dirres; + + error = gfs2_inplace_reserve(dip); + if (error) + goto out_gunlock_q; + + error = gfs2_trans_begin(sdp, + sdp->sd_max_dirres + + al->al_rgd->rd_ri.ri_length + + 2 * RES_DINODE + RES_STATFS + + RES_QUOTA, 0); + if (error) + goto out_ipres; + } else { + error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0); + if (error) + goto out_ipres; + } + + error = gfs2_dir_add(dip, &dentry->d_name, &ip->i_num, + IF2DT(ip->i_di.di_mode)); + if (error) + goto out_end_trans; + + error = gfs2_change_nlink(ip, +1); + + out_end_trans: + gfs2_trans_end(sdp); + + out_ipres: + if (alloc_required) + gfs2_inplace_release(dip); + + out_gunlock_q: + if (alloc_required) + gfs2_quota_unlock(dip); + + out_alloc: + if (alloc_required) + gfs2_alloc_put(dip); + + out_gunlock: + gfs2_glock_dq_m(2, ghs); + + out: + gfs2_holder_uninit(ghs); + gfs2_holder_uninit(ghs + 1); + + if (!error) { + atomic_inc(&inode->i_count); + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + } + + return error; +} + +/** + * gfs2_unlink - Unlink a file + * @dir: The inode of the directory containing the file to unlink + * @dentry: The file itself + * + * Unlink a file. Call gfs2_unlinki() + * + * Returns: errno + */ + +static int gfs2_unlink(struct inode *dir, struct dentry *dentry) +{ + struct gfs2_inode *dip = get_v2ip(dir); + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_inode *ip = get_v2ip(dentry->d_inode); + struct gfs2_unlinked *ul; + struct gfs2_holder ghs[2]; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + error = gfs2_unlinked_get(sdp, &ul); + if (error) + return error; + + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + + error = gfs2_glock_nq_m(2, ghs); + if (error) + goto out; + + error = gfs2_unlink_ok(dip, &dentry->d_name, ip); + if (error) + goto out_gunlock; + + error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF + + RES_UNLINKED, 0); + if (error) + goto out_gunlock; + + error = gfs2_unlinki(dip, &dentry->d_name, ip,ul); + + gfs2_trans_end(sdp); + + out_gunlock: + gfs2_glock_dq_m(2, ghs); + + out: + gfs2_holder_uninit(ghs); + gfs2_holder_uninit(ghs + 1); + + gfs2_unlinked_put(sdp, ul); + + return error; +} + +/** + * gfs2_symlink - Create a symlink + * @dir: The directory to create the symlink in + * @dentry: The dentry to put the symlink in + * @symname: The thing which the link points to + * + * Returns: errno + */ + +static int gfs2_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct gfs2_inode *dip = get_v2ip(dir), *ip; + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_holder ghs[2]; + struct inode *inode; + struct buffer_head *dibh; + int size; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + /* Must be stuffed with a null terminator for gfs2_follow_link() */ + size = strlen(symname); + if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) + return -ENAMETOOLONG; + + gfs2_holder_init(dip->i_gl, 0, 0, ghs); + + error = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO); + if (error) { + gfs2_holder_uninit(ghs); + return error; + } + + ip = get_gl2ip(ghs[1].gh_gl); + + ip->i_di.di_size = size; + + error = gfs2_meta_inode_buffer(ip, &dibh); + + if (!gfs2_assert_withdraw(sdp, !error)) { + gfs2_dinode_out(&ip->i_di, dibh->b_data); + memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, + size); + brelse(dibh); + } + + gfs2_trans_end(sdp); + if (dip->i_alloc->al_rgd) + gfs2_inplace_release(dip); + gfs2_quota_unlock(dip); + gfs2_alloc_put(dip); + + gfs2_glock_dq_uninit_m(2, ghs); + + inode = gfs2_ip2v(ip, CREATE); + gfs2_inode_put(ip); + + if (!inode) + return -ENOMEM; + + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + + return 0; +} + +/** + * gfs2_mkdir - Make a directory + * @dir: The parent directory of the new one + * @dentry: The dentry of the new directory + * @mode: The mode of the new directory + * + * Returns: errno + */ + +static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct gfs2_inode *dip = get_v2ip(dir), *ip; + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_holder ghs[2]; + struct inode *inode; + struct buffer_head *dibh; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + gfs2_holder_init(dip->i_gl, 0, 0, ghs); + + error = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode); + if (error) { + gfs2_holder_uninit(ghs); + return error; + } + + ip = get_gl2ip(ghs[1].gh_gl); + + ip->i_di.di_nlink = 2; + ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); + ip->i_di.di_flags |= GFS2_DIF_JDATA; + ip->i_di.di_payload_format = GFS2_FORMAT_DE; + ip->i_di.di_entries = 2; + + error = gfs2_meta_inode_buffer(ip, &dibh); + + if (!gfs2_assert_withdraw(sdp, !error)) { + struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; + struct gfs2_dirent *dent; + + gfs2_dirent_alloc(ip, dibh, 1, &dent); + + dent->de_inum = di->di_num; /* already GFS2 endian */ + dent->de_hash = gfs2_disk_hash(".", 1); + dent->de_hash = cpu_to_gfs2_32(dent->de_hash); + dent->de_type = DT_DIR; + memcpy((char *) (dent + 1), ".", 1); + di->di_entries = cpu_to_gfs2_32(1); + + gfs2_dirent_alloc(ip, dibh, 2, &dent); + + gfs2_inum_out(&dip->i_num, (char *) &dent->de_inum); + dent->de_hash = gfs2_disk_hash("..", 2); + dent->de_hash = cpu_to_gfs2_32(dent->de_hash); + dent->de_type = DT_DIR; + memcpy((char *) (dent + 1), "..", 2); + + gfs2_dinode_out(&ip->i_di, (char *)di); + + brelse(dibh); + } + + error = gfs2_change_nlink(dip, +1); + gfs2_assert_withdraw(sdp, !error); /* dip already pinned */ + + gfs2_trans_end(sdp); + if (dip->i_alloc->al_rgd) + gfs2_inplace_release(dip); + gfs2_quota_unlock(dip); + gfs2_alloc_put(dip); + + gfs2_glock_dq_uninit_m(2, ghs); + + inode = gfs2_ip2v(ip, CREATE); + gfs2_inode_put(ip); + + if (!inode) + return -ENOMEM; + + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + + return 0; +} + +/** + * gfs2_rmdir - Remove a directory + * @dir: The parent directory of the directory to be removed + * @dentry: The dentry of the directory to remove + * + * Remove a directory. Call gfs2_rmdiri() + * + * Returns: errno + */ + +static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct gfs2_inode *dip = get_v2ip(dir); + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_inode *ip = get_v2ip(dentry->d_inode); + struct gfs2_unlinked *ul; + struct gfs2_holder ghs[2]; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + error = gfs2_unlinked_get(sdp, &ul); + if (error) + return error; + + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + + error = gfs2_glock_nq_m(2, ghs); + if (error) + goto out; + + error = gfs2_unlink_ok(dip, &dentry->d_name, ip); + if (error) + goto out_gunlock; + + if (ip->i_di.di_entries < 2) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(&ip->i_di); + error = -EIO; + goto out_gunlock; + } + if (ip->i_di.di_entries > 2) { + error = -ENOTEMPTY; + goto out_gunlock; + } + + error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + + RES_UNLINKED, 0); + if (error) + goto out_gunlock; + + error = gfs2_rmdiri(dip, &dentry->d_name, ip, ul); + + gfs2_trans_end(sdp); + + out_gunlock: + gfs2_glock_dq_m(2, ghs); + + out: + gfs2_holder_uninit(ghs); + gfs2_holder_uninit(ghs + 1); + + gfs2_unlinked_put(sdp, ul); + + return error; +} + +/** + * gfs2_mknod - Make a special file + * @dir: The directory in which the special file will reside + * @dentry: The dentry of the special file + * @mode: The mode of the special file + * @rdev: The device specification of the special file + * + */ + +static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, + dev_t dev) +{ + struct gfs2_inode *dip = get_v2ip(dir), *ip; + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_holder ghs[2]; + struct inode *inode; + struct buffer_head *dibh; + uint32_t major = 0, minor = 0; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + switch (mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + major = MAJOR(dev); + minor = MINOR(dev); + break; + case S_IFIFO: + case S_IFSOCK: + break; + default: + return -EOPNOTSUPP; + }; + + gfs2_holder_init(dip->i_gl, 0, 0, ghs); + + error = gfs2_createi(ghs, &dentry->d_name, mode); + if (error) { + gfs2_holder_uninit(ghs); + return error; + } + + ip = get_gl2ip(ghs[1].gh_gl); + + ip->i_di.di_major = major; + ip->i_di.di_minor = minor; + + error = gfs2_meta_inode_buffer(ip, &dibh); + + if (!gfs2_assert_withdraw(sdp, !error)) { + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + gfs2_trans_end(sdp); + if (dip->i_alloc->al_rgd) + gfs2_inplace_release(dip); + gfs2_quota_unlock(dip); + gfs2_alloc_put(dip); + + gfs2_glock_dq_uninit_m(2, ghs); + + inode = gfs2_ip2v(ip, CREATE); + gfs2_inode_put(ip); + + if (!inode) + return -ENOMEM; + + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + + return 0; +} + +/** + * gfs2_rename - Rename a file + * @odir: Parent directory of old file name + * @odentry: The old dentry of the file + * @ndir: Parent directory of new file name + * @ndentry: The new dentry of the file + * + * Returns: errno + */ + +static int gfs2_rename(struct inode *odir, struct dentry *odentry, + struct inode *ndir, struct dentry *ndentry) +{ + struct gfs2_inode *odip = get_v2ip(odir); + struct gfs2_inode *ndip = get_v2ip(ndir); + struct gfs2_inode *ip = get_v2ip(odentry->d_inode); + struct gfs2_inode *nip = NULL; + struct gfs2_sbd *sdp = odip->i_sbd; + struct gfs2_unlinked *ul; + struct gfs2_holder ghs[4], r_gh; + unsigned int num_gh; + int dir_rename = FALSE; + int alloc_required; + unsigned int x; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + if (ndentry->d_inode) { + nip = get_v2ip(ndentry->d_inode); + if (ip == nip) + return 0; + } + + error = gfs2_unlinked_get(sdp, &ul); + if (error) + return error; + + /* Make sure we aren't trying to move a dirctory into it's subdir */ + + if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) { + dir_rename = TRUE; + + error = gfs2_glock_nq_init(sdp->sd_rename_gl, + LM_ST_EXCLUSIVE, 0, + &r_gh); + if (error) + goto out; + + error = gfs2_ok_to_move(ip, ndip); + if (error) + goto out_gunlock_r; + } + + gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); + num_gh = 3; + + if (nip) + gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); + + error = gfs2_glock_nq_m(num_gh, ghs); + if (error) + goto out_uninit; + + /* Check out the old directory */ + + error = gfs2_unlink_ok(odip, &odentry->d_name, ip); + if (error) + goto out_gunlock; + + /* Check out the new directory */ + + if (nip) { + error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip); + if (error) + goto out_gunlock; + + if (S_ISDIR(nip->i_di.di_mode)) { + if (nip->i_di.di_entries < 2) { + if (gfs2_consist_inode(nip)) + gfs2_dinode_print(&nip->i_di); + error = -EIO; + goto out_gunlock; + } + if (nip->i_di.di_entries > 2) { + error = -ENOTEMPTY; + goto out_gunlock; + } + } + } else { + error = gfs2_repermission(ndir, MAY_WRITE | MAY_EXEC, NULL); + if (error) + goto out_gunlock; + + error = gfs2_dir_search(ndip, &ndentry->d_name, NULL, NULL); + switch (error) { + case -ENOENT: + error = 0; + break; + case 0: + error = -EEXIST; + default: + goto out_gunlock; + }; + + if (odip != ndip) { + if (!ndip->i_di.di_nlink) { + error = -EINVAL; + goto out_gunlock; + } + if (ndip->i_di.di_entries == (uint32_t)-1) { + error = -EFBIG; + goto out_gunlock; + } + if (S_ISDIR(ip->i_di.di_mode) && + ndip->i_di.di_nlink == (uint32_t)-1) { + error = -EMLINK; + goto out_gunlock; + } + } + } + + /* Check out the dir to be renamed */ + + if (dir_rename) { + error = gfs2_repermission(odentry->d_inode, MAY_WRITE, NULL); + if (error) + goto out_gunlock; + } + + error = gfs2_diradd_alloc_required(ndip, &ndentry->d_name, + &alloc_required); + if (error) + goto out_gunlock; + + if (alloc_required) { + struct gfs2_alloc *al = gfs2_alloc_get(ndip); + + error = gfs2_quota_lock(ndip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out_alloc; + + error = gfs2_quota_check(ndip, ndip->i_di.di_uid, + ndip->i_di.di_gid); + if (error) + goto out_gunlock_q; + + al->al_requested = sdp->sd_max_dirres; + + error = gfs2_inplace_reserve(ndip); + if (error) + goto out_gunlock_q; + + error = gfs2_trans_begin(sdp, + sdp->sd_max_dirres + + al->al_rgd->rd_ri.ri_length + + 4 * RES_DINODE + 4 * RES_LEAF + + RES_UNLINKED + RES_STATFS + + RES_QUOTA, 0); + if (error) + goto out_ipreserv; + } else { + error = gfs2_trans_begin(sdp, 4 * RES_DINODE + + 5 * RES_LEAF + + RES_UNLINKED, 0); + if (error) + goto out_gunlock; + } + + /* Remove the target file, if it exists */ + + if (nip) { + if (S_ISDIR(nip->i_di.di_mode)) + error = gfs2_rmdiri(ndip, &ndentry->d_name, nip, ul); + else + error = gfs2_unlinki(ndip, &ndentry->d_name, nip, ul); + if (error) + goto out_end_trans; + } + + if (dir_rename) { + struct qstr name; + name.len = 2; + name.name = ".."; + + error = gfs2_change_nlink(ndip, +1); + if (error) + goto out_end_trans; + error = gfs2_change_nlink(odip, -1); + if (error) + goto out_end_trans; + + error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR); + if (error) + goto out_end_trans; + } else { + struct buffer_head *dibh; + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out_end_trans; + ip->i_di.di_ctime = get_seconds(); + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + error = gfs2_dir_del(odip, &odentry->d_name); + if (error) + goto out_end_trans; + + error = gfs2_dir_add(ndip, &ndentry->d_name, &ip->i_num, + IF2DT(ip->i_di.di_mode)); + if (error) + goto out_end_trans; + + out_end_trans: + gfs2_trans_end(sdp); + + out_ipreserv: + if (alloc_required) + gfs2_inplace_release(ndip); + + out_gunlock_q: + if (alloc_required) + gfs2_quota_unlock(ndip); + + out_alloc: + if (alloc_required) + gfs2_alloc_put(ndip); + + out_gunlock: + gfs2_glock_dq_m(num_gh, ghs); + + out_uninit: + for (x = 0; x < num_gh; x++) + gfs2_holder_uninit(ghs + x); + + out_gunlock_r: + if (dir_rename) + gfs2_glock_dq_uninit(&r_gh); + + out: + gfs2_unlinked_put(sdp, ul); + + return error; +} + +/** + * gfs2_readlink - Read the value of a symlink + * @dentry: the symlink + * @buf: the buffer to read the symlink data into + * @size: the size of the buffer + * + * Returns: errno + */ + +static int gfs2_readlink(struct dentry *dentry, char *user_buf, int user_size) +{ + struct gfs2_inode *ip = get_v2ip(dentry->d_inode); + char array[GFS2_FAST_NAME_SIZE], *buf = array; + unsigned int len = GFS2_FAST_NAME_SIZE; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_inode); + + error = gfs2_readlinki(ip, &buf, &len); + if (error) + return error; + + if (user_size > len - 1) + user_size = len - 1; + + if (copy_to_user(user_buf, buf, user_size)) + error = -EFAULT; + else + error = user_size; + + if (buf != array) + kfree(buf); + + return error; +} + +/** + * gfs2_follow_link - Follow a symbolic link + * @dentry: The dentry of the link + * @nd: Data that we pass to vfs_follow_link() + * + * This can handle symlinks of any size. It is optimised for symlinks + * under GFS2_FAST_NAME_SIZE. + * + * Returns: 0 on success or error code + */ + +static int gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct gfs2_inode *ip = get_v2ip(dentry->d_inode); + char array[GFS2_FAST_NAME_SIZE], *buf = array; + unsigned int len = GFS2_FAST_NAME_SIZE; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_inode); + + error = gfs2_readlinki(ip, &buf, &len); + if (!error) { + error = vfs_follow_link(nd, buf); + if (buf != array) + kfree(buf); + } + + return error; +} + +/** + * gfs2_permission - + * @inode: + * @mask: + * @nd: passed from Linux VFS, ignored by us + * + * Returns: errno + */ + +static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_holder i_gh; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_inode); + + if (ip->i_vn == ip->i_gl->gl_vn) + return generic_permission(inode, mask, gfs2_check_acl); + + error = gfs2_glock_nq_init(ip->i_gl, + LM_ST_SHARED, LM_FLAG_ANY, + &i_gh); + if (!error) { + error = generic_permission(inode, mask, gfs2_check_acl_locked); + gfs2_glock_dq_uninit(&i_gh); + } + + return error; +} + +static int setattr_size(struct inode *inode, struct iattr *attr) +{ + struct gfs2_inode *ip = get_v2ip(inode); + int error; + + error = gfs2_repermission(inode, MAY_WRITE, NULL); + if (error) + return error; + + if (attr->ia_size != ip->i_di.di_size) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } + + error = gfs2_truncatei(ip, attr->ia_size, gfs2_truncator_page); + if (error) + return error; + + return error; +} + +static int setattr_chown(struct inode *inode, struct iattr *attr) +{ + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *dibh; + uint32_t ouid, ogid, nuid, ngid; + int error; + + ouid = ip->i_di.di_uid; + ogid = ip->i_di.di_gid; + nuid = attr->ia_uid; + ngid = attr->ia_gid; + + if (!(attr->ia_valid & ATTR_UID) || ouid == nuid) + ouid = nuid = NO_QUOTA_CHANGE; + if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) + ogid = ngid = NO_QUOTA_CHANGE; + + gfs2_alloc_get(ip); + + error = gfs2_quota_lock(ip, nuid, ngid); + if (error) + goto out_alloc; + + if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { + error = gfs2_quota_check(ip, nuid, ngid); + if (error) + goto out_gunlock_q; + } + + error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0); + if (error) + goto out_gunlock_q; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out_end_trans; + + inode_setattr(inode, attr); + gfs2_inode_attr_out(ip); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { + gfs2_quota_change(ip, -ip->i_di.di_blocks, + ouid, ogid); + gfs2_quota_change(ip, ip->i_di.di_blocks, + nuid, ngid); + } + + out_end_trans: + gfs2_trans_end(sdp); + + out_gunlock_q: + gfs2_quota_unlock(ip); + + out_alloc: + gfs2_alloc_put(ip); + + return error; +} + +/** + * gfs2_setattr - Change attributes on an inode + * @dentry: The dentry which is changing + * @attr: The structure describing the change + * + * The VFS layer wants to change one or more of an inodes attributes. Write + * that change out to disk. + * + * Returns: errno + */ + +static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_holder i_gh; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_inode); + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + return error; + + error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto out; + + error = inode_change_ok(inode, attr); + if (error) + goto out; + + if (attr->ia_valid & ATTR_SIZE) + error = setattr_size(inode, attr); + else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) + error = setattr_chown(inode, attr); + else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) + error = gfs2_acl_chmod(ip, attr); + else + error = gfs2_setattr_simple(ip, attr); + + out: + gfs2_glock_dq_uninit(&i_gh); + + if (!error) + mark_inode_dirty(inode); + + return error; +} + +/** + * gfs2_getattr - Read out an inode's attributes + * @mnt: ? + * @dentry: The dentry to stat + * @stat: The inode's stats + * + * Returns: errno + */ + +static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_holder gh; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_inode); + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + if (!error) { + generic_fillattr(inode, stat); + gfs2_glock_dq_uninit(&gh); + } + + return error; +} + +/** + * gfs2_setxattr - Set (or create or replace) an inode's extended attribute + * @dentry: + * @name: + * @data: + * @size: + * @flags: + * + * Returns: errno + */ + +int gfs2_setxattr(struct dentry *dentry, const char *name, + const void *data, size_t size, int flags) +{ + struct gfs2_inode *ip = get_v2ip(dentry->d_inode); + struct gfs2_ea_request er; + + atomic_inc(&ip->i_sbd->sd_ops_inode); + + memset(&er, 0, sizeof(struct gfs2_ea_request)); + er.er_type = gfs2_ea_name2type(name, &er.er_name); + if (er.er_type == GFS2_EATYPE_UNUSED) + return -EOPNOTSUPP; + er.er_data = (char *)data; + er.er_name_len = strlen(er.er_name); + er.er_data_len = size; + er.er_flags = flags; + + gfs2_assert_warn(ip->i_sbd, !(er.er_flags & GFS2_ERF_MODE)); + + return gfs2_ea_set(ip, &er); +} + +/** + * gfs2_getxattr - + * @dentry: + * @name: + * @data: + * @size: + * + * Returns: The number of bytes put into data, or -errno + */ + +ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, + void *data, size_t size) +{ + struct gfs2_ea_request er; + + atomic_inc(&get_v2sdp(dentry->d_inode->i_sb)->sd_ops_inode); + + memset(&er, 0, sizeof(struct gfs2_ea_request)); + er.er_type = gfs2_ea_name2type(name, &er.er_name); + if (er.er_type == GFS2_EATYPE_UNUSED) + return -EOPNOTSUPP; + er.er_data = data; + er.er_name_len = strlen(er.er_name); + er.er_data_len = size; + + return gfs2_ea_get(get_v2ip(dentry->d_inode), &er); +} + +/** + * gfs2_listxattr - + * @dentry: + * @buffer: + * @size: + * + * Returns: The number of bytes put into data, or -errno + */ + +ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + struct gfs2_ea_request er; + + atomic_inc(&get_v2sdp(dentry->d_inode->i_sb)->sd_ops_inode); + + memset(&er, 0, sizeof(struct gfs2_ea_request)); + er.er_data = (size) ? buffer : NULL; + er.er_data_len = size; + + return gfs2_ea_list(get_v2ip(dentry->d_inode), &er); +} + +/** + * gfs2_removexattr - + * @dentry: + * @name: + * + * Returns: errno + */ + +int gfs2_removexattr(struct dentry *dentry, const char *name) +{ + struct gfs2_ea_request er; + + atomic_inc(&get_v2sdp(dentry->d_inode->i_sb)->sd_ops_inode); + + memset(&er, 0, sizeof(struct gfs2_ea_request)); + er.er_type = gfs2_ea_name2type(name, &er.er_name); + if (er.er_type == GFS2_EATYPE_UNUSED) + return -EOPNOTSUPP; + er.er_name_len = strlen(er.er_name); + + return gfs2_ea_remove(get_v2ip(dentry->d_inode), &er); +} + +struct inode_operations gfs2_file_iops = { + .permission = gfs2_permission, + .setattr = gfs2_setattr, + .getattr = gfs2_getattr, + .setxattr = gfs2_setxattr, + .getxattr = gfs2_getxattr, + .listxattr = gfs2_listxattr, + .removexattr = gfs2_removexattr, +}; + +struct inode_operations gfs2_dev_iops = { + .permission = gfs2_permission, + .setattr = gfs2_setattr, + .getattr = gfs2_getattr, + .setxattr = gfs2_setxattr, + .getxattr = gfs2_getxattr, + .listxattr = gfs2_listxattr, + .removexattr = gfs2_removexattr, +}; + +struct inode_operations gfs2_dir_iops = { + .create = gfs2_create, + .lookup = gfs2_lookup, + .link = gfs2_link, + .unlink = gfs2_unlink, + .symlink = gfs2_symlink, + .mkdir = gfs2_mkdir, + .rmdir = gfs2_rmdir, + .mknod = gfs2_mknod, + .rename = gfs2_rename, + .permission = gfs2_permission, + .setattr = gfs2_setattr, + .getattr = gfs2_getattr, + .setxattr = gfs2_setxattr, + .getxattr = gfs2_getxattr, + .listxattr = gfs2_listxattr, + .removexattr = gfs2_removexattr, +}; + +struct inode_operations gfs2_symlink_iops = { + .readlink = gfs2_readlink, + .follow_link = gfs2_follow_link, + .permission = gfs2_permission, + .setattr = gfs2_setattr, + .getattr = gfs2_getattr, + .setxattr = gfs2_setxattr, + .getxattr = gfs2_getxattr, + .listxattr = gfs2_listxattr, + .removexattr = gfs2_removexattr, +}; + --- a/fs/gfs2/ops_inode.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_inode.h 2005-08-01 14:13:08.013807592 +0800 @@ -0,0 +1,22 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_INODE_DOT_H__ +#define __OPS_INODE_DOT_H__ + +extern struct inode_operations gfs2_file_iops; +extern struct inode_operations gfs2_dir_iops; +extern struct inode_operations gfs2_symlink_iops; +extern struct inode_operations gfs2_dev_iops; + +#endif /* __OPS_INODE_DOT_H__ */ --- a/fs/gfs2/ops_super.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_super.c 2005-08-01 14:13:08.013807592 +0800 @@ -0,0 +1,411 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "glock.h" +#include "inode.h" +#include "lm.h" +#include "log.h" +#include "mount.h" +#include "ops_super.h" +#include "page.h" +#include "proc.h" +#include "quota.h" +#include "recovery.h" +#include "rgrp.h" +#include "super.h" + +/** + * gfs2_write_inode - Make sure the inode is stable on the disk + * @inode: The inode + * @sync: synchronous write flag + * + * Returns: errno + */ + +static int gfs2_write_inode(struct inode *inode, int sync) +{ + struct gfs2_inode *ip = get_v2ip(inode); + + atomic_inc(&ip->i_sbd->sd_ops_super); + + if (current->flags & PF_MEMALLOC) + return 0; + if (ip && sync) + gfs2_log_flush_glock(ip->i_gl); + + return 0; +} + +/** + * gfs2_put_super - Unmount the filesystem + * @sb: The VFS superblock + * + */ + +static void gfs2_put_super(struct super_block *sb) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + int error; + + if (!sdp) + return; + + atomic_inc(&sdp->sd_ops_super); + + gfs2_proc_fs_del(sdp); + + /* Unfreeze the filesystem, if we need to */ + + down(&sdp->sd_freeze_lock); + if (sdp->sd_freeze_count) + gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); + up(&sdp->sd_freeze_lock); + + kthread_stop(sdp->sd_inoded_process); + kthread_stop(sdp->sd_quotad_process); + kthread_stop(sdp->sd_logd_process); + kthread_stop(sdp->sd_recoverd_process); + while (sdp->sd_glockd_num--) + kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]); + kthread_stop(sdp->sd_scand_process); + + if (!(sb->s_flags & MS_RDONLY)) { + error = gfs2_make_fs_ro(sdp); + if (error) + gfs2_io_error(sdp); + } + + /* At this point, we're through modifying the disk */ + + /* Release stuff */ + + gfs2_inode_put(sdp->sd_master_dir); + gfs2_inode_put(sdp->sd_jindex); + gfs2_inode_put(sdp->sd_inum_inode); + gfs2_inode_put(sdp->sd_statfs_inode); + gfs2_inode_put(sdp->sd_rindex); + gfs2_inode_put(sdp->sd_quota_inode); + gfs2_inode_put(sdp->sd_root_dir); + + gfs2_glock_put(sdp->sd_rename_gl); + gfs2_glock_put(sdp->sd_trans_gl); + + if (!sdp->sd_args.ar_spectator) { + gfs2_glock_dq_uninit(&sdp->sd_journal_gh); + gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); + gfs2_glock_dq_uninit(&sdp->sd_ir_gh); + gfs2_glock_dq_uninit(&sdp->sd_sc_gh); + gfs2_glock_dq_uninit(&sdp->sd_ut_gh); + gfs2_glock_dq_uninit(&sdp->sd_qc_gh); + gfs2_inode_put(sdp->sd_ir_inode); + gfs2_inode_put(sdp->sd_sc_inode); + gfs2_inode_put(sdp->sd_ut_inode); + gfs2_inode_put(sdp->sd_qc_inode); + } + + gfs2_glock_dq_uninit(&sdp->sd_live_gh); + + gfs2_clear_rgrpd(sdp); + gfs2_jindex_free(sdp); + + /* Take apart glock structures and buffer lists */ + gfs2_gl_hash_clear(sdp, WAIT); + + /* Unmount the locking protocol */ + gfs2_lm_unmount(sdp); + + /* At this point, we're through participating in the lockspace */ + + /* Get rid of any extra inodes */ + while (invalidate_inodes(sb)) + yield(); + + vfree(sdp); + + set_v2sdp(sb, NULL); +} + +/** + * gfs2_write_super - disk commit all incore transactions + * @sb: the filesystem + * + * This function is called every time sync(2) is called. + * After this exits, all dirty buffers and synced. + */ + +static void gfs2_write_super(struct super_block *sb) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + atomic_inc(&sdp->sd_ops_super); + gfs2_log_flush(sdp); +} + +/** + * gfs2_write_super_lockfs - prevent further writes to the filesystem + * @sb: the VFS structure for the filesystem + * + */ + +static void gfs2_write_super_lockfs(struct super_block *sb) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + int error; + + atomic_inc(&sdp->sd_ops_super); + + for (;;) { + error = gfs2_freeze_fs(sdp); + if (!error) + break; + + switch (error) { + case -EBUSY: + printk("GFS2: fsid=%s: " + "waiting for recovery before freeze\n", + sdp->sd_fsname); + break; + + default: + printk("GFS2: fsid=%s: error freezing FS: %d\n", + sdp->sd_fsname, error); + break; + } + + printk("GFS2: fsid=%s: retrying...\n", sdp->sd_fsname); + msleep(1000); + } +} + +/** + * gfs2_unlockfs - reallow writes to the filesystem + * @sb: the VFS structure for the filesystem + * + */ + +static void gfs2_unlockfs(struct super_block *sb) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + + atomic_inc(&sdp->sd_ops_super); + gfs2_unfreeze_fs(sdp); +} + +/** + * gfs2_statfs - Gather and return stats about the filesystem + * @sb: The superblock + * @statfsbuf: The buffer + * + * Returns: 0 on success or error code + */ + +static int gfs2_statfs(struct super_block *sb, struct kstatfs *buf) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + struct gfs2_statfs_change sc; + int error; + + atomic_inc(&sdp->sd_ops_super); + + if (gfs2_tune_get(sdp, gt_statfs_slow)) + error = gfs2_statfs_slow(sdp, &sc); + else + error = gfs2_statfs_i(sdp, &sc); + + if (error) + return error; + + memset(buf, 0, sizeof(struct kstatfs)); + + buf->f_type = GFS2_MAGIC; + buf->f_bsize = sdp->sd_sb.sb_bsize; + buf->f_blocks = sc.sc_total; + buf->f_bfree = sc.sc_free; + buf->f_bavail = sc.sc_free; + buf->f_files = sc.sc_dinodes + sc.sc_free; + buf->f_ffree = sc.sc_free; + buf->f_namelen = GFS2_FNAMESIZE; + + return 0; +} + +/** + * gfs2_remount_fs - called when the FS is remounted + * @sb: the filesystem + * @flags: the remount flags + * @data: extra data passed in (not used right now) + * + * Returns: errno + */ + +static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + int error; + + atomic_inc(&sdp->sd_ops_super); + + error = gfs2_mount_args(sdp, data, TRUE); + if (error) + return error; + + if (sdp->sd_args.ar_spectator) + *flags |= MS_RDONLY; + else { + if (*flags & MS_RDONLY) { + if (!(sb->s_flags & MS_RDONLY)) + error = gfs2_make_fs_ro(sdp); + } else if (!(*flags & MS_RDONLY) && + (sb->s_flags & MS_RDONLY)) { + error = gfs2_make_fs_rw(sdp); + } + } + + if (*flags & (MS_NOATIME | MS_NODIRATIME)) + set_bit(SDF_NOATIME, &sdp->sd_flags); + else + clear_bit(SDF_NOATIME, &sdp->sd_flags); + + /* Don't let the VFS update atimes. GFS2 handles this itself. */ + *flags |= MS_NOATIME | MS_NODIRATIME; + + return error; +} + +/** + * gfs2_clear_inode - Deallocate an inode when VFS is done with it + * @inode: The VFS inode + * + */ + +static void gfs2_clear_inode(struct inode *inode) +{ + struct gfs2_inode *ip = get_v2ip(inode); + + atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_super); + + if (ip) { + spin_lock(&ip->i_spin); + ip->i_vnode = NULL; + set_v2ip(inode, NULL); + spin_unlock(&ip->i_spin); + + gfs2_glock_schedule_for_reclaim(ip->i_gl); + gfs2_inode_put(ip); + } +} + +/** + * gfs2_show_options - Show mount options for /proc/mounts + * @s: seq_file structure + * @mnt: vfsmount + * + * Returns: 0 on success or error code + */ + +static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) +{ + struct gfs2_sbd *sdp = get_v2sdp(mnt->mnt_sb); + struct gfs2_args *args = &sdp->sd_args; + + atomic_inc(&sdp->sd_ops_super); + + if (args->ar_lockproto[0]) + seq_printf(s, ",lockproto=%s", args->ar_lockproto); + if (args->ar_locktable[0]) + seq_printf(s, ",locktable=%s", args->ar_locktable); + if (args->ar_hostdata[0]) + seq_printf(s, ",hostdata=%s", args->ar_hostdata); + if (args->ar_spectator) + seq_printf(s, ",spectator"); + if (args->ar_ignore_local_fs) + seq_printf(s, ",ignore_local_fs"); + if (args->ar_localflocks) + seq_printf(s, ",localflocks"); + if (args->ar_localcaching) + seq_printf(s, ",localcaching"); + if (args->ar_oopses_ok) + seq_printf(s, ",oopses_ok"); + if (args->ar_debug) + seq_printf(s, ",debug"); + if (args->ar_upgrade) + seq_printf(s, ",upgrade"); + if (args->ar_num_glockd != GFS2_GLOCKD_DEFAULT) + seq_printf(s, ",num_glockd=%u", args->ar_num_glockd); + if (args->ar_posix_acl) + seq_printf(s, ",acl"); + if (args->ar_quota != GFS2_QUOTA_DEFAULT) { + char *state; + switch (args->ar_quota) { + case GFS2_QUOTA_OFF: + state = "off"; + break; + case GFS2_QUOTA_ACCOUNT: + state = "account"; + break; + case GFS2_QUOTA_ON: + state = "on"; + break; + default: + state = "unknown"; + break; + } + seq_printf(s, ",quota=%s", state); + } + if (args->ar_suiddir) + seq_printf(s, ",suiddir"); + if (args->ar_data != GFS2_DATA_DEFAULT) { + char *state; + switch (args->ar_data) { + case GFS2_DATA_WRITEBACK: + state = "writeback"; + break; + case GFS2_DATA_ORDERED: + state = "ordered"; + break; + default: + state = "unknown"; + break; + } + seq_printf(s, ",data=%s", state); + } + + return 0; +} + +struct super_operations gfs2_super_ops = { + .write_inode = gfs2_write_inode, + .put_super = gfs2_put_super, + .write_super = gfs2_write_super, + .write_super_lockfs = gfs2_write_super_lockfs, + .unlockfs = gfs2_unlockfs, + .statfs = gfs2_statfs, + .remount_fs = gfs2_remount_fs, + .clear_inode = gfs2_clear_inode, + .show_options = gfs2_show_options, +}; + --- a/fs/gfs2/ops_super.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_super.h 2005-08-01 14:13:08.013807592 +0800 @@ -0,0 +1,19 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_SUPER_DOT_H__ +#define __OPS_SUPER_DOT_H__ + +extern struct super_operations gfs2_super_ops; + +#endif /* __OPS_SUPER_DOT_H__ */ --- a/fs/gfs2/ops_vm.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_vm.c 2005-08-01 14:13:08.013807592 +0800 @@ -0,0 +1,236 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "glock.h" +#include "inode.h" +#include "ops_vm.h" +#include "page.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +/** + * pfault_be_greedy - + * @ip: + * + */ + +static void pfault_be_greedy(struct gfs2_inode *ip) +{ + unsigned int time; + + spin_lock(&ip->i_spin); + time = ip->i_greedy; + ip->i_last_pfault = jiffies; + spin_unlock(&ip->i_spin); + + gfs2_inode_hold(ip); + if (gfs2_glock_be_greedy(ip->i_gl, time)) + gfs2_inode_put(ip); +} + +/** + * gfs2_private_nopage - + * @area: + * @address: + * @type: + * + * Returns: the page + */ + +static struct page *gfs2_private_nopage(struct vm_area_struct *area, + unsigned long address, int *type) +{ + struct gfs2_inode *ip = get_v2ip(area->vm_file->f_mapping->host); + struct gfs2_holder i_gh; + struct page *result; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_vm); + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); + if (error) + return NULL; + + set_bit(GIF_PAGED, &ip->i_flags); + + result = filemap_nopage(area, address, type); + + if (result && result != NOPAGE_OOM) + pfault_be_greedy(ip); + + gfs2_glock_dq_uninit(&i_gh); + + return result; +} + +/** + * alloc_page_backing - + * @ip: + * @index: + * + * Returns: errno + */ + +static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + unsigned long index = page->index; + uint64_t lblock = index << (PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift); + unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift; + struct gfs2_alloc *al; + unsigned int data_blocks, ind_blocks; + unsigned int x; + int error; + + al = gfs2_alloc_get(ip); + + error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out; + + error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto out_gunlock_q; + + gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, + &data_blocks, &ind_blocks); + + al->al_requested = data_blocks + ind_blocks; + + error = gfs2_inplace_reserve(ip); + if (error) + goto out_gunlock_q; + + error = gfs2_trans_begin(sdp, + al->al_rgd->rd_ri.ri_length + + ind_blocks + RES_DINODE + + RES_STATFS + RES_QUOTA, 0); + if (error) + goto out_ipres; + + if (gfs2_is_stuffed(ip)) { + error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, NULL); + if (error) + goto out_trans; + } + + for (x = 0; x < blocks; ) { + uint64_t dblock; + unsigned int extlen; + int new = TRUE; + + error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen); + if (error) + goto out_trans; + + lblock += extlen; + x += extlen; + } + + gfs2_assert_warn(sdp, al->al_alloced); + + out_trans: + gfs2_trans_end(sdp); + + out_ipres: + gfs2_inplace_release(ip); + + out_gunlock_q: + gfs2_quota_unlock(ip); + + out: + gfs2_alloc_put(ip); + + return error; +} + +/** + * gfs2_sharewrite_nopage - + * @area: + * @address: + * @type: + * + * Returns: the page + */ + +static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area, + unsigned long address, int *type) +{ + struct gfs2_inode *ip = get_v2ip(area->vm_file->f_mapping->host); + struct gfs2_holder i_gh; + struct page *result = NULL; + unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; + int alloc_required; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_vm); + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + return NULL; + + if (gfs2_is_jdata(ip)) + goto out; + + set_bit(GIF_PAGED, &ip->i_flags); + set_bit(GIF_SW_PAGED, &ip->i_flags); + + error = gfs2_write_alloc_required(ip, + (uint64_t)index << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE, &alloc_required); + if (error) + goto out; + + result = filemap_nopage(area, address, type); + if (!result || result == NOPAGE_OOM) + goto out; + + if (alloc_required) { + error = alloc_page_backing(ip, result); + if (error) { + page_cache_release(result); + result = NULL; + goto out; + } + set_page_dirty(result); + } + + pfault_be_greedy(ip); + + out: + gfs2_glock_dq_uninit(&i_gh); + + return result; +} + +struct vm_operations_struct gfs2_vm_ops_private = { + .nopage = gfs2_private_nopage, +}; + +struct vm_operations_struct gfs2_vm_ops_sharewrite = { + .nopage = gfs2_sharewrite_nopage, +}; + --- a/fs/gfs2/ops_vm.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/ops_vm.h 2005-08-01 14:13:08.013807592 +0800 @@ -0,0 +1,20 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __OPS_VM_DOT_H__ +#define __OPS_VM_DOT_H__ + +extern struct vm_operations_struct gfs2_vm_ops_private; +extern struct vm_operations_struct gfs2_vm_ops_sharewrite; + +#endif /* __OPS_VM_DOT_H__ */ --- a/fs/gfs2/page.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/page.c 2005-08-01 14:13:08.013807592 +0800 @@ -0,0 +1,278 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "inode.h" +#include "page.h" +#include "trans.h" + +/** + * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock + * @gl: the glock + * + */ + +void gfs2_pte_inval(struct gfs2_glock *gl) +{ + struct gfs2_inode *ip; + struct inode *inode; + + ip = get_gl2ip(gl); + if (!ip || !S_ISREG(ip->i_di.di_mode)) + return; + + if (!test_bit(GIF_PAGED, &ip->i_flags)) + return; + + inode = gfs2_ip2v(ip, NO_CREATE); + if (inode) { + unmap_shared_mapping_range(inode->i_mapping, 0, 0); + iput(inode); + + if (test_bit(GIF_SW_PAGED, &ip->i_flags)) + set_bit(GLF_DIRTY, &gl->gl_flags); + } + + clear_bit(GIF_SW_PAGED, &ip->i_flags); +} + +/** + * gfs2_page_inval - Invalidate all pages associated with a glock + * @gl: the glock + * + */ + +void gfs2_page_inval(struct gfs2_glock *gl) +{ + struct gfs2_inode *ip; + struct inode *inode; + + ip = get_gl2ip(gl); + if (!ip || !S_ISREG(ip->i_di.di_mode)) + return; + + inode = gfs2_ip2v(ip, NO_CREATE); + if (inode) { + struct address_space *mapping = inode->i_mapping; + + truncate_inode_pages(mapping, 0); + gfs2_assert_withdraw(ip->i_sbd, !mapping->nrpages); + + iput(inode); + } + + clear_bit(GIF_PAGED, &ip->i_flags); +} + +/** + * gfs2_page_sync - Sync the data pages (not metadata) associated with a glock + * @gl: the glock + * @flags: DIO_START | DIO_WAIT + * + * Syncs data (not metadata) for a regular file. + * No-op for all other types. + */ + +void gfs2_page_sync(struct gfs2_glock *gl, int flags) +{ + struct gfs2_inode *ip; + struct inode *inode; + + ip = get_gl2ip(gl); + if (!ip || !S_ISREG(ip->i_di.di_mode)) + return; + + inode = gfs2_ip2v(ip, NO_CREATE); + if (inode) { + struct address_space *mapping = inode->i_mapping; + int error = 0; + + if (flags & DIO_START) + filemap_fdatawrite(mapping); + if (!error && (flags & DIO_WAIT)) + error = filemap_fdatawait(mapping); + + /* Put back any errors cleared by filemap_fdatawait() + so they can be caught by someone who can pass them + up to user space. */ + + if (error == -ENOSPC) + set_bit(AS_ENOSPC, &mapping->flags); + else if (error) + set_bit(AS_EIO, &mapping->flags); + + iput(inode); + } +} + +/** + * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page + * @ip: the inode + * @dibh: the dinode buffer + * @block: the block number that was allocated + * @private: any locked page held by the caller process + * + * Returns: errno + */ + +int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct inode *inode = ip->i_vnode; + struct page *page = (struct page *)private; + struct buffer_head *bh; + int release = FALSE; + + if (!page || page->index) { + page = grab_cache_page(inode->i_mapping, 0); + if (!page) + return -ENOMEM; + release = TRUE; + } + + if (!PageUptodate(page)) { + void *kaddr = kmap(page); + + memcpy(kaddr, + dibh->b_data + sizeof(struct gfs2_dinode), + ip->i_di.di_size); + memset(kaddr + ip->i_di.di_size, + 0, + PAGE_CACHE_SIZE - ip->i_di.di_size); + kunmap(page); + + SetPageUptodate(page); + } + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, + (1 << BH_Uptodate)); + + bh = page_buffers(page); + + if (!buffer_mapped(bh)) + map_bh(bh, inode->i_sb, block); + + set_buffer_uptodate(bh); + if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) + gfs2_trans_add_databuf(sdp, bh); + mark_buffer_dirty(bh); + + if (release) { + unlock_page(page); + page_cache_release(page); + } + + return 0; +} + +/** + * gfs2_truncator_page - truncate a partial data block in the page cache + * @ip: the inode + * @size: the size the file should be + * + * Returns: errno + */ + +int gfs2_truncator_page(struct gfs2_inode *ip, uint64_t size) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct inode *inode = ip->i_vnode; + struct page *page; + struct buffer_head *bh; + void *kaddr; + uint64_t lbn, dbn; + unsigned long index; + unsigned int offset; + unsigned int bufnum; + int new = FALSE; + int error; + + lbn = size >> inode->i_blkbits; + error = gfs2_block_map(ip, lbn, &new, &dbn, NULL); + if (error || !dbn) + return error; + + index = size >> PAGE_CACHE_SHIFT; + offset = size & (PAGE_CACHE_SIZE - 1); + bufnum = lbn - (index << (PAGE_CACHE_SHIFT - inode->i_blkbits)); + + page = read_cache_page(inode->i_mapping, index, + (filler_t *)inode->i_mapping->a_ops->readpage, + NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + + lock_page(page); + + if (!PageUptodate(page) || PageError(page)) { + error = -EIO; + goto out; + } + + kaddr = kmap(page); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + kunmap(page); + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, + (1 << BH_Uptodate)); + + for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page) + /* Do nothing */; + + if (!buffer_mapped(bh)) + map_bh(bh, inode->i_sb, dbn); + + set_buffer_uptodate(bh); + if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) + gfs2_trans_add_databuf(sdp, bh); + mark_buffer_dirty(bh); + + out: + unlock_page(page); + page_cache_release(page); + + return error; +} + +void gfs2_page_add_databufs(struct gfs2_sbd *sdp, struct page *page, + unsigned int from, unsigned int to) +{ + struct buffer_head *head = page_buffers(page); + unsigned int bsize = head->b_size; + struct buffer_head *bh; + unsigned int start, end; + + for (bh = head, start = 0; + bh != head || !start; + bh = bh->b_this_page, start = end) { + end = start + bsize; + if (end <= from || start >= to) + continue; + gfs2_trans_add_databuf(sdp, bh); + } +} + --- a/fs/gfs2/page.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/page.h 2005-08-01 14:13:08.013807592 +0800 @@ -0,0 +1,27 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __PAGE_DOT_H__ +#define __PAGE_DOT_H__ + +void gfs2_pte_inval(struct gfs2_glock *gl); +void gfs2_page_inval(struct gfs2_glock *gl); +void gfs2_page_sync(struct gfs2_glock *gl, int flags); + +int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private); +int gfs2_truncator_page(struct gfs2_inode *ip, uint64_t size); +void gfs2_page_add_databufs(struct gfs2_sbd *sdp, struct page *page, + unsigned int from, unsigned int to); + +#endif /* __PAGE_DOT_H__ */ --- a/fs/gfs2/super.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/super.c 2005-08-01 14:13:08.014807440 +0800 @@ -0,0 +1,965 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "dir.h" +#include "format.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "log.h" +#include "meta_io.h" +#include "quota.h" +#include "recovery.h" +#include "rgrp.h" +#include "super.h" +#include "trans.h" +#include "unlinked.h" + +/** + * gfs2_tune_init - Fill a gfs2_tune structure with default values + * @gt: tune + * + */ + +void gfs2_tune_init(struct gfs2_tune *gt) +{ + spin_lock_init(>->gt_spin); + + gt->gt_ilimit = 100; + gt->gt_ilimit_tries = 3; + gt->gt_ilimit_min = 1; + gt->gt_demote_secs = 300; + gt->gt_incore_log_blocks = 1024; + gt->gt_log_flush_secs = 60; + gt->gt_jindex_refresh_secs = 60; + gt->gt_scand_secs = 15; + gt->gt_recoverd_secs = 60; + gt->gt_logd_secs = 1; + gt->gt_quotad_secs = 5; + gt->gt_inoded_secs = 15; + gt->gt_quota_simul_sync = 64; + gt->gt_quota_warn_period = 10; + gt->gt_quota_scale_num = 1; + gt->gt_quota_scale_den = 1; + gt->gt_quota_cache_secs = 300; + gt->gt_quota_quantum = 60; + gt->gt_atime_quantum = 3600; + gt->gt_new_files_jdata = 0; + gt->gt_new_files_directio = 0; + gt->gt_max_atomic_write = 4 << 20; + gt->gt_max_readahead = 1 << 18; + gt->gt_lockdump_size = 131072; + gt->gt_stall_secs = 600; + gt->gt_complain_secs = 10; + gt->gt_reclaim_limit = 5000; + gt->gt_entries_per_readdir = 32; + gt->gt_prefetch_secs = 10; + gt->gt_greedy_default = HZ / 10; + gt->gt_greedy_quantum = HZ / 40; + gt->gt_greedy_max = HZ / 4; + gt->gt_statfs_quantum = 30; + gt->gt_statfs_slow = 0; +} + +/** + * gfs2_check_sb - Check superblock + * @sdp: the filesystem + * @sb: The superblock + * @silent: Don't print a message if the check fails + * + * Checks the version code of the FS is one that we understand how to + * read and that the sizes of the various on-disk structures have not + * changed. + */ + +int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent) +{ + unsigned int x; + + if (sb->sb_header.mh_magic != GFS2_MAGIC || + sb->sb_header.mh_type != GFS2_METATYPE_SB) { + if (!silent) + printk("GFS2: not a GFS2 filesystem\n"); + return -EINVAL; + } + + /* If format numbers match exactly, we're done. */ + + if (sb->sb_fs_format == GFS2_FORMAT_FS && + sb->sb_multihost_format == GFS2_FORMAT_MULTI) + return 0; + + if (sb->sb_fs_format != GFS2_FORMAT_FS) { + for (x = 0; gfs2_old_fs_formats[x]; x++) + if (gfs2_old_fs_formats[x] == sb->sb_fs_format) + break; + + if (!gfs2_old_fs_formats[x]) { + printk("GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk("GFS2: I don't know how to upgrade this FS\n"); + return -EINVAL; + } + } + + if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { + for (x = 0; gfs2_old_multihost_formats[x]; x++) + if (gfs2_old_multihost_formats[x] == sb->sb_multihost_format) + break; + + if (!gfs2_old_multihost_formats[x]) { + printk("GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk("GFS2: I don't know how to upgrade this FS\n"); + return -EINVAL; + } + } + + if (!sdp->sd_args.ar_upgrade) { + printk("GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk("GFS2: Use the \"upgrade\" mount option to upgrade " + "the FS\n"); + printk("GFS2: See the manual for more details\n"); + return -EINVAL; + } + + return 0; +} + +/** + * gfs2_read_sb - Read super block + * @sdp: The GFS2 superblock + * @gl: the glock for the superblock (assumed to be held) + * @silent: Don't print message if mount fails + * + */ + +int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) +{ + struct buffer_head *bh; + uint32_t hash_blocks, ind_blocks, leaf_blocks; + uint32_t tmp_blocks; + unsigned int x; + int error; + + error = gfs2_meta_read(gl, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, + DIO_FORCE | DIO_START | DIO_WAIT, &bh); + if (error) { + if (!silent) + printk("GFS2: fsid=%s: can't read superblock\n", + sdp->sd_fsname); + return error; + } + + gfs2_assert(sdp, sizeof(struct gfs2_sb) <= bh->b_size,); + gfs2_sb_in(&sdp->sd_sb, bh->b_data); + brelse(bh); + + error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); + if (error) + return error; + + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - + GFS2_BASIC_BLOCK_SHIFT; + sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; + sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode)) / sizeof(uint64_t); + sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_meta_header)) / sizeof(uint64_t); + sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); + sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; + sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; + sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(uint64_t); + sdp->sd_ut_per_block = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_meta_header)) / + sizeof(struct gfs2_unlinked_tag); + sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_meta_header)) / + sizeof(struct gfs2_quota_change); + + /* Compute maximum reservation required to add a entry to a directory */ + + hash_blocks = DIV_RU(sizeof(uint64_t) * (1 << GFS2_DIR_MAX_DEPTH), + sdp->sd_jbsize); + + ind_blocks = 0; + for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { + tmp_blocks = DIV_RU(tmp_blocks, sdp->sd_inptrs); + ind_blocks += tmp_blocks; + } + + leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; + + sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; + + sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode); + sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; + for (x = 2;; x++) { + uint64_t space, d; + uint32_t m; + + space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; + d = space; + m = do_div(d, sdp->sd_inptrs); + + if (d != sdp->sd_heightsize[x - 1] || m) + break; + sdp->sd_heightsize[x] = space; + } + sdp->sd_max_height = x; + gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT,); + + sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode); + sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; + for (x = 2;; x++) { + uint64_t space, d; + uint32_t m; + + space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; + d = space; + m = do_div(d, sdp->sd_inptrs); + + if (d != sdp->sd_jheightsize[x - 1] || m) + break; + sdp->sd_jheightsize[x] = space; + } + sdp->sd_max_jheight = x; + gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT,); + + return 0; +} + +/** + * gfs2_do_upgrade - upgrade a filesystem + * @sdp: The GFS2 superblock + * + */ + +int gfs2_do_upgrade(struct gfs2_sbd *sdp, struct gfs2_glock *sb_gl) +{ + return 0; +} + +/** + * gfs2_jindex_hold - Grab a lock on the jindex + * @sdp: The GFS2 superblock + * @ji_gh: the holder for the jindex glock + * + * This is very similar to the gfs2_rindex_hold() function, except that + * in general we hold the jindex lock for longer periods of time and + * we grab it far less frequently (in general) then the rgrp lock. + * + * Returns: errno + */ + +int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) +{ + struct gfs2_inode *dip = sdp->sd_jindex; + struct qstr name; + char buf[20]; + struct gfs2_jdesc *jd; + int error; + + name.name = buf; + + down(&sdp->sd_jindex_mutex); + + for (;;) { + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, + GL_LOCAL_EXCL, ji_gh); + if (error) + break; + + name.len = sprintf(buf, "journal%u", sdp->sd_journals); + + error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL); + if (error == -ENOENT) { + error = 0; + break; + } + + gfs2_glock_dq_uninit(ji_gh); + + if (error) + break; + + error = -ENOMEM; + jd = kmalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL); + if (!jd) + break; + memset(jd, 0, sizeof(struct gfs2_jdesc)); + + error = gfs2_lookupi(dip, &name, TRUE, &jd->jd_inode); + if (error) { + kfree(jd); + break; + } + + spin_lock(&sdp->sd_jindex_spin); + jd->jd_jid = sdp->sd_journals++; + list_add_tail(&jd->jd_list, &sdp->sd_jindex_list); + spin_unlock(&sdp->sd_jindex_spin); + } + + up(&sdp->sd_jindex_mutex); + + return error; +} + +/** + * gfs2_jindex_free - Clear all the journal index information + * @sdp: The GFS2 superblock + * + */ + +void gfs2_jindex_free(struct gfs2_sbd *sdp) +{ + struct list_head list; + struct gfs2_jdesc *jd; + + spin_lock(&sdp->sd_jindex_spin); + list_add(&list, &sdp->sd_jindex_list); + list_del_init(&sdp->sd_jindex_list); + sdp->sd_journals = 0; + spin_unlock(&sdp->sd_jindex_spin); + + while (!list_empty(&list)) { + jd = list_entry(list.next, struct gfs2_jdesc, jd_list); + list_del(&jd->jd_list); + gfs2_inode_put(jd->jd_inode); + kfree(jd); + } +} + +static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid) +{ + struct gfs2_jdesc *jd; + int found = FALSE; + + list_for_each_entry(jd, head, jd_list) { + if (jd->jd_jid == jid) { + found = TRUE; + break; + } + } + + if (!found) + jd = NULL; + + return jd; +} + +struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid) +{ + struct gfs2_jdesc *jd; + + spin_lock(&sdp->sd_jindex_spin); + jd = jdesc_find_i(&sdp->sd_jindex_list, jid); + spin_unlock(&sdp->sd_jindex_spin); + + return jd; +} + +void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid) +{ + struct gfs2_jdesc *jd; + + spin_lock(&sdp->sd_jindex_spin); + jd = jdesc_find_i(&sdp->sd_jindex_list, jid); + if (jd) + jd->jd_dirty = TRUE; + spin_unlock(&sdp->sd_jindex_spin); +} + +struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp) +{ + struct gfs2_jdesc *jd; + int found = FALSE; + + spin_lock(&sdp->sd_jindex_spin); + + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { + if (jd->jd_dirty) { + jd->jd_dirty = FALSE; + found = TRUE; + break; + } + } + spin_unlock(&sdp->sd_jindex_spin); + + if (!found) + jd = NULL; + + return jd; +} + +int gfs2_jdesc_check(struct gfs2_jdesc *jd) +{ + struct gfs2_inode *ip = jd->jd_inode; + struct gfs2_sbd *sdp = ip->i_sbd; + int ar; + int error; + + if (ip->i_di.di_size < (8 << 20) || + ip->i_di.di_size > (1 << 30) || + (ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) { + gfs2_consist_inode(ip); + return -EIO; + } + jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift; + + error = gfs2_write_alloc_required(ip, + 0, ip->i_di.di_size, + &ar); + if (!error && ar) { + gfs2_consist_inode(ip); + error = -EIO; + } + + return error; +} + +/** + * gfs2_lookup_master_dir - + * @sdp: The GFS2 superblock + * + * Returns: errno + */ + +int gfs2_lookup_master_dir(struct gfs2_sbd *sdp) +{ + struct gfs2_glock *gl; + int error; + + error = gfs2_glock_get(sdp, + sdp->sd_sb.sb_master_dir.no_addr, + &gfs2_inode_glops, CREATE, &gl); + if (!error) { + error = gfs2_inode_get(gl, &sdp->sd_sb.sb_master_dir, CREATE, + &sdp->sd_master_dir); + gfs2_glock_put(gl); + } + + return error; +} + +/** + * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one + * @sdp: the filesystem + * + * Returns: errno + */ + +int gfs2_make_fs_rw(struct gfs2_sbd *sdp) +{ + struct gfs2_glock *j_gl = sdp->sd_jdesc->jd_inode->i_gl; + struct gfs2_holder t_gh; + struct gfs2_log_header head; + int error; + + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, + GL_LOCAL_EXCL | GL_NEVER_RECURSE, &t_gh); + if (error) + return error; + + gfs2_meta_cache_flush(sdp->sd_jdesc->jd_inode); + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA); + + error = gfs2_find_jhead(sdp->sd_jdesc, &head); + if (error) + goto fail; + + if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { + gfs2_consist(sdp); + error = -EIO; + goto fail; + } + + /* Initialize some head of the log stuff */ + sdp->sd_log_sequence = head.lh_sequence + 1; + gfs2_log_pointers_init(sdp, head.lh_blkno); + + error = gfs2_unlinked_init(sdp); + if (error) + goto fail; + error = gfs2_quota_init(sdp); + if (error) + goto fail_unlinked; + + set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + + gfs2_glock_dq_uninit(&t_gh); + + return 0; + + fail_unlinked: + gfs2_unlinked_cleanup(sdp); + + fail: + t_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_uninit(&t_gh); + + return error; +} + +/** + * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one + * @sdp: the filesystem + * + * Returns: errno + */ + +int gfs2_make_fs_ro(struct gfs2_sbd *sdp) +{ + struct gfs2_holder t_gh; + int error; + + gfs2_unlinked_dealloc(sdp); + gfs2_quota_sync(sdp); + gfs2_statfs_sync(sdp); + + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, + GL_LOCAL_EXCL | GL_NEVER_RECURSE | GL_NOCACHE, + &t_gh); + if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + return error; + + gfs2_meta_syncfs(sdp); + gfs2_log_shutdown(sdp); + + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + + if (t_gh.gh_gl) + gfs2_glock_dq_uninit(&t_gh); + + gfs2_unlinked_cleanup(sdp); + gfs2_quota_cleanup(sdp); + + return error; +} + +int gfs2_statfs_init(struct gfs2_sbd *sdp) +{ + struct gfs2_inode *m_ip = sdp->sd_statfs_inode; + struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; + struct gfs2_inode *l_ip = sdp->sd_sc_inode; + struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct buffer_head *m_bh, *l_bh; + struct gfs2_holder gh; + int error; + + error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, + &gh); + if (error) + return error; + + error = gfs2_meta_inode_buffer(m_ip, &m_bh); + if (error) + goto out; + + if (sdp->sd_args.ar_spectator) { + spin_lock(&sdp->sd_statfs_spin); + gfs2_statfs_change_in(m_sc, m_bh->b_data + + sizeof(struct gfs2_dinode)); + spin_unlock(&sdp->sd_statfs_spin); + } else { + error = gfs2_meta_inode_buffer(l_ip, &l_bh); + if (error) + goto out_m_bh; + + spin_lock(&sdp->sd_statfs_spin); + gfs2_statfs_change_in(m_sc, m_bh->b_data + + sizeof(struct gfs2_dinode)); + gfs2_statfs_change_in(l_sc, l_bh->b_data + + sizeof(struct gfs2_dinode)); + spin_unlock(&sdp->sd_statfs_spin); + + brelse(l_bh); + } + + out_m_bh: + brelse(m_bh); + + out: + gfs2_glock_dq_uninit(&gh); + + return 0; +} + +void gfs2_statfs_change(struct gfs2_sbd *sdp, int64_t total, int64_t free, + int64_t dinodes) +{ + struct gfs2_inode *l_ip = sdp->sd_sc_inode; + struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct buffer_head *l_bh; + int error; + + error = gfs2_meta_inode_buffer(l_ip, &l_bh); + if (error) + return; + + down(&sdp->sd_statfs_mutex); + gfs2_trans_add_bh(l_ip->i_gl, l_bh); + up(&sdp->sd_statfs_mutex); + + spin_lock(&sdp->sd_statfs_spin); + l_sc->sc_total += total; + l_sc->sc_free += free; + l_sc->sc_dinodes += dinodes; + gfs2_statfs_change_out(l_sc, l_bh->b_data + + sizeof(struct gfs2_dinode)); + spin_unlock(&sdp->sd_statfs_spin); + + brelse(l_bh); +} + +int gfs2_statfs_sync(struct gfs2_sbd *sdp) +{ + struct gfs2_inode *m_ip = sdp->sd_statfs_inode; + struct gfs2_inode *l_ip = sdp->sd_sc_inode; + struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct gfs2_holder gh; + struct buffer_head *m_bh, *l_bh; + int error; + + error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, + &gh); + if (error) + return error; + + error = gfs2_meta_inode_buffer(m_ip, &m_bh); + if (error) + goto out; + + spin_lock(&sdp->sd_statfs_spin); + gfs2_statfs_change_in(m_sc, m_bh->b_data + + sizeof(struct gfs2_dinode)); + if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) { + spin_unlock(&sdp->sd_statfs_spin); + goto out_bh; + } + spin_unlock(&sdp->sd_statfs_spin); + + error = gfs2_meta_inode_buffer(l_ip, &l_bh); + if (error) + goto out_bh; + + error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0); + if (error) + goto out_bh2; + + down(&sdp->sd_statfs_mutex); + gfs2_trans_add_bh(l_ip->i_gl, l_bh); + up(&sdp->sd_statfs_mutex); + + spin_lock(&sdp->sd_statfs_spin); + m_sc->sc_total += l_sc->sc_total; + m_sc->sc_free += l_sc->sc_free; + m_sc->sc_dinodes += l_sc->sc_dinodes; + memset(l_sc, 0, sizeof(struct gfs2_statfs_change)); + memset(l_bh->b_data + sizeof(struct gfs2_dinode), + 0, sizeof(struct gfs2_statfs_change)); + spin_unlock(&sdp->sd_statfs_spin); + + gfs2_trans_add_bh(m_ip->i_gl, m_bh); + gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); + + gfs2_trans_end(sdp); + + out_bh2: + brelse(l_bh); + + out_bh: + brelse(m_bh); + + out: + gfs2_glock_dq_uninit(&gh); + + return error; +} + +/** + * gfs2_statfs_i - Do a statfs + * @sdp: the filesystem + * @sg: the sg structure + * + * Returns: errno + */ + +int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc) +{ + struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + + spin_lock(&sdp->sd_statfs_spin); + + *sc = *m_sc; + sc->sc_total += l_sc->sc_total; + sc->sc_free += l_sc->sc_free; + sc->sc_dinodes += l_sc->sc_dinodes; + + spin_unlock(&sdp->sd_statfs_spin); + + if (sc->sc_free < 0) + sc->sc_free = 0; + if (sc->sc_free > sc->sc_total) + sc->sc_free = sc->sc_total; + if (sc->sc_dinodes < 0) + sc->sc_dinodes = 0; + + return 0; +} + +/** + * statfs_fill - fill in the sg for a given RG + * @rgd: the RG + * @sc: the sc structure + * + * Returns: 0 on success, -ESTALE if the LVB is invalid + */ + +static int statfs_slow_fill(struct gfs2_rgrpd *rgd, + struct gfs2_statfs_change *sc) +{ + gfs2_rgrp_verify(rgd); + sc->sc_total += rgd->rd_ri.ri_data; + sc->sc_free += rgd->rd_rg.rg_free; + sc->sc_dinodes += rgd->rd_rg.rg_dinodes; + return 0; +} + +/** + * gfs2_statfs_slow - Stat a filesystem using asynchronous locking + * @sdp: the filesystem + * @sc: the sc info that will be returned + * + * Any error (other than a signal) will cause this routine to fall back + * to the synchronous version. + * + * FIXME: This really shouldn't busy wait like this. + * + * Returns: errno + */ + +int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc) +{ + struct gfs2_holder ri_gh; + struct gfs2_rgrpd *rgd_next; + struct gfs2_holder *gha, *gh; + unsigned int slots = 64; + unsigned int x; + int done; + int error = 0, err; + + memset(sc, 0, sizeof(struct gfs2_statfs_change)); + gha = kmalloc(slots * sizeof(struct gfs2_holder), GFP_KERNEL); + if (!gha) + return -ENOMEM; + memset(gha, 0, slots * sizeof(struct gfs2_holder)); + + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + goto out; + + rgd_next = gfs2_rgrpd_get_first(sdp); + + for (;;) { + done = TRUE; + + for (x = 0; x < slots; x++) { + gh = gha + x; + + if (gh->gh_gl && gfs2_glock_poll(gh)) { + err = gfs2_glock_wait(gh); + if (err) { + gfs2_holder_uninit(gh); + error = err; + } else { + if (!error) + error = statfs_slow_fill(get_gl2rgd(gh->gh_gl), sc); + gfs2_glock_dq_uninit(gh); + } + } + + if (gh->gh_gl) + done = FALSE; + else if (rgd_next && !error) { + error = gfs2_glock_nq_init(rgd_next->rd_gl, + LM_ST_SHARED, + GL_ASYNC, + gh); + rgd_next = gfs2_rgrpd_get_next(rgd_next); + done = FALSE; + } + + if (signal_pending(current)) + error = -ERESTARTSYS; + } + + if (done) + break; + + yield(); + } + + gfs2_glock_dq_uninit(&ri_gh); + + out: + kfree(gha); + + return error; +} + +struct lfcc { + struct list_head list; + struct gfs2_holder gh; +}; + +/** + * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all + * journals are clean + * @sdp: the file system + * @state: the state to put the transaction lock into + * @t_gh: the hold on the transaction lock + * + * Returns: errno + */ + +int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_holder *t_gh) +{ + struct gfs2_holder ji_gh; + struct gfs2_jdesc *jd; + struct lfcc *lfcc; + LIST_HEAD(list); + struct gfs2_log_header lh; + int error; + + error = gfs2_jindex_hold(sdp, &ji_gh); + if (error) + return error; + + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { + lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL); + if (!lfcc) { + error = -ENOMEM; + goto out; + } + error = gfs2_glock_nq_init(jd->jd_inode->i_gl, LM_ST_SHARED, 0, + &lfcc->gh); + if (error) { + kfree(lfcc); + goto out; + } + list_add(&lfcc->list, &list); + } + + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED, + LM_FLAG_PRIORITY | GL_NEVER_RECURSE | GL_NOCACHE, + t_gh); + + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { + error = gfs2_jdesc_check(jd); + if (error) + break; + error = gfs2_find_jhead(jd, &lh); + if (error) + break; + if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { + error = -EBUSY; + break; + } + } + + if (error) + gfs2_glock_dq_uninit(t_gh); + + out: + while (!list_empty(&list)) { + lfcc = list_entry(list.next, struct lfcc, list); + list_del(&lfcc->list); + gfs2_glock_dq_uninit(&lfcc->gh); + kfree(lfcc); + } + gfs2_glock_dq_uninit(&ji_gh); + + return error; +} + +/** + * gfs2_freeze_fs - freezes the file system + * @sdp: the file system + * + * This function flushes data and meta data for all machines by + * aquiring the transaction log exclusively. All journals are + * ensured to be in a clean state as well. + * + * Returns: errno + */ + +int gfs2_freeze_fs(struct gfs2_sbd *sdp) +{ + int error = 0; + + down(&sdp->sd_freeze_lock); + + if (!sdp->sd_freeze_count++) { + error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh); + if (error) + sdp->sd_freeze_count--; + } + + up(&sdp->sd_freeze_lock); + + return error; +} + +/** + * gfs2_unfreeze_fs - unfreezes the file system + * @sdp: the file system + * + * This function allows the file system to proceed by unlocking + * the exclusively held transaction lock. Other GFS2 nodes are + * now free to acquire the lock shared and go on with their lives. + * + */ + +void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) +{ + down(&sdp->sd_freeze_lock); + + if (sdp->sd_freeze_count && !--sdp->sd_freeze_count) + gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); + + up(&sdp->sd_freeze_lock); +} + --- a/fs/gfs2/super.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/super.h 2005-08-01 14:13:08.014807440 +0800 @@ -0,0 +1,59 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __SUPER_DOT_H__ +#define __SUPER_DOT_H__ + +void gfs2_tune_init(struct gfs2_tune *gt); + +int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent); +int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); +int gfs2_do_upgrade(struct gfs2_sbd *sdp, struct gfs2_glock *gl_sb); + +static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) +{ + unsigned int x; + spin_lock(&sdp->sd_jindex_spin); + x = sdp->sd_journals; + spin_unlock(&sdp->sd_jindex_spin); + return x; +} + +int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh); +void gfs2_jindex_free(struct gfs2_sbd *sdp); + +struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); +void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid); +struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp); +int gfs2_jdesc_check(struct gfs2_jdesc *jd); + +int gfs2_lookup_master_dir(struct gfs2_sbd *sdp); +int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, + struct gfs2_inode **ipp); + +int gfs2_make_fs_rw(struct gfs2_sbd *sdp); +int gfs2_make_fs_ro(struct gfs2_sbd *sdp); + +int gfs2_statfs_init(struct gfs2_sbd *sdp); +void gfs2_statfs_change(struct gfs2_sbd *sdp, + int64_t total, int64_t free, int64_t dinodes); +int gfs2_statfs_sync(struct gfs2_sbd *sdp); +int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc); +int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc); + +int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_holder *t_gh); +int gfs2_freeze_fs(struct gfs2_sbd *sdp); +void gfs2_unfreeze_fs(struct gfs2_sbd *sdp); + +#endif /* __SUPER_DOT_H__ */ + --- a/fs/gfs2/trans.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/trans.c 2005-08-01 14:13:08.015807288 +0800 @@ -0,0 +1,221 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "glock.h" +#include "log.h" +#include "lops.h" +#include "meta_io.h" +#include "trans.h" + +int gfs2_trans_begin_i(struct gfs2_sbd *sdp, unsigned int blocks, + unsigned int revokes, char *file, unsigned int line) +{ + struct gfs2_trans *tr; + int error; + + if (gfs2_assert_warn(sdp, !get_transaction) || + gfs2_assert_warn(sdp, blocks || revokes)) { + printk("GFS2: fsid=%s: (%s, %u)\n", + sdp->sd_fsname, file, line); + return -EINVAL; + } + + tr = kmalloc(sizeof(struct gfs2_trans), GFP_KERNEL); + if (!tr) + return -ENOMEM; + + memset(tr, 0, sizeof(struct gfs2_trans)); + tr->tr_file = file; + tr->tr_line = line; + tr->tr_blocks = blocks; + tr->tr_revokes = revokes; + tr->tr_reserved = 1; + if (blocks) + tr->tr_reserved += 1 + blocks; + if (revokes) + tr->tr_reserved += gfs2_struct2blk(sdp, revokes, + sizeof(uint64_t)); + INIT_LIST_HEAD(&tr->tr_list_buf); + + error = -ENOMEM; + tr->tr_t_gh = gfs2_holder_get(sdp->sd_trans_gl, LM_ST_SHARED, + GL_NEVER_RECURSE); + if (!tr->tr_t_gh) + goto fail; + + error = gfs2_glock_nq(tr->tr_t_gh); + if (error) + goto fail_holder_put; + + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + tr->tr_t_gh->gh_flags |= GL_NOCACHE; + error = -EROFS; + goto fail_gunlock; + } + + error = gfs2_log_reserve(sdp, tr->tr_reserved); + if (error) + goto fail_gunlock; + + set_transaction(tr); + + return 0; + + fail_gunlock: + gfs2_glock_dq(tr->tr_t_gh); + + fail_holder_put: + gfs2_holder_put(tr->tr_t_gh); + + fail: + kfree(tr); + + return error; +} + +void gfs2_trans_end(struct gfs2_sbd *sdp) +{ + struct gfs2_trans *tr; + struct gfs2_holder *t_gh; + + tr = get_transaction; + set_transaction(NULL); + + if (gfs2_assert_warn(sdp, tr)) + return; + + t_gh = tr->tr_t_gh; + tr->tr_t_gh = NULL; + + if (!tr->tr_touched) { + gfs2_log_release(sdp, tr->tr_reserved); + kfree(tr); + + gfs2_glock_dq(t_gh); + gfs2_holder_put(t_gh); + + return; + } + + if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks)) + printk("GFS2: fsid=%s: tr_num_buf = %u, tr_blocks = %u\n" + "GFS2: fsid=%s: tr_file = %s, tr_line = %u\n", + sdp->sd_fsname, tr->tr_num_buf, tr->tr_blocks, + sdp->sd_fsname, tr->tr_file, tr->tr_line); + if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes)) + printk("GFS2: fsid=%s: tr_num_revoke = %u, tr_revokes = %u\n" + "GFS2: fsid=%s: tr_file = %s, tr_line = %u\n", + sdp->sd_fsname, tr->tr_num_revoke, tr->tr_revokes, + sdp->sd_fsname, tr->tr_file, tr->tr_line); + + gfs2_log_commit(sdp, tr); + + gfs2_glock_dq(t_gh); + gfs2_holder_put(t_gh); + + if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) + gfs2_log_flush(sdp); +} + +void gfs2_trans_add_gl(struct gfs2_glock *gl) +{ + LO_ADD(gl->gl_sbd, &gl->gl_le); +} + +/** + * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction + * @gl: the glock the buffer belongs to + * @bh: The buffer to add + * + */ + +void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_bufdata *bd; + + bd = get_v2bd(bh); + if (bd) + gfs2_assert(sdp, bd->bd_gl == gl,); + else { + gfs2_meta_attach_bufdata(gl, bh); + bd = get_v2bd(bh); + } + + LO_ADD(sdp, &bd->bd_le); +} + +void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno) +{ + struct gfs2_revoke *rv = kmalloc_nofail(sizeof(struct gfs2_revoke), + GFP_KERNEL); + INIT_LE(&rv->rv_le, &gfs2_revoke_lops); + rv->rv_blkno = blkno; + LO_ADD(sdp, &rv->rv_le); +} + +void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno) +{ + struct gfs2_revoke *rv; + int found = FALSE; + + gfs2_log_lock(sdp); + + list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) { + if (rv->rv_blkno == blkno) { + list_del(&rv->rv_le.le_list); + gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); + sdp->sd_log_num_revoke--; + found = TRUE; + break; + } + } + + gfs2_log_unlock(sdp); + + if (found) { + kfree(rv); + get_transaction->tr_num_revoke_rm++; + } +} + +void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd) +{ + LO_ADD(rgd->rd_sbd, &rgd->rd_le); +} + +void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh) +{ + struct gfs2_databuf *db; + + db = get_v2db(bh); + if (!db) { + db = kmalloc_nofail(sizeof(struct gfs2_databuf), + GFP_KERNEL); + INIT_LE(&db->db_le, &gfs2_databuf_lops); + get_bh(bh); + db->db_bh = bh; + set_v2db(bh, db); + LO_ADD(sdp, &db->db_le); + } +} + --- a/fs/gfs2/trans.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/trans.h 2005-08-01 14:13:08.015807288 +0800 @@ -0,0 +1,44 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __TRANS_DOT_H__ +#define __TRANS_DOT_H__ + +#define RES_DINODE 1 +#define RES_INDIRECT 1 +#define RES_JDATA 1 +#define RES_DATA 1 +#define RES_LEAF 1 +#define RES_RG_BIT 2 +#define RES_EATTR 1 +#define RES_UNLINKED 1 +#define RES_STATFS 1 +#define RES_QUOTA 2 + +#define gfs2_trans_begin(sdp, blocks, revokes) \ +gfs2_trans_begin_i((sdp), (blocks), (revokes), __FILE__, __LINE__) + +int gfs2_trans_begin_i(struct gfs2_sbd *sdp, + unsigned int blocks, unsigned int revokes, + char *file, unsigned int line); + +void gfs2_trans_end(struct gfs2_sbd *sdp); + +void gfs2_trans_add_gl(struct gfs2_glock *gl); +void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh); +void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno); +void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno); +void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); +void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh); + +#endif /* __TRANS_DOT_H__ */ --- a/fs/gfs2/unlinked.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/unlinked.c 2005-08-01 14:13:08.015807288 +0800 @@ -0,0 +1,461 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "inode.h" +#include "meta_io.h" +#include "trans.h" +#include "unlinked.h" + +static int munge_ondisk(struct gfs2_sbd *sdp, unsigned int slot, + struct gfs2_unlinked_tag *ut) +{ + struct gfs2_inode *ip = sdp->sd_ut_inode; + unsigned int block, offset; + uint64_t dblock; + int new = FALSE; + struct buffer_head *bh; + int error; + + block = slot / sdp->sd_ut_per_block; + offset = slot % sdp->sd_ut_per_block; + + error = gfs2_block_map(ip, block, &new, &dblock, NULL); + if (error) + return error; + error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, &bh); + if (error) + return error; + if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) { + error = -EIO; + goto out; + } + + down(&sdp->sd_unlinked_mutex); + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_unlinked_tag_out(ut, bh->b_data + + sizeof(struct gfs2_meta_header) + + offset * sizeof(struct gfs2_unlinked_tag)); + up(&sdp->sd_unlinked_mutex); + + out: + brelse(bh); + + return error; +} + +static void ul_hash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + spin_lock(&sdp->sd_unlinked_spin); + list_add(&ul->ul_list, &sdp->sd_unlinked_list); + gfs2_assert(sdp, ul->ul_count,); + ul->ul_count++; + atomic_inc(&sdp->sd_unlinked_count); + spin_unlock(&sdp->sd_unlinked_spin); +} + +static void ul_unhash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + spin_lock(&sdp->sd_unlinked_spin); + list_del_init(&ul->ul_list); + gfs2_assert(sdp, ul->ul_count > 1,); + ul->ul_count--; + gfs2_assert_warn(sdp, atomic_read(&sdp->sd_unlinked_count) > 0); + atomic_dec(&sdp->sd_unlinked_count); + spin_unlock(&sdp->sd_unlinked_spin); +} + +struct gfs2_unlinked *ul_fish(struct gfs2_sbd *sdp) +{ + struct list_head *head; + struct gfs2_unlinked *ul; + int found = FALSE; + + if (sdp->sd_vfs->s_flags & MS_RDONLY) + return NULL; + + spin_lock(&sdp->sd_unlinked_spin); + + head = &sdp->sd_unlinked_list; + + list_for_each_entry(ul, head, ul_list) { + if (test_bit(ULF_LOCKED, &ul->ul_flags)) + continue; + + list_move_tail(&ul->ul_list, head); + ul->ul_count++; + set_bit(ULF_LOCKED, &ul->ul_flags); + found = TRUE; + + break; + } + + if (!found) + ul = NULL; + + spin_unlock(&sdp->sd_unlinked_spin); + + return ul; +} + +/** + * enforce_limit - limit the number of inodes waiting to be deallocated + * @sdp: the filesystem + * + * Returns: errno + */ + +void enforce_limit(struct gfs2_sbd *sdp) +{ + unsigned int tries = 0, min = 0; + int error; + + if (atomic_read(&sdp->sd_unlinked_count) >= gfs2_tune_get(sdp, gt_ilimit)) { + tries = gfs2_tune_get(sdp, gt_ilimit_tries); + min = gfs2_tune_get(sdp, gt_ilimit_min); + } + + while (tries--) { + struct gfs2_unlinked *ul = ul_fish(sdp); + if (!ul) + break; + error = gfs2_inode_dealloc(sdp, ul); + gfs2_unlinked_put(sdp, ul); + + if (!error) { + if (!--min) + break; + } else if (error != 1) + break; + } +} + +struct gfs2_unlinked *ul_alloc(struct gfs2_sbd *sdp) +{ + struct gfs2_unlinked *ul; + + ul = kmalloc(sizeof(struct gfs2_unlinked), GFP_KERNEL); + if (ul) { + memset(ul, 0, sizeof(struct gfs2_unlinked)); + INIT_LIST_HEAD(&ul->ul_list); + ul->ul_count = 1; + set_bit(ULF_LOCKED, &ul->ul_flags); + } + + return ul; +} + +int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul) +{ + unsigned int c, o = 0, b; + unsigned char byte = 0; + + enforce_limit(sdp); + + *ul = ul_alloc(sdp); + if (!*ul) + return -ENOMEM; + + spin_lock(&sdp->sd_unlinked_spin); + + for (c = 0; c < sdp->sd_unlinked_chunks; c++) + for (o = 0; o < PAGE_SIZE; o++) { + byte = sdp->sd_unlinked_bitmap[c][o]; + if (byte != 0xFF) + goto found; + } + + goto fail; + + found: + for (b = 0; b < 8; b++) + if (!(byte & (1 << b))) + break; + (*ul)->ul_slot = c * (8 * PAGE_SIZE) + o * 8 + b; + + if ((*ul)->ul_slot >= sdp->sd_unlinked_slots) + goto fail; + + sdp->sd_unlinked_bitmap[c][o] |= 1 << b; + + spin_unlock(&sdp->sd_unlinked_spin); + + return 0; + + fail: + spin_unlock(&sdp->sd_unlinked_spin); + kfree(*ul); + return -ENOSPC; +} + +void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + gfs2_assert_warn(sdp, test_and_clear_bit(ULF_LOCKED, &ul->ul_flags)); + + spin_lock(&sdp->sd_unlinked_spin); + gfs2_assert(sdp, ul->ul_count,); + ul->ul_count--; + if (!ul->ul_count) { + gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, ul->ul_slot, 0); + spin_unlock(&sdp->sd_unlinked_spin); + kfree(ul); + } else + spin_unlock(&sdp->sd_unlinked_spin); +} + +int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + int error; + + gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags)); + gfs2_assert_warn(sdp, list_empty(&ul->ul_list)); + + error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut); + if (!error) + ul_hash(sdp, ul); + + return error; +} + +int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + int error; + + gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags)); + gfs2_assert_warn(sdp, !list_empty(&ul->ul_list)); + + error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut); + + return error; +} + +int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + struct gfs2_unlinked_tag ut; + int error; + + gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags)); + gfs2_assert_warn(sdp, !list_empty(&ul->ul_list)); + + memset(&ut, 0, sizeof(struct gfs2_unlinked_tag)); + + error = munge_ondisk(sdp, ul->ul_slot, &ut); + if (error) + return error; + + ul_unhash(sdp, ul); + + return 0; +} + +/** + * gfs2_unlinked_dealloc - Go through the list of inodes to be deallocated + * @sdp: the filesystem + * + * Returns: errno + */ + +int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp) +{ + unsigned int hits, strikes; + int error; + + for (;;) { + hits = 0; + strikes = 0; + + for (;;) { + struct gfs2_unlinked *ul = ul_fish(sdp); + if (!ul) + return 0; + error = gfs2_inode_dealloc(sdp, ul); + gfs2_unlinked_put(sdp, ul); + + if (!error) { + hits++; + if (strikes) + strikes--; + } else if (error == 1) { + strikes++; + if (strikes >= atomic_read(&sdp->sd_unlinked_count)) { + error = 0; + break; + } + } else + return error; + } + + if (!hits || kthread_should_stop()) + break; + + cond_resched(); + } + + return 0; +} + +int gfs2_unlinked_init(struct gfs2_sbd *sdp) +{ + struct gfs2_inode *ip = sdp->sd_ut_inode; + unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift; + unsigned int x, slot = 0; + unsigned int found = 0; + uint64_t dblock; + uint32_t extlen = 0; + int error; + + if (!ip->i_di.di_size || + ip->i_di.di_size > (64 << 20) || + ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) { + gfs2_consist_inode(ip); + return -EIO; + } + sdp->sd_unlinked_slots = blocks * sdp->sd_ut_per_block; + sdp->sd_unlinked_chunks = DIV_RU(sdp->sd_unlinked_slots, 8 * PAGE_SIZE); + + error = -ENOMEM; + + sdp->sd_unlinked_bitmap = kmalloc(sdp->sd_unlinked_chunks * + sizeof(unsigned char *), + GFP_KERNEL); + if (!sdp->sd_unlinked_bitmap) + return error; + memset(sdp->sd_unlinked_bitmap, 0, + sdp->sd_unlinked_chunks * sizeof(unsigned char *)); + + for (x = 0; x < sdp->sd_unlinked_chunks; x++) { + sdp->sd_unlinked_bitmap[x] = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!sdp->sd_unlinked_bitmap[x]) + goto fail; + memset(sdp->sd_unlinked_bitmap[x], 0, PAGE_SIZE); + } + + for (x = 0; x < blocks; x++) { + struct buffer_head *bh; + unsigned int y; + + if (!extlen) { + int new = FALSE; + error = gfs2_block_map(ip, x, &new, &dblock, &extlen); + if (error) + goto fail; + } + gfs2_meta_ra(ip->i_gl, dblock, extlen); + error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, + &bh); + if (error) + goto fail; + error = -EIO; + if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) { + brelse(bh); + goto fail; + } + + for (y = 0; + y < sdp->sd_ut_per_block && slot < sdp->sd_unlinked_slots; + y++, slot++) { + struct gfs2_unlinked_tag ut; + struct gfs2_unlinked *ul; + + gfs2_unlinked_tag_in(&ut, bh->b_data + + sizeof(struct gfs2_meta_header) + + y * sizeof(struct gfs2_unlinked_tag)); + if (!ut.ut_inum.no_addr) + continue; + + error = -ENOMEM; + ul = ul_alloc(sdp); + if (!ul) { + brelse(bh); + goto fail; + } + ul->ul_ut = ut; + ul->ul_slot = slot; + + spin_lock(&sdp->sd_unlinked_spin); + gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, slot, 1); + spin_unlock(&sdp->sd_unlinked_spin); + ul_hash(sdp, ul); + + gfs2_unlinked_put(sdp, ul); + found++; + } + + brelse(bh); + dblock++; + extlen--; + } + + if (found) + printk("GFS2: fsid=%s: found %u unlinked inodes\n", + sdp->sd_fsname, found); + + return 0; + + fail: + gfs2_unlinked_cleanup(sdp); + return error; +} + +/** + * gfs2_unlinked_cleanup - get rid of any extra struct gfs2_unlinked structures + * @sdp: the filesystem + * + */ + +void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp) +{ + struct list_head *head = &sdp->sd_unlinked_list; + struct gfs2_unlinked *ul; + unsigned int x; + + spin_lock(&sdp->sd_unlinked_spin); + while (!list_empty(head)) { + ul = list_entry(head->next, struct gfs2_unlinked, ul_list); + + if (ul->ul_count > 1) { + list_move_tail(&ul->ul_list, head); + spin_unlock(&sdp->sd_unlinked_spin); + schedule(); + spin_lock(&sdp->sd_unlinked_spin); + continue; + } + + list_del_init(&ul->ul_list); + atomic_dec(&sdp->sd_unlinked_count); + + gfs2_assert_warn(sdp, ul->ul_count == 1); + gfs2_assert_warn(sdp, !test_bit(ULF_LOCKED, &ul->ul_flags)); + kfree(ul); + } + spin_unlock(&sdp->sd_unlinked_spin); + + gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_unlinked_count)); + + if (sdp->sd_unlinked_bitmap) { + for (x = 0; x < sdp->sd_unlinked_chunks; x++) + kfree(sdp->sd_unlinked_bitmap[x]); + kfree(sdp->sd_unlinked_bitmap); + } +} + --- a/fs/gfs2/unlinked.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/unlinked.h 2005-08-01 14:13:08.015807288 +0800 @@ -0,0 +1,29 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __UNLINKED_DOT_H__ +#define __UNLINKED_DOT_H__ + +int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul); +void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul); + +int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul); +int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul); +int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul); + +int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp); + +int gfs2_unlinked_init(struct gfs2_sbd *sdp); +void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp); + +#endif /* __UNLINKED_DOT_H__ */ --- a/fs/gfs2/util.c 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/util.c 2005-08-01 14:13:08.015807288 +0800 @@ -0,0 +1,600 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "glock.h" +#include "lm.h" + +uint32_t gfs2_random_number; + +unsigned long gfs2_malloc_warning = 0; + +kmem_cache_t *gfs2_glock_cachep = NULL; +kmem_cache_t *gfs2_inode_cachep = NULL; +kmem_cache_t *gfs2_bufdata_cachep = NULL; + +/** + * gfs2_random - Generate a random 32-bit number + * + * Generate a semi-crappy 32-bit pseudo-random number without using + * floating point. + * + * The PRNG is from "Numerical Recipes in C" (second edition), page 284. + * + * Returns: a 32-bit random number + */ + +uint32_t gfs2_random(void) +{ + gfs2_random_number = 0x0019660D * gfs2_random_number + 0x3C6EF35F; + return gfs2_random_number; +} + +/** + * hash_more_internal - hash an array of data + * @data: the data to be hashed + * @len: the length of data to be hashed + * @hash: the hash from a previous call + * + * Take some data and convert it to a 32-bit hash. + * + * This is the 32-bit FNV-1a hash from: + * http://www.isthe.com/chongo/tech/comp/fnv/ + * + * Hash guts + * + * Returns: the hash + */ + +static inline uint32_t hash_more_internal(const void *data, unsigned int len, + uint32_t hash) +{ + unsigned char *p = (unsigned char *)data; + unsigned char *e = p + len; + uint32_t h = hash; + + while (p < e) { + h ^= (uint32_t)(*p++); + h *= 0x01000193; + } + + return h; +} + +/** + * gfs2_hash - hash an array of data + * @data: the data to be hashed + * @len: the length of data to be hashed + * + * Take some data and convert it to a 32-bit hash. + * + * This is the 32-bit FNV-1a hash from: + * http://www.isthe.com/chongo/tech/comp/fnv/ + * + * Returns: the hash + */ + +uint32_t gfs2_hash(const void *data, unsigned int len) +{ + uint32_t h = 0x811C9DC5; + h = hash_more_internal(data, len, h); + return h; +} + +/** + * gfs2_hash_more - hash an array of data + * @data: the data to be hashed + * @len: the length of data to be hashed + * @hash: the hash from a previous call + * + * Take some data and convert it to a 32-bit hash. + * + * This is the 32-bit FNV-1a hash from: + * http://www.isthe.com/chongo/tech/comp/fnv/ + * + * This version let's you hash together discontinuous regions. + * For example, to compute the combined hash of the memory in + * (data1, len1), (data2, len2), and (data3, len3) you: + * + * h = gfs2_hash(data1, len1); + * h = gfs2_hash_more(data2, len2, h); + * h = gfs2_hash_more(data3, len3, h); + * + * Returns: the hash + */ + +uint32_t gfs2_hash_more(const void *data, unsigned int len, uint32_t hash) +{ + return hash_more_internal(data, len, hash); +} + +/* Byte-wise swap two items of size SIZE. */ + +#define SWAP(a, b, size) \ +do { \ + register size_t __size = (size); \ + register char *__a = (a), *__b = (b); \ + do { \ + char __tmp = *__a; \ + *__a++ = *__b; \ + *__b++ = __tmp; \ + } while (__size-- > 1); \ +} while (0) + +/** + * gfs2_sort - Sort base array using shell sort algorithm + * @base: the input array + * @num_elem: number of elements in array + * @size: size of each element in array + * @compar: fxn to compare array elements (returns negative + * for lt, 0 for eq, and positive for gt + * + * Sorts the array passed in using the compar fxn to compare elements using + * the shell sort algorithm + */ + +void gfs2_sort(void *base, unsigned int num_elem, unsigned int size, + int (*compar) (const void *, const void *)) +{ + register char *pbase = (char *)base; + int i, j, k, h; + static int cols[16] = {1391376, 463792, 198768, 86961, + 33936, 13776, 4592, 1968, + 861, 336, 112, 48, + 21, 7, 3, 1}; + + for (k = 0; k < 16; k++) { + h = cols[k]; + for (i = h; i < num_elem; i++) { + j = i; + while (j >= h && + (*compar)((void *)(pbase + size * (j - h)), + (void *)(pbase + size * j)) > 0) { + SWAP(pbase + size * j, + pbase + size * (j - h), + size); + j = j - h; + } + } + } +} + +/** + * gfs2_assert_i - Cause the machine to panic if @assertion is false + * @sdp: + * @assertion: + * @function: + * @file: + * @line: + * + */ + +void gfs2_assert_i(struct gfs2_sbd *sdp, char *assertion, const char *function, + char *file, unsigned int line) +{ + if (sdp->sd_args.ar_oopses_ok) { + printk("GFS2: fsid=%s: fatal: assertion \"%s\" failed\n" + "GFS2: fsid=%s: function = %s\n" + "GFS2: fsid=%s: file = %s, line = %u\n" + "GFS2: fsid=%s: time = %lu\n", + sdp->sd_fsname, assertion, + sdp->sd_fsname, function, + sdp->sd_fsname, file, line, + sdp->sd_fsname, get_seconds()); + BUG(); + } + dump_stack(); + panic("GFS2: fsid=%s: fatal: assertion \"%s\" failed\n" + "GFS2: fsid=%s: function = %s\n" + "GFS2: fsid=%s: file = %s, line = %u\n" + "GFS2: fsid=%s: time = %lu\n", + sdp->sd_fsname, assertion, + sdp->sd_fsname, function, + sdp->sd_fsname, file, line, + sdp->sd_fsname, get_seconds()); +} + +/** + * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false + * @sdp: + * @assertion: + * @function: + * @file: + * @line: + * + * Returns: -1 if this call withdrew the machine, + * -2 if it was already withdrawn + */ + +int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line) +{ + int me; + me = gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n" + "GFS2: fsid=%s: function = %s\n" + "GFS2: fsid=%s: file = %s, line = %u\n" + "GFS2: fsid=%s: time = %lu\n", + sdp->sd_fsname, assertion, + sdp->sd_fsname, function, + sdp->sd_fsname, file, line, + sdp->sd_fsname, get_seconds()); + return (me) ? -1 : -2; +} + +/** + * gfs2_assert_warn_i - Print a message to the console if @assertion is false + * @sdp: + * @assertion: + * @function: + * @file: + * @line: + * + * Returns: -1 if we printed something + * -2 if we didn't + */ + +int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line) +{ + if (time_before(jiffies, + sdp->sd_last_warning + + gfs2_tune_get(sdp, gt_complain_secs) * HZ)) + return -2; + + printk("GFS2: fsid=%s: warning: assertion \"%s\" failed\n" + "GFS2: fsid=%s: function = %s\n" + "GFS2: fsid=%s: file = %s, line = %u\n" + "GFS2: fsid=%s: time = %lu\n", + sdp->sd_fsname, assertion, + sdp->sd_fsname, function, + sdp->sd_fsname, file, line, + sdp->sd_fsname, get_seconds()); + + if (sdp->sd_args.ar_debug) + BUG(); + + sdp->sd_last_warning = jiffies; + + return -1; +} + +/** + * gfs2_consist_i - Flag a filesystem consistency error and withdraw + * @sdp: + * @cluster_wide: + * @function: + * @file: + * @line: + * + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function, + char *file, unsigned int line) +{ + return gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: filesystem consistency error\n" + "GFS2: fsid=%s: function = %s\n" + "GFS2: fsid=%s: file = %s, line = %u\n" + "GFS2: fsid=%s: time = %lu\n", + sdp->sd_fsname, + sdp->sd_fsname, function, + sdp->sd_fsname, file, line, + sdp->sd_fsname, get_seconds()); +} + +/** + * gfs2_consist_inode_i - Flag an inode consistency error and withdraw + * @ip: + * @cluster_wide: + * @function: + * @file: + * @line: + * + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, + const char *function, char *file, unsigned int line) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + return gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: filesystem consistency error\n" + "GFS2: fsid=%s: inode = %"PRIu64"/%"PRIu64"\n" + "GFS2: fsid=%s: function = %s\n" + "GFS2: fsid=%s: file = %s, line = %u\n" + "GFS2: fsid=%s: time = %lu\n", + sdp->sd_fsname, + sdp->sd_fsname, + ip->i_num.no_formal_ino, ip->i_num.no_addr, + sdp->sd_fsname, function, + sdp->sd_fsname, file, line, + sdp->sd_fsname, get_seconds()); +} + +/** + * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw + * @rgd: + * @cluster_wide: + * @function: + * @file: + * @line: + * + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, + const char *function, char *file, unsigned int line) +{ + struct gfs2_sbd *sdp = rgd->rd_sbd; + return gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: filesystem consistency error\n" + "GFS2: fsid=%s: RG = %"PRIu64"\n" + "GFS2: fsid=%s: function = %s\n" + "GFS2: fsid=%s: file = %s, line = %u\n" + "GFS2: fsid=%s: time = %lu\n", + sdp->sd_fsname, + sdp->sd_fsname, rgd->rd_ri.ri_addr, + sdp->sd_fsname, function, + sdp->sd_fsname, file, line, + sdp->sd_fsname, get_seconds()); +} + +/** + * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw + * @sdp: + * @bh: + * @function: + * @file: + * @line: + * + * Returns: -1 if this call withdrew the machine, + * -2 if it was already withdrawn + */ + +int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, + const char *type, const char *function, char *file, + unsigned int line) +{ + int me; + me = gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: invalid metadata block\n" + "GFS2: fsid=%s: bh = %"PRIu64" (%s)\n" + "GFS2: fsid=%s: function = %s\n" + "GFS2: fsid=%s: file = %s, line = %u\n" + "GFS2: fsid=%s: time = %lu\n", + sdp->sd_fsname, + sdp->sd_fsname, (uint64_t)bh->b_blocknr, type, + sdp->sd_fsname, function, + sdp->sd_fsname, file, line, + sdp->sd_fsname, get_seconds()); + return (me) ? -1 : -2; +} + +/** + * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw + * @sdp: + * @bh: + * @type: + * @t: + * @function: + * @file: + * @line: + * + * Returns: -1 if this call withdrew the machine, + * -2 if it was already withdrawn + */ + +int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, + uint16_t type, uint16_t t, const char *function, + char *file, unsigned int line) +{ + int me; + me = gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: invalid metadata block\n" + "GFS2: fsid=%s: bh = %"PRIu64" (type: exp=%u, found=%u)\n" + "GFS2: fsid=%s: function = %s\n" + "GFS2: fsid=%s: file = %s, line = %u\n" + "GFS2: fsid=%s: time = %lu\n", + sdp->sd_fsname, + sdp->sd_fsname, (uint64_t)bh->b_blocknr, type, t, + sdp->sd_fsname, function, + sdp->sd_fsname, file, line, + sdp->sd_fsname, get_seconds()); + return (me) ? -1 : -2; +} + +/** + * gfs2_io_error_i - Flag an I/O error and withdraw + * @sdp: + * @function: + * @file: + * @line: + * + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, + unsigned int line) +{ + return gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: I/O error\n" + "GFS2: fsid=%s: function = %s\n" + "GFS2: fsid=%s: file = %s, line = %u\n" + "GFS2: fsid=%s: time = %lu\n", + sdp->sd_fsname, + sdp->sd_fsname, function, + sdp->sd_fsname, file, line, + sdp->sd_fsname, get_seconds()); +} + +/** + * gfs2_io_error_inode_i - Flag an inode I/O error and withdraw + * @ip: + * @function: + * @file: + * @line: + * + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_io_error_inode_i(struct gfs2_inode *ip, const char *function, + char *file, unsigned int line) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + return gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: I/O error\n" + "GFS2: fsid=%s: inode = %"PRIu64"/%"PRIu64"\n" + "GFS2: fsid=%s: function = %s\n" + "GFS2: fsid=%s: file = %s, line = %u\n" + "GFS2: fsid=%s: time = %lu\n", + sdp->sd_fsname, + sdp->sd_fsname, + ip->i_num.no_formal_ino, ip->i_num.no_addr, + sdp->sd_fsname, function, + sdp->sd_fsname, file, line, + sdp->sd_fsname, get_seconds()); +} + +/** + * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw + * @sdp: + * @bh: + * @function: + * @file: + * @line: + * + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, + const char *function, char *file, unsigned int line) +{ + return gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: I/O error\n" + "GFS2: fsid=%s: block = %"PRIu64"\n" + "GFS2: fsid=%s: function = %s\n" + "GFS2: fsid=%s: file = %s, line = %u\n" + "GFS2: fsid=%s: time = %lu\n", + sdp->sd_fsname, + sdp->sd_fsname, (uint64_t)bh->b_blocknr, + sdp->sd_fsname, function, + sdp->sd_fsname, file, line, + sdp->sd_fsname, get_seconds()); +} + +/** + * gfs2_add_bh_to_ub - copy a buffer up to user space + * @ub: the structure representing where to copy + * @bh: the buffer + * + * Returns: errno + */ + +int gfs2_add_bh_to_ub(struct gfs2_user_buffer *ub, struct buffer_head *bh) +{ + uint64_t blkno = bh->b_blocknr; + + if (ub->ub_count + sizeof(uint64_t) + bh->b_size > ub->ub_size) + return -ENOMEM; + + if (copy_to_user(ub->ub_data + ub->ub_count, + &blkno, + sizeof(uint64_t))) + return -EFAULT; + ub->ub_count += sizeof(uint64_t); + + if (copy_to_user(ub->ub_data + ub->ub_count, + bh->b_data, + bh->b_size)) + return -EFAULT; + ub->ub_count += bh->b_size; + + return 0; +} + +/** + * gfs2_printf_i - + * @buf: + * @size: + * @count: + * @fmt: + * + * Returns: 0 on success, 1 on out of space + */ + +int gfs2_printf_i(char *buf, unsigned int size, unsigned int *count, + char *fmt, ...) +{ + va_list args; + int left, out; + + if (!buf) { + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); + return 0; + } + + left = size - *count; + if (left <= 0) + return 1; + + va_start(args, fmt); + out = vsnprintf(buf + *count, left, fmt, args); + va_end(args); + + if (out < left) + *count += out; + else + return 1; + + return 0; +} + +void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, + unsigned int bit, int new_value) +{ + unsigned int c, o, b = bit; + int old_value; + + c = b / (8 * PAGE_SIZE); + b %= 8 * PAGE_SIZE; + o = b / 8; + b %= 8; + + old_value = (bitmap[c][o] & (1 << b)); + gfs2_assert_withdraw(sdp, !old_value != !new_value); + + if (new_value) + bitmap[c][o] |= 1 << b; + else + bitmap[c][o] &= ~(1 << b); +} + --- a/fs/gfs2/util.h 1970-01-01 07:30:00.000000000 +0730 +++ b/fs/gfs2/util.h 2005-08-01 14:13:08.015807288 +0800 @@ -0,0 +1,358 @@ +/****************************************************************************** +******************************************************************************* +** +** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. +** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** +** This copyrighted material is made available to anyone wishing to use, +** modify, copy, or redistribute it subject to the terms and conditions +** of the GNU General Public License v.2. +** +******************************************************************************* +******************************************************************************/ + +#ifndef __UTIL_DOT_H__ +#define __UTIL_DOT_H__ + + +/* Utility functions */ + +extern uint32_t gfs2_random_number; +uint32_t gfs2_random(void); + +uint32_t gfs2_hash(const void *data, unsigned int len); +uint32_t gfs2_hash_more(const void *data, unsigned int len, uint32_t hash); + +void gfs2_sort(void *base, unsigned int num_elem, unsigned int size, + int (*compar) (const void *, const void *)); + + +/* Error handling */ + +/** + * gfs2_assert - Cause the machine to panic if @assertion is false + * @sdp: + * @assertion: + * @todo: + * + */ + +void gfs2_assert_i(struct gfs2_sbd *sdp, + char *assertion, + const char *function, + char *file, unsigned int line) __attribute__ ((noreturn)); + +#define gfs2_assert(sdp, assertion, todo) \ +do { \ + if (unlikely(!(assertion))) { \ + {todo} \ + gfs2_assert_i((sdp), #assertion, \ + __FUNCTION__, __FILE__, __LINE__); \ + } \ +} while (0) + +/** + * gfs2_assert_withdraw - Cause the machine to withdraw if @assertion is false + * @sdp: + * @assertion: + * + * Returns: 0 if things are ok, + * -1 if this call withdrew the machine, + * -2 if it was already withdrawn + */ + +int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, + char *assertion, + const char *function, + char *file, unsigned int line); + +#define gfs2_assert_withdraw(sdp, assertion) \ +((likely(assertion)) ? 0 : \ + gfs2_assert_withdraw_i((sdp), #assertion, \ + __FUNCTION__, __FILE__, __LINE__)) + +/** + * gfs2_assert_warn - Print a message to the console if @assertion is false + * @sdp: + * @assertion: + * + * Returns: 0 if things are ok, + * -1 if we printed something + * -2 if we didn't + */ + +int gfs2_assert_warn_i(struct gfs2_sbd *sdp, + char *assertion, + const char *function, + char *file, unsigned int line); + +#define gfs2_assert_warn(sdp, assertion) \ +((likely(assertion)) ? 0 : \ + gfs2_assert_warn_i((sdp), #assertion, \ + __FUNCTION__, __FILE__, __LINE__)) + +/** + * gfs2_consist - Flag a filesystem consistency error and withdraw + * gfs2_cconsist - Flag a filesystem consistency error and withdraw cluster + * @sdp: + * + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, + const char *function, + char *file, unsigned int line); + +#define gfs2_consist(sdp)\ +gfs2_consist_i((sdp), FALSE, __FUNCTION__, __FILE__, __LINE__) + +#define gfs2_cconsist(sdp)\ +gfs2_consist_i((sdp), TRUE, __FUNCTION__, __FILE__, __LINE__) + +/** + * gfs2_consist_inode - Flag an inode consistency error and withdraw + * gfs2_cconsist_inode - Flag an inode consistency error and withdraw cluster + * @ip: + * + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, + const char *function, + char *file, unsigned int line); + +#define gfs2_consist_inode(ip) \ +gfs2_consist_inode_i((ip), FALSE, __FUNCTION__, __FILE__, __LINE__) + +#define gfs2_cconsist_inode(ip) \ +gfs2_consist_inode_i((ip), TRUE, __FUNCTION__, __FILE__, __LINE__) + +/** + * gfs2_consist_rgrpd - Flag a RG consistency error and withdraw + * gfs2_cconsist_rgrpd - Flag a RG consistency error and withdraw cluster + * @rgd: + * + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, + const char *function, + char *file, unsigned int line); + +#define gfs2_consist_rgrpd(rgd) \ +gfs2_consist_rgrpd_i((rgd), FALSE, __FUNCTION__, __FILE__, __LINE__) + +#define gfs2_cconsist_rgrpd(rgd) \ +gfs2_consist_rgrpd_i((rgd), TRUE, __FUNCTION__, __FILE__, __LINE__) + +/** + * gfs2_meta_check - Flag a magic number consistency error and withdraw + * @sdp: + * @bh: + * + * Returns: 0 if things are ok, + * -1 if this call withdrew the machine, + * -2 if it was already withdrawn + */ + +int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, + const char *type, + const char *function, + char *file, unsigned int line); + +static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp, + struct buffer_head *bh, + const char *function, + char *file, unsigned int line) +{ + struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; + uint32_t magic = mh->mh_magic; + uint64_t blkno = mh->mh_blkno; + magic = gfs2_32_to_cpu(magic); + blkno = gfs2_64_to_cpu(blkno); + if (unlikely(magic != GFS2_MAGIC)) + return gfs2_meta_check_ii(sdp, bh, "magic number", function, file, line); + if (unlikely(blkno != bh->b_blocknr)) + return gfs2_meta_check_ii(sdp, bh, "block number", function, file, line); + return 0; +} + +#define gfs2_meta_check(sdp, bh) \ +gfs2_meta_check_i((sdp), (bh), \ + __FUNCTION__, __FILE__, __LINE__) + +/** + * gfs2_metatype_check - Flag a metadata type consistency error and withdraw + * @sdp: + * @bh: + * @type: + * + * Returns: 0 if things are ok, + * -1 if this call withdrew the machine, + * -2 if it was already withdrawn + */ + +int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, + uint16_t type, uint16_t t, + const char *function, + char *file, unsigned int line); + +static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp, + struct buffer_head *bh, + uint16_t type, + const char *function, + char *file, unsigned int line) +{ + struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; + uint32_t magic = mh->mh_magic; + uint16_t t = mh->mh_type; + uint64_t blkno = mh->mh_blkno; + magic = gfs2_32_to_cpu(magic); + blkno = gfs2_64_to_cpu(blkno); + if (unlikely(magic != GFS2_MAGIC)) + return gfs2_meta_check_ii(sdp, bh, "magic number", function, file, line); + if (unlikely(blkno != bh->b_blocknr)) + return gfs2_meta_check_ii(sdp, bh, "block number", function, file, line); + t = gfs2_16_to_cpu(t); + if (unlikely(t != type)) + return gfs2_metatype_check_ii(sdp, bh, type, t, function, file, line); + return 0; +} + +#define gfs2_metatype_check(sdp, bh, type) \ +gfs2_metatype_check_i((sdp), (bh), (type), \ + __FUNCTION__, __FILE__, __LINE__) + +/** + * gfs2_metatype_set - set the metadata type on a buffer + * @bh: + * @type: + * @format: + * + */ + +static inline void gfs2_metatype_set(struct buffer_head *bh, uint16_t type, + uint16_t format) +{ + struct gfs2_meta_header *mh; + mh = (struct gfs2_meta_header *)bh->b_data; + mh->mh_type = cpu_to_gfs2_16(type); + mh->mh_format = cpu_to_gfs2_16(format); +} + +/** + * gfs2_io_error - Flag an I/O error and withdraw + * @sdp: + * + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_io_error_i(struct gfs2_sbd *sdp, + const char *function, + char *file, unsigned int line); + +#define gfs2_io_error(sdp) \ +gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__); + +/** + * gfs2_io_error_inode - Flag an inode I/O error and withdraw + * @ip: + * + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_io_error_inode_i(struct gfs2_inode *ip, + const char *function, + char *file, unsigned int line); + +#define gfs2_io_error_inode(ip) \ +gfs2_io_error_inode_i((ip), __FUNCTION__, __FILE__, __LINE__); + +/** + * gfs2_io_error_bh - Flag a buffer I/O error and withdraw + * @sdp: + * @bh: + * + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, + const char *function, + char *file, unsigned int line); + +#define gfs2_io_error_bh(sdp, bh) \ +gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__); + + +/* Memory stuff */ + +extern unsigned long gfs2_malloc_warning; + +#define RETRY_MALLOC(do_this, until_this) \ +for (;;) { \ + { do_this; } \ + if (until_this) \ + break; \ + if (time_after_eq(jiffies, gfs2_malloc_warning + 5 * HZ)) { \ + printk("GFS2: out of memory: %s, %u\n", __FILE__, __LINE__); \ + gfs2_malloc_warning = jiffies; \ + } \ + yield(); \ +} + +extern kmem_cache_t *gfs2_glock_cachep; +extern kmem_cache_t *gfs2_inode_cachep; +extern kmem_cache_t *gfs2_bufdata_cachep; + + +struct gfs2_user_buffer { + char *ub_data; + unsigned int ub_size; + unsigned int ub_count; +}; +int gfs2_add_bh_to_ub(struct gfs2_user_buffer *ub, struct buffer_head *bh); + + +static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, + unsigned int *p) +{ + unsigned int x; + spin_lock(>->gt_spin); + x = *p; + spin_unlock(>->gt_spin); + return x; +} + +#define gfs2_tune_get(sdp, field) \ +gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) + + +int gfs2_printf_i(char *buf, unsigned int size, unsigned int *count, + char *fmt, ...) __attribute__ ((format(printf, 4, 5))); + +#define gfs2_printf(fmt, args...) \ +do { \ + if (gfs2_printf_i(buf, size, count, fmt, ##args)) \ + goto out; \ +} while(0) + + +void gfs2_icbit_munge(struct gfs2_sbd *sdp, + unsigned char **bitmap, unsigned int bit, + int new_value); + +static inline uint64_t get_time(void) +{ + struct timeval tv; + do_gettimeofday(&tv); + return ((uint64_t)tv.tv_sec) * 1000000 + tv.tv_usec; +} + +#endif /* __UTIL_DOT_H__ */ +