[PATCH 01/16] GFS: headers Central header files that are widely used. Signed-off-by: Ken Preslan Signed-off-by: David Teigland --- fs/gfs2/gfs2.h | 63 ++ fs/gfs2/incore.h | 701 ++++++++++++++++++++++++++++ include/linux/gfs2_ioctl.h | 30 + include/linux/gfs2_ondisk.h | 1093 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1887 insertions(+) --- a/fs/gfs2/gfs2.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/gfs2.h 2005-10-10 11:28:49.167807060 -0500 @@ -0,0 +1,63 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __GFS2_DOT_H__ +#define __GFS2_DOT_H__ + +#include + +#include "locking/harness/lm_interface.h" +#include "lvb.h" +#include "incore.h" +#include "util.h" + +enum { + NO_CREATE = 0, + CREATE = 1, +}; + +enum { + NO_WAIT = 0, + WAIT = 1, +}; + +enum { + NO_FORCE = 0, + FORCE = 1, +}; + +/* Divide num by den. Round up if there is a remainder. */ +#define DIV_RU(num, den) (((num) + (den) - 1) / (den)) +#define MAKE_MULT8(x) (((x) + 7) & ~7) + +#define GFS2_FAST_NAME_SIZE 8 + +#define get_v2sdp(sb) ((struct gfs2_sbd *)(sb)->s_fs_info) +#define set_v2sdp(sb, sdp) (sb)->s_fs_info = (sdp) +#define get_v2ip(inode) ((struct gfs2_inode *)(inode)->u.generic_ip) +#define set_v2ip(inode, ip) (inode)->u.generic_ip = (ip) +#define get_v2fp(file) ((struct gfs2_file *)(file)->private_data) +#define set_v2fp(file, fp) (file)->private_data = (fp) +#define get_v2bd(bh) ((struct gfs2_bufdata *)(bh)->b_private) +#define set_v2bd(bh, bd) (bh)->b_private = (bd) +#define get_v2db(bh) ((struct gfs2_databuf *)(bh)->b_private) +#define set_v2db(bh, db) (bh)->b_private = (db) + +#define get_transaction ((struct gfs2_trans *)(current->journal_info)) +#define set_transaction(tr) (current->journal_info) = (tr) + +#define get_gl2ip(gl) ((struct gfs2_inode *)(gl)->gl_object) +#define set_gl2ip(gl, ip) (gl)->gl_object = (ip) +#define get_gl2rgd(gl) ((struct gfs2_rgrpd *)(gl)->gl_object) +#define set_gl2rgd(gl, rgd) (gl)->gl_object = (rgd) +#define get_gl2gl(gl) ((struct gfs2_glock *)(gl)->gl_object) +#define set_gl2gl(gl, gl2) (gl)->gl_object = (gl2) + +#endif /* __GFS2_DOT_H__ */ + --- a/fs/gfs2/incore.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/incore.h 2005-10-10 11:28:49.209800511 -0500 @@ -0,0 +1,701 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __INCORE_DOT_H__ +#define __INCORE_DOT_H__ + +#define DIO_FORCE 0x00000001 +#define DIO_CLEAN 0x00000002 +#define DIO_DIRTY 0x00000004 +#define DIO_START 0x00000008 +#define DIO_WAIT 0x00000010 +#define DIO_METADATA 0x00000020 +#define DIO_DATA 0x00000040 +#define DIO_RELEASE 0x00000080 +#define DIO_ALL 0x00000100 + +struct gfs2_log_operations; +struct gfs2_log_element; +struct gfs2_bitmap; +struct gfs2_rgrpd; +struct gfs2_bufdata; +struct gfs2_databuf; +struct gfs2_glock_operations; +struct gfs2_holder; +struct gfs2_glock; +struct gfs2_alloc; +struct gfs2_inode; +struct gfs2_file; +struct gfs2_revoke; +struct gfs2_revoke_replay; +struct gfs2_unlinked; +struct gfs2_quota_data; +struct gfs2_log_buf; +struct gfs2_trans; +struct gfs2_ail; +struct gfs2_jdesc; +struct gfs2_args; +struct gfs2_tune; +struct gfs2_gl_hash_bucket; +struct gfs2_sbd; + +typedef void (*gfs2_glop_bh_t) (struct gfs2_glock *gl, unsigned int ret); + +/* + * Structure of operations that are associated with each + * type of element in the log. + */ + +struct gfs2_log_operations { + void (*lo_add) (struct gfs2_sbd *sdp, struct gfs2_log_element *le); + void (*lo_incore_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); + void (*lo_before_commit) (struct gfs2_sbd *sdp); + void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_ail *ai); + void (*lo_before_scan) (struct gfs2_jdesc *jd, + struct gfs2_log_header *head, int pass); + int (*lo_scan_elements) (struct gfs2_jdesc *jd, unsigned int start, + struct gfs2_log_descriptor *ld, int pass); + void (*lo_after_scan) (struct gfs2_jdesc *jd, int error, int pass); + char *lo_name; +}; + +struct gfs2_log_element { + struct list_head le_list; + struct gfs2_log_operations *le_ops; +}; + +struct gfs2_bitmap { + struct buffer_head *bi_bh; + char *bi_clone; + uint32_t bi_offset; + uint32_t bi_start; + uint32_t bi_len; +}; + +struct gfs2_rgrpd { + struct list_head rd_list; /* Link with superblock */ + struct list_head rd_list_mru; + struct list_head rd_recent; /* Recently used rgrps */ + struct gfs2_glock *rd_gl; /* Glock for this rgrp */ + struct gfs2_rindex rd_ri; + struct gfs2_rgrp rd_rg; + uint64_t rd_rg_vn; + struct gfs2_bitmap *rd_bits; + unsigned int rd_bh_count; + struct semaphore rd_mutex; + uint32_t rd_free_clone; + struct gfs2_log_element rd_le; + uint32_t rd_last_alloc_data; + uint32_t rd_last_alloc_meta; + struct gfs2_sbd *rd_sbd; +}; + +enum gfs2_state_bits { + BH_Pinned = BH_PrivateStart, +}; + +BUFFER_FNS(Pinned, pinned) +TAS_BUFFER_FNS(Pinned, pinned) + +struct gfs2_bufdata { + struct buffer_head *bd_bh; + struct gfs2_glock *bd_gl; + + struct list_head bd_list_tr; + struct gfs2_log_element bd_le; + + struct gfs2_ail *bd_ail; + struct list_head bd_ail_st_list; + struct list_head bd_ail_gl_list; +}; + +struct gfs2_databuf { + struct gfs2_log_element db_le; + struct buffer_head *db_bh; +}; + +struct gfs2_glock_operations { + void (*go_xmote_th) (struct gfs2_glock * gl, unsigned int state, + int flags); + void (*go_xmote_bh) (struct gfs2_glock * gl); + void (*go_drop_th) (struct gfs2_glock * gl); + void (*go_drop_bh) (struct gfs2_glock * gl); + void (*go_sync) (struct gfs2_glock * gl, int flags); + void (*go_inval) (struct gfs2_glock * gl, int flags); + int (*go_demote_ok) (struct gfs2_glock * gl); + int (*go_lock) (struct gfs2_holder * gh); + void (*go_unlock) (struct gfs2_holder * gh); + void (*go_callback) (struct gfs2_glock * gl, unsigned int state); + void (*go_greedy) (struct gfs2_glock * gl); + int go_type; +}; + +enum { + /* Actions */ + HIF_MUTEX = 0, + HIF_PROMOTE = 1, + HIF_DEMOTE = 2, + HIF_GREEDY = 3, + + /* States */ + HIF_ALLOCED = 4, + HIF_DEALLOC = 5, + HIF_HOLDER = 6, + HIF_FIRST = 7, + HIF_RECURSE = 8, + HIF_ABORTED = 9, +}; + +struct gfs2_holder { + struct list_head gh_list; + + struct gfs2_glock *gh_gl; + struct task_struct *gh_owner; + unsigned int gh_state; + int gh_flags; + + int gh_error; + unsigned long gh_iflags; + struct completion gh_wait; +}; + +enum { + GLF_PLUG = 0, + GLF_LOCK = 1, + GLF_STICKY = 2, + GLF_PREFETCH = 3, + GLF_SYNC = 4, + GLF_DIRTY = 5, + GLF_SKIP_WAITERS2 = 6, + GLF_GREEDY = 7, +}; + +struct gfs2_glock { + struct list_head gl_list; + unsigned long gl_flags; /* GLF_... */ + struct lm_lockname gl_name; + struct kref gl_ref; + + spinlock_t gl_spin; + + unsigned int gl_state; + struct list_head gl_holders; + struct list_head gl_waiters1; /* HIF_MUTEX */ + struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */ + struct list_head gl_waiters3; /* HIF_PROMOTE */ + + struct gfs2_glock_operations *gl_ops; + + struct gfs2_holder *gl_req_gh; + gfs2_glop_bh_t gl_req_bh; + + lm_lock_t *gl_lock; + char *gl_lvb; + atomic_t gl_lvb_count; + + uint64_t gl_vn; + unsigned long gl_stamp; + void *gl_object; + + struct gfs2_gl_hash_bucket *gl_bucket; + struct list_head gl_reclaim; + + struct gfs2_sbd *gl_sbd; + + struct inode *gl_aspace; + struct gfs2_log_element gl_le; + struct list_head gl_ail_list; + atomic_t gl_ail_count; +}; + +struct gfs2_alloc { + /* Quota stuff */ + + unsigned int al_qd_num; + struct gfs2_quota_data *al_qd[4]; + struct gfs2_holder al_qd_ghs[4]; + + /* Filled in by the caller to gfs2_inplace_reserve() */ + + uint32_t al_requested; + + /* Filled in by gfs2_inplace_reserve() */ + + char *al_file; + unsigned int al_line; + struct gfs2_holder al_ri_gh; + struct gfs2_holder al_rgd_gh; + struct gfs2_rgrpd *al_rgd; + + /* Filled in by gfs2_alloc_*() */ + + uint32_t al_alloced; +}; + +enum { + GIF_MIN_INIT = 0, + GIF_QD_LOCKED = 1, + GIF_PAGED = 2, + GIF_SW_PAGED = 3, +}; + +struct gfs2_inode { + struct gfs2_inum i_num; + + atomic_t i_count; + unsigned long i_flags; /* GIF_... */ + + uint64_t i_vn; + struct gfs2_dinode i_di; + + struct gfs2_glock *i_gl; + struct gfs2_sbd *i_sbd; + struct inode *i_vnode; + + struct gfs2_holder i_iopen_gh; + + struct gfs2_alloc *i_alloc; + uint64_t i_last_rg_alloc; + + spinlock_t i_spin; + struct rw_semaphore i_rw_mutex; + + unsigned int i_greedy; + unsigned long i_last_pfault; + + struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT]; +}; + +enum { + GFF_DID_DIRECT_ALLOC = 0, +}; + +struct gfs2_file { + unsigned long f_flags; /* GFF_... */ + + struct semaphore f_fl_mutex; + struct gfs2_holder f_fl_gh; + + struct gfs2_inode *f_inode; + struct file *f_vfile; +}; + +struct gfs2_revoke { + struct gfs2_log_element rv_le; + uint64_t rv_blkno; +}; + +struct gfs2_revoke_replay { + struct list_head rr_list; + uint64_t rr_blkno; + unsigned int rr_where; +}; + +enum { + ULF_LOCKED = 0, +}; + +struct gfs2_unlinked { + struct list_head ul_list; + unsigned int ul_count; + struct gfs2_unlinked_tag ul_ut; + unsigned long ul_flags; /* ULF_... */ + unsigned int ul_slot; +}; + +enum { + QDF_USER = 0, + QDF_CHANGE = 1, + QDF_LOCKED = 2, +}; + +struct gfs2_quota_data { + struct list_head qd_list; + unsigned int qd_count; + + uint32_t qd_id; + unsigned long qd_flags; /* QDF_... */ + + int64_t qd_change; + int64_t qd_change_sync; + + unsigned int qd_slot; + unsigned int qd_slot_count; + + struct buffer_head *qd_bh; + struct gfs2_quota_change *qd_bh_qc; + unsigned int qd_bh_count; + + struct gfs2_glock *qd_gl; + struct gfs2_quota_lvb qd_qb; + + uint64_t qd_sync_gen; + unsigned long qd_last_warn; + unsigned long qd_last_touched; +}; + +struct gfs2_log_buf { + struct list_head lb_list; + struct buffer_head *lb_bh; + struct buffer_head *lb_real; +}; + +struct gfs2_trans { + char *tr_file; + unsigned int tr_line; + + unsigned int tr_blocks; + unsigned int tr_revokes; + unsigned int tr_reserved; + + struct gfs2_holder *tr_t_gh; + + int tr_touched; + + unsigned int tr_num_buf; + unsigned int tr_num_buf_new; + unsigned int tr_num_buf_rm; + struct list_head tr_list_buf; + + unsigned int tr_num_revoke; + unsigned int tr_num_revoke_rm; +}; + +struct gfs2_ail { + struct list_head ai_list; + + unsigned int ai_first; + struct list_head ai_ail1_list; + struct list_head ai_ail2_list; + + uint64_t ai_sync_gen; +}; + +struct gfs2_jdesc { + struct list_head jd_list; + + struct gfs2_inode *jd_inode; + unsigned int jd_jid; + int jd_dirty; + + unsigned int jd_blocks; +}; + +#define GFS2_GLOCKD_DEFAULT 1 +#define GFS2_GLOCKD_MAX 16 + +#define GFS2_QUOTA_DEFAULT GFS2_QUOTA_OFF +#define GFS2_QUOTA_OFF 0 +#define GFS2_QUOTA_ACCOUNT 1 +#define GFS2_QUOTA_ON 2 + +#define GFS2_DATA_DEFAULT GFS2_DATA_ORDERED +#define GFS2_DATA_WRITEBACK 1 +#define GFS2_DATA_ORDERED 2 + +struct gfs2_args { + char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ + char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ + char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ + int ar_spectator; /* Don't get a journal because we're always RO */ + int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */ + int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */ + int ar_localcaching; /* Local-style caching (dangerous on multihost) */ + int ar_debug; /* Oops on errors instead of trying to be graceful */ + int ar_upgrade; /* Upgrade ondisk/multihost format */ + unsigned int ar_num_glockd; /* Number of glockd threads */ + int ar_posix_acl; /* Enable posix acls */ + int ar_quota; /* off/account/on */ + int ar_suiddir; /* suiddir support */ + int ar_data; /* ordered/writeback */ +}; + +struct gfs2_tune { + spinlock_t gt_spin; + + unsigned int gt_ilimit; + unsigned int gt_ilimit_tries; + unsigned int gt_ilimit_min; + unsigned int gt_demote_secs; /* Cache retention for unheld glock */ + unsigned int gt_incore_log_blocks; + unsigned int gt_log_flush_secs; + unsigned int gt_jindex_refresh_secs; /* Check for new journal index */ + + unsigned int gt_scand_secs; + unsigned int gt_recoverd_secs; + unsigned int gt_logd_secs; + unsigned int gt_quotad_secs; + unsigned int gt_inoded_secs; + + unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */ + unsigned int gt_quota_warn_period; /* Secs between quota warn msgs */ + unsigned int gt_quota_scale_num; /* Numerator */ + unsigned int gt_quota_scale_den; /* Denominator */ + unsigned int gt_quota_cache_secs; + unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ + unsigned int gt_atime_quantum; /* Min secs between atime updates */ + unsigned int gt_new_files_jdata; + unsigned int gt_new_files_directio; + unsigned int gt_max_atomic_write; /* Split big writes into this size */ + unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ + unsigned int gt_lockdump_size; + unsigned int gt_stall_secs; /* Detects trouble! */ + unsigned int gt_complain_secs; + unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */ + unsigned int gt_entries_per_readdir; + unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */ + unsigned int gt_greedy_default; + unsigned int gt_greedy_quantum; + unsigned int gt_greedy_max; + unsigned int gt_statfs_quantum; + unsigned int gt_statfs_slow; +}; + +struct gfs2_gl_hash_bucket { + rwlock_t hb_lock; + struct list_head hb_list; +}; + +enum { + SDF_JOURNAL_CHECKED = 0, + SDF_JOURNAL_LIVE = 1, + SDF_SHUTDOWN = 2, + SDF_NOATIME = 3, +}; + +#define GFS2_GL_HASH_SHIFT 13 +#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) +#define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) +#define GFS2_FSNAME_LEN 256 + +struct gfs2_sbd { + struct super_block *sd_vfs; + struct kobject sd_kobj; + unsigned long sd_flags; /* SDF_... */ + struct gfs2_sb sd_sb; + + /* Constants computed on mount */ + + uint32_t sd_fsb2bb; + uint32_t sd_fsb2bb_shift; + uint32_t sd_diptrs; /* Number of pointers in a dinode */ + uint32_t sd_inptrs; /* Number of pointers in a indirect block */ + uint32_t sd_jbsize; /* Size of a journaled data block */ + uint32_t sd_hash_bsize; /* sizeof(exhash block) */ + uint32_t sd_hash_bsize_shift; + uint32_t sd_hash_ptrs; /* Number of pointers in a hash block */ + uint32_t sd_ut_per_block; + uint32_t sd_qc_per_block; + uint32_t sd_max_dirres; /* Max blocks needed to add a directory entry */ + uint32_t sd_max_height; /* Max height of a file's metadata tree */ + uint64_t sd_heightsize[GFS2_MAX_META_HEIGHT]; + uint32_t sd_max_jheight; /* Max height of journaled file's meta tree */ + uint64_t sd_jheightsize[GFS2_MAX_META_HEIGHT]; + + struct gfs2_args sd_args; /* Mount arguments */ + struct gfs2_tune sd_tune; /* Filesystem tuning structure */ + + /* Lock Stuff */ + + struct lm_lockstruct sd_lockstruct; + struct gfs2_gl_hash_bucket sd_gl_hash[GFS2_GL_HASH_SIZE]; + struct list_head sd_reclaim_list; + spinlock_t sd_reclaim_lock; + wait_queue_head_t sd_reclaim_wq; + atomic_t sd_reclaim_count; + struct gfs2_holder sd_live_gh; + struct gfs2_glock *sd_rename_gl; + struct gfs2_glock *sd_trans_gl; + + /* Inode Stuff */ + + struct gfs2_inode *sd_master_dir; + struct gfs2_inode *sd_jindex; + struct gfs2_inode *sd_inum_inode; + struct gfs2_inode *sd_statfs_inode; + struct gfs2_inode *sd_ir_inode; + struct gfs2_inode *sd_sc_inode; + struct gfs2_inode *sd_ut_inode; + struct gfs2_inode *sd_qc_inode; + struct gfs2_inode *sd_rindex; + struct gfs2_inode *sd_quota_inode; + struct gfs2_inode *sd_root_dir; + + /* Inum stuff */ + + struct semaphore sd_inum_mutex; + + /* StatFS stuff */ + + spinlock_t sd_statfs_spin; + struct semaphore sd_statfs_mutex; + struct gfs2_statfs_change sd_statfs_master; + struct gfs2_statfs_change sd_statfs_local; + unsigned long sd_statfs_sync_time; + + /* Resource group stuff */ + + uint64_t sd_rindex_vn; + spinlock_t sd_rindex_spin; + struct semaphore sd_rindex_mutex; + struct list_head sd_rindex_list; + struct list_head sd_rindex_mru_list; + struct list_head sd_rindex_recent_list; + struct gfs2_rgrpd *sd_rindex_forward; + unsigned int sd_rgrps; + + /* Journal index stuff */ + + struct list_head sd_jindex_list; + spinlock_t sd_jindex_spin; + struct semaphore sd_jindex_mutex; + unsigned int sd_journals; + unsigned long sd_jindex_refresh_time; + + struct gfs2_jdesc *sd_jdesc; + struct gfs2_holder sd_journal_gh; + struct gfs2_holder sd_jinode_gh; + + struct gfs2_holder sd_ir_gh; + struct gfs2_holder sd_sc_gh; + struct gfs2_holder sd_ut_gh; + struct gfs2_holder sd_qc_gh; + + /* Daemon stuff */ + + struct task_struct *sd_scand_process; + struct task_struct *sd_recoverd_process; + struct task_struct *sd_logd_process; + struct task_struct *sd_quotad_process; + struct task_struct *sd_inoded_process; + struct task_struct *sd_glockd_process[GFS2_GLOCKD_MAX]; + unsigned int sd_glockd_num; + + /* Unlinked inode stuff */ + + struct list_head sd_unlinked_list; + atomic_t sd_unlinked_count; + spinlock_t sd_unlinked_spin; + struct semaphore sd_unlinked_mutex; + + unsigned int sd_unlinked_slots; + unsigned int sd_unlinked_chunks; + unsigned char **sd_unlinked_bitmap; + + /* Quota stuff */ + + struct list_head sd_quota_list; + atomic_t sd_quota_count; + spinlock_t sd_quota_spin; + struct semaphore sd_quota_mutex; + + unsigned int sd_quota_slots; + unsigned int sd_quota_chunks; + unsigned char **sd_quota_bitmap; + + uint64_t sd_quota_sync_gen; + unsigned long sd_quota_sync_time; + + /* Log stuff */ + + spinlock_t sd_log_lock; + atomic_t sd_log_trans_count; + wait_queue_head_t sd_log_trans_wq; + atomic_t sd_log_flush_count; + wait_queue_head_t sd_log_flush_wq; + + unsigned int sd_log_blks_reserved; + unsigned int sd_log_commited_buf; + unsigned int sd_log_commited_revoke; + + unsigned int sd_log_num_gl; + unsigned int sd_log_num_buf; + unsigned int sd_log_num_revoke; + unsigned int sd_log_num_rg; + unsigned int sd_log_num_databuf; + struct list_head sd_log_le_gl; + struct list_head sd_log_le_buf; + struct list_head sd_log_le_revoke; + struct list_head sd_log_le_rg; + struct list_head sd_log_le_databuf; + + unsigned int sd_log_blks_free; + struct list_head sd_log_blks_list; + wait_queue_head_t sd_log_blks_wait; + + uint64_t sd_log_sequence; + unsigned int sd_log_head; + unsigned int sd_log_tail; + uint64_t sd_log_wraps; + int sd_log_idle; + + unsigned long sd_log_flush_time; + struct semaphore sd_log_flush_lock; + struct list_head sd_log_flush_list; + + unsigned int sd_log_flush_head; + uint64_t sd_log_flush_wrapped; + + struct list_head sd_ail1_list; + struct list_head sd_ail2_list; + uint64_t sd_ail_sync_gen; + + /* Replay stuff */ + + struct list_head sd_revoke_list; + unsigned int sd_replay_tail; + + unsigned int sd_found_blocks; + unsigned int sd_found_revokes; + unsigned int sd_replayed_blocks; + + /* For quiescing the filesystem */ + + struct gfs2_holder sd_freeze_gh; + struct semaphore sd_freeze_lock; + unsigned int sd_freeze_count; + + /* Counters */ + + atomic_t sd_glock_count; + atomic_t sd_glock_held_count; + atomic_t sd_inode_count; + atomic_t sd_bufdata_count; + + atomic_t sd_fh2dentry_misses; + atomic_t sd_reclaimed; + atomic_t sd_log_flush_incore; + atomic_t sd_log_flush_ondisk; + + atomic_t sd_glock_nq_calls; + atomic_t sd_glock_dq_calls; + atomic_t sd_glock_prefetch_calls; + atomic_t sd_lm_lock_calls; + atomic_t sd_lm_unlock_calls; + atomic_t sd_lm_callbacks; + + atomic_t sd_ops_address; + atomic_t sd_ops_dentry; + atomic_t sd_ops_export; + atomic_t sd_ops_file; + atomic_t sd_ops_inode; + atomic_t sd_ops_super; + atomic_t sd_ops_vm; + + char sd_fsname[GFS2_FSNAME_LEN]; + char sd_table_name[GFS2_FSNAME_LEN]; + char sd_proto_name[GFS2_FSNAME_LEN]; + + /* Debugging crud */ + + unsigned long sd_last_warning; +}; + +#endif /* __INCORE_DOT_H__ */ + --- a/include/linux/gfs2_ioctl.h 1969-12-31 17:00:00.000000000 -0700 +++ b/include/linux/gfs2_ioctl.h 2005-10-10 11:28:49.172806280 -0500 @@ -0,0 +1,30 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __GFS2_IOCTL_DOT_H__ +#define __GFS2_IOCTL_DOT_H__ + +#define _GFS2C_(x) (('G' << 16) | ('2' << 8) | (x)) + +/* Ioctls implemented */ + +#define GFS2_IOCTL_IDENTIFY _GFS2C_(1) +#define GFS2_IOCTL_SUPER _GFS2C_(2) + +struct gfs2_ioctl { + unsigned int gi_argc; + char **gi_argv; + + char __user *gi_data; + unsigned int gi_size; + uint64_t gi_offset; +}; + +#endif /* ___GFS2_IOCTL_DOT_H__ */ + --- a/include/linux/gfs2_ondisk.h 1969-12-31 17:00:00.000000000 -0700 +++ b/include/linux/gfs2_ondisk.h 2005-10-10 11:28:49.187803942 -0500 @@ -0,0 +1,1093 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __GFS2_ONDISK_DOT_H__ +#define __GFS2_ONDISK_DOT_H__ + +#define GFS2_MAGIC 0x07131974 +#define GFS2_BASIC_BLOCK 512 +#define GFS2_BASIC_BLOCK_SHIFT 9 + +/* Lock numbers of the LM_TYPE_NONDISK type */ + +#define GFS2_MOUNT_LOCK 0 +#define GFS2_LIVE_LOCK 1 +#define GFS2_TRANS_LOCK 2 +#define GFS2_RENAME_LOCK 3 + +/* Format numbers for various metadata types */ + +#define GFS2_FORMAT_NONE 0 +#define GFS2_FORMAT_SB 100 +#define GFS2_FORMAT_RG 200 +#define GFS2_FORMAT_RB 300 +#define GFS2_FORMAT_DI 400 +#define GFS2_FORMAT_IN 500 +#define GFS2_FORMAT_LF 600 +#define GFS2_FORMAT_JD 700 +#define GFS2_FORMAT_LH 800 +#define GFS2_FORMAT_LD 900 +#define GFS2_FORMAT_LB 1000 +#define GFS2_FORMAT_EA 1100 +#define GFS2_FORMAT_ED 1200 +#define GFS2_FORMAT_UT 1300 +#define GFS2_FORMAT_QC 1400 +/* These are format numbers for entities contained in files */ +#define GFS2_FORMAT_RI 1500 +#define GFS2_FORMAT_DE 1600 +#define GFS2_FORMAT_QU 1700 +/* These are part of the superblock */ +#define GFS2_FORMAT_FS 1801 +#define GFS2_FORMAT_MULTI 1900 + +/* + * An on-disk inode number + */ + +#define gfs2_inum_equal(ino1, ino2) \ + (((ino1)->no_formal_ino == (ino2)->no_formal_ino) && \ + ((ino1)->no_addr == (ino2)->no_addr)) + +struct gfs2_inum { + uint64_t no_formal_ino; + uint64_t no_addr; +}; + +/* + * Generic metadata head structure + * Every inplace buffer logged in the journal must start with this. + */ + +#define GFS2_METATYPE_NONE 0 +#define GFS2_METATYPE_SB 1 +#define GFS2_METATYPE_RG 2 +#define GFS2_METATYPE_RB 3 +#define GFS2_METATYPE_DI 4 +#define GFS2_METATYPE_IN 5 +#define GFS2_METATYPE_LF 6 +#define GFS2_METATYPE_JD 7 +#define GFS2_METATYPE_LH 8 +#define GFS2_METATYPE_LD 9 +#define GFS2_METATYPE_LB 10 +#define GFS2_METATYPE_EA 11 +#define GFS2_METATYPE_ED 12 +#define GFS2_METATYPE_UT 13 +#define GFS2_METATYPE_QC 14 + +struct gfs2_meta_header { + uint32_t mh_magic; + uint16_t mh_type; + uint16_t mh_format; + uint64_t mh_blkno; +}; + +/* + * super-block structure + * + * It's probably good if SIZEOF_SB <= GFS2_BASIC_BLOCK (512 bytes) + * + * Order is important, need to be able to read old superblocks to do on-disk + * version upgrades. + */ + +/* Address of superblock in GFS2 basic blocks */ +#define GFS2_SB_ADDR 128 + +/* The lock number for the superblock (must be zero) */ +#define GFS2_SB_LOCK 0 + +/* Requirement: GFS2_LOCKNAME_LEN % 8 == 0 + Includes: the fencing zero at the end */ +#define GFS2_LOCKNAME_LEN 64 + +struct gfs2_sb { + struct gfs2_meta_header sb_header; + + uint32_t sb_fs_format; + uint32_t sb_multihost_format; + + uint32_t sb_bsize; + uint32_t sb_bsize_shift; + + struct gfs2_inum sb_master_dir; + + char sb_lockproto[GFS2_LOCKNAME_LEN]; + char sb_locktable[GFS2_LOCKNAME_LEN]; +}; + +/* + * resource index structure + */ + +struct gfs2_rindex { + uint64_t ri_addr; /* grp block disk address */ + uint32_t ri_length; /* length of rgrp header in fs blocks */ + uint32_t ri_pad; + + uint64_t ri_data0; /* first data location */ + uint32_t ri_data; /* num of data blocks in rgrp */ + + uint32_t ri_bitbytes; /* number of bytes in data bitmaps */ + + char ri_reserved[32]; +}; + +/* + * resource group header structure + */ + +/* Number of blocks per byte in rgrp */ +#define GFS2_NBBY 4 +#define GFS2_BIT_SIZE 2 +#define GFS2_BIT_MASK 0x00000003 + +#define GFS2_BLKST_FREE 0 +#define GFS2_BLKST_USED 1 +#define GFS2_BLKST_INVALID 2 +#define GFS2_BLKST_DINODE 3 + +#define GFS2_RGF_JOURNAL 0x00000001 +#define GFS2_RGF_METAONLY 0x00000002 +#define GFS2_RGF_DATAONLY 0x00000004 +#define GFS2_RGF_NOALLOC 0x00000008 + +struct gfs2_rgrp { + struct gfs2_meta_header rg_header; + + uint32_t rg_flags; + uint32_t rg_free; + uint32_t rg_dinodes; + + char rg_reserved[36]; +}; + +/* + * quota structure + */ + +struct gfs2_quota { + uint64_t qu_limit; + uint64_t qu_warn; + int64_t qu_value; +}; + +/* + * dinode structure + */ + +#define GFS2_MAX_META_HEIGHT 10 +#define GFS2_DIR_MAX_DEPTH 17 + +#define DT2IF(dt) (((dt) << 12) & S_IFMT) +#define IF2DT(sif) (((sif) & S_IFMT) >> 12) + +/* Dinode flags */ +#define GFS2_DIF_SYSTEM 0x00000001 +#define GFS2_DIF_JDATA 0x00000002 +#define GFS2_DIF_EXHASH 0x00000004 +#define GFS2_DIF_EA_INDIRECT 0x00000008 +#define GFS2_DIF_DIRECTIO 0x00000010 +#define GFS2_DIF_IMMUTABLE 0x00000020 +#define GFS2_DIF_APPENDONLY 0x00000040 +#define GFS2_DIF_NOATIME 0x00000080 +#define GFS2_DIF_SYNC 0x00000100 +#define GFS2_DIF_INHERIT_DIRECTIO 0x00000200 +#define GFS2_DIF_INHERIT_JDATA 0x00000400 +#define GFS2_DIF_TRUNC_IN_PROG 0x00000800 + +struct gfs2_dinode { + struct gfs2_meta_header di_header; + + struct gfs2_inum di_num; + + uint32_t di_mode; /* mode of file */ + uint32_t di_uid; /* owner's user id */ + uint32_t di_gid; /* owner's group id */ + uint32_t di_nlink; /* number of links to this file */ + uint64_t di_size; /* number of bytes in file */ + uint64_t di_blocks; /* number of blocks in file */ + int64_t di_atime; /* time last accessed */ + int64_t di_mtime; /* time last modified */ + int64_t di_ctime; /* time last changed */ + uint32_t di_major; /* device major number */ + uint32_t di_minor; /* device minor number */ + + uint64_t di_goal_meta; /* rgrp to alloc from next */ + uint64_t di_goal_data; /* data block goal */ + + uint32_t di_flags; /* GFS2_DIF_... */ + uint32_t di_payload_format; /* GFS2_FORMAT_... */ + uint16_t di_height; /* height of metadata */ + + /* These only apply to directories */ + uint16_t di_depth; /* Number of bits in the table */ + uint32_t di_entries; /* The number of entries in the directory */ + + uint64_t di_eattr; /* extended attribute block number */ + + char di_reserved[32]; +}; + +/* + * directory structure - many of these per directory file + */ + +#define GFS2_FNAMESIZE 255 +#define GFS2_DIRENT_SIZE(name_len) ((sizeof(struct gfs2_dirent) + (name_len) + 7) & ~7) + +struct gfs2_dirent { + struct gfs2_inum de_inum; + uint32_t de_hash; + uint32_t de_rec_len; + uint8_t de_name_len; + uint8_t de_type; + uint16_t de_pad1; + uint32_t de_pad2; +}; + +/* + * Header of leaf directory nodes + */ + +struct gfs2_leaf { + struct gfs2_meta_header lf_header; + + uint16_t lf_depth; /* Depth of leaf */ + uint16_t lf_entries; /* Number of dirents in leaf */ + uint32_t lf_dirent_format; /* Format of the dirents */ + uint64_t lf_next; /* Next leaf, if overflow */ + + char lf_reserved[32]; +}; + +/* + * Extended attribute header format + */ + +#define GFS2_EA_MAX_NAME_LEN 255 +#define GFS2_EA_MAX_DATA_LEN 65536 + +#define GFS2_EATYPE_UNUSED 0 +#define GFS2_EATYPE_USR 1 +#define GFS2_EATYPE_SYS 2 + +#define GFS2_EATYPE_LAST 2 +#define GFS2_EATYPE_VALID(x) ((x) <= GFS2_EATYPE_LAST) + +#define GFS2_EAFLAG_LAST 0x01 /* last ea in block */ + +struct gfs2_ea_header { + uint32_t ea_rec_len; + uint32_t ea_data_len; + uint8_t ea_name_len; /* no NULL pointer after the string */ + uint8_t ea_type; /* GFS2_EATYPE_... */ + uint8_t ea_flags; /* GFS2_EAFLAG_... */ + uint8_t ea_num_ptrs; + uint32_t ea_pad; +}; + +/* + * Log header structure + */ + +#define GFS2_LOG_HEAD_UNMOUNT 0x00000001 /* log is clean */ + +struct gfs2_log_header { + struct gfs2_meta_header lh_header; + + uint64_t lh_sequence; /* Sequence number of this transaction */ + uint32_t lh_flags; /* GFS2_LOG_HEAD_... */ + uint32_t lh_tail; /* Block number of log tail */ + uint32_t lh_blkno; + uint32_t lh_hash; +}; + +/* + * Log type descriptor + */ + +#define GFS2_LOG_DESC_METADATA 300 +/* ld_data1 is the number of metadata blocks in the descriptor. + ld_data2 is unused. */ + +#define GFS2_LOG_DESC_REVOKE 301 +/* ld_data1 is the number of revoke blocks in the descriptor. + ld_data2 is unused. */ + +struct gfs2_log_descriptor { + struct gfs2_meta_header ld_header; + + uint32_t ld_type; /* GFS2_LOG_DESC_... */ + uint32_t ld_length; /* Number of buffers in this chunk */ + uint32_t ld_data1; /* descriptor-specific field */ + uint32_t ld_data2; /* descriptor-specific field */ + + char ld_reserved[32]; +}; + +/* + * Inum Range + * Describe a range of formal inode numbers allocated to + * one machine to assign to inodes. + */ + +#define GFS2_INUM_QUANTUM 1048576 + +struct gfs2_inum_range { + uint64_t ir_start; + uint64_t ir_length; +}; + +/* + * Statfs change + * Describes an change to the pool of free and allocated + * blocks. + */ + +struct gfs2_statfs_change { + int64_t sc_total; + int64_t sc_free; + int64_t sc_dinodes; +}; + +/* + * Unlinked Tag + * Describes an allocated inode that isn't linked into + * the directory tree and might need to be deallocated. + */ + +#define GFS2_UTF_UNINIT 0x00000001 + +struct gfs2_unlinked_tag { + struct gfs2_inum ut_inum; + uint32_t ut_flags; /* GFS2_UTF_... */ + uint32_t ut_pad; +}; + +/* + * Quota change + * Describes an allocation change for a particular + * user or group. + */ + +#define GFS2_QCF_USER 0x00000001 + +struct gfs2_quota_change { + int64_t qc_change; + uint32_t qc_flags; /* GFS2_QCF_... */ + uint32_t qc_id; +}; + +/* Translation functions */ + +void gfs2_inum_in(struct gfs2_inum *no, char *buf); +void gfs2_inum_out(struct gfs2_inum *no, char *buf); +void gfs2_meta_header_in(struct gfs2_meta_header *mh, char *buf); +void gfs2_meta_header_out(struct gfs2_meta_header *mh, char *buf); +void gfs2_sb_in(struct gfs2_sb *sb, char *buf); +void gfs2_sb_out(struct gfs2_sb *sb, char *buf); +void gfs2_rindex_in(struct gfs2_rindex *ri, char *buf); +void gfs2_rindex_out(struct gfs2_rindex *ri, char *buf); +void gfs2_rgrp_in(struct gfs2_rgrp *rg, char *buf); +void gfs2_rgrp_out(struct gfs2_rgrp *rg, char *buf); +void gfs2_quota_in(struct gfs2_quota *qu, char *buf); +void gfs2_quota_out(struct gfs2_quota *qu, char *buf); +void gfs2_dinode_in(struct gfs2_dinode *di, char *buf); +void gfs2_dinode_out(struct gfs2_dinode *di, char *buf); +void gfs2_dirent_in(struct gfs2_dirent *de, char *buf); +void gfs2_dirent_out(struct gfs2_dirent *de, char *buf); +void gfs2_leaf_in(struct gfs2_leaf *lf, char *buf); +void gfs2_leaf_out(struct gfs2_leaf *lf, char *buf); +void gfs2_ea_header_in(struct gfs2_ea_header *ea, char *buf); +void gfs2_ea_header_out(struct gfs2_ea_header *ea, char *buf); +void gfs2_log_header_in(struct gfs2_log_header *lh, char *buf); +void gfs2_log_header_out(struct gfs2_log_header *lh, char *buf); +void gfs2_log_descriptor_in(struct gfs2_log_descriptor *ld, char *buf); +void gfs2_log_descriptor_out(struct gfs2_log_descriptor *ld, char *buf); +void gfs2_inum_range_in(struct gfs2_inum_range *ir, char *buf); +void gfs2_inum_range_out(struct gfs2_inum_range *ir, char *buf); +void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, char *buf); +void gfs2_statfs_change_out(struct gfs2_statfs_change *sc, char *buf); +void gfs2_unlinked_tag_in(struct gfs2_unlinked_tag *ut, char *buf); +void gfs2_unlinked_tag_out(struct gfs2_unlinked_tag *ut, char *buf); +void gfs2_quota_change_in(struct gfs2_quota_change *qc, char *buf); +void gfs2_quota_change_out(struct gfs2_quota_change *qc, char *buf); + +/* Printing functions */ + +void gfs2_inum_print(struct gfs2_inum *no); +void gfs2_meta_header_print(struct gfs2_meta_header *mh); +void gfs2_sb_print(struct gfs2_sb *sb); +void gfs2_rindex_print(struct gfs2_rindex *ri); +void gfs2_rgrp_print(struct gfs2_rgrp *rg); +void gfs2_quota_print(struct gfs2_quota *qu); +void gfs2_dinode_print(struct gfs2_dinode *di); +void gfs2_dirent_print(struct gfs2_dirent *de, char *name); +void gfs2_leaf_print(struct gfs2_leaf *lf); +void gfs2_ea_header_print(struct gfs2_ea_header *ea, char *name); +void gfs2_log_header_print(struct gfs2_log_header *lh); +void gfs2_log_descriptor_print(struct gfs2_log_descriptor *ld); +void gfs2_inum_range_print(struct gfs2_inum_range *ir); +void gfs2_statfs_change_print(struct gfs2_statfs_change *sc); +void gfs2_unlinked_tag_print(struct gfs2_unlinked_tag *ut); +void gfs2_quota_change_print(struct gfs2_quota_change *qc); + +#endif /* __GFS2_ONDISK_DOT_H__ */ + + + +#ifdef WANT_GFS2_CONVERSION_FUNCTIONS + +#define CPIN_08(s1, s2, member, count) {memcpy((s1->member), (s2->member), (count));} +#define CPOUT_08(s1, s2, member, count) {memcpy((s2->member), (s1->member), (count));} +#define CPIN_16(s1, s2, member) {(s1->member) = le16_to_cpu((s2->member));} +#define CPOUT_16(s1, s2, member) {(s2->member) = cpu_to_le16((s1->member));} +#define CPIN_32(s1, s2, member) {(s1->member) = le32_to_cpu((s2->member));} +#define CPOUT_32(s1, s2, member) {(s2->member) = cpu_to_le32((s1->member));} +#define CPIN_64(s1, s2, member) {(s1->member) = le64_to_cpu((s2->member));} +#define CPOUT_64(s1, s2, member) {(s2->member) = cpu_to_le64((s1->member));} + +#define pv(struct, member, fmt) printk(" "#member" = "fmt"\n", struct->member); +#define pa(struct, member, count) print_array(#member, struct->member, count); + +/** + * print_array - Print out an array of bytes + * @title: what to print before the array + * @buf: the array + * @count: the number of bytes + * + */ + +static void print_array(char *title, char *buf, int count) +{ + int x; + + printk(" %s =\n", title); + for (x = 0; x < count; x++) { + printk("%.2X ", (unsigned char)buf[x]); + if (x % 16 == 15) + printk("\n"); + } + if (x % 16) + printk("\n"); +} + +/* + * gfs2_xxx_in - read in an xxx struct + * first arg: the cpu-order structure + * buf: the disk-order buffer + * + * gfs2_xxx_out - write out an xxx struct + * first arg: the cpu-order structure + * buf: the disk-order buffer + * + * gfs2_xxx_print - print out an xxx struct + * first arg: the cpu-order structure + */ + +void gfs2_inum_in(struct gfs2_inum *no, char *buf) +{ + struct gfs2_inum *str = (struct gfs2_inum *)buf; + + CPIN_64(no, str, no_formal_ino); + CPIN_64(no, str, no_addr); +} + +void gfs2_inum_out(struct gfs2_inum *no, char *buf) +{ + struct gfs2_inum *str = (struct gfs2_inum *)buf; + + CPOUT_64(no, str, no_formal_ino); + CPOUT_64(no, str, no_addr); +} + +void gfs2_inum_print(struct gfs2_inum *no) +{ + pv(no, no_formal_ino, "%llu"); + pv(no, no_addr, "%llu"); +} + +void gfs2_meta_header_in(struct gfs2_meta_header *mh, char *buf) +{ + struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf; + + CPIN_32(mh, str, mh_magic); + CPIN_16(mh, str, mh_type); + CPIN_16(mh, str, mh_format); + CPIN_64(mh, str, mh_blkno); +} + +void gfs2_meta_header_out(struct gfs2_meta_header *mh, char *buf) +{ + struct gfs2_meta_header *str = (struct gfs2_meta_header *)buf; + + CPOUT_32(mh, str, mh_magic); + CPOUT_16(mh, str, mh_type); + CPOUT_16(mh, str, mh_format); + CPOUT_64(mh, str, mh_blkno); +} + +void gfs2_meta_header_print(struct gfs2_meta_header *mh) +{ + pv(mh, mh_magic, "0x%.8X"); + pv(mh, mh_type, "%u"); + pv(mh, mh_format, "%u"); + pv(mh, mh_blkno, "%llu"); +} + +void gfs2_sb_in(struct gfs2_sb *sb, char *buf) +{ + struct gfs2_sb *str = (struct gfs2_sb *)buf; + + gfs2_meta_header_in(&sb->sb_header, buf); + + CPIN_32(sb, str, sb_fs_format); + CPIN_32(sb, str, sb_multihost_format); + + CPIN_32(sb, str, sb_bsize); + CPIN_32(sb, str, sb_bsize_shift); + + gfs2_inum_in(&sb->sb_master_dir, (char *)&str->sb_master_dir); + + CPIN_08(sb, str, sb_lockproto, GFS2_LOCKNAME_LEN); + CPIN_08(sb, str, sb_locktable, GFS2_LOCKNAME_LEN); +} + +void gfs2_sb_out(struct gfs2_sb *sb, char *buf) +{ + struct gfs2_sb *str = (struct gfs2_sb *)buf; + + gfs2_meta_header_out(&sb->sb_header, buf); + + CPOUT_32(sb, str, sb_fs_format); + CPOUT_32(sb, str, sb_multihost_format); + + CPOUT_32(sb, str, sb_bsize); + CPOUT_32(sb, str, sb_bsize_shift); + + gfs2_inum_out(&sb->sb_master_dir, (char *)&str->sb_master_dir); + + CPOUT_08(sb, str, sb_lockproto, GFS2_LOCKNAME_LEN); + CPOUT_08(sb, str, sb_locktable, GFS2_LOCKNAME_LEN); +} + +void gfs2_sb_print(struct gfs2_sb *sb) +{ + gfs2_meta_header_print(&sb->sb_header); + + pv(sb, sb_fs_format, "%u"); + pv(sb, sb_multihost_format, "%u"); + + pv(sb, sb_bsize, "%u"); + pv(sb, sb_bsize_shift, "%u"); + + gfs2_inum_print(&sb->sb_master_dir); + + pv(sb, sb_lockproto, "%s"); + pv(sb, sb_locktable, "%s"); +} + +void gfs2_rindex_in(struct gfs2_rindex *ri, char *buf) +{ + struct gfs2_rindex *str = (struct gfs2_rindex *)buf; + + CPIN_64(ri, str, ri_addr); + CPIN_32(ri, str, ri_length); + CPIN_32(ri, str, ri_pad); + + CPIN_64(ri, str, ri_data0); + CPIN_32(ri, str, ri_data); + + CPIN_32(ri, str, ri_bitbytes); + + CPIN_08(ri, str, ri_reserved, 32); +} + +void gfs2_rindex_out(struct gfs2_rindex *ri, char *buf) +{ + struct gfs2_rindex *str = (struct gfs2_rindex *)buf; + + CPOUT_64(ri, str, ri_addr); + CPOUT_32(ri, str, ri_length); + CPOUT_32(ri, str, ri_pad); + + CPOUT_64(ri, str, ri_data0); + CPOUT_32(ri, str, ri_data); + + CPOUT_32(ri, str, ri_bitbytes); + + CPOUT_08(ri, str, ri_reserved, 32); +} + +void gfs2_rindex_print(struct gfs2_rindex *ri) +{ + pv(ri, ri_addr, "%llu"); + pv(ri, ri_length, "%u"); + pv(ri, ri_pad, "%u"); + + pv(ri, ri_data0, "%llu"); + pv(ri, ri_data, "%u"); + + pv(ri, ri_bitbytes, "%u"); + + pa(ri, ri_reserved, 32); +} + +void gfs2_rgrp_in(struct gfs2_rgrp *rg, char *buf) +{ + struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf; + + gfs2_meta_header_in(&rg->rg_header, buf); + CPIN_32(rg, str, rg_flags); + CPIN_32(rg, str, rg_free); + CPIN_32(rg, str, rg_dinodes); + + CPIN_08(rg, str, rg_reserved, 36); +} + +void gfs2_rgrp_out(struct gfs2_rgrp *rg, char *buf) +{ + struct gfs2_rgrp *str = (struct gfs2_rgrp *)buf; + + gfs2_meta_header_out(&rg->rg_header, buf); + CPOUT_32(rg, str, rg_flags); + CPOUT_32(rg, str, rg_free); + CPOUT_32(rg, str, rg_dinodes); + + CPOUT_08(rg, str, rg_reserved, 36); +} + +void gfs2_rgrp_print(struct gfs2_rgrp *rg) +{ + gfs2_meta_header_print(&rg->rg_header); + pv(rg, rg_flags, "%u"); + pv(rg, rg_free, "%u"); + pv(rg, rg_dinodes, "%u"); + + pa(rg, rg_reserved, 36); +} + +void gfs2_quota_in(struct gfs2_quota *qu, char *buf) +{ + struct gfs2_quota *str = (struct gfs2_quota *)buf; + + CPIN_64(qu, str, qu_limit); + CPIN_64(qu, str, qu_warn); + CPIN_64(qu, str, qu_value); +} + +void gfs2_quota_out(struct gfs2_quota *qu, char *buf) +{ + struct gfs2_quota *str = (struct gfs2_quota *)buf; + + CPOUT_64(qu, str, qu_limit); + CPOUT_64(qu, str, qu_warn); + CPOUT_64(qu, str, qu_value); +} + +void gfs2_quota_print(struct gfs2_quota *qu) +{ + pv(qu, qu_limit, "%llu"); + pv(qu, qu_warn, "%llu"); + pv(qu, qu_value, "%lld"); +} + +void gfs2_dinode_in(struct gfs2_dinode *di, char *buf) +{ + struct gfs2_dinode *str = (struct gfs2_dinode *)buf; + + gfs2_meta_header_in(&di->di_header, buf); + gfs2_inum_in(&di->di_num, (char *)&str->di_num); + + CPIN_32(di, str, di_mode); + CPIN_32(di, str, di_uid); + CPIN_32(di, str, di_gid); + CPIN_32(di, str, di_nlink); + CPIN_64(di, str, di_size); + CPIN_64(di, str, di_blocks); + CPIN_64(di, str, di_atime); + CPIN_64(di, str, di_mtime); + CPIN_64(di, str, di_ctime); + CPIN_32(di, str, di_major); + CPIN_32(di, str, di_minor); + + CPIN_64(di, str, di_goal_meta); + CPIN_64(di, str, di_goal_data); + + CPIN_32(di, str, di_flags); + CPIN_32(di, str, di_payload_format); + CPIN_16(di, str, di_height); + + CPIN_16(di, str, di_depth); + CPIN_32(di, str, di_entries); + + CPIN_64(di, str, di_eattr); + + CPIN_08(di, str, di_reserved, 32); +} + +void gfs2_dinode_out(struct gfs2_dinode *di, char *buf) +{ + struct gfs2_dinode *str = (struct gfs2_dinode *)buf; + + gfs2_meta_header_out(&di->di_header, buf); + gfs2_inum_out(&di->di_num, (char *)&str->di_num); + + CPOUT_32(di, str, di_mode); + CPOUT_32(di, str, di_uid); + CPOUT_32(di, str, di_gid); + CPOUT_32(di, str, di_nlink); + CPOUT_64(di, str, di_size); + CPOUT_64(di, str, di_blocks); + CPOUT_64(di, str, di_atime); + CPOUT_64(di, str, di_mtime); + CPOUT_64(di, str, di_ctime); + CPOUT_32(di, str, di_major); + CPOUT_32(di, str, di_minor); + + CPOUT_64(di, str, di_goal_meta); + CPOUT_64(di, str, di_goal_data); + + CPOUT_32(di, str, di_flags); + CPOUT_32(di, str, di_payload_format); + CPOUT_16(di, str, di_height); + + CPOUT_16(di, str, di_depth); + CPOUT_32(di, str, di_entries); + + CPOUT_64(di, str, di_eattr); + + CPOUT_08(di, str, di_reserved, 32); +} + +void gfs2_dinode_print(struct gfs2_dinode *di) +{ + gfs2_meta_header_print(&di->di_header); + gfs2_inum_print(&di->di_num); + + pv(di, di_mode, "0%o"); + pv(di, di_uid, "%u"); + pv(di, di_gid, "%u"); + pv(di, di_nlink, "%u"); + pv(di, di_size, "%llu"); + pv(di, di_blocks, "%llu"); + pv(di, di_atime, "%lld"); + pv(di, di_mtime, "%lld"); + pv(di, di_ctime, "%lld"); + pv(di, di_major, "%u"); + pv(di, di_minor, "%u"); + + pv(di, di_goal_meta, "%llu"); + pv(di, di_goal_data, "%llu"); + + pv(di, di_flags, "0x%.8X"); + pv(di, di_payload_format, "%u"); + pv(di, di_height, "%u"); + + pv(di, di_depth, "%u"); + pv(di, di_entries, "%u"); + + pv(di, di_eattr, "%llu"); + + pa(di, di_reserved, 32); +} + +void gfs2_dirent_in(struct gfs2_dirent *de, char *buf) +{ + struct gfs2_dirent *str = (struct gfs2_dirent *)buf; + + gfs2_inum_in(&de->de_inum, buf); + CPIN_32(de, str, de_hash); + CPIN_32(de, str, de_rec_len); + de->de_name_len = str->de_name_len; + de->de_type = str->de_type; + CPIN_16(de, str, de_pad1); + CPIN_32(de, str, de_pad2); +} + +void gfs2_dirent_out(struct gfs2_dirent *de, char *buf) +{ + struct gfs2_dirent *str = (struct gfs2_dirent *)buf; + + gfs2_inum_out(&de->de_inum, buf); + CPOUT_32(de, str, de_hash); + CPOUT_32(de, str, de_rec_len); + str->de_name_len = de->de_name_len; + str->de_type = de->de_type; + CPOUT_16(de, str, de_pad1); + CPOUT_32(de, str, de_pad2); +} + +void gfs2_dirent_print(struct gfs2_dirent *de, char *name) +{ + char buf[GFS2_FNAMESIZE + 1]; + + gfs2_inum_print(&de->de_inum); + pv(de, de_hash, "0x%.8X"); + pv(de, de_rec_len, "%u"); + pv(de, de_name_len, "%u"); + pv(de, de_type, "%u"); + pv(de, de_pad1, "%u"); + pv(de, de_pad2, "%u"); + + memset(buf, 0, GFS2_FNAMESIZE + 1); + memcpy(buf, name, de->de_name_len); + printk(" name = %s\n", buf); +} + +void gfs2_leaf_in(struct gfs2_leaf *lf, char *buf) +{ + struct gfs2_leaf *str = (struct gfs2_leaf *)buf; + + gfs2_meta_header_in(&lf->lf_header, buf); + CPIN_16(lf, str, lf_depth); + CPIN_16(lf, str, lf_entries); + CPIN_32(lf, str, lf_dirent_format); + CPIN_64(lf, str, lf_next); + + CPIN_08(lf, str, lf_reserved, 32); +} + +void gfs2_leaf_out(struct gfs2_leaf *lf, char *buf) +{ + struct gfs2_leaf *str = (struct gfs2_leaf *)buf; + + gfs2_meta_header_out(&lf->lf_header, buf); + CPOUT_16(lf, str, lf_depth); + CPOUT_16(lf, str, lf_entries); + CPOUT_32(lf, str, lf_dirent_format); + CPOUT_64(lf, str, lf_next); + + CPOUT_08(lf, str, lf_reserved, 32); +} + +void gfs2_leaf_print(struct gfs2_leaf *lf) +{ + gfs2_meta_header_print(&lf->lf_header); + pv(lf, lf_depth, "%u"); + pv(lf, lf_entries, "%u"); + pv(lf, lf_dirent_format, "%u"); + pv(lf, lf_next, "%llu"); + + pa(lf, lf_reserved, 32); +} + +void gfs2_ea_header_in(struct gfs2_ea_header *ea, char *buf) +{ + struct gfs2_ea_header *str = (struct gfs2_ea_header *)buf; + + CPIN_32(ea, str, ea_rec_len); + CPIN_32(ea, str, ea_data_len); + ea->ea_name_len = str->ea_name_len; + ea->ea_type = str->ea_type; + ea->ea_flags = str->ea_flags; + ea->ea_num_ptrs = str->ea_num_ptrs; + CPIN_32(ea, str, ea_pad); +} + +void gfs2_ea_header_out(struct gfs2_ea_header *ea, char *buf) +{ + struct gfs2_ea_header *str = (struct gfs2_ea_header *)buf; + + CPOUT_32(ea, str, ea_rec_len); + CPOUT_32(ea, str, ea_data_len); + str->ea_name_len = ea->ea_name_len; + str->ea_type = ea->ea_type; + str->ea_flags = ea->ea_flags; + str->ea_num_ptrs = ea->ea_num_ptrs; + CPOUT_32(ea, str, ea_pad); +} + +void gfs2_ea_header_print(struct gfs2_ea_header *ea, char *name) +{ + char buf[GFS2_EA_MAX_NAME_LEN + 1]; + + pv(ea, ea_rec_len, "%u"); + pv(ea, ea_data_len, "%u"); + pv(ea, ea_name_len, "%u"); + pv(ea, ea_type, "%u"); + pv(ea, ea_flags, "%u"); + pv(ea, ea_num_ptrs, "%u"); + pv(ea, ea_pad, "%u"); + + memset(buf, 0, GFS2_EA_MAX_NAME_LEN + 1); + memcpy(buf, name, ea->ea_name_len); + printk(" name = %s\n", buf); +} + +void gfs2_log_header_in(struct gfs2_log_header *lh, char *buf) +{ + struct gfs2_log_header *str = (struct gfs2_log_header *)buf; + + gfs2_meta_header_in(&lh->lh_header, buf); + CPIN_64(lh, str, lh_sequence); + CPIN_32(lh, str, lh_flags); + CPIN_32(lh, str, lh_tail); + CPIN_32(lh, str, lh_blkno); + CPIN_32(lh, str, lh_hash); +} + +void gfs2_log_header_out(struct gfs2_log_header *lh, char *buf) +{ + struct gfs2_log_header *str = (struct gfs2_log_header *)buf; + + gfs2_meta_header_out(&lh->lh_header, buf); + CPOUT_64(lh, str, lh_sequence); + CPOUT_32(lh, str, lh_flags); + CPOUT_32(lh, str, lh_tail); + CPOUT_32(lh, str, lh_blkno); + CPOUT_32(lh, str, lh_hash); +} + +void gfs2_log_header_print(struct gfs2_log_header *lh) +{ + gfs2_meta_header_print(&lh->lh_header); + pv(lh, lh_sequence, "%llu"); + pv(lh, lh_flags, "0x%.8X"); + pv(lh, lh_tail, "%u"); + pv(lh, lh_blkno, "%u"); + pv(lh, lh_hash, "0x%.8X"); +} + +void gfs2_log_descriptor_in(struct gfs2_log_descriptor *ld, char *buf) +{ + struct gfs2_log_descriptor *str = (struct gfs2_log_descriptor *)buf; + + gfs2_meta_header_in(&ld->ld_header, buf); + CPIN_32(ld, str, ld_type); + CPIN_32(ld, str, ld_length); + CPIN_32(ld, str, ld_data1); + CPIN_32(ld, str, ld_data2); + + CPIN_08(ld, str, ld_reserved, 32); +} + +void gfs2_log_descriptor_out(struct gfs2_log_descriptor *ld, char *buf) +{ + struct gfs2_log_descriptor *str = (struct gfs2_log_descriptor *)buf; + + gfs2_meta_header_out(&ld->ld_header, buf); + CPOUT_32(ld, str, ld_type); + CPOUT_32(ld, str, ld_length); + CPOUT_32(ld, str, ld_data1); + CPOUT_32(ld, str, ld_data2); + + CPOUT_08(ld, str, ld_reserved, 32); +} + +void gfs2_log_descriptor_print(struct gfs2_log_descriptor *ld) +{ + gfs2_meta_header_print(&ld->ld_header); + pv(ld, ld_type, "%u"); + pv(ld, ld_length, "%u"); + pv(ld, ld_data1, "%u"); + pv(ld, ld_data2, "%u"); + + pa(ld, ld_reserved, 32); +} + +void gfs2_inum_range_in(struct gfs2_inum_range *ir, char *buf) +{ + struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf; + + CPIN_64(ir, str, ir_start); + CPIN_64(ir, str, ir_length); +} + +void gfs2_inum_range_out(struct gfs2_inum_range *ir, char *buf) +{ + struct gfs2_inum_range *str = (struct gfs2_inum_range *)buf; + + CPOUT_64(ir, str, ir_start); + CPOUT_64(ir, str, ir_length); +} + +void gfs2_inum_range_print(struct gfs2_inum_range *ir) +{ + pv(ir, ir_start, "%llu"); + pv(ir, ir_length, "%llu"); +} + +void gfs2_statfs_change_in(struct gfs2_statfs_change *sc, char *buf) +{ + struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf; + + CPIN_64(sc, str, sc_total); + CPIN_64(sc, str, sc_free); + CPIN_64(sc, str, sc_dinodes); +} + +void gfs2_statfs_change_out(struct gfs2_statfs_change *sc, char *buf) +{ + struct gfs2_statfs_change *str = (struct gfs2_statfs_change *)buf; + + CPOUT_64(sc, str, sc_total); + CPOUT_64(sc, str, sc_free); + CPOUT_64(sc, str, sc_dinodes); +} + +void gfs2_statfs_change_print(struct gfs2_statfs_change *sc) +{ + pv(sc, sc_total, "%lld"); + pv(sc, sc_free, "%lld"); + pv(sc, sc_dinodes, "%lld"); +} + +void gfs2_unlinked_tag_in(struct gfs2_unlinked_tag *ut, char *buf) +{ + struct gfs2_unlinked_tag *str = (struct gfs2_unlinked_tag *)buf; + + gfs2_inum_in(&ut->ut_inum, buf); + CPIN_32(ut, str, ut_flags); + CPIN_32(ut, str, ut_pad); +} + +void gfs2_unlinked_tag_out(struct gfs2_unlinked_tag *ut, char *buf) +{ + struct gfs2_unlinked_tag *str = (struct gfs2_unlinked_tag *)buf; + + gfs2_inum_out(&ut->ut_inum, buf); + CPOUT_32(ut, str, ut_flags); + CPOUT_32(ut, str, ut_pad); +} + +void gfs2_unlinked_tag_print(struct gfs2_unlinked_tag *ut) +{ + gfs2_inum_print(&ut->ut_inum); + pv(ut, ut_flags, "%u"); + pv(ut, ut_pad, "%u"); +} + +void gfs2_quota_change_in(struct gfs2_quota_change *qc, char *buf) +{ + struct gfs2_quota_change *str = (struct gfs2_quota_change *)buf; + + CPIN_64(qc, str, qc_change); + CPIN_32(qc, str, qc_flags); + CPIN_32(qc, str, qc_id); +} + +void gfs2_quota_change_out(struct gfs2_quota_change *qc, char *buf) +{ + struct gfs2_quota_change *str = (struct gfs2_quota_change *)buf; + + CPOUT_64(qc, str, qc_change); + CPOUT_32(qc, str, qc_flags); + CPOUT_32(qc, str, qc_id); +} + +void gfs2_quota_change_print(struct gfs2_quota_change *qc) +{ + pv(qc, qc_change, "%lld"); + pv(qc, qc_flags, "0x%.8X"); + pv(qc, qc_id, "%u"); +} + +#endif /* WANT_GFS2_CONVERSION_FUNCTIONS */ + [PATCH 02/16] GFS: core fs Core file system functions. Signed-off-by: Ken Preslan Signed-off-by: David Teigland --- fs/gfs2/bmap.c | 1211 ++++++++++++++++++++++++++++++++++++ fs/gfs2/bmap.h | 39 + fs/gfs2/daemon.c | 225 ++++++ fs/gfs2/daemon.h | 20 fs/gfs2/format.h | 21 fs/gfs2/glops.c | 487 ++++++++++++++ fs/gfs2/glops.h | 23 fs/gfs2/inode.c | 1805 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/inode.h | 74 ++ 9 files changed, 3905 insertions(+) --- a/fs/gfs2/bmap.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/bmap.c 2005-10-10 11:28:49.123813920 -0500 @@ -0,0 +1,1211 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "glock.h" +#include "inode.h" +#include "jdata.h" +#include "meta_io.h" +#include "page.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +struct metapath { + unsigned int mp_list[GFS2_MAX_META_HEIGHT]; +}; + +typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh, + struct buffer_head *bh, uint64_t *top, + uint64_t *bottom, unsigned int height, + void *data); + +struct strip_mine { + int sm_first; + unsigned int sm_height; +}; + +/** + * @gfs2_unstuffer_sync - Synchronously unstuff a dinode + * @ip: + * @dibh: + * @block: + * @private: + * + * Cheat and use a metadata buffer instead of a data page. + * + * Returns: errno + */ + +int gfs2_unstuffer_sync(struct gfs2_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private) +{ + struct buffer_head *bh; + int error; + + bh = gfs2_meta_new(ip->i_gl, block); + + gfs2_buffer_copy_tail(bh, 0, dibh, sizeof(struct gfs2_dinode)); + + set_buffer_dirty(bh); + error = sync_dirty_buffer(bh); + + brelse(bh); + + return error; +} + +/** + * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big + * @ip: The GFS2 inode to unstuff + * @unstuffer: the routine that handles unstuffing a non-zero length file + * @private: private data for the unstuffer + * + * This routine unstuffs a dinode and returns it to a "normal" state such + * that the height can be grown in the traditional way. + * + * Returns: errno + */ + +int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer, + void *private) +{ + struct buffer_head *bh, *dibh; + uint64_t block = 0; + int journaled = gfs2_is_jdata(ip); + int error; + + down_write(&ip->i_rw_mutex); + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out; + + if (ip->i_di.di_size) { + /* Get a free block, fill it with the stuffed data, + and write it out to disk */ + + if (journaled) { + block = gfs2_alloc_meta(ip); + + error = gfs2_jdata_get_buffer(ip, block, 1, &bh); + if (error) + goto out_brelse; + gfs2_buffer_copy_tail(bh, + sizeof(struct gfs2_meta_header), + dibh, sizeof(struct gfs2_dinode)); + brelse(bh); + } else { + block = gfs2_alloc_data(ip); + + error = unstuffer(ip, dibh, block, private); + if (error) + goto out_brelse; + } + } + + /* Set up the pointer to the new block */ + + gfs2_trans_add_bh(ip->i_gl, dibh); + + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + + if (ip->i_di.di_size) { + *(uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)) = cpu_to_le64(block); + ip->i_di.di_blocks++; + } + + ip->i_di.di_height = 1; + + gfs2_dinode_out(&ip->i_di, dibh->b_data); + + out_brelse: + brelse(dibh); + + out: + up_write(&ip->i_rw_mutex); + + return error; +} + +/** + * calc_tree_height - Calculate the height of a metadata tree + * @ip: The GFS2 inode + * @size: The proposed size of the file + * + * Work out how tall a metadata tree needs to be in order to accommodate a + * file of a particular size. If size is less than the current size of + * the inode, then the current size of the inode is used instead of the + * supplied one. + * + * Returns: the height the tree should be + */ + +static unsigned int calc_tree_height(struct gfs2_inode *ip, uint64_t size) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + uint64_t *arr; + unsigned int max, height; + + if (ip->i_di.di_size > size) + size = ip->i_di.di_size; + + if (gfs2_is_jdata(ip)) { + arr = sdp->sd_jheightsize; + max = sdp->sd_max_jheight; + } else { + arr = sdp->sd_heightsize; + max = sdp->sd_max_height; + } + + for (height = 0; height < max; height++) + if (arr[height] >= size) + break; + + return height; +} + +/** + * build_height - Build a metadata tree of the requested height + * @ip: The GFS2 inode + * @height: The height to build to + * + * This routine makes sure that the metadata tree is tall enough to hold + * "size" bytes of data. + * + * Returns: errno + */ + +static int build_height(struct gfs2_inode *ip, int height) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *bh, *dibh; + uint64_t block = 0, *bp; + unsigned int x; + int new_block; + int error; + + while (ip->i_di.di_height < height) { + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + new_block = 0; + bp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)); + for (x = 0; x < sdp->sd_diptrs; x++, bp++) + if (*bp) { + new_block = 1; + break; + } + + if (new_block) { + /* Get a new block, fill it with the old direct + pointers, and write it out */ + + block = gfs2_alloc_meta(ip); + + bh = gfs2_meta_new(ip->i_gl, block); + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_metatype_set(bh, + GFS2_METATYPE_IN, + GFS2_FORMAT_IN); + gfs2_buffer_copy_tail(bh, + sizeof(struct gfs2_meta_header), + dibh, sizeof(struct gfs2_dinode)); + + brelse(bh); + } + + /* Set up the new direct pointer and write it out to disk */ + + gfs2_trans_add_bh(ip->i_gl, dibh); + + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + + if (new_block) { + *(uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)) = cpu_to_le64(block); + ip->i_di.di_blocks++; + } + + ip->i_di.di_height++; + + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + return 0; +} + +/** + * find_metapath - Find path through the metadata tree + * @ip: The inode pointer + * @mp: The metapath to return the result in + * @block: The disk block to look up + * + * This routine returns a struct metapath structure that defines a path + * through the metadata of inode "ip" to get to block "block". + * + * Example: + * Given: "ip" is a height 3 file, "offset" is 101342453, and this is a + * filesystem with a blocksize of 4096. + * + * find_metapath() would return a struct metapath structure set to: + * mp_offset = 101342453, mp_height = 3, mp_list[0] = 0, mp_list[1] = 48, + * and mp_list[2] = 165. + * + * That means that in order to get to the block containing the byte at + * offset 101342453, we would load the indirect block pointed to by pointer + * 0 in the dinode. We would then load the indirect block pointed to by + * pointer 48 in that indirect block. We would then load the data block + * pointed to by pointer 165 in that indirect block. + * + * ---------------------------------------- + * | Dinode | | + * | | 4| + * | |0 1 2 3 4 5 9| + * | | 6| + * ---------------------------------------- + * | + * | + * V + * ---------------------------------------- + * | Indirect Block | + * | 5| + * | 4 4 4 4 4 5 5 1| + * |0 5 6 7 8 9 0 1 2| + * ---------------------------------------- + * | + * | + * V + * ---------------------------------------- + * | Indirect Block | + * | 1 1 1 1 1 5| + * | 6 6 6 6 6 1| + * |0 3 4 5 6 7 2| + * ---------------------------------------- + * | + * | + * V + * ---------------------------------------- + * | Data block containing offset | + * | 101342453 | + * | | + * | | + * ---------------------------------------- + * + */ + +static struct metapath *find_metapath(struct gfs2_inode *ip, uint64_t block) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct metapath *mp; + uint64_t b = block; + unsigned int i; + + mp = kzalloc(sizeof(struct metapath), GFP_KERNEL | __GFP_NOFAIL); + + for (i = ip->i_di.di_height; i--;) + mp->mp_list[i] = do_div(b, sdp->sd_inptrs); + + return mp; +} + +/** + * metapointer - Return pointer to start of metadata in a buffer + * @bh: The buffer + * @height: The metadata height (0 = dinode) + * @mp: The metapath + * + * Return a pointer to the block number of the next height of the metadata + * tree given a buffer containing the pointer to the current height of the + * metadata tree. + */ + +static inline uint64_t *metapointer(struct buffer_head *bh, + unsigned int height, struct metapath *mp) +{ + unsigned int head_size = (height > 0) ? + sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode); + + return ((uint64_t *)(bh->b_data + head_size)) + mp->mp_list[height]; +} + +/** + * lookup_block - Get the next metadata block in metadata tree + * @ip: The GFS2 inode + * @bh: Buffer containing the pointers to metadata blocks + * @height: The height of the tree (0 = dinode) + * @mp: The metapath + * @create: Non-zero if we may create a new meatdata block + * @new: Used to indicate if we did create a new metadata block + * @block: the returned disk block number + * + * Given a metatree, complete to a particular height, checks to see if the next + * height of the tree exists. If not the next height of the tree is created. + * The block number of the next height of the metadata tree is returned. + * + */ + +static void lookup_block(struct gfs2_inode *ip, struct buffer_head *bh, + unsigned int height, struct metapath *mp, int create, + int *new, uint64_t *block) +{ + uint64_t *ptr = metapointer(bh, height, mp); + + if (*ptr) { + *block = le64_to_cpu(*ptr); + return; + } + + *block = 0; + + if (!create) + return; + + if (height == ip->i_di.di_height - 1 && + !gfs2_is_jdata(ip)) + *block = gfs2_alloc_data(ip); + else + *block = gfs2_alloc_meta(ip); + + gfs2_trans_add_bh(ip->i_gl, bh); + + *ptr = cpu_to_le64(*block); + ip->i_di.di_blocks++; + + *new = 1; +} + +/** + * gfs2_block_map - Map a block from an inode to a disk block + * @ip: The GFS2 inode + * @lblock: The logical block number + * @new: Value/Result argument (1 = may create/did create new blocks) + * @dblock: the disk block number of the start of an extent + * @extlen: the size of the extent + * + * Find the block number on the current device which corresponds to an + * inode's block. If the block had to be created, "new" will be set. + * + * Returns: errno + */ + +int gfs2_block_map(struct gfs2_inode *ip, uint64_t lblock, int *new, + uint64_t *dblock, uint32_t *extlen) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *bh; + struct metapath *mp; + int create = *new; + unsigned int bsize; + unsigned int height; + unsigned int end_of_metadata; + unsigned int x; + int error = 0; + + *new = 0; + *dblock = 0; + if (extlen) + *extlen = 0; + + if (create) + down_write(&ip->i_rw_mutex); + else + down_read(&ip->i_rw_mutex); + + if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) + goto out; + + bsize = (gfs2_is_jdata(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize; + + height = calc_tree_height(ip, (lblock + 1) * bsize); + if (ip->i_di.di_height < height) { + if (!create) + goto out; + + error = build_height(ip, height); + if (error) + goto out; + } + + mp = find_metapath(ip, lblock); + end_of_metadata = ip->i_di.di_height - 1; + + error = gfs2_meta_inode_buffer(ip, &bh); + if (error) + goto out_kfree; + + for (x = 0; x < end_of_metadata; x++) { + lookup_block(ip, bh, x, mp, create, new, dblock); + brelse(bh); + if (!*dblock) + goto out_kfree; + + error = gfs2_meta_indirect_buffer(ip, x+1, *dblock, *new, &bh); + if (error) + goto out_kfree; + } + + lookup_block(ip, bh, end_of_metadata, mp, create, new, dblock); + + if (extlen && *dblock) { + *extlen = 1; + + if (!*new) { + uint64_t tmp_dblock; + int tmp_new; + unsigned int nptrs; + + nptrs = (end_of_metadata) ? sdp->sd_inptrs : + sdp->sd_diptrs; + + while (++mp->mp_list[end_of_metadata] < nptrs) { + lookup_block(ip, bh, end_of_metadata, mp, + 0, &tmp_new, &tmp_dblock); + + if (*dblock + *extlen != tmp_dblock) + break; + + (*extlen)++; + } + } + } + + brelse(bh); + + if (*new) { + error = gfs2_meta_inode_buffer(ip, &bh); + if (!error) { + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_dinode_out(&ip->i_di, bh->b_data); + brelse(bh); + } + } + + out_kfree: + kfree(mp); + + out: + if (create) + up_write(&ip->i_rw_mutex); + else + up_read(&ip->i_rw_mutex); + + return error; +} + +/** + * recursive_scan - recursively scan through the end of a file + * @ip: the inode + * @dibh: the dinode buffer + * @mp: the path through the metadata to the point to start + * @height: the height the recursion is at + * @block: the indirect block to look at + * @first: 1 if this is the first block + * @bc: the call to make for each piece of metadata + * @data: data opaque to this function to pass to @bc + * + * When this is first called @height and @block should be zero and + * @first should be 1. + * + * Returns: errno + */ + +static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh, + struct metapath *mp, unsigned int height, + uint64_t block, int first, block_call_t bc, + void *data) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *bh = NULL; + uint64_t *top, *bottom; + uint64_t bn; + int error; + int mh_size = sizeof(struct gfs2_meta_header); + + if (!height) { + error = gfs2_meta_inode_buffer(ip, &bh); + if (error) + return error; + dibh = bh; + + top = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) + + mp->mp_list[0]; + bottom = (uint64_t *)(bh->b_data + sizeof(struct gfs2_dinode)) + + sdp->sd_diptrs; + } else { + error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh); + if (error) + return error; + + top = (uint64_t *)(bh->b_data + mh_size) + + ((first) ? mp->mp_list[height] : 0); + + bottom = (uint64_t *)(bh->b_data + mh_size) + sdp->sd_inptrs; + } + + error = bc(ip, dibh, bh, top, bottom, height, data); + if (error) + goto out; + + if (height < ip->i_di.di_height - 1) + for (; top < bottom; top++, first = 0) { + if (!*top) + continue; + + bn = le64_to_cpu(*top); + + error = recursive_scan(ip, dibh, mp, height + 1, bn, + first, bc, data); + if (error) + break; + } + + out: + brelse(bh); + + return error; +} + +/** + * do_strip - Look for a layer a particular layer of the file and strip it off + * @ip: the inode + * @dibh: the dinode buffer + * @bh: A buffer of pointers + * @top: The first pointer in the buffer + * @bottom: One more than the last pointer + * @height: the height this buffer is at + * @data: a pointer to a struct strip_mine + * + * Returns: errno + */ + +static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, + struct buffer_head *bh, uint64_t *top, uint64_t *bottom, + unsigned int height, void *data) +{ + struct strip_mine *sm = (struct strip_mine *)data; + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_rgrp_list rlist; + uint64_t bn, bstart; + uint32_t blen; + uint64_t *p; + unsigned int rg_blocks = 0; + int metadata; + unsigned int revokes = 0; + int x; + int error; + + if (!*top) + sm->sm_first = 0; + + if (height != sm->sm_height) + return 0; + + if (sm->sm_first) { + top++; + sm->sm_first = 0; + } + + metadata = (height != ip->i_di.di_height - 1) || gfs2_is_jdata(ip); + if (metadata) + revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs; + + error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh); + if (error) + return error; + + memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); + bstart = 0; + blen = 0; + + for (p = top; p < bottom; p++) { + if (!*p) + continue; + + bn = le64_to_cpu(*p); + + if (bstart + blen == bn) + blen++; + else { + if (bstart) + gfs2_rlist_add(sdp, &rlist, bstart); + + bstart = bn; + blen = 1; + } + } + + if (bstart) + gfs2_rlist_add(sdp, &rlist, bstart); + else + goto out; /* Nothing to do */ + + gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); + + for (x = 0; x < rlist.rl_rgrps; x++) { + struct gfs2_rgrpd *rgd; + rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl); + rg_blocks += rgd->rd_ri.ri_length; + } + + error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); + if (error) + goto out_rlist; + + error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + + RES_INDIRECT + RES_STATFS + RES_QUOTA, + revokes); + if (error) + goto out_rg_gunlock; + + down_write(&ip->i_rw_mutex); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_trans_add_bh(ip->i_gl, bh); + + bstart = 0; + blen = 0; + + for (p = top; p < bottom; p++) { + if (!*p) + continue; + + bn = le64_to_cpu(*p); + + if (bstart + blen == bn) + blen++; + else { + if (bstart) { + if (metadata) + gfs2_free_meta(ip, bstart, blen); + else + gfs2_free_data(ip, bstart, blen); + } + + bstart = bn; + blen = 1; + } + + *p = 0; + if (!ip->i_di.di_blocks) + gfs2_consist_inode(ip); + ip->i_di.di_blocks--; + } + if (bstart) { + if (metadata) + gfs2_free_meta(ip, bstart, blen); + else + gfs2_free_data(ip, bstart, blen); + } + + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs2_dinode_out(&ip->i_di, dibh->b_data); + + up_write(&ip->i_rw_mutex); + + gfs2_trans_end(sdp); + + out_rg_gunlock: + gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); + + out_rlist: + gfs2_rlist_free(&rlist); + + out: + gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh); + + return error; +} + +/** + * do_grow - Make a file look bigger than it is + * @ip: the inode + * @size: the size to set the file to + * + * Called with an exclusive lock on @ip. + * + * Returns: errno + */ + +static int do_grow(struct gfs2_inode *ip, uint64_t size) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_alloc *al; + struct buffer_head *dibh; + unsigned int h; + int error; + + al = gfs2_alloc_get(ip); + + error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out; + + error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto out_gunlock_q; + + al->al_requested = sdp->sd_max_height + RES_DATA; + + error = gfs2_inplace_reserve(ip); + if (error) + goto out_gunlock_q; + + error = gfs2_trans_begin(sdp, + sdp->sd_max_height + al->al_rgd->rd_ri.ri_length + + RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0); + if (error) + goto out_ipres; + + if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { + if (gfs2_is_stuffed(ip)) { + error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, + NULL); + if (error) + goto out_end_trans; + } + + h = calc_tree_height(ip, size); + if (ip->i_di.di_height < h) { + down_write(&ip->i_rw_mutex); + error = build_height(ip, h); + up_write(&ip->i_rw_mutex); + if (error) + goto out_end_trans; + } + } + + ip->i_di.di_size = size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out_end_trans; + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + out_end_trans: + gfs2_trans_end(sdp); + + out_ipres: + gfs2_inplace_release(ip); + + out_gunlock_q: + gfs2_quota_unlock(ip); + + out: + gfs2_alloc_put(ip); + + return error; +} + +static int truncator_journaled(struct gfs2_inode *ip, uint64_t size) +{ + uint64_t lbn, dbn; + uint32_t off; + struct buffer_head *bh; + int new = 0; + int error; + + lbn = size; + off = do_div(lbn, ip->i_sbd->sd_jbsize); + + error = gfs2_block_map(ip, lbn, &new, &dbn, NULL); + if (error || !dbn) + return error; + + error = gfs2_jdata_get_buffer(ip, dbn, 0, &bh); + if (error) + return error; + + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header) + off); + + brelse(bh); + + return 0; +} + +static int trunc_start(struct gfs2_inode *ip, uint64_t size, + gfs2_truncator_t truncator) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *dibh; + int journaled = gfs2_is_jdata(ip); + int error; + + error = gfs2_trans_begin(sdp, + RES_DINODE + ((journaled) ? RES_JDATA : 0), 0); + if (error) + return error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out; + + if (gfs2_is_stuffed(ip)) { + ip->i_di.di_size = size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size); + error = 1; + + } else { + if (journaled) { + uint64_t junk = size; + /* we're just interested in the modulus */ + if (do_div(junk, sdp->sd_jbsize)) + error = truncator_journaled(ip, size); + } else if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1)) + error = truncator(ip, size); + + if (!error) { + ip->i_di.di_size = size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG; + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + } + } + + brelse(dibh); + + out: + gfs2_trans_end(sdp); + + return error; +} + +static int trunc_dealloc(struct gfs2_inode *ip, uint64_t size) +{ + unsigned int height = ip->i_di.di_height; + uint64_t lblock; + struct metapath *mp; + int error; + + if (!size) + lblock = 0; + else if (gfs2_is_jdata(ip)) { + lblock = size - 1; + do_div(lblock, ip->i_sbd->sd_jbsize); + } else + lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift; + + mp = find_metapath(ip, lblock); + gfs2_alloc_get(ip); + + error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out; + + while (height--) { + struct strip_mine sm; + sm.sm_first = !!size; + sm.sm_height = height; + + error = recursive_scan(ip, NULL, mp, 0, 0, 1, do_strip, &sm); + if (error) + break; + } + + gfs2_quota_unhold(ip); + + out: + gfs2_alloc_put(ip); + kfree(mp); + + return error; +} + +static int trunc_end(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *dibh; + int error; + + error = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (error) + return error; + + down_write(&ip->i_rw_mutex); + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out; + + if (!ip->i_di.di_size) { + ip->i_di.di_height = 0; + ip->i_di.di_goal_meta = + ip->i_di.di_goal_data = + ip->i_num.no_addr; + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + } + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG; + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + out: + up_write(&ip->i_rw_mutex); + + gfs2_trans_end(sdp); + + return error; +} + +/** + * do_shrink - make a file smaller + * @ip: the inode + * @size: the size to make the file + * @truncator: function to truncate the last partial block + * + * Called with an exclusive lock on @ip. + * + * Returns: errno + */ + +static int do_shrink(struct gfs2_inode *ip, uint64_t size, + gfs2_truncator_t truncator) +{ + int error; + + error = trunc_start(ip, size, truncator); + if (error < 0) + return error; + if (error > 0) + return 0; + + error = trunc_dealloc(ip, size); + if (!error) + error = trunc_end(ip); + + return error; +} + +/** + * gfs2_truncatei - make a file a give size + * @ip: the inode + * @size: the size to make the file + * @truncator: function to truncate the last partial block + * + * The file size can grow, shrink, or stay the same size. + * + * Returns: errno + */ + +int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size, + gfs2_truncator_t truncator) +{ + int error; + + if (gfs2_assert_warn(ip->i_sbd, S_ISREG(ip->i_di.di_mode))) + return -EINVAL; + + if (size > ip->i_di.di_size) + error = do_grow(ip, size); + else + error = do_shrink(ip, size, truncator); + + return error; +} + +int gfs2_truncatei_resume(struct gfs2_inode *ip) +{ + int error; + error = trunc_dealloc(ip, ip->i_di.di_size); + if (!error) + error = trunc_end(ip); + return error; +} + +int gfs2_file_dealloc(struct gfs2_inode *ip) +{ + return trunc_dealloc(ip, 0); +} + +/** + * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file + * @ip: the file + * @len: the number of bytes to be written to the file + * @data_blocks: returns the number of data blocks required + * @ind_blocks: returns the number of indirect blocks required + * + */ + +void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len, + unsigned int *data_blocks, unsigned int *ind_blocks) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + unsigned int tmp; + + if (gfs2_is_jdata(ip)) { + *data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2; + *ind_blocks = 3 * (sdp->sd_max_jheight - 1); + } else { + *data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3; + *ind_blocks = 3 * (sdp->sd_max_height - 1); + } + + for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) { + tmp = DIV_RU(tmp, sdp->sd_inptrs); + *ind_blocks += tmp; + } +} + +/** + * gfs2_write_alloc_required - figure out if a write will require an allocation + * @ip: the file being written to + * @offset: the offset to write to + * @len: the number of bytes being written + * @alloc_required: set to 1 if an alloc is required, 0 otherwise + * + * Returns: errno + */ + +int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset, + unsigned int len, int *alloc_required) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + uint64_t lblock, lblock_stop, dblock; + uint32_t extlen; + int new = 0; + int error = 0; + + *alloc_required = 0; + + if (!len) + return 0; + + if (gfs2_is_stuffed(ip)) { + if (offset + len > + sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) + *alloc_required = 1; + return 0; + } + + if (gfs2_is_jdata(ip)) { + unsigned int bsize = sdp->sd_jbsize; + lblock = offset; + do_div(lblock, bsize); + lblock_stop = offset + len + bsize - 1; + do_div(lblock_stop, bsize); + } else { + unsigned int shift = sdp->sd_sb.sb_bsize_shift; + lblock = offset >> shift; + lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; + } + + for (; lblock < lblock_stop; lblock += extlen) { + error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen); + if (error) + return error; + + if (!dblock) { + *alloc_required = 1; + return 0; + } + } + + return 0; +} + +/** + * do_gfm - Copy out the dinode/indirect blocks of a file + * @ip: the file + * @dibh: the dinode buffer + * @bh: the indirect buffer we're looking at + * @top: the first pointer in the block + * @bottom: one more than the last pointer in the block + * @height: the height the block is at + * @data: a pointer to a struct gfs2_user_buffer structure + * + * If this is a journaled file, copy out the data too. + * + * Returns: errno + */ + +static int do_gfm(struct gfs2_inode *ip, struct buffer_head *dibh, + struct buffer_head *bh, uint64_t *top, uint64_t *bottom, + unsigned int height, void *data) +{ + struct gfs2_user_buffer *ub = (struct gfs2_user_buffer *)data; + int error; + + error = gfs2_add_bh_to_ub(ub, bh); + if (error) + return error; + + if (!S_ISDIR(ip->i_di.di_mode) || + height + 1 != ip->i_di.di_height) + return 0; + + for (; top < bottom; top++) + if (*top) { + struct buffer_head *data_bh; + + error = gfs2_meta_read(ip->i_gl, le64_to_cpu(*top), + DIO_START | DIO_WAIT, + &data_bh); + if (error) + return error; + + error = gfs2_add_bh_to_ub(ub, data_bh); + + brelse(data_bh); + + if (error) + return error; + } + + return 0; +} + +/** + * gfs2_get_file_meta - return all the metadata for a file + * @ip: the file + * @ub: the structure representing the meta + * + * Returns: errno + */ + +int gfs2_get_file_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub) +{ + int error; + + if (gfs2_is_stuffed(ip)) { + struct buffer_head *dibh; + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + error = gfs2_add_bh_to_ub(ub, dibh); + brelse(dibh); + } + } else { + struct metapath *mp = find_metapath(ip, 0); + error = recursive_scan(ip, NULL, mp, 0, 0, 1, do_gfm, ub); + kfree(mp); + } + + return error; +} + --- a/fs/gfs2/bmap.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/bmap.h 2005-10-10 11:28:49.124813764 -0500 @@ -0,0 +1,39 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __BMAP_DOT_H__ +#define __BMAP_DOT_H__ + +typedef int (*gfs2_unstuffer_t) (struct gfs2_inode * ip, + struct buffer_head * dibh, uint64_t block, + void *private); +int gfs2_unstuffer_sync(struct gfs2_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private); +int gfs2_unstuff_dinode(struct gfs2_inode *ip, gfs2_unstuffer_t unstuffer, + void *private); + +int gfs2_block_map(struct gfs2_inode *ip, + uint64_t lblock, int *new, + uint64_t *dblock, uint32_t *extlen); + +typedef int (*gfs2_truncator_t) (struct gfs2_inode * ip, uint64_t size); +int gfs2_truncatei(struct gfs2_inode *ip, uint64_t size, + gfs2_truncator_t truncator); +int gfs2_truncatei_resume(struct gfs2_inode *ip); +int gfs2_file_dealloc(struct gfs2_inode *ip); + +void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len, + unsigned int *data_blocks, + unsigned int *ind_blocks); +int gfs2_write_alloc_required(struct gfs2_inode *ip, uint64_t offset, + unsigned int len, int *alloc_required); + +int gfs2_get_file_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub); + +#endif /* __BMAP_DOT_H__ */ --- a/fs/gfs2/daemon.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/daemon.c 2005-10-10 11:28:49.125813608 -0500 @@ -0,0 +1,225 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "daemon.h" +#include "glock.h" +#include "log.h" +#include "quota.h" +#include "recovery.h" +#include "super.h" +#include "unlinked.h" + +/* This uses schedule_timeout() instead of msleep() because it's good for + the daemons to wake up more often than the timeout when unmounting so + the user's unmount doesn't sit there forever. + + The kthread functions used to start these daemons block and flush signals. */ + +/** + * gfs2_scand - Look for cached glocks and inodes to toss from memory + * @sdp: Pointer to GFS2 superblock + * + * One of these daemons runs, finding candidates to add to sd_reclaim_list. + * See gfs2_glockd() + */ + +int gfs2_scand(void *data) +{ + struct gfs2_sbd *sdp = (struct gfs2_sbd *)data; + unsigned long t; + + while (!kthread_should_stop()) { + gfs2_scand_internal(sdp); + t = gfs2_tune_get(sdp, gt_scand_secs) * HZ; + schedule_timeout_interruptible(t); + } + + return 0; +} + +/** + * gfs2_glockd - Reclaim unused glock structures + * @sdp: Pointer to GFS2 superblock + * + * One or more of these daemons run, reclaiming glocks on sd_reclaim_list. + * Number of daemons can be set by user, with num_glockd mount option. + */ + +int gfs2_glockd(void *data) +{ + struct gfs2_sbd *sdp = (struct gfs2_sbd *)data; + DECLARE_WAITQUEUE(wait_chan, current); + + while (!kthread_should_stop()) { + while (atomic_read(&sdp->sd_reclaim_count)) + gfs2_reclaim_glock(sdp); + + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&sdp->sd_reclaim_wq, &wait_chan); + if (!atomic_read(&sdp->sd_reclaim_count) && + !kthread_should_stop()) + schedule(); + remove_wait_queue(&sdp->sd_reclaim_wq, &wait_chan); + set_current_state(TASK_RUNNING); + } + + return 0; +} + +/** + * gfs2_recoverd - Recover dead machine's journals + * @sdp: Pointer to GFS2 superblock + * + */ + +int gfs2_recoverd(void *data) +{ + struct gfs2_sbd *sdp = (struct gfs2_sbd *)data; + unsigned long t; + + while (!kthread_should_stop()) { + gfs2_check_journals(sdp); + t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ; + schedule_timeout_interruptible(t); + } + + return 0; +} + +/** + * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks + * @sdp: Pointer to GFS2 superblock + * + * Also, periodically check to make sure that we're using the most recent + * journal index. + */ + +int gfs2_logd(void *data) +{ + struct gfs2_sbd *sdp = (struct gfs2_sbd *)data; + struct gfs2_holder ji_gh; + unsigned long t; + + while (!kthread_should_stop()) { + /* Advance the log tail */ + + t = sdp->sd_log_flush_time + + gfs2_tune_get(sdp, gt_log_flush_secs) * HZ; + + gfs2_ail1_empty(sdp, DIO_ALL); + + if (time_after_eq(jiffies, t)) { + gfs2_log_flush(sdp); + sdp->sd_log_flush_time = jiffies; + } + + /* Check for latest journal index */ + + t = sdp->sd_jindex_refresh_time + + gfs2_tune_get(sdp, gt_jindex_refresh_secs) * HZ; + + if (time_after_eq(jiffies, t)) { + if (!gfs2_jindex_hold(sdp, &ji_gh)) + gfs2_glock_dq_uninit(&ji_gh); + sdp->sd_jindex_refresh_time = jiffies; + } + + t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; + schedule_timeout_interruptible(t); + } + + return 0; +} + +/** + * gfs2_quotad - Write cached quota changes into the quota file + * @sdp: Pointer to GFS2 superblock + * + */ + +int gfs2_quotad(void *data) +{ + struct gfs2_sbd *sdp = (struct gfs2_sbd *)data; + unsigned long t; + int error; + + while (!kthread_should_stop()) { + /* Update the master statfs file */ + + t = sdp->sd_statfs_sync_time + + gfs2_tune_get(sdp, gt_statfs_quantum) * HZ; + + if (time_after_eq(jiffies, t)) { + error = gfs2_statfs_sync(sdp); + if (error && + error != -EROFS && + !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + fs_err(sdp, "quotad: (1) error=%d\n", error); + sdp->sd_statfs_sync_time = jiffies; + } + + /* Update quota file */ + + t = sdp->sd_quota_sync_time + + gfs2_tune_get(sdp, gt_quota_quantum) * HZ; + + if (time_after_eq(jiffies, t)) { + error = gfs2_quota_sync(sdp); + if (error && + error != -EROFS && + !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + fs_err(sdp, "quotad: (2) error=%d\n", error); + sdp->sd_quota_sync_time = jiffies; + } + + gfs2_quota_scan(sdp); + + t = gfs2_tune_get(sdp, gt_quotad_secs) * HZ; + schedule_timeout_interruptible(t); + } + + return 0; +} + +/** + * gfs2_inoded - Deallocate unlinked inodes + * @sdp: Pointer to GFS2 superblock + * + */ + +int gfs2_inoded(void *data) +{ + struct gfs2_sbd *sdp = (struct gfs2_sbd *)data; + unsigned long t; + int error; + + while (!kthread_should_stop()) { + error = gfs2_unlinked_dealloc(sdp); + if (error && + error != -EROFS && + !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + fs_err(sdp, "inoded: error = %d\n", error); + + t = gfs2_tune_get(sdp, gt_inoded_secs) * HZ; + schedule_timeout_interruptible(t); + } + + return 0; +} + --- a/fs/gfs2/daemon.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/daemon.h 2005-10-10 11:28:49.127813296 -0500 @@ -0,0 +1,20 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __DAEMON_DOT_H__ +#define __DAEMON_DOT_H__ + +int gfs2_scand(void *data); +int gfs2_glockd(void *data); +int gfs2_recoverd(void *data); +int gfs2_logd(void *data); +int gfs2_quotad(void *data); +int gfs2_inoded(void *data); + +#endif /* __DAEMON_DOT_H__ */ --- a/fs/gfs2/format.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/format.h 2005-10-10 11:28:49.164807527 -0500 @@ -0,0 +1,21 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __FORMAT_DOT_H__ +#define __FORMAT_DOT_H__ + +static const uint32_t gfs2_old_fs_formats[] = { + 0 +}; + +static const uint32_t gfs2_old_multihost_formats[] = { + 0 +}; + +#endif /* __FORMAT_DOT_H__ */ --- a/fs/gfs2/glops.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/glops.c 2005-10-10 11:28:49.208800667 -0500 @@ -0,0 +1,487 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "log.h" +#include "meta_io.h" +#include "page.h" +#include "recovery.h" +#include "rgrp.h" + +/** + * meta_go_sync - sync out the metadata for this glock + * @gl: the glock + * @flags: DIO_* + * + * Called when demoting or unlocking an EX glock. We must flush + * to disk all dirty buffers/pages relating to this glock, and must not + * not return to caller to demote/unlock the glock until I/O is complete. + */ + +static void meta_go_sync(struct gfs2_glock *gl, int flags) +{ + if (!(flags & DIO_METADATA)) + return; + + if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) { + gfs2_log_flush_glock(gl); + gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT); + if (flags & DIO_RELEASE) + gfs2_ail_empty_gl(gl); + } + + clear_bit(GLF_SYNC, &gl->gl_flags); +} + +/** + * meta_go_inval - invalidate the metadata for this glock + * @gl: the glock + * @flags: + * + */ + +static void meta_go_inval(struct gfs2_glock *gl, int flags) +{ + if (!(flags & DIO_METADATA)) + return; + + gfs2_meta_inval(gl); + gl->gl_vn++; +} + +/** + * meta_go_demote_ok - Check to see if it's ok to unlock a glock + * @gl: the glock + * + * Returns: 1 if we have no cached data; ok to demote meta glock + */ + +static int meta_go_demote_ok(struct gfs2_glock *gl) +{ + return !gl->gl_aspace->i_mapping->nrpages; +} + +/** + * inode_go_xmote_th - promote/demote a glock + * @gl: the glock + * @state: the requested state + * @flags: + * + */ + +static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state, + int flags) +{ + if (gl->gl_state != LM_ST_UNLOCKED) + gfs2_pte_inval(gl); + gfs2_glock_xmote_th(gl, state, flags); +} + +/** + * inode_go_xmote_bh - After promoting/demoting a glock + * @gl: the glock + * + */ + +static void inode_go_xmote_bh(struct gfs2_glock *gl) +{ + struct gfs2_holder *gh = gl->gl_req_gh; + struct buffer_head *bh; + int error; + + if (gl->gl_state != LM_ST_UNLOCKED && + (!gh || !(gh->gh_flags & GL_SKIP))) { + error = gfs2_meta_read(gl, gl->gl_name.ln_number, DIO_START, + &bh); + if (!error) + brelse(bh); + } +} + +/** + * inode_go_drop_th - unlock a glock + * @gl: the glock + * + * Invoked from rq_demote(). + * Another node needs the lock in EXCLUSIVE mode, or lock (unused for too long) + * is being purged from our node's glock cache; we're dropping lock. + */ + +static void inode_go_drop_th(struct gfs2_glock *gl) +{ + gfs2_pte_inval(gl); + gfs2_glock_drop_th(gl); +} + +/** + * inode_go_sync - Sync the dirty data and/or metadata for an inode glock + * @gl: the glock protecting the inode + * @flags: + * + */ + +static void inode_go_sync(struct gfs2_glock *gl, int flags) +{ + int meta = (flags & DIO_METADATA); + int data = (flags & DIO_DATA); + + if (test_bit(GLF_DIRTY, &gl->gl_flags)) { + if (meta && data) { + gfs2_page_sync(gl, flags | DIO_START); + gfs2_log_flush_glock(gl); + gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT); + gfs2_page_sync(gl, flags | DIO_WAIT); + clear_bit(GLF_DIRTY, &gl->gl_flags); + } else if (meta) { + gfs2_log_flush_glock(gl); + gfs2_meta_sync(gl, flags | DIO_START | DIO_WAIT); + } else if (data) + gfs2_page_sync(gl, flags | DIO_START | DIO_WAIT); + if (flags & DIO_RELEASE) + gfs2_ail_empty_gl(gl); + } + + clear_bit(GLF_SYNC, &gl->gl_flags); +} + +/** + * inode_go_inval - prepare a inode glock to be released + * @gl: the glock + * @flags: + * + */ + +static void inode_go_inval(struct gfs2_glock *gl, int flags) +{ + int meta = (flags & DIO_METADATA); + int data = (flags & DIO_DATA); + + if (meta) { + gfs2_meta_inval(gl); + gl->gl_vn++; + } + if (data) + gfs2_page_inval(gl); +} + +/** + * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock + * @gl: the glock + * + * Returns: 1 if it's ok + */ + +static int inode_go_demote_ok(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + int demote = 0; + + if (!get_gl2ip(gl) && !gl->gl_aspace->i_mapping->nrpages) + demote = 1; + else if (!sdp->sd_args.ar_localcaching && + time_after_eq(jiffies, gl->gl_stamp + + gfs2_tune_get(sdp, gt_demote_secs) * HZ)) + demote = 1; + + return demote; +} + +/** + * inode_go_lock - operation done after an inode lock is locked by a process + * @gl: the glock + * @flags: + * + * Returns: errno + */ + +static int inode_go_lock(struct gfs2_holder *gh) +{ + struct gfs2_glock *gl = gh->gh_gl; + struct gfs2_inode *ip = get_gl2ip(gl); + int error = 0; + + if (!ip) + return 0; + + if (ip->i_vn != gl->gl_vn) { + error = gfs2_inode_refresh(ip); + if (error) + return error; + gfs2_inode_attr_in(ip); + } + + if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) && + (gl->gl_state == LM_ST_EXCLUSIVE) && + (gh->gh_flags & GL_LOCAL_EXCL)) + error = gfs2_truncatei_resume(ip); + + return error; +} + +/** + * inode_go_unlock - operation done before an inode lock is unlocked by a + * process + * @gl: the glock + * @flags: + * + */ + +static void inode_go_unlock(struct gfs2_holder *gh) +{ + struct gfs2_glock *gl = gh->gh_gl; + struct gfs2_inode *ip = get_gl2ip(gl); + + if (ip && test_bit(GLF_DIRTY, &gl->gl_flags)) + gfs2_inode_attr_in(ip); + + if (ip) + gfs2_meta_cache_flush(ip); +} + +/** + * inode_greedy - + * @gl: the glock + * + */ + +static void inode_greedy(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_inode *ip = get_gl2ip(gl); + unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum); + unsigned int max = gfs2_tune_get(sdp, gt_greedy_max); + unsigned int new_time; + + spin_lock(&ip->i_spin); + + if (time_after(ip->i_last_pfault + quantum, jiffies)) { + new_time = ip->i_greedy + quantum; + if (new_time > max) + new_time = max; + } else { + new_time = ip->i_greedy - quantum; + if (!new_time || new_time > max) + new_time = 1; + } + + ip->i_greedy = new_time; + + spin_unlock(&ip->i_spin); + + gfs2_inode_put(ip); +} + +/** + * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock + * @gl: the glock + * + * Returns: 1 if it's ok + */ + +static int rgrp_go_demote_ok(struct gfs2_glock *gl) +{ + return !gl->gl_aspace->i_mapping->nrpages; +} + +/** + * rgrp_go_lock - operation done after an rgrp lock is locked by + * a first holder on this node. + * @gl: the glock + * @flags: + * + * Returns: errno + */ + +static int rgrp_go_lock(struct gfs2_holder *gh) +{ + return gfs2_rgrp_bh_get(get_gl2rgd(gh->gh_gl)); +} + +/** + * rgrp_go_unlock - operation done before an rgrp lock is unlocked by + * a last holder on this node. + * @gl: the glock + * @flags: + * + */ + +static void rgrp_go_unlock(struct gfs2_holder *gh) +{ + gfs2_rgrp_bh_put(get_gl2rgd(gh->gh_gl)); +} + +/** + * trans_go_xmote_th - promote/demote the transaction glock + * @gl: the glock + * @state: the requested state + * @flags: + * + */ + +static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state, + int flags) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + + if (gl->gl_state != LM_ST_UNLOCKED && + test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + gfs2_meta_syncfs(sdp); + gfs2_log_shutdown(sdp); + } + + gfs2_glock_xmote_th(gl, state, flags); +} + +/** + * trans_go_xmote_bh - After promoting/demoting the transaction glock + * @gl: the glock + * + */ + +static void trans_go_xmote_bh(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_glock *j_gl = sdp->sd_jdesc->jd_inode->i_gl; + struct gfs2_log_header head; + int error; + + if (gl->gl_state != LM_ST_UNLOCKED && + test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + gfs2_meta_cache_flush(sdp->sd_jdesc->jd_inode); + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA); + + error = gfs2_find_jhead(sdp->sd_jdesc, &head); + if (error) + gfs2_consist(sdp); + if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) + gfs2_consist(sdp); + + /* Initialize some head of the log stuff */ + if (!test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) { + sdp->sd_log_sequence = head.lh_sequence + 1; + gfs2_log_pointers_init(sdp, head.lh_blkno); + } + } +} + +/** + * trans_go_drop_th - unlock the transaction glock + * @gl: the glock + * + * We want to sync the device even with localcaching. Remember + * that localcaching journal replay only marks buffers dirty. + */ + +static void trans_go_drop_th(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + + if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + gfs2_meta_syncfs(sdp); + gfs2_log_shutdown(sdp); + } + + gfs2_glock_drop_th(gl); +} + +/** + * quota_go_demote_ok - Check to see if it's ok to unlock a quota glock + * @gl: the glock + * + * Returns: 1 if it's ok + */ + +static int quota_go_demote_ok(struct gfs2_glock *gl) +{ + return !atomic_read(&gl->gl_lvb_count); +} + +struct gfs2_glock_operations gfs2_meta_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_sync = meta_go_sync, + .go_inval = meta_go_inval, + .go_demote_ok = meta_go_demote_ok, + .go_type = LM_TYPE_META +}; + +struct gfs2_glock_operations gfs2_inode_glops = { + .go_xmote_th = inode_go_xmote_th, + .go_xmote_bh = inode_go_xmote_bh, + .go_drop_th = inode_go_drop_th, + .go_sync = inode_go_sync, + .go_inval = inode_go_inval, + .go_demote_ok = inode_go_demote_ok, + .go_lock = inode_go_lock, + .go_unlock = inode_go_unlock, + .go_greedy = inode_greedy, + .go_type = LM_TYPE_INODE +}; + +struct gfs2_glock_operations gfs2_rgrp_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_sync = meta_go_sync, + .go_inval = meta_go_inval, + .go_demote_ok = rgrp_go_demote_ok, + .go_lock = rgrp_go_lock, + .go_unlock = rgrp_go_unlock, + .go_type = LM_TYPE_RGRP +}; + +struct gfs2_glock_operations gfs2_trans_glops = { + .go_xmote_th = trans_go_xmote_th, + .go_xmote_bh = trans_go_xmote_bh, + .go_drop_th = trans_go_drop_th, + .go_type = LM_TYPE_NONDISK +}; + +struct gfs2_glock_operations gfs2_iopen_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_callback = gfs2_iopen_go_callback, + .go_type = LM_TYPE_IOPEN +}; + +struct gfs2_glock_operations gfs2_flock_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_type = LM_TYPE_FLOCK +}; + +struct gfs2_glock_operations gfs2_nondisk_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_type = LM_TYPE_NONDISK +}; + +struct gfs2_glock_operations gfs2_quota_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_demote_ok = quota_go_demote_ok, + .go_type = LM_TYPE_QUOTA +}; + +struct gfs2_glock_operations gfs2_journal_glops = { + .go_xmote_th = gfs2_glock_xmote_th, + .go_drop_th = gfs2_glock_drop_th, + .go_type = LM_TYPE_JOURNAL +}; + --- a/fs/gfs2/glops.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/glops.h 2005-10-10 11:28:49.208800667 -0500 @@ -0,0 +1,23 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __GLOPS_DOT_H__ +#define __GLOPS_DOT_H__ + +extern struct gfs2_glock_operations gfs2_meta_glops; +extern struct gfs2_glock_operations gfs2_inode_glops; +extern struct gfs2_glock_operations gfs2_rgrp_glops; +extern struct gfs2_glock_operations gfs2_trans_glops; +extern struct gfs2_glock_operations gfs2_iopen_glops; +extern struct gfs2_glock_operations gfs2_flock_glops; +extern struct gfs2_glock_operations gfs2_nondisk_glops; +extern struct gfs2_glock_operations gfs2_quota_glops; +extern struct gfs2_glock_operations gfs2_journal_glops; + +#endif /* __GLOPS_DOT_H__ */ --- a/fs/gfs2/inode.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/inode.c 2005-10-10 11:28:49.210800356 -0500 @@ -0,0 +1,1805 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "acl.h" +#include "bmap.h" +#include "dir.h" +#include "eattr.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "log.h" +#include "meta_io.h" +#include "ops_address.h" +#include "ops_file.h" +#include "ops_inode.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" +#include "unlinked.h" + +/** + * inode_attr_in - Copy attributes from the dinode into the VFS inode + * @ip: The GFS2 inode (with embedded disk inode data) + * @inode: The Linux VFS inode + * + */ + +static void inode_attr_in(struct gfs2_inode *ip, struct inode *inode) +{ + inode->i_ino = ip->i_num.no_formal_ino; + + switch (ip->i_di.di_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + inode->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor); + break; + default: + inode->i_rdev = 0; + break; + }; + + inode->i_mode = ip->i_di.di_mode; + inode->i_nlink = ip->i_di.di_nlink; + inode->i_uid = ip->i_di.di_uid; + inode->i_gid = ip->i_di.di_gid; + i_size_write(inode, ip->i_di.di_size); + inode->i_atime.tv_sec = ip->i_di.di_atime; + inode->i_mtime.tv_sec = ip->i_di.di_mtime; + inode->i_ctime.tv_sec = ip->i_di.di_ctime; + inode->i_atime.tv_nsec = 0; + inode->i_mtime.tv_nsec = 0; + inode->i_ctime.tv_nsec = 0; + inode->i_blksize = PAGE_SIZE; + inode->i_blocks = ip->i_di.di_blocks << + (ip->i_sbd->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT); + + if (ip->i_di.di_flags & GFS2_DIF_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + + if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; +} + +/** + * gfs2_inode_attr_in - Copy attributes from the dinode into the VFS inode + * @ip: The GFS2 inode (with embedded disk inode data) + * + */ + +void gfs2_inode_attr_in(struct gfs2_inode *ip) +{ + struct inode *inode; + + inode = gfs2_ip2v_lookup(ip); + if (inode) { + inode_attr_in(ip, inode); + iput(inode); + } +} + +/** + * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode + * @ip: The GFS2 inode + * + * Only copy out the attributes that we want the VFS layer + * to be able to modify. + */ + +void gfs2_inode_attr_out(struct gfs2_inode *ip) +{ + struct inode *inode = ip->i_vnode; + + gfs2_assert_withdraw(ip->i_sbd, + (ip->i_di.di_mode & S_IFMT) == (inode->i_mode & S_IFMT)); + ip->i_di.di_mode = inode->i_mode; + ip->i_di.di_uid = inode->i_uid; + ip->i_di.di_gid = inode->i_gid; + ip->i_di.di_atime = inode->i_atime.tv_sec; + ip->i_di.di_mtime = inode->i_mtime.tv_sec; + ip->i_di.di_ctime = inode->i_ctime.tv_sec; +} + +/** + * gfs2_ip2v_lookup - Get the struct inode for a struct gfs2_inode + * @ip: the struct gfs2_inode to get the struct inode for + * + * Returns: A VFS inode, or NULL if none + */ + +struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip) +{ + struct inode *inode = NULL; + + gfs2_assert_warn(ip->i_sbd, test_bit(GIF_MIN_INIT, &ip->i_flags)); + + spin_lock(&ip->i_spin); + if (ip->i_vnode) + inode = igrab(ip->i_vnode); + spin_unlock(&ip->i_spin); + + return inode; +} + +/** + * gfs2_ip2v - Get/Create a struct inode for a struct gfs2_inode + * @ip: the struct gfs2_inode to get the struct inode for + * + * Returns: A VFS inode, or NULL if no mem + */ + +struct inode *gfs2_ip2v(struct gfs2_inode *ip) +{ + struct inode *inode, *tmp; + + inode = gfs2_ip2v_lookup(ip); + if (inode) + return inode; + + tmp = new_inode(ip->i_sbd->sd_vfs); + if (!tmp) + return NULL; + + inode_attr_in(ip, tmp); + + if (S_ISREG(ip->i_di.di_mode)) { + tmp->i_op = &gfs2_file_iops; + tmp->i_fop = &gfs2_file_fops; + tmp->i_mapping->a_ops = &gfs2_file_aops; + } else if (S_ISDIR(ip->i_di.di_mode)) { + tmp->i_op = &gfs2_dir_iops; + tmp->i_fop = &gfs2_dir_fops; + } else if (S_ISLNK(ip->i_di.di_mode)) { + tmp->i_op = &gfs2_symlink_iops; + } else { + tmp->i_op = &gfs2_dev_iops; + init_special_inode(tmp, tmp->i_mode, tmp->i_rdev); + } + + set_v2ip(tmp, NULL); + + for (;;) { + spin_lock(&ip->i_spin); + if (!ip->i_vnode) + break; + inode = igrab(ip->i_vnode); + spin_unlock(&ip->i_spin); + + if (inode) { + iput(tmp); + return inode; + } + yield(); + } + + inode = tmp; + + gfs2_inode_hold(ip); + ip->i_vnode = inode; + set_v2ip(inode, ip); + + spin_unlock(&ip->i_spin); + + insert_inode_hash(inode); + + return inode; +} + +static int iget_test(struct inode *inode, void *opaque) +{ + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_inum *inum = (struct gfs2_inum *)opaque; + + if (ip && ip->i_num.no_addr == inum->no_addr) + return 1; + + return 0; +} + +struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum) +{ + return ilookup5(sb, (unsigned long)inum->no_formal_ino, + iget_test, inum); +} + +void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type) +{ + spin_lock(&ip->i_spin); + if (!test_and_set_bit(GIF_MIN_INIT, &ip->i_flags)) { + ip->i_di.di_nlink = 1; + ip->i_di.di_mode = DT2IF(type); + } + spin_unlock(&ip->i_spin); +} + +/** + * gfs2_inode_refresh - Refresh the incore copy of the dinode + * @ip: The GFS2 inode + * + * Returns: errno + */ + +int gfs2_inode_refresh(struct gfs2_inode *ip) +{ + struct buffer_head *dibh; + int error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + if (gfs2_metatype_check(ip->i_sbd, dibh, GFS2_METATYPE_DI)) { + brelse(dibh); + return -EIO; + } + + spin_lock(&ip->i_spin); + gfs2_dinode_in(&ip->i_di, dibh->b_data); + set_bit(GIF_MIN_INIT, &ip->i_flags); + spin_unlock(&ip->i_spin); + + brelse(dibh); + + if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(&ip->i_di); + return -EIO; + } + if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino) + return -ESTALE; + + ip->i_vn = ip->i_gl->gl_vn; + + return 0; +} + +/** + * inode_create - create a struct gfs2_inode + * @i_gl: The glock covering the inode + * @inum: The inode number + * @io_gl: the iopen glock to acquire/hold (using holder in new gfs2_inode) + * @io_state: the state the iopen glock should be acquired in + * @ipp: pointer to put the returned inode in + * + * Returns: errno + */ + +static int inode_create(struct gfs2_glock *i_gl, struct gfs2_inum *inum, + struct gfs2_glock *io_gl, unsigned int io_state, + struct gfs2_inode **ipp) +{ + struct gfs2_sbd *sdp = i_gl->gl_sbd; + struct gfs2_inode *ip; + int error = 0; + + ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL); + if (!ip) + return -ENOMEM; + memset(ip, 0, sizeof(struct gfs2_inode)); + + ip->i_num = *inum; + + atomic_set(&ip->i_count, 1); + + ip->i_vn = i_gl->gl_vn - 1; + + ip->i_gl = i_gl; + ip->i_sbd = sdp; + + spin_lock_init(&ip->i_spin); + init_rwsem(&ip->i_rw_mutex); + + ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default); + + error = gfs2_glock_nq_init(io_gl, + io_state, GL_LOCAL_EXCL | GL_EXACT, + &ip->i_iopen_gh); + if (error) + goto fail; + ip->i_iopen_gh.gh_owner = NULL; + + spin_lock(&io_gl->gl_spin); + gfs2_glock_hold(i_gl); + set_gl2gl(io_gl, i_gl); + spin_unlock(&io_gl->gl_spin); + + gfs2_glock_hold(i_gl); + set_gl2ip(i_gl, ip); + + atomic_inc(&sdp->sd_inode_count); + + *ipp = ip; + + return 0; + + fail: + gfs2_meta_cache_flush(ip); + kmem_cache_free(gfs2_inode_cachep, ip); + *ipp = NULL; + + return error; +} + +/** + * gfs2_inode_get - Create or get a reference on an inode + * @i_gl: The glock covering the inode + * @inum: The inode number + * @create: + * @ipp: pointer to put the returned inode in + * + * Returns: errno + */ + +int gfs2_inode_get(struct gfs2_glock *i_gl, struct gfs2_inum *inum, int create, + struct gfs2_inode **ipp) +{ + struct gfs2_sbd *sdp = i_gl->gl_sbd; + struct gfs2_glock *io_gl; + int error = 0; + + gfs2_glmutex_lock(i_gl); + + *ipp = get_gl2ip(i_gl); + if (*ipp) { + error = -ESTALE; + if ((*ipp)->i_num.no_formal_ino != inum->no_formal_ino) + goto out; + atomic_inc(&(*ipp)->i_count); + error = 0; + goto out; + } + + if (!create) + goto out; + + error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops, + CREATE, &io_gl); + if (!error) { + error = inode_create(i_gl, inum, io_gl, LM_ST_SHARED, ipp); + gfs2_glock_put(io_gl); + } + + out: + gfs2_glmutex_unlock(i_gl); + + return error; +} + +void gfs2_inode_hold(struct gfs2_inode *ip) +{ + gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0); + atomic_inc(&ip->i_count); +} + +void gfs2_inode_put(struct gfs2_inode *ip) +{ + gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0); + atomic_dec(&ip->i_count); +} + +void gfs2_inode_destroy(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl; + struct gfs2_glock *i_gl = ip->i_gl; + + gfs2_assert_warn(sdp, !atomic_read(&ip->i_count)); + gfs2_assert(sdp, get_gl2gl(io_gl) == i_gl); + + spin_lock(&io_gl->gl_spin); + set_gl2gl(io_gl, NULL); + gfs2_glock_put(i_gl); + spin_unlock(&io_gl->gl_spin); + + gfs2_glock_dq_uninit(&ip->i_iopen_gh); + + gfs2_meta_cache_flush(ip); + kmem_cache_free(gfs2_inode_cachep, ip); + + set_gl2ip(i_gl, NULL); + gfs2_glock_put(i_gl); + + atomic_dec(&sdp->sd_inode_count); +} + +static int dinode_dealloc(struct gfs2_inode *ip, struct gfs2_unlinked *ul) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_alloc *al; + struct gfs2_rgrpd *rgd; + int error; + + if (ip->i_di.di_blocks != 1) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(&ip->i_di); + return -EIO; + } + + al = gfs2_alloc_get(ip); + + error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out; + + error = gfs2_rindex_hold(sdp, &al->al_ri_gh); + if (error) + goto out_qs; + + rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr); + if (!rgd) { + gfs2_consist_inode(ip); + error = -EIO; + goto out_rindex_relse; + } + + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, + &al->al_rgd_gh); + if (error) + goto out_rindex_relse; + + error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED + + RES_STATFS + RES_QUOTA, 1); + if (error) + goto out_rg_gunlock; + + gfs2_trans_add_gl(ip->i_gl); + + gfs2_free_di(rgd, ip); + + error = gfs2_unlinked_ondisk_rm(sdp, ul); + + gfs2_trans_end(sdp); + clear_bit(GLF_STICKY, &ip->i_gl->gl_flags); + + out_rg_gunlock: + gfs2_glock_dq_uninit(&al->al_rgd_gh); + + out_rindex_relse: + gfs2_glock_dq_uninit(&al->al_ri_gh); + + out_qs: + gfs2_quota_unhold(ip); + + out: + gfs2_alloc_put(ip); + + return error; +} + +/** + * inode_dealloc - Deallocate all on-disk blocks for an inode (dinode) + * @sdp: the filesystem + * @inum: the inode number to deallocate + * @io_gh: a holder for the iopen glock for this inode + * + * Returns: errno + */ + +static int inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul, + struct gfs2_holder *io_gh) +{ + struct gfs2_inode *ip; + struct gfs2_holder i_gh; + int error; + + error = gfs2_glock_nq_num(sdp, + ul->ul_ut.ut_inum.no_addr, &gfs2_inode_glops, + LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + return error; + + /* We reacquire the iopen lock here to avoid a race with the NFS server + calling gfs2_read_inode() with the inode number of a inode we're in + the process of deallocating. And we can't keep our hold on the lock + from inode_dealloc_init() for deadlock reasons. */ + + gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY, io_gh); + error = gfs2_glock_nq(io_gh); + switch (error) { + case 0: + break; + case GLR_TRYFAILED: + error = 1; + default: + goto out; + } + + gfs2_assert_warn(sdp, !get_gl2ip(i_gh.gh_gl)); + error = inode_create(i_gh.gh_gl, &ul->ul_ut.ut_inum, io_gh->gh_gl, + LM_ST_EXCLUSIVE, &ip); + + gfs2_glock_dq(io_gh); + + if (error) + goto out; + + error = gfs2_inode_refresh(ip); + if (error) + goto out_iput; + + if (ip->i_di.di_nlink) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(&ip->i_di); + error = -EIO; + goto out_iput; + } + + if (S_ISDIR(ip->i_di.di_mode) && + (ip->i_di.di_flags & GFS2_DIF_EXHASH)) { + error = gfs2_dir_exhash_dealloc(ip); + if (error) + goto out_iput; + } + + if (ip->i_di.di_eattr) { + error = gfs2_ea_dealloc(ip); + if (error) + goto out_iput; + } + + if (!gfs2_is_stuffed(ip)) { + error = gfs2_file_dealloc(ip); + if (error) + goto out_iput; + } + + error = dinode_dealloc(ip, ul); + if (error) + goto out_iput; + + out_iput: + gfs2_glmutex_lock(i_gh.gh_gl); + gfs2_inode_put(ip); + gfs2_inode_destroy(ip); + gfs2_glmutex_unlock(i_gh.gh_gl); + + out: + gfs2_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * try_inode_dealloc - Try to deallocate an inode and all its blocks + * @sdp: the filesystem + * + * Returns: 0 on success, -errno on error, 1 on busy (inode open) + */ + +static int try_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + struct gfs2_holder io_gh; + int error = 0; + + gfs2_try_toss_inode(sdp, &ul->ul_ut.ut_inum); + + error = gfs2_glock_nq_num(sdp, + ul->ul_ut.ut_inum.no_addr, &gfs2_iopen_glops, + LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &io_gh); + switch (error) { + case 0: + break; + case GLR_TRYFAILED: + return 1; + default: + return error; + } + + gfs2_glock_dq(&io_gh); + error = inode_dealloc(sdp, ul, &io_gh); + gfs2_holder_uninit(&io_gh); + + return error; +} + +static int inode_dealloc_uninit(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + struct gfs2_rgrpd *rgd; + struct gfs2_holder ri_gh, rgd_gh; + int error; + + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + return error; + + rgd = gfs2_blk2rgrpd(sdp, ul->ul_ut.ut_inum.no_addr); + if (!rgd) { + gfs2_consist(sdp); + error = -EIO; + goto out; + } + + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh); + if (error) + goto out; + + error = gfs2_trans_begin(sdp, + RES_RG_BIT + RES_UNLINKED + RES_STATFS, + 0); + if (error) + goto out_gunlock; + + gfs2_free_uninit_di(rgd, ul->ul_ut.ut_inum.no_addr); + gfs2_unlinked_ondisk_rm(sdp, ul); + + gfs2_trans_end(sdp); + + out_gunlock: + gfs2_glock_dq_uninit(&rgd_gh); + out: + gfs2_glock_dq_uninit(&ri_gh); + + return error; +} + +int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + if (ul->ul_ut.ut_flags & GFS2_UTF_UNINIT) + return inode_dealloc_uninit(sdp, ul); + else + return try_inode_dealloc(sdp, ul); +} + +/** + * gfs2_change_nlink - Change nlink count on inode + * @ip: The GFS2 inode + * @diff: The change in the nlink count required + * + * Returns: errno + */ + +int gfs2_change_nlink(struct gfs2_inode *ip, int diff) +{ + struct buffer_head *dibh; + uint32_t nlink; + int error; + + nlink = ip->i_di.di_nlink + diff; + + /* If we are reducing the nlink count, but the new value ends up being + bigger than the old one, we must have underflowed. */ + if (diff < 0 && nlink > ip->i_di.di_nlink) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(&ip->i_di); + return -EIO; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + ip->i_di.di_nlink = nlink; + ip->i_di.di_ctime = get_seconds(); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + return 0; +} + +/** + * gfs2_lookupi - Look up a filename in a directory and return its inode + * @d_gh: An initialized holder for the directory glock + * @name: The name of the inode to look for + * @is_root: If 1, ignore the caller's permissions + * @i_gh: An uninitialized holder for the new inode glock + * + * There will always be a vnode (Linux VFS inode) for the d_gh inode unless + * @is_root is true. + * + * Returns: errno + */ + +int gfs2_lookupi(struct gfs2_inode *dip, struct qstr *name, int is_root, + struct gfs2_inode **ipp) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_holder d_gh; + struct gfs2_inum inum; + unsigned int type; + struct gfs2_glock *gl; + int error; + + if (!name->len || name->len > GFS2_FNAMESIZE) + return -ENAMETOOLONG; + + if (gfs2_filecmp(name, ".", 1) || + (gfs2_filecmp(name, "..", 2) && dip == sdp->sd_root_dir)) { + gfs2_inode_hold(dip); + *ipp = dip; + return 0; + } + + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + if (error) + return error; + + if (!is_root) { + error = gfs2_repermission(dip->i_vnode, MAY_EXEC, NULL); + if (error) + goto out; + } + + error = gfs2_dir_search(dip, name, &inum, &type); + if (error) + goto out; + + error = gfs2_glock_get(sdp, inum.no_addr, &gfs2_inode_glops, + CREATE, &gl); + if (error) + goto out; + + error = gfs2_inode_get(gl, &inum, CREATE, ipp); + if (!error) + gfs2_inode_min_init(*ipp, type); + + gfs2_glock_put(gl); + + out: + gfs2_glock_dq_uninit(&d_gh); + + return error; +} + +static int pick_formal_ino_1(struct gfs2_sbd *sdp, uint64_t *formal_ino) +{ + struct gfs2_inode *ip = sdp->sd_ir_inode; + struct buffer_head *bh; + struct gfs2_inum_range ir; + int error; + + error = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (error) + return error; + down(&sdp->sd_inum_mutex); + + error = gfs2_meta_inode_buffer(ip, &bh); + if (error) { + up(&sdp->sd_inum_mutex); + gfs2_trans_end(sdp); + return error; + } + + gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); + + if (ir.ir_length) { + *formal_ino = ir.ir_start++; + ir.ir_length--; + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_inum_range_out(&ir, + bh->b_data + sizeof(struct gfs2_dinode)); + brelse(bh); + up(&sdp->sd_inum_mutex); + gfs2_trans_end(sdp); + return 0; + } + + brelse(bh); + + up(&sdp->sd_inum_mutex); + gfs2_trans_end(sdp); + + return 1; +} + +static int pick_formal_ino_2(struct gfs2_sbd *sdp, uint64_t *formal_ino) +{ + struct gfs2_inode *ip = sdp->sd_ir_inode; + struct gfs2_inode *m_ip = sdp->sd_inum_inode; + struct gfs2_holder gh; + struct buffer_head *bh; + struct gfs2_inum_range ir; + int error; + + error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (error) + return error; + + error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0); + if (error) + goto out; + down(&sdp->sd_inum_mutex); + + error = gfs2_meta_inode_buffer(ip, &bh); + if (error) + goto out_end_trans; + + gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode)); + + if (!ir.ir_length) { + struct buffer_head *m_bh; + uint64_t x, y; + + error = gfs2_meta_inode_buffer(m_ip, &m_bh); + if (error) + goto out_brelse; + + x = *(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode)); + x = y = le64_to_cpu(x); + ir.ir_start = x; + ir.ir_length = GFS2_INUM_QUANTUM; + x += GFS2_INUM_QUANTUM; + if (x < y) + gfs2_consist_inode(m_ip); + x = cpu_to_le64(x); + gfs2_trans_add_bh(m_ip->i_gl, m_bh); + *(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x; + + brelse(m_bh); + } + + *formal_ino = ir.ir_start++; + ir.ir_length--; + + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode)); + + out_brelse: + brelse(bh); + + out_end_trans: + up(&sdp->sd_inum_mutex); + gfs2_trans_end(sdp); + + out: + gfs2_glock_dq_uninit(&gh); + + return error; +} + +static int pick_formal_ino(struct gfs2_sbd *sdp, uint64_t *inum) +{ + int error; + + error = pick_formal_ino_1(sdp, inum); + if (error <= 0) + return error; + + error = pick_formal_ino_2(sdp, inum); + + return error; +} + +/** + * create_ok - OK to create a new on-disk inode here? + * @dip: Directory in which dinode is to be created + * @name: Name of new dinode + * @mode: + * + * Returns: errno + */ + +static int create_ok(struct gfs2_inode *dip, struct qstr *name, + unsigned int mode) +{ + int error; + + error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL); + if (error) + return error; + + /* Don't create entries in an unlinked directory */ + if (!dip->i_di.di_nlink) + return -EPERM; + + error = gfs2_dir_search(dip, name, NULL, NULL); + switch (error) { + case -ENOENT: + error = 0; + break; + case 0: + return -EEXIST; + default: + return error; + } + + if (dip->i_di.di_entries == (uint32_t)-1) + return -EFBIG; + if (S_ISDIR(mode) && dip->i_di.di_nlink == (uint32_t)-1) + return -EMLINK; + + return 0; +} + +static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode, + unsigned int *uid, unsigned int *gid) +{ + if (dip->i_sbd->sd_args.ar_suiddir && + (dip->i_di.di_mode & S_ISUID) && + dip->i_di.di_uid) { + if (S_ISDIR(*mode)) + *mode |= S_ISUID; + else if (dip->i_di.di_uid != current->fsuid) + *mode &= ~07111; + *uid = dip->i_di.di_uid; + } else + *uid = current->fsuid; + + if (dip->i_di.di_mode & S_ISGID) { + if (S_ISDIR(*mode)) + *mode |= S_ISGID; + *gid = dip->i_di.di_gid; + } else + *gid = current->fsgid; +} + +static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_unlinked *ul) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + int error; + + gfs2_alloc_get(dip); + + dip->i_alloc->al_requested = RES_DINODE; + error = gfs2_inplace_reserve(dip); + if (error) + goto out; + + error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED + + RES_STATFS, 0); + if (error) + goto out_ipreserv; + + ul->ul_ut.ut_inum.no_addr = gfs2_alloc_di(dip); + + ul->ul_ut.ut_flags = GFS2_UTF_UNINIT; + error = gfs2_unlinked_ondisk_add(sdp, ul); + + gfs2_trans_end(sdp); + + out_ipreserv: + gfs2_inplace_release(dip); + + out: + gfs2_alloc_put(dip); + + return error; +} + +/** + * init_dinode - Fill in a new dinode structure + * @dip: the directory this inode is being created in + * @gl: The glock covering the new inode + * @inum: the inode number + * @mode: the file permissions + * @uid: + * @gid: + * + */ + +static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, + struct gfs2_inum *inum, unsigned int mode, + unsigned int uid, unsigned int gid) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_dinode di; + struct buffer_head *dibh; + + dibh = gfs2_meta_new(gl, inum->no_addr); + gfs2_trans_add_bh(gl, dibh); + gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI); + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + + memset(&di, 0, sizeof(struct gfs2_dinode)); + gfs2_meta_header_in(&di.di_header, dibh->b_data); + di.di_num = *inum; + di.di_mode = mode; + di.di_uid = uid; + di.di_gid = gid; + di.di_blocks = 1; + di.di_atime = di.di_mtime = di.di_ctime = get_seconds(); + di.di_goal_meta = di.di_goal_data = inum->no_addr; + + if (S_ISREG(mode)) { + if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) || + gfs2_tune_get(sdp, gt_new_files_jdata)) + di.di_flags |= GFS2_DIF_JDATA; + if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) || + gfs2_tune_get(sdp, gt_new_files_directio)) + di.di_flags |= GFS2_DIF_DIRECTIO; + } else if (S_ISDIR(mode)) { + di.di_flags |= (dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO); + di.di_flags |= (dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA); + } + + gfs2_dinode_out(&di, dibh->b_data); + brelse(dibh); +} + +static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, + unsigned int mode, struct gfs2_unlinked *ul) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + unsigned int uid, gid; + int error; + + munge_mode_uid_gid(dip, &mode, &uid, &gid); + + gfs2_alloc_get(dip); + + error = gfs2_quota_lock(dip, uid, gid); + if (error) + goto out; + + error = gfs2_quota_check(dip, uid, gid); + if (error) + goto out_quota; + + error = gfs2_trans_begin(sdp, RES_DINODE + RES_UNLINKED + + RES_QUOTA, 0); + if (error) + goto out_quota; + + ul->ul_ut.ut_flags = 0; + error = gfs2_unlinked_ondisk_munge(sdp, ul); + + init_dinode(dip, gl, &ul->ul_ut.ut_inum, + mode, uid, gid); + + gfs2_quota_change(dip, +1, uid, gid); + + gfs2_trans_end(sdp); + + out_quota: + gfs2_quota_unlock(dip); + + out: + gfs2_alloc_put(dip); + + return error; +} + +static int link_dinode(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip, struct gfs2_unlinked *ul) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_alloc *al; + int alloc_required; + struct buffer_head *dibh; + int error; + + al = gfs2_alloc_get(dip); + + error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto fail; + + error = gfs2_diradd_alloc_required(dip, name, &alloc_required); + if (alloc_required) { + error = gfs2_quota_check(dip, dip->i_di.di_uid, + dip->i_di.di_gid); + if (error) + goto fail_quota_locks; + + al->al_requested = sdp->sd_max_dirres; + + error = gfs2_inplace_reserve(dip); + if (error) + goto fail_quota_locks; + + error = gfs2_trans_begin(sdp, + sdp->sd_max_dirres + + al->al_rgd->rd_ri.ri_length + + 2 * RES_DINODE + RES_UNLINKED + + RES_STATFS + RES_QUOTA, 0); + if (error) + goto fail_ipreserv; + } else { + error = gfs2_trans_begin(sdp, + RES_LEAF + + 2 * RES_DINODE + + RES_UNLINKED, 0); + if (error) + goto fail_quota_locks; + } + + error = gfs2_dir_add(dip, name, &ip->i_num, IF2DT(ip->i_di.di_mode)); + if (error) + goto fail_end_trans; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + ip->i_di.di_nlink = 1; + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + error = gfs2_unlinked_ondisk_rm(sdp, ul); + if (error) + goto fail_end_trans; + + return 0; + + fail_end_trans: + gfs2_trans_end(sdp); + + fail_ipreserv: + if (dip->i_alloc->al_rgd) + gfs2_inplace_release(dip); + + fail_quota_locks: + gfs2_quota_unlock(dip); + + fail: + gfs2_alloc_put(dip); + + return error; +} + +/** + * gfs2_createi - Create a new inode + * @ghs: An array of two holders + * @name: The name of the new file + * @mode: the permissions on the new inode + * + * @ghs[0] is an initialized holder for the directory + * @ghs[1] is the holder for the inode lock + * + * If the return value is 0, the glocks on both the directory and the new + * file are held. A transaction has been started and an inplace reservation + * is held, as well. + * + * Returns: errno + */ + +int gfs2_createi(struct gfs2_holder *ghs, struct qstr *name, unsigned int mode) +{ + struct gfs2_inode *dip = get_gl2ip(ghs->gh_gl); + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_unlinked *ul; + struct gfs2_inode *ip; + int error; + + if (!name->len || name->len > GFS2_FNAMESIZE) + return -ENAMETOOLONG; + + error = gfs2_unlinked_get(sdp, &ul); + if (error) + return error; + + gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); + error = gfs2_glock_nq(ghs); + if (error) + goto fail; + + error = create_ok(dip, name, mode); + if (error) + goto fail_gunlock; + + error = pick_formal_ino(sdp, &ul->ul_ut.ut_inum.no_formal_ino); + if (error) + goto fail_gunlock; + + error = alloc_dinode(dip, ul); + if (error) + goto fail_gunlock; + + if (ul->ul_ut.ut_inum.no_addr < dip->i_num.no_addr) { + gfs2_glock_dq(ghs); + + error = gfs2_glock_nq_num(sdp, + ul->ul_ut.ut_inum.no_addr, + &gfs2_inode_glops, + LM_ST_EXCLUSIVE, GL_SKIP, + ghs + 1); + if (error) { + gfs2_unlinked_put(sdp, ul); + return error; + } + + gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs); + error = gfs2_glock_nq(ghs); + if (error) { + gfs2_glock_dq_uninit(ghs + 1); + gfs2_unlinked_put(sdp, ul); + return error; + } + + error = create_ok(dip, name, mode); + if (error) + goto fail_gunlock2; + } else { + error = gfs2_glock_nq_num(sdp, + ul->ul_ut.ut_inum.no_addr, + &gfs2_inode_glops, + LM_ST_EXCLUSIVE, GL_SKIP, + ghs + 1); + if (error) + goto fail_gunlock; + } + + error = make_dinode(dip, ghs[1].gh_gl, mode, ul); + if (error) + goto fail_gunlock2; + + error = gfs2_inode_get(ghs[1].gh_gl, &ul->ul_ut.ut_inum, CREATE, &ip); + if (error) + goto fail_gunlock2; + + error = gfs2_inode_refresh(ip); + if (error) + goto fail_iput; + + error = gfs2_acl_create(dip, ip); + if (error) + goto fail_iput; + + error = link_dinode(dip, name, ip, ul); + if (error) + goto fail_iput; + + gfs2_unlinked_put(sdp, ul); + + return 0; + + fail_iput: + gfs2_inode_put(ip); + + fail_gunlock2: + gfs2_glock_dq_uninit(ghs + 1); + + fail_gunlock: + gfs2_glock_dq(ghs); + + fail: + gfs2_unlinked_put(sdp, ul); + + return error; +} + +/** + * gfs2_unlinki - Unlink a file + * @dip: The inode of the directory + * @name: The name of the file to be unlinked + * @ip: The inode of the file to be removed + * + * Assumes Glocks on both dip and ip are held. + * + * Returns: errno + */ + +int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip, struct gfs2_unlinked *ul) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + int error; + + error = gfs2_dir_del(dip, name); + if (error) + return error; + + error = gfs2_change_nlink(ip, -1); + if (error) + return error; + + /* If this inode is being unlinked from the directory structure, + we need to mark that in the log so that it isn't lost during + a crash. */ + + if (!ip->i_di.di_nlink) { + ul->ul_ut.ut_inum = ip->i_num; + error = gfs2_unlinked_ondisk_add(sdp, ul); + if (!error) + set_bit(GLF_STICKY, &ip->i_gl->gl_flags); + } + + return error; +} + +/** + * gfs2_rmdiri - Remove a directory + * @dip: The parent directory of the directory to be removed + * @name: The name of the directory to be removed + * @ip: The GFS2 inode of the directory to be removed + * + * Assumes Glocks on dip and ip are held + * + * Returns: errno + */ + +int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip, struct gfs2_unlinked *ul) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct qstr dotname; + int error; + + if (ip->i_di.di_entries != 2) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(&ip->i_di); + return -EIO; + } + + error = gfs2_dir_del(dip, name); + if (error) + return error; + + error = gfs2_change_nlink(dip, -1); + if (error) + return error; + + dotname.len = 1; + dotname.name = "."; + error = gfs2_dir_del(ip, &dotname); + if (error) + return error; + + dotname.len = 2; + dotname.name = ".."; + error = gfs2_dir_del(ip, &dotname); + if (error) + return error; + + error = gfs2_change_nlink(ip, -2); + if (error) + return error; + + /* This inode is being unlinked from the directory structure and + we need to mark that in the log so that it isn't lost during + a crash. */ + + ul->ul_ut.ut_inum = ip->i_num; + error = gfs2_unlinked_ondisk_add(sdp, ul); + if (!error) + set_bit(GLF_STICKY, &ip->i_gl->gl_flags); + + return error; +} + +/* + * gfs2_unlink_ok - check to see that a inode is still in a directory + * @dip: the directory + * @name: the name of the file + * @ip: the inode + * + * Assumes that the lock on (at least) @dip is held. + * + * Returns: 0 if the parent/child relationship is correct, errno if it isn't + */ + +int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip) +{ + struct gfs2_inum inum; + unsigned int type; + int error; + + if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode)) + return -EPERM; + + if ((dip->i_di.di_mode & S_ISVTX) && + dip->i_di.di_uid != current->fsuid && + ip->i_di.di_uid != current->fsuid && + !capable(CAP_FOWNER)) + return -EPERM; + + if (IS_APPEND(dip->i_vnode)) + return -EPERM; + + error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL); + if (error) + return error; + + error = gfs2_dir_search(dip, name, &inum, &type); + if (error) + return error; + + if (!gfs2_inum_equal(&inum, &ip->i_num)) + return -ENOENT; + + if (IF2DT(ip->i_di.di_mode) != type) { + gfs2_consist_inode(dip); + return -EIO; + } + + return 0; +} + +/* + * gfs2_ok_to_move - check if it's ok to move a directory to another directory + * @this: move this + * @to: to here + * + * Follow @to back to the root and make sure we don't encounter @this + * Assumes we already hold the rename lock. + * + * Returns: errno + */ + +int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) +{ + struct gfs2_sbd *sdp = this->i_sbd; + struct gfs2_inode *tmp; + struct qstr dotdot; + int error = 0; + + memset(&dotdot, 0, sizeof(struct qstr)); + dotdot.name = ".."; + dotdot.len = 2; + + gfs2_inode_hold(to); + + for (;;) { + if (to == this) { + error = -EINVAL; + break; + } + if (to == sdp->sd_root_dir) { + error = 0; + break; + } + + error = gfs2_lookupi(to, &dotdot, 1, &tmp); + if (error) + break; + + gfs2_inode_put(to); + to = tmp; + } + + gfs2_inode_put(to); + + return error; +} + +/** + * gfs2_readlinki - return the contents of a symlink + * @ip: the symlink's inode + * @buf: a pointer to the buffer to be filled + * @len: a pointer to the length of @buf + * + * If @buf is too small, a piece of memory is kmalloc()ed and needs + * to be freed by the caller. + * + * Returns: errno + */ + +int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) +{ + struct gfs2_holder i_gh; + struct buffer_head *dibh; + unsigned int x; + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); + error = gfs2_glock_nq_atime(&i_gh); + if (error) { + gfs2_holder_uninit(&i_gh); + return error; + } + + if (!ip->i_di.di_size) { + gfs2_consist_inode(ip); + error = -EIO; + goto out; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out; + + x = ip->i_di.di_size + 1; + if (x > *len) { + *buf = kmalloc(x, GFP_KERNEL); + if (!*buf) { + error = -ENOMEM; + goto out_brelse; + } + } + + memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x); + *len = x; + + out_brelse: + brelse(dibh); + + out: + gfs2_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and + * conditionally update the inode's atime + * @gh: the holder to acquire + * + * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap + * Update if the difference between the current time and the inode's current + * atime is greater than an interval specified at mount. + * + * Returns: errno + */ + +int gfs2_glock_nq_atime(struct gfs2_holder *gh) +{ + struct gfs2_glock *gl = gh->gh_gl; + struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_inode *ip = get_gl2ip(gl); + int64_t curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum); + unsigned int state; + int flags; + int error; + + if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) || + gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) || + gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops)) + return -EINVAL; + + state = gh->gh_state; + flags = gh->gh_flags; + + error = gfs2_glock_nq(gh); + if (error) + return error; + + if (test_bit(SDF_NOATIME, &sdp->sd_flags) || + (sdp->sd_vfs->s_flags & MS_RDONLY)) + return 0; + + curtime = get_seconds(); + if (curtime - ip->i_di.di_atime >= quantum) { + gfs2_glock_dq(gh); + gfs2_holder_reinit(LM_ST_EXCLUSIVE, + gh->gh_flags & ~LM_FLAG_ANY, + gh); + error = gfs2_glock_nq(gh); + if (error) + return error; + + /* Verify that atime hasn't been updated while we were + trying to get exclusive lock. */ + + curtime = get_seconds(); + if (curtime - ip->i_di.di_atime >= quantum) { + struct buffer_head *dibh; + + error = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (error == -EROFS) + return 0; + if (error) + goto fail; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + ip->i_di.di_atime = curtime; + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + gfs2_trans_end(sdp); + } + + /* If someone else has asked for the glock, + unlock and let them have it. Then reacquire + in the original state. */ + if (gfs2_glock_is_blocking(gl)) { + gfs2_glock_dq(gh); + gfs2_holder_reinit(state, flags, gh); + return gfs2_glock_nq(gh); + } + } + + return 0; + + fail_end_trans: + gfs2_trans_end(sdp); + + fail: + gfs2_glock_dq(gh); + + return error; +} + +/** + * glock_compare_atime - Compare two struct gfs2_glock structures for sort + * @arg_a: the first structure + * @arg_b: the second structure + * + * Returns: 1 if A > B + * -1 if A < B + * 0 if A = B + */ + +static int glock_compare_atime(const void *arg_a, const void *arg_b) +{ + struct gfs2_holder *gh_a = *(struct gfs2_holder **)arg_a; + struct gfs2_holder *gh_b = *(struct gfs2_holder **)arg_b; + struct lm_lockname *a = &gh_a->gh_gl->gl_name; + struct lm_lockname *b = &gh_b->gh_gl->gl_name; + int ret = 0; + + if (a->ln_number > b->ln_number) + ret = 1; + else if (a->ln_number < b->ln_number) + ret = -1; + else { + if (gh_a->gh_state == LM_ST_SHARED && + gh_b->gh_state == LM_ST_EXCLUSIVE) + ret = 1; + else if (gh_a->gh_state == LM_ST_SHARED && + (gh_b->gh_flags & GL_ATIME)) + ret = 1; + } + + return ret; +} + +/** + * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an + * atime update + * @num_gh: the number of structures + * @ghs: an array of struct gfs2_holder structures + * + * Returns: 0 on success (all glocks acquired), + * errno on failure (no glocks acquired) + */ + +int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs) +{ + struct gfs2_holder **p; + unsigned int x; + int error = 0; + + if (!num_gh) + return 0; + + if (num_gh == 1) { + ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); + if (ghs->gh_flags & GL_ATIME) + error = gfs2_glock_nq_atime(ghs); + else + error = gfs2_glock_nq(ghs); + return error; + } + + p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL); + if (!p) + return -ENOMEM; + + for (x = 0; x < num_gh; x++) + p[x] = &ghs[x]; + + sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL); + + for (x = 0; x < num_gh; x++) { + p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC); + + if (p[x]->gh_flags & GL_ATIME) + error = gfs2_glock_nq_atime(p[x]); + else + error = gfs2_glock_nq(p[x]); + + if (error) { + while (x--) + gfs2_glock_dq(p[x]); + break; + } + } + + kfree(p); + + return error; +} + +/** + * gfs2_try_toss_vnode - See if we can toss a vnode from memory + * @ip: the inode + * + * Returns: 1 if the vnode was tossed + */ + +void gfs2_try_toss_vnode(struct gfs2_inode *ip) +{ + struct inode *inode; + + inode = gfs2_ip2v_lookup(ip); + if (!inode) + return; + + d_prune_aliases(inode); + + if (S_ISDIR(ip->i_di.di_mode)) { + struct list_head *head = &inode->i_dentry; + struct dentry *d = NULL; + + spin_lock(&dcache_lock); + if (list_empty(head)) + spin_unlock(&dcache_lock); + else { + d = list_entry(head->next, struct dentry, d_alias); + dget_locked(d); + spin_unlock(&dcache_lock); + + if (have_submounts(d)) + dput(d); + else { + shrink_dcache_parent(d); + dput(d); + d_prune_aliases(inode); + } + } + } + + inode->i_nlink = 0; + iput(inode); +} + + +static int +__gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) +{ + struct buffer_head *dibh; + int error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + error = inode_setattr(ip->i_vnode, attr); + gfs2_assert_warn(ip->i_sbd, !error); + gfs2_inode_attr_out(ip); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + return error; +} + +/** + * gfs2_setattr_simple - + * @ip: + * @attr: + * + * Called with a reference on the vnode. + * + * Returns: errno + */ + +int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) +{ + int error; + + if (get_transaction) + return __gfs2_setattr_simple(ip, attr); + + error = gfs2_trans_begin(ip->i_sbd, RES_DINODE, 0); + if (error) + return error; + + error = __gfs2_setattr_simple(ip, attr); + + gfs2_trans_end(ip->i_sbd); + + return error; +} + +int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd) +{ + return permission(inode, mask, nd); +} + --- a/fs/gfs2/inode.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/inode.h 2005-10-10 11:28:49.210800356 -0500 @@ -0,0 +1,74 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __INODE_DOT_H__ +#define __INODE_DOT_H__ + +static inline int gfs2_is_stuffed(struct gfs2_inode *ip) +{ + return !ip->i_di.di_height; +} + +static inline int gfs2_is_jdata(struct gfs2_inode *ip) +{ + return ip->i_di.di_flags & GFS2_DIF_JDATA; +} + +void gfs2_inode_attr_in(struct gfs2_inode *ip); +void gfs2_inode_attr_out(struct gfs2_inode *ip); +struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip); +struct inode *gfs2_ip2v(struct gfs2_inode *ip); +struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum); + +void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type); +int gfs2_inode_refresh(struct gfs2_inode *ip); + +int gfs2_inode_get(struct gfs2_glock *i_gl, + struct gfs2_inum *inum, int create, + struct gfs2_inode **ipp); +void gfs2_inode_hold(struct gfs2_inode *ip); +void gfs2_inode_put(struct gfs2_inode *ip); +void gfs2_inode_destroy(struct gfs2_inode *ip); + +int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul); + +int gfs2_change_nlink(struct gfs2_inode *ip, int diff); +int gfs2_lookupi(struct gfs2_inode *dip, struct qstr *name, int is_root, + struct gfs2_inode **ipp); +int gfs2_createi(struct gfs2_holder *ghs, struct qstr *name, unsigned int mode); +int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip, struct gfs2_unlinked *ul); +int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip, struct gfs2_unlinked *ul); +int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name, + struct gfs2_inode *ip); +int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); +int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); + +int gfs2_glock_nq_atime(struct gfs2_holder *gh); +int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs); + +void gfs2_try_toss_vnode(struct gfs2_inode *ip); + +int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); + +int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd); + +static inline int gfs2_lookup_simple(struct gfs2_inode *dip, char *name, + struct gfs2_inode **ipp) +{ + struct qstr qstr; + memset(&qstr, 0, sizeof(struct qstr)); + qstr.name = name; + qstr.len = strlen(name); + return gfs2_lookupi(dip, &qstr, 1, ipp); +} + +#endif /* __INODE_DOT_H__ */ + [PATCH 03/16] GFS: core fs Core file system functions. Signed-off-by: Ken Preslan Signed-off-by: David Teigland --- fs/gfs2/jdata.c | 382 +++++++++++++++++++++ fs/gfs2/jdata.h | 52 ++ fs/gfs2/lops.c | 509 ++++++++++++++++++++++++++++ fs/gfs2/lops.h | 95 +++++ fs/gfs2/main.c | 101 +++++ fs/gfs2/meta_io.c | 883 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/meta_io.h | 88 ++++ fs/gfs2/ondisk.c | 23 + fs/gfs2/ops_address.c | 515 +++++++++++++++++++++++++++++ fs/gfs2/ops_address.h | 15 10 files changed, 2663 insertions(+) --- a/fs/gfs2/jdata.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/jdata.c 2005-10-10 11:28:49.220798796 -0500 @@ -0,0 +1,382 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "inode.h" +#include "jdata.h" +#include "meta_io.h" +#include "trans.h" + +int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, + struct buffer_head **bhp) +{ + struct buffer_head *bh; + int error = 0; + + if (new) { + bh = gfs2_meta_new(ip->i_gl, block); + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD); + gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); + } else { + error = gfs2_meta_read(ip->i_gl, block, + DIO_START | DIO_WAIT, &bh); + if (error) + return error; + if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) { + brelse(bh); + return -EIO; + } + } + + *bhp = bh; + + return 0; +} + +/** + * gfs2_copy2mem - Trivial copy function for gfs2_jdata_read() + * @bh: The buffer to copy from, or NULL meaning zero the buffer + * @buf: The buffer to copy/zero + * @offset: The offset in the buffer to copy from + * @size: The amount of data to copy/zero + * + * Returns: errno + */ + +int gfs2_copy2mem(struct buffer_head *bh, char **buf, unsigned int offset, + unsigned int size) +{ + if (bh) + memcpy(*buf, bh->b_data + offset, size); + else + memset(*buf, 0, size); + *buf += size; + return 0; +} + +/** + * gfs2_copy2user - Copy bytes to user space for gfs2_jdata_read() + * @bh: The buffer + * @buf: The destination of the data + * @offset: The offset into the buffer + * @size: The amount of data to copy + * + * Returns: errno + */ + +int gfs2_copy2user(struct buffer_head *bh, char **buf, unsigned int offset, + unsigned int size) +{ + int error; + + if (bh) + error = copy_to_user(*buf, bh->b_data + offset, size); + else + error = clear_user(*buf, size); + + if (error) + error = -EFAULT; + else + *buf += size; + + return error; +} + +static int jdata_read_stuffed(struct gfs2_inode *ip, char *buf, + unsigned int offset, unsigned int size, + read_copy_fn_t copy_fn) +{ + struct buffer_head *dibh; + int error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + error = copy_fn(dibh, &buf, + offset + sizeof(struct gfs2_dinode), size); + brelse(dibh); + } + + return (error) ? error : size; +} + +/** + * gfs2_jdata_read - Read a jdata file + * @ip: The GFS2 Inode + * @buf: The buffer to place result into + * @offset: File offset to begin jdata_readng from + * @size: Amount of data to transfer + * @copy_fn: Function to actually perform the copy + * + * The @copy_fn only copies a maximum of a single block at once so + * we are safe calling it with int arguments. It is done so that + * we don't needlessly put 64bit arguments on the stack and it + * also makes the code in the @copy_fn nicer too. + * + * Returns: The amount of data actually copied or the error + */ + +int gfs2_jdata_read(struct gfs2_inode *ip, char *buf, uint64_t offset, + unsigned int size, read_copy_fn_t copy_fn) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + uint64_t lblock, dblock; + uint32_t extlen = 0; + unsigned int o; + int copied = 0; + int error = 0; + + if (offset >= ip->i_di.di_size) + return 0; + + if ((offset + size) > ip->i_di.di_size) + size = ip->i_di.di_size - offset; + + if (!size) + return 0; + + if (gfs2_is_stuffed(ip)) + return jdata_read_stuffed(ip, buf, (unsigned int)offset, size, + copy_fn); + + if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) + return -EINVAL; + + lblock = offset; + o = do_div(lblock, sdp->sd_jbsize) + + sizeof(struct gfs2_meta_header); + + while (copied < size) { + unsigned int amount; + struct buffer_head *bh; + int new; + + amount = size - copied; + if (amount > sdp->sd_sb.sb_bsize - o) + amount = sdp->sd_sb.sb_bsize - o; + + if (!extlen) { + new = 0; + error = gfs2_block_map(ip, lblock, &new, + &dblock, &extlen); + if (error) + goto fail; + } + + if (extlen > 1) + gfs2_meta_ra(ip->i_gl, dblock, extlen); + + if (dblock) { + error = gfs2_jdata_get_buffer(ip, dblock, new, &bh); + if (error) + goto fail; + dblock++; + extlen--; + } else + bh = NULL; + + error = copy_fn(bh, &buf, o, amount); + brelse(bh); + if (error) + goto fail; + + copied += amount; + lblock++; + + o = sizeof(struct gfs2_meta_header); + } + + return copied; + + fail: + return (copied) ? copied : error; +} + +/** + * gfs2_copy_from_mem - Trivial copy function for gfs2_jdata_write() + * @bh: The buffer to copy to or clear + * @buf: The buffer to copy from + * @offset: The offset in the buffer to write to + * @size: The amount of data to write + * + * Returns: errno + */ + +int gfs2_copy_from_mem(struct gfs2_inode *ip, struct buffer_head *bh, + char **buf, unsigned int offset, unsigned int size) +{ + gfs2_trans_add_bh(ip->i_gl, bh); + memcpy(bh->b_data + offset, *buf, size); + + *buf += size; + + return 0; +} + +/** + * gfs2_copy_from_user - Copy bytes from user space for gfs2_jdata_write() + * @bh: The buffer to copy to or clear + * @buf: The buffer to copy from + * @offset: The offset in the buffer to write to + * @size: The amount of data to write + * + * Returns: errno + */ + +int gfs2_copy_from_user(struct gfs2_inode *ip, struct buffer_head *bh, + char **buf, unsigned int offset, unsigned int size) +{ + int error = 0; + + gfs2_trans_add_bh(ip->i_gl, bh); + if (copy_from_user(bh->b_data + offset, *buf, size)) + error = -EFAULT; + else + *buf += size; + + return error; +} + +static int jdata_write_stuffed(struct gfs2_inode *ip, char *buf, + unsigned int offset, unsigned int size, + write_copy_fn_t copy_fn) +{ + struct buffer_head *dibh; + int error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + error = copy_fn(ip, + dibh, &buf, + offset + sizeof(struct gfs2_dinode), size); + if (!error) { + if (ip->i_di.di_size < offset + size) + ip->i_di.di_size = offset + size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + } + + brelse(dibh); + + return (error) ? error : size; +} + +/** + * gfs2_jdata_write - Write bytes to a file + * @ip: The GFS2 inode + * @buf: The buffer containing information to be written + * @offset: The file offset to start writing at + * @size: The amount of data to write + * @copy_fn: Function to do the actual copying + * + * Returns: The number of bytes correctly written or error code + */ + +int gfs2_jdata_write(struct gfs2_inode *ip, char *buf, uint64_t offset, + unsigned int size, write_copy_fn_t copy_fn) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *dibh; + uint64_t lblock, dblock; + uint32_t extlen = 0; + unsigned int o; + int copied = 0; + int error = 0; + + if (!size) + return 0; + + if (gfs2_is_stuffed(ip) && + offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) + return jdata_write_stuffed(ip, buf, (unsigned int)offset, size, + copy_fn); + + if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip))) + return -EINVAL; + + if (gfs2_is_stuffed(ip)) { + error = gfs2_unstuff_dinode(ip, NULL, NULL); + if (error) + return error; + } + + lblock = offset; + o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header); + + while (copied < size) { + unsigned int amount; + struct buffer_head *bh; + int new; + + amount = size - copied; + if (amount > sdp->sd_sb.sb_bsize - o) + amount = sdp->sd_sb.sb_bsize - o; + + if (!extlen) { + new = 1; + error = gfs2_block_map(ip, lblock, &new, + &dblock, &extlen); + if (error) + goto fail; + error = -EIO; + if (gfs2_assert_withdraw(sdp, dblock)) + goto fail; + } + + error = gfs2_jdata_get_buffer(ip, dblock, + (amount == sdp->sd_jbsize) ? 1 : new, + &bh); + if (error) + goto fail; + + error = copy_fn(ip, bh, &buf, o, amount); + brelse(bh); + if (error) + goto fail; + + copied += amount; + lblock++; + dblock++; + extlen--; + + o = sizeof(struct gfs2_meta_header); + } + + out: + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + if (ip->i_di.di_size < offset + copied) + ip->i_di.di_size = offset + copied; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + return copied; + + fail: + if (copied) + goto out; + return error; +} + --- a/fs/gfs2/jdata.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/jdata.h 2005-10-10 11:28:49.220798796 -0500 @@ -0,0 +1,52 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __FILE_DOT_H__ +#define __FILE_DOT_H__ + +int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new, + struct buffer_head **bhp); + +typedef int (*read_copy_fn_t) (struct buffer_head *bh, char **buf, + unsigned int offset, unsigned int size); +typedef int (*write_copy_fn_t) (struct gfs2_inode *ip, + struct buffer_head *bh, char **buf, + unsigned int offset, unsigned int size); + +int gfs2_copy2mem(struct buffer_head *bh, char **buf, + unsigned int offset, unsigned int size); +int gfs2_copy2user(struct buffer_head *bh, char **buf, + unsigned int offset, unsigned int size); +int gfs2_jdata_read(struct gfs2_inode *ip, char *buf, + uint64_t offset, unsigned int size, + read_copy_fn_t copy_fn); + +int gfs2_copy_from_mem(struct gfs2_inode *ip, + struct buffer_head *bh, char **buf, + unsigned int offset, unsigned int size); +int gfs2_copy_from_user(struct gfs2_inode *ip, + struct buffer_head *bh, char **buf, + unsigned int offset, unsigned int size); +int gfs2_jdata_write(struct gfs2_inode *ip, char *buf, + uint64_t offset, unsigned int size, + write_copy_fn_t copy_fn); + +static inline int gfs2_jdata_read_mem(struct gfs2_inode *ip, char *buf, + uint64_t offset, unsigned int size) +{ + return gfs2_jdata_read(ip, buf, offset, size, gfs2_copy2mem); +} + +static inline int gfs2_jdata_write_mem(struct gfs2_inode *ip, char *buf, + uint64_t offset, unsigned int size) +{ + return gfs2_jdata_write(ip, buf, offset, size, gfs2_copy_from_mem); +} + +#endif /* __FILE_DOT_H__ */ --- a/fs/gfs2/lops.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/lops.c 2005-10-10 11:28:49.242795366 -0500 @@ -0,0 +1,509 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "glock.h" +#include "log.h" +#include "lops.h" +#include "meta_io.h" +#include "recovery.h" +#include "rgrp.h" +#include "trans.h" + +static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +{ + struct gfs2_glock *gl; + + get_transaction->tr_touched = 1; + + if (!list_empty(&le->le_list)) + return; + + gl = container_of(le, struct gfs2_glock, gl_le); + if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) + return; + gfs2_glock_hold(gl); + set_bit(GLF_DIRTY, &gl->gl_flags); + + gfs2_log_lock(sdp); + sdp->sd_log_num_gl++; + list_add(&le->le_list, &sdp->sd_log_le_gl); + gfs2_log_unlock(sdp); +} + +static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head = &sdp->sd_log_le_gl; + struct gfs2_glock *gl; + + while (!list_empty(head)) { + gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list); + list_del_init(&gl->gl_le.le_list); + sdp->sd_log_num_gl--; + + gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)); + gfs2_glock_put(gl); + } + gfs2_assert_warn(sdp, !sdp->sd_log_num_gl); +} + +static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +{ + struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); + struct gfs2_trans *tr; + + if (!list_empty(&bd->bd_list_tr)) + return; + + tr = get_transaction; + tr->tr_touched = 1; + tr->tr_num_buf++; + list_add(&bd->bd_list_tr, &tr->tr_list_buf); + + if (!list_empty(&le->le_list)) + return; + + gfs2_trans_add_gl(bd->bd_gl); + + gfs2_meta_check(sdp, bd->bd_bh); + gfs2_meta_pin(sdp, bd->bd_bh); + + gfs2_log_lock(sdp); + sdp->sd_log_num_buf++; + list_add(&le->le_list, &sdp->sd_log_le_buf); + gfs2_log_unlock(sdp); + + tr->tr_num_buf_new++; +} + +static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) +{ + struct list_head *head = &tr->tr_list_buf; + struct gfs2_bufdata *bd; + + while (!list_empty(head)) { + bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); + list_del_init(&bd->bd_list_tr); + tr->tr_num_buf--; + } + gfs2_assert_warn(sdp, !tr->tr_num_buf); +} + +static void buf_lo_before_commit(struct gfs2_sbd *sdp) +{ + struct buffer_head *bh; + struct gfs2_log_descriptor ld; + struct gfs2_bufdata *bd; + + if (!sdp->sd_log_num_buf) + return; + + bh = gfs2_log_get_buf(sdp); + memset(&ld, 0, sizeof(struct gfs2_log_descriptor)); + ld.ld_header.mh_magic = GFS2_MAGIC; + ld.ld_header.mh_type = GFS2_METATYPE_LD; + ld.ld_header.mh_format = GFS2_FORMAT_LD; + ld.ld_header.mh_blkno = bh->b_blocknr; + ld.ld_type = GFS2_LOG_DESC_METADATA; + ld.ld_length = sdp->sd_log_num_buf + 1; + ld.ld_data1 = sdp->sd_log_num_buf; + gfs2_log_descriptor_out(&ld, bh->b_data); + + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); + + list_for_each_entry(bd, &sdp->sd_log_le_buf, bd_le.le_list) { + bh = gfs2_log_fake_buf(sdp, bd->bd_bh); + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); + } +} + +static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head = &sdp->sd_log_le_buf; + struct gfs2_bufdata *bd; + + while (!list_empty(head)) { + bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); + list_del_init(&bd->bd_le.le_list); + sdp->sd_log_num_buf--; + + gfs2_meta_unpin(sdp, bd->bd_bh, ai); + } + gfs2_assert_warn(sdp, !sdp->sd_log_num_buf); +} + +static void buf_lo_before_scan(struct gfs2_jdesc *jd, + struct gfs2_log_header *head, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + + if (pass != 0) + return; + + sdp->sd_found_blocks = 0; + sdp->sd_replayed_blocks = 0; +} + +static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, + struct gfs2_log_descriptor *ld, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + struct gfs2_glock *gl = jd->jd_inode->i_gl; + unsigned int blks = ld->ld_data1; + struct buffer_head *bh_log, *bh_ip; + uint64_t blkno; + int error = 0; + + if (pass != 1 || ld->ld_type != GFS2_LOG_DESC_METADATA) + return 0; + + gfs2_replay_incr_blk(sdp, &start); + + for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { + error = gfs2_replay_read_block(jd, start, &bh_log); + if (error) + return error; + + blkno = ((struct gfs2_meta_header *)bh_log->b_data)->mh_blkno; + blkno = le64_to_cpu(blkno); + + sdp->sd_found_blocks++; + + if (gfs2_revoke_check(sdp, blkno, start)) { + brelse(bh_log); + continue; + } + + bh_ip = gfs2_meta_new(gl, blkno); + memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size); + + if (gfs2_meta_check(sdp, bh_ip)) + error = -EIO; + else + mark_buffer_dirty(bh_ip); + + brelse(bh_log); + brelse(bh_ip); + + if (error) + break; + + sdp->sd_replayed_blocks++; + } + + return error; +} + +static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + + if (error) { + gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT); + return; + } + if (pass != 1) + return; + + gfs2_meta_sync(jd->jd_inode->i_gl, DIO_START | DIO_WAIT); + + fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n", + jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); +} + +static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +{ + struct gfs2_trans *tr; + + tr = get_transaction; + tr->tr_touched = 1; + tr->tr_num_revoke++; + + gfs2_log_lock(sdp); + sdp->sd_log_num_revoke++; + list_add(&le->le_list, &sdp->sd_log_le_revoke); + gfs2_log_unlock(sdp); +} + +static void revoke_lo_before_commit(struct gfs2_sbd *sdp) +{ + struct gfs2_log_descriptor ld; + struct gfs2_meta_header *mh = &ld.ld_header; + struct buffer_head *bh; + unsigned int offset; + struct list_head *head = &sdp->sd_log_le_revoke; + struct gfs2_revoke *rv; + + if (!sdp->sd_log_num_revoke) + return; + + bh = gfs2_log_get_buf(sdp); + memset(&ld, 0, sizeof(struct gfs2_log_descriptor)); + ld.ld_header.mh_magic = GFS2_MAGIC; + ld.ld_header.mh_type = GFS2_METATYPE_LD; + ld.ld_header.mh_format = GFS2_FORMAT_LD; + ld.ld_header.mh_blkno = bh->b_blocknr; + ld.ld_type = GFS2_LOG_DESC_REVOKE; + ld.ld_length = gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(uint64_t)); + ld.ld_data1 = sdp->sd_log_num_revoke; + gfs2_log_descriptor_out(&ld, bh->b_data); + offset = sizeof(struct gfs2_log_descriptor); + + while (!list_empty(head)) { + rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list); + list_del(&rv->rv_le.le_list); + sdp->sd_log_num_revoke--; + + if (offset + sizeof(uint64_t) > sdp->sd_sb.sb_bsize) { + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); + + bh = gfs2_log_get_buf(sdp); + mh->mh_type = GFS2_METATYPE_LB; + mh->mh_format = GFS2_FORMAT_LB; + mh->mh_blkno = bh->b_blocknr; + gfs2_meta_header_out(mh, bh->b_data); + offset = sizeof(struct gfs2_meta_header); + } + + *(uint64_t *)(bh->b_data + offset) = cpu_to_le64(rv->rv_blkno); + kfree(rv); + + offset += sizeof(uint64_t); + } + gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); + + set_buffer_dirty(bh); + ll_rw_block(WRITE, 1, &bh); +} + +static void revoke_lo_before_scan(struct gfs2_jdesc *jd, + struct gfs2_log_header *head, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + + if (pass != 0) + return; + + sdp->sd_found_revokes = 0; + sdp->sd_replay_tail = head->lh_tail; +} + +static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, + struct gfs2_log_descriptor *ld, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + unsigned int blks = ld->ld_length; + unsigned int revokes = ld->ld_data1; + struct buffer_head *bh; + unsigned int offset; + uint64_t blkno; + int first = 1; + int error; + + if (pass != 0 || ld->ld_type != GFS2_LOG_DESC_REVOKE) + return 0; + + offset = sizeof(struct gfs2_log_descriptor); + + for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { + error = gfs2_replay_read_block(jd, start, &bh); + if (error) + return error; + + if (!first) + gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB); + + while (offset + sizeof(uint64_t) <= sdp->sd_sb.sb_bsize) { + blkno = *(uint64_t *)(bh->b_data + offset); + blkno = le64_to_cpu(blkno); + + error = gfs2_revoke_add(sdp, blkno, start); + if (error < 0) + return error; + else if (error) + sdp->sd_found_revokes++; + + if (!--revokes) + break; + offset += sizeof(uint64_t); + } + + brelse(bh); + offset = sizeof(struct gfs2_meta_header); + first = 0; + } + + return 0; +} + +static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) +{ + struct gfs2_sbd *sdp = jd->jd_inode->i_sbd; + + if (error) { + gfs2_revoke_clean(sdp); + return; + } + if (pass != 1) + return; + + fs_info(sdp, "jid=%u: Found %u revoke tags\n", + jd->jd_jid, sdp->sd_found_revokes); + + gfs2_revoke_clean(sdp); +} + +static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +{ + struct gfs2_rgrpd *rgd; + + get_transaction->tr_touched = 1; + + if (!list_empty(&le->le_list)) + return; + + rgd = container_of(le, struct gfs2_rgrpd, rd_le); + gfs2_rgrp_bh_hold(rgd); + + gfs2_log_lock(sdp); + sdp->sd_log_num_rg++; + list_add(&le->le_list, &sdp->sd_log_le_rg); + gfs2_log_unlock(sdp); +} + +static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head = &sdp->sd_log_le_rg; + struct gfs2_rgrpd *rgd; + + while (!list_empty(head)) { + rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list); + list_del_init(&rgd->rd_le.le_list); + sdp->sd_log_num_rg--; + + gfs2_rgrp_repolish_clones(rgd); + gfs2_rgrp_bh_put(rgd); + } + gfs2_assert_warn(sdp, !sdp->sd_log_num_rg); +} + +static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +{ + get_transaction->tr_touched = 1; + + gfs2_log_lock(sdp); + sdp->sd_log_num_databuf++; + list_add(&le->le_list, &sdp->sd_log_le_databuf); + gfs2_log_unlock(sdp); +} + +static void databuf_lo_before_commit(struct gfs2_sbd *sdp) +{ + struct list_head *head = &sdp->sd_log_le_databuf; + LIST_HEAD(started); + struct gfs2_databuf *db; + struct buffer_head *bh; + + while (!list_empty(head)) { + db = list_entry(head->prev, struct gfs2_databuf, db_le.le_list); + list_move(&db->db_le.le_list, &started); + + gfs2_log_lock(sdp); + bh = db->db_bh; + if (bh) { + get_bh(bh); + gfs2_log_unlock(sdp); + if (buffer_dirty(bh)) { + wait_on_buffer(bh); + ll_rw_block(WRITE, 1, &bh); + } + brelse(bh); + } else + gfs2_log_unlock(sdp); + } + + while (!list_empty(&started)) { + db = list_entry(started.next, struct gfs2_databuf, + db_le.le_list); + list_del(&db->db_le.le_list); + sdp->sd_log_num_databuf--; + + gfs2_log_lock(sdp); + bh = db->db_bh; + if (bh) { + set_v2db(bh, NULL); + gfs2_log_unlock(sdp); + wait_on_buffer(bh); + brelse(bh); + } else + gfs2_log_unlock(sdp); + + kfree(db); + } + + gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); +} + +struct gfs2_log_operations gfs2_glock_lops = { + .lo_add = glock_lo_add, + .lo_after_commit = glock_lo_after_commit, + .lo_name = "glock" +}; + +struct gfs2_log_operations gfs2_buf_lops = { + .lo_add = buf_lo_add, + .lo_incore_commit = buf_lo_incore_commit, + .lo_before_commit = buf_lo_before_commit, + .lo_after_commit = buf_lo_after_commit, + .lo_before_scan = buf_lo_before_scan, + .lo_scan_elements = buf_lo_scan_elements, + .lo_after_scan = buf_lo_after_scan, + .lo_name = "buf" +}; + +struct gfs2_log_operations gfs2_revoke_lops = { + .lo_add = revoke_lo_add, + .lo_before_commit = revoke_lo_before_commit, + .lo_before_scan = revoke_lo_before_scan, + .lo_scan_elements = revoke_lo_scan_elements, + .lo_after_scan = revoke_lo_after_scan, + .lo_name = "revoke" +}; + +struct gfs2_log_operations gfs2_rg_lops = { + .lo_add = rg_lo_add, + .lo_after_commit = rg_lo_after_commit, + .lo_name = "rg" +}; + +struct gfs2_log_operations gfs2_databuf_lops = { + .lo_add = databuf_lo_add, + .lo_before_commit = databuf_lo_before_commit, + .lo_name = "databuf" +}; + +struct gfs2_log_operations *gfs2_log_ops[] = { + &gfs2_glock_lops, + &gfs2_buf_lops, + &gfs2_revoke_lops, + &gfs2_rg_lops, + &gfs2_databuf_lops, + NULL +}; + --- a/fs/gfs2/lops.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/lops.h 2005-10-10 11:28:49.249794275 -0500 @@ -0,0 +1,95 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __LOPS_DOT_H__ +#define __LOPS_DOT_H__ + +extern struct gfs2_log_operations gfs2_glock_lops; +extern struct gfs2_log_operations gfs2_buf_lops; +extern struct gfs2_log_operations gfs2_revoke_lops; +extern struct gfs2_log_operations gfs2_rg_lops; +extern struct gfs2_log_operations gfs2_databuf_lops; + +extern struct gfs2_log_operations *gfs2_log_ops[]; + +static inline void lops_init_le(struct gfs2_log_element *le, + struct gfs2_log_operations *lops) +{ + INIT_LIST_HEAD(&le->le_list); + le->le_ops = lops; +} + +static inline void lops_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +{ + if (le->le_ops->lo_add) + le->le_ops->lo_add(sdp, le); +} + +static inline void lops_incore_commit(struct gfs2_sbd *sdp, + struct gfs2_trans *tr) +{ + int x; + for (x = 0; gfs2_log_ops[x]; x++) + if (gfs2_log_ops[x]->lo_incore_commit) + gfs2_log_ops[x]->lo_incore_commit(sdp, tr); +} + +static inline void lops_before_commit(struct gfs2_sbd *sdp) +{ + int x; + for (x = 0; gfs2_log_ops[x]; x++) + if (gfs2_log_ops[x]->lo_before_commit) + gfs2_log_ops[x]->lo_before_commit(sdp); +} + +static inline void lops_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + int x; + for (x = 0; gfs2_log_ops[x]; x++) + if (gfs2_log_ops[x]->lo_after_commit) + gfs2_log_ops[x]->lo_after_commit(sdp, ai); +} + +static inline void lops_before_scan(struct gfs2_jdesc *jd, + struct gfs2_log_header *head, + unsigned int pass) +{ + int x; + for (x = 0; gfs2_log_ops[x]; x++) + if (gfs2_log_ops[x]->lo_before_scan) + gfs2_log_ops[x]->lo_before_scan(jd, head, pass); +} + +static inline int lops_scan_elements(struct gfs2_jdesc *jd, unsigned int start, + struct gfs2_log_descriptor *ld, + unsigned int pass) +{ + int x, error; + for (x = 0; gfs2_log_ops[x]; x++) + if (gfs2_log_ops[x]->lo_scan_elements) { + error = gfs2_log_ops[x]->lo_scan_elements(jd, start, + ld, pass); + if (error) + return error; + } + + return 0; +} + +static inline void lops_after_scan(struct gfs2_jdesc *jd, int error, + unsigned int pass) +{ + int x; + for (x = 0; gfs2_log_ops[x]; x++) + if (gfs2_log_ops[x]->lo_before_scan) + gfs2_log_ops[x]->lo_after_scan(jd, error, pass); +} + +#endif /* __LOPS_DOT_H__ */ + --- a/fs/gfs2/main.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/main.c 2005-10-10 11:28:49.257793028 -0500 @@ -0,0 +1,101 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "ops_fstype.h" +#include "sys.h" + +/** + * init_gfs2_fs - Register GFS2 as a filesystem + * + * Returns: 0 on success, error code on failure + */ + +static int __init init_gfs2_fs(void) +{ + int error; + + error = gfs2_sys_init(); + if (error) + return error; + + error = -ENOMEM; + + gfs2_glock_cachep = kmem_cache_create("gfs2_glock", + sizeof(struct gfs2_glock), + 0, 0, NULL, NULL); + if (!gfs2_glock_cachep) + goto fail; + + gfs2_inode_cachep = kmem_cache_create("gfs2_inode", + sizeof(struct gfs2_inode), + 0, 0, NULL, NULL); + if (!gfs2_inode_cachep) + goto fail; + + gfs2_bufdata_cachep = kmem_cache_create("gfs2_bufdata", + sizeof(struct gfs2_bufdata), + 0, 0, NULL, NULL); + if (!gfs2_bufdata_cachep) + goto fail; + + error = register_filesystem(&gfs2_fs_type); + if (error) + goto fail; + + printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); + + return 0; + + fail: + if (gfs2_bufdata_cachep) + kmem_cache_destroy(gfs2_bufdata_cachep); + + if (gfs2_inode_cachep) + kmem_cache_destroy(gfs2_inode_cachep); + + if (gfs2_glock_cachep) + kmem_cache_destroy(gfs2_glock_cachep); + + gfs2_sys_uninit(); + return error; +} + +/** + * exit_gfs2_fs - Unregister the file system + * + */ + +static void __exit exit_gfs2_fs(void) +{ + unregister_filesystem(&gfs2_fs_type); + + kmem_cache_destroy(gfs2_bufdata_cachep); + kmem_cache_destroy(gfs2_inode_cachep); + kmem_cache_destroy(gfs2_glock_cachep); + + gfs2_sys_uninit(); +} + +MODULE_DESCRIPTION("Global File System"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +module_init(init_gfs2_fs); +module_exit(exit_gfs2_fs); + --- a/fs/gfs2/meta_io.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/meta_io.c 2005-10-10 11:28:49.258792872 -0500 @@ -0,0 +1,883 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "log.h" +#include "lops.h" +#include "meta_io.h" +#include "rgrp.h" +#include "trans.h" + +#define buffer_busy(bh) \ +((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock) | (1ul << BH_Pinned))) +#define buffer_in_io(bh) \ +((bh)->b_state & ((1ul << BH_Dirty) | (1ul << BH_Lock))) + +static int aspace_get_block(struct inode *inode, sector_t lblock, + struct buffer_head *bh_result, int create) +{ + gfs2_assert_warn(get_v2sdp(inode->i_sb), 0); + return -EOPNOTSUPP; +} + +static int gfs2_aspace_writepage(struct page *page, + struct writeback_control *wbc) +{ + return block_write_full_page(page, aspace_get_block, wbc); +} + +/** + * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out. + * @bh: the buffer we're stuck on + * + */ + +static void stuck_releasepage(struct buffer_head *bh) +{ + struct gfs2_sbd *sdp = get_v2sdp(bh->b_page->mapping->host->i_sb); + struct gfs2_bufdata *bd = get_v2bd(bh); + struct gfs2_glock *gl; + + fs_warn(sdp, "stuck in gfs2_releasepage()\n"); + fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n", + (uint64_t)bh->b_blocknr, atomic_read(&bh->b_count)); + fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh)); + fs_warn(sdp, "get_v2bd(bh) = %s\n", (bd) ? "!NULL" : "NULL"); + + if (!bd) + return; + + gl = bd->bd_gl; + + fs_warn(sdp, "gl = (%u, %llu)\n", + gl->gl_name.ln_type, gl->gl_name.ln_number); + + fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n", + (list_empty(&bd->bd_list_tr)) ? "no" : "yes", + (list_empty(&bd->bd_le.le_list)) ? "no" : "yes"); + + if (gl->gl_ops == &gfs2_inode_glops) { + struct gfs2_inode *ip = get_gl2ip(gl); + unsigned int x; + + if (!ip) + return; + + fs_warn(sdp, "ip = %llu %llu\n", + ip->i_num.no_formal_ino, ip->i_num.no_addr); + fs_warn(sdp, "ip->i_count = %d, ip->i_vnode = %s\n", + atomic_read(&ip->i_count), + (ip->i_vnode) ? "!NULL" : "NULL"); + + for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) + fs_warn(sdp, "ip->i_cache[%u] = %s\n", + x, (ip->i_cache[x]) ? "!NULL" : "NULL"); + } +} + +/** + * gfs2_aspace_releasepage - free the metadata associated with a page + * @page: the page that's being released + * @gfp_mask: passed from Linux VFS, ignored by us + * + * Call try_to_free_buffers() if the buffers in this page can be + * released. + * + * Returns: 0 + */ + +static int gfs2_aspace_releasepage(struct page *page, int gfp_mask) +{ + struct inode *aspace = page->mapping->host; + struct gfs2_sbd *sdp = get_v2sdp(aspace->i_sb); + struct buffer_head *bh, *head; + struct gfs2_bufdata *bd; + unsigned long t; + + if (!page_has_buffers(page)) + goto out; + + head = bh = page_buffers(page); + do { + t = jiffies; + + while (atomic_read(&bh->b_count)) { + if (atomic_read(&aspace->i_writecount)) { + if (time_after_eq(jiffies, t + + gfs2_tune_get(sdp, gt_stall_secs) * HZ)) { + stuck_releasepage(bh); + t = jiffies; + } + + yield(); + continue; + } + + return 0; + } + + gfs2_assert_warn(sdp, !buffer_pinned(bh)); + + bd = get_v2bd(bh); + if (bd) { + gfs2_assert_warn(sdp, bd->bd_bh == bh); + gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr)); + gfs2_assert_warn(sdp, list_empty(&bd->bd_le.le_list)); + gfs2_assert_warn(sdp, !bd->bd_ail); + kmem_cache_free(gfs2_bufdata_cachep, bd); + atomic_dec(&sdp->sd_bufdata_count); + set_v2bd(bh, NULL); + } + + bh = bh->b_this_page; + } + while (bh != head); + + out: + return try_to_free_buffers(page); +} + +static struct address_space_operations aspace_aops = { + .writepage = gfs2_aspace_writepage, + .releasepage = gfs2_aspace_releasepage, +}; + +/** + * gfs2_aspace_get - Create and initialize a struct inode structure + * @sdp: the filesystem the aspace is in + * + * Right now a struct inode is just a struct inode. Maybe Linux + * will supply a more lightweight address space construct (that works) + * in the future. + * + * Make sure pages/buffers in this aspace aren't in high memory. + * + * Returns: the aspace + */ + +struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp) +{ + struct inode *aspace; + + aspace = new_inode(sdp->sd_vfs); + if (aspace) { + mapping_set_gfp_mask(aspace->i_mapping, GFP_KERNEL); + aspace->i_mapping->a_ops = &aspace_aops; + aspace->i_size = ~0ULL; + set_v2ip(aspace, NULL); + insert_inode_hash(aspace); + } + + return aspace; +} + +void gfs2_aspace_put(struct inode *aspace) +{ + remove_inode_hash(aspace); + iput(aspace); +} + +/** + * gfs2_ail1_start_one - Start I/O on a part of the AIL + * @sdp: the filesystem + * @tr: the part of the AIL + * + */ + +void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head, *tmp, *prev; + struct gfs2_bufdata *bd; + struct buffer_head *bh; + int retry; + + do { + retry = 0; + + for (head = &ai->ai_ail1_list, tmp = head->prev, prev = tmp->prev; + tmp != head; + tmp = prev, prev = tmp->prev) { + bd = list_entry(tmp, struct gfs2_bufdata, bd_ail_st_list); + bh = bd->bd_bh; + + gfs2_assert(sdp, bd->bd_ail == ai); + + if (!buffer_busy(bh)) { + if (!buffer_uptodate(bh)) + gfs2_io_error_bh(sdp, bh); + list_move(&bd->bd_ail_st_list, + &ai->ai_ail2_list); + continue; + } + + if (!buffer_dirty(bh)) + continue; + + list_move(&bd->bd_ail_st_list, head); + + gfs2_log_unlock(sdp); + wait_on_buffer(bh); + ll_rw_block(WRITE, 1, &bh); + gfs2_log_lock(sdp); + + retry = 1; + break; + } + } while (retry); +} + +/** + * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced + * @sdp: the filesystem + * @ai: the AIL entry + * + */ + +int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags) +{ + struct list_head *head, *tmp, *prev; + struct gfs2_bufdata *bd; + struct buffer_head *bh; + + for (head = &ai->ai_ail1_list, tmp = head->prev, prev = tmp->prev; + tmp != head; + tmp = prev, prev = tmp->prev) { + bd = list_entry(tmp, struct gfs2_bufdata, bd_ail_st_list); + bh = bd->bd_bh; + + gfs2_assert(sdp, bd->bd_ail == ai); + + if (buffer_busy(bh)) { + if (flags & DIO_ALL) + continue; + else + break; + } + + if (!buffer_uptodate(bh)) + gfs2_io_error_bh(sdp, bh); + + list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); + } + + return list_empty(head); +} + +/** + * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced + * @sdp: the filesystem + * @ai: the AIL entry + * + */ + +void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +{ + struct list_head *head = &ai->ai_ail2_list; + struct gfs2_bufdata *bd; + + while (!list_empty(head)) { + bd = list_entry(head->prev, struct gfs2_bufdata, + bd_ail_st_list); + gfs2_assert(sdp, bd->bd_ail == ai); + bd->bd_ail = NULL; + list_del(&bd->bd_ail_st_list); + list_del(&bd->bd_ail_gl_list); + atomic_dec(&bd->bd_gl->gl_ail_count); + brelse(bd->bd_bh); + } +} + +/** + * ail_empty_gl - remove all buffers for a given lock from the AIL + * @gl: the glock + * + * None of the buffers should be dirty, locked, or pinned. + */ + +void gfs2_ail_empty_gl(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + unsigned int blocks; + struct list_head *head = &gl->gl_ail_list; + struct gfs2_bufdata *bd; + struct buffer_head *bh; + uint64_t blkno; + int error; + + blocks = atomic_read(&gl->gl_ail_count); + if (!blocks) + return; + + error = gfs2_trans_begin(sdp, 0, blocks); + if (gfs2_assert_withdraw(sdp, !error)) + return; + + gfs2_log_lock(sdp); + while (!list_empty(head)) { + bd = list_entry(head->next, struct gfs2_bufdata, + bd_ail_gl_list); + bh = bd->bd_bh; + blkno = bh->b_blocknr; + gfs2_assert_withdraw(sdp, !buffer_busy(bh)); + + bd->bd_ail = NULL; + list_del(&bd->bd_ail_st_list); + list_del(&bd->bd_ail_gl_list); + atomic_dec(&gl->gl_ail_count); + brelse(bh); + gfs2_log_unlock(sdp); + + gfs2_trans_add_revoke(sdp, blkno); + + gfs2_log_lock(sdp); + } + gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); + gfs2_log_unlock(sdp); + + gfs2_trans_end(sdp); + gfs2_log_flush(sdp); +} + +/** + * gfs2_meta_inval - Invalidate all buffers associated with a glock + * @gl: the glock + * + */ + +void gfs2_meta_inval(struct gfs2_glock *gl) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + struct inode *aspace = gl->gl_aspace; + struct address_space *mapping = gl->gl_aspace->i_mapping; + + gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); + + atomic_inc(&aspace->i_writecount); + truncate_inode_pages(mapping, 0); + atomic_dec(&aspace->i_writecount); + + gfs2_assert_withdraw(sdp, !mapping->nrpages); +} + +/** + * gfs2_meta_sync - Sync all buffers associated with a glock + * @gl: The glock + * @flags: DIO_START | DIO_WAIT + * + */ + +void gfs2_meta_sync(struct gfs2_glock *gl, int flags) +{ + struct address_space *mapping = gl->gl_aspace->i_mapping; + int error = 0; + + if (flags & DIO_START) + filemap_fdatawrite(mapping); + if (!error && (flags & DIO_WAIT)) + error = filemap_fdatawait(mapping); + + if (error) + gfs2_io_error(gl->gl_sbd); +} + +/** + * getbuf - Get a buffer with a given address space + * @sdp: the filesystem + * @aspace: the address space + * @blkno: the block number (filesystem scope) + * @create: 1 if the buffer should be created + * + * Returns: the buffer + */ + +static struct buffer_head *getbuf(struct gfs2_sbd *sdp, struct inode *aspace, + uint64_t blkno, int create) +{ + struct page *page; + struct buffer_head *bh; + unsigned int shift; + unsigned long index; + unsigned int bufnum; + + shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift; + index = blkno >> shift; /* convert block to page */ + bufnum = blkno - (index << shift); /* block buf index within page */ + + if (create) { + for (;;) { + page = grab_cache_page(aspace->i_mapping, index); + if (page) + break; + yield(); + } + } else { + page = find_lock_page(aspace->i_mapping, index); + if (!page) + return NULL; + } + + if (!page_has_buffers(page)) + create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0); + + /* Locate header for our buffer within our page */ + for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page) + /* Do nothing */; + get_bh(bh); + + if (!buffer_mapped(bh)) + map_bh(bh, sdp->sd_vfs, blkno); + + unlock_page(page); + mark_page_accessed(page); + page_cache_release(page); + + return bh; +} + +static void meta_prep_new(struct buffer_head *bh) +{ + struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; + + lock_buffer(bh); + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); + unlock_buffer(bh); + + mh->mh_magic = cpu_to_le32(GFS2_MAGIC); + mh->mh_blkno = cpu_to_le64(bh->b_blocknr); +} + +/** + * gfs2_meta_new - Get a block + * @gl: The glock associated with this block + * @blkno: The block number + * + * Returns: The buffer + */ + +struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno) +{ + struct buffer_head *bh; + bh = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE); + meta_prep_new(bh); + return bh; +} + +/** + * gfs2_meta_read - Read a block from disk + * @gl: The glock covering the block + * @blkno: The block number + * @flags: flags to gfs2_dreread() + * @bhp: the place where the buffer is returned (NULL on failure) + * + * Returns: errno + */ + +int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno, int flags, + struct buffer_head **bhp) +{ + int error; + + *bhp = getbuf(gl->gl_sbd, gl->gl_aspace, blkno, CREATE); + error = gfs2_meta_reread(gl->gl_sbd, *bhp, flags); + if (error) + brelse(*bhp); + + return error; +} + +/** + * gfs2_meta_reread - Reread a block from disk + * @sdp: the filesystem + * @bh: The block to read + * @flags: Flags that control the read + * + * Returns: errno + */ + +int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags) +{ + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + return -EIO; + + if (flags & DIO_FORCE) + clear_buffer_uptodate(bh); + + if ((flags & DIO_START) && !buffer_uptodate(bh)) + ll_rw_block(READ, 1, &bh); + + if (flags & DIO_WAIT) { + wait_on_buffer(bh); + + if (!buffer_uptodate(bh)) { + struct gfs2_trans *tr = get_transaction; + if (tr && tr->tr_touched) + gfs2_io_error_bh(sdp, bh); + return -EIO; + } + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + return -EIO; + } + + return 0; +} + +/** + * gfs2_meta_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer + * @gl: the glock the buffer belongs to + * @bh: The buffer to be attached to + * + */ + +void gfs2_meta_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh) +{ + struct gfs2_bufdata *bd; + + lock_page(bh->b_page); + + if (get_v2bd(bh)) { + unlock_page(bh->b_page); + return; + } + + bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_KERNEL | __GFP_NOFAIL), + atomic_inc(&gl->gl_sbd->sd_bufdata_count); + + memset(bd, 0, sizeof(struct gfs2_bufdata)); + + bd->bd_bh = bh; + bd->bd_gl = gl; + + INIT_LIST_HEAD(&bd->bd_list_tr); + lops_init_le(&bd->bd_le, &gfs2_buf_lops); + + set_v2bd(bh, bd); + + unlock_page(bh->b_page); +} + +/** + * gfs2_meta_pin - Pin a metadata buffer in memory + * @sdp: the filesystem the buffer belongs to + * @bh: The buffer to be pinned + * + */ + +void gfs2_meta_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) +{ + struct gfs2_bufdata *bd = get_v2bd(bh); + + gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); + + if (test_set_buffer_pinned(bh)) + gfs2_assert_withdraw(sdp, 0); + + wait_on_buffer(bh); + + /* If this buffer is in the AIL and it has already been written + to in-place disk block, remove it from the AIL. */ + + gfs2_log_lock(sdp); + if (bd->bd_ail && !buffer_in_io(bh)) + list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); + gfs2_log_unlock(sdp); + + clear_buffer_dirty(bh); + wait_on_buffer(bh); + + if (!buffer_uptodate(bh)) + gfs2_io_error_bh(sdp, bh); + + get_bh(bh); +} + +/** + * gfs2_meta_unpin - Unpin a buffer + * @sdp: the filesystem the buffer belongs to + * @bh: The buffer to unpin + * @ai: + * + */ + +void gfs2_meta_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, + struct gfs2_ail *ai) +{ + struct gfs2_bufdata *bd = get_v2bd(bh); + + gfs2_assert_withdraw(sdp, buffer_uptodate(bh)); + + if (!buffer_pinned(bh)) + gfs2_assert_withdraw(sdp, 0); + + mark_buffer_dirty(bh); + clear_buffer_pinned(bh); + + gfs2_log_lock(sdp); + if (bd->bd_ail) { + list_del(&bd->bd_ail_st_list); + brelse(bh); + } else { + struct gfs2_glock *gl = bd->bd_gl; + list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list); + atomic_inc(&gl->gl_ail_count); + } + bd->bd_ail = ai; + list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); + gfs2_log_unlock(sdp); +} + +/** + * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore + * @ip: the inode who owns the buffers + * @bstart: the first buffer in the run + * @blen: the number of buffers in the run + * + */ + +void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct inode *aspace = ip->i_gl->gl_aspace; + struct buffer_head *bh; + + while (blen) { + bh = getbuf(sdp, aspace, bstart, NO_CREATE); + if (bh) { + struct gfs2_bufdata *bd = get_v2bd(bh); + + if (test_clear_buffer_pinned(bh)) { + gfs2_log_lock(sdp); + list_del_init(&bd->bd_le.le_list); + gfs2_assert_warn(sdp, sdp->sd_log_num_buf); + sdp->sd_log_num_buf--; + gfs2_log_unlock(sdp); + get_transaction->tr_num_buf_rm++; + brelse(bh); + } + if (bd) { + gfs2_log_lock(sdp); + if (bd->bd_ail) { + uint64_t blkno = bh->b_blocknr; + bd->bd_ail = NULL; + list_del(&bd->bd_ail_st_list); + list_del(&bd->bd_ail_gl_list); + atomic_dec(&bd->bd_gl->gl_ail_count); + brelse(bh); + gfs2_log_unlock(sdp); + gfs2_trans_add_revoke(sdp, blkno); + } else + gfs2_log_unlock(sdp); + } + + lock_buffer(bh); + clear_buffer_dirty(bh); + clear_buffer_uptodate(bh); + unlock_buffer(bh); + + brelse(bh); + } + + bstart++; + blen--; + } +} + +/** + * gfs2_meta_cache_flush - get rid of any references on buffers for this inode + * @ip: The GFS2 inode + * + * This releases buffers that are in the most-recently-used array of + * blocks used for indirect block addressing for this inode. + */ + +void gfs2_meta_cache_flush(struct gfs2_inode *ip) +{ + struct buffer_head **bh_slot; + unsigned int x; + + spin_lock(&ip->i_spin); + + for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) { + bh_slot = &ip->i_cache[x]; + if (!*bh_slot) + break; + brelse(*bh_slot); + *bh_slot = NULL; + } + + spin_unlock(&ip->i_spin); +} + +/** + * gfs2_meta_indirect_buffer - Get a metadata buffer + * @ip: The GFS2 inode + * @height: The level of this buf in the metadata (indir addr) tree (if any) + * @num: The block number (device relative) of the buffer + * @new: Non-zero if we may create a new buffer + * @bhp: the buffer is returned here + * + * Try to use the gfs2_inode's MRU metadata tree cache. + * + * Returns: errno + */ + +int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num, + int new, struct buffer_head **bhp) +{ + struct buffer_head *bh, **bh_slot = ip->i_cache + height; + int error; + + spin_lock(&ip->i_spin); + bh = *bh_slot; + if (bh) { + if (bh->b_blocknr == num) + get_bh(bh); + else + bh = NULL; + } + spin_unlock(&ip->i_spin); + + if (bh) { + if (new) + meta_prep_new(bh); + else { + error = gfs2_meta_reread(ip->i_sbd, bh, + DIO_START | DIO_WAIT); + if (error) { + brelse(bh); + return error; + } + } + } else { + if (new) + bh = gfs2_meta_new(ip->i_gl, num); + else { + error = gfs2_meta_read(ip->i_gl, num, + DIO_START | DIO_WAIT, &bh); + if (error) + return error; + } + + spin_lock(&ip->i_spin); + if (*bh_slot != bh) { + brelse(*bh_slot); + *bh_slot = bh; + get_bh(bh); + } + spin_unlock(&ip->i_spin); + } + + if (new) { + if (gfs2_assert_warn(ip->i_sbd, height)) { + brelse(bh); + return -EIO; + } + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); + gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); + + } else if (gfs2_metatype_check(ip->i_sbd, bh, + (height) ? GFS2_METATYPE_IN : GFS2_METATYPE_DI)) { + brelse(bh); + return -EIO; + } + + *bhp = bh; + + return 0; +} + +/** + * gfs2_meta_ra - start readahead on an extent of a file + * @gl: the glock the blocks belong to + * @dblock: the starting disk block + * @extlen: the number of blocks in the extent + * + */ + +void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + struct inode *aspace = gl->gl_aspace; + struct buffer_head *first_bh, *bh; + uint32_t max_ra = gfs2_tune_get(sdp, gt_max_readahead) >> sdp->sd_sb.sb_bsize_shift; + int error; + + if (!extlen || !max_ra) + return; + if (extlen > max_ra) + extlen = max_ra; + + first_bh = getbuf(sdp, aspace, dblock, CREATE); + + if (buffer_uptodate(first_bh)) + goto out; + if (!buffer_locked(first_bh)) { + error = gfs2_meta_reread(sdp, first_bh, DIO_START); + if (error) + goto out; + } + + dblock++; + extlen--; + + while (extlen) { + bh = getbuf(sdp, aspace, dblock, CREATE); + + if (!buffer_uptodate(bh) && !buffer_locked(bh)) { + error = gfs2_meta_reread(sdp, bh, DIO_START); + brelse(bh); + if (error) + goto out; + } else + brelse(bh); + + dblock++; + extlen--; + + if (buffer_uptodate(first_bh)) + break; + } + + out: + brelse(first_bh); +} + +/** + * gfs2_meta_syncfs - sync all the buffers in a filesystem + * @sdp: the filesystem + * + */ + +void gfs2_meta_syncfs(struct gfs2_sbd *sdp) +{ + gfs2_log_flush(sdp); + for (;;) { + gfs2_ail1_start(sdp, DIO_ALL); + if (gfs2_ail1_empty(sdp, DIO_ALL)) + break; + msleep(100); + } +} + --- a/fs/gfs2/meta_io.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/meta_io.h 2005-10-10 11:28:49.259792716 -0500 @@ -0,0 +1,88 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __DIO_DOT_H__ +#define __DIO_DOT_H__ + +static inline void gfs2_buffer_clear(struct buffer_head *bh) +{ + memset(bh->b_data, 0, bh->b_size); +} + +static inline void gfs2_buffer_clear_tail(struct buffer_head *bh, int head) +{ + memset(bh->b_data + head, 0, bh->b_size - head); +} + +static inline void gfs2_buffer_clear_ends(struct buffer_head *bh, int offset, + int amount, int journaled) +{ + int z_off1 = (journaled) ? sizeof(struct gfs2_meta_header) : 0; + int z_len1 = offset - z_off1; + int z_off2 = offset + amount; + int z_len2 = (bh)->b_size - z_off2; + + if (z_len1) + memset(bh->b_data + z_off1, 0, z_len1); + + if (z_len2) + memset(bh->b_data + z_off2, 0, z_len2); +} + +static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh, + int to_head, + struct buffer_head *from_bh, + int from_head) +{ + memcpy(to_bh->b_data + to_head, + from_bh->b_data + from_head, + from_bh->b_size - from_head); + memset(to_bh->b_data + to_bh->b_size + to_head - from_head, + 0, + from_head - to_head); +} + +struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp); +void gfs2_aspace_put(struct inode *aspace); + +void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai); +int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags); +void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai); +void gfs2_ail_empty_gl(struct gfs2_glock *gl); + +void gfs2_meta_inval(struct gfs2_glock *gl); +void gfs2_meta_sync(struct gfs2_glock *gl, int flags); + +struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, uint64_t blkno); +int gfs2_meta_read(struct gfs2_glock *gl, uint64_t blkno, + int flags, struct buffer_head **bhp); +int gfs2_meta_reread(struct gfs2_sbd *sdp, struct buffer_head *bh, int flags); + +void gfs2_meta_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh); +void gfs2_meta_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); +void gfs2_meta_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, + struct gfs2_ail *ai); + +void gfs2_meta_wipe(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen); + +void gfs2_meta_cache_flush(struct gfs2_inode *ip); +int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, uint64_t num, + int new, struct buffer_head **bhp); + +static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, + struct buffer_head **bhp) +{ + return gfs2_meta_indirect_buffer(ip, 0, ip->i_num.no_addr, 0, bhp); +} + +void gfs2_meta_ra(struct gfs2_glock *gl, uint64_t dblock, uint32_t extlen); +void gfs2_meta_syncfs(struct gfs2_sbd *sdp); + +#endif /* __DIO_DOT_H__ */ + --- a/fs/gfs2/ondisk.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ondisk.c 2005-10-10 11:28:49.262792248 -0500 @@ -0,0 +1,23 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" + +#define pv(struct, member, fmt) printk(" "#member" = "fmt"\n", struct->member); + +#define WANT_GFS2_CONVERSION_FUNCTIONS +#include + --- a/fs/gfs2/ops_address.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_address.c 2005-10-10 11:28:49.263792092 -0500 @@ -0,0 +1,515 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "glock.h" +#include "inode.h" +#include "jdata.h" +#include "log.h" +#include "meta_io.h" +#include "ops_address.h" +#include "page.h" +#include "quota.h" +#include "trans.h" + +/** + * get_block - Fills in a buffer head with details about a block + * @inode: The inode + * @lblock: The block number to look up + * @bh_result: The buffer head to return the result in + * @create: Non-zero if we may add block to the file + * + * Returns: errno + */ + +static int get_block(struct inode *inode, sector_t lblock, + struct buffer_head *bh_result, int create) +{ + struct gfs2_inode *ip = get_v2ip(inode); + int new = create; + uint64_t dblock; + int error; + + error = gfs2_block_map(ip, lblock, &new, &dblock, NULL); + if (error) + return error; + + if (!dblock) + return 0; + + map_bh(bh_result, inode->i_sb, dblock); + if (new) + set_buffer_new(bh_result); + + return 0; +} + +/** + * get_block_noalloc - Fills in a buffer head with details about a block + * @inode: The inode + * @lblock: The block number to look up + * @bh_result: The buffer head to return the result in + * @create: Non-zero if we may add block to the file + * + * Returns: errno + */ + +static int get_block_noalloc(struct inode *inode, sector_t lblock, + struct buffer_head *bh_result, int create) +{ + struct gfs2_inode *ip = get_v2ip(inode); + int new = 0; + uint64_t dblock; + int error; + + error = gfs2_block_map(ip, lblock, &new, &dblock, NULL); + if (error) + return error; + + if (dblock) + map_bh(bh_result, inode->i_sb, dblock); + else if (gfs2_assert_withdraw(ip->i_sbd, !create)) + error = -EIO; + + return error; +} + +static int get_blocks(struct inode *inode, sector_t lblock, + unsigned long max_blocks, struct buffer_head *bh_result, + int create) +{ + struct gfs2_inode *ip = get_v2ip(inode); + int new = create; + uint64_t dblock; + uint32_t extlen; + int error; + + error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen); + if (error) + return error; + + if (!dblock) + return 0; + + map_bh(bh_result, inode->i_sb, dblock); + if (new) + set_buffer_new(bh_result); + + if (extlen > max_blocks) + extlen = max_blocks; + bh_result->b_size = extlen << inode->i_blkbits; + + return 0; +} + +static int get_blocks_noalloc(struct inode *inode, sector_t lblock, + unsigned long max_blocks, + struct buffer_head *bh_result, int create) +{ + struct gfs2_inode *ip = get_v2ip(inode); + int new = 0; + uint64_t dblock; + uint32_t extlen; + int error; + + error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen); + if (error) + return error; + + if (dblock) { + map_bh(bh_result, inode->i_sb, dblock); + if (extlen > max_blocks) + extlen = max_blocks; + bh_result->b_size = extlen << inode->i_blkbits; + } else if (gfs2_assert_withdraw(ip->i_sbd, !create)) + error = -EIO; + + return error; +} + +/** + * gfs2_writepage - Write complete page + * @page: Page to write + * + * Returns: errno + * + * Use Linux VFS block_write_full_page() to write one page, + * using GFS2's get_block_noalloc to find which blocks to write. + */ + +static int gfs2_writepage(struct page *page, struct writeback_control *wbc) +{ + struct gfs2_inode *ip = get_v2ip(page->mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + int error; + + atomic_inc(&sdp->sd_ops_address); + + if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) { + unlock_page(page); + return -EIO; + } + if (get_transaction) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } + + error = block_write_full_page(page, get_block_noalloc, wbc); + + gfs2_meta_cache_flush(ip); + + return error; +} + +/** + * stuffed_readpage - Fill in a Linux page with stuffed file data + * @ip: the inode + * @page: the page + * + * Returns: errno + */ + +static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) +{ + struct buffer_head *dibh; + void *kaddr; + int error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + return error; + + kaddr = kmap(page); + memcpy((char *)kaddr, + dibh->b_data + sizeof(struct gfs2_dinode), + ip->i_di.di_size); + memset((char *)kaddr + ip->i_di.di_size, + 0, + PAGE_CACHE_SIZE - ip->i_di.di_size); + kunmap(page); + + brelse(dibh); + + SetPageUptodate(page); + + return 0; +} + +static int zero_readpage(struct page *page) +{ + void *kaddr; + + kaddr = kmap(page); + memset(kaddr, 0, PAGE_CACHE_SIZE); + kunmap(page); + + SetPageUptodate(page); + unlock_page(page); + + return 0; +} + +/** + * jdata_readpage - readpage that goes through gfs2_jdata_read_mem() + * @ip: + * @page: The page to read + * + * Returns: errno + */ + +static int jdata_readpage(struct gfs2_inode *ip, struct page *page) +{ + void *kaddr; + int ret; + + kaddr = kmap(page); + + ret = gfs2_jdata_read_mem(ip, kaddr, + (uint64_t)page->index << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE); + if (ret >= 0) { + if (ret < PAGE_CACHE_SIZE) + memset(kaddr + ret, 0, PAGE_CACHE_SIZE - ret); + SetPageUptodate(page); + ret = 0; + } + + kunmap(page); + + unlock_page(page); + + return ret; +} + +/** + * gfs2_readpage - readpage with locking + * @file: The file to read a page for + * @page: The page to read + * + * Returns: errno + */ + +static int gfs2_readpage(struct file *file, struct page *page) +{ + struct gfs2_inode *ip = get_v2ip(page->mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + int error; + + atomic_inc(&sdp->sd_ops_address); + + if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) { + unlock_page(page); + return -EOPNOTSUPP; + } + + if (!gfs2_is_jdata(ip)) { + if (gfs2_is_stuffed(ip)) { + if (!page->index) { + error = stuffed_readpage(ip, page); + unlock_page(page); + } else + error = zero_readpage(page); + } else + error = block_read_full_page(page, get_block); + } else + error = jdata_readpage(ip, page); + + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + error = -EIO; + + return error; +} + +/** + * gfs2_prepare_write - Prepare to write a page to a file + * @file: The file to write to + * @page: The page which is to be prepared for writing + * @from: From (byte range within page) + * @to: To (byte range within page) + * + * Returns: errno + */ + +static int gfs2_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct gfs2_inode *ip = get_v2ip(page->mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + int error = 0; + + atomic_inc(&sdp->sd_ops_address); + + if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) + return -EOPNOTSUPP; + + if (gfs2_is_stuffed(ip)) { + uint64_t file_size; + file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to; + + if (file_size > sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode)) { + error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, + page); + if (!error) + error = block_prepare_write(page, from, to, + get_block); + } else if (!PageUptodate(page)) + error = stuffed_readpage(ip, page); + } else + error = block_prepare_write(page, from, to, get_block); + + return error; +} + +/** + * gfs2_commit_write - Commit write to a file + * @file: The file to write to + * @page: The page containing the data + * @from: From (byte range within page) + * @to: To (byte range within page) + * + * Returns: errno + */ + +static int gfs2_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_sbd *sdp = ip->i_sbd; + int error; + + atomic_inc(&sdp->sd_ops_address); + + if (gfs2_is_stuffed(ip)) { + struct buffer_head *dibh; + uint64_t file_size; + void *kaddr; + + file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail; + + gfs2_trans_add_bh(ip->i_gl, dibh); + + kaddr = kmap(page); + memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from, + (char *)kaddr + from, + to - from); + kunmap(page); + + brelse(dibh); + + SetPageUptodate(page); + + if (inode->i_size < file_size) + i_size_write(inode, file_size); + } else { + if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) + gfs2_page_add_databufs(sdp, page, from, to); + error = generic_commit_write(file, page, from, to); + if (error) + goto fail; + } + + return 0; + + fail: + ClearPageUptodate(page); + + return error; +} + +/** + * gfs2_bmap - Block map function + * @mapping: Address space info + * @lblock: The block to map + * + * Returns: The disk address for the block or 0 on hole or error + */ + +static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock) +{ + struct gfs2_inode *ip = get_v2ip(mapping->host); + struct gfs2_holder i_gh; + sector_t dblock = 0; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_address); + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); + if (error) + return 0; + + if (!gfs2_is_stuffed(ip)) + dblock = generic_block_bmap(mapping, lblock, get_block); + + gfs2_glock_dq_uninit(&i_gh); + + return dblock; +} + +static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh) +{ + struct gfs2_databuf *db; + + gfs2_log_lock(sdp); + db = get_v2db(bh); + if (db) { + db->db_bh = NULL; + set_v2db(bh, NULL); + gfs2_log_unlock(sdp); + brelse(bh); + } else + gfs2_log_unlock(sdp); + + lock_buffer(bh); + clear_buffer_dirty(bh); + bh->b_bdev = NULL; + clear_buffer_mapped(bh); + clear_buffer_req(bh); + clear_buffer_new(bh); + clear_buffer_delay(bh); + unlock_buffer(bh); +} + +static int gfs2_invalidatepage(struct page *page, unsigned long offset) +{ + struct gfs2_sbd *sdp = get_v2sdp(page->mapping->host->i_sb); + struct buffer_head *head, *bh, *next; + unsigned int curr_off = 0; + int ret = 1; + + BUG_ON(!PageLocked(page)); + if (!page_has_buffers(page)) + return 1; + + bh = head = page_buffers(page); + do { + unsigned int next_off = curr_off + bh->b_size; + next = bh->b_this_page; + + if (offset <= curr_off) + discard_buffer(sdp, bh); + + curr_off = next_off; + bh = next; + } while (bh != head); + + if (!offset) + ret = try_to_release_page(page, 0); + + return ret; +} + +static int gfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_sbd *sdp = ip->i_sbd; + get_blocks_t *gb = get_blocks; + + atomic_inc(&sdp->sd_ops_address); + + if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) || + gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip))) + return -EINVAL; + + if (rw == WRITE && !get_transaction) + gb = get_blocks_noalloc; + + return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, gb, NULL); +} + +struct address_space_operations gfs2_file_aops = { + .writepage = gfs2_writepage, + .readpage = gfs2_readpage, + .sync_page = block_sync_page, + .prepare_write = gfs2_prepare_write, + .commit_write = gfs2_commit_write, + .bmap = gfs2_bmap, + .invalidatepage = gfs2_invalidatepage, + .direct_IO = gfs2_direct_IO, +}; + --- a/fs/gfs2/ops_address.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_address.h 2005-10-10 11:28:49.263792092 -0500 @@ -0,0 +1,15 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __OPS_ADDRESS_DOT_H__ +#define __OPS_ADDRESS_DOT_H__ + +extern struct address_space_operations gfs2_file_aops; + +#endif /* __OPS_ADDRESS_DOT_H__ */ [PATCH 04/16] GFS: core fs Core file system functions. Signed-off-by: Ken Preslan Signed-off-by: David Teigland --- fs/gfs2/ops_dentry.c | 117 +++ fs/gfs2/ops_dentry.h | 15 fs/gfs2/ops_export.c | 310 ++++++++++ fs/gfs2/ops_export.h | 15 fs/gfs2/ops_file.c | 1521 +++++++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/ops_file.h | 16 fs/gfs2/ops_fstype.c | 860 ++++++++++++++++++++++++++++ fs/gfs2/ops_fstype.h | 15 fs/gfs2/ops_inode.c | 1265 ++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/ops_inode.h | 18 10 files changed, 4152 insertions(+) --- a/fs/gfs2/ops_dentry.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_dentry.c 2005-10-10 11:28:49.271790845 -0500 @@ -0,0 +1,117 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "dir.h" +#include "glock.h" +#include "ops_dentry.h" + +/** + * gfs2_drevalidate - Check directory lookup consistency + * @dentry: the mapping to check + * @nd: + * + * Check to make sure the lookup necessary to arrive at this inode from its + * parent is still good. + * + * Returns: 1 if the dentry is ok, 0 if it isn't + */ + +static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *parent = dget_parent(dentry); + struct gfs2_inode *dip = get_v2ip(parent->d_inode); + struct gfs2_sbd *sdp = dip->i_sbd; + struct inode *inode; + struct gfs2_holder d_gh; + struct gfs2_inode *ip; + struct gfs2_inum inum; + unsigned int type; + int error; + + lock_kernel(); + + atomic_inc(&sdp->sd_ops_dentry); + + inode = dentry->d_inode; + if (inode && is_bad_inode(inode)) + goto invalid; + + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); + if (error) + goto fail; + + error = gfs2_dir_search(dip, &dentry->d_name, &inum, &type); + switch (error) { + case 0: + if (!inode) + goto invalid_gunlock; + break; + case -ENOENT: + if (!inode) + goto valid_gunlock; + goto invalid_gunlock; + default: + goto fail_gunlock; + } + + ip = get_v2ip(inode); + + if (!gfs2_inum_equal(&ip->i_num, &inum)) + goto invalid_gunlock; + + if (IF2DT(ip->i_di.di_mode) != type) { + gfs2_consist_inode(dip); + goto fail_gunlock; + } + + valid_gunlock: + gfs2_glock_dq_uninit(&d_gh); + + valid: + unlock_kernel(); + dput(parent); + return 1; + + invalid_gunlock: + gfs2_glock_dq_uninit(&d_gh); + + invalid: + if (inode && S_ISDIR(inode->i_mode)) { + if (have_submounts(dentry)) + goto valid; + shrink_dcache_parent(dentry); + } + d_drop(dentry); + + unlock_kernel(); + dput(parent); + return 0; + + fail_gunlock: + gfs2_glock_dq_uninit(&d_gh); + + fail: + unlock_kernel(); + dput(parent); + return 0; +} + +struct dentry_operations gfs2_dops = { + .d_revalidate = gfs2_drevalidate, +}; + --- a/fs/gfs2/ops_dentry.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_dentry.h 2005-10-10 11:28:49.277789909 -0500 @@ -0,0 +1,15 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __OPS_DENTRY_DOT_H__ +#define __OPS_DENTRY_DOT_H__ + +extern struct dentry_operations gfs2_dops; + +#endif /* __OPS_DENTRY_DOT_H__ */ --- a/fs/gfs2/ops_export.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_export.c 2005-10-10 11:28:49.280789442 -0500 @@ -0,0 +1,310 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "dir.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "ops_export.h" +#include "rgrp.h" + +static struct dentry *gfs2_decode_fh(struct super_block *sb, + __u32 *fh, + int fh_len, + int fh_type, + int (*acceptable)(void *context, + struct dentry *dentry), + void *context) +{ + struct gfs2_inum this, parent; + + atomic_inc(&get_v2sdp(sb)->sd_ops_export); + + if (fh_type != fh_len) + return NULL; + + memset(&parent, 0, sizeof(struct gfs2_inum)); + + switch (fh_type) { + case 8: + parent.no_formal_ino = ((uint64_t)le32_to_cpu(fh[4])) << 32; + parent.no_formal_ino |= le32_to_cpu(fh[5]); + parent.no_addr = ((uint64_t)le32_to_cpu(fh[6])) << 32; + parent.no_addr |= le32_to_cpu(fh[7]); + case 4: + this.no_formal_ino = ((uint64_t)le32_to_cpu(fh[0])) << 32; + this.no_formal_ino |= le32_to_cpu(fh[1]); + this.no_addr = ((uint64_t)le32_to_cpu(fh[2])) << 32; + this.no_addr |= le32_to_cpu(fh[3]); + break; + default: + return NULL; + } + + return gfs2_export_ops.find_exported_dentry(sb, &this, &parent, + acceptable, context); +} + +static int gfs2_encode_fh(struct dentry *dentry, __u32 *fh, int *len, + int connectable) +{ + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_sbd *sdp = ip->i_sbd; + + atomic_inc(&sdp->sd_ops_export); + + if (*len < 4 || (connectable && *len < 8)) + return 255; + + fh[0] = ip->i_num.no_formal_ino >> 32; + fh[0] = cpu_to_le32(fh[0]); + fh[1] = ip->i_num.no_formal_ino & 0xFFFFFFFF; + fh[1] = cpu_to_le32(fh[1]); + fh[2] = ip->i_num.no_addr >> 32; + fh[2] = cpu_to_le32(fh[2]); + fh[3] = ip->i_num.no_addr & 0xFFFFFFFF; + fh[3] = cpu_to_le32(fh[3]); + *len = 4; + + if (!connectable || ip == sdp->sd_root_dir) + return *len; + + spin_lock(&dentry->d_lock); + inode = dentry->d_parent->d_inode; + ip = get_v2ip(inode); + gfs2_inode_hold(ip); + spin_unlock(&dentry->d_lock); + + fh[4] = ip->i_num.no_formal_ino >> 32; + fh[4] = cpu_to_le32(fh[4]); + fh[5] = ip->i_num.no_formal_ino & 0xFFFFFFFF; + fh[5] = cpu_to_le32(fh[5]); + fh[6] = ip->i_num.no_addr >> 32; + fh[6] = cpu_to_le32(fh[6]); + fh[7] = ip->i_num.no_addr & 0xFFFFFFFF; + fh[7] = cpu_to_le32(fh[7]); + *len = 8; + + gfs2_inode_put(ip); + + return *len; +} + +struct get_name_filldir { + struct gfs2_inum inum; + char *name; +}; + +static int get_name_filldir(void *opaque, const char *name, unsigned int length, + uint64_t offset, struct gfs2_inum *inum, + unsigned int type) +{ + struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque; + + if (!gfs2_inum_equal(inum, &gnfd->inum)) + return 0; + + memcpy(gnfd->name, name, length); + gnfd->name[length] = 0; + + return 1; +} + +static int gfs2_get_name(struct dentry *parent, char *name, + struct dentry *child) +{ + struct inode *dir = parent->d_inode; + struct inode *inode = child->d_inode; + struct gfs2_inode *dip, *ip; + struct get_name_filldir gnfd; + struct gfs2_holder gh; + uint64_t offset = 0; + int error; + + if (!dir) + return -EINVAL; + + atomic_inc(&get_v2sdp(dir->i_sb)->sd_ops_export); + + if (!S_ISDIR(dir->i_mode) || !inode) + return -EINVAL; + + dip = get_v2ip(dir); + ip = get_v2ip(inode); + + *name = 0; + gnfd.inum = ip->i_num; + gnfd.name = name; + + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); + if (error) + return error; + + error = gfs2_dir_read(dip, &offset, &gnfd, get_name_filldir); + + gfs2_glock_dq_uninit(&gh); + + if (!error && !*name) + error = -ENOENT; + + return error; +} + +static struct dentry *gfs2_get_parent(struct dentry *child) +{ + struct gfs2_inode *dip = get_v2ip(child->d_inode); + struct qstr dotdot = { .name = "..", .len = 2 }; + struct gfs2_inode *ip; + struct inode *inode; + struct dentry *dentry; + int error; + + atomic_inc(&dip->i_sbd->sd_ops_export); + + error = gfs2_lookupi(dip, &dotdot, 1, &ip); + if (error) + return ERR_PTR(error); + + inode = gfs2_ip2v(ip); + gfs2_inode_put(ip); + + if (!inode) + return ERR_PTR(-ENOMEM); + + dentry = d_alloc_anon(inode); + if (!dentry) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + + return dentry; +} + +static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_p) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + struct gfs2_inum *inum = (struct gfs2_inum *)inum_p; + struct gfs2_holder i_gh, ri_gh, rgd_gh; + struct gfs2_rgrpd *rgd; + struct gfs2_inode *ip; + struct inode *inode; + struct dentry *dentry; + int error; + + atomic_inc(&sdp->sd_ops_export); + + /* System files? */ + + inode = gfs2_iget(sb, inum); + if (inode) { + ip = get_v2ip(inode); + if (ip->i_num.no_formal_ino != inum->no_formal_ino) { + iput(inode); + return ERR_PTR(-ESTALE); + } + goto out_inode; + } + + error = gfs2_glock_nq_num(sdp, + inum->no_addr, &gfs2_inode_glops, + LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL, + &i_gh); + if (error) + return ERR_PTR(error); + + error = gfs2_inode_get(i_gh.gh_gl, inum, NO_CREATE, &ip); + if (error) + goto fail; + if (ip) + goto out_ip; + + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + goto fail; + + error = -EINVAL; + rgd = gfs2_blk2rgrpd(sdp, inum->no_addr); + if (!rgd) + goto fail_rindex; + + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh); + if (error) + goto fail_rindex; + + error = -ESTALE; + if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE) + goto fail_rgd; + + gfs2_glock_dq_uninit(&rgd_gh); + gfs2_glock_dq_uninit(&ri_gh); + + error = gfs2_inode_get(i_gh.gh_gl, inum, CREATE, &ip); + if (error) + goto fail; + + error = gfs2_inode_refresh(ip); + if (error) { + gfs2_inode_put(ip); + goto fail; + } + + atomic_inc(&sdp->sd_fh2dentry_misses); + + out_ip: + error = -EIO; + if (ip->i_di.di_flags & GFS2_DIF_SYSTEM) { + gfs2_inode_put(ip); + goto fail; + } + + gfs2_glock_dq_uninit(&i_gh); + + inode = gfs2_ip2v(ip); + gfs2_inode_put(ip); + + if (!inode) + return ERR_PTR(-ENOMEM); + + out_inode: + dentry = d_alloc_anon(inode); + if (!dentry) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + + return dentry; + + fail_rgd: + gfs2_glock_dq_uninit(&rgd_gh); + + fail_rindex: + gfs2_glock_dq_uninit(&ri_gh); + + fail: + gfs2_glock_dq_uninit(&i_gh); + return ERR_PTR(error); +} + +struct export_operations gfs2_export_ops = { + .decode_fh = gfs2_decode_fh, + .encode_fh = gfs2_encode_fh, + .get_name = gfs2_get_name, + .get_parent = gfs2_get_parent, + .get_dentry = gfs2_get_dentry, +}; + --- a/fs/gfs2/ops_export.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_export.h 2005-10-10 11:28:49.281789286 -0500 @@ -0,0 +1,15 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __OPS_EXPORT_DOT_H__ +#define __OPS_EXPORT_DOT_H__ + +extern struct export_operations gfs2_export_ops; + +#endif /* __OPS_EXPORT_DOT_H__ */ --- a/fs/gfs2/ops_file.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_file.c 2005-10-10 11:28:49.293787415 -0500 @@ -0,0 +1,1521 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "dir.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "ioctl.h" +#include "jdata.h" +#include "lm.h" +#include "log.h" +#include "meta_io.h" +#include "ops_file.h" +#include "ops_vm.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +/* "bad" is for NFS support */ +struct filldir_bad_entry { + char *fbe_name; + unsigned int fbe_length; + uint64_t fbe_offset; + struct gfs2_inum fbe_inum; + unsigned int fbe_type; +}; + +struct filldir_bad { + struct gfs2_sbd *fdb_sbd; + + struct filldir_bad_entry *fdb_entry; + unsigned int fdb_entry_num; + unsigned int fdb_entry_off; + + char *fdb_name; + unsigned int fdb_name_size; + unsigned int fdb_name_off; +}; + +/* For regular, non-NFS */ +struct filldir_reg { + struct gfs2_sbd *fdr_sbd; + int fdr_prefetch; + + filldir_t fdr_filldir; + void *fdr_opaque; +}; + +typedef ssize_t(*do_rw_t) (struct file *file, + char *buf, + size_t size, loff_t *offset, + unsigned int num_gh, struct gfs2_holder *ghs); + +/** + * gfs2_llseek - seek to a location in a file + * @file: the file + * @offset: the offset + * @origin: Where to seek from (SEEK_SET, SEEK_CUR, or SEEK_END) + * + * SEEK_END requires the glock for the file because it references the + * file's size. + * + * Returns: The new offset, or errno + */ + +static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + struct gfs2_holder i_gh; + loff_t error; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + if (origin == 2) { + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, + &i_gh); + if (!error) { + error = remote_llseek(file, offset, origin); + gfs2_glock_dq_uninit(&i_gh); + } + } else + error = remote_llseek(file, offset, origin); + + return error; +} + +static inline unsigned int vma2state(struct vm_area_struct *vma) +{ + if ((vma->vm_flags & (VM_MAYWRITE | VM_MAYSHARE)) == + (VM_MAYWRITE | VM_MAYSHARE)) + return LM_ST_EXCLUSIVE; + return LM_ST_SHARED; +} + +static ssize_t walk_vm_hard(struct file *file, char *buf, size_t size, + loff_t *offset, do_rw_t operation) +{ + struct gfs2_holder *ghs; + unsigned int num_gh = 0; + ssize_t count; + struct super_block *sb = file->f_dentry->d_inode->i_sb; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start = (unsigned long)buf; + unsigned long end = start + size; + int dumping = (current->flags & PF_DUMPCORE); + unsigned int x = 0; + + for (vma = find_vma(mm, start); vma; vma = vma->vm_next) { + if (end <= vma->vm_start) + break; + if (vma->vm_file && + vma->vm_file->f_dentry->d_inode->i_sb == sb) { + num_gh++; + } + } + + ghs = kcalloc((num_gh + 1), sizeof(struct gfs2_holder), GFP_KERNEL); + if (!ghs) { + if (!dumping) + up_read(&mm->mmap_sem); + return -ENOMEM; + } + + for (vma = find_vma(mm, start); vma; vma = vma->vm_next) { + if (end <= vma->vm_start) + break; + if (vma->vm_file) { + struct inode *inode = vma->vm_file->f_dentry->d_inode; + if (inode->i_sb == sb) + gfs2_holder_init(get_v2ip(inode)->i_gl, + vma2state(vma), 0, &ghs[x++]); + } + } + + if (!dumping) + up_read(&mm->mmap_sem); + + gfs2_assert(get_v2sdp(sb), x == num_gh); + + count = operation(file, buf, size, offset, num_gh, ghs); + + while (num_gh--) + gfs2_holder_uninit(&ghs[num_gh]); + kfree(ghs); + + return count; +} + +/** + * walk_vm - Walk the vmas associated with a buffer for read or write. + * If any of them are gfs2, pass the gfs2 inode down to the read/write + * worker function so that locks can be acquired in the correct order. + * @file: The file to read/write from/to + * @buf: The buffer to copy to/from + * @size: The amount of data requested + * @offset: The current file offset + * @operation: The read or write worker function + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, errno on failure + */ + +static ssize_t walk_vm(struct file *file, char *buf, size_t size, + loff_t *offset, do_rw_t operation) +{ + struct gfs2_holder gh; + + if (current->mm) { + struct super_block *sb = file->f_dentry->d_inode->i_sb; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start = (unsigned long)buf; + unsigned long end = start + size; + int dumping = (current->flags & PF_DUMPCORE); + + if (!dumping) + down_read(&mm->mmap_sem); + + for (vma = find_vma(mm, start); vma; vma = vma->vm_next) { + if (end <= vma->vm_start) + break; + if (vma->vm_file && + vma->vm_file->f_dentry->d_inode->i_sb == sb) + goto do_locks; + } + + if (!dumping) + up_read(&mm->mmap_sem); + } + + return operation(file, buf, size, offset, 0, &gh); + + do_locks: + return walk_vm_hard(file, buf, size, offset, operation); +} + +static ssize_t do_jdata_read(struct file *file, char *buf, size_t size, + loff_t *offset) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + ssize_t count = 0; + + if (*offset < 0) + return -EINVAL; + if (!access_ok(VERIFY_WRITE, buf, size)) + return -EFAULT; + + if (!(file->f_flags & O_LARGEFILE)) { + if (*offset >= MAX_NON_LFS) + return -EFBIG; + if (*offset + size > MAX_NON_LFS) + size = MAX_NON_LFS - *offset; + } + + count = gfs2_jdata_read(ip, buf, *offset, size, gfs2_copy2user); + + if (count > 0) + *offset += count; + + return count; +} + +/** + * do_read_direct - Read bytes from a file + * @file: The file to read from + * @buf: The buffer to copy into + * @size: The amount of data requested + * @offset: The current file offset + * @num_gh: The number of other locks we need to do the read + * @ghs: the locks we need plus one for our lock + * + * Outputs: Offset - updated according to number of bytes read + * + * Returns: The number of bytes read, errno on failure + */ + +static ssize_t do_read_direct(struct file *file, char *buf, size_t size, + loff_t *offset, unsigned int num_gh, + struct gfs2_holder *ghs) +{ + struct inode *inode = file->f_mapping->host; + struct gfs2_inode *ip = get_v2ip(inode); + unsigned int state = LM_ST_DEFERRED; + int flags = 0; + unsigned int x; + ssize_t count = 0; + int error; + + for (x = 0; x < num_gh; x++) + if (ghs[x].gh_gl == ip->i_gl) { + state = LM_ST_SHARED; + flags |= GL_LOCAL_EXCL; + break; + } + + gfs2_holder_init(ip->i_gl, state, flags, &ghs[num_gh]); + + error = gfs2_glock_nq_m(num_gh + 1, ghs); + if (error) + goto out; + + error = -EINVAL; + if (gfs2_is_jdata(ip)) + goto out_gunlock; + + if (gfs2_is_stuffed(ip)) { + size_t mask = bdev_hardsect_size(inode->i_sb->s_bdev) - 1; + + if (((*offset) & mask) || (((unsigned long)buf) & mask)) + goto out_gunlock; + + count = do_jdata_read(file, buf, size & ~mask, offset); + } else + count = generic_file_read(file, buf, size, offset); + + error = 0; + + out_gunlock: + gfs2_glock_dq_m(num_gh + 1, ghs); + + out: + gfs2_holder_uninit(&ghs[num_gh]); + + return (count) ? count : error; +} + +/** + * do_read_buf - Read bytes from a file + * @file: The file to read from + * @buf: The buffer to copy into + * @size: The amount of data requested + * @offset: The current file offset + * @num_gh: The number of other locks we need to do the read + * @ghs: the locks we need plus one for our lock + * + * Outputs: Offset - updated according to number of bytes read + * + * Returns: The number of bytes read, errno on failure + */ + +static ssize_t do_read_buf(struct file *file, char *buf, size_t size, + loff_t *offset, unsigned int num_gh, + struct gfs2_holder *ghs) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + ssize_t count = 0; + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]); + + error = gfs2_glock_nq_m_atime(num_gh + 1, ghs); + if (error) + goto out; + + if (gfs2_is_jdata(ip) || + (gfs2_is_stuffed(ip) && !test_bit(GIF_PAGED, &ip->i_flags))) + count = do_jdata_read(file, buf, size, offset); + else + count = generic_file_read(file, buf, size, offset); + + gfs2_glock_dq_m(num_gh + 1, ghs); + + out: + gfs2_holder_uninit(&ghs[num_gh]); + + return (count) ? count : error; +} + +/** + * gfs2_read - Read bytes from a file + * @file: The file to read from + * @buf: The buffer to copy into + * @size: The amount of data requested + * @offset: The current file offset + * + * Outputs: Offset - updated according to number of bytes read + * + * Returns: The number of bytes read, errno on failure + */ + +static ssize_t gfs2_read(struct file *file, char *buf, size_t size, + loff_t *offset) +{ + atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file); + + if (file->f_flags & O_DIRECT) + return walk_vm(file, buf, size, offset, do_read_direct); + else + return walk_vm(file, buf, size, offset, do_read_buf); +} + +/** + * grope_mapping - feel up a mapping that needs to be written + * @buf: the start of the memory to be written + * @size: the size of the memory to be written + * + * We do this after acquiring the locks on the mapping, + * but before starting the write transaction. We need to make + * sure that we don't cause recursive transactions if blocks + * need to be allocated to the file backing the mapping. + * + * Returns: errno + */ + +static int grope_mapping(char *buf, size_t size) +{ + unsigned long start = (unsigned long)buf; + unsigned long stop = start + size; + char c; + + while (start < stop) { + if (copy_from_user(&c, (char *)start, 1)) + return -EFAULT; + start += PAGE_CACHE_SIZE; + start &= PAGE_CACHE_MASK; + } + + return 0; +} + +/** + * do_write_direct_alloc - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, errno on failure + */ + +static ssize_t do_write_direct_alloc(struct file *file, char *buf, size_t size, + loff_t *offset) +{ + struct inode *inode = file->f_mapping->host; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_alloc *al = NULL; + struct iovec local_iov = { .iov_base = buf, .iov_len = size }; + struct buffer_head *dibh; + unsigned int data_blocks, ind_blocks; + ssize_t count; + int error; + + gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks); + + al = gfs2_alloc_get(ip); + + error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto fail; + + error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto fail_gunlock_q; + + al->al_requested = data_blocks + ind_blocks; + + error = gfs2_inplace_reserve(ip); + if (error) + goto fail_gunlock_q; + + error = gfs2_trans_begin(sdp, + al->al_rgd->rd_ri.ri_length + ind_blocks + + RES_DINODE + RES_STATFS + RES_QUOTA, 0); + if (error) + goto fail_ipres; + + if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) { + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ? + (~(S_ISUID | S_ISGID)) : (~S_ISUID); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + if (gfs2_is_stuffed(ip)) { + error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_sync, NULL); + if (error) + goto fail_end_trans; + } + + count = generic_file_write_nolock(file, &local_iov, 1, offset); + if (count < 0) { + error = count; + goto fail_end_trans; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + if (ip->i_di.di_size < inode->i_size) + ip->i_di.di_size = inode->i_size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + gfs2_trans_end(sdp); + + if (file->f_flags & O_SYNC) + gfs2_log_flush_glock(ip->i_gl); + + gfs2_inplace_release(ip); + gfs2_quota_unlock(ip); + gfs2_alloc_put(ip); + + return count; + + fail_end_trans: + gfs2_trans_end(sdp); + + fail_ipres: + gfs2_inplace_release(ip); + + fail_gunlock_q: + gfs2_quota_unlock(ip); + + fail: + gfs2_alloc_put(ip); + + return error; +} + +/** + * do_write_direct - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * @num_gh: The number of other locks we need to do the read + * @gh: the locks we need plus one for our lock + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, errno on failure + */ + +static ssize_t do_write_direct(struct file *file, char *buf, size_t size, + loff_t *offset, unsigned int num_gh, + struct gfs2_holder *ghs) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_file *fp = get_v2fp(file); + unsigned int state = LM_ST_DEFERRED; + int alloc_required; + unsigned int x; + size_t s; + ssize_t count = 0; + int error; + + if (test_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags)) + state = LM_ST_EXCLUSIVE; + else + for (x = 0; x < num_gh; x++) + if (ghs[x].gh_gl == ip->i_gl) { + state = LM_ST_EXCLUSIVE; + break; + } + + restart: + gfs2_holder_init(ip->i_gl, state, 0, &ghs[num_gh]); + + error = gfs2_glock_nq_m(num_gh + 1, ghs); + if (error) + goto out; + + error = -EINVAL; + if (gfs2_is_jdata(ip)) + goto out_gunlock; + + if (num_gh) { + error = grope_mapping(buf, size); + if (error) + goto out_gunlock; + } + + if (file->f_flags & O_APPEND) + *offset = ip->i_di.di_size; + + if (!(file->f_flags & O_LARGEFILE)) { + error = -EFBIG; + if (*offset >= MAX_NON_LFS) + goto out_gunlock; + if (*offset + size > MAX_NON_LFS) + size = MAX_NON_LFS - *offset; + } + + if (gfs2_is_stuffed(ip) || + *offset + size > ip->i_di.di_size || + ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID))) + alloc_required = 1; + else { + error = gfs2_write_alloc_required(ip, *offset, size, + &alloc_required); + if (error) + goto out_gunlock; + } + + if (alloc_required && state != LM_ST_EXCLUSIVE) { + gfs2_glock_dq_m(num_gh + 1, ghs); + gfs2_holder_uninit(&ghs[num_gh]); + state = LM_ST_EXCLUSIVE; + goto restart; + } + + if (alloc_required) { + set_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags); + + /* split large writes into smaller atomic transactions */ + while (size) { + s = gfs2_tune_get(sdp, gt_max_atomic_write); + if (s > size) + s = size; + + error = do_write_direct_alloc(file, buf, s, offset); + if (error < 0) + goto out_gunlock; + + buf += error; + size -= error; + count += error; + } + } else { + struct iovec local_iov = { .iov_base = buf, .iov_len = size }; + struct gfs2_holder t_gh; + + clear_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags); + + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, + GL_NEVER_RECURSE, &t_gh); + if (error) + goto out_gunlock; + + count = generic_file_write_nolock(file, &local_iov, 1, offset); + + gfs2_glock_dq_uninit(&t_gh); + } + + error = 0; + + out_gunlock: + gfs2_glock_dq_m(num_gh + 1, ghs); + + out: + gfs2_holder_uninit(&ghs[num_gh]); + + return (count) ? count : error; +} + +/** + * do_do_write_buf - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, errno on failure + */ + +static ssize_t do_do_write_buf(struct file *file, char *buf, size_t size, + loff_t *offset) +{ + struct inode *inode = file->f_mapping->host; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_alloc *al = NULL; + struct buffer_head *dibh; + unsigned int data_blocks, ind_blocks; + int alloc_required, journaled; + ssize_t count; + int error; + + journaled = gfs2_is_jdata(ip); + + gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks); + + error = gfs2_write_alloc_required(ip, *offset, size, &alloc_required); + if (error) + return error; + + if (alloc_required) { + al = gfs2_alloc_get(ip); + + error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto fail; + + error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto fail_gunlock_q; + + al->al_requested = data_blocks + ind_blocks; + + error = gfs2_inplace_reserve(ip); + if (error) + goto fail_gunlock_q; + + error = gfs2_trans_begin(sdp, + al->al_rgd->rd_ri.ri_length + + ind_blocks + + ((journaled) ? data_blocks : 0) + + RES_DINODE + RES_STATFS + RES_QUOTA, + 0); + if (error) + goto fail_ipres; + } else { + error = gfs2_trans_begin(sdp, + ((journaled) ? data_blocks : 0) + + RES_DINODE, + 0); + if (error) + goto fail_ipres; + } + + if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) { + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ? + (~(S_ISUID | S_ISGID)) : (~S_ISUID); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + if (journaled || + (gfs2_is_stuffed(ip) && !test_bit(GIF_PAGED, &ip->i_flags) && + *offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))) { + + count = gfs2_jdata_write(ip, buf, *offset, size, + gfs2_copy_from_user); + if (count < 0) { + error = count; + goto fail_end_trans; + } + + *offset += count; + } else { + struct iovec local_iov = { .iov_base = buf, .iov_len = size }; + + count = generic_file_write_nolock(file, &local_iov, 1, offset); + if (count < 0) { + error = count; + goto fail_end_trans; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto fail_end_trans; + + if (ip->i_di.di_size < inode->i_size) + ip->i_di.di_size = inode->i_size; + ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds(); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + gfs2_trans_end(sdp); + + if (file->f_flags & O_SYNC) + gfs2_log_flush_glock(ip->i_gl); + + if (alloc_required) { + gfs2_assert_warn(sdp, count != size || + al->al_alloced); + gfs2_inplace_release(ip); + gfs2_quota_unlock(ip); + gfs2_alloc_put(ip); + } + + return count; + + fail_end_trans: + gfs2_trans_end(sdp); + + fail_ipres: + if (alloc_required) + gfs2_inplace_release(ip); + + fail_gunlock_q: + if (alloc_required) + gfs2_quota_unlock(ip); + + fail: + if (alloc_required) + gfs2_alloc_put(ip); + + return error; +} + +/** + * do_write_buf - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * @num_gh: The number of other locks we need to do the read + * @gh: the locks we need plus one for our lock + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, errno on failure + */ + +static ssize_t do_write_buf(struct file *file, char *buf, size_t size, + loff_t *offset, unsigned int num_gh, + struct gfs2_holder *ghs) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + size_t s; + ssize_t count = 0; + int error; + + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]); + + error = gfs2_glock_nq_m(num_gh + 1, ghs); + if (error) + goto out; + + if (num_gh) { + error = grope_mapping(buf, size); + if (error) + goto out_gunlock; + } + + if (file->f_flags & O_APPEND) + *offset = ip->i_di.di_size; + + if (!(file->f_flags & O_LARGEFILE)) { + error = -EFBIG; + if (*offset >= MAX_NON_LFS) + goto out_gunlock; + if (*offset + size > MAX_NON_LFS) + size = MAX_NON_LFS - *offset; + } + + /* split large writes into smaller atomic transactions */ + while (size) { + s = gfs2_tune_get(sdp, gt_max_atomic_write); + if (s > size) + s = size; + + error = do_do_write_buf(file, buf, s, offset); + if (error < 0) + goto out_gunlock; + + buf += error; + size -= error; + count += error; + } + + error = 0; + + out_gunlock: + gfs2_glock_dq_m(num_gh + 1, ghs); + + out: + gfs2_holder_uninit(&ghs[num_gh]); + + return (count) ? count : error; +} + +/** + * gfs2_write - Write bytes to a file + * @file: The file to write to + * @buf: The buffer to copy from + * @size: The amount of data requested + * @offset: The current file offset + * + * Outputs: Offset - updated according to number of bytes written + * + * Returns: The number of bytes written, errno on failure + */ + +static ssize_t gfs2_write(struct file *file, const char *buf, size_t size, + loff_t *offset) +{ + struct inode *inode = file->f_mapping->host; + ssize_t count; + + atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_file); + + if (*offset < 0) + return -EINVAL; + if (!access_ok(VERIFY_READ, buf, size)) + return -EFAULT; + + down(&inode->i_sem); + if (file->f_flags & O_DIRECT) + count = walk_vm(file, (char *)buf, size, offset, + do_write_direct); + else + count = walk_vm(file, (char *)buf, size, offset, do_write_buf); + up(&inode->i_sem); + + return count; +} + +/** + * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read() + * @opaque: opaque data used by the function + * @name: the name of the directory entry + * @length: the length of the name + * @offset: the entry's offset in the directory + * @inum: the inode number the entry points to + * @type: the type of inode the entry points to + * + * Returns: 0 on success, 1 if buffer full + */ + +static int filldir_reg_func(void *opaque, const char *name, unsigned int length, + uint64_t offset, struct gfs2_inum *inum, + unsigned int type) +{ + struct filldir_reg *fdr = (struct filldir_reg *)opaque; + struct gfs2_sbd *sdp = fdr->fdr_sbd; + int error; + + error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset, + inum->no_formal_ino, type); + if (error) + return 1; + + if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) { + gfs2_glock_prefetch_num(sdp, + inum->no_addr, &gfs2_inode_glops, + LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY); + gfs2_glock_prefetch_num(sdp, + inum->no_addr, &gfs2_iopen_glops, + LM_ST_SHARED, LM_FLAG_TRY); + } + + return 0; +} + +/** + * readdir_reg - Read directory entries from a directory + * @file: The directory to read from + * @dirent: Buffer for dirents + * @filldir: Function used to do the copying + * + * Returns: errno + */ + +static int readdir_reg(struct file *file, void *dirent, filldir_t filldir) +{ + struct gfs2_inode *dip = get_v2ip(file->f_mapping->host); + struct filldir_reg fdr; + struct gfs2_holder d_gh; + uint64_t offset = file->f_pos; + int error; + + fdr.fdr_sbd = dip->i_sbd; + fdr.fdr_prefetch = 1; + fdr.fdr_filldir = filldir; + fdr.fdr_opaque = dirent; + + gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); + error = gfs2_glock_nq_atime(&d_gh); + if (error) { + gfs2_holder_uninit(&d_gh); + return error; + } + + error = gfs2_dir_read(dip, &offset, &fdr, filldir_reg_func); + + gfs2_glock_dq_uninit(&d_gh); + + file->f_pos = offset; + + return error; +} + +/** + * filldir_bad_func - Report a directory entry to the caller of gfs2_dir_read() + * @opaque: opaque data used by the function + * @name: the name of the directory entry + * @length: the length of the name + * @offset: the entry's offset in the directory + * @inum: the inode number the entry points to + * @type: the type of inode the entry points to + * + * For supporting NFS. + * + * Returns: 0 on success, 1 if buffer full + */ + +static int filldir_bad_func(void *opaque, const char *name, unsigned int length, + uint64_t offset, struct gfs2_inum *inum, + unsigned int type) +{ + struct filldir_bad *fdb = (struct filldir_bad *)opaque; + struct gfs2_sbd *sdp = fdb->fdb_sbd; + struct filldir_bad_entry *fbe; + + if (fdb->fdb_entry_off == fdb->fdb_entry_num || + fdb->fdb_name_off + length > fdb->fdb_name_size) + return 1; + + fbe = &fdb->fdb_entry[fdb->fdb_entry_off]; + fbe->fbe_name = fdb->fdb_name + fdb->fdb_name_off; + memcpy(fbe->fbe_name, name, length); + fbe->fbe_length = length; + fbe->fbe_offset = offset; + fbe->fbe_inum = *inum; + fbe->fbe_type = type; + + fdb->fdb_entry_off++; + fdb->fdb_name_off += length; + + if (!(length == 1 && *name == '.')) { + gfs2_glock_prefetch_num(sdp, + inum->no_addr, &gfs2_inode_glops, + LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY); + gfs2_glock_prefetch_num(sdp, + inum->no_addr, &gfs2_iopen_glops, + LM_ST_SHARED, LM_FLAG_TRY); + } + + return 0; +} + +/** + * readdir_bad - Read directory entries from a directory + * @file: The directory to read from + * @dirent: Buffer for dirents + * @filldir: Function used to do the copying + * + * For supporting NFS. + * + * Returns: errno + */ + +static int readdir_bad(struct file *file, void *dirent, filldir_t filldir) +{ + struct gfs2_inode *dip = get_v2ip(file->f_mapping->host); + struct gfs2_sbd *sdp = dip->i_sbd; + struct filldir_reg fdr; + unsigned int entries, size; + struct filldir_bad *fdb; + struct gfs2_holder d_gh; + uint64_t offset = file->f_pos; + unsigned int x; + struct filldir_bad_entry *fbe; + int error; + + entries = gfs2_tune_get(sdp, gt_entries_per_readdir); + size = sizeof(struct filldir_bad) + + entries * (sizeof(struct filldir_bad_entry) + GFS2_FAST_NAME_SIZE); + + fdb = kzalloc(size, GFP_KERNEL); + if (!fdb) + return -ENOMEM; + + fdb->fdb_sbd = sdp; + fdb->fdb_entry = (struct filldir_bad_entry *)(fdb + 1); + fdb->fdb_entry_num = entries; + fdb->fdb_name = ((char *)fdb) + sizeof(struct filldir_bad) + + entries * sizeof(struct filldir_bad_entry); + fdb->fdb_name_size = entries * GFS2_FAST_NAME_SIZE; + + gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); + error = gfs2_glock_nq_atime(&d_gh); + if (error) { + gfs2_holder_uninit(&d_gh); + goto out; + } + + error = gfs2_dir_read(dip, &offset, fdb, filldir_bad_func); + + gfs2_glock_dq_uninit(&d_gh); + + fdr.fdr_sbd = sdp; + fdr.fdr_prefetch = 0; + fdr.fdr_filldir = filldir; + fdr.fdr_opaque = dirent; + + for (x = 0; x < fdb->fdb_entry_off; x++) { + fbe = &fdb->fdb_entry[x]; + + error = filldir_reg_func(&fdr, + fbe->fbe_name, fbe->fbe_length, + fbe->fbe_offset, + &fbe->fbe_inum, fbe->fbe_type); + if (error) { + file->f_pos = fbe->fbe_offset; + error = 0; + goto out; + } + } + + file->f_pos = offset; + + out: + kfree(fdb); + + return error; +} + +/** + * gfs2_readdir - Read directory entries from a directory + * @file: The directory to read from + * @dirent: Buffer for dirents + * @filldir: Function used to do the copying + * + * Returns: errno + */ + +static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) +{ + int error; + + atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file); + + if (strcmp(current->comm, "nfsd") != 0) + error = readdir_reg(file, dirent, filldir); + else + error = readdir_bad(file, dirent, filldir); + + return error; +} + +/** + * gfs2_ioctl - do an ioctl on a file + * @inode: the inode + * @file: the file pointer + * @cmd: the ioctl command + * @arg: the argument + * + * Returns: errno + */ + +static int gfs2_ioctl(struct inode *inode, struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct gfs2_inode *ip = get_v2ip(inode); + + atomic_inc(&ip->i_sbd->sd_ops_file); + + switch (cmd) { + case GFS2_IOCTL_IDENTIFY: { + unsigned int x = GFS2_MAGIC; + if (copy_to_user((unsigned int *)arg, &x, sizeof(unsigned int))) + return -EFAULT; + return 0; + } + + case GFS2_IOCTL_SUPER: + return gfs2_ioctl_i(ip, (void *)arg); + + default: + return -ENOTTY; + } +} + +/** + * gfs2_mmap - + * @file: The file to map + * @vma: The VMA which described the mapping + * + * Returns: 0 or error code + */ + +static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + struct gfs2_holder i_gh; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); + error = gfs2_glock_nq_atime(&i_gh); + if (error) { + gfs2_holder_uninit(&i_gh); + return error; + } + + if (gfs2_is_jdata(ip)) { + if (vma->vm_flags & VM_MAYSHARE) + error = -EOPNOTSUPP; + else + vma->vm_ops = &gfs2_vm_ops_private; + } else { + /* This is VM_MAYWRITE instead of VM_WRITE because a call + to mprotect() can turn on VM_WRITE later. */ + + if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) == + (VM_MAYSHARE | VM_MAYWRITE)) + vma->vm_ops = &gfs2_vm_ops_sharewrite; + else + vma->vm_ops = &gfs2_vm_ops_private; + } + + gfs2_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * gfs2_open - open a file + * @inode: the inode to open + * @file: the struct file for this opening + * + * Returns: errno + */ + +static int gfs2_open(struct inode *inode, struct file *file) +{ + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_holder i_gh; + struct gfs2_file *fp; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL); + if (!fp) + return -ENOMEM; + + init_MUTEX(&fp->f_fl_mutex); + + fp->f_inode = ip; + fp->f_vfile = file; + + gfs2_assert_warn(ip->i_sbd, !get_v2fp(file)); + set_v2fp(file, fp); + + if (S_ISREG(ip->i_di.di_mode)) { + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, + &i_gh); + if (error) + goto fail; + + if (!(file->f_flags & O_LARGEFILE) && + ip->i_di.di_size > MAX_NON_LFS) { + error = -EFBIG; + goto fail_gunlock; + } + + /* Listen to the Direct I/O flag */ + + if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO) + file->f_flags |= O_DIRECT; + + /* Don't let the user open O_DIRECT on a jdata file */ + + if ((file->f_flags & O_DIRECT) && gfs2_is_jdata(ip)) { + error = -EINVAL; + goto fail_gunlock; + } + + gfs2_glock_dq_uninit(&i_gh); + } + + return 0; + + fail_gunlock: + gfs2_glock_dq_uninit(&i_gh); + + fail: + set_v2fp(file, NULL); + kfree(fp); + + return error; +} + +/** + * gfs2_close - called to close a struct file + * @inode: the inode the struct file belongs to + * @file: the struct file being closed + * + * Returns: errno + */ + +static int gfs2_close(struct inode *inode, struct file *file) +{ + struct gfs2_sbd *sdp = get_v2sdp(inode->i_sb); + struct gfs2_file *fp; + + atomic_inc(&sdp->sd_ops_file); + + fp = get_v2fp(file); + set_v2fp(file, NULL); + + if (gfs2_assert_warn(sdp, fp)) + return -EIO; + + kfree(fp); + + return 0; +} + +/** + * gfs2_fsync - sync the dirty data for a file (across the cluster) + * @file: the file that points to the dentry (we ignore this) + * @dentry: the dentry that points to the inode to sync + * + * Returns: errno + */ + +static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + struct gfs2_inode *ip = get_v2ip(dentry->d_inode); + + atomic_inc(&ip->i_sbd->sd_ops_file); + gfs2_log_flush_glock(ip->i_gl); + + return 0; +} + +/** + * gfs2_lock - acquire/release a posix lock on a file + * @file: the file pointer + * @cmd: either modify or retrieve lock state, possibly wait + * @fl: type and range of lock + * + * Returns: errno + */ + +static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + struct lm_lockname name = + { .ln_number = ip->i_num.no_addr, + .ln_type = LM_TYPE_PLOCK }; + + atomic_inc(&sdp->sd_ops_file); + + if (!(fl->fl_flags & FL_POSIX)) + return -ENOLCK; + if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + return -ENOLCK; + + if (sdp->sd_args.ar_localflocks) { + if (IS_GETLK(cmd)) { + struct file_lock *tmp; + lock_kernel(); + tmp = posix_test_lock(file, fl); + fl->fl_type = F_UNLCK; + if (tmp) + memcpy(fl, tmp, sizeof(struct file_lock)); + unlock_kernel(); + return 0; + } else { + int error; + lock_kernel(); + error = posix_lock_file_wait(file, fl); + unlock_kernel(); + return error; + } + } + + if (IS_GETLK(cmd)) + return gfs2_lm_plock_get(sdp, &name, file, fl); + else if (fl->fl_type == F_UNLCK) + return gfs2_lm_punlock(sdp, &name, file, fl); + else + return gfs2_lm_plock(sdp, &name, file, cmd, fl); +} + +/** + * gfs2_sendfile - Send bytes to a file or socket + * @in_file: The file to read from + * @out_file: The file to write to + * @count: The amount of data + * @offset: The beginning file offset + * + * Outputs: offset - updated according to number of bytes read + * + * Returns: The number of bytes sent, errno on failure + */ + +static ssize_t gfs2_sendfile(struct file *in_file, loff_t *offset, size_t count, + read_actor_t actor, void __user *target) +{ + struct gfs2_inode *ip = get_v2ip(in_file->f_mapping->host); + struct gfs2_holder gh; + ssize_t retval; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); + + retval = gfs2_glock_nq_atime(&gh); + if (retval) + goto out; + + if (gfs2_is_jdata(ip)) + retval = -EOPNOTSUPP; + else + retval = generic_file_sendfile(in_file, offset, count, actor, + target); + + gfs2_glock_dq(&gh); + + out: + gfs2_holder_uninit(&gh); + + return retval; +} + +static int do_flock(struct file *file, int cmd, struct file_lock *fl) +{ + struct gfs2_file *fp = get_v2fp(file); + struct gfs2_holder *fl_gh = &fp->f_fl_gh; + struct gfs2_inode *ip = fp->f_inode; + struct gfs2_glock *gl; + unsigned int state; + int flags; + int error = 0; + + state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; + flags = ((IS_SETLKW(cmd)) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; + + down(&fp->f_fl_mutex); + + gl = fl_gh->gh_gl; + if (gl) { + if (fl_gh->gh_state == state) + goto out; + gfs2_glock_hold(gl); + flock_lock_file_wait(file, + &(struct file_lock){.fl_type = F_UNLCK}); + gfs2_glock_dq_uninit(fl_gh); + } else { + error = gfs2_glock_get(ip->i_sbd, + ip->i_num.no_addr, &gfs2_flock_glops, + CREATE, &gl); + if (error) + goto out; + } + + gfs2_holder_init(gl, state, flags, fl_gh); + gfs2_glock_put(gl); + + error = gfs2_glock_nq(fl_gh); + if (error) { + gfs2_holder_uninit(fl_gh); + if (error == GLR_TRYFAILED) + error = -EAGAIN; + } else { + error = flock_lock_file_wait(file, fl); + gfs2_assert_warn(ip->i_sbd, !error); + } + + out: + up(&fp->f_fl_mutex); + + return error; +} + +static void do_unflock(struct file *file, struct file_lock *fl) +{ + struct gfs2_file *fp = get_v2fp(file); + struct gfs2_holder *fl_gh = &fp->f_fl_gh; + + down(&fp->f_fl_mutex); + flock_lock_file_wait(file, fl); + if (fl_gh->gh_gl) + gfs2_glock_dq_uninit(fl_gh); + up(&fp->f_fl_mutex); +} + +/** + * gfs2_flock - acquire/release a flock lock on a file + * @file: the file pointer + * @cmd: either modify or retrieve lock state, possibly wait + * @fl: type and range of lock + * + * Returns: errno + */ + +static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) +{ + struct gfs2_inode *ip = get_v2ip(file->f_mapping->host); + struct gfs2_sbd *sdp = ip->i_sbd; + + atomic_inc(&ip->i_sbd->sd_ops_file); + + if (!(fl->fl_flags & FL_FLOCK)) + return -ENOLCK; + if ((ip->i_di.di_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + return -ENOLCK; + + if (sdp->sd_args.ar_localflocks) + return flock_lock_file_wait(file, fl); + + if (fl->fl_type == F_UNLCK) { + do_unflock(file, fl); + return 0; + } else + return do_flock(file, cmd, fl); +} + +struct file_operations gfs2_file_fops = { + .llseek = gfs2_llseek, + .read = gfs2_read, + .write = gfs2_write, + .ioctl = gfs2_ioctl, + .mmap = gfs2_mmap, + .open = gfs2_open, + .release = gfs2_close, + .fsync = gfs2_fsync, + .lock = gfs2_lock, + .sendfile = gfs2_sendfile, + .flock = gfs2_flock, +}; + +struct file_operations gfs2_dir_fops = { + .readdir = gfs2_readdir, + .ioctl = gfs2_ioctl, + .open = gfs2_open, + .release = gfs2_close, + .fsync = gfs2_fsync, + .lock = gfs2_lock, + .flock = gfs2_flock, +}; + --- a/fs/gfs2/ops_file.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_file.h 2005-10-10 11:28:49.294787259 -0500 @@ -0,0 +1,16 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __OPS_FILE_DOT_H__ +#define __OPS_FILE_DOT_H__ + +extern struct file_operations gfs2_file_fops; +extern struct file_operations gfs2_dir_fops; + +#endif /* __OPS_FILE_DOT_H__ */ --- a/fs/gfs2/ops_fstype.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_fstype.c 2005-10-10 11:28:49.301786168 -0500 @@ -0,0 +1,860 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "daemon.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "lm.h" +#include "mount.h" +#include "ops_export.h" +#include "ops_fstype.h" +#include "ops_super.h" +#include "recovery.h" +#include "rgrp.h" +#include "super.h" +#include "unlinked.h" +#include "sys.h" + +#define DO 0 +#define UNDO 1 + +static struct gfs2_sbd *init_sbd(struct super_block *sb) +{ + struct gfs2_sbd *sdp; + unsigned int x; + + sdp = vmalloc(sizeof(struct gfs2_sbd)); + if (!sdp) + return NULL; + + memset(sdp, 0, sizeof(struct gfs2_sbd)); + + set_v2sdp(sb, sdp); + sdp->sd_vfs = sb; + + gfs2_tune_init(&sdp->sd_tune); + + for (x = 0; x < GFS2_GL_HASH_SIZE; x++) { + sdp->sd_gl_hash[x].hb_lock = RW_LOCK_UNLOCKED; + INIT_LIST_HEAD(&sdp->sd_gl_hash[x].hb_list); + } + INIT_LIST_HEAD(&sdp->sd_reclaim_list); + spin_lock_init(&sdp->sd_reclaim_lock); + init_waitqueue_head(&sdp->sd_reclaim_wq); + + init_MUTEX(&sdp->sd_inum_mutex); + spin_lock_init(&sdp->sd_statfs_spin); + init_MUTEX(&sdp->sd_statfs_mutex); + + spin_lock_init(&sdp->sd_rindex_spin); + init_MUTEX(&sdp->sd_rindex_mutex); + INIT_LIST_HEAD(&sdp->sd_rindex_list); + INIT_LIST_HEAD(&sdp->sd_rindex_mru_list); + INIT_LIST_HEAD(&sdp->sd_rindex_recent_list); + + INIT_LIST_HEAD(&sdp->sd_jindex_list); + spin_lock_init(&sdp->sd_jindex_spin); + init_MUTEX(&sdp->sd_jindex_mutex); + + INIT_LIST_HEAD(&sdp->sd_unlinked_list); + spin_lock_init(&sdp->sd_unlinked_spin); + init_MUTEX(&sdp->sd_unlinked_mutex); + + INIT_LIST_HEAD(&sdp->sd_quota_list); + spin_lock_init(&sdp->sd_quota_spin); + init_MUTEX(&sdp->sd_quota_mutex); + + spin_lock_init(&sdp->sd_log_lock); + init_waitqueue_head(&sdp->sd_log_trans_wq); + init_waitqueue_head(&sdp->sd_log_flush_wq); + + INIT_LIST_HEAD(&sdp->sd_log_le_gl); + INIT_LIST_HEAD(&sdp->sd_log_le_buf); + INIT_LIST_HEAD(&sdp->sd_log_le_revoke); + INIT_LIST_HEAD(&sdp->sd_log_le_rg); + INIT_LIST_HEAD(&sdp->sd_log_le_databuf); + + INIT_LIST_HEAD(&sdp->sd_log_blks_list); + init_waitqueue_head(&sdp->sd_log_blks_wait); + + INIT_LIST_HEAD(&sdp->sd_ail1_list); + INIT_LIST_HEAD(&sdp->sd_ail2_list); + + init_MUTEX(&sdp->sd_log_flush_lock); + INIT_LIST_HEAD(&sdp->sd_log_flush_list); + + INIT_LIST_HEAD(&sdp->sd_revoke_list); + + init_MUTEX(&sdp->sd_freeze_lock); + + return sdp; +} + +static void init_vfs(struct gfs2_sbd *sdp) +{ + struct super_block *sb = sdp->sd_vfs; + + sb->s_magic = GFS2_MAGIC; + sb->s_op = &gfs2_super_ops; + sb->s_export_op = &gfs2_export_ops; + sb->s_maxbytes = MAX_LFS_FILESIZE; + + if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) + set_bit(SDF_NOATIME, &sdp->sd_flags); + + /* Don't let the VFS update atimes. GFS2 handles this itself. */ + sb->s_flags |= MS_NOATIME | MS_NODIRATIME; + + /* Set up the buffer cache and fill in some fake block size values + to allow us to read-in the on-disk superblock. */ + sdp->sd_sb.sb_bsize = sb_min_blocksize(sb, GFS2_BASIC_BLOCK); + sdp->sd_sb.sb_bsize_shift = sb->s_blocksize_bits; + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT; + sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; +} + +static int init_names(struct gfs2_sbd *sdp, int silent) +{ + struct gfs2_sb *sb = NULL; + char *proto, *table; + int error = 0; + + proto = sdp->sd_args.ar_lockproto; + table = sdp->sd_args.ar_locktable; + + /* Try to autodetect */ + + if (!proto[0] || !table[0]) { + struct buffer_head *bh; + bh = sb_getblk(sdp->sd_vfs, + GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); + lock_buffer(bh); + clear_buffer_uptodate(bh); + clear_buffer_dirty(bh); + unlock_buffer(bh); + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + + if (!buffer_uptodate(bh)) { + brelse(bh); + return -EIO; + } + + sb = kmalloc(sizeof(struct gfs2_sb), GFP_KERNEL); + if (!sb) { + brelse(bh); + return -ENOMEM; + } + gfs2_sb_in(sb, bh->b_data); + brelse(bh); + + error = gfs2_check_sb(sdp, sb, silent); + if (error) + goto out; + + if (!proto[0]) + proto = sb->sb_lockproto; + if (!table[0]) + table = sb->sb_locktable; + } + + if (!table[0]) + table = sdp->sd_vfs->s_id; + + snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto); + snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table); + + out: + kfree(sb); + + return error; +} + +static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, + int undo) +{ + struct task_struct *p; + int error = 0; + + if (undo) + goto fail_trans; + + p = kthread_run(gfs2_scand, sdp, "gfs2_scand"); + error = IS_ERR(p); + if (error) { + fs_err(sdp, "can't start scand thread: %d\n", error); + return error; + } + sdp->sd_scand_process = p; + + for (sdp->sd_glockd_num = 0; + sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd; + sdp->sd_glockd_num++) { + p = kthread_run(gfs2_glockd, sdp, "gfs2_glockd"); + error = IS_ERR(p); + if (error) { + fs_err(sdp, "can't start glockd thread: %d\n", error); + goto fail; + } + sdp->sd_glockd_process[sdp->sd_glockd_num] = p; + } + + error = gfs2_glock_nq_num(sdp, + GFS2_MOUNT_LOCK, &gfs2_nondisk_glops, + LM_ST_EXCLUSIVE, LM_FLAG_NOEXP | GL_NOCACHE, + mount_gh); + if (error) { + fs_err(sdp, "can't acquire mount glock: %d\n", error); + goto fail; + } + + error = gfs2_glock_nq_num(sdp, + GFS2_LIVE_LOCK, &gfs2_nondisk_glops, + LM_ST_SHARED, + LM_FLAG_NOEXP | GL_EXACT | GL_NEVER_RECURSE, + &sdp->sd_live_gh); + if (error) { + fs_err(sdp, "can't acquire live glock: %d\n", error); + goto fail_mount; + } + + error = gfs2_glock_get(sdp, GFS2_RENAME_LOCK, &gfs2_nondisk_glops, + CREATE, &sdp->sd_rename_gl); + if (error) { + fs_err(sdp, "can't create rename glock: %d\n", error); + goto fail_live; + } + + error = gfs2_glock_get(sdp, GFS2_TRANS_LOCK, &gfs2_trans_glops, + CREATE, &sdp->sd_trans_gl); + if (error) { + fs_err(sdp, "can't create transaction glock: %d\n", error); + goto fail_rename; + } + set_bit(GLF_STICKY, &sdp->sd_trans_gl->gl_flags); + + return 0; + + fail_trans: + gfs2_glock_put(sdp->sd_trans_gl); + + fail_rename: + gfs2_glock_put(sdp->sd_rename_gl); + + fail_live: + gfs2_glock_dq_uninit(&sdp->sd_live_gh); + + fail_mount: + gfs2_glock_dq_uninit(mount_gh); + + fail: + while (sdp->sd_glockd_num--) + kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]); + + kthread_stop(sdp->sd_scand_process); + + return error; +} + +static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) +{ + struct super_block *sb = sdp->sd_vfs; + struct gfs2_holder sb_gh; + int error = 0; + + if (undo) { + gfs2_inode_put(sdp->sd_master_dir); + return 0; + } + + error = gfs2_glock_nq_num(sdp, + GFS2_SB_LOCK, &gfs2_meta_glops, + LM_ST_SHARED, 0, &sb_gh); + if (error) { + fs_err(sdp, "can't acquire superblock glock: %d\n", error); + return error; + } + + error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); + if (error) { + fs_err(sdp, "can't read superblock: %d\n", error); + goto out; + } + + /* Set up the buffer cache and SB for real */ + error = -EINVAL; + if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { + fs_err(sdp, "FS block size (%u) is too small for device " + "block size (%u)\n", + sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); + goto out; + } + if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { + fs_err(sdp, "FS block size (%u) is too big for machine " + "page size (%u)\n", + sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); + goto out; + } + + /* Get rid of buffers from the original block size */ + sb_gh.gh_gl->gl_ops->go_inval(sb_gh.gh_gl, DIO_METADATA | DIO_DATA); + sb_gh.gh_gl->gl_aspace->i_blkbits = sdp->sd_sb.sb_bsize_shift; + + sb_set_blocksize(sb, sdp->sd_sb.sb_bsize); + + error = gfs2_lookup_master_dir(sdp); + if (error) + fs_err(sdp, "can't read in master directory: %d\n", error); + + out: + gfs2_glock_dq_uninit(&sb_gh); + + return error; +} + +static int init_journal(struct gfs2_sbd *sdp, int undo) +{ + struct gfs2_holder ji_gh; + struct task_struct *p; + int jindex = 1; + int error = 0; + + if (undo) { + jindex = 0; + goto fail_recoverd; + } + + error = gfs2_lookup_simple(sdp->sd_master_dir, "jindex", + &sdp->sd_jindex); + if (error) { + fs_err(sdp, "can't lookup journal index: %d\n", error); + return error; + } + set_bit(GLF_STICKY, &sdp->sd_jindex->i_gl->gl_flags); + + /* Load in the journal index special file */ + + error = gfs2_jindex_hold(sdp, &ji_gh); + if (error) { + fs_err(sdp, "can't read journal index: %d\n", error); + goto fail; + } + + error = -EINVAL; + if (!gfs2_jindex_size(sdp)) { + fs_err(sdp, "no journals!\n"); + goto fail_jindex; + } + + if (sdp->sd_args.ar_spectator) { + sdp->sd_jdesc = gfs2_jdesc_find(sdp, 0); + sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; + } else { + if (sdp->sd_lockstruct.ls_jid >= gfs2_jindex_size(sdp)) { + fs_err(sdp, "can't mount journal #%u\n", + sdp->sd_lockstruct.ls_jid); + fs_err(sdp, "there are only %u journals (0 - %u)\n", + gfs2_jindex_size(sdp), + gfs2_jindex_size(sdp) - 1); + goto fail_jindex; + } + sdp->sd_jdesc = gfs2_jdesc_find(sdp, sdp->sd_lockstruct.ls_jid); + + error = gfs2_glock_nq_num(sdp, + sdp->sd_lockstruct.ls_jid, + &gfs2_journal_glops, + LM_ST_EXCLUSIVE, LM_FLAG_NOEXP, + &sdp->sd_journal_gh); + if (error) { + fs_err(sdp, "can't acquire journal glock: %d\n", error); + goto fail_jindex; + } + + error = gfs2_glock_nq_init(sdp->sd_jdesc->jd_inode->i_gl, + LM_ST_SHARED, + LM_FLAG_NOEXP | GL_EXACT, + &sdp->sd_jinode_gh); + if (error) { + fs_err(sdp, "can't acquire journal inode glock: %d\n", + error); + goto fail_journal_gh; + } + + error = gfs2_jdesc_check(sdp->sd_jdesc); + if (error) { + fs_err(sdp, "my journal (%u) is bad: %d\n", + sdp->sd_jdesc->jd_jid, error); + goto fail_jinode_gh; + } + sdp->sd_log_blks_free = sdp->sd_jdesc->jd_blocks; + } + + if (sdp->sd_lockstruct.ls_first) { + unsigned int x; + for (x = 0; x < sdp->sd_journals; x++) { + error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x), + WAIT); + if (error) { + fs_err(sdp, "error recovering journal %u: %d\n", + x, error); + goto fail_jinode_gh; + } + } + + gfs2_lm_others_may_mount(sdp); + } else if (!sdp->sd_args.ar_spectator) { + error = gfs2_recover_journal(sdp->sd_jdesc, WAIT); + if (error) { + fs_err(sdp, "error recovering my journal: %d\n", error); + goto fail_jinode_gh; + } + } + + set_bit(SDF_JOURNAL_CHECKED, &sdp->sd_flags); + gfs2_glock_dq_uninit(&ji_gh); + jindex = 0; + + /* Disown my Journal glock */ + + sdp->sd_journal_gh.gh_owner = NULL; + sdp->sd_jinode_gh.gh_owner = NULL; + + p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd"); + error = IS_ERR(p); + if (error) { + fs_err(sdp, "can't start recoverd thread: %d\n", error); + goto fail_jinode_gh; + } + sdp->sd_recoverd_process = p; + + return 0; + + fail_recoverd: + kthread_stop(sdp->sd_recoverd_process); + + fail_jinode_gh: + if (!sdp->sd_args.ar_spectator) + gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); + + fail_journal_gh: + if (!sdp->sd_args.ar_spectator) + gfs2_glock_dq_uninit(&sdp->sd_journal_gh); + + fail_jindex: + gfs2_jindex_free(sdp); + if (jindex) + gfs2_glock_dq_uninit(&ji_gh); + + fail: + gfs2_inode_put(sdp->sd_jindex); + + return error; +} + +static int init_inodes(struct gfs2_sbd *sdp, int undo) +{ + struct inode *inode; + struct dentry **dentry = &sdp->sd_vfs->s_root; + int error = 0; + + if (undo) + goto fail_dput; + + /* Read in the master inode number inode */ + error = gfs2_lookup_simple(sdp->sd_master_dir, "inum", + &sdp->sd_inum_inode); + if (error) { + fs_err(sdp, "can't read in inum inode: %d\n", error); + return error; + } + + /* Read in the master statfs inode */ + error = gfs2_lookup_simple(sdp->sd_master_dir, "statfs", + &sdp->sd_statfs_inode); + if (error) { + fs_err(sdp, "can't read in statfs inode: %d\n", error); + goto fail; + } + + /* Read in the resource index inode */ + error = gfs2_lookup_simple(sdp->sd_master_dir, "rindex", + &sdp->sd_rindex); + if (error) { + fs_err(sdp, "can't get resource index inode: %d\n", error); + goto fail_statfs; + } + set_bit(GLF_STICKY, &sdp->sd_rindex->i_gl->gl_flags); + sdp->sd_rindex_vn = sdp->sd_rindex->i_gl->gl_vn - 1; + + /* Read in the quota inode */ + error = gfs2_lookup_simple(sdp->sd_master_dir, "quota", + &sdp->sd_quota_inode); + if (error) { + fs_err(sdp, "can't get quota file inode: %d\n", error); + goto fail_rindex; + } + + /* Get the root inode */ + error = gfs2_lookup_simple(sdp->sd_master_dir, "root", + &sdp->sd_root_dir); + if (error) { + fs_err(sdp, "can't read in root inode: %d\n", error); + goto fail_qinode; + } + + /* Get the root inode/dentry */ + inode = gfs2_ip2v(sdp->sd_root_dir); + if (!inode) { + fs_err(sdp, "can't get root inode\n"); + error = -ENOMEM; + goto fail_rooti; + } + + *dentry = d_alloc_root(inode); + if (!*dentry) { + iput(inode); + fs_err(sdp, "can't get root dentry\n"); + error = -ENOMEM; + goto fail_rooti; + } + + return 0; + + fail_dput: + dput(*dentry); + *dentry = NULL; + + fail_rooti: + gfs2_inode_put(sdp->sd_root_dir); + + fail_qinode: + gfs2_inode_put(sdp->sd_quota_inode); + + fail_rindex: + gfs2_clear_rgrpd(sdp); + gfs2_inode_put(sdp->sd_rindex); + + fail_statfs: + gfs2_inode_put(sdp->sd_statfs_inode); + + fail: + gfs2_inode_put(sdp->sd_inum_inode); + + return error; +} + +static int init_per_node(struct gfs2_sbd *sdp, int undo) +{ + struct gfs2_inode *pn = NULL; + char buf[30]; + int error = 0; + + if (sdp->sd_args.ar_spectator) + return 0; + + if (undo) + goto fail_qc_gh; + + error = gfs2_lookup_simple(sdp->sd_master_dir, "per_node", &pn); + if (error) { + fs_err(sdp, "can't find per_node directory: %d\n", error); + return error; + } + + sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid); + error = gfs2_lookup_simple(pn, buf, &sdp->sd_ir_inode); + if (error) { + fs_err(sdp, "can't find local \"ir\" file: %d\n", error); + goto fail; + } + + sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid); + error = gfs2_lookup_simple(pn, buf, &sdp->sd_sc_inode); + if (error) { + fs_err(sdp, "can't find local \"sc\" file: %d\n", error); + goto fail_ir_i; + } + + sprintf(buf, "unlinked_tag%u", sdp->sd_jdesc->jd_jid); + error = gfs2_lookup_simple(pn, buf, &sdp->sd_ut_inode); + if (error) { + fs_err(sdp, "can't find local \"ut\" file: %d\n", error); + goto fail_sc_i; + } + + sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid); + error = gfs2_lookup_simple(pn, buf, &sdp->sd_qc_inode); + if (error) { + fs_err(sdp, "can't find local \"qc\" file: %d\n", error); + goto fail_ut_i; + } + + gfs2_inode_put(pn); + pn = NULL; + + error = gfs2_glock_nq_init(sdp->sd_ir_inode->i_gl, + LM_ST_EXCLUSIVE, GL_NEVER_RECURSE, + &sdp->sd_ir_gh); + if (error) { + fs_err(sdp, "can't lock local \"ir\" file: %d\n", error); + goto fail_qc_i; + } + + error = gfs2_glock_nq_init(sdp->sd_sc_inode->i_gl, + LM_ST_EXCLUSIVE, GL_NEVER_RECURSE, + &sdp->sd_sc_gh); + if (error) { + fs_err(sdp, "can't lock local \"sc\" file: %d\n", error); + goto fail_ir_gh; + } + + error = gfs2_glock_nq_init(sdp->sd_ut_inode->i_gl, + LM_ST_EXCLUSIVE, GL_NEVER_RECURSE, + &sdp->sd_ut_gh); + if (error) { + fs_err(sdp, "can't lock local \"ut\" file: %d\n", error); + goto fail_sc_gh; + } + + error = gfs2_glock_nq_init(sdp->sd_qc_inode->i_gl, + LM_ST_EXCLUSIVE, GL_NEVER_RECURSE, + &sdp->sd_qc_gh); + if (error) { + fs_err(sdp, "can't lock local \"qc\" file: %d\n", error); + goto fail_ut_gh; + } + + return 0; + + fail_qc_gh: + gfs2_glock_dq_uninit(&sdp->sd_qc_gh); + + fail_ut_gh: + gfs2_glock_dq_uninit(&sdp->sd_ut_gh); + + fail_sc_gh: + gfs2_glock_dq_uninit(&sdp->sd_sc_gh); + + fail_ir_gh: + gfs2_glock_dq_uninit(&sdp->sd_ir_gh); + + fail_qc_i: + gfs2_inode_put(sdp->sd_qc_inode); + + fail_ut_i: + gfs2_inode_put(sdp->sd_ut_inode); + + fail_sc_i: + gfs2_inode_put(sdp->sd_sc_inode); + + fail_ir_i: + gfs2_inode_put(sdp->sd_ir_inode); + + fail: + if (pn) + gfs2_inode_put(pn); + return error; +} + +static int init_threads(struct gfs2_sbd *sdp, int undo) +{ + struct task_struct *p; + int error = 0; + + if (undo) + goto fail_inoded; + + sdp->sd_log_flush_time = jiffies; + sdp->sd_jindex_refresh_time = jiffies; + + p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); + error = IS_ERR(p); + if (error) { + fs_err(sdp, "can't start logd thread: %d\n", error); + return error; + } + sdp->sd_logd_process = p; + + sdp->sd_statfs_sync_time = jiffies; + sdp->sd_quota_sync_time = jiffies; + + p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); + error = IS_ERR(p); + if (error) { + fs_err(sdp, "can't start quotad thread: %d\n", error); + goto fail; + } + sdp->sd_quotad_process = p; + + p = kthread_run(gfs2_inoded, sdp, "gfs2_inoded"); + error = IS_ERR(p); + if (error) { + fs_err(sdp, "can't start inoded thread: %d\n", error); + goto fail_quotad; + } + sdp->sd_inoded_process = p; + + return 0; + + fail_inoded: + kthread_stop(sdp->sd_inoded_process); + + fail_quotad: + kthread_stop(sdp->sd_quotad_process); + + fail: + kthread_stop(sdp->sd_logd_process); + + return error; +} + +/** + * fill_super - Read in superblock + * @sb: The VFS superblock + * @data: Mount options + * @silent: Don't complain if it's not a GFS2 filesystem + * + * Returns: errno + */ + +static int fill_super(struct super_block *sb, void *data, int silent) +{ + struct gfs2_sbd *sdp; + struct gfs2_holder mount_gh; + int error; + + sdp = init_sbd(sb); + if (!sdp) { + printk("GFS2: can't alloc struct gfs2_sbd\n"); + return -ENOMEM; + } + + error = gfs2_mount_args(sdp, (char *)data, 0); + if (error) { + printk("GFS2: can't parse mount arguments\n"); + goto fail; + } + + init_vfs(sdp); + + error = init_names(sdp, silent); + if (error) + goto fail; + + error = gfs2_sys_fs_add(sdp); + if (error) + goto fail; + + error = gfs2_lm_mount(sdp, silent); + if (error) + goto fail_sys; + + error = init_locking(sdp, &mount_gh, DO); + if (error) + goto fail_lm; + + error = init_sb(sdp, silent, DO); + if (error) + goto fail_locking; + + error = init_journal(sdp, DO); + if (error) + goto fail_sb; + + error = init_inodes(sdp, DO); + if (error) + goto fail_journals; + + error = init_per_node(sdp, DO); + if (error) + goto fail_inodes; + + error = gfs2_statfs_init(sdp); + if (error) { + fs_err(sdp, "can't initialize statfs subsystem: %d\n", error); + goto fail_per_node; + } + + error = init_threads(sdp, DO); + if (error) + goto fail_per_node; + + if (!(sb->s_flags & MS_RDONLY)) { + error = gfs2_make_fs_rw(sdp); + if (error) { + fs_err(sdp, "can't make FS RW: %d\n", error); + goto fail_threads; + } + } + + gfs2_glock_dq_uninit(&mount_gh); + + return 0; + + fail_threads: + init_threads(sdp, UNDO); + + fail_per_node: + init_per_node(sdp, UNDO); + + fail_inodes: + init_inodes(sdp, UNDO); + + fail_journals: + init_journal(sdp, UNDO); + + fail_sb: + init_sb(sdp, 0, UNDO); + + fail_locking: + init_locking(sdp, &mount_gh, UNDO); + + fail_lm: + gfs2_gl_hash_clear(sdp, WAIT); + gfs2_lm_unmount(sdp); + while (invalidate_inodes(sb)) + yield(); + + fail_sys: + gfs2_sys_fs_del(sdp); + + fail: + vfree(sdp); + set_v2sdp(sb, NULL); + + return error; +} + +static struct super_block *gfs2_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, fill_super); +} + +struct file_system_type gfs2_fs_type = { + .name = "gfs2", + .fs_flags = FS_REQUIRES_DEV, + .get_sb = gfs2_get_sb, + .kill_sb = kill_block_super, + .owner = THIS_MODULE, +}; + --- a/fs/gfs2/ops_fstype.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_fstype.h 2005-10-10 11:28:49.305785544 -0500 @@ -0,0 +1,15 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __OPS_FSTYPE_DOT_H__ +#define __OPS_FSTYPE_DOT_H__ + +extern struct file_system_type gfs2_fs_type; + +#endif /* __OPS_FSTYPE_DOT_H__ */ --- a/fs/gfs2/ops_inode.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_inode.c 2005-10-10 11:28:49.307785232 -0500 @@ -0,0 +1,1265 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "acl.h" +#include "bmap.h" +#include "dir.h" +#include "eaops.h" +#include "eattr.h" +#include "glock.h" +#include "inode.h" +#include "meta_io.h" +#include "ops_dentry.h" +#include "ops_inode.h" +#include "page.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" +#include "unlinked.h" + +/** + * gfs2_create - Create a file + * @dir: The directory in which to create the file + * @dentry: The dentry of the new file + * @mode: The mode of the new file + * + * Returns: errno + */ + +static int gfs2_create(struct inode *dir, struct dentry *dentry, + int mode, struct nameidata *nd) +{ + struct gfs2_inode *dip = get_v2ip(dir), *ip; + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_holder ghs[2]; + struct inode *inode; + int new = 1; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + gfs2_holder_init(dip->i_gl, 0, 0, ghs); + + for (;;) { + error = gfs2_createi(ghs, &dentry->d_name, S_IFREG | mode); + if (!error) { + ip = get_gl2ip(ghs[1].gh_gl); + gfs2_trans_end(sdp); + if (dip->i_alloc->al_rgd) + gfs2_inplace_release(dip); + gfs2_quota_unlock(dip); + gfs2_alloc_put(dip); + gfs2_glock_dq_uninit_m(2, ghs); + break; + } else if (error != -EEXIST || + (nd->intent.open.flags & O_EXCL)) { + gfs2_holder_uninit(ghs); + return error; + } + + error = gfs2_lookupi(dip, &dentry->d_name, 0, &ip); + if (!error) { + new = 0; + gfs2_holder_uninit(ghs); + break; + } else if (error != -ENOENT) { + gfs2_holder_uninit(ghs); + return error; + } + } + + inode = gfs2_ip2v(ip); + gfs2_inode_put(ip); + + if (!inode) + return -ENOMEM; + + d_instantiate(dentry, inode); + if (new) + mark_inode_dirty(inode); + + return 0; +} + +/** + * gfs2_lookup - Look up a filename in a directory and return its inode + * @dir: The directory inode + * @dentry: The dentry of the new inode + * @nd: passed from Linux VFS, ignored by us + * + * Called by the VFS layer. Lock dir and call gfs2_lookupi() + * + * Returns: errno + */ + +static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct gfs2_inode *dip = get_v2ip(dir), *ip; + struct gfs2_sbd *sdp = dip->i_sbd; + struct inode *inode = NULL; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + if (!sdp->sd_args.ar_localcaching) + dentry->d_op = &gfs2_dops; + + error = gfs2_lookupi(dip, &dentry->d_name, 0, &ip); + if (!error) { + inode = gfs2_ip2v(ip); + gfs2_inode_put(ip); + if (!inode) + return ERR_PTR(-ENOMEM); + + } else if (error != -ENOENT) + return ERR_PTR(error); + + if (inode) + return d_splice_alias(inode, dentry); + d_add(dentry, inode); + + return NULL; +} + +/** + * gfs2_link - Link to a file + * @old_dentry: The inode to link + * @dir: Add link to this directory + * @dentry: The name of the link + * + * Link the inode in "old_dentry" into the directory "dir" with the + * name in "dentry". + * + * Returns: errno + */ + +static int gfs2_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) +{ + struct gfs2_inode *dip = get_v2ip(dir); + struct gfs2_sbd *sdp = dip->i_sbd; + struct inode *inode = old_dentry->d_inode; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_holder ghs[2]; + int alloc_required; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + if (S_ISDIR(ip->i_di.di_mode)) + return -EPERM; + + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + + error = gfs2_glock_nq_m(2, ghs); + if (error) + goto out; + + error = gfs2_repermission(dir, MAY_WRITE | MAY_EXEC, NULL); + if (error) + goto out_gunlock; + + error = gfs2_dir_search(dip, &dentry->d_name, NULL, NULL); + switch (error) { + case -ENOENT: + break; + case 0: + error = -EEXIST; + default: + goto out_gunlock; + } + + error = -EINVAL; + if (!dip->i_di.di_nlink) + goto out_gunlock; + error = -EFBIG; + if (dip->i_di.di_entries == (uint32_t)-1) + goto out_gunlock; + error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto out_gunlock; + error = -EINVAL; + if (!ip->i_di.di_nlink) + goto out_gunlock; + error = -EMLINK; + if (ip->i_di.di_nlink == (uint32_t)-1) + goto out_gunlock; + + error = gfs2_diradd_alloc_required(dip, &dentry->d_name, + &alloc_required); + if (error) + goto out_gunlock; + + if (alloc_required) { + struct gfs2_alloc *al = gfs2_alloc_get(dip); + + error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out_alloc; + + error = gfs2_quota_check(dip, dip->i_di.di_uid, + dip->i_di.di_gid); + if (error) + goto out_gunlock_q; + + al->al_requested = sdp->sd_max_dirres; + + error = gfs2_inplace_reserve(dip); + if (error) + goto out_gunlock_q; + + error = gfs2_trans_begin(sdp, + sdp->sd_max_dirres + + al->al_rgd->rd_ri.ri_length + + 2 * RES_DINODE + RES_STATFS + + RES_QUOTA, 0); + if (error) + goto out_ipres; + } else { + error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF, 0); + if (error) + goto out_ipres; + } + + error = gfs2_dir_add(dip, &dentry->d_name, &ip->i_num, + IF2DT(ip->i_di.di_mode)); + if (error) + goto out_end_trans; + + error = gfs2_change_nlink(ip, +1); + + out_end_trans: + gfs2_trans_end(sdp); + + out_ipres: + if (alloc_required) + gfs2_inplace_release(dip); + + out_gunlock_q: + if (alloc_required) + gfs2_quota_unlock(dip); + + out_alloc: + if (alloc_required) + gfs2_alloc_put(dip); + + out_gunlock: + gfs2_glock_dq_m(2, ghs); + + out: + gfs2_holder_uninit(ghs); + gfs2_holder_uninit(ghs + 1); + + if (!error) { + atomic_inc(&inode->i_count); + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + } + + return error; +} + +/** + * gfs2_unlink - Unlink a file + * @dir: The inode of the directory containing the file to unlink + * @dentry: The file itself + * + * Unlink a file. Call gfs2_unlinki() + * + * Returns: errno + */ + +static int gfs2_unlink(struct inode *dir, struct dentry *dentry) +{ + struct gfs2_inode *dip = get_v2ip(dir); + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_inode *ip = get_v2ip(dentry->d_inode); + struct gfs2_unlinked *ul; + struct gfs2_holder ghs[2]; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + error = gfs2_unlinked_get(sdp, &ul); + if (error) + return error; + + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + + error = gfs2_glock_nq_m(2, ghs); + if (error) + goto out; + + error = gfs2_unlink_ok(dip, &dentry->d_name, ip); + if (error) + goto out_gunlock; + + error = gfs2_trans_begin(sdp, 2 * RES_DINODE + RES_LEAF + + RES_UNLINKED, 0); + if (error) + goto out_gunlock; + + error = gfs2_unlinki(dip, &dentry->d_name, ip,ul); + + gfs2_trans_end(sdp); + + out_gunlock: + gfs2_glock_dq_m(2, ghs); + + out: + gfs2_holder_uninit(ghs); + gfs2_holder_uninit(ghs + 1); + + gfs2_unlinked_put(sdp, ul); + + return error; +} + +/** + * gfs2_symlink - Create a symlink + * @dir: The directory to create the symlink in + * @dentry: The dentry to put the symlink in + * @symname: The thing which the link points to + * + * Returns: errno + */ + +static int gfs2_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + struct gfs2_inode *dip = get_v2ip(dir), *ip; + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_holder ghs[2]; + struct inode *inode; + struct buffer_head *dibh; + int size; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + /* Must be stuffed with a null terminator for gfs2_follow_link() */ + size = strlen(symname); + if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) + return -ENAMETOOLONG; + + gfs2_holder_init(dip->i_gl, 0, 0, ghs); + + error = gfs2_createi(ghs, &dentry->d_name, S_IFLNK | S_IRWXUGO); + if (error) { + gfs2_holder_uninit(ghs); + return error; + } + + ip = get_gl2ip(ghs[1].gh_gl); + + ip->i_di.di_size = size; + + error = gfs2_meta_inode_buffer(ip, &dibh); + + if (!gfs2_assert_withdraw(sdp, !error)) { + gfs2_dinode_out(&ip->i_di, dibh->b_data); + memcpy(dibh->b_data + sizeof(struct gfs2_dinode), symname, + size); + brelse(dibh); + } + + gfs2_trans_end(sdp); + if (dip->i_alloc->al_rgd) + gfs2_inplace_release(dip); + gfs2_quota_unlock(dip); + gfs2_alloc_put(dip); + + gfs2_glock_dq_uninit_m(2, ghs); + + inode = gfs2_ip2v(ip); + gfs2_inode_put(ip); + + if (!inode) + return -ENOMEM; + + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + + return 0; +} + +/** + * gfs2_mkdir - Make a directory + * @dir: The parent directory of the new one + * @dentry: The dentry of the new directory + * @mode: The mode of the new directory + * + * Returns: errno + */ + +static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct gfs2_inode *dip = get_v2ip(dir), *ip; + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_holder ghs[2]; + struct inode *inode; + struct buffer_head *dibh; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + gfs2_holder_init(dip->i_gl, 0, 0, ghs); + + error = gfs2_createi(ghs, &dentry->d_name, S_IFDIR | mode); + if (error) { + gfs2_holder_uninit(ghs); + return error; + } + + ip = get_gl2ip(ghs[1].gh_gl); + + ip->i_di.di_nlink = 2; + ip->i_di.di_size = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); + ip->i_di.di_flags |= GFS2_DIF_JDATA; + ip->i_di.di_payload_format = GFS2_FORMAT_DE; + ip->i_di.di_entries = 2; + + error = gfs2_meta_inode_buffer(ip, &dibh); + + if (!gfs2_assert_withdraw(sdp, !error)) { + struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; + struct gfs2_dirent *dent; + + gfs2_dirent_alloc(ip, dibh, 1, &dent); + + dent->de_inum = di->di_num; /* already GFS2 endian */ + dent->de_hash = gfs2_disk_hash(".", 1); + dent->de_hash = cpu_to_le32(dent->de_hash); + dent->de_type = DT_DIR; + memcpy((char *) (dent + 1), ".", 1); + di->di_entries = cpu_to_le32(1); + + gfs2_dirent_alloc(ip, dibh, 2, &dent); + + gfs2_inum_out(&dip->i_num, (char *) &dent->de_inum); + dent->de_hash = gfs2_disk_hash("..", 2); + dent->de_hash = cpu_to_le32(dent->de_hash); + dent->de_type = DT_DIR; + memcpy((char *) (dent + 1), "..", 2); + + gfs2_dinode_out(&ip->i_di, (char *)di); + + brelse(dibh); + } + + error = gfs2_change_nlink(dip, +1); + gfs2_assert_withdraw(sdp, !error); /* dip already pinned */ + + gfs2_trans_end(sdp); + if (dip->i_alloc->al_rgd) + gfs2_inplace_release(dip); + gfs2_quota_unlock(dip); + gfs2_alloc_put(dip); + + gfs2_glock_dq_uninit_m(2, ghs); + + inode = gfs2_ip2v(ip); + gfs2_inode_put(ip); + + if (!inode) + return -ENOMEM; + + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + + return 0; +} + +/** + * gfs2_rmdir - Remove a directory + * @dir: The parent directory of the directory to be removed + * @dentry: The dentry of the directory to remove + * + * Remove a directory. Call gfs2_rmdiri() + * + * Returns: errno + */ + +static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct gfs2_inode *dip = get_v2ip(dir); + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_inode *ip = get_v2ip(dentry->d_inode); + struct gfs2_unlinked *ul; + struct gfs2_holder ghs[2]; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + error = gfs2_unlinked_get(sdp, &ul); + if (error) + return error; + + gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + + error = gfs2_glock_nq_m(2, ghs); + if (error) + goto out; + + error = gfs2_unlink_ok(dip, &dentry->d_name, ip); + if (error) + goto out_gunlock; + + if (ip->i_di.di_entries < 2) { + if (gfs2_consist_inode(ip)) + gfs2_dinode_print(&ip->i_di); + error = -EIO; + goto out_gunlock; + } + if (ip->i_di.di_entries > 2) { + error = -ENOTEMPTY; + goto out_gunlock; + } + + error = gfs2_trans_begin(sdp, 2 * RES_DINODE + 3 * RES_LEAF + + RES_UNLINKED, 0); + if (error) + goto out_gunlock; + + error = gfs2_rmdiri(dip, &dentry->d_name, ip, ul); + + gfs2_trans_end(sdp); + + out_gunlock: + gfs2_glock_dq_m(2, ghs); + + out: + gfs2_holder_uninit(ghs); + gfs2_holder_uninit(ghs + 1); + + gfs2_unlinked_put(sdp, ul); + + return error; +} + +/** + * gfs2_mknod - Make a special file + * @dir: The directory in which the special file will reside + * @dentry: The dentry of the special file + * @mode: The mode of the special file + * @rdev: The device specification of the special file + * + */ + +static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, + dev_t dev) +{ + struct gfs2_inode *dip = get_v2ip(dir), *ip; + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_holder ghs[2]; + struct inode *inode; + struct buffer_head *dibh; + uint32_t major = 0, minor = 0; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + switch (mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + major = MAJOR(dev); + minor = MINOR(dev); + break; + case S_IFIFO: + case S_IFSOCK: + break; + default: + return -EOPNOTSUPP; + }; + + gfs2_holder_init(dip->i_gl, 0, 0, ghs); + + error = gfs2_createi(ghs, &dentry->d_name, mode); + if (error) { + gfs2_holder_uninit(ghs); + return error; + } + + ip = get_gl2ip(ghs[1].gh_gl); + + ip->i_di.di_major = major; + ip->i_di.di_minor = minor; + + error = gfs2_meta_inode_buffer(ip, &dibh); + + if (!gfs2_assert_withdraw(sdp, !error)) { + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + gfs2_trans_end(sdp); + if (dip->i_alloc->al_rgd) + gfs2_inplace_release(dip); + gfs2_quota_unlock(dip); + gfs2_alloc_put(dip); + + gfs2_glock_dq_uninit_m(2, ghs); + + inode = gfs2_ip2v(ip); + gfs2_inode_put(ip); + + if (!inode) + return -ENOMEM; + + d_instantiate(dentry, inode); + mark_inode_dirty(inode); + + return 0; +} + +/** + * gfs2_rename - Rename a file + * @odir: Parent directory of old file name + * @odentry: The old dentry of the file + * @ndir: Parent directory of new file name + * @ndentry: The new dentry of the file + * + * Returns: errno + */ + +static int gfs2_rename(struct inode *odir, struct dentry *odentry, + struct inode *ndir, struct dentry *ndentry) +{ + struct gfs2_inode *odip = get_v2ip(odir); + struct gfs2_inode *ndip = get_v2ip(ndir); + struct gfs2_inode *ip = get_v2ip(odentry->d_inode); + struct gfs2_inode *nip = NULL; + struct gfs2_sbd *sdp = odip->i_sbd; + struct gfs2_unlinked *ul; + struct gfs2_holder ghs[4], r_gh; + unsigned int num_gh; + int dir_rename = 0; + int alloc_required; + unsigned int x; + int error; + + atomic_inc(&sdp->sd_ops_inode); + + if (ndentry->d_inode) { + nip = get_v2ip(ndentry->d_inode); + if (ip == nip) + return 0; + } + + error = gfs2_unlinked_get(sdp, &ul); + if (error) + return error; + + /* Make sure we aren't trying to move a dirctory into it's subdir */ + + if (S_ISDIR(ip->i_di.di_mode) && odip != ndip) { + dir_rename = 1; + + error = gfs2_glock_nq_init(sdp->sd_rename_gl, + LM_ST_EXCLUSIVE, 0, + &r_gh); + if (error) + goto out; + + error = gfs2_ok_to_move(ip, ndip); + if (error) + goto out_gunlock_r; + } + + gfs2_holder_init(odip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); + gfs2_holder_init(ndip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); + num_gh = 3; + + if (nip) + gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); + + error = gfs2_glock_nq_m(num_gh, ghs); + if (error) + goto out_uninit; + + /* Check out the old directory */ + + error = gfs2_unlink_ok(odip, &odentry->d_name, ip); + if (error) + goto out_gunlock; + + /* Check out the new directory */ + + if (nip) { + error = gfs2_unlink_ok(ndip, &ndentry->d_name, nip); + if (error) + goto out_gunlock; + + if (S_ISDIR(nip->i_di.di_mode)) { + if (nip->i_di.di_entries < 2) { + if (gfs2_consist_inode(nip)) + gfs2_dinode_print(&nip->i_di); + error = -EIO; + goto out_gunlock; + } + if (nip->i_di.di_entries > 2) { + error = -ENOTEMPTY; + goto out_gunlock; + } + } + } else { + error = gfs2_repermission(ndir, MAY_WRITE | MAY_EXEC, NULL); + if (error) + goto out_gunlock; + + error = gfs2_dir_search(ndip, &ndentry->d_name, NULL, NULL); + switch (error) { + case -ENOENT: + error = 0; + break; + case 0: + error = -EEXIST; + default: + goto out_gunlock; + }; + + if (odip != ndip) { + if (!ndip->i_di.di_nlink) { + error = -EINVAL; + goto out_gunlock; + } + if (ndip->i_di.di_entries == (uint32_t)-1) { + error = -EFBIG; + goto out_gunlock; + } + if (S_ISDIR(ip->i_di.di_mode) && + ndip->i_di.di_nlink == (uint32_t)-1) { + error = -EMLINK; + goto out_gunlock; + } + } + } + + /* Check out the dir to be renamed */ + + if (dir_rename) { + error = gfs2_repermission(odentry->d_inode, MAY_WRITE, NULL); + if (error) + goto out_gunlock; + } + + error = gfs2_diradd_alloc_required(ndip, &ndentry->d_name, + &alloc_required); + if (error) + goto out_gunlock; + + if (alloc_required) { + struct gfs2_alloc *al = gfs2_alloc_get(ndip); + + error = gfs2_quota_lock(ndip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out_alloc; + + error = gfs2_quota_check(ndip, ndip->i_di.di_uid, + ndip->i_di.di_gid); + if (error) + goto out_gunlock_q; + + al->al_requested = sdp->sd_max_dirres; + + error = gfs2_inplace_reserve(ndip); + if (error) + goto out_gunlock_q; + + error = gfs2_trans_begin(sdp, + sdp->sd_max_dirres + + al->al_rgd->rd_ri.ri_length + + 4 * RES_DINODE + 4 * RES_LEAF + + RES_UNLINKED + RES_STATFS + + RES_QUOTA, 0); + if (error) + goto out_ipreserv; + } else { + error = gfs2_trans_begin(sdp, 4 * RES_DINODE + + 5 * RES_LEAF + + RES_UNLINKED, 0); + if (error) + goto out_gunlock; + } + + /* Remove the target file, if it exists */ + + if (nip) { + if (S_ISDIR(nip->i_di.di_mode)) + error = gfs2_rmdiri(ndip, &ndentry->d_name, nip, ul); + else + error = gfs2_unlinki(ndip, &ndentry->d_name, nip, ul); + if (error) + goto out_end_trans; + } + + if (dir_rename) { + struct qstr name; + name.len = 2; + name.name = ".."; + + error = gfs2_change_nlink(ndip, +1); + if (error) + goto out_end_trans; + error = gfs2_change_nlink(odip, -1); + if (error) + goto out_end_trans; + + error = gfs2_dir_mvino(ip, &name, &ndip->i_num, DT_DIR); + if (error) + goto out_end_trans; + } else { + struct buffer_head *dibh; + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out_end_trans; + ip->i_di.di_ctime = get_seconds(); + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + error = gfs2_dir_del(odip, &odentry->d_name); + if (error) + goto out_end_trans; + + error = gfs2_dir_add(ndip, &ndentry->d_name, &ip->i_num, + IF2DT(ip->i_di.di_mode)); + if (error) + goto out_end_trans; + + out_end_trans: + gfs2_trans_end(sdp); + + out_ipreserv: + if (alloc_required) + gfs2_inplace_release(ndip); + + out_gunlock_q: + if (alloc_required) + gfs2_quota_unlock(ndip); + + out_alloc: + if (alloc_required) + gfs2_alloc_put(ndip); + + out_gunlock: + gfs2_glock_dq_m(num_gh, ghs); + + out_uninit: + for (x = 0; x < num_gh; x++) + gfs2_holder_uninit(ghs + x); + + out_gunlock_r: + if (dir_rename) + gfs2_glock_dq_uninit(&r_gh); + + out: + gfs2_unlinked_put(sdp, ul); + + return error; +} + +/** + * gfs2_readlink - Read the value of a symlink + * @dentry: the symlink + * @buf: the buffer to read the symlink data into + * @size: the size of the buffer + * + * Returns: errno + */ + +static int gfs2_readlink(struct dentry *dentry, char __user *user_buf, + int user_size) +{ + struct gfs2_inode *ip = get_v2ip(dentry->d_inode); + char array[GFS2_FAST_NAME_SIZE], *buf = array; + unsigned int len = GFS2_FAST_NAME_SIZE; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_inode); + + error = gfs2_readlinki(ip, &buf, &len); + if (error) + return error; + + if (user_size > len - 1) + user_size = len - 1; + + if (copy_to_user(user_buf, buf, user_size)) + error = -EFAULT; + else + error = user_size; + + if (buf != array) + kfree(buf); + + return error; +} + +/** + * gfs2_follow_link - Follow a symbolic link + * @dentry: The dentry of the link + * @nd: Data that we pass to vfs_follow_link() + * + * This can handle symlinks of any size. It is optimised for symlinks + * under GFS2_FAST_NAME_SIZE. + * + * Returns: 0 on success or error code + */ + +static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct gfs2_inode *ip = get_v2ip(dentry->d_inode); + char array[GFS2_FAST_NAME_SIZE], *buf = array; + unsigned int len = GFS2_FAST_NAME_SIZE; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_inode); + + error = gfs2_readlinki(ip, &buf, &len); + if (!error) { + error = vfs_follow_link(nd, buf); + if (buf != array) + kfree(buf); + } + + return ERR_PTR(error); +} + +/** + * gfs2_permission - + * @inode: + * @mask: + * @nd: passed from Linux VFS, ignored by us + * + * Returns: errno + */ + +static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_holder i_gh; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_inode); + + if (ip->i_vn == ip->i_gl->gl_vn) + return generic_permission(inode, mask, gfs2_check_acl); + + error = gfs2_glock_nq_init(ip->i_gl, + LM_ST_SHARED, LM_FLAG_ANY, + &i_gh); + if (!error) { + error = generic_permission(inode, mask, gfs2_check_acl_locked); + gfs2_glock_dq_uninit(&i_gh); + } + + return error; +} + +static int setattr_size(struct inode *inode, struct iattr *attr) +{ + struct gfs2_inode *ip = get_v2ip(inode); + int error; + + if (attr->ia_size != ip->i_di.di_size) { + error = vmtruncate(inode, attr->ia_size); + if (error) + return error; + } + + error = gfs2_truncatei(ip, attr->ia_size, gfs2_truncator_page); + if (error) + return error; + + return error; +} + +static int setattr_chown(struct inode *inode, struct iattr *attr) +{ + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *dibh; + uint32_t ouid, ogid, nuid, ngid; + int error; + + ouid = ip->i_di.di_uid; + ogid = ip->i_di.di_gid; + nuid = attr->ia_uid; + ngid = attr->ia_gid; + + if (!(attr->ia_valid & ATTR_UID) || ouid == nuid) + ouid = nuid = NO_QUOTA_CHANGE; + if (!(attr->ia_valid & ATTR_GID) || ogid == ngid) + ogid = ngid = NO_QUOTA_CHANGE; + + gfs2_alloc_get(ip); + + error = gfs2_quota_lock(ip, nuid, ngid); + if (error) + goto out_alloc; + + if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { + error = gfs2_quota_check(ip, nuid, ngid); + if (error) + goto out_gunlock_q; + } + + error = gfs2_trans_begin(sdp, RES_DINODE + 2 * RES_QUOTA, 0); + if (error) + goto out_gunlock_q; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out_end_trans; + + error = inode_setattr(inode, attr); + gfs2_assert_warn(sdp, !error); + gfs2_inode_attr_out(ip); + + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + + if (ouid != NO_QUOTA_CHANGE || ogid != NO_QUOTA_CHANGE) { + gfs2_quota_change(ip, -ip->i_di.di_blocks, + ouid, ogid); + gfs2_quota_change(ip, ip->i_di.di_blocks, + nuid, ngid); + } + + out_end_trans: + gfs2_trans_end(sdp); + + out_gunlock_q: + gfs2_quota_unlock(ip); + + out_alloc: + gfs2_alloc_put(ip); + + return error; +} + +/** + * gfs2_setattr - Change attributes on an inode + * @dentry: The dentry which is changing + * @attr: The structure describing the change + * + * The VFS layer wants to change one or more of an inodes attributes. Write + * that change out to disk. + * + * Returns: errno + */ + +static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_holder i_gh; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_inode); + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + return error; + + error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto out; + + error = inode_change_ok(inode, attr); + if (error) + goto out; + + if (attr->ia_valid & ATTR_SIZE) + error = setattr_size(inode, attr); + else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) + error = setattr_chown(inode, attr); + else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) + error = gfs2_acl_chmod(ip, attr); + else + error = gfs2_setattr_simple(ip, attr); + + out: + gfs2_glock_dq_uninit(&i_gh); + + if (!error) + mark_inode_dirty(inode); + + return error; +} + +/** + * gfs2_getattr - Read out an inode's attributes + * @mnt: ? + * @dentry: The dentry to stat + * @stat: The inode's stats + * + * Returns: errno + */ + +static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_holder gh; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_inode); + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); + if (!error) { + generic_fillattr(inode, stat); + gfs2_glock_dq_uninit(&gh); + } + + return error; +} + +static int gfs2_setxattr(struct dentry *dentry, const char *name, + const void *data, size_t size, int flags) +{ + struct gfs2_inode *ip = get_v2ip(dentry->d_inode); + struct gfs2_ea_request er; + + atomic_inc(&ip->i_sbd->sd_ops_inode); + + memset(&er, 0, sizeof(struct gfs2_ea_request)); + er.er_type = gfs2_ea_name2type(name, &er.er_name); + if (er.er_type == GFS2_EATYPE_UNUSED) + return -EOPNOTSUPP; + er.er_data = (char *)data; + er.er_name_len = strlen(er.er_name); + er.er_data_len = size; + er.er_flags = flags; + + gfs2_assert_warn(ip->i_sbd, !(er.er_flags & GFS2_ERF_MODE)); + + return gfs2_ea_set(ip, &er); +} + +static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, + void *data, size_t size) +{ + struct gfs2_ea_request er; + + atomic_inc(&get_v2sdp(dentry->d_inode->i_sb)->sd_ops_inode); + + memset(&er, 0, sizeof(struct gfs2_ea_request)); + er.er_type = gfs2_ea_name2type(name, &er.er_name); + if (er.er_type == GFS2_EATYPE_UNUSED) + return -EOPNOTSUPP; + er.er_data = data; + er.er_name_len = strlen(er.er_name); + er.er_data_len = size; + + return gfs2_ea_get(get_v2ip(dentry->d_inode), &er); +} + +static ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + struct gfs2_ea_request er; + + atomic_inc(&get_v2sdp(dentry->d_inode->i_sb)->sd_ops_inode); + + memset(&er, 0, sizeof(struct gfs2_ea_request)); + er.er_data = (size) ? buffer : NULL; + er.er_data_len = size; + + return gfs2_ea_list(get_v2ip(dentry->d_inode), &er); +} + +static int gfs2_removexattr(struct dentry *dentry, const char *name) +{ + struct gfs2_ea_request er; + + atomic_inc(&get_v2sdp(dentry->d_inode->i_sb)->sd_ops_inode); + + memset(&er, 0, sizeof(struct gfs2_ea_request)); + er.er_type = gfs2_ea_name2type(name, &er.er_name); + if (er.er_type == GFS2_EATYPE_UNUSED) + return -EOPNOTSUPP; + er.er_name_len = strlen(er.er_name); + + return gfs2_ea_remove(get_v2ip(dentry->d_inode), &er); +} + +struct inode_operations gfs2_file_iops = { + .permission = gfs2_permission, + .setattr = gfs2_setattr, + .getattr = gfs2_getattr, + .setxattr = gfs2_setxattr, + .getxattr = gfs2_getxattr, + .listxattr = gfs2_listxattr, + .removexattr = gfs2_removexattr, +}; + +struct inode_operations gfs2_dev_iops = { + .permission = gfs2_permission, + .setattr = gfs2_setattr, + .getattr = gfs2_getattr, + .setxattr = gfs2_setxattr, + .getxattr = gfs2_getxattr, + .listxattr = gfs2_listxattr, + .removexattr = gfs2_removexattr, +}; + +struct inode_operations gfs2_dir_iops = { + .create = gfs2_create, + .lookup = gfs2_lookup, + .link = gfs2_link, + .unlink = gfs2_unlink, + .symlink = gfs2_symlink, + .mkdir = gfs2_mkdir, + .rmdir = gfs2_rmdir, + .mknod = gfs2_mknod, + .rename = gfs2_rename, + .permission = gfs2_permission, + .setattr = gfs2_setattr, + .getattr = gfs2_getattr, + .setxattr = gfs2_setxattr, + .getxattr = gfs2_getxattr, + .listxattr = gfs2_listxattr, + .removexattr = gfs2_removexattr, +}; + +struct inode_operations gfs2_symlink_iops = { + .readlink = gfs2_readlink, + .follow_link = gfs2_follow_link, + .permission = gfs2_permission, + .setattr = gfs2_setattr, + .getattr = gfs2_getattr, + .setxattr = gfs2_setxattr, + .getxattr = gfs2_getxattr, + .listxattr = gfs2_listxattr, + .removexattr = gfs2_removexattr, +}; + --- a/fs/gfs2/ops_inode.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_inode.h 2005-10-10 11:28:49.307785232 -0500 @@ -0,0 +1,18 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __OPS_INODE_DOT_H__ +#define __OPS_INODE_DOT_H__ + +extern struct inode_operations gfs2_file_iops; +extern struct inode_operations gfs2_dir_iops; +extern struct inode_operations gfs2_symlink_iops; +extern struct inode_operations gfs2_dev_iops; + +#endif /* __OPS_INODE_DOT_H__ */ [PATCH 05/16] GFS: core fs Core file system functions. Signed-off-by: Ken Preslan Signed-off-by: David Teigland --- fs/gfs2/ops_super.c | 401 ++++++++++++++++++++++ fs/gfs2/ops_super.h | 15 fs/gfs2/ops_vm.c | 199 ++++++++++ fs/gfs2/ops_vm.h | 16 fs/gfs2/page.c | 273 +++++++++++++++ fs/gfs2/page.h | 23 + fs/gfs2/super.c | 944 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/super.h | 55 +++ fs/gfs2/trans.c | 214 +++++++++++ fs/gfs2/trans.h | 40 ++ fs/gfs2/unlinked.c | 453 ++++++++++++++++++++++++ fs/gfs2/unlinked.h | 25 + fs/gfs2/util.c | 302 ++++++++++++++++ fs/gfs2/util.h | 199 ++++++++++ 14 files changed, 3159 insertions(+) --- a/fs/gfs2/ops_super.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_super.c 2005-10-10 11:28:49.317783673 -0500 @@ -0,0 +1,401 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "glock.h" +#include "inode.h" +#include "lm.h" +#include "log.h" +#include "mount.h" +#include "ops_super.h" +#include "page.h" +#include "quota.h" +#include "recovery.h" +#include "rgrp.h" +#include "super.h" +#include "sys.h" + +/** + * gfs2_write_inode - Make sure the inode is stable on the disk + * @inode: The inode + * @sync: synchronous write flag + * + * Returns: errno + */ + +static int gfs2_write_inode(struct inode *inode, int sync) +{ + struct gfs2_inode *ip = get_v2ip(inode); + + atomic_inc(&ip->i_sbd->sd_ops_super); + + if (current->flags & PF_MEMALLOC) + return 0; + if (ip && sync) + gfs2_log_flush_glock(ip->i_gl); + + return 0; +} + +/** + * gfs2_put_super - Unmount the filesystem + * @sb: The VFS superblock + * + */ + +static void gfs2_put_super(struct super_block *sb) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + int error; + + if (!sdp) + return; + + atomic_inc(&sdp->sd_ops_super); + + /* Unfreeze the filesystem, if we need to */ + + down(&sdp->sd_freeze_lock); + if (sdp->sd_freeze_count) + gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); + up(&sdp->sd_freeze_lock); + + kthread_stop(sdp->sd_inoded_process); + kthread_stop(sdp->sd_quotad_process); + kthread_stop(sdp->sd_logd_process); + kthread_stop(sdp->sd_recoverd_process); + while (sdp->sd_glockd_num--) + kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]); + kthread_stop(sdp->sd_scand_process); + + if (!(sb->s_flags & MS_RDONLY)) { + error = gfs2_make_fs_ro(sdp); + if (error) + gfs2_io_error(sdp); + } + + /* At this point, we're through modifying the disk */ + + /* Release stuff */ + + gfs2_inode_put(sdp->sd_master_dir); + gfs2_inode_put(sdp->sd_jindex); + gfs2_inode_put(sdp->sd_inum_inode); + gfs2_inode_put(sdp->sd_statfs_inode); + gfs2_inode_put(sdp->sd_rindex); + gfs2_inode_put(sdp->sd_quota_inode); + gfs2_inode_put(sdp->sd_root_dir); + + gfs2_glock_put(sdp->sd_rename_gl); + gfs2_glock_put(sdp->sd_trans_gl); + + if (!sdp->sd_args.ar_spectator) { + gfs2_glock_dq_uninit(&sdp->sd_journal_gh); + gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); + gfs2_glock_dq_uninit(&sdp->sd_ir_gh); + gfs2_glock_dq_uninit(&sdp->sd_sc_gh); + gfs2_glock_dq_uninit(&sdp->sd_ut_gh); + gfs2_glock_dq_uninit(&sdp->sd_qc_gh); + gfs2_inode_put(sdp->sd_ir_inode); + gfs2_inode_put(sdp->sd_sc_inode); + gfs2_inode_put(sdp->sd_ut_inode); + gfs2_inode_put(sdp->sd_qc_inode); + } + + gfs2_glock_dq_uninit(&sdp->sd_live_gh); + + gfs2_clear_rgrpd(sdp); + gfs2_jindex_free(sdp); + + /* Take apart glock structures and buffer lists */ + gfs2_gl_hash_clear(sdp, WAIT); + + /* Unmount the locking protocol */ + gfs2_lm_unmount(sdp); + + /* At this point, we're through participating in the lockspace */ + + gfs2_sys_fs_del(sdp); + + /* Get rid of any extra inodes */ + while (invalidate_inodes(sb)) + yield(); + + vfree(sdp); + + set_v2sdp(sb, NULL); +} + +/** + * gfs2_write_super - disk commit all incore transactions + * @sb: the filesystem + * + * This function is called every time sync(2) is called. + * After this exits, all dirty buffers and synced. + */ + +static void gfs2_write_super(struct super_block *sb) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + atomic_inc(&sdp->sd_ops_super); + gfs2_log_flush(sdp); +} + +/** + * gfs2_write_super_lockfs - prevent further writes to the filesystem + * @sb: the VFS structure for the filesystem + * + */ + +static void gfs2_write_super_lockfs(struct super_block *sb) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + int error; + + atomic_inc(&sdp->sd_ops_super); + + for (;;) { + error = gfs2_freeze_fs(sdp); + if (!error) + break; + + switch (error) { + case -EBUSY: + fs_err(sdp, "waiting for recovery before freeze\n"); + break; + + default: + fs_err(sdp, "error freezing FS: %d\n", error); + break; + } + + fs_err(sdp, "retrying...\n"); + msleep(1000); + } +} + +/** + * gfs2_unlockfs - reallow writes to the filesystem + * @sb: the VFS structure for the filesystem + * + */ + +static void gfs2_unlockfs(struct super_block *sb) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + + atomic_inc(&sdp->sd_ops_super); + gfs2_unfreeze_fs(sdp); +} + +/** + * gfs2_statfs - Gather and return stats about the filesystem + * @sb: The superblock + * @statfsbuf: The buffer + * + * Returns: 0 on success or error code + */ + +static int gfs2_statfs(struct super_block *sb, struct kstatfs *buf) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + struct gfs2_statfs_change sc; + int error; + + atomic_inc(&sdp->sd_ops_super); + + if (gfs2_tune_get(sdp, gt_statfs_slow)) + error = gfs2_statfs_slow(sdp, &sc); + else + error = gfs2_statfs_i(sdp, &sc); + + if (error) + return error; + + memset(buf, 0, sizeof(struct kstatfs)); + + buf->f_type = GFS2_MAGIC; + buf->f_bsize = sdp->sd_sb.sb_bsize; + buf->f_blocks = sc.sc_total; + buf->f_bfree = sc.sc_free; + buf->f_bavail = sc.sc_free; + buf->f_files = sc.sc_dinodes + sc.sc_free; + buf->f_ffree = sc.sc_free; + buf->f_namelen = GFS2_FNAMESIZE; + + return 0; +} + +/** + * gfs2_remount_fs - called when the FS is remounted + * @sb: the filesystem + * @flags: the remount flags + * @data: extra data passed in (not used right now) + * + * Returns: errno + */ + +static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) +{ + struct gfs2_sbd *sdp = get_v2sdp(sb); + int error; + + atomic_inc(&sdp->sd_ops_super); + + error = gfs2_mount_args(sdp, data, 1); + if (error) + return error; + + if (sdp->sd_args.ar_spectator) + *flags |= MS_RDONLY; + else { + if (*flags & MS_RDONLY) { + if (!(sb->s_flags & MS_RDONLY)) + error = gfs2_make_fs_ro(sdp); + } else if (!(*flags & MS_RDONLY) && + (sb->s_flags & MS_RDONLY)) { + error = gfs2_make_fs_rw(sdp); + } + } + + if (*flags & (MS_NOATIME | MS_NODIRATIME)) + set_bit(SDF_NOATIME, &sdp->sd_flags); + else + clear_bit(SDF_NOATIME, &sdp->sd_flags); + + /* Don't let the VFS update atimes. GFS2 handles this itself. */ + *flags |= MS_NOATIME | MS_NODIRATIME; + + return error; +} + +/** + * gfs2_clear_inode - Deallocate an inode when VFS is done with it + * @inode: The VFS inode + * + */ + +static void gfs2_clear_inode(struct inode *inode) +{ + struct gfs2_inode *ip = get_v2ip(inode); + + atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_super); + + if (ip) { + spin_lock(&ip->i_spin); + ip->i_vnode = NULL; + set_v2ip(inode, NULL); + spin_unlock(&ip->i_spin); + + gfs2_glock_schedule_for_reclaim(ip->i_gl); + gfs2_inode_put(ip); + } +} + +/** + * gfs2_show_options - Show mount options for /proc/mounts + * @s: seq_file structure + * @mnt: vfsmount + * + * Returns: 0 on success or error code + */ + +static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) +{ + struct gfs2_sbd *sdp = get_v2sdp(mnt->mnt_sb); + struct gfs2_args *args = &sdp->sd_args; + + atomic_inc(&sdp->sd_ops_super); + + if (args->ar_lockproto[0]) + seq_printf(s, ",lockproto=%s", args->ar_lockproto); + if (args->ar_locktable[0]) + seq_printf(s, ",locktable=%s", args->ar_locktable); + if (args->ar_hostdata[0]) + seq_printf(s, ",hostdata=%s", args->ar_hostdata); + if (args->ar_spectator) + seq_printf(s, ",spectator"); + if (args->ar_ignore_local_fs) + seq_printf(s, ",ignore_local_fs"); + if (args->ar_localflocks) + seq_printf(s, ",localflocks"); + if (args->ar_localcaching) + seq_printf(s, ",localcaching"); + if (args->ar_debug) + seq_printf(s, ",debug"); + if (args->ar_upgrade) + seq_printf(s, ",upgrade"); + if (args->ar_num_glockd != GFS2_GLOCKD_DEFAULT) + seq_printf(s, ",num_glockd=%u", args->ar_num_glockd); + if (args->ar_posix_acl) + seq_printf(s, ",acl"); + if (args->ar_quota != GFS2_QUOTA_DEFAULT) { + char *state; + switch (args->ar_quota) { + case GFS2_QUOTA_OFF: + state = "off"; + break; + case GFS2_QUOTA_ACCOUNT: + state = "account"; + break; + case GFS2_QUOTA_ON: + state = "on"; + break; + default: + state = "unknown"; + break; + } + seq_printf(s, ",quota=%s", state); + } + if (args->ar_suiddir) + seq_printf(s, ",suiddir"); + if (args->ar_data != GFS2_DATA_DEFAULT) { + char *state; + switch (args->ar_data) { + case GFS2_DATA_WRITEBACK: + state = "writeback"; + break; + case GFS2_DATA_ORDERED: + state = "ordered"; + break; + default: + state = "unknown"; + break; + } + seq_printf(s, ",data=%s", state); + } + + return 0; +} + +struct super_operations gfs2_super_ops = { + .write_inode = gfs2_write_inode, + .put_super = gfs2_put_super, + .write_super = gfs2_write_super, + .write_super_lockfs = gfs2_write_super_lockfs, + .unlockfs = gfs2_unlockfs, + .statfs = gfs2_statfs, + .remount_fs = gfs2_remount_fs, + .clear_inode = gfs2_clear_inode, + .show_options = gfs2_show_options, +}; + --- a/fs/gfs2/ops_super.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_super.h 2005-10-10 11:28:49.318783517 -0500 @@ -0,0 +1,15 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __OPS_SUPER_DOT_H__ +#define __OPS_SUPER_DOT_H__ + +extern struct super_operations gfs2_super_ops; + +#endif /* __OPS_SUPER_DOT_H__ */ --- a/fs/gfs2/ops_vm.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_vm.c 2005-10-10 11:28:49.320783205 -0500 @@ -0,0 +1,199 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "glock.h" +#include "inode.h" +#include "ops_vm.h" +#include "page.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +static void pfault_be_greedy(struct gfs2_inode *ip) +{ + unsigned int time; + + spin_lock(&ip->i_spin); + time = ip->i_greedy; + ip->i_last_pfault = jiffies; + spin_unlock(&ip->i_spin); + + gfs2_inode_hold(ip); + if (gfs2_glock_be_greedy(ip->i_gl, time)) + gfs2_inode_put(ip); +} + +static struct page *gfs2_private_nopage(struct vm_area_struct *area, + unsigned long address, int *type) +{ + struct gfs2_inode *ip = get_v2ip(area->vm_file->f_mapping->host); + struct gfs2_holder i_gh; + struct page *result; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_vm); + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); + if (error) + return NULL; + + set_bit(GIF_PAGED, &ip->i_flags); + + result = filemap_nopage(area, address, type); + + if (result && result != NOPAGE_OOM) + pfault_be_greedy(ip); + + gfs2_glock_dq_uninit(&i_gh); + + return result; +} + +static int alloc_page_backing(struct gfs2_inode *ip, struct page *page) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + unsigned long index = page->index; + uint64_t lblock = index << (PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift); + unsigned int blocks = PAGE_CACHE_SIZE >> sdp->sd_sb.sb_bsize_shift; + struct gfs2_alloc *al; + unsigned int data_blocks, ind_blocks; + unsigned int x; + int error; + + al = gfs2_alloc_get(ip); + + error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out; + + error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto out_gunlock_q; + + gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, + &data_blocks, &ind_blocks); + + al->al_requested = data_blocks + ind_blocks; + + error = gfs2_inplace_reserve(ip); + if (error) + goto out_gunlock_q; + + error = gfs2_trans_begin(sdp, + al->al_rgd->rd_ri.ri_length + + ind_blocks + RES_DINODE + + RES_STATFS + RES_QUOTA, 0); + if (error) + goto out_ipres; + + if (gfs2_is_stuffed(ip)) { + error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, NULL); + if (error) + goto out_trans; + } + + for (x = 0; x < blocks; ) { + uint64_t dblock; + unsigned int extlen; + int new = 1; + + error = gfs2_block_map(ip, lblock, &new, &dblock, &extlen); + if (error) + goto out_trans; + + lblock += extlen; + x += extlen; + } + + gfs2_assert_warn(sdp, al->al_alloced); + + out_trans: + gfs2_trans_end(sdp); + + out_ipres: + gfs2_inplace_release(ip); + + out_gunlock_q: + gfs2_quota_unlock(ip); + + out: + gfs2_alloc_put(ip); + + return error; +} + +static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area, + unsigned long address, int *type) +{ + struct gfs2_inode *ip = get_v2ip(area->vm_file->f_mapping->host); + struct gfs2_holder i_gh; + struct page *result = NULL; + unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff; + int alloc_required; + int error; + + atomic_inc(&ip->i_sbd->sd_ops_vm); + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + return NULL; + + if (gfs2_is_jdata(ip)) + goto out; + + set_bit(GIF_PAGED, &ip->i_flags); + set_bit(GIF_SW_PAGED, &ip->i_flags); + + error = gfs2_write_alloc_required(ip, + (uint64_t)index << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE, &alloc_required); + if (error) + goto out; + + result = filemap_nopage(area, address, type); + if (!result || result == NOPAGE_OOM) + goto out; + + if (alloc_required) { + error = alloc_page_backing(ip, result); + if (error) { + page_cache_release(result); + result = NULL; + goto out; + } + set_page_dirty(result); + } + + pfault_be_greedy(ip); + + out: + gfs2_glock_dq_uninit(&i_gh); + + return result; +} + +struct vm_operations_struct gfs2_vm_ops_private = { + .nopage = gfs2_private_nopage, +}; + +struct vm_operations_struct gfs2_vm_ops_sharewrite = { + .nopage = gfs2_sharewrite_nopage, +}; + --- a/fs/gfs2/ops_vm.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/ops_vm.h 2005-10-10 11:28:49.329781802 -0500 @@ -0,0 +1,16 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __OPS_VM_DOT_H__ +#define __OPS_VM_DOT_H__ + +extern struct vm_operations_struct gfs2_vm_ops_private; +extern struct vm_operations_struct gfs2_vm_ops_sharewrite; + +#endif /* __OPS_VM_DOT_H__ */ --- a/fs/gfs2/page.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/page.c 2005-10-10 11:28:49.334781022 -0500 @@ -0,0 +1,273 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "inode.h" +#include "page.h" +#include "trans.h" + +/** + * gfs2_pte_inval - Sync and invalidate all PTEs associated with a glock + * @gl: the glock + * + */ + +void gfs2_pte_inval(struct gfs2_glock *gl) +{ + struct gfs2_inode *ip; + struct inode *inode; + + ip = get_gl2ip(gl); + if (!ip || !S_ISREG(ip->i_di.di_mode)) + return; + + if (!test_bit(GIF_PAGED, &ip->i_flags)) + return; + + inode = gfs2_ip2v_lookup(ip); + if (inode) { + unmap_shared_mapping_range(inode->i_mapping, 0, 0); + iput(inode); + + if (test_bit(GIF_SW_PAGED, &ip->i_flags)) + set_bit(GLF_DIRTY, &gl->gl_flags); + } + + clear_bit(GIF_SW_PAGED, &ip->i_flags); +} + +/** + * gfs2_page_inval - Invalidate all pages associated with a glock + * @gl: the glock + * + */ + +void gfs2_page_inval(struct gfs2_glock *gl) +{ + struct gfs2_inode *ip; + struct inode *inode; + + ip = get_gl2ip(gl); + if (!ip || !S_ISREG(ip->i_di.di_mode)) + return; + + inode = gfs2_ip2v_lookup(ip); + if (inode) { + struct address_space *mapping = inode->i_mapping; + + truncate_inode_pages(mapping, 0); + gfs2_assert_withdraw(ip->i_sbd, !mapping->nrpages); + + iput(inode); + } + + clear_bit(GIF_PAGED, &ip->i_flags); +} + +/** + * gfs2_page_sync - Sync the data pages (not metadata) associated with a glock + * @gl: the glock + * @flags: DIO_START | DIO_WAIT + * + * Syncs data (not metadata) for a regular file. + * No-op for all other types. + */ + +void gfs2_page_sync(struct gfs2_glock *gl, int flags) +{ + struct gfs2_inode *ip; + struct inode *inode; + + ip = get_gl2ip(gl); + if (!ip || !S_ISREG(ip->i_di.di_mode)) + return; + + inode = gfs2_ip2v_lookup(ip); + if (inode) { + struct address_space *mapping = inode->i_mapping; + int error = 0; + + if (flags & DIO_START) + filemap_fdatawrite(mapping); + if (!error && (flags & DIO_WAIT)) + error = filemap_fdatawait(mapping); + + /* Put back any errors cleared by filemap_fdatawait() + so they can be caught by someone who can pass them + up to user space. */ + + if (error == -ENOSPC) + set_bit(AS_ENOSPC, &mapping->flags); + else if (error) + set_bit(AS_EIO, &mapping->flags); + + iput(inode); + } +} + +/** + * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page + * @ip: the inode + * @dibh: the dinode buffer + * @block: the block number that was allocated + * @private: any locked page held by the caller process + * + * Returns: errno + */ + +int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct inode *inode = ip->i_vnode; + struct page *page = (struct page *)private; + struct buffer_head *bh; + int release = 0; + + if (!page || page->index) { + page = grab_cache_page(inode->i_mapping, 0); + if (!page) + return -ENOMEM; + release = 1; + } + + if (!PageUptodate(page)) { + void *kaddr = kmap(page); + + memcpy(kaddr, + dibh->b_data + sizeof(struct gfs2_dinode), + ip->i_di.di_size); + memset(kaddr + ip->i_di.di_size, + 0, + PAGE_CACHE_SIZE - ip->i_di.di_size); + kunmap(page); + + SetPageUptodate(page); + } + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, + (1 << BH_Uptodate)); + + bh = page_buffers(page); + + if (!buffer_mapped(bh)) + map_bh(bh, inode->i_sb, block); + + set_buffer_uptodate(bh); + if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) + gfs2_trans_add_databuf(sdp, bh); + mark_buffer_dirty(bh); + + if (release) { + unlock_page(page); + page_cache_release(page); + } + + return 0; +} + +/** + * gfs2_truncator_page - truncate a partial data block in the page cache + * @ip: the inode + * @size: the size the file should be + * + * Returns: errno + */ + +int gfs2_truncator_page(struct gfs2_inode *ip, uint64_t size) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct inode *inode = ip->i_vnode; + struct page *page; + struct buffer_head *bh; + void *kaddr; + uint64_t lbn, dbn; + unsigned long index; + unsigned int offset; + unsigned int bufnum; + int new = 0; + int error; + + lbn = size >> inode->i_blkbits; + error = gfs2_block_map(ip, lbn, &new, &dbn, NULL); + if (error || !dbn) + return error; + + index = size >> PAGE_CACHE_SHIFT; + offset = size & (PAGE_CACHE_SIZE - 1); + bufnum = lbn - (index << (PAGE_CACHE_SHIFT - inode->i_blkbits)); + + page = read_cache_page(inode->i_mapping, index, + (filler_t *)inode->i_mapping->a_ops->readpage, + NULL); + if (IS_ERR(page)) + return PTR_ERR(page); + + lock_page(page); + + if (!PageUptodate(page) || PageError(page)) { + error = -EIO; + goto out; + } + + kaddr = kmap(page); + memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset); + kunmap(page); + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << inode->i_blkbits, + (1 << BH_Uptodate)); + + for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page) + /* Do nothing */; + + if (!buffer_mapped(bh)) + map_bh(bh, inode->i_sb, dbn); + + set_buffer_uptodate(bh); + if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED) + gfs2_trans_add_databuf(sdp, bh); + mark_buffer_dirty(bh); + + out: + unlock_page(page); + page_cache_release(page); + + return error; +} + +void gfs2_page_add_databufs(struct gfs2_sbd *sdp, struct page *page, + unsigned int from, unsigned int to) +{ + struct buffer_head *head = page_buffers(page); + unsigned int bsize = head->b_size; + struct buffer_head *bh; + unsigned int start, end; + + for (bh = head, start = 0; + bh != head || !start; + bh = bh->b_this_page, start = end) { + end = start + bsize; + if (end <= from || start >= to) + continue; + gfs2_trans_add_databuf(sdp, bh); + } +} + --- a/fs/gfs2/page.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/page.h 2005-10-10 11:28:49.338780399 -0500 @@ -0,0 +1,23 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __PAGE_DOT_H__ +#define __PAGE_DOT_H__ + +void gfs2_pte_inval(struct gfs2_glock *gl); +void gfs2_page_inval(struct gfs2_glock *gl); +void gfs2_page_sync(struct gfs2_glock *gl, int flags); + +int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, + uint64_t block, void *private); +int gfs2_truncator_page(struct gfs2_inode *ip, uint64_t size); +void gfs2_page_add_databufs(struct gfs2_sbd *sdp, struct page *page, + unsigned int from, unsigned int to); + +#endif /* __PAGE_DOT_H__ */ --- a/fs/gfs2/super.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/super.c 2005-10-10 11:28:49.362776657 -0500 @@ -0,0 +1,944 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "dir.h" +#include "format.h" +#include "glock.h" +#include "glops.h" +#include "inode.h" +#include "log.h" +#include "meta_io.h" +#include "quota.h" +#include "recovery.h" +#include "rgrp.h" +#include "super.h" +#include "trans.h" +#include "unlinked.h" + +/** + * gfs2_tune_init - Fill a gfs2_tune structure with default values + * @gt: tune + * + */ + +void gfs2_tune_init(struct gfs2_tune *gt) +{ + spin_lock_init(>->gt_spin); + + gt->gt_ilimit = 100; + gt->gt_ilimit_tries = 3; + gt->gt_ilimit_min = 1; + gt->gt_demote_secs = 300; + gt->gt_incore_log_blocks = 1024; + gt->gt_log_flush_secs = 60; + gt->gt_jindex_refresh_secs = 60; + gt->gt_scand_secs = 15; + gt->gt_recoverd_secs = 60; + gt->gt_logd_secs = 1; + gt->gt_quotad_secs = 5; + gt->gt_inoded_secs = 15; + gt->gt_quota_simul_sync = 64; + gt->gt_quota_warn_period = 10; + gt->gt_quota_scale_num = 1; + gt->gt_quota_scale_den = 1; + gt->gt_quota_cache_secs = 300; + gt->gt_quota_quantum = 60; + gt->gt_atime_quantum = 3600; + gt->gt_new_files_jdata = 0; + gt->gt_new_files_directio = 0; + gt->gt_max_atomic_write = 4 << 20; + gt->gt_max_readahead = 1 << 18; + gt->gt_lockdump_size = 131072; + gt->gt_stall_secs = 600; + gt->gt_complain_secs = 10; + gt->gt_reclaim_limit = 5000; + gt->gt_entries_per_readdir = 32; + gt->gt_prefetch_secs = 10; + gt->gt_greedy_default = HZ / 10; + gt->gt_greedy_quantum = HZ / 40; + gt->gt_greedy_max = HZ / 4; + gt->gt_statfs_quantum = 30; + gt->gt_statfs_slow = 0; +} + +/** + * gfs2_check_sb - Check superblock + * @sdp: the filesystem + * @sb: The superblock + * @silent: Don't print a message if the check fails + * + * Checks the version code of the FS is one that we understand how to + * read and that the sizes of the various on-disk structures have not + * changed. + */ + +int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent) +{ + unsigned int x; + + if (sb->sb_header.mh_magic != GFS2_MAGIC || + sb->sb_header.mh_type != GFS2_METATYPE_SB) { + if (!silent) + printk("GFS2: not a GFS2 filesystem\n"); + return -EINVAL; + } + + /* If format numbers match exactly, we're done. */ + + if (sb->sb_fs_format == GFS2_FORMAT_FS && + sb->sb_multihost_format == GFS2_FORMAT_MULTI) + return 0; + + if (sb->sb_fs_format != GFS2_FORMAT_FS) { + for (x = 0; gfs2_old_fs_formats[x]; x++) + if (gfs2_old_fs_formats[x] == sb->sb_fs_format) + break; + + if (!gfs2_old_fs_formats[x]) { + printk("GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk("GFS2: I don't know how to upgrade this FS\n"); + return -EINVAL; + } + } + + if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { + for (x = 0; gfs2_old_multihost_formats[x]; x++) + if (gfs2_old_multihost_formats[x] == sb->sb_multihost_format) + break; + + if (!gfs2_old_multihost_formats[x]) { + printk("GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk("GFS2: I don't know how to upgrade this FS\n"); + return -EINVAL; + } + } + + if (!sdp->sd_args.ar_upgrade) { + printk("GFS2: code version (%u, %u) is incompatible " + "with ondisk format (%u, %u)\n", + GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, + sb->sb_fs_format, sb->sb_multihost_format); + printk("GFS2: Use the \"upgrade\" mount option to upgrade " + "the FS\n"); + printk("GFS2: See the manual for more details\n"); + return -EINVAL; + } + + return 0; +} + +/** + * gfs2_read_sb - Read super block + * @sdp: The GFS2 superblock + * @gl: the glock for the superblock (assumed to be held) + * @silent: Don't print message if mount fails + * + */ + +int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) +{ + struct buffer_head *bh; + uint32_t hash_blocks, ind_blocks, leaf_blocks; + uint32_t tmp_blocks; + unsigned int x; + int error; + + error = gfs2_meta_read(gl, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift, + DIO_FORCE | DIO_START | DIO_WAIT, &bh); + if (error) { + if (!silent) + fs_err(sdp, "can't read superblock\n"); + return error; + } + + gfs2_assert(sdp, sizeof(struct gfs2_sb) <= bh->b_size); + gfs2_sb_in(&sdp->sd_sb, bh->b_data); + brelse(bh); + + error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); + if (error) + return error; + + sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - + GFS2_BASIC_BLOCK_SHIFT; + sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; + sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode)) / sizeof(uint64_t); + sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_meta_header)) / sizeof(uint64_t); + sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); + sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; + sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; + sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(uint64_t); + sdp->sd_ut_per_block = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_meta_header)) / + sizeof(struct gfs2_unlinked_tag); + sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_meta_header)) / + sizeof(struct gfs2_quota_change); + + /* Compute maximum reservation required to add a entry to a directory */ + + hash_blocks = DIV_RU(sizeof(uint64_t) * (1 << GFS2_DIR_MAX_DEPTH), + sdp->sd_jbsize); + + ind_blocks = 0; + for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { + tmp_blocks = DIV_RU(tmp_blocks, sdp->sd_inptrs); + ind_blocks += tmp_blocks; + } + + leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; + + sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; + + sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode); + sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; + for (x = 2;; x++) { + uint64_t space, d; + uint32_t m; + + space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; + d = space; + m = do_div(d, sdp->sd_inptrs); + + if (d != sdp->sd_heightsize[x - 1] || m) + break; + sdp->sd_heightsize[x] = space; + } + sdp->sd_max_height = x; + gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); + + sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - + sizeof(struct gfs2_dinode); + sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; + for (x = 2;; x++) { + uint64_t space, d; + uint32_t m; + + space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; + d = space; + m = do_div(d, sdp->sd_inptrs); + + if (d != sdp->sd_jheightsize[x - 1] || m) + break; + sdp->sd_jheightsize[x] = space; + } + sdp->sd_max_jheight = x; + gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); + + return 0; +} + +int gfs2_do_upgrade(struct gfs2_sbd *sdp, struct gfs2_glock *sb_gl) +{ + return 0; +} + +/** + * gfs2_jindex_hold - Grab a lock on the jindex + * @sdp: The GFS2 superblock + * @ji_gh: the holder for the jindex glock + * + * This is very similar to the gfs2_rindex_hold() function, except that + * in general we hold the jindex lock for longer periods of time and + * we grab it far less frequently (in general) then the rgrp lock. + * + * Returns: errno + */ + +int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) +{ + struct gfs2_inode *dip = sdp->sd_jindex; + struct qstr name; + char buf[20]; + struct gfs2_jdesc *jd; + int error; + + name.name = buf; + + down(&sdp->sd_jindex_mutex); + + for (;;) { + error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, + GL_LOCAL_EXCL, ji_gh); + if (error) + break; + + name.len = sprintf(buf, "journal%u", sdp->sd_journals); + + error = gfs2_dir_search(sdp->sd_jindex, &name, NULL, NULL); + if (error == -ENOENT) { + error = 0; + break; + } + + gfs2_glock_dq_uninit(ji_gh); + + if (error) + break; + + error = -ENOMEM; + jd = kzalloc(sizeof(struct gfs2_jdesc), GFP_KERNEL); + if (!jd) + break; + + error = gfs2_lookupi(dip, &name, 1, &jd->jd_inode); + if (error) { + kfree(jd); + break; + } + + spin_lock(&sdp->sd_jindex_spin); + jd->jd_jid = sdp->sd_journals++; + list_add_tail(&jd->jd_list, &sdp->sd_jindex_list); + spin_unlock(&sdp->sd_jindex_spin); + } + + up(&sdp->sd_jindex_mutex); + + return error; +} + +/** + * gfs2_jindex_free - Clear all the journal index information + * @sdp: The GFS2 superblock + * + */ + +void gfs2_jindex_free(struct gfs2_sbd *sdp) +{ + struct list_head list; + struct gfs2_jdesc *jd; + + spin_lock(&sdp->sd_jindex_spin); + list_add(&list, &sdp->sd_jindex_list); + list_del_init(&sdp->sd_jindex_list); + sdp->sd_journals = 0; + spin_unlock(&sdp->sd_jindex_spin); + + while (!list_empty(&list)) { + jd = list_entry(list.next, struct gfs2_jdesc, jd_list); + list_del(&jd->jd_list); + gfs2_inode_put(jd->jd_inode); + kfree(jd); + } +} + +static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid) +{ + struct gfs2_jdesc *jd; + int found = 0; + + list_for_each_entry(jd, head, jd_list) { + if (jd->jd_jid == jid) { + found = 1; + break; + } + } + + if (!found) + jd = NULL; + + return jd; +} + +struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid) +{ + struct gfs2_jdesc *jd; + + spin_lock(&sdp->sd_jindex_spin); + jd = jdesc_find_i(&sdp->sd_jindex_list, jid); + spin_unlock(&sdp->sd_jindex_spin); + + return jd; +} + +void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid) +{ + struct gfs2_jdesc *jd; + + spin_lock(&sdp->sd_jindex_spin); + jd = jdesc_find_i(&sdp->sd_jindex_list, jid); + if (jd) + jd->jd_dirty = 1; + spin_unlock(&sdp->sd_jindex_spin); +} + +struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp) +{ + struct gfs2_jdesc *jd; + int found = 0; + + spin_lock(&sdp->sd_jindex_spin); + + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { + if (jd->jd_dirty) { + jd->jd_dirty = 0; + found = 1; + break; + } + } + spin_unlock(&sdp->sd_jindex_spin); + + if (!found) + jd = NULL; + + return jd; +} + +int gfs2_jdesc_check(struct gfs2_jdesc *jd) +{ + struct gfs2_inode *ip = jd->jd_inode; + struct gfs2_sbd *sdp = ip->i_sbd; + int ar; + int error; + + if (ip->i_di.di_size < (8 << 20) || + ip->i_di.di_size > (1 << 30) || + (ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1))) { + gfs2_consist_inode(ip); + return -EIO; + } + jd->jd_blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift; + + error = gfs2_write_alloc_required(ip, + 0, ip->i_di.di_size, + &ar); + if (!error && ar) { + gfs2_consist_inode(ip); + error = -EIO; + } + + return error; +} + +int gfs2_lookup_master_dir(struct gfs2_sbd *sdp) +{ + struct gfs2_glock *gl; + int error; + + error = gfs2_glock_get(sdp, + sdp->sd_sb.sb_master_dir.no_addr, + &gfs2_inode_glops, CREATE, &gl); + if (!error) { + error = gfs2_inode_get(gl, &sdp->sd_sb.sb_master_dir, CREATE, + &sdp->sd_master_dir); + gfs2_glock_put(gl); + } + + return error; +} + +/** + * gfs2_make_fs_rw - Turn a Read-Only FS into a Read-Write one + * @sdp: the filesystem + * + * Returns: errno + */ + +int gfs2_make_fs_rw(struct gfs2_sbd *sdp) +{ + struct gfs2_glock *j_gl = sdp->sd_jdesc->jd_inode->i_gl; + struct gfs2_holder t_gh; + struct gfs2_log_header head; + int error; + + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, + GL_LOCAL_EXCL | GL_NEVER_RECURSE, &t_gh); + if (error) + return error; + + gfs2_meta_cache_flush(sdp->sd_jdesc->jd_inode); + j_gl->gl_ops->go_inval(j_gl, DIO_METADATA | DIO_DATA); + + error = gfs2_find_jhead(sdp->sd_jdesc, &head); + if (error) + goto fail; + + if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { + gfs2_consist(sdp); + error = -EIO; + goto fail; + } + + /* Initialize some head of the log stuff */ + sdp->sd_log_sequence = head.lh_sequence + 1; + gfs2_log_pointers_init(sdp, head.lh_blkno); + + error = gfs2_unlinked_init(sdp); + if (error) + goto fail; + error = gfs2_quota_init(sdp); + if (error) + goto fail_unlinked; + + set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + + gfs2_glock_dq_uninit(&t_gh); + + return 0; + + fail_unlinked: + gfs2_unlinked_cleanup(sdp); + + fail: + t_gh.gh_flags |= GL_NOCACHE; + gfs2_glock_dq_uninit(&t_gh); + + return error; +} + +/** + * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one + * @sdp: the filesystem + * + * Returns: errno + */ + +int gfs2_make_fs_ro(struct gfs2_sbd *sdp) +{ + struct gfs2_holder t_gh; + int error; + + gfs2_unlinked_dealloc(sdp); + gfs2_quota_sync(sdp); + gfs2_statfs_sync(sdp); + + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, + GL_LOCAL_EXCL | GL_NEVER_RECURSE | GL_NOCACHE, + &t_gh); + if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) + return error; + + gfs2_meta_syncfs(sdp); + gfs2_log_shutdown(sdp); + + clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); + + if (t_gh.gh_gl) + gfs2_glock_dq_uninit(&t_gh); + + gfs2_unlinked_cleanup(sdp); + gfs2_quota_cleanup(sdp); + + return error; +} + +int gfs2_statfs_init(struct gfs2_sbd *sdp) +{ + struct gfs2_inode *m_ip = sdp->sd_statfs_inode; + struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; + struct gfs2_inode *l_ip = sdp->sd_sc_inode; + struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct buffer_head *m_bh, *l_bh; + struct gfs2_holder gh; + int error; + + error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, + &gh); + if (error) + return error; + + error = gfs2_meta_inode_buffer(m_ip, &m_bh); + if (error) + goto out; + + if (sdp->sd_args.ar_spectator) { + spin_lock(&sdp->sd_statfs_spin); + gfs2_statfs_change_in(m_sc, m_bh->b_data + + sizeof(struct gfs2_dinode)); + spin_unlock(&sdp->sd_statfs_spin); + } else { + error = gfs2_meta_inode_buffer(l_ip, &l_bh); + if (error) + goto out_m_bh; + + spin_lock(&sdp->sd_statfs_spin); + gfs2_statfs_change_in(m_sc, m_bh->b_data + + sizeof(struct gfs2_dinode)); + gfs2_statfs_change_in(l_sc, l_bh->b_data + + sizeof(struct gfs2_dinode)); + spin_unlock(&sdp->sd_statfs_spin); + + brelse(l_bh); + } + + out_m_bh: + brelse(m_bh); + + out: + gfs2_glock_dq_uninit(&gh); + + return 0; +} + +void gfs2_statfs_change(struct gfs2_sbd *sdp, int64_t total, int64_t free, + int64_t dinodes) +{ + struct gfs2_inode *l_ip = sdp->sd_sc_inode; + struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct buffer_head *l_bh; + int error; + + error = gfs2_meta_inode_buffer(l_ip, &l_bh); + if (error) + return; + + down(&sdp->sd_statfs_mutex); + gfs2_trans_add_bh(l_ip->i_gl, l_bh); + up(&sdp->sd_statfs_mutex); + + spin_lock(&sdp->sd_statfs_spin); + l_sc->sc_total += total; + l_sc->sc_free += free; + l_sc->sc_dinodes += dinodes; + gfs2_statfs_change_out(l_sc, l_bh->b_data + + sizeof(struct gfs2_dinode)); + spin_unlock(&sdp->sd_statfs_spin); + + brelse(l_bh); +} + +int gfs2_statfs_sync(struct gfs2_sbd *sdp) +{ + struct gfs2_inode *m_ip = sdp->sd_statfs_inode; + struct gfs2_inode *l_ip = sdp->sd_sc_inode; + struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + struct gfs2_holder gh; + struct buffer_head *m_bh, *l_bh; + int error; + + error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, + &gh); + if (error) + return error; + + error = gfs2_meta_inode_buffer(m_ip, &m_bh); + if (error) + goto out; + + spin_lock(&sdp->sd_statfs_spin); + gfs2_statfs_change_in(m_sc, m_bh->b_data + + sizeof(struct gfs2_dinode)); + if (!l_sc->sc_total && !l_sc->sc_free && !l_sc->sc_dinodes) { + spin_unlock(&sdp->sd_statfs_spin); + goto out_bh; + } + spin_unlock(&sdp->sd_statfs_spin); + + error = gfs2_meta_inode_buffer(l_ip, &l_bh); + if (error) + goto out_bh; + + error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0); + if (error) + goto out_bh2; + + down(&sdp->sd_statfs_mutex); + gfs2_trans_add_bh(l_ip->i_gl, l_bh); + up(&sdp->sd_statfs_mutex); + + spin_lock(&sdp->sd_statfs_spin); + m_sc->sc_total += l_sc->sc_total; + m_sc->sc_free += l_sc->sc_free; + m_sc->sc_dinodes += l_sc->sc_dinodes; + memset(l_sc, 0, sizeof(struct gfs2_statfs_change)); + memset(l_bh->b_data + sizeof(struct gfs2_dinode), + 0, sizeof(struct gfs2_statfs_change)); + spin_unlock(&sdp->sd_statfs_spin); + + gfs2_trans_add_bh(m_ip->i_gl, m_bh); + gfs2_statfs_change_out(m_sc, m_bh->b_data + sizeof(struct gfs2_dinode)); + + gfs2_trans_end(sdp); + + out_bh2: + brelse(l_bh); + + out_bh: + brelse(m_bh); + + out: + gfs2_glock_dq_uninit(&gh); + + return error; +} + +/** + * gfs2_statfs_i - Do a statfs + * @sdp: the filesystem + * @sg: the sg structure + * + * Returns: errno + */ + +int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc) +{ + struct gfs2_statfs_change *m_sc = &sdp->sd_statfs_master; + struct gfs2_statfs_change *l_sc = &sdp->sd_statfs_local; + + spin_lock(&sdp->sd_statfs_spin); + + *sc = *m_sc; + sc->sc_total += l_sc->sc_total; + sc->sc_free += l_sc->sc_free; + sc->sc_dinodes += l_sc->sc_dinodes; + + spin_unlock(&sdp->sd_statfs_spin); + + if (sc->sc_free < 0) + sc->sc_free = 0; + if (sc->sc_free > sc->sc_total) + sc->sc_free = sc->sc_total; + if (sc->sc_dinodes < 0) + sc->sc_dinodes = 0; + + return 0; +} + +/** + * statfs_fill - fill in the sg for a given RG + * @rgd: the RG + * @sc: the sc structure + * + * Returns: 0 on success, -ESTALE if the LVB is invalid + */ + +static int statfs_slow_fill(struct gfs2_rgrpd *rgd, + struct gfs2_statfs_change *sc) +{ + gfs2_rgrp_verify(rgd); + sc->sc_total += rgd->rd_ri.ri_data; + sc->sc_free += rgd->rd_rg.rg_free; + sc->sc_dinodes += rgd->rd_rg.rg_dinodes; + return 0; +} + +/** + * gfs2_statfs_slow - Stat a filesystem using asynchronous locking + * @sdp: the filesystem + * @sc: the sc info that will be returned + * + * Any error (other than a signal) will cause this routine to fall back + * to the synchronous version. + * + * FIXME: This really shouldn't busy wait like this. + * + * Returns: errno + */ + +int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc) +{ + struct gfs2_holder ri_gh; + struct gfs2_rgrpd *rgd_next; + struct gfs2_holder *gha, *gh; + unsigned int slots = 64; + unsigned int x; + int done; + int error = 0, err; + + memset(sc, 0, sizeof(struct gfs2_statfs_change)); + gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL); + if (!gha) + return -ENOMEM; + + error = gfs2_rindex_hold(sdp, &ri_gh); + if (error) + goto out; + + rgd_next = gfs2_rgrpd_get_first(sdp); + + for (;;) { + done = 1; + + for (x = 0; x < slots; x++) { + gh = gha + x; + + if (gh->gh_gl && gfs2_glock_poll(gh)) { + err = gfs2_glock_wait(gh); + if (err) { + gfs2_holder_uninit(gh); + error = err; + } else { + if (!error) + error = statfs_slow_fill(get_gl2rgd(gh->gh_gl), sc); + gfs2_glock_dq_uninit(gh); + } + } + + if (gh->gh_gl) + done = 0; + else if (rgd_next && !error) { + error = gfs2_glock_nq_init(rgd_next->rd_gl, + LM_ST_SHARED, + GL_ASYNC, + gh); + rgd_next = gfs2_rgrpd_get_next(rgd_next); + done = 0; + } + + if (signal_pending(current)) + error = -ERESTARTSYS; + } + + if (done) + break; + + yield(); + } + + gfs2_glock_dq_uninit(&ri_gh); + + out: + kfree(gha); + + return error; +} + +struct lfcc { + struct list_head list; + struct gfs2_holder gh; +}; + +/** + * gfs2_lock_fs_check_clean - Stop all writes to the FS and check that all + * journals are clean + * @sdp: the file system + * @state: the state to put the transaction lock into + * @t_gh: the hold on the transaction lock + * + * Returns: errno + */ + +int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_holder *t_gh) +{ + struct gfs2_holder ji_gh; + struct gfs2_jdesc *jd; + struct lfcc *lfcc; + LIST_HEAD(list); + struct gfs2_log_header lh; + int error; + + error = gfs2_jindex_hold(sdp, &ji_gh); + if (error) + return error; + + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { + lfcc = kmalloc(sizeof(struct lfcc), GFP_KERNEL); + if (!lfcc) { + error = -ENOMEM; + goto out; + } + error = gfs2_glock_nq_init(jd->jd_inode->i_gl, LM_ST_SHARED, 0, + &lfcc->gh); + if (error) { + kfree(lfcc); + goto out; + } + list_add(&lfcc->list, &list); + } + + error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_DEFERRED, + LM_FLAG_PRIORITY | GL_NEVER_RECURSE | GL_NOCACHE, + t_gh); + + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { + error = gfs2_jdesc_check(jd); + if (error) + break; + error = gfs2_find_jhead(jd, &lh); + if (error) + break; + if (!(lh.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { + error = -EBUSY; + break; + } + } + + if (error) + gfs2_glock_dq_uninit(t_gh); + + out: + while (!list_empty(&list)) { + lfcc = list_entry(list.next, struct lfcc, list); + list_del(&lfcc->list); + gfs2_glock_dq_uninit(&lfcc->gh); + kfree(lfcc); + } + gfs2_glock_dq_uninit(&ji_gh); + + return error; +} + +/** + * gfs2_freeze_fs - freezes the file system + * @sdp: the file system + * + * This function flushes data and meta data for all machines by + * aquiring the transaction log exclusively. All journals are + * ensured to be in a clean state as well. + * + * Returns: errno + */ + +int gfs2_freeze_fs(struct gfs2_sbd *sdp) +{ + int error = 0; + + down(&sdp->sd_freeze_lock); + + if (!sdp->sd_freeze_count++) { + error = gfs2_lock_fs_check_clean(sdp, &sdp->sd_freeze_gh); + if (error) + sdp->sd_freeze_count--; + } + + up(&sdp->sd_freeze_lock); + + return error; +} + +/** + * gfs2_unfreeze_fs - unfreezes the file system + * @sdp: the file system + * + * This function allows the file system to proceed by unlocking + * the exclusively held transaction lock. Other GFS2 nodes are + * now free to acquire the lock shared and go on with their lives. + * + */ + +void gfs2_unfreeze_fs(struct gfs2_sbd *sdp) +{ + down(&sdp->sd_freeze_lock); + + if (sdp->sd_freeze_count && !--sdp->sd_freeze_count) + gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); + + up(&sdp->sd_freeze_lock); +} + --- a/fs/gfs2/super.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/super.h 2005-10-10 11:28:49.362776657 -0500 @@ -0,0 +1,55 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __SUPER_DOT_H__ +#define __SUPER_DOT_H__ + +void gfs2_tune_init(struct gfs2_tune *gt); + +int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb *sb, int silent); +int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); +int gfs2_do_upgrade(struct gfs2_sbd *sdp, struct gfs2_glock *gl_sb); + +static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) +{ + unsigned int x; + spin_lock(&sdp->sd_jindex_spin); + x = sdp->sd_journals; + spin_unlock(&sdp->sd_jindex_spin); + return x; +} + +int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh); +void gfs2_jindex_free(struct gfs2_sbd *sdp); + +struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid); +void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid); +struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp); +int gfs2_jdesc_check(struct gfs2_jdesc *jd); + +int gfs2_lookup_master_dir(struct gfs2_sbd *sdp); +int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, + struct gfs2_inode **ipp); + +int gfs2_make_fs_rw(struct gfs2_sbd *sdp); +int gfs2_make_fs_ro(struct gfs2_sbd *sdp); + +int gfs2_statfs_init(struct gfs2_sbd *sdp); +void gfs2_statfs_change(struct gfs2_sbd *sdp, + int64_t total, int64_t free, int64_t dinodes); +int gfs2_statfs_sync(struct gfs2_sbd *sdp); +int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc); +int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change *sc); + +int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp, struct gfs2_holder *t_gh); +int gfs2_freeze_fs(struct gfs2_sbd *sdp); +void gfs2_unfreeze_fs(struct gfs2_sbd *sdp); + +#endif /* __SUPER_DOT_H__ */ + --- a/fs/gfs2/trans.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/trans.c 2005-10-10 11:28:49.365776189 -0500 @@ -0,0 +1,214 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "glock.h" +#include "log.h" +#include "lops.h" +#include "meta_io.h" +#include "trans.h" + +int gfs2_trans_begin_i(struct gfs2_sbd *sdp, unsigned int blocks, + unsigned int revokes, char *file, unsigned int line) +{ + struct gfs2_trans *tr; + int error; + + if (gfs2_assert_warn(sdp, !get_transaction) || + gfs2_assert_warn(sdp, blocks || revokes)) { + fs_warn(sdp, "(%s, %u)\n", file, line); + return -EINVAL; + } + + tr = kzalloc(sizeof(struct gfs2_trans), GFP_KERNEL); + if (!tr) + return -ENOMEM; + + tr->tr_file = file; + tr->tr_line = line; + tr->tr_blocks = blocks; + tr->tr_revokes = revokes; + tr->tr_reserved = 1; + if (blocks) + tr->tr_reserved += 1 + blocks; + if (revokes) + tr->tr_reserved += gfs2_struct2blk(sdp, revokes, + sizeof(uint64_t)); + INIT_LIST_HEAD(&tr->tr_list_buf); + + error = -ENOMEM; + tr->tr_t_gh = gfs2_holder_get(sdp->sd_trans_gl, LM_ST_SHARED, + GL_NEVER_RECURSE, GFP_KERNEL); + if (!tr->tr_t_gh) + goto fail; + + error = gfs2_glock_nq(tr->tr_t_gh); + if (error) + goto fail_holder_put; + + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { + tr->tr_t_gh->gh_flags |= GL_NOCACHE; + error = -EROFS; + goto fail_gunlock; + } + + error = gfs2_log_reserve(sdp, tr->tr_reserved); + if (error) + goto fail_gunlock; + + set_transaction(tr); + + return 0; + + fail_gunlock: + gfs2_glock_dq(tr->tr_t_gh); + + fail_holder_put: + gfs2_holder_put(tr->tr_t_gh); + + fail: + kfree(tr); + + return error; +} + +void gfs2_trans_end(struct gfs2_sbd *sdp) +{ + struct gfs2_trans *tr; + struct gfs2_holder *t_gh; + + tr = get_transaction; + set_transaction(NULL); + + if (gfs2_assert_warn(sdp, tr)) + return; + + t_gh = tr->tr_t_gh; + tr->tr_t_gh = NULL; + + if (!tr->tr_touched) { + gfs2_log_release(sdp, tr->tr_reserved); + kfree(tr); + + gfs2_glock_dq(t_gh); + gfs2_holder_put(t_gh); + + return; + } + + if (gfs2_assert_withdraw(sdp, tr->tr_num_buf <= tr->tr_blocks)) + fs_err(sdp, "tr_num_buf = %u, tr_blocks = %u " + "tr_file = %s, tr_line = %u\n", + tr->tr_num_buf, tr->tr_blocks, + tr->tr_file, tr->tr_line); + if (gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes)) + fs_err(sdp, "tr_num_revoke = %u, tr_revokes = %u " + "tr_file = %s, tr_line = %u\n", + tr->tr_num_revoke, tr->tr_revokes, + tr->tr_file, tr->tr_line); + + gfs2_log_commit(sdp, tr); + + gfs2_glock_dq(t_gh); + gfs2_holder_put(t_gh); + + if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS) + gfs2_log_flush(sdp); +} + +void gfs2_trans_add_gl(struct gfs2_glock *gl) +{ + lops_add(gl->gl_sbd, &gl->gl_le); +} + +/** + * gfs2_trans_add_bh - Add a to-be-modified buffer to the current transaction + * @gl: the glock the buffer belongs to + * @bh: The buffer to add + * + */ + +void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh) +{ + struct gfs2_sbd *sdp = gl->gl_sbd; + struct gfs2_bufdata *bd; + + bd = get_v2bd(bh); + if (bd) + gfs2_assert(sdp, bd->bd_gl == gl); + else { + gfs2_meta_attach_bufdata(gl, bh); + bd = get_v2bd(bh); + } + + lops_add(sdp, &bd->bd_le); +} + +void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno) +{ + struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke), + GFP_KERNEL | __GFP_NOFAIL); + lops_init_le(&rv->rv_le, &gfs2_revoke_lops); + rv->rv_blkno = blkno; + lops_add(sdp, &rv->rv_le); +} + +void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno) +{ + struct gfs2_revoke *rv; + int found = 0; + + gfs2_log_lock(sdp); + + list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) { + if (rv->rv_blkno == blkno) { + list_del(&rv->rv_le.le_list); + gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); + sdp->sd_log_num_revoke--; + found = 1; + break; + } + } + + gfs2_log_unlock(sdp); + + if (found) { + kfree(rv); + get_transaction->tr_num_revoke_rm++; + } +} + +void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd) +{ + lops_add(rgd->rd_sbd, &rgd->rd_le); +} + +void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh) +{ + struct gfs2_databuf *db; + + db = get_v2db(bh); + if (!db) { + db = kmalloc(sizeof(struct gfs2_databuf), + GFP_KERNEL | __GFP_NOFAIL); + lops_init_le(&db->db_le, &gfs2_databuf_lops); + get_bh(bh); + db->db_bh = bh; + set_v2db(bh, db); + lops_add(sdp, &db->db_le); + } +} + --- a/fs/gfs2/trans.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/trans.h 2005-10-10 11:28:49.371775254 -0500 @@ -0,0 +1,40 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __TRANS_DOT_H__ +#define __TRANS_DOT_H__ + +#define RES_DINODE 1 +#define RES_INDIRECT 1 +#define RES_JDATA 1 +#define RES_DATA 1 +#define RES_LEAF 1 +#define RES_RG_BIT 2 +#define RES_EATTR 1 +#define RES_UNLINKED 1 +#define RES_STATFS 1 +#define RES_QUOTA 2 + +#define gfs2_trans_begin(sdp, blocks, revokes) \ +gfs2_trans_begin_i((sdp), (blocks), (revokes), __FILE__, __LINE__) + +int gfs2_trans_begin_i(struct gfs2_sbd *sdp, + unsigned int blocks, unsigned int revokes, + char *file, unsigned int line); + +void gfs2_trans_end(struct gfs2_sbd *sdp); + +void gfs2_trans_add_gl(struct gfs2_glock *gl); +void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh); +void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno); +void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno); +void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); +void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh); + +#endif /* __TRANS_DOT_H__ */ --- a/fs/gfs2/unlinked.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/unlinked.c 2005-10-10 11:28:49.378774162 -0500 @@ -0,0 +1,453 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bmap.h" +#include "inode.h" +#include "meta_io.h" +#include "trans.h" +#include "unlinked.h" + +static int munge_ondisk(struct gfs2_sbd *sdp, unsigned int slot, + struct gfs2_unlinked_tag *ut) +{ + struct gfs2_inode *ip = sdp->sd_ut_inode; + unsigned int block, offset; + uint64_t dblock; + int new = 0; + struct buffer_head *bh; + int error; + + block = slot / sdp->sd_ut_per_block; + offset = slot % sdp->sd_ut_per_block; + + error = gfs2_block_map(ip, block, &new, &dblock, NULL); + if (error) + return error; + error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, &bh); + if (error) + return error; + if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) { + error = -EIO; + goto out; + } + + down(&sdp->sd_unlinked_mutex); + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_unlinked_tag_out(ut, bh->b_data + + sizeof(struct gfs2_meta_header) + + offset * sizeof(struct gfs2_unlinked_tag)); + up(&sdp->sd_unlinked_mutex); + + out: + brelse(bh); + + return error; +} + +static void ul_hash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + spin_lock(&sdp->sd_unlinked_spin); + list_add(&ul->ul_list, &sdp->sd_unlinked_list); + gfs2_assert(sdp, ul->ul_count); + ul->ul_count++; + atomic_inc(&sdp->sd_unlinked_count); + spin_unlock(&sdp->sd_unlinked_spin); +} + +static void ul_unhash(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + spin_lock(&sdp->sd_unlinked_spin); + list_del_init(&ul->ul_list); + gfs2_assert(sdp, ul->ul_count > 1); + ul->ul_count--; + gfs2_assert_warn(sdp, atomic_read(&sdp->sd_unlinked_count) > 0); + atomic_dec(&sdp->sd_unlinked_count); + spin_unlock(&sdp->sd_unlinked_spin); +} + +static struct gfs2_unlinked *ul_fish(struct gfs2_sbd *sdp) +{ + struct list_head *head; + struct gfs2_unlinked *ul; + int found = 0; + + if (sdp->sd_vfs->s_flags & MS_RDONLY) + return NULL; + + spin_lock(&sdp->sd_unlinked_spin); + + head = &sdp->sd_unlinked_list; + + list_for_each_entry(ul, head, ul_list) { + if (test_bit(ULF_LOCKED, &ul->ul_flags)) + continue; + + list_move_tail(&ul->ul_list, head); + ul->ul_count++; + set_bit(ULF_LOCKED, &ul->ul_flags); + found = 1; + + break; + } + + if (!found) + ul = NULL; + + spin_unlock(&sdp->sd_unlinked_spin); + + return ul; +} + +/** + * enforce_limit - limit the number of inodes waiting to be deallocated + * @sdp: the filesystem + * + * Returns: errno + */ + +static void enforce_limit(struct gfs2_sbd *sdp) +{ + unsigned int tries = 0, min = 0; + int error; + + if (atomic_read(&sdp->sd_unlinked_count) >= + gfs2_tune_get(sdp, gt_ilimit)) { + tries = gfs2_tune_get(sdp, gt_ilimit_tries); + min = gfs2_tune_get(sdp, gt_ilimit_min); + } + + while (tries--) { + struct gfs2_unlinked *ul = ul_fish(sdp); + if (!ul) + break; + error = gfs2_inode_dealloc(sdp, ul); + gfs2_unlinked_put(sdp, ul); + + if (!error) { + if (!--min) + break; + } else if (error != 1) + break; + } +} + +static struct gfs2_unlinked *ul_alloc(struct gfs2_sbd *sdp) +{ + struct gfs2_unlinked *ul; + + ul = kzalloc(sizeof(struct gfs2_unlinked), GFP_KERNEL); + if (ul) { + INIT_LIST_HEAD(&ul->ul_list); + ul->ul_count = 1; + set_bit(ULF_LOCKED, &ul->ul_flags); + } + + return ul; +} + +int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul) +{ + unsigned int c, o = 0, b; + unsigned char byte = 0; + + enforce_limit(sdp); + + *ul = ul_alloc(sdp); + if (!*ul) + return -ENOMEM; + + spin_lock(&sdp->sd_unlinked_spin); + + for (c = 0; c < sdp->sd_unlinked_chunks; c++) + for (o = 0; o < PAGE_SIZE; o++) { + byte = sdp->sd_unlinked_bitmap[c][o]; + if (byte != 0xFF) + goto found; + } + + goto fail; + + found: + for (b = 0; b < 8; b++) + if (!(byte & (1 << b))) + break; + (*ul)->ul_slot = c * (8 * PAGE_SIZE) + o * 8 + b; + + if ((*ul)->ul_slot >= sdp->sd_unlinked_slots) + goto fail; + + sdp->sd_unlinked_bitmap[c][o] |= 1 << b; + + spin_unlock(&sdp->sd_unlinked_spin); + + return 0; + + fail: + spin_unlock(&sdp->sd_unlinked_spin); + kfree(*ul); + return -ENOSPC; +} + +void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + gfs2_assert_warn(sdp, test_and_clear_bit(ULF_LOCKED, &ul->ul_flags)); + + spin_lock(&sdp->sd_unlinked_spin); + gfs2_assert(sdp, ul->ul_count); + ul->ul_count--; + if (!ul->ul_count) { + gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, ul->ul_slot, 0); + spin_unlock(&sdp->sd_unlinked_spin); + kfree(ul); + } else + spin_unlock(&sdp->sd_unlinked_spin); +} + +int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + int error; + + gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags)); + gfs2_assert_warn(sdp, list_empty(&ul->ul_list)); + + error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut); + if (!error) + ul_hash(sdp, ul); + + return error; +} + +int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + int error; + + gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags)); + gfs2_assert_warn(sdp, !list_empty(&ul->ul_list)); + + error = munge_ondisk(sdp, ul->ul_slot, &ul->ul_ut); + + return error; +} + +int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul) +{ + struct gfs2_unlinked_tag ut; + int error; + + gfs2_assert_warn(sdp, test_bit(ULF_LOCKED, &ul->ul_flags)); + gfs2_assert_warn(sdp, !list_empty(&ul->ul_list)); + + memset(&ut, 0, sizeof(struct gfs2_unlinked_tag)); + + error = munge_ondisk(sdp, ul->ul_slot, &ut); + if (error) + return error; + + ul_unhash(sdp, ul); + + return 0; +} + +/** + * gfs2_unlinked_dealloc - Go through the list of inodes to be deallocated + * @sdp: the filesystem + * + * Returns: errno + */ + +int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp) +{ + unsigned int hits, strikes; + int error; + + for (;;) { + hits = 0; + strikes = 0; + + for (;;) { + struct gfs2_unlinked *ul = ul_fish(sdp); + if (!ul) + return 0; + error = gfs2_inode_dealloc(sdp, ul); + gfs2_unlinked_put(sdp, ul); + + if (!error) { + hits++; + if (strikes) + strikes--; + } else if (error == 1) { + strikes++; + if (strikes >= + atomic_read(&sdp->sd_unlinked_count)) { + error = 0; + break; + } + } else + return error; + } + + if (!hits || kthread_should_stop()) + break; + + cond_resched(); + } + + return 0; +} + +int gfs2_unlinked_init(struct gfs2_sbd *sdp) +{ + struct gfs2_inode *ip = sdp->sd_ut_inode; + unsigned int blocks = ip->i_di.di_size >> sdp->sd_sb.sb_bsize_shift; + unsigned int x, slot = 0; + unsigned int found = 0; + uint64_t dblock; + uint32_t extlen = 0; + int error; + + if (!ip->i_di.di_size || + ip->i_di.di_size > (64 << 20) || + ip->i_di.di_size & (sdp->sd_sb.sb_bsize - 1)) { + gfs2_consist_inode(ip); + return -EIO; + } + sdp->sd_unlinked_slots = blocks * sdp->sd_ut_per_block; + sdp->sd_unlinked_chunks = DIV_RU(sdp->sd_unlinked_slots, 8 * PAGE_SIZE); + + error = -ENOMEM; + + sdp->sd_unlinked_bitmap = kcalloc(sdp->sd_unlinked_chunks, + sizeof(unsigned char *), + GFP_KERNEL); + if (!sdp->sd_unlinked_bitmap) + return error; + + for (x = 0; x < sdp->sd_unlinked_chunks; x++) { + sdp->sd_unlinked_bitmap[x] = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!sdp->sd_unlinked_bitmap[x]) + goto fail; + } + + for (x = 0; x < blocks; x++) { + struct buffer_head *bh; + unsigned int y; + + if (!extlen) { + int new = 0; + error = gfs2_block_map(ip, x, &new, &dblock, &extlen); + if (error) + goto fail; + } + gfs2_meta_ra(ip->i_gl, dblock, extlen); + error = gfs2_meta_read(ip->i_gl, dblock, DIO_START | DIO_WAIT, + &bh); + if (error) + goto fail; + error = -EIO; + if (gfs2_metatype_check(sdp, bh, GFS2_METATYPE_UT)) { + brelse(bh); + goto fail; + } + + for (y = 0; + y < sdp->sd_ut_per_block && slot < sdp->sd_unlinked_slots; + y++, slot++) { + struct gfs2_unlinked_tag ut; + struct gfs2_unlinked *ul; + + gfs2_unlinked_tag_in(&ut, bh->b_data + + sizeof(struct gfs2_meta_header) + + y * sizeof(struct gfs2_unlinked_tag)); + if (!ut.ut_inum.no_addr) + continue; + + error = -ENOMEM; + ul = ul_alloc(sdp); + if (!ul) { + brelse(bh); + goto fail; + } + ul->ul_ut = ut; + ul->ul_slot = slot; + + spin_lock(&sdp->sd_unlinked_spin); + gfs2_icbit_munge(sdp, sdp->sd_unlinked_bitmap, slot, 1); + spin_unlock(&sdp->sd_unlinked_spin); + ul_hash(sdp, ul); + + gfs2_unlinked_put(sdp, ul); + found++; + } + + brelse(bh); + dblock++; + extlen--; + } + + if (found) + fs_info(sdp, "found %u unlinked inodes\n", found); + + return 0; + + fail: + gfs2_unlinked_cleanup(sdp); + return error; +} + +/** + * gfs2_unlinked_cleanup - get rid of any extra struct gfs2_unlinked structures + * @sdp: the filesystem + * + */ + +void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp) +{ + struct list_head *head = &sdp->sd_unlinked_list; + struct gfs2_unlinked *ul; + unsigned int x; + + spin_lock(&sdp->sd_unlinked_spin); + while (!list_empty(head)) { + ul = list_entry(head->next, struct gfs2_unlinked, ul_list); + + if (ul->ul_count > 1) { + list_move_tail(&ul->ul_list, head); + spin_unlock(&sdp->sd_unlinked_spin); + schedule(); + spin_lock(&sdp->sd_unlinked_spin); + continue; + } + + list_del_init(&ul->ul_list); + atomic_dec(&sdp->sd_unlinked_count); + + gfs2_assert_warn(sdp, ul->ul_count == 1); + gfs2_assert_warn(sdp, !test_bit(ULF_LOCKED, &ul->ul_flags)); + kfree(ul); + } + spin_unlock(&sdp->sd_unlinked_spin); + + gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_unlinked_count)); + + if (sdp->sd_unlinked_bitmap) { + for (x = 0; x < sdp->sd_unlinked_chunks; x++) + kfree(sdp->sd_unlinked_bitmap[x]); + kfree(sdp->sd_unlinked_bitmap); + } +} + --- a/fs/gfs2/unlinked.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/unlinked.h 2005-10-10 11:28:49.379774006 -0500 @@ -0,0 +1,25 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __UNLINKED_DOT_H__ +#define __UNLINKED_DOT_H__ + +int gfs2_unlinked_get(struct gfs2_sbd *sdp, struct gfs2_unlinked **ul); +void gfs2_unlinked_put(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul); + +int gfs2_unlinked_ondisk_add(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul); +int gfs2_unlinked_ondisk_munge(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul); +int gfs2_unlinked_ondisk_rm(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul); + +int gfs2_unlinked_dealloc(struct gfs2_sbd *sdp); + +int gfs2_unlinked_init(struct gfs2_sbd *sdp); +void gfs2_unlinked_cleanup(struct gfs2_sbd *sdp); + +#endif /* __UNLINKED_DOT_H__ */ --- a/fs/gfs2/util.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/util.c 2005-10-10 11:28:49.385773071 -0500 @@ -0,0 +1,302 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "glock.h" +#include "lm.h" + +kmem_cache_t *gfs2_glock_cachep; +kmem_cache_t *gfs2_inode_cachep; +kmem_cache_t *gfs2_bufdata_cachep; + +uint32_t gfs2_disk_hash(const char *data, int len) +{ + return crc32_le(0xFFFFFFFF, data, len) ^ 0xFFFFFFFF; +} + +void gfs2_assert_i(struct gfs2_sbd *sdp) +{ + printk(KERN_EMERG "GFS2: fsid=%s: fatal assertion failed\n", + sdp->sd_fsname); +} + +/** + * gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false + * Returns: -1 if this call withdrew the machine, + * -2 if it was already withdrawn + */ + +int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line) +{ + int me; + me = gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n" + "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", + sdp->sd_fsname, assertion, + sdp->sd_fsname, function, file, line); + return (me) ? -1 : -2; +} + +/** + * gfs2_assert_warn_i - Print a message to the console if @assertion is false + * Returns: -1 if we printed something + * -2 if we didn't + */ + +int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line) +{ + if (time_before(jiffies, + sdp->sd_last_warning + + gfs2_tune_get(sdp, gt_complain_secs) * HZ)) + return -2; + + printk(KERN_WARNING + "GFS2: fsid=%s: warning: assertion \"%s\" failed\n" + "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", + sdp->sd_fsname, assertion, + sdp->sd_fsname, function, file, line); + + if (sdp->sd_args.ar_debug) + BUG(); + + sdp->sd_last_warning = jiffies; + + return -1; +} + +/** + * gfs2_consist_i - Flag a filesystem consistency error and withdraw + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function, + char *file, unsigned int line) +{ + int rv; + rv = gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: filesystem consistency error\n" + "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", + sdp->sd_fsname, + sdp->sd_fsname, function, file, line); + return rv; +} + +/** + * gfs2_consist_inode_i - Flag an inode consistency error and withdraw + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, + const char *function, char *file, unsigned int line) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + int rv; + rv = gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: filesystem consistency error\n" + "GFS2: fsid=%s: inode = %llu %llu\n" + "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", + sdp->sd_fsname, + sdp->sd_fsname, ip->i_num.no_formal_ino, ip->i_num.no_addr, + sdp->sd_fsname, function, file, line); + return rv; +} + +/** + * gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, + const char *function, char *file, unsigned int line) +{ + struct gfs2_sbd *sdp = rgd->rd_sbd; + int rv; + rv = gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: filesystem consistency error\n" + "GFS2: fsid=%s: RG = %llu\n" + "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", + sdp->sd_fsname, + sdp->sd_fsname, rgd->rd_ri.ri_addr, + sdp->sd_fsname, function, file, line); + return rv; +} + +/** + * gfs2_meta_check_ii - Flag a magic number consistency error and withdraw + * Returns: -1 if this call withdrew the machine, + * -2 if it was already withdrawn + */ + +int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, + const char *type, const char *function, char *file, + unsigned int line) +{ + int me; + me = gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: invalid metadata block\n" + "GFS2: fsid=%s: bh = %llu (%s)\n" + "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", + sdp->sd_fsname, + sdp->sd_fsname, (uint64_t)bh->b_blocknr, type, + sdp->sd_fsname, function, file, line); + return (me) ? -1 : -2; +} + +/** + * gfs2_metatype_check_ii - Flag a metadata type consistency error and withdraw + * Returns: -1 if this call withdrew the machine, + * -2 if it was already withdrawn + */ + +int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, + uint16_t type, uint16_t t, const char *function, + char *file, unsigned int line) +{ + int me; + me = gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: invalid metadata block\n" + "GFS2: fsid=%s: bh = %llu (type: exp=%u, found=%u)\n" + "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", + sdp->sd_fsname, + sdp->sd_fsname, (uint64_t)bh->b_blocknr, type, t, + sdp->sd_fsname, function, file, line); + return (me) ? -1 : -2; +} + +/** + * gfs2_io_error_i - Flag an I/O error and withdraw + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, + unsigned int line) +{ + int rv; + rv = gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: I/O error\n" + "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", + sdp->sd_fsname, + sdp->sd_fsname, function, file, line); + return rv; +} + +/** + * gfs2_io_error_bh_i - Flag a buffer I/O error and withdraw + * Returns: -1 if this call withdrew the machine, + * 0 if it was already withdrawn + */ + +int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, + const char *function, char *file, unsigned int line) +{ + int rv; + rv = gfs2_lm_withdraw(sdp, + "GFS2: fsid=%s: fatal: I/O error\n" + "GFS2: fsid=%s: block = %llu\n" + "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", + sdp->sd_fsname, + sdp->sd_fsname, (uint64_t)bh->b_blocknr, + sdp->sd_fsname, function, file, line); + return rv; +} + +/** + * gfs2_add_bh_to_ub - copy a buffer up to user space + * @ub: the structure representing where to copy + * @bh: the buffer + * + * Returns: errno + */ + +int gfs2_add_bh_to_ub(struct gfs2_user_buffer *ub, struct buffer_head *bh) +{ + uint64_t blkno = bh->b_blocknr; + + if (ub->ub_count + sizeof(uint64_t) + bh->b_size > ub->ub_size) + return -ENOMEM; + + if (copy_to_user(ub->ub_data + ub->ub_count, + &blkno, + sizeof(uint64_t))) + return -EFAULT; + ub->ub_count += sizeof(uint64_t); + + if (copy_to_user(ub->ub_data + ub->ub_count, + bh->b_data, + bh->b_size)) + return -EFAULT; + ub->ub_count += bh->b_size; + + return 0; +} + +int gfs2_printf_i(char *buf, unsigned int size, unsigned int *count, + char *fmt, ...) +{ + va_list args; + int left, out; + + if (!buf) { + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); + return 0; + } + + left = size - *count; + if (left <= 0) + return 1; + + va_start(args, fmt); + out = vsnprintf(buf + *count, left, fmt, args); + va_end(args); + + if (out < left) + *count += out; + else + return 1; + + return 0; +} + +void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, + unsigned int bit, int new_value) +{ + unsigned int c, o, b = bit; + int old_value; + + c = b / (8 * PAGE_SIZE); + b %= 8 * PAGE_SIZE; + o = b / 8; + b %= 8; + + old_value = (bitmap[c][o] & (1 << b)); + gfs2_assert_withdraw(sdp, !old_value != !new_value); + + if (new_value) + bitmap[c][o] |= 1 << b; + else + bitmap[c][o] &= ~(1 << b); +} + --- a/fs/gfs2/util.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/util.h 2005-10-10 11:28:49.389772447 -0500 @@ -0,0 +1,199 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __UTIL_DOT_H__ +#define __UTIL_DOT_H__ + +uint32_t gfs2_disk_hash(const char *data, int len); + + +#define fs_printk(level, fs, fmt, arg...) \ + printk(level "GFS2: fsid=%s: " fmt , (fs)->sd_fsname , ## arg) + +#define fs_info(fs, fmt, arg...) \ + fs_printk(KERN_INFO , fs , fmt , ## arg) + +#define fs_warn(fs, fmt, arg...) \ + fs_printk(KERN_WARNING , fs , fmt , ## arg) + +#define fs_err(fs, fmt, arg...) \ + fs_printk(KERN_ERR, fs , fmt , ## arg) + + +void gfs2_assert_i(struct gfs2_sbd *sdp); + +#define gfs2_assert(sdp, assertion) \ +do { \ + if (unlikely(!(assertion))) { \ + gfs2_assert_i(sdp); \ + BUG(); \ + } \ +} while (0) + + +int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line); + +#define gfs2_assert_withdraw(sdp, assertion) \ +((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \ + __FUNCTION__, __FILE__, __LINE__)) + + +int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, + const char *function, char *file, unsigned int line); + +#define gfs2_assert_warn(sdp, assertion) \ +((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \ + __FUNCTION__, __FILE__, __LINE__)) + + +int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, + const char *function, char *file, unsigned int line); + +#define gfs2_consist(sdp) \ +gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__) + + +int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, + const char *function, char *file, unsigned int line); + +#define gfs2_consist_inode(ip) \ +gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__) + + +int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, + const char *function, char *file, unsigned int line); + +#define gfs2_consist_rgrpd(rgd) \ +gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__) + + +int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, + const char *type, const char *function, + char *file, unsigned int line); + +static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp, + struct buffer_head *bh, + const char *function, + char *file, unsigned int line) +{ + struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; + uint32_t magic = mh->mh_magic; + uint64_t blkno = mh->mh_blkno; + magic = le32_to_cpu(magic); + blkno = le64_to_cpu(blkno); + if (unlikely(magic != GFS2_MAGIC)) + return gfs2_meta_check_ii(sdp, bh, "magic number", function, + file, line); + if (unlikely(blkno != bh->b_blocknr)) + return gfs2_meta_check_ii(sdp, bh, "block number", function, + file, line); + return 0; +} + +#define gfs2_meta_check(sdp, bh) \ +gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__) + + +int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, + uint16_t type, uint16_t t, + const char *function, + char *file, unsigned int line); + +static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp, + struct buffer_head *bh, + uint16_t type, + const char *function, + char *file, unsigned int line) +{ + struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; + uint32_t magic = mh->mh_magic; + uint16_t t = mh->mh_type; + uint64_t blkno = mh->mh_blkno; + magic = le32_to_cpu(magic); + blkno = le64_to_cpu(blkno); + if (unlikely(magic != GFS2_MAGIC)) + return gfs2_meta_check_ii(sdp, bh, "magic number", function, + file, line); + if (unlikely(blkno != bh->b_blocknr)) + return gfs2_meta_check_ii(sdp, bh, "block number", function, + file, line); + t = le16_to_cpu(t); + if (unlikely(t != type)) + return gfs2_metatype_check_ii(sdp, bh, type, t, function, + file, line); + return 0; +} + +#define gfs2_metatype_check(sdp, bh, type) \ +gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__) + +static inline void gfs2_metatype_set(struct buffer_head *bh, uint16_t type, + uint16_t format) +{ + struct gfs2_meta_header *mh; + mh = (struct gfs2_meta_header *)bh->b_data; + mh->mh_type = cpu_to_le16(type); + mh->mh_format = cpu_to_le16(format); +} + + +int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, + char *file, unsigned int line); + +#define gfs2_io_error(sdp) \ +gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__); + + +int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, + const char *function, char *file, unsigned int line); + +#define gfs2_io_error_bh(sdp, bh) \ +gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__); + + +extern kmem_cache_t *gfs2_glock_cachep; +extern kmem_cache_t *gfs2_inode_cachep; +extern kmem_cache_t *gfs2_bufdata_cachep; + +struct gfs2_user_buffer { + char __user *ub_data; + unsigned int ub_size; + unsigned int ub_count; +}; + +int gfs2_add_bh_to_ub(struct gfs2_user_buffer *ub, struct buffer_head *bh); + +static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, + unsigned int *p) +{ + unsigned int x; + spin_lock(>->gt_spin); + x = *p; + spin_unlock(>->gt_spin); + return x; +} + +#define gfs2_tune_get(sdp, field) \ +gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) + +int gfs2_printf_i(char *buf, unsigned int size, unsigned int *count, + char *fmt, ...) __attribute__ ((format(printf, 4, 5))); + +#define gfs2_printf(fmt, args...) \ +do { \ + if (gfs2_printf_i(buf, size, count, fmt, ##args)) \ + goto out; \ +} while(0) + +void gfs2_icbit_munge(struct gfs2_sbd *sdp, unsigned char **bitmap, + unsigned int bit, int new_value); + +#endif /* __UTIL_DOT_H__ */ + [PATCH 06/16] GFS: directories Code that handles directory operations. Signed-off-by: Ken Preslan Signed-off-by: David Teigland --- fs/gfs2/dir.c | 2157 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/dir.h | 51 + 2 files changed, 2208 insertions(+) --- a/fs/gfs2/dir.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/dir.c 2005-10-10 11:28:49.154809087 -0500 @@ -0,0 +1,2157 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +/* +* Implements Extendible Hashing as described in: +* "Extendible Hashing" by Fagin, et al in +* __ACM Trans. on Database Systems__, Sept 1979. +* +* +* Here's the layout of dirents which is essentially the same as that of ext2 +* within a single block. The field de_name_len is the number of bytes +* actually required for the name (no null terminator). The field de_rec_len +* is the number of bytes allocated to the dirent. The offset of the next +* dirent in the block is (dirent + dirent->de_rec_len). When a dirent is +* deleted, the preceding dirent inherits its allocated space, ie +* prev->de_rec_len += deleted->de_rec_len. Since the next dirent is obtained +* by adding de_rec_len to the current dirent, this essentially causes the +* deleted dirent to get jumped over when iterating through all the dirents. +* +* When deleting the first dirent in a block, there is no previous dirent so +* the field de_ino is set to zero to designate it as deleted. When allocating +* a dirent, gfs2_dirent_alloc iterates through the dirents in a block. If the +* first dirent has (de_ino == 0) and de_rec_len is large enough, this first +* dirent is allocated. Otherwise it must go through all the 'used' dirents +* searching for one in which the amount of total space minus the amount of +* used space will provide enough space for the new dirent. +* +* There are two types of blocks in which dirents reside. In a stuffed dinode, +* the dirents begin at offset sizeof(struct gfs2_dinode) from the beginning of +* the block. In leaves, they begin at offset sizeof(struct gfs2_leaf) from the +* beginning of the leaf block. The dirents reside in leaves when +* +* dip->i_di.di_flags & GFS2_DIF_EXHASH is true +* +* Otherwise, the dirents are "linear", within a single stuffed dinode block. +* +* When the dirents are in leaves, the actual contents of the directory file are +* used as an array of 64-bit block pointers pointing to the leaf blocks. The +* dirents are NOT in the directory file itself. There can be more than one block +* pointer in the array that points to the same leaf. In fact, when a directory +* is first converted from linear to exhash, all of the pointers point to the +* same leaf. +* +* When a leaf is completely full, the size of the hash table can be +* doubled unless it is already at the maximum size which is hard coded into +* GFS2_DIR_MAX_DEPTH. After that, leaves are chained together in a linked list, +* but never before the maximum hash table size has been reached. +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "dir.h" +#include "glock.h" +#include "inode.h" +#include "jdata.h" +#include "meta_io.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +#define IS_LEAF 1 /* Hashed (leaf) directory */ +#define IS_DINODE 2 /* Linear (stuffed dinode block) directory */ + +#if 1 +#define gfs2_disk_hash2offset(h) (((uint64_t)(h)) >> 1) +#define gfs2_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p)) << 1)) +#else +#define gfs2_disk_hash2offset(h) (((uint64_t)(h))) +#define gfs2_dir_offset2hash(p) ((uint32_t)(((uint64_t)(p)))) +#endif + +typedef int (*leaf_call_t) (struct gfs2_inode *dip, + uint32_t index, uint32_t len, uint64_t leaf_no, + void *data); + +/** + * int gfs2_filecmp - Compare two filenames + * @file1: The first filename + * @file2: The second filename + * @len_of_file2: The length of the second file + * + * This routine compares two filenames and returns 1 if they are equal. + * + * Returns: 1 if the files are the same, otherwise 0. + */ + +int gfs2_filecmp(struct qstr *file1, char *file2, int len_of_file2) +{ + if (file1->len != len_of_file2) + return 0; + if (memcmp(file1->name, file2, file1->len)) + return 0; + return 1; +} + +/** + * dirent_first - Return the first dirent + * @dip: the directory + * @bh: The buffer + * @dent: Pointer to list of dirents + * + * return first dirent whether bh points to leaf or stuffed dinode + * + * Returns: IS_LEAF, IS_DINODE, or -errno + */ + +static int dirent_first(struct gfs2_inode *dip, struct buffer_head *bh, + struct gfs2_dirent **dent) +{ + struct gfs2_meta_header *h = (struct gfs2_meta_header *)bh->b_data; + + if (le16_to_cpu(h->mh_type) == GFS2_METATYPE_LF) { + if (gfs2_meta_check(dip->i_sbd, bh)) + return -EIO; + *dent = (struct gfs2_dirent *)(bh->b_data + + sizeof(struct gfs2_leaf)); + return IS_LEAF; + } else { + if (gfs2_metatype_check(dip->i_sbd, bh, GFS2_METATYPE_DI)) + return -EIO; + *dent = (struct gfs2_dirent *)(bh->b_data + + sizeof(struct gfs2_dinode)); + return IS_DINODE; + } +} + +/** + * dirent_next - Next dirent + * @dip: the directory + * @bh: The buffer + * @dent: Pointer to list of dirents + * + * Returns: 0 on success, error code otherwise + */ + +static int dirent_next(struct gfs2_inode *dip, struct buffer_head *bh, + struct gfs2_dirent **dent) +{ + struct gfs2_dirent *tmp, *cur; + char *bh_end; + uint32_t cur_rec_len; + + cur = *dent; + bh_end = bh->b_data + bh->b_size; + cur_rec_len = le32_to_cpu(cur->de_rec_len); + + if ((char *)cur + cur_rec_len >= bh_end) { + if ((char *)cur + cur_rec_len > bh_end) { + gfs2_consist_inode(dip); + return -EIO; + } + return -ENOENT; + } + + tmp = (struct gfs2_dirent *)((char *)cur + cur_rec_len); + + if ((char *)tmp + le32_to_cpu(tmp->de_rec_len) > bh_end) { + gfs2_consist_inode(dip); + return -EIO; + } + /* Only the first dent could ever have de_inum.no_addr == 0 */ + if (!tmp->de_inum.no_addr) { + gfs2_consist_inode(dip); + return -EIO; + } + + *dent = tmp; + + return 0; +} + +/** + * dirent_del - Delete a dirent + * @dip: The GFS2 inode + * @bh: The buffer + * @prev: The previous dirent + * @cur: The current dirent + * + */ + +static void dirent_del(struct gfs2_inode *dip, struct buffer_head *bh, + struct gfs2_dirent *prev, struct gfs2_dirent *cur) +{ + uint32_t cur_rec_len, prev_rec_len; + + if (!cur->de_inum.no_addr) { + gfs2_consist_inode(dip); + return; + } + + gfs2_trans_add_bh(dip->i_gl, bh); + + /* If there is no prev entry, this is the first entry in the block. + The de_rec_len is already as big as it needs to be. Just zero + out the inode number and return. */ + + if (!prev) { + cur->de_inum.no_addr = 0; /* No endianess worries */ + return; + } + + /* Combine this dentry with the previous one. */ + + prev_rec_len = le32_to_cpu(prev->de_rec_len); + cur_rec_len = le32_to_cpu(cur->de_rec_len); + + if ((char *)prev + prev_rec_len != (char *)cur) + gfs2_consist_inode(dip); + if ((char *)cur + cur_rec_len > bh->b_data + bh->b_size) + gfs2_consist_inode(dip); + + prev_rec_len += cur_rec_len; + prev->de_rec_len = cpu_to_le32(prev_rec_len); +} + +/** + * gfs2_dirent_alloc - Allocate a directory entry + * @dip: The GFS2 inode + * @bh: The buffer + * @name_len: The length of the name + * @dent_out: Pointer to list of dirents + * + * Returns: 0 on success, error code otherwise + */ + +int gfs2_dirent_alloc(struct gfs2_inode *dip, struct buffer_head *bh, + int name_len, struct gfs2_dirent **dent_out) +{ + struct gfs2_dirent *dent, *new; + unsigned int rec_len = GFS2_DIRENT_SIZE(name_len); + unsigned int entries = 0, offset = 0; + int type; + + type = dirent_first(dip, bh, &dent); + if (type < 0) + return type; + + if (type == IS_LEAF) { + struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data; + entries = le16_to_cpu(leaf->lf_entries); + offset = sizeof(struct gfs2_leaf); + } else { + struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data; + entries = le32_to_cpu(dinode->di_entries); + offset = sizeof(struct gfs2_dinode); + } + + if (!entries) { + if (dent->de_inum.no_addr) { + gfs2_consist_inode(dip); + return -EIO; + } + + gfs2_trans_add_bh(dip->i_gl, bh); + + dent->de_rec_len = bh->b_size - offset; + dent->de_rec_len = cpu_to_le32(dent->de_rec_len); + dent->de_name_len = name_len; + + *dent_out = dent; + return 0; + } + + do { + uint32_t cur_rec_len, cur_name_len; + + cur_rec_len = le32_to_cpu(dent->de_rec_len); + cur_name_len = dent->de_name_len; + + if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) || + (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len)) { + gfs2_trans_add_bh(dip->i_gl, bh); + + if (dent->de_inum.no_addr) { + new = (struct gfs2_dirent *)((char *)dent + + GFS2_DIRENT_SIZE(cur_name_len)); + memset(new, 0, sizeof(struct gfs2_dirent)); + + new->de_rec_len = cur_rec_len - GFS2_DIRENT_SIZE(cur_name_len); + new->de_rec_len = cpu_to_le32(new->de_rec_len); + new->de_name_len = name_len; + + dent->de_rec_len = cur_rec_len - le32_to_cpu(new->de_rec_len); + dent->de_rec_len = cpu_to_le32(dent->de_rec_len); + + *dent_out = new; + return 0; + } + + dent->de_name_len = name_len; + + *dent_out = dent; + return 0; + } + } while (dirent_next(dip, bh, &dent) == 0); + + return -ENOSPC; +} + +/** + * dirent_fits - See if we can fit a entry in this buffer + * @dip: The GFS2 inode + * @bh: The buffer + * @name_len: The length of the name + * + * Returns: 1 if it can fit, 0 otherwise + */ + +static int dirent_fits(struct gfs2_inode *dip, struct buffer_head *bh, + int name_len) +{ + struct gfs2_dirent *dent; + unsigned int rec_len = GFS2_DIRENT_SIZE(name_len); + unsigned int entries = 0; + int type; + + type = dirent_first(dip, bh, &dent); + if (type < 0) + return type; + + if (type == IS_LEAF) { + struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data; + entries = le16_to_cpu(leaf->lf_entries); + } else { + struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data; + entries = le32_to_cpu(dinode->di_entries); + } + + if (!entries) + return 1; + + do { + uint32_t cur_rec_len, cur_name_len; + + cur_rec_len = le32_to_cpu(dent->de_rec_len); + cur_name_len = dent->de_name_len; + + if ((!dent->de_inum.no_addr && cur_rec_len >= rec_len) || + (cur_rec_len >= GFS2_DIRENT_SIZE(cur_name_len) + rec_len)) + return 1; + } while (dirent_next(dip, bh, &dent) == 0); + + return 0; +} + +static int leaf_search(struct gfs2_inode *dip, struct buffer_head *bh, + struct qstr *filename, struct gfs2_dirent **dent_out, + struct gfs2_dirent **dent_prev) +{ + uint32_t hash; + struct gfs2_dirent *dent, *prev = NULL; + unsigned int entries = 0; + int type; + + type = dirent_first(dip, bh, &dent); + if (type < 0) + return type; + + if (type == IS_LEAF) { + struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data; + entries = le16_to_cpu(leaf->lf_entries); + } else if (type == IS_DINODE) { + struct gfs2_dinode *dinode = (struct gfs2_dinode *)bh->b_data; + entries = le32_to_cpu(dinode->di_entries); + } + + hash = gfs2_disk_hash(filename->name, filename->len); + + do { + if (!dent->de_inum.no_addr) { + prev = dent; + continue; + } + + if (le32_to_cpu(dent->de_hash) == hash && + gfs2_filecmp(filename, (char *)(dent + 1), + dent->de_name_len)) { + *dent_out = dent; + if (dent_prev) + *dent_prev = prev; + + return 0; + } + + prev = dent; + } while (dirent_next(dip, bh, &dent) == 0); + + return -ENOENT; +} + +static int get_leaf(struct gfs2_inode *dip, uint64_t leaf_no, + struct buffer_head **bhp) +{ + int error; + + error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_START | DIO_WAIT, bhp); + if (!error && gfs2_metatype_check(dip->i_sbd, *bhp, GFS2_METATYPE_LF)) + error = -EIO; + + return error; +} + +/** + * get_leaf_nr - Get a leaf number associated with the index + * @dip: The GFS2 inode + * @index: + * @leaf_out: + * + * Returns: 0 on success, error code otherwise + */ + +static int get_leaf_nr(struct gfs2_inode *dip, uint32_t index, + uint64_t *leaf_out) +{ + uint64_t leaf_no; + int error; + + error = gfs2_jdata_read_mem(dip, (char *)&leaf_no, + index * sizeof(uint64_t), + sizeof(uint64_t)); + if (error != sizeof(uint64_t)) + return (error < 0) ? error : -EIO; + + *leaf_out = le64_to_cpu(leaf_no); + + return 0; +} + +static int get_first_leaf(struct gfs2_inode *dip, uint32_t index, + struct buffer_head **bh_out) +{ + uint64_t leaf_no; + int error; + + error = get_leaf_nr(dip, index, &leaf_no); + if (!error) + error = get_leaf(dip, leaf_no, bh_out); + + return error; +} + +static int get_next_leaf(struct gfs2_inode *dip, struct buffer_head *bh_in, + struct buffer_head **bh_out) +{ + struct gfs2_leaf *leaf; + int error; + + leaf = (struct gfs2_leaf *)bh_in->b_data; + + if (!leaf->lf_next) + error = -ENOENT; + else + error = get_leaf(dip, le64_to_cpu(leaf->lf_next), bh_out); + + return error; +} + +static int linked_leaf_search(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_dirent **dent_out, + struct gfs2_dirent **dent_prev, + struct buffer_head **bh_out) +{ + struct buffer_head *bh = NULL, *bh_next; + uint32_t hsize, index; + uint32_t hash; + int error; + + hsize = 1 << dip->i_di.di_depth; + if (hsize * sizeof(uint64_t) != dip->i_di.di_size) { + gfs2_consist_inode(dip); + return -EIO; + } + + /* Figure out the address of the leaf node. */ + + hash = gfs2_disk_hash(filename->name, filename->len); + index = hash >> (32 - dip->i_di.di_depth); + + error = get_first_leaf(dip, index, &bh_next); + if (error) + return error; + + /* Find the entry */ + + do { + brelse(bh); + + bh = bh_next; + + error = leaf_search(dip, bh, filename, dent_out, dent_prev); + switch (error) { + case 0: + *bh_out = bh; + return 0; + + case -ENOENT: + break; + + default: + brelse(bh); + return error; + } + + error = get_next_leaf(dip, bh, &bh_next); + } + while (!error); + + brelse(bh); + + return error; +} + +/** + * dir_make_exhash - Convert a stuffed directory into an ExHash directory + * @dip: The GFS2 inode + * + * Returns: 0 on success, error code otherwise + */ + +static int dir_make_exhash(struct gfs2_inode *dip) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_dirent *dent; + struct buffer_head *bh, *dibh; + struct gfs2_leaf *leaf; + int y; + uint32_t x; + uint64_t *lp, bn; + int error; + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (error) + return error; + + /* Allocate a new block for the first leaf node */ + + bn = gfs2_alloc_meta(dip); + + /* Turn over a new leaf */ + + bh = gfs2_meta_new(dip->i_gl, bn); + gfs2_trans_add_bh(dip->i_gl, bh); + gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); + gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); + + /* Fill in the leaf structure */ + + leaf = (struct gfs2_leaf *)bh->b_data; + + gfs2_assert(sdp, dip->i_di.di_entries < (1 << 16)); + + leaf->lf_dirent_format = cpu_to_le32(GFS2_FORMAT_DE); + leaf->lf_entries = cpu_to_le16(dip->i_di.di_entries); + + /* Copy dirents */ + + gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_leaf), dibh, + sizeof(struct gfs2_dinode)); + + /* Find last entry */ + + x = 0; + dirent_first(dip, bh, &dent); + + do { + if (!dent->de_inum.no_addr) + continue; + if (++x == dip->i_di.di_entries) + break; + } + while (dirent_next(dip, bh, &dent) == 0); + + /* Adjust the last dirent's record length + (Remember that dent still points to the last entry.) */ + + dent->de_rec_len = le32_to_cpu(dent->de_rec_len) + + sizeof(struct gfs2_dinode) - + sizeof(struct gfs2_leaf); + dent->de_rec_len = cpu_to_le32(dent->de_rec_len); + + brelse(bh); + + /* We're done with the new leaf block, now setup the new + hash table. */ + + gfs2_trans_add_bh(dip->i_gl, dibh); + gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); + + lp = (uint64_t *)(dibh->b_data + sizeof(struct gfs2_dinode)); + + for (x = sdp->sd_hash_ptrs; x--; lp++) + *lp = cpu_to_le64(bn); + + dip->i_di.di_size = sdp->sd_sb.sb_bsize / 2; + dip->i_di.di_blocks++; + dip->i_di.di_flags |= GFS2_DIF_EXHASH; + dip->i_di.di_payload_format = 0; + + for (x = sdp->sd_hash_ptrs, y = -1; x; x >>= 1, y++) ; + dip->i_di.di_depth = y; + + gfs2_dinode_out(&dip->i_di, dibh->b_data); + + brelse(dibh); + + return 0; +} + +/** + * dir_split_leaf - Split a leaf block into two + * @dip: The GFS2 inode + * @index: + * @leaf_no: + * + * Returns: 0 on success, error code on failure + */ + +static int dir_split_leaf(struct gfs2_inode *dip, uint32_t index, + uint64_t leaf_no) +{ + struct buffer_head *nbh, *obh, *dibh; + struct gfs2_leaf *nleaf, *oleaf; + struct gfs2_dirent *dent, *prev = NULL, *next = NULL, *new; + uint32_t start, len, half_len, divider; + uint64_t bn, *lp; + uint32_t name_len; + int x, moved = 0; + int error; + + /* Allocate the new leaf block */ + + bn = gfs2_alloc_meta(dip); + + /* Get the new leaf block */ + + nbh = gfs2_meta_new(dip->i_gl, bn); + gfs2_trans_add_bh(dip->i_gl, nbh); + gfs2_metatype_set(nbh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); + gfs2_buffer_clear_tail(nbh, sizeof(struct gfs2_meta_header)); + + nleaf = (struct gfs2_leaf *)nbh->b_data; + + nleaf->lf_dirent_format = cpu_to_le32(GFS2_FORMAT_DE); + + /* Get the old leaf block */ + + error = get_leaf(dip, leaf_no, &obh); + if (error) + goto fail; + + gfs2_trans_add_bh(dip->i_gl, obh); + + oleaf = (struct gfs2_leaf *)obh->b_data; + + /* Compute the start and len of leaf pointers in the hash table. */ + + len = 1 << (dip->i_di.di_depth - le16_to_cpu(oleaf->lf_depth)); + half_len = len >> 1; + if (!half_len) { + gfs2_consist_inode(dip); + error = -EIO; + goto fail_brelse; + } + + start = (index & ~(len - 1)); + + /* Change the pointers. + Don't bother distinguishing stuffed from non-stuffed. + This code is complicated enough already. */ + + lp = kcalloc(half_len, sizeof(uint64_t), GFP_KERNEL | __GFP_NOFAIL); + + error = gfs2_jdata_read_mem(dip, (char *)lp, start * sizeof(uint64_t), + half_len * sizeof(uint64_t)); + if (error != half_len * sizeof(uint64_t)) { + if (error >= 0) + error = -EIO; + goto fail_lpfree; + } + + /* Change the pointers */ + + for (x = 0; x < half_len; x++) + lp[x] = cpu_to_le64(bn); + + error = gfs2_jdata_write_mem(dip, (char *)lp, start * sizeof(uint64_t), + half_len * sizeof(uint64_t)); + if (error != half_len * sizeof(uint64_t)) { + if (error >= 0) + error = -EIO; + goto fail_lpfree; + } + + kfree(lp); + + /* Compute the divider */ + + divider = (start + half_len) << (32 - dip->i_di.di_depth); + + /* Copy the entries */ + + dirent_first(dip, obh, &dent); + + do { + next = dent; + if (dirent_next(dip, obh, &next)) + next = NULL; + + if (dent->de_inum.no_addr && + le32_to_cpu(dent->de_hash) < divider) { + name_len = dent->de_name_len; + + gfs2_dirent_alloc(dip, nbh, name_len, &new); + + new->de_inum = dent->de_inum; /* No endian worries */ + new->de_hash = dent->de_hash; /* No endian worries */ + new->de_type = dent->de_type; /* No endian worries */ + memcpy((char *)(new + 1), (char *)(dent + 1), + name_len); + + nleaf->lf_entries = le16_to_cpu(nleaf->lf_entries)+1; + nleaf->lf_entries = cpu_to_le16(nleaf->lf_entries); + + dirent_del(dip, obh, prev, dent); + + if (!oleaf->lf_entries) + gfs2_consist_inode(dip); + oleaf->lf_entries = le16_to_cpu(oleaf->lf_entries)-1; + oleaf->lf_entries = cpu_to_le16(oleaf->lf_entries); + + if (!prev) + prev = dent; + + moved = 1; + } else + prev = dent; + + dent = next; + } + while (dent); + + /* If none of the entries got moved into the new leaf, + artificially fill in the first entry. */ + + if (!moved) { + gfs2_dirent_alloc(dip, nbh, 0, &new); + new->de_inum.no_addr = 0; + } + + oleaf->lf_depth = le16_to_cpu(oleaf->lf_depth) + 1; + oleaf->lf_depth = cpu_to_le16(oleaf->lf_depth); + nleaf->lf_depth = oleaf->lf_depth; + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (!gfs2_assert_withdraw(dip->i_sbd, !error)) { + dip->i_di.di_blocks++; + gfs2_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + } + + brelse(obh); + brelse(nbh); + + return error; + + fail_lpfree: + kfree(lp); + + fail_brelse: + brelse(obh); + + fail: + brelse(nbh); + return error; +} + +/** + * dir_double_exhash - Double size of ExHash table + * @dip: The GFS2 dinode + * + * Returns: 0 on success, error code on failure + */ + +static int dir_double_exhash(struct gfs2_inode *dip) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct buffer_head *dibh; + uint32_t hsize; + uint64_t *buf; + uint64_t *from, *to; + uint64_t block; + int x; + int error = 0; + + hsize = 1 << dip->i_di.di_depth; + if (hsize * sizeof(uint64_t) != dip->i_di.di_size) { + gfs2_consist_inode(dip); + return -EIO; + } + + /* Allocate both the "from" and "to" buffers in one big chunk */ + + buf = kcalloc(3, sdp->sd_hash_bsize, GFP_KERNEL | __GFP_NOFAIL); + + for (block = dip->i_di.di_size >> sdp->sd_hash_bsize_shift; block--;) { + error = gfs2_jdata_read_mem(dip, (char *)buf, + block * sdp->sd_hash_bsize, + sdp->sd_hash_bsize); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto fail; + } + + from = buf; + to = (uint64_t *)((char *)buf + sdp->sd_hash_bsize); + + for (x = sdp->sd_hash_ptrs; x--; from++) { + *to++ = *from; /* No endianess worries */ + *to++ = *from; + } + + error = gfs2_jdata_write_mem(dip, + (char *)buf + sdp->sd_hash_bsize, + block * sdp->sd_sb.sb_bsize, + sdp->sd_sb.sb_bsize); + if (error != sdp->sd_sb.sb_bsize) { + if (error >= 0) + error = -EIO; + goto fail; + } + } + + kfree(buf); + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (!gfs2_assert_withdraw(sdp, !error)) { + dip->i_di.di_depth++; + gfs2_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + } + + return error; + + fail: + kfree(buf); + + return error; +} + +/** + * compare_dents - compare directory entries by hash value + * @a: first dent + * @b: second dent + * + * When comparing the hash entries of @a to @b: + * gt: returns 1 + * lt: returns -1 + * eq: returns 0 + */ + +static int compare_dents(const void *a, const void *b) +{ + struct gfs2_dirent *dent_a, *dent_b; + uint32_t hash_a, hash_b; + int ret = 0; + + dent_a = *(struct gfs2_dirent **)a; + hash_a = dent_a->de_hash; + hash_a = le32_to_cpu(hash_a); + + dent_b = *(struct gfs2_dirent **)b; + hash_b = dent_b->de_hash; + hash_b = le32_to_cpu(hash_b); + + if (hash_a > hash_b) + ret = 1; + else if (hash_a < hash_b) + ret = -1; + else { + unsigned int len_a = dent_a->de_name_len; + unsigned int len_b = dent_b->de_name_len; + + if (len_a > len_b) + ret = 1; + else if (len_a < len_b) + ret = -1; + else + ret = memcmp((char *)(dent_a + 1), + (char *)(dent_b + 1), + len_a); + } + + return ret; +} + +/** + * do_filldir_main - read out directory entries + * @dip: The GFS2 inode + * @offset: The offset in the file to read from + * @opaque: opaque data to pass to filldir + * @filldir: The function to pass entries to + * @darr: an array of struct gfs2_dirent pointers to read + * @entries: the number of entries in darr + * @copied: pointer to int that's non-zero if a entry has been copied out + * + * Jump through some hoops to make sure that if there are hash collsions, + * they are read out at the beginning of a buffer. We want to minimize + * the possibility that they will fall into different readdir buffers or + * that someone will want to seek to that location. + * + * Returns: errno, >0 on exception from filldir + */ + +static int do_filldir_main(struct gfs2_inode *dip, uint64_t *offset, + void *opaque, gfs2_filldir_t filldir, + struct gfs2_dirent **darr, uint32_t entries, + int *copied) +{ + struct gfs2_dirent *dent, *dent_next; + struct gfs2_inum inum; + uint64_t off, off_next; + unsigned int x, y; + int run = 0; + int error = 0; + + sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL); + + dent_next = darr[0]; + off_next = le32_to_cpu(dent_next->de_hash); + off_next = gfs2_disk_hash2offset(off_next); + + for (x = 0, y = 1; x < entries; x++, y++) { + dent = dent_next; + off = off_next; + + if (y < entries) { + dent_next = darr[y]; + off_next = le32_to_cpu(dent_next->de_hash); + off_next = gfs2_disk_hash2offset(off_next); + + if (off < *offset) + continue; + *offset = off; + + if (off_next == off) { + if (*copied && !run) + return 1; + run = 1; + } else + run = 0; + } else { + if (off < *offset) + continue; + *offset = off; + } + + gfs2_inum_in(&inum, (char *)&dent->de_inum); + + error = filldir(opaque, (char *)(dent + 1), + dent->de_name_len, + off, &inum, + dent->de_type); + if (error) + return 1; + + *copied = 1; + } + + /* Increment the *offset by one, so the next time we come into the + do_filldir fxn, we get the next entry instead of the last one in the + current leaf */ + + (*offset)++; + + return 0; +} + +/** + * do_filldir_single - Read directory entries out of a single block + * @dip: The GFS2 inode + * @offset: The offset in the file to read from + * @opaque: opaque data to pass to filldir + * @filldir: The function to pass entries to + * @bh: the block + * @entries: the number of entries in the block + * @copied: pointer to int that's non-zero if a entry has been copied out + * + * Returns: errno, >0 on exception from filldir + */ + +static int do_filldir_single(struct gfs2_inode *dip, uint64_t *offset, + void *opaque, gfs2_filldir_t filldir, + struct buffer_head *bh, uint32_t entries, + int *copied) +{ + struct gfs2_dirent **darr; + struct gfs2_dirent *de; + unsigned int e = 0; + int error; + + if (!entries) + return 0; + + darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL); + if (!darr) + return -ENOMEM; + + dirent_first(dip, bh, &de); + do { + if (!de->de_inum.no_addr) + continue; + if (e >= entries) { + gfs2_consist_inode(dip); + error = -EIO; + goto out; + } + darr[e++] = de; + } + while (dirent_next(dip, bh, &de) == 0); + + if (e != entries) { + gfs2_consist_inode(dip); + error = -EIO; + goto out; + } + + error = do_filldir_main(dip, offset, opaque, filldir, darr, + entries, copied); + + out: + kfree(darr); + + return error; +} + +/** + * do_filldir_multi - Read directory entries out of a linked leaf list + * @dip: The GFS2 inode + * @offset: The offset in the file to read from + * @opaque: opaque data to pass to filldir + * @filldir: The function to pass entries to + * @bh: the first leaf in the list + * @copied: pointer to int that's non-zero if a entry has been copied out + * + * Returns: errno, >0 on exception from filldir + */ + +static int do_filldir_multi(struct gfs2_inode *dip, uint64_t *offset, + void *opaque, gfs2_filldir_t filldir, + struct buffer_head *bh, int *copied) +{ + struct buffer_head **larr = NULL; + struct gfs2_dirent **darr; + struct gfs2_leaf *leaf; + struct buffer_head *tmp_bh; + struct gfs2_dirent *de; + unsigned int entries, e = 0; + unsigned int leaves = 0, l = 0; + unsigned int x; + uint64_t ln; + int error = 0; + + /* Count leaves and entries */ + + leaf = (struct gfs2_leaf *)bh->b_data; + entries = le16_to_cpu(leaf->lf_entries); + ln = leaf->lf_next; + + while (ln) { + ln = le64_to_cpu(ln); + + error = get_leaf(dip, ln, &tmp_bh); + if (error) + return error; + + leaf = (struct gfs2_leaf *)tmp_bh->b_data; + if (leaf->lf_entries) { + entries += le16_to_cpu(leaf->lf_entries); + leaves++; + } + ln = leaf->lf_next; + + brelse(tmp_bh); + } + + if (!entries) + return 0; + + if (leaves) { + larr = kcalloc(leaves, sizeof(struct buffer_head *),GFP_KERNEL); + if (!larr) + return -ENOMEM; + } + + darr = kcalloc(entries, sizeof(struct gfs2_dirent *), GFP_KERNEL); + if (!darr) { + kfree(larr); + return -ENOMEM; + } + + leaf = (struct gfs2_leaf *)bh->b_data; + if (leaf->lf_entries) { + dirent_first(dip, bh, &de); + do { + if (!de->de_inum.no_addr) + continue; + if (e >= entries) { + gfs2_consist_inode(dip); + error = -EIO; + goto out; + } + darr[e++] = de; + } + while (dirent_next(dip, bh, &de) == 0); + } + ln = leaf->lf_next; + + while (ln) { + ln = le64_to_cpu(ln); + + error = get_leaf(dip, ln, &tmp_bh); + if (error) + goto out; + + leaf = (struct gfs2_leaf *)tmp_bh->b_data; + if (leaf->lf_entries) { + dirent_first(dip, tmp_bh, &de); + do { + if (!de->de_inum.no_addr) + continue; + if (e >= entries) { + gfs2_consist_inode(dip); + error = -EIO; + goto out; + } + darr[e++] = de; + } + while (dirent_next(dip, tmp_bh, &de) == 0); + + larr[l++] = tmp_bh; + + ln = leaf->lf_next; + } else { + ln = leaf->lf_next; + brelse(tmp_bh); + } + } + + if (gfs2_assert_withdraw(dip->i_sbd, l == leaves)) { + error = -EIO; + goto out; + } + if (e != entries) { + gfs2_consist_inode(dip); + error = -EIO; + goto out; + } + + error = do_filldir_main(dip, offset, opaque, filldir, darr, + entries, copied); + + out: + kfree(darr); + for (x = 0; x < l; x++) + brelse(larr[x]); + kfree(larr); + + return error; +} + +/** + * dir_e_search - Search exhash (leaf) dir for inode matching name + * @dip: The GFS2 inode + * @filename: Filename string + * @inode: If non-NULL, function fills with formal inode # and block address + * @type: If non-NULL, function fills with DT_... dinode type + * + * Returns: + */ + +static int dir_e_search(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *inum, unsigned int *type) +{ + struct buffer_head *bh; + struct gfs2_dirent *dent; + int error; + + error = linked_leaf_search(dip, filename, &dent, NULL, &bh); + if (error) + return error; + + if (inum) + gfs2_inum_in(inum, (char *)&dent->de_inum); + if (type) + *type = dent->de_type; + + brelse(bh); + + return 0; +} + +static int dir_e_add(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *inum, unsigned int type) +{ + struct buffer_head *bh, *nbh, *dibh; + struct gfs2_leaf *leaf, *nleaf; + struct gfs2_dirent *dent; + uint32_t hsize, index; + uint32_t hash; + uint64_t leaf_no, bn; + int error; + + restart: + hsize = 1 << dip->i_di.di_depth; + if (hsize * sizeof(uint64_t) != dip->i_di.di_size) { + gfs2_consist_inode(dip); + return -EIO; + } + + /* Figure out the address of the leaf node. */ + + hash = gfs2_disk_hash(filename->name, filename->len); + index = hash >> (32 - dip->i_di.di_depth); + + error = get_leaf_nr(dip, index, &leaf_no); + if (error) + return error; + + /* Add entry to the leaf */ + + for (;;) { + error = get_leaf(dip, leaf_no, &bh); + if (error) + return error; + + leaf = (struct gfs2_leaf *)bh->b_data; + + if (gfs2_dirent_alloc(dip, bh, filename->len, &dent)) { + + if (le16_to_cpu(leaf->lf_depth) < dip->i_di.di_depth) { + /* Can we split the leaf? */ + + brelse(bh); + + error = dir_split_leaf(dip, index, leaf_no); + if (error) + return error; + + goto restart; + + } else if (dip->i_di.di_depth < GFS2_DIR_MAX_DEPTH) { + /* Can we double the hash table? */ + + brelse(bh); + + error = dir_double_exhash(dip); + if (error) + return error; + + goto restart; + + } else if (leaf->lf_next) { + /* Can we try the next leaf in the list? */ + leaf_no = le64_to_cpu(leaf->lf_next); + brelse(bh); + continue; + + } else { + /* Create a new leaf and add it to the list. */ + + bn = gfs2_alloc_meta(dip); + + nbh = gfs2_meta_new(dip->i_gl, bn); + gfs2_trans_add_bh(dip->i_gl, nbh); + gfs2_metatype_set(nbh, + GFS2_METATYPE_LF, + GFS2_FORMAT_LF); + gfs2_buffer_clear_tail(nbh, + sizeof(struct gfs2_meta_header)); + + gfs2_trans_add_bh(dip->i_gl, bh); + leaf->lf_next = cpu_to_le64(bn); + + nleaf = (struct gfs2_leaf *)nbh->b_data; + nleaf->lf_depth = leaf->lf_depth; + nleaf->lf_dirent_format = cpu_to_le32(GFS2_FORMAT_DE); + + gfs2_dirent_alloc(dip, nbh, filename->len, + &dent); + + dip->i_di.di_blocks++; + + brelse(bh); + + bh = nbh; + leaf = nleaf; + } + } + + /* If the gfs2_dirent_alloc() succeeded, it pinned the "bh" */ + + gfs2_inum_out(inum, (char *)&dent->de_inum); + dent->de_hash = cpu_to_le32(hash); + dent->de_type = type; + memcpy((char *)(dent + 1), filename->name, filename->len); + + leaf->lf_entries = le16_to_cpu(leaf->lf_entries) + 1; + leaf->lf_entries = cpu_to_le16(leaf->lf_entries); + + brelse(bh); + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (error) + return error; + + dip->i_di.di_entries++; + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + + gfs2_trans_add_bh(dip->i_gl, dibh); + gfs2_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + + return 0; + } + + return -ENOENT; +} + +static int dir_e_del(struct gfs2_inode *dip, struct qstr *filename) +{ + struct buffer_head *bh, *dibh; + struct gfs2_dirent *dent, *prev; + struct gfs2_leaf *leaf; + unsigned int entries; + int error; + + error = linked_leaf_search(dip, filename, &dent, &prev, &bh); + if (error == -ENOENT) { + gfs2_consist_inode(dip); + return -EIO; + } + if (error) + return error; + + dirent_del(dip, bh, prev, dent); /* Pins bh */ + + leaf = (struct gfs2_leaf *)bh->b_data; + entries = le16_to_cpu(leaf->lf_entries); + if (!entries) + gfs2_consist_inode(dip); + entries--; + leaf->lf_entries = cpu_to_le16(entries); + + brelse(bh); + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (error) + return error; + + if (!dip->i_di.di_entries) + gfs2_consist_inode(dip); + dip->i_di.di_entries--; + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + + gfs2_trans_add_bh(dip->i_gl, dibh); + gfs2_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + + return 0; +} + +/** + * dir_e_read - Reads the entries from a directory into a filldir buffer + * @dip: dinode pointer + * @offset: the hash of the last entry read shifted to the right once + * @opaque: buffer for the filldir function to fill + * @filldir: points to the filldir function to use + * + * Returns: errno + */ + +static int dir_e_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque, + gfs2_filldir_t filldir) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct buffer_head *bh; + struct gfs2_leaf leaf; + uint32_t hsize, len; + uint32_t ht_offset, lp_offset, ht_offset_cur = -1; + uint32_t hash, index; + uint64_t *lp; + int copied = 0; + int error = 0; + + hsize = 1 << dip->i_di.di_depth; + if (hsize * sizeof(uint64_t) != dip->i_di.di_size) { + gfs2_consist_inode(dip); + return -EIO; + } + + hash = gfs2_dir_offset2hash(*offset); + index = hash >> (32 - dip->i_di.di_depth); + + lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL); + if (!lp) + return -ENOMEM; + + while (index < hsize) { + lp_offset = index & (sdp->sd_hash_ptrs - 1); + ht_offset = index - lp_offset; + + if (ht_offset_cur != ht_offset) { + error = gfs2_jdata_read_mem(dip, (char *)lp, + ht_offset * sizeof(uint64_t), + sdp->sd_hash_bsize); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto out; + } + ht_offset_cur = ht_offset; + } + + error = get_leaf(dip, le64_to_cpu(lp[lp_offset]), &bh); + if (error) + goto out; + + gfs2_leaf_in(&leaf, bh->b_data); + + if (leaf.lf_next) + error = do_filldir_multi(dip, offset, opaque, filldir, + bh, &copied); + else + error = do_filldir_single(dip, offset, opaque, filldir, + bh, leaf.lf_entries, &copied); + + brelse(bh); + + if (error) { + if (error > 0) + error = 0; + goto out; + } + + len = 1 << (dip->i_di.di_depth - leaf.lf_depth); + index = (index & ~(len - 1)) + len; + } + + out: + kfree(lp); + + return error; +} + +static int dir_e_mvino(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *inum, unsigned int new_type) +{ + struct buffer_head *bh, *dibh; + struct gfs2_dirent *dent; + int error; + + error = linked_leaf_search(dip, filename, &dent, NULL, &bh); + if (error == -ENOENT) { + gfs2_consist_inode(dip); + return -EIO; + } + if (error) + return error; + + gfs2_trans_add_bh(dip->i_gl, bh); + + gfs2_inum_out(inum, (char *)&dent->de_inum); + dent->de_type = new_type; + + brelse(bh); + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (error) + return error; + + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + + gfs2_trans_add_bh(dip->i_gl, dibh); + gfs2_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + + return 0; +} + +/** + * dir_l_search - Search linear (stuffed dinode) dir for inode matching name + * @dip: The GFS2 inode + * @filename: Filename string + * @inode: If non-NULL, function fills with formal inode # and block address + * @type: If non-NULL, function fills with DT_... dinode type + * + * Returns: + */ + +static int dir_l_search(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *inum, unsigned int *type) +{ + struct buffer_head *dibh; + struct gfs2_dirent *dent; + int error; + + if (!gfs2_is_stuffed(dip)) { + gfs2_consist_inode(dip); + return -EIO; + } + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (error) + return error; + + error = leaf_search(dip, dibh, filename, &dent, NULL); + if (!error) { + if (inum) + gfs2_inum_in(inum, (char *)&dent->de_inum); + if (type) + *type = dent->de_type; + } + + brelse(dibh); + + return error; +} + +static int dir_l_add(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *inum, unsigned int type) +{ + struct buffer_head *dibh; + struct gfs2_dirent *dent; + int error; + + if (!gfs2_is_stuffed(dip)) { + gfs2_consist_inode(dip); + return -EIO; + } + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (error) + return error; + + if (gfs2_dirent_alloc(dip, dibh, filename->len, &dent)) { + brelse(dibh); + + error = dir_make_exhash(dip); + if (!error) + error = dir_e_add(dip, filename, inum, type); + + return error; + } + + /* gfs2_dirent_alloc() pins */ + + gfs2_inum_out(inum, (char *)&dent->de_inum); + dent->de_hash = gfs2_disk_hash(filename->name, filename->len); + dent->de_hash = cpu_to_le32(dent->de_hash); + dent->de_type = type; + memcpy((char *)(dent + 1), filename->name, filename->len); + + dip->i_di.di_entries++; + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + + gfs2_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + + return 0; +} + +static int dir_l_del(struct gfs2_inode *dip, struct qstr *filename) +{ + struct buffer_head *dibh; + struct gfs2_dirent *dent, *prev; + int error; + + if (!gfs2_is_stuffed(dip)) { + gfs2_consist_inode(dip); + return -EIO; + } + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (error) + return error; + + error = leaf_search(dip, dibh, filename, &dent, &prev); + if (error == -ENOENT) { + gfs2_consist_inode(dip); + error = -EIO; + goto out; + } + if (error) + goto out; + + dirent_del(dip, dibh, prev, dent); + + /* dirent_del() pins */ + + if (!dip->i_di.di_entries) + gfs2_consist_inode(dip); + dip->i_di.di_entries--; + + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + + gfs2_dinode_out(&dip->i_di, dibh->b_data); + + out: + brelse(dibh); + + return error; +} + +static int dir_l_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque, + gfs2_filldir_t filldir) +{ + struct buffer_head *dibh; + int copied = 0; + int error; + + if (!gfs2_is_stuffed(dip)) { + gfs2_consist_inode(dip); + return -EIO; + } + + if (!dip->i_di.di_entries) + return 0; + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (error) + return error; + + error = do_filldir_single(dip, offset, + opaque, filldir, + dibh, dip->i_di.di_entries, + &copied); + if (error > 0) + error = 0; + + brelse(dibh); + + return error; +} + +static int dir_l_mvino(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *inum, unsigned int new_type) +{ + struct buffer_head *dibh; + struct gfs2_dirent *dent; + int error; + + if (!gfs2_is_stuffed(dip)) { + gfs2_consist_inode(dip); + return -EIO; + } + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (error) + return error; + + error = leaf_search(dip, dibh, filename, &dent, NULL); + if (error == -ENOENT) { + gfs2_consist_inode(dip); + error = -EIO; + goto out; + } + if (error) + goto out; + + gfs2_trans_add_bh(dip->i_gl, dibh); + + gfs2_inum_out(inum, (char *)&dent->de_inum); + dent->de_type = new_type; + + dip->i_di.di_mtime = dip->i_di.di_ctime = get_seconds(); + + gfs2_dinode_out(&dip->i_di, dibh->b_data); + + out: + brelse(dibh); + + return error; +} + +/** + * gfs2_dir_search - Search a directory + * @dip: The GFS2 inode + * @filename: + * @inode: + * + * This routine searches a directory for a file or another directory. + * Assumes a glock is held on dip. + * + * Returns: errno + */ + +int gfs2_dir_search(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *inum, unsigned int *type) +{ + int error; + + if (dip->i_di.di_flags & GFS2_DIF_EXHASH) + error = dir_e_search(dip, filename, inum, type); + else + error = dir_l_search(dip, filename, inum, type); + + return error; +} + +/** + * gfs2_dir_add - Add new filename into directory + * @dip: The GFS2 inode + * @filename: The new name + * @inode: The inode number of the entry + * @type: The type of the entry + * + * Returns: 0 on success, error code on failure + */ + +int gfs2_dir_add(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *inum, unsigned int type) +{ + int error; + + if (dip->i_di.di_flags & GFS2_DIF_EXHASH) + error = dir_e_add(dip, filename, inum, type); + else + error = dir_l_add(dip, filename, inum, type); + + return error; +} + +/** + * gfs2_dir_del - Delete a directory entry + * @dip: The GFS2 inode + * @filename: The filename + * + * Returns: 0 on success, error code on failure + */ + +int gfs2_dir_del(struct gfs2_inode *dip, struct qstr *filename) +{ + int error; + + if (dip->i_di.di_flags & GFS2_DIF_EXHASH) + error = dir_e_del(dip, filename); + else + error = dir_l_del(dip, filename); + + return error; +} + +int gfs2_dir_read(struct gfs2_inode *dip, uint64_t *offset, void *opaque, + gfs2_filldir_t filldir) +{ + int error; + + if (dip->i_di.di_flags & GFS2_DIF_EXHASH) + error = dir_e_read(dip, offset, opaque, filldir); + else + error = dir_l_read(dip, offset, opaque, filldir); + + return error; +} + +/** + * gfs2_dir_mvino - Change inode number of directory entry + * @dip: The GFS2 inode + * @filename: + * @new_inode: + * + * This routine changes the inode number of a directory entry. It's used + * by rename to change ".." when a directory is moved. + * Assumes a glock is held on dvp. + * + * Returns: errno + */ + +int gfs2_dir_mvino(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *inum, unsigned int new_type) +{ + int error; + + if (dip->i_di.di_flags & GFS2_DIF_EXHASH) + error = dir_e_mvino(dip, filename, inum, new_type); + else + error = dir_l_mvino(dip, filename, inum, new_type); + + return error; +} + +/** + * foreach_leaf - call a function for each leaf in a directory + * @dip: the directory + * @lc: the function to call for each each + * @data: private data to pass to it + * + * Returns: errno + */ + +static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct buffer_head *bh; + struct gfs2_leaf leaf; + uint32_t hsize, len; + uint32_t ht_offset, lp_offset, ht_offset_cur = -1; + uint32_t index = 0; + uint64_t *lp; + uint64_t leaf_no; + int error = 0; + + hsize = 1 << dip->i_di.di_depth; + if (hsize * sizeof(uint64_t) != dip->i_di.di_size) { + gfs2_consist_inode(dip); + return -EIO; + } + + lp = kmalloc(sdp->sd_hash_bsize, GFP_KERNEL); + if (!lp) + return -ENOMEM; + + while (index < hsize) { + lp_offset = index & (sdp->sd_hash_ptrs - 1); + ht_offset = index - lp_offset; + + if (ht_offset_cur != ht_offset) { + error = gfs2_jdata_read_mem(dip, (char *)lp, + ht_offset * sizeof(uint64_t), + sdp->sd_hash_bsize); + if (error != sdp->sd_hash_bsize) { + if (error >= 0) + error = -EIO; + goto out; + } + ht_offset_cur = ht_offset; + } + + leaf_no = le64_to_cpu(lp[lp_offset]); + if (leaf_no) { + error = get_leaf(dip, leaf_no, &bh); + if (error) + goto out; + gfs2_leaf_in(&leaf, bh->b_data); + brelse(bh); + + len = 1 << (dip->i_di.di_depth - leaf.lf_depth); + + error = lc(dip, index, len, leaf_no, data); + if (error) + goto out; + + index = (index & ~(len - 1)) + len; + } else + index++; + } + + if (index != hsize) { + gfs2_consist_inode(dip); + error = -EIO; + } + + out: + kfree(lp); + + return error; +} + +/** + * leaf_dealloc - Deallocate a directory leaf + * @dip: the directory + * @index: the hash table offset in the directory + * @len: the number of pointers to this leaf + * @leaf_no: the leaf number + * @data: not used + * + * Returns: errno + */ + +static int leaf_dealloc(struct gfs2_inode *dip, uint32_t index, uint32_t len, + uint64_t leaf_no, void *data) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_leaf tmp_leaf; + struct gfs2_rgrp_list rlist; + struct buffer_head *bh, *dibh; + uint64_t blk; + unsigned int rg_blocks = 0, l_blocks = 0; + char *ht; + unsigned int x, size = len * sizeof(uint64_t); + int error; + + memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); + + ht = kzalloc(size, GFP_KERNEL); + if (!ht) + return -ENOMEM; + + gfs2_alloc_get(dip); + + error = gfs2_quota_hold(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out; + + error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh); + if (error) + goto out_qs; + + /* Count the number of leaves */ + + for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) { + error = get_leaf(dip, blk, &bh); + if (error) + goto out_rlist; + gfs2_leaf_in(&tmp_leaf, (bh)->b_data); + brelse(bh); + + gfs2_rlist_add(sdp, &rlist, blk); + l_blocks++; + } + + gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0); + + for (x = 0; x < rlist.rl_rgrps; x++) { + struct gfs2_rgrpd *rgd; + rgd = get_gl2rgd(rlist.rl_ghs[x].gh_gl); + rg_blocks += rgd->rd_ri.ri_length; + } + + error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs); + if (error) + goto out_rlist; + + error = gfs2_trans_begin(sdp, + rg_blocks + (DIV_RU(size, sdp->sd_jbsize) + 1) + + RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks); + if (error) + goto out_rg_gunlock; + + for (blk = leaf_no; blk; blk = tmp_leaf.lf_next) { + error = get_leaf(dip, blk, &bh); + if (error) + goto out_end_trans; + gfs2_leaf_in(&tmp_leaf, bh->b_data); + brelse(bh); + + gfs2_free_meta(dip, blk, 1); + + if (!dip->i_di.di_blocks) + gfs2_consist_inode(dip); + dip->i_di.di_blocks--; + } + + error = gfs2_jdata_write_mem(dip, ht, index * sizeof(uint64_t), size); + if (error != size) { + if (error >= 0) + error = -EIO; + goto out_end_trans; + } + + error = gfs2_meta_inode_buffer(dip, &dibh); + if (error) + goto out_end_trans; + + gfs2_trans_add_bh(dip->i_gl, dibh); + gfs2_dinode_out(&dip->i_di, dibh->b_data); + brelse(dibh); + + out_end_trans: + gfs2_trans_end(sdp); + + out_rg_gunlock: + gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs); + + out_rlist: + gfs2_rlist_free(&rlist); + gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh); + + out_qs: + gfs2_quota_unhold(dip); + + out: + gfs2_alloc_put(dip); + kfree(ht); + + return error; +} + +/** + * gfs2_dir_exhash_dealloc - free all the leaf blocks in a directory + * @dip: the directory + * + * Dealloc all on-disk directory leaves to FREEMETA state + * Change on-disk inode type to "regular file" + * + * Returns: errno + */ + +int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct buffer_head *bh; + int error; + + /* Dealloc on-disk leaves to FREEMETA state */ + error = foreach_leaf(dip, leaf_dealloc, NULL); + if (error) + return error; + + /* Make this a regular file in case we crash. + (We don't want to free these blocks a second time.) */ + + error = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (error) + return error; + + error = gfs2_meta_inode_buffer(dip, &bh); + if (!error) { + gfs2_trans_add_bh(dip->i_gl, bh); + ((struct gfs2_dinode *)bh->b_data)->di_mode = cpu_to_le32(S_IFREG); + brelse(bh); + } + + gfs2_trans_end(sdp); + + return error; +} + +/** + * gfs2_diradd_alloc_required - find if adding entry will require an allocation + * @ip: the file being written to + * @filname: the filename that's going to be added + * @alloc_required: set to 1 if an alloc is required, 0 otherwise + * + * Returns: errno + */ + +int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename, + int *alloc_required) +{ + struct buffer_head *bh = NULL, *bh_next; + uint32_t hsize, hash, index; + int error = 0; + + *alloc_required = 0; + + if (dip->i_di.di_flags & GFS2_DIF_EXHASH) { + hsize = 1 << dip->i_di.di_depth; + if (hsize * sizeof(uint64_t) != dip->i_di.di_size) { + gfs2_consist_inode(dip); + return -EIO; + } + + hash = gfs2_disk_hash(filename->name, filename->len); + index = hash >> (32 - dip->i_di.di_depth); + + error = get_first_leaf(dip, index, &bh_next); + if (error) + return error; + + do { + brelse(bh); + + bh = bh_next; + + if (dirent_fits(dip, bh, filename->len)) + break; + + error = get_next_leaf(dip, bh, &bh_next); + if (error == -ENOENT) { + *alloc_required = 1; + error = 0; + break; + } + } + while (!error); + + brelse(bh); + } else { + error = gfs2_meta_inode_buffer(dip, &bh); + if (error) + return error; + + if (!dirent_fits(dip, bh, filename->len)) + *alloc_required = 1; + + brelse(bh); + } + + return error; +} + +/** + * do_gdm - copy out one leaf (or list of leaves) + * @dip: the directory + * @index: the hash table offset in the directory + * @len: the number of pointers to this leaf + * @leaf_no: the leaf number + * @data: a pointer to a struct gfs2_user_buffer structure + * + * Returns: errno + */ + +static int do_gdm(struct gfs2_inode *dip, uint32_t index, uint32_t len, + uint64_t leaf_no, void *data) +{ + struct gfs2_user_buffer *ub = (struct gfs2_user_buffer *)data; + struct gfs2_leaf leaf; + struct buffer_head *bh; + uint64_t blk; + int error = 0; + + for (blk = leaf_no; blk; blk = leaf.lf_next) { + error = get_leaf(dip, blk, &bh); + if (error) + break; + + gfs2_leaf_in(&leaf, bh->b_data); + + error = gfs2_add_bh_to_ub(ub, bh); + + brelse(bh); + + if (error) + break; + } + + return error; +} + +/** + * gfs2_get_dir_meta - return all the leaf blocks of a directory + * @dip: the directory + * @ub: the structure representing the meta + * + * Returns: errno + */ + +int gfs2_get_dir_meta(struct gfs2_inode *dip, struct gfs2_user_buffer *ub) +{ + return foreach_leaf(dip, do_gdm, ub); +} + --- a/fs/gfs2/dir.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/dir.h 2005-10-10 11:28:49.154809087 -0500 @@ -0,0 +1,51 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __DIR_DOT_H__ +#define __DIR_DOT_H__ + +/** + * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read() + * @opaque: opaque data used by the function + * @name: the name of the directory entry + * @length: the length of the name + * @offset: the entry's offset in the directory + * @inum: the inode number the entry points to + * @type: the type of inode the entry points to + * + * Returns: 0 on success, 1 if buffer full + */ + +typedef int (*gfs2_filldir_t) (void *opaque, + const char *name, unsigned int length, + uint64_t offset, + struct gfs2_inum *inum, unsigned int type); + +int gfs2_filecmp(struct qstr *file1, char *file2, int len_of_file2); +int gfs2_dirent_alloc(struct gfs2_inode *dip, struct buffer_head *bh, + int name_len, struct gfs2_dirent **dent_out); + +int gfs2_dir_search(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *inum, unsigned int *type); +int gfs2_dir_add(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *inum, unsigned int type); +int gfs2_dir_del(struct gfs2_inode *dip, struct qstr *filename); +int gfs2_dir_read(struct gfs2_inode *dip, uint64_t * offset, void *opaque, + gfs2_filldir_t filldir); +int gfs2_dir_mvino(struct gfs2_inode *dip, struct qstr *filename, + struct gfs2_inum *new_inum, unsigned int new_type); + +int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); + +int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename, + int *alloc_required); + +int gfs2_get_dir_meta(struct gfs2_inode *ip, struct gfs2_user_buffer *ub); + +#endif /* __DIR_DOT_H__ */ [PATCH 07/16] GFS: allocation Code that manages block allocation. Signed-off-by: Ken Preslan Signed-off-by: David Teigland --- fs/gfs2/bits.c | 178 +++++++ fs/gfs2/bits.h | 28 + fs/gfs2/rgrp.c | 1373 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/rgrp.h | 62 ++ 4 files changed, 1641 insertions(+) --- a/fs/gfs2/rgrp.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/rgrp.c 2005-10-10 11:28:49.352778216 -0500 @@ -0,0 +1,1373 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bits.h" +#include "glock.h" +#include "glops.h" +#include "jdata.h" +#include "lops.h" +#include "meta_io.h" +#include "quota.h" +#include "rgrp.h" +#include "super.h" +#include "trans.h" + +/** + * gfs2_rgrp_verify - Verify that a resource group is consistent + * @sdp: the filesystem + * @rgd: the rgrp + * + */ + +void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd) +{ + struct gfs2_sbd *sdp = rgd->rd_sbd; + struct gfs2_bitmap *bi = NULL; + uint32_t length = rgd->rd_ri.ri_length; + uint32_t count[4], tmp; + int buf, x; + + memset(count, 0, 4 * sizeof(uint32_t)); + + /* Count # blocks in each of 4 possible allocation states */ + for (buf = 0; buf < length; buf++) { + bi = rgd->rd_bits + buf; + for (x = 0; x < 4; x++) + count[x] += gfs2_bitcount(rgd, + bi->bi_bh->b_data + + bi->bi_offset, + bi->bi_len, x); + } + + if (count[0] != rgd->rd_rg.rg_free) { + if (gfs2_consist_rgrpd(rgd)) + fs_err(sdp, "free data mismatch: %u != %u\n", + count[0], rgd->rd_rg.rg_free); + return; + } + + tmp = rgd->rd_ri.ri_data - + rgd->rd_rg.rg_free - + rgd->rd_rg.rg_dinodes; + if (count[1] != tmp) { + if (gfs2_consist_rgrpd(rgd)) + fs_err(sdp, "used data mismatch: %u != %u\n", + count[1], tmp); + return; + } + + if (count[2]) { + if (gfs2_consist_rgrpd(rgd)) + fs_err(sdp, "free metadata mismatch: %u != 0\n", + count[2]); + return; + } + + if (count[3] != rgd->rd_rg.rg_dinodes) { + if (gfs2_consist_rgrpd(rgd)) + fs_err(sdp, "used metadata mismatch: %u != %u\n", + count[3], rgd->rd_rg.rg_dinodes); + return; + } +} + +static inline int rgrp_contains_block(struct gfs2_rindex *ri, uint64_t block) +{ + uint64_t first = ri->ri_data0; + uint64_t last = first + ri->ri_data; + return !!(first <= block && block < last); +} + +/** + * gfs2_blk2rgrpd - Find resource group for a given data/meta block number + * @sdp: The GFS2 superblock + * @n: The data block number + * + * Returns: The resource group, or NULL if not found + */ + +struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk) +{ + struct gfs2_rgrpd *rgd; + + spin_lock(&sdp->sd_rindex_spin); + + list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) { + if (rgrp_contains_block(&rgd->rd_ri, blk)) { + list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); + spin_unlock(&sdp->sd_rindex_spin); + return rgd; + } + } + + spin_unlock(&sdp->sd_rindex_spin); + + return NULL; +} + +/** + * gfs2_rgrpd_get_first - get the first Resource Group in the filesystem + * @sdp: The GFS2 superblock + * + * Returns: The first rgrp in the filesystem + */ + +struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp) +{ + gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list)); + return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list); +} + +/** + * gfs2_rgrpd_get_next - get the next RG + * @rgd: A RG + * + * Returns: The next rgrp + */ + +struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd) +{ + if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list) + return NULL; + return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list); +} + +static void clear_rgrpdi(struct gfs2_sbd *sdp) +{ + struct list_head *head; + struct gfs2_rgrpd *rgd; + struct gfs2_glock *gl; + + spin_lock(&sdp->sd_rindex_spin); + sdp->sd_rindex_forward = NULL; + head = &sdp->sd_rindex_recent_list; + while (!list_empty(head)) { + rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent); + list_del(&rgd->rd_recent); + } + spin_unlock(&sdp->sd_rindex_spin); + + head = &sdp->sd_rindex_list; + while (!list_empty(head)) { + rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list); + gl = rgd->rd_gl; + + list_del(&rgd->rd_list); + list_del(&rgd->rd_list_mru); + + if (gl) { + set_gl2rgd(gl, NULL); + gfs2_glock_put(gl); + } + + kfree(rgd->rd_bits); + kfree(rgd); + } +} + +void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) +{ + down(&sdp->sd_rindex_mutex); + clear_rgrpdi(sdp); + up(&sdp->sd_rindex_mutex); +} + +/** + * gfs2_compute_bitstructs - Compute the bitmap sizes + * @rgd: The resource group descriptor + * + * Calculates bitmap descriptors, one for each block that contains bitmap data + * + * Returns: errno + */ + +static int compute_bitstructs(struct gfs2_rgrpd *rgd) +{ + struct gfs2_sbd *sdp = rgd->rd_sbd; + struct gfs2_bitmap *bi; + uint32_t length = rgd->rd_ri.ri_length; /* # blocks in hdr & bitmap */ + uint32_t bytes_left, bytes; + int x; + + rgd->rd_bits = kcalloc(length, sizeof(struct gfs2_bitmap), GFP_KERNEL); + if (!rgd->rd_bits) + return -ENOMEM; + + bytes_left = rgd->rd_ri.ri_bitbytes; + + for (x = 0; x < length; x++) { + bi = rgd->rd_bits + x; + + /* small rgrp; bitmap stored completely in header block */ + if (length == 1) { + bytes = bytes_left; + bi->bi_offset = sizeof(struct gfs2_rgrp); + bi->bi_start = 0; + bi->bi_len = bytes; + /* header block */ + } else if (x == 0) { + bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp); + bi->bi_offset = sizeof(struct gfs2_rgrp); + bi->bi_start = 0; + bi->bi_len = bytes; + /* last block */ + } else if (x + 1 == length) { + bytes = bytes_left; + bi->bi_offset = sizeof(struct gfs2_meta_header); + bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left; + bi->bi_len = bytes; + /* other blocks */ + } else { + bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); + bi->bi_offset = sizeof(struct gfs2_meta_header); + bi->bi_start = rgd->rd_ri.ri_bitbytes - bytes_left; + bi->bi_len = bytes; + } + + bytes_left -= bytes; + } + + if (bytes_left) { + gfs2_consist_rgrpd(rgd); + return -EIO; + } + bi = rgd->rd_bits + (length - 1); + if ((bi->bi_start + bi->bi_len) * GFS2_NBBY != rgd->rd_ri.ri_data) { + if (gfs2_consist_rgrpd(rgd)) { + gfs2_rindex_print(&rgd->rd_ri); + fs_err(sdp, "start=%u len=%u offset=%u\n", + bi->bi_start, bi->bi_len, bi->bi_offset); + } + return -EIO; + } + + return 0; +} + +/** + * gfs2_ri_update - Pull in a new resource index from the disk + * @gl: The glock covering the rindex inode + * + * Returns: 0 on successful update, error code otherwise + */ + +static int gfs2_ri_update(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_rgrpd *rgd; + char buf[sizeof(struct gfs2_rindex)]; + uint64_t junk = ip->i_di.di_size; + int error; + + if (do_div(junk, sizeof(struct gfs2_rindex))) { + gfs2_consist_inode(ip); + return -EIO; + } + + clear_rgrpdi(sdp); + + for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { + error = gfs2_jdata_read_mem(ip, buf, + sdp->sd_rgrps * + sizeof(struct gfs2_rindex), + sizeof(struct gfs2_rindex)); + if (!error) + break; + if (error != sizeof(struct gfs2_rindex)) { + if (error > 0) + error = -EIO; + goto fail; + } + + rgd = kzalloc(sizeof(struct gfs2_rgrpd), GFP_KERNEL); + error = -ENOMEM; + if (!rgd) + goto fail; + + init_MUTEX(&rgd->rd_mutex); + lops_init_le(&rgd->rd_le, &gfs2_rg_lops); + rgd->rd_sbd = sdp; + + list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list); + list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); + + gfs2_rindex_in(&rgd->rd_ri, buf); + + error = compute_bitstructs(rgd); + if (error) + goto fail; + + error = gfs2_glock_get(sdp, rgd->rd_ri.ri_addr, + &gfs2_rgrp_glops, CREATE, &rgd->rd_gl); + if (error) + goto fail; + + set_gl2rgd(rgd->rd_gl, rgd); + rgd->rd_rg_vn = rgd->rd_gl->gl_vn - 1; + } + + sdp->sd_rindex_vn = ip->i_gl->gl_vn; + + return 0; + + fail: + clear_rgrpdi(sdp); + + return error; +} + +/** + * gfs2_rindex_hold - Grab a lock on the rindex + * @sdp: The GFS2 superblock + * @ri_gh: the glock holder + * + * We grab a lock on the rindex inode to make sure that it doesn't + * change whilst we are performing an operation. We keep this lock + * for quite long periods of time compared to other locks. This + * doesn't matter, since it is shared and it is very, very rarely + * accessed in the exclusive mode (i.e. only when expanding the filesystem). + * + * This makes sure that we're using the latest copy of the resource index + * special file, which might have been updated if someone expanded the + * filesystem (via gfs2_grow utility), which adds new resource groups. + * + * Returns: 0 on success, error code otherwise + */ + +int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh) +{ + struct gfs2_inode *ip = sdp->sd_rindex; + struct gfs2_glock *gl = ip->i_gl; + int error; + + error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh); + if (error) + return error; + + /* Read new copy from disk if we don't have the latest */ + if (sdp->sd_rindex_vn != gl->gl_vn) { + down(&sdp->sd_rindex_mutex); + if (sdp->sd_rindex_vn != gl->gl_vn) { + error = gfs2_ri_update(ip); + if (error) + gfs2_glock_dq_uninit(ri_gh); + } + up(&sdp->sd_rindex_mutex); + } + + return error; +} + +/** + * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps + * @rgd: the struct gfs2_rgrpd describing the RG to read in + * + * Read in all of a Resource Group's header and bitmap blocks. + * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps. + * + * Returns: errno + */ + +int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) +{ + struct gfs2_sbd *sdp = rgd->rd_sbd; + struct gfs2_glock *gl = rgd->rd_gl; + unsigned int length = rgd->rd_ri.ri_length; + struct gfs2_bitmap *bi; + unsigned int x, y; + int error; + + down(&rgd->rd_mutex); + + spin_lock(&sdp->sd_rindex_spin); + if (rgd->rd_bh_count) { + rgd->rd_bh_count++; + spin_unlock(&sdp->sd_rindex_spin); + up(&rgd->rd_mutex); + return 0; + } + spin_unlock(&sdp->sd_rindex_spin); + + for (x = 0; x < length; x++) { + bi = rgd->rd_bits + x; + error = gfs2_meta_read(gl, rgd->rd_ri.ri_addr + x, DIO_START, + &bi->bi_bh); + if (error) + goto fail; + } + + for (y = length; y--;) { + bi = rgd->rd_bits + y; + error = gfs2_meta_reread(sdp, bi->bi_bh, DIO_WAIT); + if (error) + goto fail; + if (gfs2_metatype_check(sdp, bi->bi_bh, + (y) ? GFS2_METATYPE_RB : + GFS2_METATYPE_RG)) { + error = -EIO; + goto fail; + } + } + + if (rgd->rd_rg_vn != gl->gl_vn) { + gfs2_rgrp_in(&rgd->rd_rg, (rgd->rd_bits[0].bi_bh)->b_data); + rgd->rd_rg_vn = gl->gl_vn; + } + + spin_lock(&sdp->sd_rindex_spin); + rgd->rd_free_clone = rgd->rd_rg.rg_free; + rgd->rd_bh_count++; + spin_unlock(&sdp->sd_rindex_spin); + + up(&rgd->rd_mutex); + + return 0; + + fail: + while (x--) { + bi = rgd->rd_bits + x; + brelse(bi->bi_bh); + bi->bi_bh = NULL; + gfs2_assert_warn(sdp, !bi->bi_clone); + } + up(&rgd->rd_mutex); + + return error; +} + +void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd) +{ + struct gfs2_sbd *sdp = rgd->rd_sbd; + + spin_lock(&sdp->sd_rindex_spin); + gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count); + rgd->rd_bh_count++; + spin_unlock(&sdp->sd_rindex_spin); +} + +/** + * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get() + * @rgd: the struct gfs2_rgrpd describing the RG to read in + * + */ + +void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd) +{ + struct gfs2_sbd *sdp = rgd->rd_sbd; + int x, length = rgd->rd_ri.ri_length; + + spin_lock(&sdp->sd_rindex_spin); + gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count); + if (--rgd->rd_bh_count) { + spin_unlock(&sdp->sd_rindex_spin); + return; + } + + for (x = 0; x < length; x++) { + struct gfs2_bitmap *bi = rgd->rd_bits + x; + kfree(bi->bi_clone); + bi->bi_clone = NULL; + brelse(bi->bi_bh); + bi->bi_bh = NULL; + } + + spin_unlock(&sdp->sd_rindex_spin); +} + +void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) +{ + struct gfs2_sbd *sdp = rgd->rd_sbd; + unsigned int length = rgd->rd_ri.ri_length; + unsigned int x; + + for (x = 0; x < length; x++) { + struct gfs2_bitmap *bi = rgd->rd_bits + x; + if (!bi->bi_clone) + continue; + memcpy(bi->bi_clone + bi->bi_offset, + bi->bi_bh->b_data + bi->bi_offset, + bi->bi_len); + } + + spin_lock(&sdp->sd_rindex_spin); + rgd->rd_free_clone = rgd->rd_rg.rg_free; + spin_unlock(&sdp->sd_rindex_spin); +} + +/** + * gfs2_alloc_get - allocate a struct gfs2_alloc structure for an inode + * @ip: the incore GFS2 inode structure + * + * FIXME: Don't use NOFAIL + * + * Returns: the struct gfs2_alloc + */ + +struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) +{ + struct gfs2_alloc *al = ip->i_alloc; + + gfs2_assert_warn(ip->i_sbd, !al); + + al = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL | __GFP_NOFAIL); + + ip->i_alloc = al; + + return al; +} + +/** + * gfs2_alloc_put - throw away the struct gfs2_alloc for an inode + * @ip: the inode + * + */ + +void gfs2_alloc_put(struct gfs2_inode *ip) +{ + struct gfs2_alloc *al = ip->i_alloc; + + if (gfs2_assert_warn(ip->i_sbd, al)) + return; + + ip->i_alloc = NULL; + kfree(al); +} + +/** + * try_rgrp_fit - See if a given reservation will fit in a given RG + * @rgd: the RG data + * @al: the struct gfs2_alloc structure describing the reservation + * + * If there's room for the requested blocks to be allocated from the RG: + * Sets the $al_reserved_data field in @al. + * Sets the $al_reserved_meta field in @al. + * Sets the $al_rgd field in @al. + * + * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) + */ + +static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) +{ + struct gfs2_sbd *sdp = rgd->rd_sbd; + int ret = 0; + + spin_lock(&sdp->sd_rindex_spin); + if (rgd->rd_free_clone >= al->al_requested) { + al->al_rgd = rgd; + ret = 1; + } + spin_unlock(&sdp->sd_rindex_spin); + + return ret; +} + +/** + * recent_rgrp_first - get first RG from "recent" list + * @sdp: The GFS2 superblock + * @rglast: address of the rgrp used last + * + * Returns: The first rgrp in the recent list + */ + +static struct gfs2_rgrpd *recent_rgrp_first(struct gfs2_sbd *sdp, + uint64_t rglast) +{ + struct gfs2_rgrpd *rgd = NULL; + + spin_lock(&sdp->sd_rindex_spin); + + if (list_empty(&sdp->sd_rindex_recent_list)) + goto out; + + if (!rglast) + goto first; + + list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { + if (rgd->rd_ri.ri_addr == rglast) + goto out; + } + + first: + rgd = list_entry(sdp->sd_rindex_recent_list.next, struct gfs2_rgrpd, + rd_recent); + + out: + spin_unlock(&sdp->sd_rindex_spin); + + return rgd; +} + +/** + * recent_rgrp_next - get next RG from "recent" list + * @cur_rgd: current rgrp + * @remove: + * + * Returns: The next rgrp in the recent list + */ + +static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd, + int remove) +{ + struct gfs2_sbd *sdp = cur_rgd->rd_sbd; + struct list_head *head; + struct gfs2_rgrpd *rgd; + + spin_lock(&sdp->sd_rindex_spin); + + head = &sdp->sd_rindex_recent_list; + + list_for_each_entry(rgd, head, rd_recent) { + if (rgd == cur_rgd) { + if (cur_rgd->rd_recent.next != head) + rgd = list_entry(cur_rgd->rd_recent.next, + struct gfs2_rgrpd, rd_recent); + else + rgd = NULL; + + if (remove) + list_del(&cur_rgd->rd_recent); + + goto out; + } + } + + rgd = NULL; + if (!list_empty(head)) + rgd = list_entry(head->next, struct gfs2_rgrpd, rd_recent); + + out: + spin_unlock(&sdp->sd_rindex_spin); + + return rgd; +} + +/** + * recent_rgrp_add - add an RG to tail of "recent" list + * @new_rgd: The rgrp to add + * + */ + +static void recent_rgrp_add(struct gfs2_rgrpd *new_rgd) +{ + struct gfs2_sbd *sdp = new_rgd->rd_sbd; + struct gfs2_rgrpd *rgd; + unsigned int count = 0; + unsigned int max = sdp->sd_rgrps / gfs2_jindex_size(sdp); + + spin_lock(&sdp->sd_rindex_spin); + + list_for_each_entry(rgd, &sdp->sd_rindex_recent_list, rd_recent) { + if (rgd == new_rgd) + goto out; + + if (++count >= max) + goto out; + } + list_add_tail(&new_rgd->rd_recent, &sdp->sd_rindex_recent_list); + + out: + spin_unlock(&sdp->sd_rindex_spin); +} + +/** + * forward_rgrp_get - get an rgrp to try next from full list + * @sdp: The GFS2 superblock + * + * Returns: The rgrp to try next + */ + +static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp) +{ + struct gfs2_rgrpd *rgd; + unsigned int journals = gfs2_jindex_size(sdp); + unsigned int rg = 0, x; + + spin_lock(&sdp->sd_rindex_spin); + + rgd = sdp->sd_rindex_forward; + if (!rgd) { + if (sdp->sd_rgrps >= journals) + rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals; + + for (x = 0, rgd = gfs2_rgrpd_get_first(sdp); + x < rg; + x++, rgd = gfs2_rgrpd_get_next(rgd)) + /* Do Nothing */; + + sdp->sd_rindex_forward = rgd; + } + + spin_unlock(&sdp->sd_rindex_spin); + + return rgd; +} + +/** + * forward_rgrp_set - set the forward rgrp pointer + * @sdp: the filesystem + * @rgd: The new forward rgrp + * + */ + +static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd) +{ + spin_lock(&sdp->sd_rindex_spin); + sdp->sd_rindex_forward = rgd; + spin_unlock(&sdp->sd_rindex_spin); +} + +/** + * get_local_rgrp - Choose and lock a rgrp for allocation + * @ip: the inode to reserve space for + * @rgp: the chosen and locked rgrp + * + * Try to acquire rgrp in way which avoids contending with others. + * + * Returns: errno + */ + +static int get_local_rgrp(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_rgrpd *rgd, *begin = NULL; + struct gfs2_alloc *al = ip->i_alloc; + int flags = LM_FLAG_TRY; + int skipped = 0; + int loops = 0; + int error; + + /* Try recently successful rgrps */ + + rgd = recent_rgrp_first(sdp, ip->i_last_rg_alloc); + + while (rgd) { + error = gfs2_glock_nq_init(rgd->rd_gl, + LM_ST_EXCLUSIVE, LM_FLAG_TRY, + &al->al_rgd_gh); + switch (error) { + case 0: + if (try_rgrp_fit(rgd, al)) + goto out; + gfs2_glock_dq_uninit(&al->al_rgd_gh); + rgd = recent_rgrp_next(rgd, 1); + break; + + case GLR_TRYFAILED: + rgd = recent_rgrp_next(rgd, 0); + break; + + default: + return error; + } + } + + /* Go through full list of rgrps */ + + begin = rgd = forward_rgrp_get(sdp); + + for (;;) { + error = gfs2_glock_nq_init(rgd->rd_gl, + LM_ST_EXCLUSIVE, flags, + &al->al_rgd_gh); + switch (error) { + case 0: + if (try_rgrp_fit(rgd, al)) + goto out; + gfs2_glock_dq_uninit(&al->al_rgd_gh); + break; + + case GLR_TRYFAILED: + skipped++; + break; + + default: + return error; + } + + rgd = gfs2_rgrpd_get_next(rgd); + if (!rgd) + rgd = gfs2_rgrpd_get_first(sdp); + + if (rgd == begin) { + if (++loops >= 2 || !skipped) + return -ENOSPC; + flags = 0; + } + } + + out: + ip->i_last_rg_alloc = rgd->rd_ri.ri_addr; + + if (begin) { + recent_rgrp_add(rgd); + rgd = gfs2_rgrpd_get_next(rgd); + if (!rgd) + rgd = gfs2_rgrpd_get_first(sdp); + forward_rgrp_set(sdp, rgd); + } + + return 0; +} + +/** + * gfs2_inplace_reserve_i - Reserve space in the filesystem + * @ip: the inode to reserve space for + * + * Returns: errno + */ + +int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_alloc *al = ip->i_alloc; + int error; + + if (gfs2_assert_warn(sdp, al->al_requested)) + return -EINVAL; + + error = gfs2_rindex_hold(sdp, &al->al_ri_gh); + if (error) + return error; + + error = get_local_rgrp(ip); + if (error) { + gfs2_glock_dq_uninit(&al->al_ri_gh); + return error; + } + + al->al_file = file; + al->al_line = line; + + return 0; +} + +/** + * gfs2_inplace_release - release an inplace reservation + * @ip: the inode the reservation was taken out on + * + * Release a reservation made by gfs2_inplace_reserve(). + */ + +void gfs2_inplace_release(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_alloc *al = ip->i_alloc; + + if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1) + fs_warn(sdp, "al_alloced = %u, al_requested = %u " + "al_file = %s, al_line = %u\n", + al->al_alloced, al->al_requested, al->al_file, + al->al_line); + + al->al_rgd = NULL; + gfs2_glock_dq_uninit(&al->al_rgd_gh); + gfs2_glock_dq_uninit(&al->al_ri_gh); +} + +/** + * gfs2_get_block_type - Check a block in a RG is of given type + * @rgd: the resource group holding the block + * @block: the block number + * + * Returns: The block type (GFS2_BLKST_*) + */ + +unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, uint64_t block) +{ + struct gfs2_bitmap *bi = NULL; + uint32_t length, rgrp_block, buf_block; + unsigned int buf; + unsigned char type; + + length = rgd->rd_ri.ri_length; + rgrp_block = block - rgd->rd_ri.ri_data0; + + for (buf = 0; buf < length; buf++) { + bi = rgd->rd_bits + buf; + if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY) + break; + } + + gfs2_assert(rgd->rd_sbd, buf < length); + buf_block = rgrp_block - bi->bi_start * GFS2_NBBY; + + type = gfs2_testbit(rgd, + bi->bi_bh->b_data + bi->bi_offset, + bi->bi_len, buf_block); + + return type; +} + +/** + * rgblk_search - find a block in @old_state, change allocation + * state to @new_state + * @rgd: the resource group descriptor + * @goal: the goal block within the RG (start here to search for avail block) + * @old_state: GFS2_BLKST_XXX the before-allocation state to find + * @new_state: GFS2_BLKST_XXX the after-allocation block state + * + * Walk rgrp's bitmap to find bits that represent a block in @old_state. + * Add the found bitmap buffer to the transaction. + * Set the found bits to @new_state to change block's allocation state. + * + * This function never fails, because we wouldn't call it unless we + * know (from reservation results, etc.) that a block is available. + * + * Scope of @goal and returned block is just within rgrp, not the whole + * filesystem. + * + * Returns: the block number allocated + */ + +static uint32_t rgblk_search(struct gfs2_rgrpd *rgd, uint32_t goal, + unsigned char old_state, unsigned char new_state) +{ + struct gfs2_bitmap *bi = NULL; + uint32_t length = rgd->rd_ri.ri_length; + uint32_t blk = 0; + unsigned int buf, x; + + /* Find bitmap block that contains bits for goal block */ + for (buf = 0; buf < length; buf++) { + bi = rgd->rd_bits + buf; + if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) + break; + } + + gfs2_assert(rgd->rd_sbd, buf < length); + + /* Convert scope of "goal" from rgrp-wide to within found bit block */ + goal -= bi->bi_start * GFS2_NBBY; + + /* Search (up to entire) bitmap in this rgrp for allocatable block. + "x <= length", instead of "x < length", because we typically start + the search in the middle of a bit block, but if we can't find an + allocatable block anywhere else, we want to be able wrap around and + search in the first part of our first-searched bit block. */ + for (x = 0; x <= length; x++) { + if (bi->bi_clone) + blk = gfs2_bitfit(rgd, + bi->bi_clone + bi->bi_offset, + bi->bi_len, goal, old_state); + else + blk = gfs2_bitfit(rgd, + bi->bi_bh->b_data + bi->bi_offset, + bi->bi_len, goal, old_state); + if (blk != BFITNOENT) + break; + + /* Try next bitmap block (wrap back to rgrp header if at end) */ + buf = (buf + 1) % length; + bi = rgd->rd_bits + buf; + goal = 0; + } + + if (gfs2_assert_withdraw(rgd->rd_sbd, x <= length)) + blk = 0; + + gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh); + gfs2_setbit(rgd, + bi->bi_bh->b_data + bi->bi_offset, + bi->bi_len, blk, new_state); + if (bi->bi_clone) + gfs2_setbit(rgd, + bi->bi_clone + bi->bi_offset, + bi->bi_len, blk, new_state); + + return bi->bi_start * GFS2_NBBY + blk; +} + +/** + * rgblk_free - Change alloc state of given block(s) + * @sdp: the filesystem + * @bstart: the start of a run of blocks to free + * @blen: the length of the block run (all must lie within ONE RG!) + * @new_state: GFS2_BLKST_XXX the after-allocation block state + * + * Returns: Resource group containing the block(s) + */ + +static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, uint64_t bstart, + uint32_t blen, unsigned char new_state) +{ + struct gfs2_rgrpd *rgd; + struct gfs2_bitmap *bi = NULL; + uint32_t length, rgrp_blk, buf_blk; + unsigned int buf; + + rgd = gfs2_blk2rgrpd(sdp, bstart); + if (!rgd) { + if (gfs2_consist(sdp)) + fs_err(sdp, "block = %llu\n", bstart); + return NULL; + } + + length = rgd->rd_ri.ri_length; + + rgrp_blk = bstart - rgd->rd_ri.ri_data0; + + while (blen--) { + for (buf = 0; buf < length; buf++) { + bi = rgd->rd_bits + buf; + if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY) + break; + } + + gfs2_assert(rgd->rd_sbd, buf < length); + + buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY; + rgrp_blk++; + + if (!bi->bi_clone) { + bi->bi_clone = kmalloc(bi->bi_bh->b_size, + GFP_KERNEL | __GFP_NOFAIL); + memcpy(bi->bi_clone + bi->bi_offset, + bi->bi_bh->b_data + bi->bi_offset, + bi->bi_len); + } + gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh); + gfs2_setbit(rgd, + bi->bi_bh->b_data + bi->bi_offset, + bi->bi_len, buf_blk, new_state); + } + + return rgd; +} + +/** + * gfs2_alloc_data - Allocate a data block + * @ip: the inode to allocate the data block for + * + * Returns: the allocated block + */ + +uint64_t gfs2_alloc_data(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_alloc *al = ip->i_alloc; + struct gfs2_rgrpd *rgd = al->al_rgd; + uint32_t goal, blk; + uint64_t block; + + if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_data)) + goal = ip->i_di.di_goal_data - rgd->rd_ri.ri_data0; + else + goal = rgd->rd_last_alloc_data; + + blk = rgblk_search(rgd, goal, + GFS2_BLKST_FREE, GFS2_BLKST_USED); + rgd->rd_last_alloc_data = blk; + + block = rgd->rd_ri.ri_data0 + blk; + ip->i_di.di_goal_data = block; + + gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); + rgd->rd_rg.rg_free--; + + gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh); + gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); + + al->al_alloced++; + + gfs2_statfs_change(sdp, 0, -1, 0); + gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid); + + spin_lock(&sdp->sd_rindex_spin); + rgd->rd_free_clone--; + spin_unlock(&sdp->sd_rindex_spin); + + return block; +} + +/** + * gfs2_alloc_meta - Allocate a metadata block + * @ip: the inode to allocate the metadata block for + * + * Returns: the allocated block + */ + +uint64_t gfs2_alloc_meta(struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_alloc *al = ip->i_alloc; + struct gfs2_rgrpd *rgd = al->al_rgd; + uint32_t goal, blk; + uint64_t block; + + if (rgrp_contains_block(&rgd->rd_ri, ip->i_di.di_goal_meta)) + goal = ip->i_di.di_goal_meta - rgd->rd_ri.ri_data0; + else + goal = rgd->rd_last_alloc_meta; + + blk = rgblk_search(rgd, goal, + GFS2_BLKST_FREE, GFS2_BLKST_USED); + rgd->rd_last_alloc_meta = blk; + + block = rgd->rd_ri.ri_data0 + blk; + ip->i_di.di_goal_meta = block; + + gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); + rgd->rd_rg.rg_free--; + + gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh); + gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); + + al->al_alloced++; + + gfs2_statfs_change(sdp, 0, -1, 0); + gfs2_quota_change(ip, +1, ip->i_di.di_uid, ip->i_di.di_gid); + gfs2_trans_add_unrevoke(sdp, block); + + spin_lock(&sdp->sd_rindex_spin); + rgd->rd_free_clone--; + spin_unlock(&sdp->sd_rindex_spin); + + return block; +} + +/** + * gfs2_alloc_di - Allocate a dinode + * @dip: the directory that the inode is going in + * + * Returns: the block allocated + */ + +uint64_t gfs2_alloc_di(struct gfs2_inode *dip) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct gfs2_alloc *al = dip->i_alloc; + struct gfs2_rgrpd *rgd = al->al_rgd; + uint32_t blk; + uint64_t block; + + blk = rgblk_search(rgd, rgd->rd_last_alloc_meta, + GFS2_BLKST_FREE, GFS2_BLKST_DINODE); + + rgd->rd_last_alloc_meta = blk; + + block = rgd->rd_ri.ri_data0 + blk; + + gfs2_assert_withdraw(sdp, rgd->rd_rg.rg_free); + rgd->rd_rg.rg_free--; + rgd->rd_rg.rg_dinodes++; + + gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh); + gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); + + al->al_alloced++; + + gfs2_statfs_change(sdp, 0, -1, +1); + gfs2_trans_add_unrevoke(sdp, block); + + spin_lock(&sdp->sd_rindex_spin); + rgd->rd_free_clone--; + spin_unlock(&sdp->sd_rindex_spin); + + return block; +} + +/** + * gfs2_free_data - free a contiguous run of data block(s) + * @ip: the inode these blocks are being freed from + * @bstart: first block of a run of contiguous blocks + * @blen: the length of the block run + * + */ + +void gfs2_free_data(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_rgrpd *rgd; + + rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); + if (!rgd) + return; + + rgd->rd_rg.rg_free += blen; + + gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh); + gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); + + gfs2_trans_add_rg(rgd); + + gfs2_statfs_change(sdp, 0, +blen, 0); + gfs2_quota_change(ip, -(int64_t)blen, + ip->i_di.di_uid, ip->i_di.di_gid); +} + +/** + * gfs2_free_meta - free a contiguous run of data block(s) + * @ip: the inode these blocks are being freed from + * @bstart: first block of a run of contiguous blocks + * @blen: the length of the block run + * + */ + +void gfs2_free_meta(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_rgrpd *rgd; + + rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); + if (!rgd) + return; + + rgd->rd_rg.rg_free += blen; + + gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh); + gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); + + gfs2_trans_add_rg(rgd); + + gfs2_statfs_change(sdp, 0, +blen, 0); + gfs2_quota_change(ip, -(int64_t)blen, + ip->i_di.di_uid, ip->i_di.di_gid); + gfs2_meta_wipe(ip, bstart, blen); +} + +void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, uint64_t blkno) +{ + struct gfs2_sbd *sdp = rgd->rd_sbd; + struct gfs2_rgrpd *tmp_rgd; + + tmp_rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_FREE); + if (!tmp_rgd) + return; + gfs2_assert_withdraw(sdp, rgd == tmp_rgd); + + if (!rgd->rd_rg.rg_dinodes) + gfs2_consist_rgrpd(rgd); + rgd->rd_rg.rg_dinodes--; + rgd->rd_rg.rg_free++; + + gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh); + gfs2_rgrp_out(&rgd->rd_rg, rgd->rd_bits[0].bi_bh->b_data); + + gfs2_statfs_change(sdp, 0, +1, -1); + gfs2_trans_add_rg(rgd); +} + +/** + * gfs2_free_uninit_di - free a dinode block + * @rgd: the resource group that contains the dinode + * @ip: the inode + * + */ + +void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip) +{ + gfs2_free_uninit_di(rgd, ip->i_num.no_addr); + gfs2_quota_change(ip, -1, ip->i_di.di_uid, ip->i_di.di_gid); + gfs2_meta_wipe(ip, ip->i_num.no_addr, 1); +} + +/** + * gfs2_rlist_add - add a RG to a list of RGs + * @sdp: the filesystem + * @rlist: the list of resource groups + * @block: the block + * + * Figure out what RG a block belongs to and add that RG to the list + * + * FIXME: Don't use NOFAIL + * + */ + +void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, + uint64_t block) +{ + struct gfs2_rgrpd *rgd; + struct gfs2_rgrpd **tmp; + unsigned int new_space; + unsigned int x; + + if (gfs2_assert_warn(sdp, !rlist->rl_ghs)) + return; + + rgd = gfs2_blk2rgrpd(sdp, block); + if (!rgd) { + if (gfs2_consist(sdp)) + fs_err(sdp, "block = %llu\n", block); + return; + } + + for (x = 0; x < rlist->rl_rgrps; x++) + if (rlist->rl_rgd[x] == rgd) + return; + + if (rlist->rl_rgrps == rlist->rl_space) { + new_space = rlist->rl_space + 10; + + tmp = kcalloc(new_space, sizeof(struct gfs2_rgrpd *), + GFP_KERNEL | __GFP_NOFAIL); + + if (rlist->rl_rgd) { + memcpy(tmp, rlist->rl_rgd, + rlist->rl_space * sizeof(struct gfs2_rgrpd *)); + kfree(rlist->rl_rgd); + } + + rlist->rl_space = new_space; + rlist->rl_rgd = tmp; + } + + rlist->rl_rgd[rlist->rl_rgrps++] = rgd; +} + +/** + * gfs2_rlist_alloc - all RGs have been added to the rlist, now allocate + * and initialize an array of glock holders for them + * @rlist: the list of resource groups + * @state: the lock state to acquire the RG lock in + * @flags: the modifier flags for the holder structures + * + * FIXME: Don't use NOFAIL + * + */ + +void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, + int flags) +{ + unsigned int x; + + rlist->rl_ghs = kcalloc(rlist->rl_rgrps, sizeof(struct gfs2_holder), + GFP_KERNEL | __GFP_NOFAIL); + for (x = 0; x < rlist->rl_rgrps; x++) + gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, + state, flags, + &rlist->rl_ghs[x]); +} + +/** + * gfs2_rlist_free - free a resource group list + * @list: the list of resource groups + * + */ + +void gfs2_rlist_free(struct gfs2_rgrp_list *rlist) +{ + unsigned int x; + + kfree(rlist->rl_rgd); + + if (rlist->rl_ghs) { + for (x = 0; x < rlist->rl_rgrps; x++) + gfs2_holder_uninit(&rlist->rl_ghs[x]); + kfree(rlist->rl_ghs); + } +} + --- a/fs/gfs2/rgrp.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/rgrp.h 2005-10-10 11:28:49.353778060 -0500 @@ -0,0 +1,62 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __RGRP_DOT_H__ +#define __RGRP_DOT_H__ + +void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); + +struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, uint64_t blk); +struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); +struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); + +void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); +int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh); + +int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd); +void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd); +void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd); + +void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); + +struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); +void gfs2_alloc_put(struct gfs2_inode *ip); + +int gfs2_inplace_reserve_i(struct gfs2_inode *ip, + char *file, unsigned int line); +#define gfs2_inplace_reserve(ip) \ +gfs2_inplace_reserve_i((ip), __FILE__, __LINE__) + +void gfs2_inplace_release(struct gfs2_inode *ip); + +unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, uint64_t block); + +uint64_t gfs2_alloc_data(struct gfs2_inode *ip); +uint64_t gfs2_alloc_meta(struct gfs2_inode *ip); +uint64_t gfs2_alloc_di(struct gfs2_inode *ip); + +void gfs2_free_data(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen); +void gfs2_free_meta(struct gfs2_inode *ip, uint64_t bstart, uint32_t blen); +void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, uint64_t blkno); +void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); + +struct gfs2_rgrp_list { + unsigned int rl_rgrps; + unsigned int rl_space; + struct gfs2_rgrpd **rl_rgd; + struct gfs2_holder *rl_ghs; +}; + +void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, + uint64_t block); +void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state, + int flags); +void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); + +#endif /* __RGRP_DOT_H__ */ --- a/fs/gfs2/bits.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/bits.c 2005-10-10 11:28:49.097817974 -0500 @@ -0,0 +1,178 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +/* + * These routines are used by the resource group routines (rgrp.c) + * to keep track of block allocation. Each block is represented by two + * bits. One bit indicates whether or not the block is used. (1=used, + * 0=free) The other bit indicates whether or not the block contains a + * dinode or not. (1=dinode, 0=not-dinode) So, each byte represents + * GFS2_NBBY (i.e. 4) blocks. + */ + +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "bits.h" + +static const char valid_change[16] = { + /* current */ + /* n */ 0, 1, 0, 1, + /* e */ 1, 0, 0, 0, + /* w */ 0, 0, 0, 0, + 1, 0, 0, 0 +}; + +/** + * gfs2_setbit - Set a bit in the bitmaps + * @buffer: the buffer that holds the bitmaps + * @buflen: the length (in bytes) of the buffer + * @block: the block to set + * @new_state: the new state of the block + * + */ + +void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, + unsigned int buflen, uint32_t block, unsigned char new_state) +{ + unsigned char *byte, *end, cur_state; + unsigned int bit; + + byte = buffer + (block / GFS2_NBBY); + bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; + end = buffer + buflen; + + gfs2_assert(rgd->rd_sbd, byte < end); + + cur_state = (*byte >> bit) & GFS2_BIT_MASK; + + if (valid_change[new_state * 4 + cur_state]) { + *byte ^= cur_state << bit; + *byte |= new_state << bit; + } else + gfs2_consist_rgrpd(rgd); +} + +/** + * gfs2_testbit - test a bit in the bitmaps + * @buffer: the buffer that holds the bitmaps + * @buflen: the length (in bytes) of the buffer + * @block: the block to read + * + */ + +unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, + unsigned int buflen, uint32_t block) +{ + unsigned char *byte, *end, cur_state; + unsigned int bit; + + byte = buffer + (block / GFS2_NBBY); + bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; + end = buffer + buflen; + + gfs2_assert(rgd->rd_sbd, byte < end); + + cur_state = (*byte >> bit) & GFS2_BIT_MASK; + + return cur_state; +} + +/** + * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing + * a block in a given allocation state. + * @buffer: the buffer that holds the bitmaps + * @buflen: the length (in bytes) of the buffer + * @goal: start search at this block's bit-pair (within @buffer) + * @old_state: GFS2_BLKST_XXX the state of the block we're looking for; + * bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0) + * + * Scope of @goal and returned block number is only within this bitmap buffer, + * not entire rgrp or filesystem. @buffer will be offset from the actual + * beginning of a bitmap block buffer, skipping any header structures. + * + * Return: the block number (bitmap buffer scope) that was found + */ + +uint32_t gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer, + unsigned int buflen, uint32_t goal, + unsigned char old_state) +{ + unsigned char *byte, *end, alloc; + uint32_t blk = goal; + unsigned int bit; + + byte = buffer + (goal / GFS2_NBBY); + bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; + end = buffer + buflen; + alloc = (old_state & 1) ? 0 : 0x55; + + while (byte < end) { + if ((*byte & 0x55) == alloc) { + blk += (8 - bit) >> 1; + + bit = 0; + byte++; + + continue; + } + + if (((*byte >> bit) & GFS2_BIT_MASK) == old_state) + return blk; + + bit += GFS2_BIT_SIZE; + if (bit >= 8) { + bit = 0; + byte++; + } + + blk++; + } + + return BFITNOENT; +} + +/** + * gfs2_bitcount - count the number of bits in a certain state + * @buffer: the buffer that holds the bitmaps + * @buflen: the length (in bytes) of the buffer + * @state: the state of the block we're looking for + * + * Returns: The number of bits + */ + +uint32_t gfs2_bitcount(struct gfs2_rgrpd *rgd, unsigned char *buffer, + unsigned int buflen, unsigned char state) +{ + unsigned char *byte = buffer; + unsigned char *end = buffer + buflen; + unsigned char state1 = state << 2; + unsigned char state2 = state << 4; + unsigned char state3 = state << 6; + uint32_t count = 0; + + for (; byte < end; byte++) { + if (((*byte) & 0x03) == state) + count++; + if (((*byte) & 0x0C) == state1) + count++; + if (((*byte) & 0x30) == state2) + count++; + if (((*byte) & 0xC0) == state3) + count++; + } + + return count; +} + --- a/fs/gfs2/bits.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/bits.h 2005-10-10 11:28:49.104816882 -0500 @@ -0,0 +1,28 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __BITS_DOT_H__ +#define __BITS_DOT_H__ + +#define BFITNOENT 0xFFFFFFFF + +void gfs2_setbit(struct gfs2_rgrpd *rgd, + unsigned char *buffer, unsigned int buflen, + uint32_t block, unsigned char new_state); +unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, + unsigned char *buffer, unsigned int buflen, + uint32_t block); +uint32_t gfs2_bitfit(struct gfs2_rgrpd *rgd, + unsigned char *buffer, unsigned int buflen, + uint32_t goal, unsigned char old_state); +uint32_t gfs2_bitcount(struct gfs2_rgrpd *rgd, + unsigned char *buffer, unsigned int buflen, + unsigned char state); + +#endif /* __BITS_DOT_H__ */ [PATCH 08/16] GFS: ea and acl Code that handles extended attributes and ACL's. Signed-off-by: Ken Preslan Signed-off-by: David Teigland --- fs/gfs2/acl.c | 312 ++++++++++ fs/gfs2/acl.h | 37 + fs/gfs2/eaops.c | 185 ++++++ fs/gfs2/eaops.h | 30 + fs/gfs2/eattr.c | 1620 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/eattr.h | 91 +++ 6 files changed, 2275 insertions(+) --- a/fs/gfs2/acl.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/acl.c 2005-10-10 11:28:49.095818285 -0500 @@ -0,0 +1,312 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "acl.h" +#include "eaops.h" +#include "eattr.h" +#include "glock.h" +#include "inode.h" +#include "meta_io.h" +#include "trans.h" + +#define ACL_ACCESS 1 +#define ACL_DEFAULT 0 + +int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, + struct gfs2_ea_request *er, + int *remove, mode_t *mode) +{ + struct posix_acl *acl; + int error; + + error = gfs2_acl_validate_remove(ip, access); + if (error) + return error; + + if (!er->er_data) + return -EINVAL; + + acl = posix_acl_from_xattr(er->er_data, er->er_data_len); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (!acl) { + *remove = 1; + return 0; + } + + error = posix_acl_valid(acl); + if (error) + goto out; + + if (access) { + error = posix_acl_equiv_mode(acl, mode); + if (!error) + *remove = 1; + else if (error > 0) + error = 0; + } + + out: + posix_acl_release(acl); + + return error; +} + +int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access) +{ + if (!ip->i_sbd->sd_args.ar_posix_acl) + return -EOPNOTSUPP; + if (current->fsuid != ip->i_di.di_uid && !capable(CAP_FOWNER)) + return -EPERM; + if (S_ISLNK(ip->i_di.di_mode)) + return -EOPNOTSUPP; + if (!access && !S_ISDIR(ip->i_di.di_mode)) + return -EACCES; + + return 0; +} + +static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl, + struct gfs2_ea_location *el, char **data, unsigned int *len) +{ + struct gfs2_ea_request er; + struct gfs2_ea_location el_this; + int error; + + if (!ip->i_di.di_eattr) + return 0; + + memset(&er, 0, sizeof(struct gfs2_ea_request)); + if (access) { + er.er_name = GFS2_POSIX_ACL_ACCESS; + er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN; + } else { + er.er_name = GFS2_POSIX_ACL_DEFAULT; + er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN; + } + er.er_type = GFS2_EATYPE_SYS; + + if (!el) + el = &el_this; + + error = gfs2_ea_find(ip, &er, el); + if (error) + return error; + if (!el->el_ea) + return 0; + if (!GFS2_EA_DATA_LEN(el->el_ea)) + goto out; + + er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea); + er.er_data = kmalloc(er.er_data_len, GFP_KERNEL); + error = -ENOMEM; + if (!er.er_data) + goto out; + + error = gfs2_ea_get_copy(ip, el, er.er_data); + if (error) + goto out_kfree; + + if (acl) { + *acl = posix_acl_from_xattr(er.er_data, er.er_data_len); + if (IS_ERR(*acl)) + error = PTR_ERR(*acl); + } + + out_kfree: + if (error || !data) + kfree(er.er_data); + else { + *data = er.er_data; + *len = er.er_data_len; + } + + out: + if (error || el == &el_this) + brelse(el->el_bh); + + return error; +} + +/** + * gfs2_check_acl_locked - Check an ACL to see if we're allowed to do something + * @inode: the file we want to do something to + * @mask: what we want to do + * + * Returns: errno + */ + +int gfs2_check_acl_locked(struct inode *inode, int mask) +{ + struct posix_acl *acl = NULL; + int error; + + error = acl_get(get_v2ip(inode), ACL_ACCESS, &acl, NULL, NULL, NULL); + if (error) + return error; + + if (acl) { + error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return error; + } + + return -EAGAIN; +} + +int gfs2_check_acl(struct inode *inode, int mask) +{ + struct gfs2_inode *ip = get_v2ip(inode); + struct gfs2_holder i_gh; + int error; + + error = gfs2_glock_nq_init(ip->i_gl, + LM_ST_SHARED, LM_FLAG_ANY, + &i_gh); + if (!error) { + error = gfs2_check_acl_locked(inode, mask); + gfs2_glock_dq_uninit(&i_gh); + } + + return error; +} + +static int munge_mode(struct gfs2_inode *ip, mode_t mode) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *dibh; + int error; + + error = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (error) + return error; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + gfs2_assert_withdraw(sdp, + (ip->i_di.di_mode & S_IFMT) == (mode & S_IFMT)); + ip->i_di.di_mode = mode; + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + gfs2_trans_end(sdp); + + return 0; +} + +int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) +{ + struct gfs2_sbd *sdp = dip->i_sbd; + struct posix_acl *acl = NULL, *clone; + struct gfs2_ea_request er; + mode_t mode = ip->i_di.di_mode; + int error; + + if (!sdp->sd_args.ar_posix_acl) + return 0; + if (S_ISLNK(ip->i_di.di_mode)) + return 0; + + memset(&er, 0, sizeof(struct gfs2_ea_request)); + er.er_type = GFS2_EATYPE_SYS; + + error = acl_get(dip, ACL_DEFAULT, &acl, NULL, + &er.er_data, &er.er_data_len); + if (error) + return error; + if (!acl) { + mode &= ~current->fs->umask; + if (mode != ip->i_di.di_mode) + error = munge_mode(ip, mode); + return error; + } + + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto out; + posix_acl_release(acl); + acl = clone; + + if (S_ISDIR(ip->i_di.di_mode)) { + er.er_name = GFS2_POSIX_ACL_DEFAULT; + er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN; + error = gfs2_system_eaops.eo_set(ip, &er); + if (error) + goto out; + } + + error = posix_acl_create_masq(acl, &mode); + if (error < 0) + goto out; + if (error > 0) { + er.er_name = GFS2_POSIX_ACL_ACCESS; + er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN; + posix_acl_to_xattr(acl, er.er_data, er.er_data_len); + er.er_mode = mode; + er.er_flags = GFS2_ERF_MODE; + error = gfs2_system_eaops.eo_set(ip, &er); + if (error) + goto out; + } else + munge_mode(ip, mode); + + out: + posix_acl_release(acl); + kfree(er.er_data); + return error; +} + +int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) +{ + struct posix_acl *acl = NULL, *clone; + struct gfs2_ea_location el; + char *data; + unsigned int len; + int error; + + error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len); + if (error) + return error; + if (!acl) + return gfs2_setattr_simple(ip, attr); + + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto out; + posix_acl_release(acl); + acl = clone; + + error = posix_acl_chmod_masq(acl, attr->ia_mode); + if (!error) { + posix_acl_to_xattr(acl, data, len); + error = gfs2_ea_acl_chmod(ip, &el, attr, data); + } + + out: + posix_acl_release(acl); + brelse(el.el_bh); + kfree(data); + + return error; +} + --- a/fs/gfs2/acl.h 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/acl.h 2005-10-10 11:28:49.095818285 -0500 @@ -0,0 +1,37 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#ifndef __ACL_DOT_H__ +#define __ACL_DOT_H__ + +#define GFS2_POSIX_ACL_ACCESS "posix_acl_access" +#define GFS2_POSIX_ACL_ACCESS_LEN 16 +#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" +#define GFS2_POSIX_ACL_DEFAULT_LEN 17 + +#define GFS2_ACL_IS_ACCESS(name, len) \ + ((len) == GFS2_POSIX_ACL_ACCESS_LEN && \ + !memcmp(GFS2_POSIX_ACL_ACCESS, (name), (len))) + +#define GFS2_ACL_IS_DEFAULT(name, len) \ + ((len) == GFS2_POSIX_ACL_DEFAULT_LEN && \ + !memcmp(GFS2_POSIX_ACL_DEFAULT, (name), (len))) + +struct gfs2_ea_request; + +int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, + struct gfs2_ea_request *er, + int *remove, mode_t *mode); +int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access); +int gfs2_check_acl_locked(struct inode *inode, int mask); +int gfs2_check_acl(struct inode *inode, int mask); +int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip); +int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); + +#endif /* __ACL_DOT_H__ */ --- a/fs/gfs2/eattr.c 1969-12-31 17:00:00.000000000 -0700 +++ b/fs/gfs2/eattr.c 2005-10-10 11:28:49.160808151 -0500 @@ -0,0 +1,1620 @@ +/* + * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gfs2.h" +#include "acl.h" +#include "eaops.h" +#include "eattr.h" +#include "glock.h" +#include "inode.h" +#include "meta_io.h" +#include "quota.h" +#include "rgrp.h" +#include "trans.h" + +/** + * ea_calc_size - returns the acutal number of bytes the request will take up + * (not counting any unstuffed data blocks) + * @sdp: + * @er: + * @size: + * + * Returns: 1 if the EA should be stuffed + */ + +static int ea_calc_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er, + unsigned int *size) +{ + *size = GFS2_EAREQ_SIZE_STUFFED(er); + if (*size <= sdp->sd_jbsize) + return 1; + + *size = GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er); + + return 0; +} + +static int ea_check_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er) +{ + unsigned int size; + + if (er->er_data_len > GFS2_EA_MAX_DATA_LEN) + return -ERANGE; + + ea_calc_size(sdp, er, &size); + + /* This can only happen with 512 byte blocks */ + if (size > sdp->sd_jbsize) + return -ERANGE; + + return 0; +} + +typedef int (*ea_call_t) (struct gfs2_inode *ip, + struct buffer_head *bh, + struct gfs2_ea_header *ea, + struct gfs2_ea_header *prev, + void *private); + +static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh, + ea_call_t ea_call, void *data) +{ + struct gfs2_ea_header *ea, *prev = NULL; + int error = 0; + + if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_EA)) + return -EIO; + + for (ea = GFS2_EA_BH2FIRST(bh);; prev = ea, ea = GFS2_EA2NEXT(ea)) { + if (!GFS2_EA_REC_LEN(ea)) + goto fail; + if (!(bh->b_data <= (char *)ea && + (char *)GFS2_EA2NEXT(ea) <= + bh->b_data + bh->b_size)) + goto fail; + if (!GFS2_EATYPE_VALID(ea->ea_type)) + goto fail; + + error = ea_call(ip, bh, ea, prev, data); + if (error) + return error; + + if (GFS2_EA_IS_LAST(ea)) { + if ((char *)GFS2_EA2NEXT(ea) != + bh->b_data + bh->b_size) + goto fail; + break; + } + } + + return error; + + fail: + gfs2_consist_inode(ip); + return -EIO; +} + +static int ea_foreach(struct gfs2_inode *ip, ea_call_t ea_call, void *data) +{ + struct buffer_head *bh, *eabh; + uint64_t *eablk, *end; + int error; + + error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, + DIO_START | DIO_WAIT, &bh); + if (error) + return error; + + if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) { + error = ea_foreach_i(ip, bh, ea_call, data); + goto out; + } + + if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_IN)) { + error = -EIO; + goto out; + } + + eablk = (uint64_t *)(bh->b_data + sizeof(struct gfs2_meta_header)); + end = eablk + ip->i_sbd->sd_inptrs; + + for (; eablk < end; eablk++) { + uint64_t bn; + + if (!*eablk) + break; + bn = le64_to_cpu(*eablk); + + error = gfs2_meta_read(ip->i_gl, bn, DIO_START | DIO_WAIT, + &eabh); + if (error) + break; + error = ea_foreach_i(ip, eabh, ea_call, data); + brelse(eabh); + if (error) + break; + } + out: + brelse(bh); + + return error; +} + +struct ea_find { + struct gfs2_ea_request *ef_er; + struct gfs2_ea_location *ef_el; +}; + +static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh, + struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, + void *private) +{ + struct ea_find *ef = private; + struct gfs2_ea_request *er = ef->ef_er; + + if (ea->ea_type == GFS2_EATYPE_UNUSED) + return 0; + + if (ea->ea_type == er->er_type) { + if (ea->ea_name_len == er->er_name_len && + !memcmp(GFS2_EA2NAME(ea), er->er_name, ea->ea_name_len)) { + struct gfs2_ea_location *el = ef->ef_el; + get_bh(bh); + el->el_bh = bh; + el->el_ea = ea; + el->el_prev = prev; + return 1; + } + } + +#if 0 + else if ((ip->i_di.di_flags & GFS2_DIF_EA_PACKED) && + er->er_type == GFS2_EATYPE_SYS) + return 1; +#endif + + return 0; +} + +int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er, + struct gfs2_ea_location *el) +{ + struct ea_find ef; + int error; + + ef.ef_er = er; + ef.ef_el = el; + + memset(el, 0, sizeof(struct gfs2_ea_location)); + + error = ea_foreach(ip, ea_find_i, &ef); + if (error > 0) + return 0; + + return error; +} + +/** + * ea_dealloc_unstuffed - + * @ip: + * @bh: + * @ea: + * @prev: + * @private: + * + * Take advantage of the fact that all unstuffed blocks are + * allocated from the same RG. But watch, this may not always + * be true. + * + * Returns: errno + */ + +static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, + struct gfs2_ea_header *ea, + struct gfs2_ea_header *prev, void *private) +{ + int *leave = private; + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_rgrpd *rgd; + struct gfs2_holder rg_gh; + struct buffer_head *dibh; + uint64_t *dataptrs, bn = 0; + uint64_t bstart = 0; + unsigned int blen = 0; + unsigned int blks = 0; + unsigned int x; + int error; + + if (GFS2_EA_IS_STUFFED(ea)) + return 0; + + dataptrs = GFS2_EA2DATAPTRS(ea); + for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) + if (*dataptrs) { + blks++; + bn = le64_to_cpu(*dataptrs); + } + if (!blks) + return 0; + + rgd = gfs2_blk2rgrpd(sdp, bn); + if (!rgd) { + gfs2_consist_inode(ip); + return -EIO; + } + + error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh); + if (error) + return error; + + error = gfs2_trans_begin(sdp, rgd->rd_ri.ri_length + + RES_DINODE + RES_EATTR + RES_STATFS + + RES_QUOTA, blks); + if (error) + goto out_gunlock; + + gfs2_trans_add_bh(ip->i_gl, bh); + + dataptrs = GFS2_EA2DATAPTRS(ea); + for (x = 0; x < ea->ea_num_ptrs; x++, dataptrs++) { + if (!*dataptrs) + break; + bn = le64_to_cpu(*dataptrs); + + if (bstart + blen == bn) + blen++; + else { + if (bstart) + gfs2_free_meta(ip, bstart, blen); + bstart = bn; + blen = 1; + } + + *dataptrs = 0; + if (!ip->i_di.di_blocks) + gfs2_consist_inode(ip); + ip->i_di.di_blocks--; + } + if (bstart) + gfs2_free_meta(ip, bstart, blen); + + if (prev && !leave) { + uint32_t len; + + len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea); + prev->ea_rec_len = cpu_to_le32(len); + + if (GFS2_EA_IS_LAST(ea)) + prev->ea_flags |= GFS2_EAFLAG_LAST; + } else { + ea->ea_type = GFS2_EATYPE_UNUSED; + ea->ea_num_ptrs = 0; + } + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + ip->i_di.di_ctime = get_seconds(); + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + gfs2_trans_end(sdp); + + out_gunlock: + gfs2_glock_dq_uninit(&rg_gh); + + return error; +} + +static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, + struct gfs2_ea_header *ea, + struct gfs2_ea_header *prev, int leave) +{ + struct gfs2_alloc *al; + int error; + + al = gfs2_alloc_get(ip); + + error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out_alloc; + + error = gfs2_rindex_hold(ip->i_sbd, &al->al_ri_gh); + if (error) + goto out_quota; + + error = ea_dealloc_unstuffed(ip, + bh, ea, prev, + (leave) ? &error : NULL); + + gfs2_glock_dq_uninit(&al->al_ri_gh); + + out_quota: + gfs2_quota_unhold(ip); + + out_alloc: + gfs2_alloc_put(ip); + + return error; +} + +/******************************************************************************/ + +static int gfs2_ea_repack_i(struct gfs2_inode *ip) +{ + return -EOPNOTSUPP; +} + +int gfs2_ea_repack(struct gfs2_inode *ip) +{ + struct gfs2_holder gh; + int error; + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (error) + return error; + + /* Some sort of permissions checking would be nice */ + + error = gfs2_ea_repack_i(ip); + + gfs2_glock_dq_uninit(&gh); + + return error; +} + +struct ea_list { + struct gfs2_ea_request *ei_er; + unsigned int ei_size; +}; + +static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh, + struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, + void *private) +{ + struct ea_list *ei = private; + struct gfs2_ea_request *er = ei->ei_er; + unsigned int ea_size = GFS2_EA_STRLEN(ea); + + if (ea->ea_type == GFS2_EATYPE_UNUSED) + return 0; + + if (er->er_data_len) { + char *prefix; + unsigned int l; + char c = 0; + + if (ei->ei_size + ea_size > er->er_data_len) + return -ERANGE; + + if (ea->ea_type == GFS2_EATYPE_USR) { + prefix = "user."; + l = 5; + } else { + prefix = "system."; + l = 7; + } + + memcpy(er->er_data + ei->ei_size, + prefix, l); + memcpy(er->er_data + ei->ei_size + l, + GFS2_EA2NAME(ea), + ea->ea_name_len); + memcpy(er->er_data + ei->ei_size + + ea_size - 1, + &c, 1); + } + + ei->ei_size += ea_size; + + return 0; +} + +/** + * gfs2_ea_list - + * @ip: + * @er: + * + * Returns: actual size of data on success, -errno on error + */ + +int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er) +{ + struct gfs2_holder i_gh; + int error; + + if (!er->er_data || !er->er_data_len) { + er->er_data = NULL; + er->er_data_len = 0; + } + + error = gfs2_glock_nq_init(ip->i_gl, + LM_ST_SHARED, LM_FLAG_ANY, + &i_gh); + if (error) + return error; + + if (ip->i_di.di_eattr) { + struct ea_list ei = { .ei_er = er, .ei_size = 0 }; + + error = ea_foreach(ip, ea_list_i, &ei); + if (!error) + error = ei.ei_size; + } + + gfs2_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * ea_get_unstuffed - actually copies the unstuffed data into the + * request buffer + * @ip: + * @ea: + * @data: + * + * Returns: errno + */ + +static int ea_get_unstuffed(struct gfs2_inode *ip, struct gfs2_ea_header *ea, + char *data) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head **bh; + unsigned int amount = GFS2_EA_DATA_LEN(ea); + unsigned int nptrs = DIV_RU(amount, sdp->sd_jbsize); + uint64_t *dataptrs = GFS2_EA2DATAPTRS(ea); + unsigned int x; + int error = 0; + + bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL); + if (!bh) + return -ENOMEM; + + for (x = 0; x < nptrs; x++) { + error = gfs2_meta_read(ip->i_gl, le64_to_cpu(*dataptrs), + DIO_START, bh + x); + if (error) { + while (x--) + brelse(bh[x]); + goto out; + } + dataptrs++; + } + + for (x = 0; x < nptrs; x++) { + error = gfs2_meta_reread(sdp, bh[x], DIO_WAIT); + if (error) { + for (; x < nptrs; x++) + brelse(bh[x]); + goto out; + } + if (gfs2_metatype_check(sdp, bh[x], GFS2_METATYPE_ED)) { + for (; x < nptrs; x++) + brelse(bh[x]); + error = -EIO; + goto out; + } + + memcpy(data, + bh[x]->b_data + sizeof(struct gfs2_meta_header), + (sdp->sd_jbsize > amount) ? amount : sdp->sd_jbsize); + + amount -= sdp->sd_jbsize; + data += sdp->sd_jbsize; + + brelse(bh[x]); + } + + out: + kfree(bh); + + return error; +} + +int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, + char *data) +{ + if (GFS2_EA_IS_STUFFED(el->el_ea)) { + memcpy(data, + GFS2_EA2DATA(el->el_ea), + GFS2_EA_DATA_LEN(el->el_ea)); + return 0; + } else + return ea_get_unstuffed(ip, el->el_ea, data); +} + +/** + * gfs2_ea_get_i - + * @ip: + * @er: + * + * Returns: actual size of data on success, -errno on error + */ + +int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er) +{ + struct gfs2_ea_location el; + int error; + + if (!ip->i_di.di_eattr) + return -ENODATA; + + error = gfs2_ea_find(ip, er, &el); + if (error) + return error; + if (!el.el_ea) + return -ENODATA; + + if (er->er_data_len) { + if (GFS2_EA_DATA_LEN(el.el_ea) > er->er_data_len) + error = -ERANGE; + else + error = gfs2_ea_get_copy(ip, &el, er->er_data); + } + if (!error) + error = GFS2_EA_DATA_LEN(el.el_ea); + + brelse(el.el_bh); + + return error; +} + +/** + * gfs2_ea_get - + * @ip: + * @er: + * + * Returns: actual size of data on success, -errno on error + */ + +int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er) +{ + struct gfs2_holder i_gh; + int error; + + if (!er->er_name_len || + er->er_name_len > GFS2_EA_MAX_NAME_LEN) + return -EINVAL; + if (!er->er_data || !er->er_data_len) { + er->er_data = NULL; + er->er_data_len = 0; + } + + error = gfs2_glock_nq_init(ip->i_gl, + LM_ST_SHARED, LM_FLAG_ANY, + &i_gh); + if (error) + return error; + + error = gfs2_ea_ops[er->er_type]->eo_get(ip, er); + + gfs2_glock_dq_uninit(&i_gh); + + return error; +} + +/** + * ea_alloc_blk - allocates a new block for extended attributes. + * @ip: A pointer to the inode that's getting extended attributes + * @bhp: + * + * Returns: errno + */ + +static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct gfs2_ea_header *ea; + uint64_t block; + + block = gfs2_alloc_meta(ip); + + *bhp = gfs2_meta_new(ip->i_gl, block); + gfs2_trans_add_bh(ip->i_gl, *bhp); + gfs2_metatype_set(*bhp, GFS2_METATYPE_EA, GFS2_FORMAT_EA); + gfs2_buffer_clear_tail(*bhp, sizeof(struct gfs2_meta_header)); + + ea = GFS2_EA_BH2FIRST(*bhp); + ea->ea_rec_len = cpu_to_le32(sdp->sd_jbsize); + ea->ea_type = GFS2_EATYPE_UNUSED; + ea->ea_flags = GFS2_EAFLAG_LAST; + ea->ea_num_ptrs = 0; + + ip->i_di.di_blocks++; + + return 0; +} + +/** + * ea_write - writes the request info to an ea, creating new blocks if + * necessary + * @ip: inode that is being modified + * @ea: the location of the new ea in a block + * @er: the write request + * + * Note: does not update ea_rec_len or the GFS2_EAFLAG_LAST bin of ea_flags + * + * returns : errno + */ + +static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, + struct gfs2_ea_request *er) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + + ea->ea_data_len = cpu_to_le32(er->er_data_len); + ea->ea_name_len = er->er_name_len; + ea->ea_type = er->er_type; + ea->ea_pad = 0; + + memcpy(GFS2_EA2NAME(ea), er->er_name, er->er_name_len); + + if (GFS2_EAREQ_SIZE_STUFFED(er) <= sdp->sd_jbsize) { + ea->ea_num_ptrs = 0; + memcpy(GFS2_EA2DATA(ea), er->er_data, er->er_data_len); + } else { + uint64_t *dataptr = GFS2_EA2DATAPTRS(ea); + const char *data = er->er_data; + unsigned int data_len = er->er_data_len; + unsigned int copy; + unsigned int x; + + ea->ea_num_ptrs = DIV_RU(er->er_data_len, sdp->sd_jbsize); + for (x = 0; x < ea->ea_num_ptrs; x++) { + struct buffer_head *bh; + uint64_t block; + int mh_size = sizeof(struct gfs2_meta_header); + + block = gfs2_alloc_meta(ip); + + bh = gfs2_meta_new(ip->i_gl, block); + gfs2_trans_add_bh(ip->i_gl, bh); + gfs2_metatype_set(bh, GFS2_METATYPE_ED, GFS2_FORMAT_ED); + + ip->i_di.di_blocks++; + + copy = (data_len > sdp->sd_jbsize) ? sdp->sd_jbsize : + data_len; + memcpy(bh->b_data + mh_size, data, copy); + if (copy < sdp->sd_jbsize) + memset(bh->b_data + mh_size + copy, 0, + sdp->sd_jbsize - copy); + + *dataptr++ = cpu_to_le64((uint64_t)bh->b_blocknr); + data += copy; + data_len -= copy; + + brelse(bh); + } + + gfs2_assert_withdraw(sdp, !data_len); + } + + return 0; +} + +typedef int (*ea_skeleton_call_t) (struct gfs2_inode *ip, + struct gfs2_ea_request *er, + void *private); + +static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, + unsigned int blks, + ea_skeleton_call_t skeleton_call, + void *private) +{ + struct gfs2_alloc *al; + struct buffer_head *dibh; + int error; + + al = gfs2_alloc_get(ip); + + error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE); + if (error) + goto out; + + error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid); + if (error) + goto out_gunlock_q; + + al->al_requested = blks; + + error = gfs2_inplace_reserve(ip); + if (error) + goto out_gunlock_q; + + error = gfs2_trans_begin(ip->i_sbd, + blks + al->al_rgd->rd_ri.ri_length + + RES_DINODE + RES_STATFS + RES_QUOTA, 0); + if (error) + goto out_ipres; + + error = skeleton_call(ip, er, private); + if (error) + goto out_end_trans; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + if (er->er_flags & GFS2_ERF_MODE) { + gfs2_assert_withdraw(ip->i_sbd, + (ip->i_di.di_mode & S_IFMT) == + (er->er_mode & S_IFMT)); + ip->i_di.di_mode = er->er_mode; + } + ip->i_di.di_ctime = get_seconds(); + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + out_end_trans: + gfs2_trans_end(ip->i_sbd); + + out_ipres: + gfs2_inplace_release(ip); + + out_gunlock_q: + gfs2_quota_unlock(ip); + + out: + gfs2_alloc_put(ip); + + return error; +} + +static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er, + void *private) +{ + struct buffer_head *bh; + int error; + + error = ea_alloc_blk(ip, &bh); + if (error) + return error; + + ip->i_di.di_eattr = bh->b_blocknr; + error = ea_write(ip, GFS2_EA_BH2FIRST(bh), er); + + brelse(bh); + + return error; +} + +/** + * ea_init - initializes a new eattr block + * @ip: + * @er: + * + * Returns: errno + */ + +static int ea_init(struct gfs2_inode *ip, struct gfs2_ea_request *er) +{ + unsigned int jbsize = ip->i_sbd->sd_jbsize; + unsigned int blks = 1; + + if (GFS2_EAREQ_SIZE_STUFFED(er) > jbsize) + blks += DIV_RU(er->er_data_len, jbsize); + + return ea_alloc_skeleton(ip, er, blks, ea_init_i, NULL); +} + +static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea) +{ + uint32_t ea_size = GFS2_EA_SIZE(ea); + struct gfs2_ea_header *new = (struct gfs2_ea_header *)((char *)ea + ea_size); + uint32_t new_size = GFS2_EA_REC_LEN(ea) - ea_size; + int last = ea->ea_flags & GFS2_EAFLAG_LAST; + + ea->ea_rec_len = cpu_to_le32(ea_size); + ea->ea_flags ^= last; + + new->ea_rec_len = cpu_to_le32(new_size); + new->ea_flags = last; + + return new; +} + +static void ea_set_remove_stuffed(struct gfs2_inode *ip, + struct gfs2_ea_location *el) +{ + struct gfs2_ea_header *ea = el->el_ea; + struct gfs2_ea_header *prev = el->el_prev; + uint32_t len; + + gfs2_trans_add_bh(ip->i_gl, el->el_bh); + + if (!prev || !GFS2_EA_IS_STUFFED(ea)) { + ea->ea_type = GFS2_EATYPE_UNUSED; + return; + } else if (GFS2_EA2NEXT(prev) != ea) { + prev = GFS2_EA2NEXT(prev); + gfs2_assert_withdraw(ip->i_sbd, GFS2_EA2NEXT(prev) == ea); + } + + len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea); + prev->ea_rec_len = cpu_to_le32(len); + + if (GFS2_EA_IS_LAST(ea)) + prev->ea_flags |= GFS2_EAFLAG_LAST; +} + +struct ea_set { + int ea_split; + + struct gfs2_ea_request *es_er; + struct gfs2_ea_location *es_el; + + struct buffer_head *es_bh; + struct gfs2_ea_header *es_ea; +}; + +static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh, + struct gfs2_ea_header *ea, struct ea_set *es) +{ + struct gfs2_ea_request *er = es->es_er; + struct buffer_head *dibh; + int error; + + error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + 2 * RES_EATTR, 0); + if (error) + return error; + + gfs2_trans_add_bh(ip->i_gl, bh); + + if (es->ea_split) + ea = ea_split_ea(ea); + + ea_write(ip, ea, er); + + if (es->es_el) + ea_set_remove_stuffed(ip, es->es_el); + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto out; + + if (er->er_flags & GFS2_ERF_MODE) { + gfs2_assert_withdraw(ip->i_sbd, + (ip->i_di.di_mode & S_IFMT) == (er->er_mode & S_IFMT)); + ip->i_di.di_mode = er->er_mode; + } + ip->i_di.di_ctime = get_seconds(); + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + out: + gfs2_trans_end(ip->i_sbd); + + return error; +} + +static int ea_set_simple_alloc(struct gfs2_inode *ip, + struct gfs2_ea_request *er, void *private) +{ + struct ea_set *es = private; + struct gfs2_ea_header *ea = es->es_ea; + int error; + + gfs2_trans_add_bh(ip->i_gl, es->es_bh); + + if (es->ea_split) + ea = ea_split_ea(ea); + + error = ea_write(ip, ea, er); + if (error) + return error; + + if (es->es_el) + ea_set_remove_stuffed(ip, es->es_el); + + return 0; +} + +static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh, + struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, + void *private) +{ + struct ea_set *es = private; + unsigned int size; + int stuffed; + int error; + + stuffed = ea_calc_size(ip->i_sbd, es->es_er, &size); + + if (ea->ea_type == GFS2_EATYPE_UNUSED) { + if (GFS2_EA_REC_LEN(ea) < size) + return 0; + if (!GFS2_EA_IS_STUFFED(ea)) { + error = ea_remove_unstuffed(ip, bh, ea, prev, 1); + if (error) + return error; + } + es->ea_split = 0; + } else if (GFS2_EA_REC_LEN(ea) - GFS2_EA_SIZE(ea) >= size) + es->ea_split = 1; + else + return 0; + + if (stuffed) { + error = ea_set_simple_noalloc(ip, bh, ea, es); + if (error) + return error; + } else { + unsigned int blks; + + es->es_bh = bh; + es->es_ea = ea; + blks = 2 + DIV_RU(es->es_er->er_data_len, ip->i_sbd->sd_jbsize); + + error = ea_alloc_skeleton(ip, es->es_er, blks, + ea_set_simple_alloc, es); + if (error) + return error; + } + + return 1; +} + +static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, + void *private) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head *indbh, *newbh; + uint64_t *eablk; + int error; + int mh_size = sizeof(struct gfs2_meta_header); + + if (ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT) { + uint64_t *end; + + error = gfs2_meta_read(ip->i_gl, ip->i_di.di_eattr, + DIO_START | DIO_WAIT, &indbh); + if (error) + return error; + + if (gfs2_metatype_check(sdp, indbh, GFS2_METATYPE_IN)) { + error = -EIO; + goto out; + } + + eablk = (uint64_t *)(indbh->b_data + mh_size); + end = eablk + sdp->sd_inptrs; + + for (; eablk < end; eablk++) + if (!*eablk) + break; + + if (eablk == end) { + error = -ENOSPC; + goto out; + } + + gfs2_trans_add_bh(ip->i_gl, indbh); + } else { + uint64_t blk; + + blk = gfs2_alloc_meta(ip); + + indbh = gfs2_meta_new(ip->i_gl, blk); + gfs2_trans_add_bh(ip->i_gl, indbh); + gfs2_metatype_set(indbh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); + gfs2_buffer_clear_tail(indbh, mh_size); + + eablk = (uint64_t *)(indbh->b_data + mh_size); + *eablk = cpu_to_le64(ip->i_di.di_eattr); + ip->i_di.di_eattr = blk; + ip->i_di.di_flags |= GFS2_DIF_EA_INDIRECT; + ip->i_di.di_blocks++; + + eablk++; + } + + error = ea_alloc_blk(ip, &newbh); + if (error) + goto out; + + *eablk = cpu_to_le64((uint64_t)newbh->b_blocknr); + error = ea_write(ip, GFS2_EA_BH2FIRST(newbh), er); + brelse(newbh); + if (error) + goto out; + + if (private) + ea_set_remove_stuffed(ip, (struct gfs2_ea_location *)private); + + out: + brelse(indbh); + + return error; +} + +static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er, + struct gfs2_ea_location *el) +{ + struct ea_set es; + unsigned int blks = 2; + int error; + + memset(&es, 0, sizeof(struct ea_set)); + es.es_er = er; + es.es_el = el; + + error = ea_foreach(ip, ea_set_simple, &es); + if (error > 0) + return 0; + if (error) + return error; + + if (!(ip->i_di.di_flags & GFS2_DIF_EA_INDIRECT)) + blks++; + if (GFS2_EAREQ_SIZE_STUFFED(er) > ip->i_sbd->sd_jbsize) + blks += DIV_RU(er->er_data_len, ip->i_sbd->sd_jbsize); + + return ea_alloc_skeleton(ip, er, blks, ea_set_block, el); +} + +static int ea_set_remove_unstuffed(struct gfs2_inode *ip, + struct gfs2_ea_location *el) +{ + if (el->el_prev && GFS2_EA2NEXT(el->el_prev) != el->el_ea) { + el->el_prev = GFS2_EA2NEXT(el->el_prev); + gfs2_assert_withdraw(ip->i_sbd, + GFS2_EA2NEXT(el->el_prev) == el->el_ea); + } + + return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev,0); +} + +int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er) +{ + struct gfs2_ea_location el; + int error; + + if (!ip->i_di.di_eattr) { + if (er->er_flags & XATTR_REPLACE) + return -ENODATA; + return ea_init(ip, er); + } + + error = gfs2_ea_find(ip, er, &el); + if (error) + return error; + + if (el.el_ea) { + if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY) { + brelse(el.el_bh); + return -EPERM; + } + + error = -EEXIST; + if (!(er->er_flags & XATTR_CREATE)) { + int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea); + error = ea_set_i(ip, er, &el); + if (!error && unstuffed) + ea_set_remove_unstuffed(ip, &el); + } + + brelse(el.el_bh); + } else { + error = -ENODATA; + if (!(er->er_flags & XATTR_REPLACE)) + error = ea_set_i(ip, er, NULL); + } + + return error; +} + +int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er) +{ + struct gfs2_holder i_gh; + int error; + + if (!er->er_name_len || + er->er_name_len > GFS2_EA_MAX_NAME_LEN) + return -EINVAL; + if (!er->er_data || !er->er_data_len) { + er->er_data = NULL; + er->er_data_len = 0; + } + error = ea_check_size(ip->i_sbd, er); + if (error) + return error; + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + return error; + + if (IS_IMMUTABLE(ip->i_vnode)) + error = -EPERM; + else + error = gfs2_ea_ops[er->er_type]->eo_set(ip, er); + + gfs2_glock_dq_uninit(&i_gh); + + return error; +} + +static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) +{ + struct gfs2_ea_header *ea = el->el_ea; + struct gfs2_ea_header *prev = el->el_prev; + struct buffer_head *dibh; + int error; + + error = gfs2_trans_begin(ip->i_sbd, RES_DINODE + RES_EATTR, 0); + if (error) + return error; + + gfs2_trans_add_bh(ip->i_gl, el->el_bh); + + if (prev) { + uint32_t len; + + len = GFS2_EA_REC_LEN(prev) + GFS2_EA_REC_LEN(ea); + prev->ea_rec_len = cpu_to_le32(len); + + if (GFS2_EA_IS_LAST(ea)) + prev->ea_flags |= GFS2_EAFLAG_LAST; + } else + ea->ea_type = GFS2_EATYPE_UNUSED; + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (!error) { + ip->i_di.di_ctime = get_seconds(); + gfs2_trans_add_bh(ip->i_gl, dibh); + gfs2_dinode_out(&ip->i_di, dibh->b_data); + brelse(dibh); + } + + gfs2_trans_end(ip->i_sbd); + + return error; +} + +int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er) +{ + struct gfs2_ea_location el; + int error; + + if (!ip->i_di.di_eattr) + return -ENODATA; + + error = gfs2_ea_find(ip, er, &el); + if (error) + return error; + if (!el.el_ea) + return -ENODATA; + + if (GFS2_EA_IS_STUFFED(el.el_ea)) + error = ea_remove_stuffed(ip, &el); + else + error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev, + 0); + + brelse(el.el_bh); + + return error; +} + +/** + * gfs2_ea_remove - sets (or creates or replaces) an extended attribute + * @ip: pointer to the inode of the target file + * @er: request information + * + * Returns: errno + */ + +int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) +{ + struct gfs2_holder i_gh; + int error; + + if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN) + return -EINVAL; + + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + if (error) + return error; + + if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode)) + error = -EPERM; + else + error = gfs2_ea_ops[er->er_type]->eo_remove(ip, er); + + gfs2_glock_dq_uninit(&i_gh); + + return error; +} + +static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip, + struct gfs2_ea_header *ea, char *data) +{ + struct gfs2_sbd *sdp = ip->i_sbd; + struct buffer_head **bh; + unsigned int amount = GFS2_EA_DATA_LEN(ea); + unsigned int nptrs = DIV_RU(amount, sdp->sd_jbsize); + uint64_t *dataptrs = GFS2_EA2DATAPTRS(ea); + unsigned int x; + int error; + + bh = kcalloc(nptrs, sizeof(struct buffer_head *), GFP_KERNEL); + if (!bh) + return -ENOMEM; + + error = gfs2_trans_begin(sdp, nptrs + RES_DINODE, 0); + if (error) + goto out; + + for (x = 0; x < nptrs; x++) { + error = gfs2_meta_read(ip->i_gl, le64_to_cpu(*dataptrs), + DIO_START, bh + x); + if (error) { + while (x--) + brelse(bh[x]); + goto fail; + } + dataptrs++; + } + + for (x = 0; x < nptrs; x++) { + error = gfs2_meta_reread(sd