To: ext3-users, ext2-devel Subject: Updated 2.4 htree patches available for 2.4.21rc5 From: "Theodore Ts'o" Date: Sat, 08 Mar 2003 08:13:42 -0500 I've backported all of the bugfixes to the 2.5 dxdir/htree patches to 2.4, and have created a new set of patches for Linux 2.4.21rc5. At this point it *looks* like we've fixed all of the htree bugs that people have reported, including the brelse bug, the memory leak bugs, and the NFS compatibility problems. I've done *very* light testing, and things seem to work, but I'm now looking for some brave souls/guinea pigs to give this some more strenuous testing. Please don't use this on production systems just yet, until some people report success. In particular, I'm looking for people who had trouble with NFS to confirm whether or not this patch fixes their problems. If you do try out the patch, please let me know how well (or how poorly) it works. --- linux-2.4-ext3merge/fs/ext3/dir.c.=K0013=.orig 2003-03-13 16:22:45.000000000 +0000 +++ linux-2.4-ext3merge/fs/ext3/dir.c 2003-03-13 16:23:17.000000000 +0000 @@ -31,12 +31,17 @@ static unsigned char ext3_filetype_table static int ext3_readdir(struct file *, void *, filldir_t); static int ext3_dx_readdir(struct file * filp, void * dirent, filldir_t filldir); +static int ext3_release_dir (struct inode * inode, + struct file * filp); struct file_operations ext3_dir_operations = { read: generic_read_dir, readdir: ext3_readdir, /* BKL held */ ioctl: ext3_ioctl, /* BKL held */ fsync: ext3_sync_file, /* BKL held */ +#ifdef CONFIG_EXT3_INDEX + release: ext3_release_dir +#endif }; @@ -94,7 +99,11 @@ static int ext3_readdir(struct file * fi sb = inode->i_sb; - if (is_dx(inode)) { +#ifdef CONFIG_EXT3_INDEX + if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, + EXT3_FEATURE_COMPAT_DIR_INDEX) && + ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || + ((inode->i_size >> sb->s_blocksize_bits) == 1))) { err = ext3_dx_readdir(filp, dirent, filldir); if (err != ERR_BAD_DX_DIR) return err; @@ -104,6 +113,7 @@ 
static int ext3_readdir(struct file * fi */ EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL; } +#endif stored = 0; bh = NULL; offset = filp->f_pos & (sb->s_blocksize - 1); @@ -269,7 +279,11 @@ static void free_rb_tree_fname(rb_root_t */ parent = n->rb_parent; fname = rb_entry(n, struct fname, rb_hash); - kfree(fname); + while (fname) { + struct fname * old = fname; + fname = fname->next; + kfree (old); + } if (!parent) root->rb_node = 0; else if (parent->rb_left == n) @@ -417,6 +431,9 @@ static int ext3_dx_readdir(struct file * filp->private_data = info; } + if (filp->f_pos == -1) + return 0; /* EOF */ + /* Some one has messed with f_pos; reset the world */ if (info->last_pos != filp->f_pos) { free_rb_tree_fname(&info->root); @@ -453,8 +470,10 @@ static int ext3_dx_readdir(struct file * &info->next_hash); if (ret < 0) return ret; - if (ret == 0) + if (ret == 0) { + filp->f_pos = -1; break; + } info->curr_node = rb_get_first(&info->root); } @@ -466,6 +485,10 @@ static int ext3_dx_readdir(struct file * info->curr_node = rb_get_next(info->curr_node); if (!info->curr_node) { + if (info->next_hash == ~0) { + filp->f_pos = -1; + break; + } info->curr_hash = info->next_hash; info->curr_minor_hash = 0; } @@ -475,4 +498,13 @@ finished: UPDATE_ATIME(inode); return 0; } + +static int ext3_release_dir (struct inode * inode, struct file * filp) +{ + if (is_dx(inode) && filp->private_data) + ext3_htree_free_dir_info(filp->private_data); + + return 0; +} + #endif --- linux-2.4-ext3merge/fs/ext3/namei.c.=K0013=.orig 2003-03-13 16:22:45.000000000 +0000 +++ linux-2.4-ext3merge/fs/ext3/namei.c 2003-03-14 18:19:13.000000000 +0000 @@ -167,7 +167,7 @@ static struct ext3_dir_entry_2* dx_pack_ static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); static int ext3_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, - struct dx_frame *frames, int *err, + struct dx_frame *frames, __u32 *start_hash); static struct buffer_head * 
ext3_dx_find_entry(struct dentry *dentry, struct ext3_dir_entry_2 **res_dir, int *err); @@ -236,6 +236,17 @@ static inline unsigned dx_node_limit (st * Debug */ #ifdef DX_DEBUG +static void dx_show_index (char * label, struct dx_entry *entries) +{ + int i, n = dx_get_count (entries); + printk("%s index ", label); + for (i = 0; i < n; i++) + { + printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i)); + } + printk("\n"); +} + struct stats { unsigned names; @@ -444,22 +455,21 @@ static void dx_release (struct dx_frame * * This function returns 1 if the caller should continue to search, * or 0 if it should not. If there is an error reading one of the - * index blocks, it will return -1. + * index blocks, it will return a negative error code. * * If start_hash is non-null, it will be filled in with the starting * hash of the next page. */ static int ext3_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, - struct dx_frame *frames, int *err, + struct dx_frame *frames, __u32 *start_hash) { struct dx_frame *p; struct buffer_head *bh; - int num_frames = 0; + int err, num_frames = 0; __u32 bhash; - *err = ENOENT; p = frame; /* * Find the next leaf page by incrementing the frame pointer. @@ -497,8 +507,8 @@ static int ext3_htree_next_block(struct */ while (num_frames--) { if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), - 0, err))) - return -1; /* Failure */ + 0, &err))) + return err; /* Failure */ p++; brelse (p->bh); p->bh = bh; @@ -518,6 +528,46 @@ static inline struct ext3_dir_entry_2 *e /* * This function fills a red-black tree with information from a + * directory block. It returns the number of directory entries loaded + * into the tree. If there is an error, the error code is returned instead. 
+ */ +static int htree_dirblock_to_tree(struct file *dir_file, + struct inode *dir, int block, + struct dx_hash_info *hinfo, + __u32 start_hash, __u32 start_minor_hash) +{ + struct buffer_head *bh; + struct ext3_dir_entry_2 *de, *top; + int err, count = 0; + + dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); + if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) + return err; + + de = (struct ext3_dir_entry_2 *) bh->b_data; + top = (struct ext3_dir_entry_2 *) ((char *) de + + dir->i_sb->s_blocksize - + EXT3_DIR_REC_LEN(0)); + for (; de < top; de = ext3_next_entry(de)) { + ext3fs_dirhash(de->name, de->name_len, hinfo); + if ((hinfo->hash < start_hash) || + ((hinfo->hash == start_hash) && + (hinfo->minor_hash < start_minor_hash))) + continue; + if ((err = ext3_htree_store_dirent(dir_file, + hinfo->hash, hinfo->minor_hash, de)) != 0) { + brelse(bh); + return err; + } + count++; + } + brelse(bh); + return count; +} + + +/* + * This function fills a red-black tree with information from a * directory. We start scanning the directory in hash order, starting * at start_hash and start_minor_hash. 
* @@ -528,9 +578,8 @@ int ext3_htree_fill_tree(struct file *di __u32 start_minor_hash, __u32 *next_hash) { struct dx_hash_info hinfo; - struct buffer_head *bh; - struct ext3_dir_entry_2 *de, *top; - static struct dx_frame frames[2], *frame; + struct ext3_dir_entry_2 *de; + struct dx_frame frames[2], *frame; struct inode *dir; int block, err; int count = 0; @@ -540,6 +589,14 @@ int ext3_htree_fill_tree(struct file *di dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, start_minor_hash)); dir = dir_file->f_dentry->d_inode; + if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) { + hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; + hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed; + count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, + start_hash, start_minor_hash); + *next_hash = ~0; + return count; + } hinfo.hash = start_hash; hinfo.minor_hash = 0; frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err); @@ -559,32 +616,21 @@ int ext3_htree_fill_tree(struct file *di while (1) { block = dx_get_block(frame->at); - dxtrace(printk("Reading block %d\n", block)); - if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) + ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, + start_hash, start_minor_hash); + if (ret < 0) { + err = ret; goto errout; - - de = (struct ext3_dir_entry_2 *) bh->b_data; - top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - - EXT3_DIR_REC_LEN(0)); - for (; de < top; de = ext3_next_entry(de)) { - ext3fs_dirhash(de->name, de->name_len, &hinfo); - if ((hinfo.hash < start_hash) || - ((hinfo.hash == start_hash) && - (hinfo.minor_hash < start_minor_hash))) - continue; - if ((err = ext3_htree_store_dirent(dir_file, - hinfo.hash, hinfo.minor_hash, de)) != 0) - goto errout; - count++; } - brelse (bh); - hashval = ~1; + count += ret; + hashval = ~0; ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, - frame, frames, &err, &hashval); - if (next_hash) - *next_hash = hashval; - if (ret == -1) + 
frame, frames, &hashval); + *next_hash = hashval; + if (ret < 0) { + err = ret; goto errout; + } /* * Stop if: (a) there are no more entries, or * (b) we have inserted at least one entry and the @@ -595,7 +641,8 @@ int ext3_htree_fill_tree(struct file *di break; } dx_release(frames); - dxtrace(printk("Fill tree: returned %d entries\n", count)); + dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", + count, *next_hash)); return count; errout: dx_release(frames); @@ -904,11 +951,12 @@ static struct buffer_head * ext3_dx_find brelse (bh); /* Check to see if we should continue to search */ retval = ext3_htree_next_block(dir, hash, frame, - frames, err, 0); - if (retval == -1) { + frames, 0); + if (retval < 0) { ext3_warning(sb, __FUNCTION__, "error reading index page in directory #%lu", dir->i_ino); + *err = retval; goto errout; } } while (retval == 1); @@ -973,7 +1021,8 @@ dx_move_dirents(char *from, char *to, st struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); rec_len = EXT3_DIR_REC_LEN(de->name_len); memcpy (to, de, rec_len); - ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len; + ((struct ext3_dir_entry_2 *) to)->rec_len = + cpu_to_le16(rec_len); de->inode = 0; map++; to += rec_len; @@ -994,7 +1043,7 @@ static struct ext3_dir_entry_2* dx_pack_ rec_len = EXT3_DIR_REC_LEN(de->name_len); if (de > to) memmove(to, de, rec_len); - to->rec_len = rec_len; + to->rec_len = cpu_to_le16(rec_len); prev = to; to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len); } @@ -2042,12 +2091,6 @@ static int ext3_symlink (struct inode * out_stop: ext3_journal_stop(handle, dir); return err; - -out_no_entry: - ext3_dec_count(handle, inode); - ext3_mark_inode_dirty(handle, inode); - iput (inode); - goto out_stop; } static int ext3_link (struct dentry * old_dentry,