diff --git a/0001-dx_dirs-Add-library-support-for-directory-indexing.patch b/0001-dx_dirs-Add-library-support-for-directory-indexing.patch new file mode 100644 index 0000000..76d7010 --- /dev/null +++ b/0001-dx_dirs-Add-library-support-for-directory-indexing.patch @@ -0,0 +1,843 @@ +From 1f8bab1217e89bd3f2e0bbd9934dd07fc24dff5d Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Sun, 11 Apr 2010 16:09:58 +0800 +Subject: [PATCH 01/30] dx_dirs: Add library support for directory indexing + +This patch updates ocfs2_fs.h with the relevant directory indexing +structures. Additionally, libocfs2/ is given swap +functions for the new disk structures. The library also gets three new +iteration functions: + +ocfs2_dx_entries_iterate() - to iterate all index entries in an inline, +or external index. + +ocfs2_dx_frees_iterate() - an iterator for the dirblock free list. + +ocfs2_extent_iterate_dx_root() - iterate the extent blocks of an index +tree. + +Caveats: + +Right now, this is all read-only. A major 'TODO' item is adding the +appropriate APIs to enable creation, deletion and +various manipulation of the dx tree, as well as individual items. + +None of the other library code really knows about the directory index. +This means that things like ocfs2_lookup() are +still using the old-style lookup via the unindexed dirent tree. + +We need to add support for the newly increased links_count maximum. This +should probably be a separate patch though. 
+ +[modified the patch for code rebase and cleanup -- Coly Li] + +Signed-off-by: Mark Fasheh +Signed-off-by: Coly Li +--- + include/ocfs2-kernel/ocfs2_fs.h | 129 +++++++++++++++++++++++++++++++- + include/ocfs2/ocfs2.h | 46 ++++++++++++ + libocfs2/dir_iterate.c | 155 +++++++++++++++++++++++++++++++++++++++ + libocfs2/dirblock.c | 140 +++++++++++++++++++++++++++++++++++- + libocfs2/extents.c | 89 ++++++++++++++++++++++ + libocfs2/feature_string.c | 12 +++- + libocfs2/inode.c | 2 + + sizetest/sizes.txt | 2 +- + sizetest/sizetest.c | 3 +- + 9 files changed, 570 insertions(+), 8 deletions(-) + +diff --git a/include/ocfs2-kernel/ocfs2_fs.h b/include/ocfs2-kernel/ocfs2_fs.h +index 3fb0d6c..d4de181 100644 +--- a/include/ocfs2-kernel/ocfs2_fs.h ++++ b/include/ocfs2-kernel/ocfs2_fs.h +@@ -67,6 +67,8 @@ + #define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01" + #define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1" + #define OCFS2_REFCOUNT_BLOCK_SIGNATURE "REFCNT1" ++#define OCFS2_DX_ROOT_SIGNATURE "DXDIR01" ++#define OCFS2_DX_LEAF_SIGNATURE "DXLEAF1" + + /* Compatibility flags */ + #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ +@@ -97,7 +99,8 @@ + | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ + | OCFS2_FEATURE_INCOMPAT_META_ECC \ + | OCFS2_FEATURE_INCOMPAT_XATTR \ +- | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE) ++ | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ ++ | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) + #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ + | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ + | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) +@@ -153,6 +156,9 @@ + /* Support for extended attributes */ + #define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200 + ++/* Support for indexed directores */ ++#define OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS 0x0400 ++ + /* Metadata checksum and error correction */ + #define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800 + +@@ -375,8 +381,11 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { + #define OCFS2_DIR_REC_LEN(name_len) (((name_len) + 
OCFS2_DIR_MEMBER_LEN + \ + OCFS2_DIR_ROUND) & \ + ~OCFS2_DIR_ROUND) ++#define OCFS2_DIR_MIN_REC_LEN OCFS2_DIR_REC_LEN(1) + + #define OCFS2_LINK_MAX 32000 ++#define OCFS2_DX_LINK_MAX ((1U << 31) - 1U) ++#define OCFS2_LINKS_HI_SHIFT 16 + + #define S_SHIFT 12 + static unsigned char ocfs2_type_by_mode[S_IFMT >> S_SHIFT] = { +@@ -592,8 +601,9 @@ struct ocfs2_super_block { + /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size + for this fs*/ + __le16 s_reserved0; +- __le32 s_reserved1; +-/*C0*/ __le64 s_reserved2[16]; /* Fill out superblock */ ++ __le32 s_dx_seed[3]; /* seed[0-2] for dx dir hash. ++ * s_uuid_hash serves as seed[3]. */ ++/*C0*/ __le64 s_reserved2[15]; /* Fill out superblock */ + /*140*/ + + /* +@@ -643,7 +653,7 @@ struct ocfs2_dinode { + belongs to */ + __le16 i_suballoc_bit; /* Bit offset in suballocator + block group */ +-/*10*/ __le16 i_reserved0; ++/*10*/ __le16 i_links_count_hi; /* High 16 bits of links count */ + __le16 i_xattr_inline_size; + __le32 i_clusters; /* Cluster count */ + __le32 i_uid; /* Owner UID */ +@@ -747,6 +757,87 @@ struct ocfs2_dir_block_trailer { + /*40*/ + }; + ++ /* ++ * A directory entry in the indexed tree. We don't store the full name here, ++ * but instead provide a pointer to the full dirent in the unindexed tree. ++ * ++ * We also store name_len here so as to reduce the number of leaf blocks we ++ * need to search in case of collisions. ++ */ ++struct ocfs2_dx_entry { ++ __le32 dx_major_hash; /* Used to find logical ++ * cluster in index */ ++ __le32 dx_minor_hash; /* Lower bits used to find ++ * block in cluster */ ++ __le64 dx_dirent_blk; /* Physical block in unindexed ++ * tree holding this dirent. 
*/ ++}; ++ ++struct ocfs2_dx_entry_list { ++ __le32 de_reserved; ++ __le16 de_count; /* Maximum number of entries ++ * possible in de_entries */ ++ __le16 de_num_used; /* Current number of ++ * de_entries entries */ ++ struct ocfs2_dx_entry de_entries[0]; /* Indexed dir entries ++ * in a packed array of ++ * length de_num_used */ ++}; ++ ++#define OCFS2_DX_FLAG_INLINE 0x01 ++ ++/* ++ * A directory indexing block. Each indexed directory has one of these, ++ * pointed to by ocfs2_dinode. ++ * ++ * This block stores an indexed btree root, and a set of free space ++ * start-of-list pointers. ++ */ ++struct ocfs2_dx_root_block { ++ __u8 dr_signature[8]; /* Signature for verification */ ++ struct ocfs2_block_check dr_check; /* Error checking */ ++ __le16 dr_suballoc_slot; /* Slot suballocator this ++ * block belongs to. */ ++ __le16 dr_suballoc_bit; /* Bit offset in suballocator ++ * block group */ ++ __le32 dr_fs_generation; /* Must match super block */ ++ __le64 dr_blkno; /* Offset on disk, in blocks */ ++ __le64 dr_last_eb_blk; /* Pointer to last ++ * extent block */ ++ __le32 dr_clusters; /* Clusters allocated ++ * to the indexed tree. */ ++ __u8 dr_flags; /* OCFS2_DX_FLAG_* flags */ ++ __u8 dr_reserved0; ++ __le16 dr_reserved1; ++ __le64 dr_dir_blkno; /* Pointer to parent inode */ ++ __le64 dr_reserved2; ++ __le64 dr_free_blk; /* Pointer to head of free ++ * unindexed block list. */ ++ __le64 dr_reserved3[15]; ++ union { ++ struct ocfs2_extent_list dr_list; /* Keep this aligned to 128 ++ * bits for maximum space ++ * efficiency. */ ++ struct ocfs2_dx_entry_list dr_entries; /* In-root-block list of ++ * entries. We grow out ++ * to extents if this ++ * gets too big. */ ++ }; ++}; ++ ++/* ++ * The header of a leaf block in the indexed tree. 
++ */ ++struct ocfs2_dx_leaf { ++ __u8 dl_signature[8];/* Signature for verification */ ++ struct ocfs2_block_check dl_check; /* Error checking */ ++ __le64 dl_blkno; /* Offset on disk, in blocks */ ++ __le32 dl_fs_generation;/* Must match super block */ ++ __le32 dl_reserved0; ++ __le64 dl_reserved1; ++ struct ocfs2_dx_entry_list dl_list; ++}; ++ + /* + * On disk allocator group structure for OCFS2 + */ +@@ -1133,6 +1224,16 @@ static inline int ocfs2_extent_recs_per_inode_with_xattr( + return size / sizeof(struct ocfs2_extent_rec); + } + ++static inline int ocfs2_extent_recs_per_dx_root(struct super_block *sb) ++{ ++ int size; ++ ++ size = sb->s_blocksize - ++ offsetof(struct ocfs2_dx_root_block, dr_list.l_recs); ++ ++ return size / sizeof(struct ocfs2_extent_rec); ++} ++ + static inline int ocfs2_chain_recs_per_inode(struct super_block *sb) + { + int size; +@@ -1153,6 +1254,26 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb) + return size / sizeof(struct ocfs2_extent_rec); + } + ++static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb) ++{ ++ int size; ++ ++ size = sb->s_blocksize - ++ offsetof(struct ocfs2_dx_leaf, dl_list.de_entries); ++ ++ return size / sizeof(struct ocfs2_dx_entry); ++} ++ ++static inline int ocfs2_dx_entries_per_root(struct super_block *sb) ++{ ++ int size; ++ ++ size = sb->s_blocksize - ++ offsetof(struct ocfs2_dx_root_block, dr_entries.de_entries); ++ ++ return size / sizeof(struct ocfs2_dx_entry); ++} ++ + static inline u16 ocfs2_local_alloc_size(struct super_block *sb) + { + u16 size; +diff --git a/include/ocfs2/ocfs2.h b/include/ocfs2/ocfs2.h +index d91f734..332d4bd 100644 +--- a/include/ocfs2/ocfs2.h ++++ b/include/ocfs2/ocfs2.h +@@ -473,6 +473,11 @@ int ocfs2_skip_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di, + struct ocfs2_dir_entry *de, unsigned long offset); + void ocfs2_init_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di, + uint64_t blkno, void *buf); ++errcode_t 
ocfs2_read_dx_root(ocfs2_filesys *fs, uint64_t block, ++ void *buf); ++errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block, ++ void *buf); ++int ocfs2_dir_indexed(struct ocfs2_dinode *di); + + errcode_t ocfs2_dir_iterate2(ocfs2_filesys *fs, + uint64_t dir, +@@ -497,6 +502,27 @@ extern errcode_t ocfs2_dir_iterate(ocfs2_filesys *fs, + void *priv_data), + void *priv_data); + ++extern errcode_t ocfs2_dx_entries_iterate(ocfs2_filesys *fs, ++ struct ocfs2_dinode *dir, ++ int flags, ++ int (*func)(ocfs2_filesys *fs, ++ struct ocfs2_dx_entry_list *entry_list, ++ struct ocfs2_dx_root_block *dx_root, ++ struct ocfs2_dx_leaf *dx_leaf, ++ void *priv_data), ++ void *priv_data); ++ ++extern errcode_t ocfs2_dx_frees_iterate(ocfs2_filesys *fs, ++ struct ocfs2_dinode *dir, ++ struct ocfs2_dx_root_block *dx_root, ++ int flags, ++ int (*func)(ocfs2_filesys *fs, ++ uint64_t blkno, ++ struct ocfs2_dir_block_trailer *trailer, ++ char *dirblock, ++ void *priv_data), ++ void *priv_data); ++ + errcode_t ocfs2_lookup(ocfs2_filesys *fs, uint64_t dir, + const char *name, int namelen, char *buf, + uint64_t *inode); +@@ -1224,6 +1250,13 @@ static inline int ocfs2_support_xattr(struct ocfs2_super_block *osb) + return 0; + } + ++static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super_block *osb) ++{ ++ if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) ++ return 1; ++ return 0; ++} ++ + /* + * When we're swapping some of our disk structures, a garbage count + * can send us past the edge of a block buffer. 
This function guards +@@ -1352,6 +1385,19 @@ errcode_t ocfs2_extent_iterate_inode(ocfs2_filesys *fs, + int ref_recno, + void *priv_data), + void *priv_data); ++errcode_t ocfs2_extent_iterate_dx_root(ocfs2_filesys *fs, ++ struct ocfs2_dx_root_block *dx_root, ++ int flags, ++ char *block_buf, ++ int (*func)(ocfs2_filesys *fs, ++ struct ocfs2_extent_rec *rec, ++ int tree_depth, ++ uint32_t ccount, ++ uint64_t ref_blkno, ++ int ref_recno, ++ void *priv_data), ++ void *priv_data); ++ + errcode_t ocfs2_block_iterate(ocfs2_filesys *fs, + uint64_t blkno, + int flags, +diff --git a/libocfs2/dir_iterate.c b/libocfs2/dir_iterate.c +index 00c8d16..1064d9f 100644 +--- a/libocfs2/dir_iterate.c ++++ b/libocfs2/dir_iterate.c +@@ -307,6 +307,161 @@ int ocfs2_process_dir_block(ocfs2_filesys *fs, + return 0; + } + ++struct dx_iterator_data { ++ int (*dx_func)(ocfs2_filesys *fs, ++ struct ocfs2_dx_entry_list *entry_list, ++ struct ocfs2_dx_root_block *dx_root, ++ struct ocfs2_dx_leaf *dx_leaf, ++ void *priv_data); ++ void *dx_priv_data; ++ char *leaf_buf; ++ struct ocfs2_dx_root_block *dx_root; ++}; ++ ++static int dx_iterator(ocfs2_filesys *fs, ++ struct ocfs2_extent_rec *rec, ++ int tree_depth, ++ uint32_t ccount, ++ uint64_t ref_blkno, ++ int ref_recno, ++ void *priv_data) ++{ ++ int ret, i; ++ struct ocfs2_dx_leaf *dx_leaf; ++ struct dx_iterator_data *iter = priv_data; ++ uint64_t blkno, count; ++ ++ count = ocfs2_clusters_to_blocks(fs, rec->e_leaf_clusters); ++ ++ blkno = rec->e_blkno; ++ for (i = 0; i < count; i++) { ++ ret = ocfs2_read_dx_leaf(fs, blkno, iter->leaf_buf); ++ if (ret) ++ return ret; ++ ++ dx_leaf = (struct ocfs2_dx_leaf *)iter->leaf_buf; ++ iter->dx_func(fs, &dx_leaf->dl_list, iter->dx_root, dx_leaf, ++ iter->dx_priv_data); ++ ++ blkno++; ++ } ++ ++ return 0; ++} ++ ++extern errcode_t ocfs2_dx_entries_iterate(ocfs2_filesys *fs, ++ struct ocfs2_dinode *dir, ++ int flags, ++ int (*func)(ocfs2_filesys *fs, ++ struct ocfs2_dx_entry_list *entry_list, ++ struct 
ocfs2_dx_root_block *dx_root, ++ struct ocfs2_dx_leaf *dx_leaf, ++ void *priv_data), ++ void *priv_data) ++{ ++ errcode_t ret = 0; ++ struct ocfs2_dx_root_block *dx_root; ++ uint64_t dx_blkno; ++ char *buf = NULL, *eb_buf = NULL, *leaf_buf = NULL; ++ struct dx_iterator_data data; ++ ++ if (!S_ISDIR(dir->i_mode) || !ocfs2_dir_indexed(dir)) { ++ ret = 0; /* nothing to iterate: not an indexed dir */ ++ goto out; ++ } ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &buf); ++ if (ret) ++ goto out; ++ ++ dx_blkno = (uint64_t) dir->i_dx_root; ++ ++ ret = ocfs2_read_dx_root(fs, dx_blkno, buf); ++ if (ret) ++ goto out; ++ ++ dx_root = (struct ocfs2_dx_root_block *)buf; ++ ++ if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) { ++ func(fs, &dx_root->dr_entries, dx_root, NULL, priv_data); ++ ret = 0; ++ goto out; ++ } ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &eb_buf); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &leaf_buf); ++ if (ret) ++ goto out; ++ ++ data.dx_func = func; ++ data.dx_priv_data = priv_data; ++ data.leaf_buf = leaf_buf; ++ data.dx_root = dx_root; ++ ret = ocfs2_extent_iterate_dx_root(fs, dx_root, ++ OCFS2_EXTENT_FLAG_DATA_ONLY, eb_buf, ++ dx_iterator, &data); ++ ++out: ++ if (buf) ++ ocfs2_free(&buf); ++ if (eb_buf) ++ ocfs2_free(&eb_buf); ++ if (leaf_buf) ++ ocfs2_free(&leaf_buf); ++ return ret; ++} ++ ++extern errcode_t ocfs2_dx_frees_iterate(ocfs2_filesys *fs, ++ struct ocfs2_dinode *dir, ++ struct ocfs2_dx_root_block *dx_root, ++ int flags, ++ int (*func)(ocfs2_filesys *fs, ++ uint64_t blkno, ++ struct ocfs2_dir_block_trailer *trailer, ++ char *dirblock, ++ void *priv_data), ++ void *priv_data) ++{ ++ errcode_t ret = 0; ++ uint64_t blkno; ++ char *buf = NULL; ++ struct ocfs2_dir_block_trailer *trailer; ++ ++ if (!S_ISDIR(dir->i_mode) || !(ocfs2_dir_indexed(dir))) { ++ ret = 0; ++ goto out; ++ } ++ ++ if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) { ++ ret = 0; ++ goto out; ++ } ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &buf); ++ if (ret) ++ goto out; ++ ++ blkno = dx_root->dr_free_blk; ++ 
while (blkno) { ++ ret = ocfs2_read_dir_block(fs, dir, blkno, buf); ++ if (ret) ++ goto out; ++ ++ trailer = ocfs2_dir_trailer_from_block(fs, buf); ++ ++ func(fs, blkno, trailer, buf, priv_data); ++ ++ blkno = trailer->db_free_next; ++ } ++ ++out: ++ if (buf) ++ ocfs2_free(&buf); ++ return ret; ++} + + #ifdef DEBUG_EXE + #include +diff --git a/libocfs2/dirblock.c b/libocfs2/dirblock.c +index fec8ffc..d68e5c0 100644 +--- a/libocfs2/dirblock.c ++++ b/libocfs2/dirblock.c +@@ -54,12 +54,17 @@ int ocfs2_dir_has_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di) + (di->i_dyn_features & OCFS2_INLINE_DATA_FL)) + return 0; + ++ if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) && ++ di->i_dyn_features & OCFS2_INDEXED_DIR_FL) ++ return 1; ++ + return ocfs2_meta_ecc(OCFS2_RAW_SB(fs->fs_super)); + } + + int ocfs2_supports_dir_trailer(ocfs2_filesys *fs) + { +- return ocfs2_meta_ecc(OCFS2_RAW_SB(fs->fs_super)); ++ return ocfs2_meta_ecc(OCFS2_RAW_SB(fs->fs_super)) || ++ ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)); + } + + int ocfs2_skip_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di, +@@ -148,6 +153,8 @@ void ocfs2_swap_dir_trailer(struct ocfs2_dir_block_trailer *trailer) + bswap_64(trailer->db_compat_rec_len); + bswap_64(trailer->db_blkno); + bswap_64(trailer->db_parent_dinode); ++ trailer->db_free_rec_len = bswap_16(trailer->db_free_rec_len); ++ trailer->db_free_next = bswap_64(trailer->db_free_next); + } + + errcode_t ocfs2_read_dir_block(ocfs2_filesys *fs, struct ocfs2_dinode *di, +@@ -222,3 +229,134 @@ out: + ocfs2_free(&buf); + return retval; + } ++ ++static void ocfs2_swap_dx_entry_to_cpu(struct ocfs2_dx_entry *dx_entry) ++{ ++ if (cpu_is_little_endian) ++ return; ++ ++ dx_entry->dx_major_hash = bswap_32(dx_entry->dx_major_hash); ++ dx_entry->dx_minor_hash = bswap_32(dx_entry->dx_minor_hash); ++ dx_entry->dx_dirent_blk = bswap_64(dx_entry->dx_dirent_blk); ++} ++ ++static void ocfs2_swap_dx_entry_list_to_cpu(struct ocfs2_dx_entry_list *dl_list) ++{ ++ int i; ++ ++ if (cpu_is_little_endian) ++ 
return; ++ ++ dl_list->de_count = bswap_16(dl_list->de_count); ++ dl_list->de_num_used = bswap_16(dl_list->de_num_used); ++ ++ for (i = 0; i < dl_list->de_count; i++) ++ ocfs2_swap_dx_entry_to_cpu(&dl_list->de_entries[i]); ++} ++ ++static void ocfs2_swap_dx_root_to_cpu(struct ocfs2_dx_root_block *dx_root) ++{ ++ if (cpu_is_little_endian) ++ return; ++ ++ dx_root->dr_suballoc_slot = bswap_16(dx_root->dr_suballoc_slot); ++ dx_root->dr_suballoc_bit = bswap_16(dx_root->dr_suballoc_bit); ++ dx_root->dr_fs_generation = bswap_32(dx_root->dr_fs_generation); ++ dx_root->dr_blkno = bswap_64(dx_root->dr_blkno); ++ dx_root->dr_last_eb_blk = bswap_64(dx_root->dr_last_eb_blk); ++ dx_root->dr_clusters = bswap_32(dx_root->dr_clusters); ++ dx_root->dr_dir_blkno = bswap_64(dx_root->dr_dir_blkno); ++ dx_root->dr_free_blk = bswap_64(dx_root->dr_free_blk); ++ ++ if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) ++ ocfs2_swap_dx_entry_list_to_cpu(&dx_root->dr_entries); ++ else ++ ocfs2_swap_extent_list_to_cpu(&dx_root->dr_list); ++} ++ ++errcode_t ocfs2_read_dx_root(ocfs2_filesys *fs, uint64_t block, ++ void *buf) ++{ ++ errcode_t ret; ++ struct ocfs2_dx_root_block *dx_root; ++ ++ ret = ocfs2_read_blocks(fs, block, 1, buf); ++ if (ret) ++ return ret; ++ ++ dx_root = (struct ocfs2_dx_root_block *)buf; ++ ret = ocfs2_validate_meta_ecc(fs, buf, &dx_root->dr_check); ++ if (ret) ++ return ret; ++ ++ if (memcmp(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE, ++ strlen(OCFS2_DX_ROOT_SIGNATURE))) ++ return OCFS2_ET_DIR_CORRUPTED; ++ ++ ocfs2_swap_dx_root_to_cpu(dx_root); ++ ++ return 0; ++} ++ ++static void ocfs2_swap_dx_leaf_to_cpu(struct ocfs2_dx_leaf *dx_leaf) ++{ ++ if (cpu_is_little_endian) ++ return; ++ /* Swap each field at its on-disk width; dl_fs_generation is __le32. */ ++ dx_leaf->dl_blkno = bswap_64(dx_leaf->dl_blkno); ++ dx_leaf->dl_fs_generation = bswap_32(dx_leaf->dl_fs_generation); ++ ++ ocfs2_swap_dx_entry_list_to_cpu(&dx_leaf->dl_list); ++} ++ ++errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block, ++ void *buf) ++{ ++ errcode_t ret; ++ 
struct ocfs2_dx_leaf *dx_leaf; ++ ++ ret = ocfs2_read_blocks(fs, block, 1, buf); ++ if (ret) ++ return ret; ++ ++ dx_leaf = (struct ocfs2_dx_leaf *)buf; ++ ret = ocfs2_validate_meta_ecc(fs, buf, &dx_leaf->dl_check); ++ if (ret) ++ return ret; ++ ++ if (memcmp(dx_leaf->dl_signature, OCFS2_DX_LEAF_SIGNATURE, ++ strlen(OCFS2_DX_LEAF_SIGNATURE))) ++ return OCFS2_ET_DIR_CORRUPTED; ++ ++ ocfs2_swap_dx_leaf_to_cpu(dx_leaf); ++ ++ return 0; ++} ++ ++int ocfs2_dir_indexed(struct ocfs2_dinode *di) ++{ ++ if (di->i_dyn_features & OCFS2_INDEXED_DIR_FL) ++ return 1; ++ return 0; ++} ++ ++/* ++ * Only use this when we already know the directory is indexed. ++ */ ++int __ocfs2_is_dir_trailer(ocfs2_filesys *fs, unsigned long de_off) ++{ ++ if (de_off == ocfs2_dir_trailer_blk_off(fs)) ++ return 1; ++ ++ return 0; ++} ++ ++int ocfs2_is_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di, ++ unsigned long de_off) ++{ ++ if (ocfs2_dir_has_trailer(fs, di)) { ++ return __ocfs2_is_dir_trailer(fs, de_off); ++ } ++ ++ return 0; ++} +diff --git a/libocfs2/extents.c b/libocfs2/extents.c +index ee7ef93..8c322b1 100644 +--- a/libocfs2/extents.c ++++ b/libocfs2/extents.c +@@ -550,6 +550,95 @@ out: + return ret; + } + ++errcode_t ocfs2_extent_iterate_dx_root(ocfs2_filesys *fs, ++ struct ocfs2_dx_root_block *dx_root, ++ int flags, ++ char *block_buf, ++ int (*func)(ocfs2_filesys *fs, ++ struct ocfs2_extent_rec *rec, ++ int tree_depth, ++ uint32_t ccount, ++ uint64_t ref_blkno, ++ int ref_recno, ++ void *priv_data), ++ void *priv_data) ++{ ++ int i; ++ int iret = 0; ++ struct ocfs2_extent_list *el; ++ errcode_t ret; ++ struct extent_context ctxt; ++ ++ if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) ++ return OCFS2_ET_INODE_CANNOT_BE_ITERATED; ++ ++ el = &dx_root->dr_list; ++ if (el->l_tree_depth) { ++ ret = ocfs2_malloc0(sizeof(char *) * el->l_tree_depth, ++ &ctxt.eb_bufs); ++ if (ret) ++ goto out; ++ ++ if (block_buf) { ++ ctxt.eb_bufs[0] = block_buf; ++ } else { ++ ret = 
ocfs2_malloc0(fs->fs_blocksize * ++ el->l_tree_depth, ++ &ctxt.eb_bufs[0]); ++ if (ret) ++ goto out_eb_bufs; ++ } ++ ++ for (i = 1; i < el->l_tree_depth; i++) { ++ ctxt.eb_bufs[i] = ctxt.eb_bufs[0] + ++ i * fs->fs_blocksize; ++ } ++ } ++ else ++ ctxt.eb_bufs = NULL; ++ ++ ctxt.fs = fs; ++ ctxt.func = func; ++ ctxt.priv_data = priv_data; ++ ctxt.flags = flags; ++ ctxt.ccount = 0; ++ ctxt.last_eb_blkno = 0; ++ ctxt.last_eb_cpos = 0; ++ ++ ret = 0; ++ iret |= extent_iterate_el(el, 0, &ctxt); ++ if (iret & OCFS2_EXTENT_ERROR) ++ ret = ctxt.errcode; ++ ++ if (iret & OCFS2_EXTENT_ABORT) ++ goto out_abort; ++ ++ /* we can only trust ctxt.last_eb_blkno if we walked the whole tree */ ++ if (dx_root->dr_last_eb_blk != ctxt.last_eb_blkno) { ++ dx_root->dr_last_eb_blk = ctxt.last_eb_blkno; ++ iret |= OCFS2_EXTENT_CHANGED; ++ } ++ ++out_abort: ++#if 0 ++ /* ++ * This block needs to be fixed up for write support. ++ */ ++ if (!ret && (iret & OCFS2_EXTENT_CHANGED)) ++ ret = ocfs2_write_inode(fs, inode->i_blkno, (char *)inode); ++#endif ++ ++out_eb_bufs: ++ if (ctxt.eb_bufs) { ++ if (!block_buf && ctxt.eb_bufs[0]) ++ ocfs2_free(&ctxt.eb_bufs[0]); ++ ocfs2_free(&ctxt.eb_bufs); ++ } ++ ++out: ++ return ret; ++} ++ + errcode_t ocfs2_extent_iterate(ocfs2_filesys *fs, + uint64_t blkno, + int flags, +diff --git a/libocfs2/feature_string.c b/libocfs2/feature_string.c +index 7021dba..9f395c6 100644 +--- a/libocfs2/feature_string.c ++++ b/libocfs2/feature_string.c +@@ -89,7 +89,8 @@ static ocfs2_fs_options feature_level_defaults[] = { + OCFS2_FEATURE_INCOMPAT_INLINE_DATA | + OCFS2_FEATURE_INCOMPAT_META_ECC | + OCFS2_FEATURE_INCOMPAT_XATTR | +- OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE, ++ OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE | ++ OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, + OCFS2_FEATURE_RO_COMPAT_UNWRITTEN | + OCFS2_FEATURE_RO_COMPAT_USRQUOTA | + OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }, /* OCFS2_FEATURE_LEVEL_MAX_FEATURES */ +@@ -166,6 +167,11 @@ static struct fs_feature_flags ocfs2_supported_features[] 
= { + {0, OCFS2_FEATURE_INCOMPAT_XATTR, 0}, + }, + { ++ "indexed-dirs", ++ {0, OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, 0}, ++ {0, OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, 0}, ++ }, ++ { + "usrquota", + {0, 0, OCFS2_FEATURE_RO_COMPAT_USRQUOTA}, + {0, 0, OCFS2_FEATURE_RO_COMPAT_USRQUOTA}, +@@ -248,6 +254,10 @@ static struct feature_name ocfs2_feature_names[] = { + .fn_flag = {0, OCFS2_FEATURE_INCOMPAT_XATTR, 0}, + }, + { ++ .fn_name = "IndexedDirs", ++ .fn_flag = {0, OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, 0}, ++ }, ++ { + .fn_name = "usrquota", + .fn_flag = {0, 0, OCFS2_FEATURE_RO_COMPAT_USRQUOTA}, + }, +diff --git a/libocfs2/inode.c b/libocfs2/inode.c +index e9ce579..95419f4 100644 +--- a/libocfs2/inode.c ++++ b/libocfs2/inode.c +@@ -163,6 +163,8 @@ static void ocfs2_swap_inode_second(struct ocfs2_dinode *di) + struct ocfs2_inline_data *id = &di->id2.i_data; + + id->id_count = bswap_16(id->id_count); ++ } else if (di->i_dyn_features & OCFS2_INDEXED_DIR_FL) { ++ di->i_dx_root = bswap_64(di->i_dx_root); + } + } + +diff --git a/sizetest/sizes.txt b/sizetest/sizes.txt +index d03b67c..1fb27a9 100644 +--- a/sizetest/sizes.txt ++++ b/sizetest/sizes.txt +@@ -110,7 +110,7 @@ + 0x0C0 id2.i_chain +0x10 + 0x0C0 id2.i_list +0x10 + 0x0C0 id2.i_symlink +0x00 +- Total 0x200 ++ Total 0x208 + + [off] struct ocfs2_dir_entry [size] + 0x000 inode +0x08 +diff --git a/sizetest/sizetest.c b/sizetest/sizetest.c +index bdb06a2..e01e800 100644 +--- a/sizetest/sizetest.c ++++ b/sizetest/sizetest.c +@@ -173,7 +173,7 @@ static void print_ocfs2_dinode(void) + SHOW_OFFSET(struct ocfs2_dinode, i_generation); + SHOW_OFFSET(struct ocfs2_dinode, i_suballoc_slot); + SHOW_OFFSET(struct ocfs2_dinode, i_suballoc_bit); +- SHOW_OFFSET(struct ocfs2_dinode, i_reserved0); ++ SHOW_OFFSET(struct ocfs2_dinode, i_links_count_hi); + SHOW_OFFSET(struct ocfs2_dinode, i_xattr_inline_size); + SHOW_OFFSET(struct ocfs2_dinode, i_clusters); + SHOW_OFFSET(struct ocfs2_dinode, i_uid); +@@ -197,6 +197,7 @@ static void 
print_ocfs2_dinode(void) + SHOW_OFFSET(struct ocfs2_dinode, i_dyn_features); + SHOW_OFFSET(struct ocfs2_dinode, i_xattr_loc); + SHOW_OFFSET(struct ocfs2_dinode, i_check); ++ SHOW_OFFSET(struct ocfs2_dinode, i_dx_root); + SHOW_OFFSET(struct ocfs2_dinode, i_reserved2); + + SHOW_OFFSET(struct ocfs2_dinode, id1.i_pad1); +-- +1.7.0.2 + diff --git a/0002-dx_dirs-debugfs.ocfs2-support.patch b/0002-dx_dirs-debugfs.ocfs2-support.patch new file mode 100644 index 0000000..f72211b --- /dev/null +++ b/0002-dx_dirs-debugfs.ocfs2-support.patch @@ -0,0 +1,454 @@ +From b891260ad1500f3f2c0562d4376307b18bc4a9f4 Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Sun, 11 Apr 2010 16:09:59 +0800 +Subject: [PATCH 02/30] dx_dirs: debugfs.ocfs2 support + +This adds a full set of functionality to debugfs.ocfs2 so that we can +visualize and debug indexed directories. Aside +from updates to other commands to dump newly added/used fields in old +structures, we get the following debugfs.ocfs2 commands: + +dx_space - Show all entries in the free list +dx_dump - Show the directory index (including root block) +dx_leaf - Show a single directory index leaf block +dx_root - Show directory index root block, as well as any extent blocks + for non-inline dx_roots. 
+ +[modified the patch for code rebase and cleanup -- Coly Li] + +Signed-off-by: Mark Fasheh +Signed-off-by: Coly Li +--- + debugfs.ocfs2/commands.c | 173 ++++++++++++++++++++++++++++++++++++++++++ + debugfs.ocfs2/dump.c | 164 ++++++++++++++++++++++++++++++++++++++-- + debugfs.ocfs2/include/dump.h | 5 + + 3 files changed, 335 insertions(+), 7 deletions(-) + +diff --git a/debugfs.ocfs2/commands.c b/debugfs.ocfs2/commands.c +index 1c19ab4..04e56b4 100644 +--- a/debugfs.ocfs2/commands.c ++++ b/debugfs.ocfs2/commands.c +@@ -77,6 +77,10 @@ static void do_dirblocks(char **args); + static void do_xattr(char **args); + static void do_frag(char **args); + static void do_refcount(char **args); ++static void do_dx_root(char **args); ++static void do_dx_leaf(char **args); ++static void do_dx_dump(char **args); ++static void do_dx_space(char **args); + + dbgfs_gbls gbls; + +@@ -116,6 +120,10 @@ static Command commands[] = { + { "dirblocks", do_dirblocks }, + { "frag", do_frag }, + { "refcount", do_refcount }, ++ { "dx_root", do_dx_root }, ++ { "dx_leaf", do_dx_leaf }, ++ { "dx_dump", do_dx_dump }, ++ { "dx_space", do_dx_space }, + }; + + /* +@@ -842,6 +850,10 @@ static void do_help (char **args) + printf ("dlm_locks [-f ] [-l] lockname\t\t\tShow live dlm locking state\n"); + printf ("dump [-p] \t\tDumps file to outfile on a mounted fs\n"); + printf ("dirblocks \t\t\tDump directory blocks\n"); ++ printf ("dx_space \t\t\tDump directory free space list\n"); ++ printf ("dx_dump \t\t\tShow directory index information\n"); ++ printf ("dx_leaf \t\t\tShow directory index leaf block only\n"); ++ printf ("dx_root \t\t\tShow directory index root block only\n"); + printf ("encode \t\t\tShow lock name\n"); + printf ("extent \t\t\t\tShow extent block\n"); + printf ("findpath \t\t\tList one pathname of the inode/lockname\n"); +@@ -1316,6 +1328,167 @@ static void do_dirblocks (char **args) + } + + /* ++ * do_dx_root() ++ * ++ */ ++static void do_dx_root (char **args) ++{ ++ struct 
ocfs2_dx_root_block *dx_root; ++ uint64_t blkno; ++ char *buf = NULL; ++ FILE *out; ++ errcode_t ret = 0; ++ ++ if (process_inodestr_args(args, 1, &blkno) != 1) ++ return; ++ ++ buf = gbls.blockbuf; ++ out = open_pager(gbls.interactive); ++ ++ ret = ocfs2_read_dx_root(gbls.fs, blkno, buf); ++ if (ret) { ++ com_err(args[0], ret, "while reading dx dir root " ++ "block %"PRIu64"", blkno); ++ close_pager (out); ++ return; ++ } ++ ++ dx_root = (struct ocfs2_dx_root_block *)buf; ++ dump_dx_root(out, dx_root); ++ if (!(dx_root->dr_flags & OCFS2_DX_FLAG_INLINE)) ++ traverse_extents(gbls.fs, &dx_root->dr_list, out); ++ close_pager(out); ++ ++ return; ++} ++ ++/* ++ * do_dx_leaf() ++ * ++ */ ++static void do_dx_leaf (char **args) ++{ ++ struct ocfs2_dx_leaf *dx_leaf; ++ uint64_t blkno; ++ char *buf = NULL; ++ FILE *out; ++ errcode_t ret = 0; ++ ++ if (process_inodestr_args(args, 1, &blkno) != 1) ++ return; ++ ++ buf = gbls.blockbuf; ++ out = open_pager(gbls.interactive); ++ ++ ret = ocfs2_read_dx_leaf(gbls.fs, blkno, buf); ++ if (ret) { ++ com_err(args[0], ret, "while reading dx dir leaf " ++ "block %"PRIu64"", blkno); ++ close_pager (out); ++ return; ++ } ++ ++ dx_leaf = (struct ocfs2_dx_leaf *)buf; ++ dump_dx_leaf(out, dx_leaf); ++ ++ close_pager(out); ++ ++ return; ++} ++ ++/* ++ * do_dx_dump() ++ * ++ */ ++static void do_dx_dump (char **args) ++{ ++ struct ocfs2_dinode *inode; ++ uint64_t ino_blkno; ++ char *buf = NULL; ++ FILE *out; ++ errcode_t ret = 0; ++ ++ if (process_inode_args(args, &ino_blkno)) ++ return; ++ ++ out = open_pager(gbls.interactive); ++ ++ buf = gbls.blockbuf; ++ ret = ocfs2_read_inode(gbls.fs, ino_blkno, buf); ++ if (ret) { ++ com_err(args[0], ret, "while reading inode %"PRIu64"", ++ ino_blkno); ++ close_pager (out); ++ return ; ++ } ++ ++ inode = (struct ocfs2_dinode *)buf; ++ ++ dump_dx_entries(out, inode); ++ ++ close_pager(out); ++ ++ return; ++} ++ ++/* ++ * do_dx_space() ++ * ++ */ ++static void do_dx_space (char **args) ++{ ++ struct 
ocfs2_dinode *inode; ++ struct ocfs2_dx_root_block *dx_root; ++ uint64_t ino_blkno, dx_blkno; ++ char *buf = NULL, *dx_root_buf = NULL; ++ FILE *out; ++ errcode_t ret = 0; ++ ++ if (process_inode_args(args, &ino_blkno)) ++ return; ++ ++ out = open_pager(gbls.interactive); ++ ++ buf = gbls.blockbuf; ++ ret = ocfs2_read_inode(gbls.fs, ino_blkno, buf); ++ if (ret) { ++ com_err(args[0], ret, "while reading inode %"PRIu64"", ++ ino_blkno); ++ goto out; ++ } ++ ++ inode = (struct ocfs2_dinode *)buf; ++ if (!(ocfs2_dir_indexed(inode))) { ++ fprintf(out, "Inode %"PRIu64" is not indexed\n", ino_blkno); ++ goto out; ++ } ++ ++ ret = ocfs2_malloc_block(gbls.fs->fs_io, &dx_root_buf); ++ if (ret) { ++ goto out; ++ } ++ ++ dx_blkno = (uint64_t) inode->i_dx_root; ++ ++ ret = ocfs2_read_dx_root(gbls.fs, dx_blkno, dx_root_buf); ++ if (ret) { ++ com_err(args[0], ret, "while reading dx dir root " ++ "block %"PRIu64"", dx_blkno); ++ goto out; ++ } ++ ++ dx_root = (struct ocfs2_dx_root_block *)dx_root_buf; ++ ++ dump_dx_space(out, inode, dx_root); ++out: ++ close_pager(out); ++ if (dx_root_buf) ++ ocfs2_free(&dx_root_buf); ++ ++ return; ++} ++ ++/* + * do_extent() + * + */ +diff --git a/debugfs.ocfs2/dump.c b/debugfs.ocfs2/dump.c +index 7880991..2e887ce 100644 +--- a/debugfs.ocfs2/dump.c ++++ b/debugfs.ocfs2/dump.c +@@ -99,6 +99,9 @@ void dump_super_block(FILE *out, struct ocfs2_super_block *sb) + fprintf(out, "%02X", sb->s_uuid[i]); + fprintf(out, "\n"); + fprintf(out, "\tHash: %u (0x%x)\n", sb->s_uuid_hash, sb->s_uuid_hash); ++ for (i = 0; i < 3; i++) ++ fprintf(out, "\tDX Seed[%d]: 0x%08x\n", i, sb->s_dx_seed[i]); ++ + if (ocfs2_userspace_stack(sb)) + fprintf(out, + "\tCluster stack: %s\n" +@@ -315,6 +318,9 @@ void dump_inode(FILE *out, struct ocfs2_dinode *in) + if (in->i_dyn_features & OCFS2_INLINE_DATA_FL) { + fprintf(out, "\tInline Data Max: %u\n", + in->id2.i_data.id_count); ++ } else if (in->i_dyn_features & OCFS2_INDEXED_DIR_FL) { ++ fprintf(out, "\tIndexed Tree Root: 
%"PRIu64"\n", ++ (uint64_t)in->i_dx_root); + } + + if (flags) +@@ -490,6 +496,21 @@ int dump_dir_entry (struct ocfs2_dir_entry *rec, int offset, int blocksize, + } + + /* ++ * dump_dir_trailer() ++ */ ++static void dump_dir_trailer(FILE *out, struct ocfs2_dir_block_trailer *trailer) ++{ ++ fprintf(out, ++ "\tTrailer Block: %-15"PRIu64" Inode: %-15"PRIu64" rec_len: %-4u\n", ++ trailer->db_blkno, trailer->db_parent_dinode, ++ trailer->db_compat_rec_len); ++ fprintf(out, ++ "\tLargest hole: %u Next in list: %-15"PRIu64"\n", ++ trailer->db_free_rec_len, trailer->db_free_next); ++ dump_block_check(out, &trailer->db_check); ++} ++ ++/* + * dump_dir_block() + * + */ +@@ -507,13 +528,9 @@ void dump_dir_block(FILE *out, char *buf) + }; + + if (!strncmp((char *)trailer->db_signature, OCFS2_DIR_TRAILER_SIGNATURE, +- sizeof(trailer->db_signature))) { +- fprintf(out, +- "\tTrailer Block: %-15"PRIu64" Inode: %-15"PRIu64" rec_len: %-4u\n", +- trailer->db_blkno, trailer->db_parent_dinode, +- trailer->db_compat_rec_len); +- dump_block_check(out, &trailer->db_check); +- } else ++ sizeof(trailer->db_signature))) ++ dump_dir_trailer(out, trailer); ++ else + end = gbls.fs->fs_blocksize; + + fprintf(out, "\tEntries:\n"); +@@ -533,6 +550,139 @@ void dump_dir_block(FILE *out, char *buf) + } + } + ++static void dump_dx_entry(FILE *out, int i, struct ocfs2_dx_entry *dx_entry) ++{ ++ fprintf(out, "\t %-2d (0x%08x 0x%08x) %-13"PRIu64"\n", ++ i, dx_entry->dx_major_hash, dx_entry->dx_minor_hash, ++ (uint64_t)dx_entry->dx_dirent_blk); ++} ++ ++static void dump_dx_entry_list(FILE *out, struct ocfs2_dx_entry_list *dl_list, ++ int traverse) ++{ ++ int i; ++ ++ fprintf(out, "\tCount: %u Num Used: %u\n", ++ dl_list->de_count, dl_list->de_num_used); ++ ++ if (traverse) { ++ fprintf(out, "\t## %-11s %-13s\n", "Hash (Major Minor)", ++ "Dir Block#"); ++ ++ for (i = 0; i < dl_list->de_num_used; i++) ++ dump_dx_entry(out, i, &dl_list->de_entries[i]); ++ } ++} ++ ++void dump_dx_root(FILE *out, struct 
ocfs2_dx_root_block *dr) ++{ ++ char tmp_str[30]; ++ GString *flags = NULL; ++ ++ flags = g_string_new(NULL); ++ if (dr->dr_flags & OCFS2_DX_FLAG_INLINE) ++ g_string_append(flags, "Inline "); ++ ++ fprintf(out, "\tDir Index Root: %"PRIu64" FS Generation: %u (0x%x)\n", ++ (uint64_t)dr->dr_blkno, dr->dr_fs_generation, ++ dr->dr_fs_generation); ++ ++ fprintf(out, "\tClusters: %u Last Extblk: %"PRIu64" " ++ "Dir Inode: %"PRIu64"\n", ++ dr->dr_clusters, (uint64_t)dr->dr_last_eb_blk, ++ (uint64_t)dr->dr_dir_blkno); ++ ++ if (dr->dr_suballoc_slot == (uint16_t)OCFS2_INVALID_SLOT) ++ strcpy(tmp_str, "Global"); ++ else ++ sprintf(tmp_str, "%d", dr->dr_suballoc_slot); ++ fprintf(out, "\tSub Alloc Slot: %s Sub Alloc Bit: %u " ++ "Flags: (0x%x) %s\n", ++ tmp_str, dr->dr_suballoc_bit, dr->dr_flags, flags->str); ++ ++ dump_block_check(out, &dr->dr_check); ++ ++ if (dr->dr_flags & OCFS2_DX_FLAG_INLINE) ++ dump_dx_entry_list(out, &dr->dr_entries, 0); ++ ++ if (flags) ++ g_string_free(flags, 1); ++} ++ ++void dump_dx_leaf (FILE *out, struct ocfs2_dx_leaf *dx_leaf) ++{ ++ fprintf(out, "\tDir Index Leaf: %"PRIu64" FS Generation: %u (0x%x)\n", ++ (uint64_t)dx_leaf->dl_blkno, dx_leaf->dl_fs_generation, ++ dx_leaf->dl_fs_generation); ++ dump_block_check(out, &dx_leaf->dl_check); ++ ++ dump_dx_entry_list(out, &dx_leaf->dl_list, 1); ++} ++ ++static int entries_iter(ocfs2_filesys *fs, ++ struct ocfs2_dx_entry_list *entry_list, ++ struct ocfs2_dx_root_block *dx_root, ++ struct ocfs2_dx_leaf *dx_leaf, ++ void *priv_data) ++{ ++ FILE *out = priv_data; ++ ++ if (dx_leaf) { ++ dump_dx_leaf(out, dx_leaf); ++ return 0; ++ } ++ ++ /* Inline entries. Dump the list directly. 
*/ ++ dump_dx_entry_list(out, entry_list, 1); ++ ++ return 0; ++} ++ ++void dump_dx_entries(FILE *out, struct ocfs2_dinode *inode) ++{ ++ struct ocfs2_dx_root_block *dx_root; ++ uint64_t dx_blkno; ++ char *buf = NULL; ++ errcode_t ret = 0; ++ ++ if (ocfs2_malloc_block(gbls.fs->fs_io, &buf)) ++ return; ++ ++ if (!(ocfs2_dir_indexed(inode))) ++ return; ++ ++ dx_blkno = (uint64_t) inode->i_dx_root; ++ ++ ret = ocfs2_read_dx_root(gbls.fs, dx_blkno, buf); ++ if (ret) ++ return; ++ ++ dx_root = (struct ocfs2_dx_root_block *)buf; ++ dump_dx_root(out, dx_root); ++ ++ ocfs2_dx_entries_iterate(gbls.fs, inode, 0, entries_iter, out); ++ return; ++} ++ ++static int dx_space_iter(ocfs2_filesys *fs, ++ uint64_t blkno, ++ struct ocfs2_dir_block_trailer *trailer, ++ char *dirblock, ++ void *priv_data) ++{ ++ FILE *out = priv_data; ++ ++ dump_dir_trailer(out, trailer); ++ ++ return 0; ++} ++ ++void dump_dx_space(FILE *out, struct ocfs2_dinode *inode, ++ struct ocfs2_dx_root_block *dx_root) ++{ ++ ocfs2_dx_frees_iterate(gbls.fs, inode, dx_root, 0, dx_space_iter, out); ++} ++ + /* + * dump_jbd_header() + * +diff --git a/debugfs.ocfs2/include/dump.h b/debugfs.ocfs2/include/dump.h +index cb677c9..79b10b3 100644 +--- a/debugfs.ocfs2/include/dump.h ++++ b/debugfs.ocfs2/include/dump.h +@@ -52,7 +52,12 @@ void dump_extent_block (FILE *out, struct ocfs2_extent_block *blk); + void dump_group_descriptor (FILE *out, struct ocfs2_group_desc *grp, int index); + int dump_dir_entry (struct ocfs2_dir_entry *rec, int offset, int blocksize, + char *buf, void *priv_data); ++void dump_dx_root (FILE *out, struct ocfs2_dx_root_block *dx_root); ++void dump_dx_leaf (FILE *out, struct ocfs2_dx_leaf *dx_leaf); + void dump_dir_block(FILE *out, char *buf); ++void dump_dx_entries(FILE *out, struct ocfs2_dinode *inode); ++void dump_dx_space(FILE *out, struct ocfs2_dinode *inode, ++ struct ocfs2_dx_root_block *dx_root); + void dump_jbd_header (FILE *out, journal_header_t *header); + void dump_jbd_superblock (FILE 
*out, journal_superblock_t *jsb); + void dump_jbd_block (FILE *out, journal_superblock_t *jsb, +-- +1.7.0.2 + diff --git a/0003-dx_dirs-mkfs.ocfs2-support.patch b/0003-dx_dirs-mkfs.ocfs2-support.patch new file mode 100644 index 0000000..1676692 --- /dev/null +++ b/0003-dx_dirs-mkfs.ocfs2-support.patch @@ -0,0 +1,73 @@ +From 585f52a5dfd8cb4301d29804e7e05ddff1f3fca1 Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Sun, 11 Apr 2010 16:10:00 +0800 +Subject: [PATCH 03/30] dx_dirs: mkfs.ocfs2 support + +This adds only basic support to mkfs.ocfs2 to write the seed fields used +in directory indexing. The feature string +'indexed-dirs' is already there from a previous patch, so it is possible +to create a new file system with the directory +indexing feature turned on. + +Future improvements that are needed: + +- mkfs.ocfs2 should create the root directory and orphan dirs as + indexed. +- possibly also we want to do this for the system dir, but that might + require some discussion. + +[modified the patch for code rebase and cleanup -- Coly Li] + +Signed-off-by: Mark Fasheh +Signed-off-by: Coly Li +--- + mkfs.ocfs2/mkfs.c | 17 ++++++++++++++--- + 1 files changed, 14 insertions(+), 3 deletions(-) + +diff --git a/mkfs.ocfs2/mkfs.c b/mkfs.ocfs2/mkfs.c +index 5d01e3e..b43a9ba 100644 +--- a/mkfs.ocfs2/mkfs.c ++++ b/mkfs.ocfs2/mkfs.c +@@ -1004,6 +1004,8 @@ get_state(int argc, char **argv) + if ((optind == argc) && !show_version) + usage(progname); + ++ srand48(time(NULL)); ++ + device_name = argv[optind]; + optind++; + +@@ -2240,11 +2242,9 @@ format_superblock(State *s, SystemFileDiskRecord *rec, + */ + s->feature_flags.opt_compat &= ~OCFS2_FEATURE_COMPAT_BACKUP_SB; + +- if (s->feature_flags.opt_incompat & OCFS2_FEATURE_INCOMPAT_XATTR) { ++ if (s->feature_flags.opt_incompat & OCFS2_FEATURE_INCOMPAT_XATTR) + di->id2.i_super.s_xattr_inline_size = + OCFS2_MIN_XATTR_INLINE_SIZE; +- di->id2.i_super.s_uuid_hash = ocfs2_xattr_uuid_hash(s->uuid); +- } + + 
di->id2.i_super.s_feature_incompat = s->feature_flags.opt_incompat; + di->id2.i_super.s_feature_compat = s->feature_flags.opt_compat; +@@ -2253,6 +2253,17 @@ format_superblock(State *s, SystemFileDiskRecord *rec, + strcpy((char *)di->id2.i_super.s_label, s->vol_label); + memcpy(di->id2.i_super.s_uuid, s->uuid, 16); + ++ /* s_uuid_hash is also used by Indexed Dirs */ ++ if (s->feature_flags.opt_incompat & OCFS2_FEATURE_INCOMPAT_XATTR || ++ s->feature_flags.opt_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) ++ di->id2.i_super.s_uuid_hash = ocfs2_xattr_uuid_hash(s->uuid); ++ ++ if (s->feature_flags.opt_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) { ++ di->id2.i_super.s_dx_seed[0] = mrand48(); ++ di->id2.i_super.s_dx_seed[1] = mrand48(); ++ di->id2.i_super.s_dx_seed[2] = mrand48(); ++ } ++ + mkfs_swap_inode_from_cpu(s, di); + mkfs_compute_meta_ecc(s, di, &di->i_check); + do_pwrite(s, di, s->blocksize, super_off); +-- +1.7.0.2 + diff --git a/0004-dx_dirs-Add-tunefs.ocfs2-feature-for-indexed-directo.patch b/0004-dx_dirs-Add-tunefs.ocfs2-feature-for-indexed-directo.patch new file mode 100644 index 0000000..68c6207 --- /dev/null +++ b/0004-dx_dirs-Add-tunefs.ocfs2-feature-for-indexed-directo.patch @@ -0,0 +1,149 @@ +From 0afd7bae3557bd443eac4e388c0c3cbf47690e5d Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Sun, 11 Apr 2010 16:10:01 +0800 +Subject: [PATCH 04/30] dx_dirs: Add tunefs.ocfs2 feature for indexed directories + +This only enables them for now. Disabling is a bit more involved, and +will come later. 
+ +[modified the patch for code rebase and cleanup -- Coly Li] + +Signed-off-by: Mark Fasheh +Signed-off-by: Coly Li +--- + tunefs.ocfs2/Makefile | 1 + + tunefs.ocfs2/feature_indexed_dirs.c | 89 +++++++++++++++++++++++++++++++++++ + tunefs.ocfs2/op_features.c | 2 + + 3 files changed, 92 insertions(+), 0 deletions(-) + create mode 100644 tunefs.ocfs2/feature_indexed_dirs.c + +diff --git a/tunefs.ocfs2/Makefile b/tunefs.ocfs2/Makefile +index dad7034..6219af6 100644 +--- a/tunefs.ocfs2/Makefile ++++ b/tunefs.ocfs2/Makefile +@@ -26,6 +26,7 @@ OCFS2NE_FEATURES = \ + feature_sparse_files \ + feature_unwritten_extents \ + feature_xattr \ ++ feature_indexed_dirs \ + feature_quota + + OCFS2NE_OPERATIONS = \ +diff --git a/tunefs.ocfs2/feature_indexed_dirs.c b/tunefs.ocfs2/feature_indexed_dirs.c +new file mode 100644 +index 0000000..368eb87 +--- /dev/null ++++ b/tunefs.ocfs2/feature_indexed_dirs.c +@@ -0,0 +1,89 @@ ++/* -*- mode: c; c-basic-offset: 8; -*- ++ * vim: noexpandtab sw=8 ts=8 sts=0: ++ * ++ * feature_indexed_dirs.c ++ * ++ * ocfs2 tune utility for enabling and disabling the directory indexing ++ * feature. ++ * ++ * Copyright (C) 2009 Novell. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public ++ * License version 2 as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "ocfs2/ocfs2.h" ++ ++#include "libocfs2ne.h" ++ ++ ++static int enable_indexed_dirs(ocfs2_filesys *fs, int flags) ++{ ++ errcode_t ret = 0; ++ struct ocfs2_super_block *super = OCFS2_RAW_SB(fs->fs_super); ++ struct tools_progress *prog; ++ ++ if (ocfs2_supports_indexed_dirs(super)) { ++ verbosef(VL_APP, ++ "Directory indexing feature is already enabled; " ++ "nothing to enable\n"); ++ goto out; ++ } ++ ++ ++ if (!tools_interact("Enable the directory indexing feature on " ++ "device \"%s\"? ", ++ fs->fs_devname)) ++ goto out; ++ ++ prog = tools_progress_start("Enable directory indexing", "dir idx", 1); ++ if (!prog) { ++ ret = TUNEFS_ET_NO_MEMORY; ++ tcom_err(ret, "while initializing the progress display"); ++ goto out; ++ } ++ ++ OCFS2_SET_INCOMPAT_FEATURE(super, ++ OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS); ++ tunefs_block_signals(); ++ ret = ocfs2_write_super(fs); ++ tunefs_unblock_signals(); ++ if (ret) ++ tcom_err(ret, "while writing out the superblock"); ++ ++ tools_progress_step(prog, 1); ++ tools_progress_stop(prog); ++out: ++ return ret; ++} ++ ++/* ++ * TUNEFS_FLAG_ALLOCATION because disabling will want to dealloc ++ * blocks. 
++ */ ++DEFINE_TUNEFS_FEATURE_INCOMPAT(indexed_dirs, ++ OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, ++ TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION, ++ enable_indexed_dirs, ++ NULL); ++ ++#ifdef DEBUG_EXE ++int main(int argc, char *argv[]) ++{ ++ return tunefs_feature_main(argc, argv, &indexed_dirs_feature); ++} ++#endif +diff --git a/tunefs.ocfs2/op_features.c b/tunefs.ocfs2/op_features.c +index 91abca1..613ea7e 100644 +--- a/tunefs.ocfs2/op_features.c ++++ b/tunefs.ocfs2/op_features.c +@@ -44,6 +44,7 @@ extern struct tunefs_feature xattr_feature; + extern struct tunefs_feature usrquota_feature; + extern struct tunefs_feature grpquota_feature; + extern struct tunefs_feature refcount_feature; ++extern struct tunefs_feature indexed_dirs_feature; + + /* List of features supported by ocfs2ne */ + static struct tunefs_feature *features[] = { +@@ -58,6 +59,7 @@ static struct tunefs_feature *features[] = { + &usrquota_feature, + &grpquota_feature, + &refcount_feature, ++ &indexed_dirs_feature, + NULL, + }; + +-- +1.7.0.2 + diff --git a/0005-dx_dirs-Update-for-dr_num_entries.patch b/0005-dx_dirs-Update-for-dr_num_entries.patch new file mode 100644 index 0000000..7da5580 --- /dev/null +++ b/0005-dx_dirs-Update-for-dr_num_entries.patch @@ -0,0 +1,62 @@ +From 56f5ac553c2686271b657ed8be4f8ca001aa7ed6 Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Sun, 11 Apr 2010 16:10:02 +0800 +Subject: [PATCH 05/30] dx_dirs: Update for dr_num_entries + +This just adds the new field, swaps it appropriately, and prints it in +debugfs. 
+ +[Modified the patch for code rebase and cleanup -- Coly Li] + +Signed-off-by: Mark Fasheh +Signed-off-by: Coly Li +--- + debugfs.ocfs2/dump.c | 2 ++ + include/ocfs2-kernel/ocfs2_fs.h | 5 ++++- + libocfs2/dirblock.c | 1 + + 3 files changed, 7 insertions(+), 1 deletions(-) + +diff --git a/debugfs.ocfs2/dump.c b/debugfs.ocfs2/dump.c +index 2e887ce..88ec430 100644 +--- a/debugfs.ocfs2/dump.c ++++ b/debugfs.ocfs2/dump.c +@@ -600,6 +600,8 @@ void dump_dx_root(FILE *out, struct ocfs2_dx_root_block *dr) + "Flags: (0x%x) %s\n", + tmp_str, dr->dr_suballoc_bit, dr->dr_flags, flags->str); + ++ fprintf(out, "\tTotal Entry Count: %d\n", dr->dr_num_entries); ++ + dump_block_check(out, &dr->dr_check); + + if (dr->dr_flags & OCFS2_DX_FLAG_INLINE) +diff --git a/include/ocfs2-kernel/ocfs2_fs.h b/include/ocfs2-kernel/ocfs2_fs.h +index d4de181..f3c2450 100644 +--- a/include/ocfs2-kernel/ocfs2_fs.h ++++ b/include/ocfs2-kernel/ocfs2_fs.h +@@ -810,7 +810,10 @@ struct ocfs2_dx_root_block { + __u8 dr_reserved0; + __le16 dr_reserved1; + __le64 dr_dir_blkno; /* Pointer to parent inode */ +- __le64 dr_reserved2; ++ __le32 dr_num_entries; /* Total number of ++ * names stored in ++ * this directory.*/ ++ __le32 dr_reserved2; + __le64 dr_free_blk; /* Pointer to head of free + * unindexed block list. 
*/ + __le64 dr_reserved3[15]; +diff --git a/libocfs2/dirblock.c b/libocfs2/dirblock.c +index d68e5c0..4bf48f3 100644 +--- a/libocfs2/dirblock.c ++++ b/libocfs2/dirblock.c +@@ -266,6 +266,7 @@ static void ocfs2_swap_dx_root_to_cpu(struct ocfs2_dx_root_block *dx_root) + dx_root->dr_last_eb_blk = bswap_64(dx_root->dr_last_eb_blk); + dx_root->dr_clusters = bswap_32(dx_root->dr_clusters); + dx_root->dr_dir_blkno = bswap_64(dx_root->dr_dir_blkno); ++ dx_root->dr_num_entries = bswap_32(dx_root->dr_num_entries); + dx_root->dr_free_blk = bswap_64(dx_root->dr_free_blk); + + if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) +-- +1.7.0.2 + diff --git a/0006-dx_dirs-add-missing-ocfs2_filesys-fs-parameter.patch b/0006-dx_dirs-add-missing-ocfs2_filesys-fs-parameter.patch new file mode 100644 index 0000000..16c98bb --- /dev/null +++ b/0006-dx_dirs-add-missing-ocfs2_filesys-fs-parameter.patch @@ -0,0 +1,50 @@ +From 5110de09d64c84037e7e2ecdf76ab2d0d0f77ff4 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Sun, 11 Apr 2010 16:10:03 +0800 +Subject: [PATCH 06/30] dx_dirs: add missing 'ocfs2_filesys *fs' parameter + +This patch adds 'ocfs2_filesys *fs' parameter to +ocfs2_swap_dx_root_to_cpu(), and fixes the mismatched arguments when +calling ocfs2_swap_extent_list_to_cpu(). 
+ +Signed-off-by: Coly Li +Signed-off-by: Mark Fasheh +--- + libocfs2/dirblock.c | 7 ++++--- + 1 files changed, 4 insertions(+), 3 deletions(-) + +diff --git a/libocfs2/dirblock.c b/libocfs2/dirblock.c +index 4bf48f3..06a1b64 100644 +--- a/libocfs2/dirblock.c ++++ b/libocfs2/dirblock.c +@@ -254,7 +254,8 @@ static void ocfs2_swap_dx_entry_list_to_cpu(struct ocfs2_dx_entry_list *dl_list) + ocfs2_swap_dx_entry_to_cpu(&dl_list->de_entries[i]); + } + +-static void ocfs2_swap_dx_root_to_cpu(struct ocfs2_dx_root_block *dx_root) ++static void ocfs2_swap_dx_root_to_cpu(ocfs2_filesys *fs, ++ struct ocfs2_dx_root_block *dx_root) + { + if (cpu_is_little_endian) + return; +@@ -272,7 +273,7 @@ static void ocfs2_swap_dx_root_to_cpu(struct ocfs2_dx_root_block *dx_root) + if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) + ocfs2_swap_dx_entry_list_to_cpu(&dx_root->dr_entries); + else +- ocfs2_swap_extent_list_to_cpu(&dx_root->dr_list); ++ ocfs2_swap_extent_list_to_cpu(fs, dx_root, &dx_root->dr_list); + } + + errcode_t ocfs2_read_dx_root(ocfs2_filesys *fs, uint64_t block, +@@ -294,7 +295,7 @@ errcode_t ocfs2_read_dx_root(ocfs2_filesys *fs, uint64_t block, + strlen(OCFS2_DX_ROOT_SIGNATURE))) + return OCFS2_ET_DIR_CORRUPTED; + +- ocfs2_swap_dx_root_to_cpu(dx_root); ++ ocfs2_swap_dx_root_to_cpu(fs, dx_root); + + return 0; + } +-- +1.7.0.2 + diff --git a/0007-dx_dirs-more-library-support-for-directory-indexing.patch b/0007-dx_dirs-more-library-support-for-directory-indexing.patch new file mode 100644 index 0000000..8bdc8d8 --- /dev/null +++ b/0007-dx_dirs-more-library-support-for-directory-indexing.patch @@ -0,0 +1,2767 @@ +From 583be25c5f5ea6ec34b4ec499df12c450d457778 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Sun, 11 Apr 2010 16:10:04 +0800 +Subject: [PATCH 07/30] dx_dirs: more library support for directory indexing + +This patch adds more library support for indexed dirs, +- dx_root alloc/delete +- dx_leaf alloc/delete +- dx_root read/write +- dx_leaf read/write +- indexed tree 
insert/truncate +- dx_root extent tree operations +- indexed dir support in ocfs2_lookup_system_inode(), + ocfs2_init_dir(), ocfs2_link(), ocfs2_unlink(), + ocfs2_lookup(). +With this patch, indexed dirs support in fsck.ocfs2 is possible. + +Signed-off-by: Coly Li +Signed-off-by: Mark Fasheh +--- + include/ocfs2-kernel/ocfs2_fs.h | 29 + + include/ocfs2/ocfs2.h | 55 ++- + libocfs2/Makefile | 3 +- + libocfs2/alloc.c | 96 +++ + libocfs2/dir_indexed.c | 1461 +++++++++++++++++++++++++++++++++++++++ + libocfs2/dir_iterate.c | 16 +- + libocfs2/dir_iterate.h | 1 + + libocfs2/dirblock.c | 195 +++++- + libocfs2/expanddir.c | 18 +- + libocfs2/extent_tree.c | 53 ++ + libocfs2/extent_tree.h | 5 + + libocfs2/inode.c | 3 + + libocfs2/link.c | 9 + + libocfs2/lookup.c | 71 ++- + libocfs2/ocfs2_err.et | 6 + + libocfs2/truncate.c | 16 + + libocfs2/unlink.c | 187 +++++- + sizetest/sizetest.c | 2 +- + 18 files changed, 2165 insertions(+), 61 deletions(-) + create mode 100644 libocfs2/dir_indexed.c + +diff --git a/include/ocfs2-kernel/ocfs2_fs.h b/include/ocfs2-kernel/ocfs2_fs.h +index f3c2450..6fc63e8 100644 +--- a/include/ocfs2-kernel/ocfs2_fs.h ++++ b/include/ocfs2-kernel/ocfs2_fs.h +@@ -1406,6 +1406,35 @@ static inline int ocfs2_extent_recs_per_eb(int blocksize) + return size / sizeof(struct ocfs2_extent_rec); + } + ++static inline int ocfs2_dx_entries_per_leaf(int blocksize) ++{ ++ int size; ++ size = blocksize - ++ offsetof(struct ocfs2_dx_leaf, dl_list.de_entries); ++ ++ return size / sizeof(struct ocfs2_dx_entry); ++} ++ ++static inline int ocfs2_dx_entries_per_root(int blocksize) ++{ ++ int size; ++ ++ size = blocksize - ++ offsetof(struct ocfs2_dx_root_block, dr_entries.de_entries); ++ ++ return size / sizeof(struct ocfs2_dx_entry); ++} ++ ++static inline int ocfs2_extent_recs_per_dx_root(int blocksize) ++{ ++ int size; ++ ++ size = blocksize - ++ offsetof(struct ocfs2_dx_root_block, dr_list.l_recs); ++ ++ return size / sizeof(struct ocfs2_extent_rec); ++} ++ + static inline 
int ocfs2_local_alloc_size(int blocksize) + { + int size; +diff --git a/include/ocfs2/ocfs2.h b/include/ocfs2/ocfs2.h +index 332d4bd..465dc58 100644 +--- a/include/ocfs2/ocfs2.h ++++ b/include/ocfs2/ocfs2.h +@@ -254,6 +254,22 @@ struct _ocfs2_quota_hash { + ocfs2_cached_dquot **hash; + }; + ++struct ocfs2_dx_hinfo { ++ uint32_t major_hash; ++ uint32_t minor_hash; ++}; ++ ++struct ocfs2_dir_lookup_result { ++ struct ocfs2_dx_hinfo dl_hinfo; /* name hash results */ ++ char * dl_leaf; /* unindexed block buffer */ ++ uint64_t dl_leaf_blkno; /* blk number of dl_leaf */ ++ struct ocfs2_dir_entry * dl_entry; /* dirent pointed into dl_leaf */ ++ struct ocfs2_dx_leaf * dl_dx_leaf; /* indexed block buffer */ ++ uint64_t dl_dx_leaf_blkno; /* blk number of dl_dx_leaf */ ++ struct ocfs2_dx_entry * dl_dx_entry; /* indexed entry pointed to dl_dx_leaf */ ++ int dl_dx_entry_idx; /* index of dl_dx_entry in entries list */ ++}; ++ + typedef struct _ocfs2_quota_hash ocfs2_quota_hash; + + errcode_t ocfs2_malloc(unsigned long size, void *ptr); +@@ -332,7 +348,8 @@ errcode_t ocfs2_read_inode(ocfs2_filesys *fs, uint64_t blkno, + errcode_t ocfs2_write_inode(ocfs2_filesys *fs, uint64_t blkno, + char *inode_buf); + errcode_t ocfs2_check_directory(ocfs2_filesys *fs, uint64_t dir); +- ++int ocfs2_check_dir_entry(ocfs2_filesys *fs, struct ocfs2_dir_entry *de, ++ char *dir_buf, unsigned int offset); + errcode_t ocfs2_read_cached_inode(ocfs2_filesys *fs, uint64_t blkno, + ocfs2_cached_inode **ret_ci); + errcode_t ocfs2_write_cached_inode(ocfs2_filesys *fs, +@@ -478,7 +495,7 @@ errcode_t ocfs2_read_dx_root(ocfs2_filesys *fs, uint64_t block, + errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block, + void *buf); + int ocfs2_dir_indexed(struct ocfs2_dinode *di); +- ++errcode_t ocfs2_dx_dir_truncate(ocfs2_filesys *fs, uint64_t dir); + errcode_t ocfs2_dir_iterate2(ocfs2_filesys *fs, + uint64_t dir, + int flags, +@@ -486,6 +503,7 @@ errcode_t ocfs2_dir_iterate2(ocfs2_filesys *fs, + int 
(*func)(uint64_t dir, + int entry, + struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +@@ -496,6 +514,7 @@ extern errcode_t ocfs2_dir_iterate(ocfs2_filesys *fs, + int flags, + char *block_buf, + int (*func)(struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +@@ -675,7 +694,10 @@ errcode_t ocfs2_new_inode(ocfs2_filesys *fs, uint64_t *ino, int mode); + errcode_t ocfs2_new_system_inode(ocfs2_filesys *fs, uint64_t *ino, int mode, int flags); + errcode_t ocfs2_delete_inode(ocfs2_filesys *fs, uint64_t ino); + errcode_t ocfs2_new_extent_block(ocfs2_filesys *fs, uint64_t *blkno); ++errcode_t ocfs2_new_dx_root(ocfs2_filesys *fs, struct ocfs2_dinode *di, uint64_t *dr_blkno); + errcode_t ocfs2_delete_extent_block(ocfs2_filesys *fs, uint64_t blkno); ++errcode_t ocfs2_delete_dx_root(ocfs2_filesys *fs, uint64_t dr_blkno); ++ + /* + * Allocate the blocks and insert them to the file. + * only i_clusters of dinode will be updated accordingly, i_size not changed. 
+@@ -1347,6 +1369,10 @@ static inline int ocfs2_refcount_tree(struct ocfs2_super_block *osb) + #define OCFS2_BLOCK_ABORT 0x02 + #define OCFS2_BLOCK_ERROR 0x04 + ++ ++#define OCFS2_IS_VALID_DX_ROOT(ptr) \ ++ (!strcmp((char *)(ptr)->dr_signature, OCFS2_DX_ROOT_SIGNATURE)) ++ + /* + * Block iterate flags + * +@@ -1487,5 +1513,30 @@ errcode_t ocfs2_extent_iterate_xattr(ocfs2_filesys *fs, + void *priv_data, + int *changed); + errcode_t ocfs2_delete_xattr_block(ocfs2_filesys *fs, uint64_t blkno); ++errcode_t ocfs2_dir_indexed_tree_truncate(ocfs2_filesys *fs, ++ struct ocfs2_dx_root_block *dx_root); ++errcode_t ocfs2_write_dx_root(ocfs2_filesys *fs, uint64_t block, char *buf); ++errcode_t ocfs2_write_dx_leaf(ocfs2_filesys *fs, uint64_t block, void *buf); ++errcode_t ocfs2_dx_dir_build(ocfs2_filesys *fs, uint64_t dir); ++errcode_t ocfs2_dx_dir_insert_entry(ocfs2_filesys *fs, uint64_t dir, const char *name, ++ uint64_t ino, uint64_t blkno); ++int ocfs2_search_dirblock(ocfs2_filesys *fs, char *dir_buf, ++ const char *name, int namelen, unsigned int bytes, ++ struct ocfs2_dir_entry **res_dir); ++void ocfs2_dx_dir_name_hash(ocfs2_filesys *fs, const char *name, ++ int len, struct ocfs2_dx_hinfo *hinfo); ++errcode_t ocfs2_dx_dir_lookup(ocfs2_filesys *fs, struct ocfs2_dx_root_block *dx_root, ++ struct ocfs2_extent_list *el, struct ocfs2_dx_hinfo *hinfo, ++ uint32_t *ret_cpos, uint64_t *ret_phys_blkno); ++errcode_t ocfs2_dx_dir_search(ocfs2_filesys *fs, const char *name, ++ int namelen, struct ocfs2_dx_root_block *dx_root, ++ struct ocfs2_dir_lookup_result *res); ++void release_lookup_res(struct ocfs2_dir_lookup_result *res); ++int ocfs2_find_max_rec_len(ocfs2_filesys *fs, char *buf); ++void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list, int index); ++int ocfs2_is_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di, unsigned long de_off); ++ ++ ++ + + #endif /* _FILESYS_H */ +diff --git a/libocfs2/Makefile b/libocfs2/Makefile +index 622b89e..cff5061 100644 
+--- a/libocfs2/Makefile ++++ b/libocfs2/Makefile +@@ -77,7 +77,8 @@ CFILES = \ + image.c \ + xattr.c \ + extent_tree.c \ +- refcount.c ++ refcount.c \ ++ dir_indexed.c + + HFILES = \ + bitmap.h \ +diff --git a/libocfs2/alloc.c b/libocfs2/alloc.c +index 842ebdb..84f3b05 100644 +--- a/libocfs2/alloc.c ++++ b/libocfs2/alloc.c +@@ -622,6 +622,102 @@ out: + return ret; + } + ++/* only initiate part of dx_root: ++ * dr_subllaoc_slot ++ * dr_sbualloc_bit ++ * dr_fs_generation ++ * dr_blkno ++ * dr_flags ++ */ ++static void init_dx_root(ocfs2_filesys *fs, ++ struct ocfs2_dx_root_block *dx_root, ++ int slot, uint64_t gd_blkno, uint64_t dr_blkno) ++{ ++ ++ memset(dx_root, 0, fs->fs_blocksize); ++ strcpy((char *)dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE); ++ dx_root->dr_suballoc_slot = slot; ++ dx_root->dr_suballoc_bit = (uint16_t)(dr_blkno - gd_blkno); ++ dx_root->dr_fs_generation = fs->fs_super->i_fs_generation; ++ dx_root->dr_blkno = dr_blkno; ++ dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE; ++} ++ ++errcode_t ocfs2_new_dx_root(ocfs2_filesys *fs, ++ struct ocfs2_dinode *di, ++ uint64_t *dr_blkno) ++{ ++ errcode_t ret; ++ char *buf = NULL; ++ uint64_t gd_blkno; ++ struct ocfs2_dx_root_block *dx_root; ++ int slot; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &buf); ++ if (ret) ++ goto out; ++ ++ slot = di->i_suballoc_slot; ++ ret = ocfs2_load_allocator(fs, EXTENT_ALLOC_SYSTEM_INODE, ++ slot, &fs->fs_eb_allocs[slot]); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_chain_alloc_with_io(fs, fs->fs_eb_allocs[slot], ++ &gd_blkno, dr_blkno); ++ if (ret == OCFS2_ET_BIT_NOT_FOUND) { ++ ret = ocfs2_chain_add_group(fs, fs->fs_eb_allocs[slot]); ++ if (ret) ++ goto out; ++ ret = ocfs2_chain_alloc_with_io(fs, fs->fs_eb_allocs[slot], ++ &gd_blkno, dr_blkno); ++ if (ret) ++ goto out; ++ } else if (ret) ++ goto out; ++ ++ dx_root = (struct ocfs2_dx_root_block *)buf; ++ init_dx_root(fs, dx_root, slot, gd_blkno, *dr_blkno); ++ ++ ret = ocfs2_write_dx_root(fs, *dr_blkno, (char *)dx_root); ++out: ++ 
if (buf) ++ ocfs2_free(&buf); ++ return ret; ++} ++ ++errcode_t ocfs2_delete_dx_root(ocfs2_filesys *fs, uint64_t dr_blkno) ++{ ++ errcode_t ret; ++ char *buf = NULL; ++ struct ocfs2_dx_root_block *dx_root; ++ int slot; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &buf); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_read_dx_root(fs, dr_blkno, buf); ++ if (ret) ++ goto out; ++ ++ dx_root = (struct ocfs2_dx_root_block *)buf; ++ slot = dx_root->dr_suballoc_slot; ++ ++ ret = ocfs2_load_allocator(fs, EXTENT_ALLOC_SYSTEM_INODE, slot, ++ &fs->fs_eb_allocs[slot]); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_chain_free_with_io(fs, fs->fs_eb_allocs[slot], dr_blkno); ++ ++out: ++ if (buf) ++ ocfs2_free(&buf); ++ ++ return ret; ++} ++ + /* XXX what to do about local allocs? + * XXX Well, we shouldn't use local allocs to allocate, as we are + * userspace and we have the entire bitmap in memory. However, this +diff --git a/libocfs2/dir_indexed.c b/libocfs2/dir_indexed.c +new file mode 100644 +index 0000000..9cae3d0 +--- /dev/null ++++ b/libocfs2/dir_indexed.c +@@ -0,0 +1,1461 @@ ++/* -*- mode: c; c-basic-offset: 8; -*- ++ * vim: noexpandtab sw=8 ts=8 sts=0: ++ * ++ * Copyright (C) 2009, 2010 Novell. All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public ++ * License, version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. 
++ */ ++#include ++#include ++#include ++#include ++#include "ocfs2_err.h" ++#include "extent_tree.h" ++ ++ ++errcode_t ocfs2_dx_dir_truncate(ocfs2_filesys *fs, ++ uint64_t dir) ++{ ++ struct ocfs2_dx_root_block *dx_root; ++ char *dx_root_buf = NULL, *di_buf = NULL; ++ struct ocfs2_dinode *di; ++ uint64_t dx_root_blk; ++ errcode_t ret = 0; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &di_buf); ++ if (ret) ++ goto out; ++ ret = ocfs2_read_inode(fs, dir, di_buf); ++ if (ret) ++ goto out; ++ di = (struct ocfs2_dinode *)di_buf; ++ ++ /* we have to trust i_dyn_features */ ++ if (!S_ISDIR(di->i_mode) || ++ !ocfs2_dir_indexed(di) || ++ di->i_dyn_features & OCFS2_INLINE_DATA_FL) ++ goto out; ++ ++ dx_root_blk = di->i_dx_root; ++ ++ di->i_dyn_features &= ~OCFS2_INDEXED_DIR_FL; ++ di->i_dx_root = 0; ++ ++ /* update inode firstly */ ++ ret = ocfs2_write_inode(fs, di->i_blkno, (char *)di); ++ if (ret) ++ goto out; ++ ++ /* inode is updated, the rested errors are not fatal */ ++ ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf); ++ if (ret) ++ goto out; ++ ret = ocfs2_read_dx_root(fs, dx_root_blk, dx_root_buf); ++ if (ret) ++ goto out; ++ dx_root = (struct ocfs2_dx_root_block *)dx_root_buf; ++ ++ if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) ++ goto remove_index; ++ ++ ret = ocfs2_dir_indexed_tree_truncate(fs, dx_root); ++ ++ /* ++ * even ocfs2_dir_indexed_tree_truncate() failed, ++ * we still want to call ocfs2_delete_dx_root(). 
++ */ ++ ++remove_index: ++ ret = ocfs2_delete_dx_root(fs, dx_root->dr_blkno); ++out: ++ if (di_buf) ++ ocfs2_free(&di_buf); ++ if (dx_root_buf) ++ ocfs2_free(&dx_root_buf); ++ return ret; ++} ++ ++static unsigned int ocfs2_figure_dirent_hole(struct ocfs2_dir_entry *de) ++{ ++ unsigned int hole; ++ ++ if (de->inode == 0) ++ hole = de->rec_len; ++ else ++ hole = de->rec_len - OCFS2_DIR_REC_LEN(de->name_len); ++ ++ return hole; ++} ++ ++int ocfs2_find_max_rec_len(ocfs2_filesys *fs, char *buf) ++{ ++ int size, this_hole, largest_hole = 0; ++ char *de_buf, *limit; ++ struct ocfs2_dir_entry *de; ++ ++ size = ocfs2_dir_trailer_blk_off(fs); ++ limit = buf + size; ++ de_buf = buf; ++ de = (struct ocfs2_dir_entry *)de_buf; ++ do { ++ this_hole = ocfs2_figure_dirent_hole(de); ++ if (this_hole > largest_hole) ++ largest_hole = this_hole; ++ ++ de_buf += de->rec_len; ++ de = (struct ocfs2_dir_entry *)de_buf; ++ } while (de_buf < limit); ++ ++ if (largest_hole >= OCFS2_DIR_MIN_REC_LEN) ++ return largest_hole; ++ return 0; ++} ++ ++struct trailer_ctxt { ++ struct ocfs2_dx_root_block *dx_root; ++ struct ocfs2_dinode *di; ++}; ++ ++/* make sure the space for trailer is reserved */ ++static errcode_t ocfs2_check_dir_trailer_space(ocfs2_filesys *fs, ++ struct ocfs2_dinode *di, ++ uint64_t blkno, ++ char *blk) ++{ ++ errcode_t ret = 0; ++ struct ocfs2_dir_entry *dirent; ++ unsigned int offset = 0; ++ unsigned int toff = ocfs2_dir_trailer_blk_off(fs); ++ unsigned int real_rec_len = 0; ++ ++ while(offset < fs->fs_blocksize) { ++ dirent = (struct ocfs2_dir_entry *)(blk + offset); ++ if (!ocfs2_check_dir_entry(fs, dirent, blk, offset)) { ++ ret = OCFS2_ET_DIR_CORRUPTED; ++ break; ++ } ++ ++ real_rec_len = dirent->inode ? 
++ OCFS2_DIR_REC_LEN(dirent->name_len) : ++ OCFS2_DIR_REC_LEN(1); ++ if ((offset + real_rec_len) <= toff) ++ goto next; ++ ++ if (dirent->inode) { ++ ret = OCFS2_ET_DIR_NO_SPACE; ++ break; ++ } ++next: ++ offset += dirent->rec_len; ++ } ++ ++out: ++ return ret; ++} ++ ++static int dir_trailer_func(ocfs2_filesys *fs, ++ uint64_t blkno, ++ uint64_t bcount, ++ uint16_t ext_flags, ++ void *priv_data) ++{ ++ struct trailer_ctxt *ctxt = (struct trailer_ctxt *)priv_data; ++ struct ocfs2_dinode *di = ctxt->di; ++ struct ocfs2_dx_root_block *dx_root = ctxt->dx_root; ++ struct ocfs2_dir_block_trailer *trailer; ++ int max_rec_len = 0; ++ errcode_t ret = 0; ++ char *blk = NULL; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &blk); ++ if (ret) ++ goto out; ++ ++ /* here we don't trust trailer, cannot use ++ * ocfs2_read_dir_block() */ ++ ret = ocfs2_read_blocks(fs, blkno, 1, blk); ++ if (ret) ++ goto out; ++ ret = ocfs2_check_dir_trailer_space(fs, di, blkno, blk); ++ if (ret) ++ goto out; ++ ocfs2_init_dir_trailer(fs, di, blkno, blk); ++ max_rec_len = ocfs2_find_max_rec_len(fs, blk); ++ trailer = ocfs2_dir_trailer_from_block(fs, blk); ++ trailer->db_free_rec_len = max_rec_len; ++ ++ if (max_rec_len) { ++ trailer->db_free_next = dx_root->dr_free_blk; ++ dx_root->dr_free_blk = blkno; ++ } ++ ++ /* comput trailer->db_check here, after writes out, ++ * trailer is trustable */ ++ ret = ocfs2_write_dir_block(fs, di, blkno, blk); ++out: ++ if (blk) ++ ocfs2_free(&blk); ++ return ret; ++} ++ ++static errcode_t ocfs2_init_dir_trailers(ocfs2_filesys *fs, ++ struct ocfs2_dinode *di, ++ struct ocfs2_dx_root_block *dx_root) ++{ ++ errcode_t ret = 0; ++ struct trailer_ctxt ctxt; ++ ++ if (di->i_dyn_features & OCFS2_INLINE_DATA_FL) { ++ ret = OCFS2_ET_INODE_NOT_VALID; ++ goto out; ++ } ++ ++ ctxt.di = di; ++ ctxt.dx_root = dx_root; ++ ++ ret = ocfs2_block_iterate_inode(fs, di, ++ 0, dir_trailer_func, &ctxt); ++out: ++ return ret; ++} ++ ++static void ocfs2_dx_entry_list_insert(struct 
ocfs2_dx_entry_list *entry_list, ++ struct ocfs2_dx_hinfo *hinfo, ++ uint64_t dirent_blk) ++{ ++ int i; ++ struct ocfs2_dx_entry *dx_entry; ++ ++ i = entry_list->de_num_used; ++ dx_entry = &entry_list->de_entries[i]; ++ ++ memset(dx_entry, 0, sizeof(struct ocfs2_dx_entry)); ++ dx_entry->dx_major_hash = hinfo->major_hash; ++ dx_entry->dx_minor_hash = hinfo->minor_hash; ++ dx_entry->dx_dirent_blk = dirent_blk; ++ ++ entry_list->de_num_used += 1; ++} ++ ++struct dx_insert_ctxt { ++ uint64_t dir_blkno; ++ uint64_t dx_root_blkno; ++ ocfs2_filesys *fs; ++}; ++ ++ ++inline static int ocfs2_inline_dx_has_space(struct ocfs2_dx_root_block *dx_root) ++{ ++ struct ocfs2_dx_entry_list *entry_list; ++ ++ entry_list = &dx_root->dr_entries; ++ ++ if (entry_list->de_num_used >= entry_list->de_count) ++ return 0; ++ ++ return 1; ++} ++ ++static struct ocfs2_dx_leaf **ocfs2_dx_dir_alloc_leaves(ocfs2_filesys *fs, ++ int *ret_num_leaves) ++{ ++ errcode_t num_dx_leaves = ocfs2_clusters_to_blocks(fs, 1); ++ char **dx_leaves_buf = NULL; ++ ++ dx_leaves_buf = calloc(num_dx_leaves, sizeof (void *)); ++ if (dx_leaves_buf && ret_num_leaves) ++ *ret_num_leaves = num_dx_leaves; ++ ++ return (struct ocfs2_dx_leaf **)dx_leaves_buf; ++} ++ ++static errcode_t ocfs2_dx_dir_format_cluster(ocfs2_filesys *fs, ++ struct ocfs2_dx_leaf **dx_leaves, ++ int num_dx_leaves, ++ uint64_t start_blk) ++{ ++ errcode_t ret; ++ int i; ++ struct ocfs2_dx_leaf *dx_leaf; ++ char *blk; ++ ++ for (i = 0; i < num_dx_leaves; i++) { ++ ret = ocfs2_malloc_block(fs->fs_io, &blk); ++ if (ret) ++ goto out; ++ ++ dx_leaves[i] = (struct ocfs2_dx_leaf *)blk; ++ dx_leaf = (struct ocfs2_dx_leaf *)blk; ++ ++ memset(dx_leaf, 0, fs->fs_blocksize); ++ strcpy((char *)dx_leaf->dl_signature, OCFS2_DX_LEAF_SIGNATURE); ++ dx_leaf->dl_fs_generation = fs->fs_super->i_fs_generation; ++ dx_leaf->dl_blkno = start_blk + i; ++ dx_leaf->dl_list.de_count = ocfs2_dx_entries_per_leaf(fs->fs_blocksize); ++ ++ ret = ocfs2_write_dx_leaf(fs, 
dx_leaf->dl_blkno, dx_leaf); ++ if (ret) ++ goto out; ++ } ++ ret = 0; ++out: ++ return ret; ++} ++ ++static inline unsigned int __ocfs2_dx_dir_hash_idx(ocfs2_filesys *fs, ++ uint32_t minor_hash) ++{ ++ unsigned int cbits, bbits, dx_mask; ++ ++ cbits = OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits; ++ bbits = OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits; ++ dx_mask = (1 << (cbits - bbits)) -1; ++ ++ return (minor_hash & dx_mask); ++} ++ ++static inline unsigned int ocfs2_dx_dir_hash_idx(ocfs2_filesys *fs, ++ struct ocfs2_dx_hinfo *hinfo) ++{ ++ return __ocfs2_dx_dir_hash_idx(fs, hinfo->minor_hash); ++} ++ ++static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf, ++ struct ocfs2_dx_entry *dx_new_entry) ++{ ++ int i; ++ ++ i = dx_leaf->dl_list.de_num_used; ++ dx_leaf->dl_list.de_entries[i] = *dx_new_entry; ++ ++ dx_leaf->dl_list.de_num_used += 1; ++} ++ ++static errcode_t ocfs2_expand_inline_dx_root(ocfs2_filesys *fs, ++ struct ocfs2_dx_root_block *dx_root) ++{ ++ errcode_t ret; ++ int num_dx_leaves, i, j; ++ uint64_t start_blkno = 0; ++ uint32_t clusters_found = 0; ++ struct ocfs2_dx_leaf **dx_leaves = NULL; ++ struct ocfs2_dx_leaf *target_leaf; ++ struct ocfs2_dx_entry_list *entry_list; ++ struct ocfs2_extent_tree et; ++ struct ocfs2_dx_entry *dx_entry; ++ ++ dx_leaves = ocfs2_dx_dir_alloc_leaves(fs, &num_dx_leaves); ++ if (!dx_leaves) { ++ ret = OCFS2_ET_NO_MEMORY; ++ goto out; ++ } ++ ++ ret = ocfs2_new_clusters(fs, 1, 1, &start_blkno, &clusters_found); ++ if (ret) ++ goto out; ++ assert(clusters_found == 1); ++ ret = ocfs2_dx_dir_format_cluster(fs, dx_leaves, ++ num_dx_leaves, start_blkno); ++ if (ret) ++ goto out; ++ ++ /* ++ * Transfer the entries from inline dx_root into the appropriate ++ * block ++ */ ++ entry_list = &dx_root->dr_entries; ++ ++ for (i = 0; i < entry_list->de_num_used; i++) { ++ dx_entry = &entry_list->de_entries[i]; ++ j = __ocfs2_dx_dir_hash_idx(fs, dx_entry->dx_minor_hash); ++ target_leaf = (struct ocfs2_dx_leaf 
*)dx_leaves[j]; ++ ocfs2_dx_dir_leaf_insert_tail(target_leaf, dx_entry); ++ } ++ ++ /* ++ * Write out all leaves. ++ * If ocfs2_write_dx_leaf() failed, since dx_root is not cleared ++ * yet, and the leaves are not inserted into indexed tree yet, ++ * this cluster will be recoganized as orphan in blocks scan of ++ * fsck.ocfs2 ++ */ ++ for (i = 0; i < num_dx_leaves; i ++) { ++ target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[i]; ++ ret = ocfs2_write_dx_leaf(fs, target_leaf->dl_blkno, ++ target_leaf); ++ if (ret) ++ goto out; ++ } ++ ++ dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE; ++ memset(&dx_root->dr_list, 0, fs->fs_blocksize - ++ offsetof(struct ocfs2_dx_root_block, dr_list)); ++ dx_root->dr_list.l_count = ++ ocfs2_extent_recs_per_dx_root(fs->fs_blocksize); ++ ++ /* This should never fail considering we start with an empty ++ * dx_root */ ++ ocfs2_init_dx_root_extent_tree(&et, fs, (char *)dx_root, dx_root->dr_blkno); ++ ret = ocfs2_tree_insert_extent(fs, &et, 0, start_blkno, 1, 0); ++ if (ret) ++ goto out; ++ ++out: ++ return ret; ++} ++ ++static errcode_t ocfs2_dx_dir_lookup_rec(ocfs2_filesys *fs, ++ struct ocfs2_dx_root_block *dx_root, ++ struct ocfs2_extent_list *el, ++ uint32_t major_hash, ++ uint32_t *ret_cpos, ++ uint64_t *ret_phys_blkno, ++ unsigned int *ret_clen) ++{ ++ errcode_t ret = 0; ++ int i, found; ++ struct ocfs2_extent_block *eb; ++ struct ocfs2_extent_rec *rec = NULL; ++ char *eb_buf = NULL; ++ ++ if (el->l_tree_depth) { ++ ret = ocfs2_tree_find_leaf(fs, ++ &dx_root->dr_list, ++ dx_root->dr_blkno, ++ (char *)dx_root, ++ major_hash, &eb_buf); ++ if (ret) ++ goto out; ++ ++ eb = (struct ocfs2_extent_block *)eb_buf; ++ el = &eb->h_list; ++ ++ if (el->l_tree_depth) { ++ ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK; ++ goto out; ++ } ++ } ++ ++ found = 0; ++ for (i = el->l_next_free_rec - 1; i >= 0; i--) { ++ rec = &el->l_recs[i]; ++ if (rec->e_cpos <= major_hash) { ++ found = 1; ++ break; ++ } ++ } ++ if (!found) { ++ ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK; ++ 
goto out; ++ } ++ ++ if (ret_phys_blkno) ++ *ret_phys_blkno = rec->e_blkno; ++ if (ret_cpos) ++ *ret_cpos = rec->e_cpos; ++ if (ret_clen) ++ *ret_clen = rec->e_leaf_clusters; ++ ++out: ++ if (eb_buf) ++ ocfs2_free(&eb_buf); ++ return ret; ++} ++ ++errcode_t ocfs2_dx_dir_lookup(ocfs2_filesys *fs, ++ struct ocfs2_dx_root_block *dx_root, ++ struct ocfs2_extent_list *el, ++ struct ocfs2_dx_hinfo *hinfo, ++ uint32_t *ret_cpos, ++ uint64_t *ret_phys_blkno) ++{ ++ errcode_t ret = 0; ++ unsigned int cend = 0, clen = 0; ++ uint32_t cpos = 0; ++ uint64_t blkno = 0; ++ uint32_t name_hash = hinfo->major_hash; ++ ++ ret = ocfs2_dx_dir_lookup_rec(fs, dx_root, el, ++ name_hash, &cpos, &blkno, &clen); ++ if (ret) ++ goto out; ++ cend = cpos + clen; ++ if (name_hash >= cend) { ++ blkno += ocfs2_clusters_to_blocks(fs, clen - 1); ++ cpos += clen - 1; ++ } else { ++ blkno += ocfs2_clusters_to_blocks(fs, name_hash - cpos); ++ cpos = name_hash; ++ } ++ ++ blkno += ocfs2_dx_dir_hash_idx(fs, hinfo); ++ ++ if (ret_phys_blkno) ++ *ret_phys_blkno = blkno; ++ if (ret_cpos) ++ *ret_cpos = cpos; ++ ++out: ++ return ret; ++} ++ ++static int dx_leaf_sort_cmp(const void *a, const void *b) ++{ ++ const struct ocfs2_dx_entry *e1 = a; ++ const struct ocfs2_dx_entry *e2 = b; ++ uint32_t major_hash1 = e1->dx_major_hash; ++ uint32_t major_hash2 = e2->dx_major_hash; ++ uint32_t minor_hash1 = e1->dx_minor_hash; ++ uint32_t minor_hash2 = e2->dx_minor_hash; ++ ++ if (major_hash1 > major_hash2) ++ return 1; ++ if (major_hash1 < major_hash2) ++ return -1; ++ ++ /* it is not strictly necessary to sort by minor */ ++ if (minor_hash1 > minor_hash2) ++ return 1; ++ if (minor_hash1 < minor_hash2) ++ return -1; ++ return 0; ++} ++ ++static void dx_leaf_sort_swap(void *a, void *b, int size) ++{ ++ struct ocfs2_dx_entry *e1 = a; ++ struct ocfs2_dx_entry *e2 = b; ++ struct ocfs2_dx_entry tmp; ++ ++ assert(size == sizeof (struct ocfs2_dx_entry)); ++ ++ tmp = *e1; ++ *e1 = *e2; ++ *e2 = tmp; ++} ++ ++static int 
ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf) ++{ ++ struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; ++ int i, num = dl_list->de_num_used; ++ ++ for (i = 0; i < (num - 1); i++) { ++ if (dl_list->de_entries[i].dx_major_hash != ++ dl_list->de_entries[i + 1].dx_major_hash) ++ return 0; ++ } ++ return 1; ++} ++ ++/* ++ * Find the optimal value to split this leaf on. This expects the leaf ++ * entries to be in sorted order. ++ * ++ * leaf_cpos is the cpos of the leaf we're splitting. insert_hash is ++ * the hash we want to insert. ++ * ++ * This function is only concerned with the major hash - that which ++ * determines which cluster an item belongs to. ++ */ ++static int ocfs2_dx_dir_find_leaf_split(struct ocfs2_dx_leaf *dx_leaf, ++ uint32_t leaf_cpos, ++ uint32_t insert_hash, ++ uint32_t *split_hash) ++{ ++ struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; ++ int i, num_used = dl_list->de_num_used; ++ int allsame; ++ ++ /* ++ * There's a couple rare, but nasty corner cases we have to ++ * check for here. All of them involve a leaf where all value ++ * have the same hash, which is what we look for first. ++ * ++ * Most of the time, all of the above is false, and we simply ++ * pick the median value for a split. ++ */ ++ allsame = ocfs2_dx_leaf_same_major(dx_leaf); ++ if (allsame) { ++ uint32_t val = dl_list->de_entries[0].dx_major_hash; ++ if (val == insert_hash) { ++ /* ++ * No matter where we would choose to split, ++ * the new entry would want to occupy the same ++ * block as these. Since there's no space left ++ * in their existing block, we know there ++ * won't be space after the split. ++ */ ++ return OCFS2_ET_DIR_NO_SPACE; ++ } ++ ++ if (val == leaf_cpos) { ++ /* ++ * Because val is the same as leaf_cpos (which ++ * is the smallest value this leaf can have), ++ * yet is not equal to insert_hash, then we ++ * know that insert_hash *must* be larger than ++ * val (and leaf_cpos). At least cpos+1 in value. 
++ * ++ * We also know then, that there cannot be an ++ * adjacent extent (otherwise we'd be looking ++ * at it). Choosing this value gives us a ++ * chance to get some continguousness. ++ */ ++ *split_hash = leaf_cpos + 1; ++ return 0; ++ } ++ ++ if (val > insert_hash) { ++ /* ++ * val can not be the same as insert_hash, and ++ * also must be larger than leaf_cpos. Also, ++ * we know that there can't be a leaf between ++ * cpos and val, otherwise the entries with ++ * hash 'val' would be there. ++ */ ++ *split_hash = val; ++ return 0; ++ } ++ ++ *split_hash = insert_hash; ++ return 0; ++ } ++ ++ /* ++ * Since the records are sorted and the checks above ++ * guaranteed that not all records in this block are the same, ++ * we simple travel forward, from the median, and pick the 1st ++ * record whose value is larger than leaf_cpos. ++ */ ++ for (i = (num_used /2); i < num_used; i++) { ++ if (dl_list->de_entries[i].dx_major_hash > leaf_cpos) ++ break; ++ } ++ ++ assert(i < num_used); /* Should be impossible */ ++ *split_hash = dl_list->de_entries[i].dx_major_hash; ++ return 0; ++} ++ ++static errcode_t ocfs2_read_dx_leaves(ocfs2_filesys *fs, ++ uint64_t start, ++ int num, ++ struct ocfs2_dx_leaf **dx_leaves) ++{ ++ errcode_t ret; ++ int i; ++ struct ocfs2_dx_leaf *dx_leaf; ++ for (i = 0; i < num; i++) { ++ assert(!dx_leaves[i]); ++ ret = ocfs2_malloc_block(fs->fs_io, (char **)&dx_leaf); ++ if (ret) ++ goto bail; ++ ret = ocfs2_read_dx_leaf(fs, start + i, (char *)dx_leaf); ++ if (ret) ++ goto bail; ++ dx_leaves[i] = dx_leaf; ++ } ++ goto out; ++ ++bail: ++ for (; i >= 0; i--) { ++ if (dx_leaves[i]) ++ ocfs2_free(&dx_leaves[i]); ++ } ++out: ++ return ret; ++} ++ ++static errcode_t __ocfs2_dx_dir_new_cluster(ocfs2_filesys *fs, ++ uint32_t cpos, ++ struct ocfs2_dx_leaf **dx_leaves, ++ int num_dx_leaves, ++ uint64_t *ret_phys_blkno) ++{ ++ errcode_t ret; ++ uint32_t num; ++ uint64_t phys; ++ ++ ret = ocfs2_new_clusters(fs, 1, 1, &phys, &num); ++ if (ret) ++ goto out; ++ 
assert(num == 1); ++ ret = ocfs2_dx_dir_format_cluster(fs, dx_leaves, ++ num_dx_leaves, phys); ++ if (ret) ++ goto out; ++ ++ *ret_phys_blkno = phys; ++ ++out: ++ return ret; ++} ++ ++static errcode_t ocfs2_dx_dir_new_cluster(ocfs2_filesys *fs, ++ struct ocfs2_extent_tree *et, ++ uint32_t cpos, ++ uint64_t *phys_blocknr, ++ struct ocfs2_dx_leaf **dx_leaves, ++ int num_dx_leaves) ++{ ++ errcode_t ret; ++ uint64_t blkno; ++ ret = __ocfs2_dx_dir_new_cluster(fs, cpos, dx_leaves, ++ num_dx_leaves, &blkno); ++ if (ret) ++ goto out; ++ ++ *phys_blocknr = blkno; ++ ret = ocfs2_tree_insert_extent(fs, et, cpos, blkno, 1, 0); ++ ++out: ++ return ret; ++} ++ ++ ++static errcode_t ocfs2_dx_dir_transfer_leaf(ocfs2_filesys *fs, ++ uint32_t split_hash, ++ struct ocfs2_dx_leaf *tmp_dx_leaf, ++ struct ocfs2_dx_leaf **orig_dx_leaves, ++ uint64_t orig_dx_leaves_blkno, ++ struct ocfs2_dx_leaf **new_dx_leaves, ++ uint64_t new_dx_leaves_blkno, ++ int num_dx_leaves) ++{ ++ errcode_t ret; ++ int i, j, num_used; ++ uint32_t major_hash; ++ struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf; ++ struct ocfs2_dx_entry_list *orig_list, *new_list, *tmp_list; ++ struct ocfs2_dx_entry *dx_entry; ++ ++ tmp_list = &tmp_dx_leaf->dl_list; ++ ++ for (i = 0; i < num_dx_leaves; i++) { ++ orig_dx_leaf = orig_dx_leaves[i]; ++ orig_list = &orig_dx_leaf->dl_list; ++ new_dx_leaf = new_dx_leaves[i]; ++ new_list = &new_dx_leaf->dl_list; ++ ++ num_used = orig_list->de_num_used; ++ ++ memcpy(tmp_dx_leaf, orig_dx_leaf, fs->fs_blocksize); ++ tmp_list->de_num_used = 0; ++ memset(&tmp_list->de_entries, 0, ++ sizeof(struct ocfs2_dx_entry) * num_used); ++ ++ for (j = 0; j < num_used; j++) { ++ dx_entry = &orig_list->de_entries[j]; ++ major_hash = dx_entry->dx_major_hash; ++ if (major_hash >= split_hash) ++ ocfs2_dx_dir_leaf_insert_tail(new_dx_leaf, ++ dx_entry); ++ else ++ ocfs2_dx_dir_leaf_insert_tail(tmp_dx_leaf, ++ dx_entry); ++ } ++ memcpy(orig_dx_leaf, tmp_dx_leaf, fs->fs_blocksize); ++ ++ ret = 
ocfs2_write_dx_leaf(fs, orig_dx_leaves_blkno + i, ++ (char *)orig_dx_leaf); ++ if (ret) ++ goto out; ++ ret = ocfs2_write_dx_leaf(fs, new_dx_leaves_blkno + i, ++ (char *)new_dx_leaf); ++ if (ret) ++ goto out; ++ } ++ ++out: ++ return ret; ++} ++ ++static int ocfs2_dx_dir_free_leaves(ocfs2_filesys *fs, ++ struct ocfs2_dx_leaf **dx_leaves) ++{ ++ int i, num; ++ ++ num = ocfs2_clusters_to_blocks(fs, 1); ++ for (i = 0; i < num; i++) { ++ if (dx_leaves[i]) ++ ocfs2_free(&dx_leaves[i]); ++ } ++ free(dx_leaves); ++ return 0; ++} ++ ++/* from Linux kernel lib/sort.c */ ++static void ocfs2_sort(void *base, size_t num, size_t size, ++ int (*cmp_func)(const void *, const void *), ++ void (*swap_func)(void *, void *, int size)) ++{ ++ /* pre-scale counters for performance */ ++ int i = (num/2 - 1) * size, n = num * size, c, r; ++ ++ /* heapify */ ++ for (; i >= 0; i -= size) { ++ for (r = i; r * 2 + size < n; r = c) { ++ c = r * 2 + size; ++ if (c < n - size && ++ cmp_func(base + c, base + c + size) < 0) ++ c += size; ++ if (cmp_func(base + r, base + c) >= 0) ++ break; ++ swap_func(base + r, base + c, size); ++ } ++ } ++ ++ /* sort */ ++ for (i = n - size; i > 0; i -= size) { ++ swap_func(base, base + i, size); ++ for (r = 0; r * 2 + size < i; r = c) { ++ c = r * 2 + size; ++ if (c < i - size && ++ cmp_func(base + c, base + c + size) < 0) ++ c += size; ++ if (cmp_func(base + r, base + c) >= 0) ++ break; ++ swap_func(base + r, base + c, size); ++ } ++ } ++ ++} ++ ++static errcode_t ocfs2_dx_dir_rebalance(ocfs2_filesys *fs, ++ struct ocfs2_dx_root_block *dx_root, ++ struct ocfs2_dx_leaf *dx_leaf, ++ struct ocfs2_dx_hinfo *hinfo, ++ uint32_t leaf_cpos, ++ uint64_t leaf_blkno) ++{ ++ struct ocfs2_extent_tree et; ++ struct ocfs2_dx_leaf **orig_dx_leaves = NULL; ++ struct ocfs2_dx_leaf **new_dx_leaves = NULL; ++ struct ocfs2_dx_leaf *tmp_dx_leaf = NULL; ++ uint32_t insert_hash = hinfo->major_hash; ++ uint32_t split_hash, cpos; ++ uint64_t orig_leaves_start, new_leaves_start; ++ 
errcode_t ret; ++ int num_used, num_dx_leaves; ++ ++ ocfs2_init_dx_root_extent_tree(&et, fs, (char *)dx_root, dx_root->dr_blkno); ++ ++ if (dx_root->dr_clusters == UINT_MAX) { ++ ret = OCFS2_ET_DIR_NO_SPACE; ++ goto out; ++ } ++ ++ num_used = dx_leaf->dl_list.de_num_used; ++ if (num_used < dx_leaf->dl_list.de_count) { ++ ret = OCFS2_ET_DX_BALANCE_EMPTY_LEAF; ++ goto out; ++ } ++ ++ orig_dx_leaves = ocfs2_dx_dir_alloc_leaves(fs, &num_dx_leaves); ++ if (!orig_dx_leaves) { ++ ret = OCFS2_ET_NO_MEMORY; ++ goto out; ++ } ++ ++ new_dx_leaves = ocfs2_dx_dir_alloc_leaves(fs, NULL); ++ if (!new_dx_leaves) { ++ ret = OCFS2_ET_NO_MEMORY; ++ goto out; ++ } ++ ++ ocfs2_sort(dx_leaf->dl_list.de_entries, num_used, ++ sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp, ++ dx_leaf_sort_swap); ++ ++ ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, ++ insert_hash, &split_hash); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, (char **)(&tmp_dx_leaf)); ++ if (ret) ++ goto out; ++ orig_leaves_start = ocfs2_blocks_to_clusters(fs, leaf_blkno); ++ ret = ocfs2_read_dx_leaves(fs, orig_leaves_start, num_dx_leaves, ++ orig_dx_leaves); ++ if (ret) ++ goto out; ++ ++ cpos = split_hash; ++ ret = ocfs2_dx_dir_new_cluster(fs, &et, cpos, &new_leaves_start, ++ new_dx_leaves, num_dx_leaves); ++ if (ret) ++ goto out; ++ ret = ocfs2_dx_dir_transfer_leaf(fs, split_hash, tmp_dx_leaf, ++ orig_dx_leaves, orig_leaves_start, ++ new_dx_leaves, new_leaves_start, ++ num_dx_leaves); ++ ++out: ++ if (tmp_dx_leaf) ++ ocfs2_free((char **)(&tmp_dx_leaf)); ++ ++ if (orig_dx_leaves) ++ ocfs2_dx_dir_free_leaves(fs, orig_dx_leaves); ++ if (new_dx_leaves) ++ ocfs2_dx_dir_free_leaves(fs, new_dx_leaves); ++ return ret; ++} ++ ++static errcode_t ocfs2_find_dir_space_dx(ocfs2_filesys *fs, ++ struct ocfs2_dx_root_block *dx_root, ++ const char *name, int namelen, ++ struct ocfs2_dir_lookup_result *lookup) ++{ ++ errcode_t ret; ++ int rebalanced = 0; ++ struct ocfs2_dx_leaf *dx_leaf; ++ char *dx_leaf_buf = 
NULL; ++ uint64_t blkno; ++ uint32_t leaf_cpos; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &dx_leaf_buf); ++ if (ret) ++ goto out; ++ ++restart_search: ++ ret = ocfs2_dx_dir_lookup(fs, dx_root, &dx_root->dr_list, ++ &lookup->dl_hinfo, &leaf_cpos, &blkno); ++ if (ret) ++ goto out; ++ ret = ocfs2_read_dx_leaf(fs, blkno, dx_leaf_buf); ++ if (ret) ++ goto out; ++ dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf; ++ if (dx_leaf->dl_list.de_num_used >= dx_leaf->dl_list.de_count) { ++ if (rebalanced) { ++ /* ++ * Rebalancing should have provided us with ++ * space in an appropriate leaf. ++ */ ++ ret = OCFS2_ET_DIR_NO_SPACE; ++ goto out; ++ } ++ ++ ret = ocfs2_dx_dir_rebalance(fs, dx_root, dx_leaf, ++ &lookup->dl_hinfo, leaf_cpos, blkno); ++ if (ret) ++ goto out; ++ rebalanced = 1; ++ goto restart_search; ++ } ++ lookup->dl_dx_leaf_blkno = blkno; ++ ++out: ++ if (dx_leaf_buf) ++ ocfs2_free(&dx_leaf_buf); ++ return ret; ++} ++ ++/* ++ * Hashing code adapted from ext3 ++ */ ++#define DELTA 0x9E3779B9 ++ ++static void TEA_transform(uint32_t buf[4], uint32_t const in[]) ++{ ++ uint32_t sum = 0; ++ uint32_t b0 = buf[0], b1 = buf[1]; ++ uint32_t a = in[0], b = in[1], c = in[2], d = in[3]; ++ int n = 16; ++ ++ do { ++ sum += DELTA; ++ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); ++ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); ++ } while (--n); ++ ++ buf[0] += b0; ++ buf[1] += b1; ++} ++ ++static void str2hashbuf(const char *msg, int len, uint32_t *buf, int num) ++{ ++ uint32_t pad, val; ++ int i; ++ ++ pad = (uint32_t)len | ((uint32_t)len << 8); ++ pad |= pad << 16; ++ ++ val = pad; ++ if (len > (num * 4)) ++ len = num * 4; ++ for (i = 0; i < len; i++) { ++ if ((i % 4) == 0) ++ val = pad; ++ val = msg[i] + (val << 8); ++ if ((i % 4) == 3) { ++ *buf++ = val; ++ val = pad; ++ num --; ++ } ++ } ++ if (--num >= 0) ++ *buf++ = val; ++ while(--num >= 0) ++ *buf++ = pad; ++} ++ ++void ocfs2_dx_dir_name_hash(ocfs2_filesys *fs, ++ const char *name, ++ int len, ++ struct ocfs2_dx_hinfo 
*hinfo) ++{ ++ const char *p; ++ uint32_t in[8], buf[4]; ++ ++ /* ++ * XXX: Is this really necessary, if the index is never looked ++ * at by readdir? Is a hash value of '0' a bad idea ? ++ */ ++ if ((len == 1 && !strncmp(".", name, 1)) || ++ (len == 2 && !strncmp("..", name, 2))) { ++ buf[0] = buf[1] = 0; ++ goto out; ++ } ++ ++ memcpy(buf, OCFS2_RAW_SB(fs->fs_super)->s_dx_seed, sizeof(buf)); ++ ++ p = name; ++ while(len > 0) { ++ str2hashbuf(p, len, in, 4); ++ TEA_transform(buf, in); ++ len -= 16; ++ p += 16; ++ } ++ ++out: ++ hinfo->major_hash = buf[0]; ++ hinfo->minor_hash = buf[1]; ++} ++ ++static int ocfs2_dx_dir_insert(struct ocfs2_dir_entry *dentry, ++ uint64_t blocknr, ++ int offset, ++ int blocksize, ++ char *buf, ++ void *priv_data) ++{ ++ errcode_t ret = 0; ++ char *dx_buf = NULL; ++ char *dx_leaf_buf = NULL; ++ struct ocfs2_dx_root_block *dx_root = NULL; ++ struct ocfs2_dx_leaf *dx_leaf = NULL; ++ struct ocfs2_dir_lookup_result lookup; ++ struct ocfs2_dx_entry_list *entry_list; ++ struct dx_insert_ctxt *ctxt = (struct dx_insert_ctxt *)priv_data; ++ ocfs2_filesys *fs = ctxt->fs; ++ uint64_t dx_root_blkno = ctxt->dx_root_blkno; ++ int write_dx_leaf = 0; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &dx_buf); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &dx_leaf_buf); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_read_dx_root(fs, dx_root_blkno, dx_buf); ++ if (ret) ++ goto out; ++ ++ dx_root = (struct ocfs2_dx_root_block *)dx_buf; ++ memset(&lookup, 0, sizeof(struct ocfs2_dir_lookup_result)); ++ ocfs2_dx_dir_name_hash(fs, dentry->name, ++ dentry->name_len, &lookup.dl_hinfo); ++ ++ if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) { ++ if (ocfs2_inline_dx_has_space(dx_root)) { ++ entry_list = &dx_root->dr_entries; ++ goto insert_into_entries; ++ } else { ++ /* root block is full, expand it to an extent */ ++ ret = ocfs2_expand_inline_dx_root(fs, dx_root); ++ if (ret) ++ goto out; ++ } ++ } ++ ++ ret = ocfs2_find_dir_space_dx(fs, dx_root, ++ 
dentry->name, dentry->name_len, &lookup); ++ if (ret) ++ goto out; ++ ret = ocfs2_read_dx_leaf(fs, lookup.dl_dx_leaf_blkno, dx_leaf_buf); ++ if (ret) ++ goto out; ++ dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf; ++ entry_list = &dx_leaf->dl_list; ++ write_dx_leaf = 1; ++ ++insert_into_entries: ++ ocfs2_dx_entry_list_insert(entry_list, &lookup.dl_hinfo, blocknr); ++ if (write_dx_leaf) { ++ ret = ocfs2_write_dx_leaf(fs, dx_leaf->dl_blkno, dx_leaf); ++ if (ret) ++ goto out; ++ } ++ dx_root->dr_num_entries += 1; ++ ret = ocfs2_write_dx_root(fs, dx_root_blkno, dx_buf); ++out: ++ if (dx_leaf_buf) ++ ocfs2_free(&dx_leaf_buf); ++ if (dx_buf) ++ ocfs2_free(&dx_buf); ++ return ret; ++} ++ ++errcode_t ocfs2_dx_dir_insert_entry(ocfs2_filesys *fs, uint64_t dir, const char *name, ++ uint64_t ino, uint64_t blkno) ++{ ++ struct ocfs2_dir_entry dummy_de; ++ struct dx_insert_ctxt dummy_ctxt; ++ char *di_buf = NULL; ++ struct ocfs2_dinode *di; ++ errcode_t ret = 0; ++ ++ if (!ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super))) ++ goto out; ++ ++ assert(name); ++ memset(&dummy_de, 0, sizeof(struct ocfs2_dir_entry)); ++ memcpy(dummy_de.name, name, strlen(name)); ++ dummy_de.name_len = strlen(name); ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &di_buf); ++ if (ret) ++ goto out; ++ ret = ocfs2_read_inode(fs, dir, di_buf); ++ if (ret) ++ goto out; ++ di = (struct ocfs2_dinode *)di_buf; ++ ++ if (!(di->i_dyn_features & OCFS2_INDEXED_DIR_FL)) ++ goto out; ++ ++ memset(&dummy_ctxt, 0, sizeof(struct dx_insert_ctxt)); ++ dummy_ctxt.dir_blkno = dir; ++ dummy_ctxt.fs = fs; ++ dummy_ctxt.dx_root_blkno = di->i_dx_root; ++ ++ ret = ocfs2_dx_dir_insert(&dummy_de, blkno, 0, ++ fs->fs_blocksize, NULL, &dummy_ctxt); ++out: ++ if (di_buf) ++ ocfs2_free(&di_buf); ++ return ret; ++} ++ ++ ++/* ++ * This function overwite the indexed dir attribute of ++ * the given inode. The caller should make sure the dir's ++ * indexed tree is truncated. 
++ * Currently tunefs.ocfs2 is the only user, before calling ++ * this function, tunefs.ocfs2 makes sure there is space ++ * for directory trailer. So directory entry moves here. ++ */ ++errcode_t ocfs2_dx_dir_build(ocfs2_filesys *fs, ++ uint64_t dir) ++{ ++ errcode_t ret = 0, err; ++ uint64_t dr_blkno; ++ char *dx_buf = NULL, *di_buf = NULL; ++ struct ocfs2_dinode *di; ++ struct ocfs2_dx_root_block *dx_root; ++ struct dx_insert_ctxt ctxt; ++ ocfs2_quota_hash *usrhash = NULL, *grphash = NULL; ++ uint32_t uid, gid; ++ long long change; ++ ++ ret = ocfs2_load_fs_quota_info(fs); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_init_quota_change(fs, &usrhash, &grphash); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &di_buf); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_read_inode(fs, dir, di_buf); ++ if (ret) ++ goto out; ++ di = (struct ocfs2_dinode *)di_buf; ++ ++ if ((ocfs2_dir_indexed(di)) || ++ (di->i_dyn_features & OCFS2_INLINE_DATA_FL)) ++ goto out; ++ ++ ret = ocfs2_new_dx_root(fs, di, &dr_blkno); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &dx_buf); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_read_dx_root(fs, dr_blkno, dx_buf); ++ if (ret) ++ goto out; ++ dx_root = (struct ocfs2_dx_root_block *)dx_buf; ++ ++ ret = ocfs2_init_dir_trailers(fs, di, dx_root); ++ if (ret) ++ goto out; ++ ++ dx_root->dr_dir_blkno = di->i_blkno; ++ dx_root->dr_num_entries = 0; ++ dx_root->dr_entries.de_count = ocfs2_dx_entries_per_root(fs->fs_blocksize); ++ ++ di->i_dx_root = dr_blkno; ++ di->i_dyn_features |= OCFS2_INDEXED_DIR_FL; ++ ++ ret = ocfs2_write_dx_root(fs, dr_blkno, dx_buf); ++ if (ret) ++ goto out; ++ ret = ocfs2_write_inode(fs, dir, di_buf); ++ if (ret) ++ goto out; ++ ++ ctxt.dir_blkno = dir; ++ ctxt.dx_root_blkno = dr_blkno; ++ ctxt.fs = fs; ++ ret = ocfs2_dir_iterate(fs, dir, 0, NULL, ++ ocfs2_dx_dir_insert, &ctxt); ++ ++ /* check quota for dx_leaf */ ++ ret = ocfs2_read_dx_root(fs, dr_blkno, dx_buf); ++ if (ret) ++ goto out; ++ ret 
= ocfs2_read_inode(fs, dir, di_buf); ++ if (ret) ++ goto out; ++ ++ change = ocfs2_clusters_to_bytes(fs, ++ dx_root->dr_clusters); ++ uid = di->i_uid; ++ gid = di->i_gid; ++ ++ ret = ocfs2_apply_quota_change(fs, usrhash, grphash, ++ uid, gid, change, 0); ++ if (ret) { ++ /* exceed quota, truncate the indexed tree */ ++ ret = ocfs2_dx_dir_truncate(fs, dir); ++ } ++ ++out: ++ err = ocfs2_finish_quota_change(fs, usrhash, grphash); ++ if (!ret) ++ ret = err; ++ ++ if (di_buf) ++ ocfs2_free(&di_buf); ++ if (dx_buf) ++ ocfs2_free(&dx_buf); ++ ++ return ret; ++} ++ ++void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list, ++ int index) ++{ ++ int num_used = entry_list->de_num_used; ++ if (num_used == 1 || index == (num_used - 1)) ++ goto clear; ++ ++ memmove(&entry_list->de_entries[index], ++ &entry_list->de_entries[index + 1], ++ (num_used - index - 1)*sizeof(struct ocfs2_dx_entry)); ++clear: ++ num_used --; ++ memset(&entry_list->de_entries[num_used], 0, ++ sizeof(struct ocfs2_dx_entry)); ++ entry_list->de_num_used = num_used; ++} ++ ++static int ocfs2_match(int len, ++ const char *name, ++ struct ocfs2_dir_entry *de) ++{ ++ if (len != de->name_len) ++ return 0; ++ if (!de->inode) ++ return 0; ++ return !memcmp((char *)name, de->name, len); ++} ++ ++int ocfs2_check_dir_entry(ocfs2_filesys *fs, ++ struct ocfs2_dir_entry *de, ++ char *dir_buf, ++ unsigned int offset) ++{ ++ int rlen = de->rec_len; ++ int ret = 1; ++ ++ if ((rlen < OCFS2_DIR_REC_LEN(1)) || ++ (rlen % 4 != 0) || ++ (rlen < OCFS2_DIR_REC_LEN(de->name_len)) || ++ (((char *)de - dir_buf) > fs->fs_blocksize)) ++ ret = 0; ++ ++ return ret; ++} ++ ++int ocfs2_search_dirblock(ocfs2_filesys *fs, ++ char *dir_buf, ++ const char *name, ++ int namelen, ++ unsigned int bytes, ++ struct ocfs2_dir_entry **res_dir) ++{ ++ struct ocfs2_dir_entry *de; ++ char *dlimit, *de_buf; ++ int de_len, offset = 0; ++ int ret = 0; ++ ++ de_buf = (char *)dir_buf; ++ dlimit = de_buf + bytes; ++ ++ while(de_buf < dlimit) { 
++ de = (struct ocfs2_dir_entry *)de_buf; ++ ++ if ((de_buf + namelen <= dlimit) && ++ ocfs2_match(namelen, name, de)) { ++ if (!ocfs2_check_dir_entry(fs, de, dir_buf, offset)) { ++ ret = -1; ++ goto out; ++ } ++ if (res_dir) ++ *res_dir = de; ++ ret = 1; ++ goto out; ++ } ++ ++ de_len = de->rec_len; ++ if (de_len <= 0) { ++ ret = -1; ++ goto out; ++ } ++ de_buf += de_len; ++ offset += de_len; ++ } ++out: ++ return ret; ++} ++ ++errcode_t ocfs2_dx_dir_search(ocfs2_filesys *fs, ++ const char *name, ++ int namelen, ++ struct ocfs2_dx_root_block *dx_root, ++ struct ocfs2_dir_lookup_result *lookup) ++{ ++ errcode_t ret; ++ char *di_buf = NULL, *dir_buf = NULL, *dx_leaf_buf = NULL; ++ struct ocfs2_dx_entry_list *entry_list; ++ struct ocfs2_dx_leaf *dx_leaf; ++ struct ocfs2_dx_entry *dx_entry; ++ struct ocfs2_dir_entry *dir_ent; ++ uint32_t leaf_cpos; ++ uint64_t blkno; ++ int i, found; ++ ++ if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) ++ entry_list = &dx_root->dr_entries; ++ else { ++ ret = ocfs2_dx_dir_lookup(fs, dx_root, &dx_root->dr_list, ++ &lookup->dl_hinfo, &leaf_cpos, &blkno); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &dx_leaf_buf); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_read_dx_leaf(fs, blkno, dx_leaf_buf); ++ if (ret) ++ goto out; ++ dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf; ++ entry_list = &dx_leaf->dl_list; ++ } ++ ++ assert(entry_list->de_count > 0); ++ assert(entry_list->de_num_used > 0); ++ assert(dx_root->dr_num_entries > 0); ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &dir_buf); ++ if (ret) ++ goto out; ++ ++ found = 0; ++ for (i = 0; i < entry_list->de_num_used; i++) { ++ dx_entry = &entry_list->de_entries[i]; ++ if ((lookup->dl_hinfo.major_hash != dx_entry->dx_major_hash) || ++ (lookup->dl_hinfo.minor_hash != dx_entry->dx_minor_hash)) ++ continue; ++ ++ ret = ocfs2_read_blocks(fs, dx_entry->dx_dirent_blk, 1, dir_buf); ++ if (ret) ++ goto out; ++ ++ found = ocfs2_search_dirblock(fs, dir_buf, name, namelen, ++ 
fs->fs_blocksize, &dir_ent); ++ if (found == 1) ++ break; ++ ++ if (found == -1) { ++ ret = OCFS2_ET_DIR_CORRUPTED; ++ goto out; ++ } ++ } ++ ++ if (found <= 0) { ++ ret = OCFS2_ET_DIRENT_NOT_FOUND; ++ goto out; ++ } ++ ++ lookup->dl_leaf = dir_buf; ++ lookup->dl_leaf_blkno = dx_entry->dx_dirent_blk; ++ lookup->dl_entry = dir_ent; ++ lookup->dl_dx_entry = dx_entry; ++ lookup->dl_dx_entry_idx = i; ++ if (!(dx_root->dr_flags & OCFS2_DX_FLAG_INLINE)) { ++ lookup->dl_dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf; ++ lookup->dl_dx_leaf_blkno = blkno; ++ } ++ ret = 0; ++out: ++ if (di_buf) ++ ocfs2_free(&di_buf); ++ if (ret) { ++ if (dir_buf) ++ ocfs2_free(&dir_buf); ++ if (dx_leaf_buf) ++ ocfs2_free(&dx_leaf_buf); ++ } ++ return ret; ++} ++ ++void release_lookup_res(struct ocfs2_dir_lookup_result *res) ++{ ++ if (res->dl_leaf) ++ ocfs2_free(&res->dl_leaf); ++ if (res->dl_dx_leaf) ++ ocfs2_free(&res->dl_dx_leaf); ++} ++ ++ +diff --git a/libocfs2/dir_iterate.c b/libocfs2/dir_iterate.c +index 1064d9f..8a3f5a9 100644 +--- a/libocfs2/dir_iterate.c ++++ b/libocfs2/dir_iterate.c +@@ -68,6 +68,7 @@ errcode_t ocfs2_dir_iterate2(ocfs2_filesys *fs, + int (*func)(uint64_t dir, + int entry, + struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +@@ -131,6 +132,7 @@ out: + + struct xlate { + int (*func)(struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +@@ -140,12 +142,12 @@ struct xlate { + + static int xlate_func(uint64_t dir, + int entry, +- struct ocfs2_dir_entry *dirent, int offset, +- int blocksize, char *buf, void *priv_data) ++ struct ocfs2_dir_entry *dirent, uint64_t blocknr, ++ int offset, int blocksize, char *buf, void *priv_data) + { + struct xlate *xl = (struct xlate *) priv_data; + +- return (*xl->func)(dirent, offset, blocksize, buf, xl->real_private); ++ return (*xl->func)(dirent, blocknr, offset, blocksize, buf, xl->real_private); + } + + extern errcode_t 
ocfs2_dir_iterate(ocfs2_filesys *fs, +@@ -153,6 +155,7 @@ extern errcode_t ocfs2_dir_iterate(ocfs2_filesys *fs, + int flags, + char *block_buf, + int (*func)(struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +@@ -169,6 +172,7 @@ extern errcode_t ocfs2_dir_iterate(ocfs2_filesys *fs, + } + + static int ocfs2_process_dir_entry(ocfs2_filesys *fs, ++ uint64_t blocknr, + unsigned int offset, + int entry, + int *changed, +@@ -203,7 +207,7 @@ static int ocfs2_process_dir_entry(ocfs2_filesys *fs, + ret = (ctx->func)(ctx->dir, + (next_real_entry > offset) ? + OCFS2_DIRENT_DELETED_FILE : entry, +- dirent, offset, ++ dirent, blocknr, offset, + fs->fs_blocksize, ctx->buf, + ctx->priv_data); + if (entry < OCFS2_DIRENT_OTHER_FILE) +@@ -250,7 +254,7 @@ static int ocfs2_inline_dir_iterate(ocfs2_filesys *fs, + + entry = OCFS2_DIRENT_DOT_FILE; + +- ret = ocfs2_process_dir_entry(fs, offset, entry, &changed, ++ ret = ocfs2_process_dir_entry(fs, di->i_blkno, offset, entry, &changed, + &do_abort, ctx); + if (ret) + return ret; +@@ -291,7 +295,7 @@ int ocfs2_process_dir_block(ocfs2_filesys *fs, + if (ctx->errcode) + return OCFS2_BLOCK_ABORT; + +- ret = ocfs2_process_dir_entry(fs, offset, entry, &changed, ++ ret = ocfs2_process_dir_entry(fs, blocknr, offset, entry, &changed, + &do_abort, ctx); + if (ret) + return ret; +diff --git a/libocfs2/dir_iterate.h b/libocfs2/dir_iterate.h +index 2d9587b..7f44702 100644 +--- a/libocfs2/dir_iterate.h ++++ b/libocfs2/dir_iterate.h +@@ -35,6 +35,7 @@ struct dir_context { + int (*func)(uint64_t dir, + int entry, + struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +diff --git a/libocfs2/dirblock.c b/libocfs2/dirblock.c +index 06a1b64..c22d843 100644 +--- a/libocfs2/dirblock.c ++++ b/libocfs2/dirblock.c +@@ -33,7 +33,6 @@ + #include "ocfs2/byteorder.h" + #include "ocfs2/ocfs2.h" + +- + unsigned int ocfs2_dir_trailer_blk_off(ocfs2_filesys *fs) + { + return 
fs->fs_blocksize - sizeof(struct ocfs2_dir_block_trailer); +@@ -79,6 +78,15 @@ int ocfs2_skip_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di, + return 1; + } + ++/* ++ * We are sure there is prepared space for the trailer, no directory ++ * entry will overlap with the trailer: ++ * - if we rebuild the indexed tree for a directory, no dir entry ++ * will overwrite the trailer's space. ++ * - if we build the indexed tree by tunefs.ocfs2, it will enable ++ * meta ecc feature before enable indexed dirs feature. Which ++ * means space for each trailer is well prepared already. ++ */ + void ocfs2_init_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di, + uint64_t blkno, void *buf) + { +@@ -105,9 +113,9 @@ static void ocfs2_swap_dir_entry(struct ocfs2_dir_entry *dirent) + static errcode_t ocfs2_swap_dir_entries_direction(void *buf, uint64_t bytes, + int to_cpu) + { +- char *p, *end; ++ char *p, *end; + struct ocfs2_dir_entry *dirent; +- unsigned int name_len, rec_len; ++ unsigned int name_len, rec_len; + errcode_t retval = 0; + + p = (char *) buf; +@@ -160,8 +168,8 @@ void ocfs2_swap_dir_trailer(struct ocfs2_dir_block_trailer *trailer) + errcode_t ocfs2_read_dir_block(ocfs2_filesys *fs, struct ocfs2_dinode *di, + uint64_t block, void *buf) + { +- errcode_t retval; +- int end = fs->fs_blocksize; ++ errcode_t retval; ++ int end = fs->fs_blocksize; + struct ocfs2_dir_block_trailer *trailer = NULL; + + retval = ocfs2_read_blocks(fs, block, 1, buf); +@@ -197,9 +205,9 @@ out: + errcode_t ocfs2_write_dir_block(ocfs2_filesys *fs, struct ocfs2_dinode *di, + uint64_t block, void *inbuf) + { +- errcode_t retval; +- char *buf = NULL; +- int end = fs->fs_blocksize; ++ errcode_t retval; ++ char *buf = NULL; ++ int end = fs->fs_blocksize; + struct ocfs2_dir_block_trailer *trailer = NULL; + + retval = ocfs2_malloc_block(fs->fs_io, &buf); +@@ -230,28 +238,36 @@ out: + return retval; + } + +-static void ocfs2_swap_dx_entry_to_cpu(struct ocfs2_dx_entry *dx_entry) ++static void 
ocfs2_swap_dx_entry(struct ocfs2_dx_entry *dx_entry) + { +- if (cpu_is_little_endian) +- return; +- + dx_entry->dx_major_hash = bswap_32(dx_entry->dx_major_hash); + dx_entry->dx_minor_hash = bswap_32(dx_entry->dx_minor_hash); + dx_entry->dx_dirent_blk = bswap_64(dx_entry->dx_dirent_blk); + } + +-static void ocfs2_swap_dx_entry_list_to_cpu(struct ocfs2_dx_entry_list *dl_list) ++static void ocfs2_swap_dx_entry_list(struct ocfs2_dx_entry_list *dl_list) + { + int i; + +- if (cpu_is_little_endian) +- return; +- + dl_list->de_count = bswap_16(dl_list->de_count); + dl_list->de_num_used = bswap_16(dl_list->de_num_used); + + for (i = 0; i < dl_list->de_count; i++) +- ocfs2_swap_dx_entry_to_cpu(&dl_list->de_entries[i]); ++ ocfs2_swap_dx_entry(&dl_list->de_entries[i]); ++} ++ ++static void ocfs2_swap_dx_entry_list_to_cpu(struct ocfs2_dx_entry_list *dl_list) ++{ ++ if (cpu_is_little_endian) ++ return; ++ ocfs2_swap_dx_entry_list(dl_list); ++} ++ ++static void ocfs2_swap_dx_entry_list_from_cpu(struct ocfs2_dx_entry_list *dl_list) ++{ ++ if (cpu_is_little_endian) ++ return; ++ ocfs2_swap_dx_entry_list(dl_list); + } + + static void ocfs2_swap_dx_root_to_cpu(ocfs2_filesys *fs, +@@ -276,45 +292,124 @@ static void ocfs2_swap_dx_root_to_cpu(ocfs2_filesys *fs, + ocfs2_swap_extent_list_to_cpu(fs, dx_root, &dx_root->dr_list); + } + ++static void ocfs2_swap_dx_root_from_cpu(ocfs2_filesys *fs, ++ struct ocfs2_dx_root_block *dx_root) ++{ ++ if (cpu_is_little_endian) ++ return; ++ ++ dx_root->dr_suballoc_slot = bswap_16(dx_root->dr_suballoc_slot); ++ dx_root->dr_suballoc_bit = bswap_16(dx_root->dr_suballoc_bit); ++ dx_root->dr_fs_generation = bswap_32(dx_root->dr_fs_generation); ++ dx_root->dr_blkno = bswap_64(dx_root->dr_blkno); ++ dx_root->dr_last_eb_blk = bswap_64(dx_root->dr_last_eb_blk); ++ dx_root->dr_clusters = bswap_32(dx_root->dr_clusters); ++ dx_root->dr_dir_blkno = bswap_64(dx_root->dr_dir_blkno); ++ dx_root->dr_num_entries = bswap_32(dx_root->dr_num_entries); ++ 
dx_root->dr_free_blk = bswap_64(dx_root->dr_free_blk); ++ ++ if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) ++ ocfs2_swap_dx_entry_list_from_cpu(&dx_root->dr_entries); ++ else ++ ocfs2_swap_extent_list_from_cpu(fs, dx_root, &dx_root->dr_list); ++} ++ ++/* XXX: should use the errcode_t return value */ + errcode_t ocfs2_read_dx_root(ocfs2_filesys *fs, uint64_t block, + void *buf) + { +- errcode_t ret; ++ errcode_t ret; + struct ocfs2_dx_root_block *dx_root; ++ char *dx_root_buf = NULL; + +- ret = ocfs2_read_blocks(fs, block, 1, buf); ++ ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf); + if (ret) +- return ret; ++ goto out; + +- dx_root = (struct ocfs2_dx_root_block *)buf; +- ret = ocfs2_validate_meta_ecc(fs, buf, &dx_root->dr_check); ++ ret = ocfs2_read_blocks(fs, block, 1, dx_root_buf); + if (ret) +- return ret; ++ goto out; ++ ++ dx_root = (struct ocfs2_dx_root_block *)dx_root_buf; ++ ret = ocfs2_validate_meta_ecc(fs, dx_root_buf, &dx_root->dr_check); ++ if (ret) ++ goto out; + + if (memcmp(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE, +- strlen(OCFS2_DX_ROOT_SIGNATURE))) +- return OCFS2_ET_DIR_CORRUPTED; ++ strlen(OCFS2_DX_ROOT_SIGNATURE))) { ++ ret = OCFS2_ET_DIR_CORRUPTED; ++ goto out; ++ } + + ocfs2_swap_dx_root_to_cpu(fs, dx_root); ++ memcpy(buf, dx_root_buf, fs->fs_blocksize); ++ ret = 0; ++out: ++ if (dx_root_buf) ++ ocfs2_free(&dx_root_buf); ++ return ret; ++} + +- return 0; ++errcode_t ocfs2_write_dx_root(ocfs2_filesys *fs, uint64_t block, ++ char *buf) ++{ ++ errcode_t ret; ++ char *dx_root_buf = NULL; ++ struct ocfs2_dx_root_block *dx_root; ++ ++ if (!(fs->fs_flags & OCFS2_FLAG_RW)) ++ return OCFS2_ET_RO_FILESYS; ++ ++ if ((block < OCFS2_SUPER_BLOCK_BLKNO) || ++ (block > fs->fs_blocks)) ++ return OCFS2_ET_BAD_BLKNO; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf); ++ if (ret) ++ goto out; ++ ++ memcpy(dx_root_buf, buf, fs->fs_blocksize); ++ ++ dx_root = (struct ocfs2_dx_root_block *)dx_root_buf; ++ ocfs2_swap_dx_root_from_cpu(fs, dx_root); ++ 
++ ocfs2_compute_meta_ecc(fs, dx_root_buf, &dx_root->dr_check); ++ ret = io_write_block(fs->fs_io, block, 1, dx_root_buf); ++ if (!ret) ++ fs->fs_flags |= OCFS2_FLAG_CHANGED; ++ ++out: ++ if (dx_root_buf) ++ ocfs2_free(&dx_root_buf); ++ return ret; ++} ++ ++static void ocfs2_swap_dx_leaf(struct ocfs2_dx_leaf *dx_leaf) ++{ ++ dx_leaf->dl_blkno = bswap_64(dx_leaf->dl_blkno); ++ dx_leaf->dl_fs_generation = bswap_64(dx_leaf->dl_fs_generation); ++ ++ ocfs2_swap_dx_entry_list(&dx_leaf->dl_list); + } + + static void ocfs2_swap_dx_leaf_to_cpu(struct ocfs2_dx_leaf *dx_leaf) + { + if (cpu_is_little_endian) + return; ++ ocfs2_swap_dx_leaf(dx_leaf); ++} + +- dx_leaf->dl_blkno = bswap_64(dx_leaf->dl_blkno); +- dx_leaf->dl_fs_generation = bswap_64(dx_leaf->dl_fs_generation); +- +- ocfs2_swap_dx_entry_list_to_cpu(&dx_leaf->dl_list); ++static void ocfs2_swap_dx_leaf_from_cpu(struct ocfs2_dx_leaf *dx_leaf) ++{ ++ if (cpu_is_little_endian) ++ return; ++ ocfs2_swap_dx_leaf(dx_leaf); + } + + errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block, + void *buf) + { +- errcode_t ret; ++ errcode_t ret; + struct ocfs2_dx_leaf *dx_leaf; + + ret = ocfs2_read_blocks(fs, block, 1, buf); +@@ -335,6 +430,42 @@ errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block, + return 0; + } + ++errcode_t ocfs2_write_dx_leaf(ocfs2_filesys *fs, uint64_t block, ++ void *buf) ++{ ++ errcode_t ret; ++ char *dx_leaf_buf = NULL; ++ struct ocfs2_dx_leaf *dx_leaf; ++ ++ if (!(fs->fs_flags & OCFS2_FLAG_RW)) ++ return OCFS2_ET_RO_FILESYS; ++ ++ if ((block < OCFS2_SUPER_BLOCK_BLKNO) || ++ (block > fs->fs_blocks)) ++ return OCFS2_ET_BAD_BLKNO; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &dx_leaf_buf); ++ if (ret) ++ goto out; ++ ++ memcpy(dx_leaf_buf, buf, fs->fs_blocksize); ++ dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf; ++ ocfs2_swap_dx_leaf_from_cpu(dx_leaf); ++ ++ ocfs2_compute_meta_ecc(fs, dx_leaf_buf, &dx_leaf->dl_check); ++ ret = io_write_block(fs->fs_io, block, 1, dx_leaf_buf); ++ ++ if (ret) 
++ goto out; ++ ++ fs->fs_flags |= OCFS2_FLAG_CHANGED; ++ ++out: ++ if (dx_leaf_buf) ++ ocfs2_free(&dx_leaf_buf); ++ return ret; ++} ++ + int ocfs2_dir_indexed(struct ocfs2_dinode *di) + { + if (di->i_dyn_features & OCFS2_INDEXED_DIR_FL) +@@ -345,7 +476,7 @@ int ocfs2_dir_indexed(struct ocfs2_dinode *di) + /* + * Only use this when we already know the directory is indexed. + */ +-int __ocfs2_is_dir_trailer(ocfs2_filesys *fs, unsigned long de_off) ++static int __ocfs2_is_dir_trailer(ocfs2_filesys *fs, unsigned long de_off) + { + if (de_off == ocfs2_dir_trailer_blk_off(fs)) + return 1; +diff --git a/libocfs2/expanddir.c b/libocfs2/expanddir.c +index 6e9a192..ec05b74 100644 +--- a/libocfs2/expanddir.c ++++ b/libocfs2/expanddir.c +@@ -69,6 +69,10 @@ errcode_t ocfs2_expand_dir(ocfs2_filesys *fs, + if (ocfs2_support_inline_data(OCFS2_RAW_SB(fs->fs_super)) && + cinode->ci_inode->i_dyn_features & OCFS2_INLINE_DATA_FL) { + ret = ocfs2_convert_inline_data_to_extents(cinode); ++ if ((ret == 0) && ++ ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super))) { ++ ret = ocfs2_dx_dir_build(fs, dir); ++ } + goto bail; + } + +@@ -228,6 +232,18 @@ errcode_t ocfs2_init_dir(ocfs2_filesys *fs, + goto bail; + } + ++ /* ++ * Only build indexed tree if the directory is initiated as non-inline. 
++ * Otherwise, the indexed tree will be build when convert the inlined ++ * directory to extent in ocfs2_expand_dir() ++ */ ++ if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) && ++ (!cinode->ci_inode->i_dyn_features & OCFS2_INLINE_DATA_FL)) { ++ ret = ocfs2_dx_dir_build(fs, dir); ++ if (ret) ++ goto bail; ++ } ++ + /* set link count of the parent */ + ret = ocfs2_read_inode(fs, parent_dir, buf); + if (ret) +@@ -243,8 +259,6 @@ errcode_t ocfs2_init_dir(ocfs2_filesys *fs, + + /* update the inode */ + ret = ocfs2_write_cached_inode(fs, cinode); +- if (ret) +- goto bail; + + bail: + if (buf) +diff --git a/libocfs2/extent_tree.c b/libocfs2/extent_tree.c +index fe1be7a..3cbdcf8 100644 +--- a/libocfs2/extent_tree.c ++++ b/libocfs2/extent_tree.c +@@ -153,6 +153,49 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = { + .eo_fill_root_el = ocfs2_xattr_value_fill_root_el, + }; + ++static void ocfs2_dx_root_set_last_eb_blk (struct ocfs2_extent_tree *et, ++ uint64_t blkno) ++{ ++ struct ocfs2_dx_root_block *dx_root = et->et_object; ++ dx_root->dr_last_eb_blk = blkno; ++} ++ ++static uint64_t ocfs2_dx_root_get_last_eb_blk (struct ocfs2_extent_tree *et) ++{ ++ struct ocfs2_dx_root_block *dx_root = et->et_object; ++ return dx_root->dr_last_eb_blk; ++} ++ ++static void ocfs2_dx_root_update_clusters(struct ocfs2_extent_tree *et, ++ uint32_t clusters) ++{ ++ struct ocfs2_dx_root_block *dx_root = et->et_object; ++ dx_root->dr_clusters += clusters; ++} ++ ++static int ocfs2_dx_root_sanity_check(struct ocfs2_extent_tree *et) ++{ ++ struct ocfs2_dx_root_block *dx_root = (struct ocfs2_dx_root_block *)et->et_object; ++ assert(OCFS2_IS_VALID_DX_ROOT(dx_root)); ++ ++ return 0; ++} ++ ++static void ocfs2_dx_root_fill_root_el (struct ocfs2_extent_tree *et) ++{ ++ struct ocfs2_dx_root_block *dx_root = et->et_object; ++ ++ et->et_root_el = &dx_root->dr_list; ++} ++ ++static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = { ++ .eo_set_last_eb_blk = 
ocfs2_dx_root_set_last_eb_blk, ++ .eo_get_last_eb_blk = ocfs2_dx_root_get_last_eb_blk, ++ .eo_update_clusters = ocfs2_dx_root_update_clusters, ++ .eo_sanity_check = ocfs2_dx_root_sanity_check, ++ .eo_fill_root_el = ocfs2_dx_root_fill_root_el, ++}; ++ + static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, + ocfs2_filesys *fs, + char *buf, +@@ -202,6 +245,15 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, + xv, &ocfs2_xattr_value_et_ops); + } + ++void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, ++ ocfs2_filesys *fs, ++ char *buf, uint64_t blkno) ++{ ++ __ocfs2_init_extent_tree(et, fs, buf, blkno, ++ ocfs2_write_dx_root, ++ buf, &ocfs2_dx_root_et_ops); ++} ++ + static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et, + uint64_t new_last_eb_blk) + { +@@ -4182,3 +4234,4 @@ out: + ocfs2_free_path(path); + return ret; + } ++ +diff --git a/libocfs2/extent_tree.h b/libocfs2/extent_tree.h +index b7513dd..3f9b0dc 100644 +--- a/libocfs2/extent_tree.h ++++ b/libocfs2/extent_tree.h +@@ -123,6 +123,9 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, + char *buf, uint64_t blkno, + ocfs2_root_write_func write, + struct ocfs2_xattr_value_root *xv); ++void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, ++ ocfs2_filesys *fs, ++ char *buf, uint64_t blkno); + errcode_t ocfs2_tree_insert_extent(ocfs2_filesys *fs, + struct ocfs2_extent_tree *et, + uint32_t cpos, uint64_t c_blkno, +@@ -135,6 +138,8 @@ int ocfs2_change_extent_flag(ocfs2_filesys *fs, + int ocfs2_remove_extent(ocfs2_filesys *fs, + struct ocfs2_extent_tree *et, + uint32_t cpos, uint32_t len); ++ ++ + /* + * Structures which describe a path through a btree, and functions to + * manipulate them. 
+diff --git a/libocfs2/inode.c b/libocfs2/inode.c +index 95419f4..fd88dc8 100644 +--- a/libocfs2/inode.c ++++ b/libocfs2/inode.c +@@ -139,6 +139,9 @@ static void ocfs2_swap_inode_second(struct ocfs2_dinode *di) + sb->s_uuid_hash = bswap_32(sb->s_uuid_hash); + sb->s_first_cluster_group = bswap_64(sb->s_first_cluster_group); + sb->s_xattr_inline_size = bswap_16(sb->s_xattr_inline_size); ++ sb->s_dx_seed[0] = bswap_32(sb->s_dx_seed[0]); ++ sb->s_dx_seed[1] = bswap_32(sb->s_dx_seed[1]); ++ sb->s_dx_seed[2] = bswap_32(sb->s_dx_seed[2]); + + } else if (di->i_flags & OCFS2_LOCAL_ALLOC_FL) { + struct ocfs2_local_alloc *la = &di->id2.i_lab; +diff --git a/libocfs2/link.c b/libocfs2/link.c +index c89471d..310c24e 100644 +--- a/libocfs2/link.c ++++ b/libocfs2/link.c +@@ -43,10 +43,12 @@ struct link_struct { + of the block. This handles + the directory trailer if it + exists */ ++ int blkno; + struct ocfs2_dinode *sb; + }; + + static int link_proc(struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +@@ -101,6 +103,7 @@ static int link_proc(struct ocfs2_dir_entry *dirent, + strncpy(dirent->name, ls->name, ls->namelen); + dirent->file_type = ls->flags; + ++ ls->blkno = blocknr; + ls->done++; + return OCFS2_DIRENT_ABORT|OCFS2_DIRENT_CHANGED; + } +@@ -172,6 +175,12 @@ errcode_t ocfs2_link(ocfs2_filesys *fs, uint64_t dir, const char *name, + retval = OCFS2_ET_INTERNAL_FAILURE; + } + ++ if (ls.done) { ++ if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) && ++ (di->i_dyn_features & OCFS2_INDEXED_DIR_FL)) ++ retval = ocfs2_dx_dir_insert_entry(fs, dir, ls.name, ++ ls.inode, ls.blkno); ++ } + out_free: + ocfs2_free(&buf); + +diff --git a/libocfs2/lookup.c b/libocfs2/lookup.c +index 374113c..dac83c0 100644 +--- a/libocfs2/lookup.c ++++ b/libocfs2/lookup.c +@@ -30,6 +30,7 @@ + + #include + #include ++#include + + #include "ocfs2/ocfs2.h" + +@@ -45,6 +46,7 @@ struct lookup_struct { + #pragma argsused + #endif + static int 
lookup_proc(struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +@@ -61,24 +63,81 @@ static int lookup_proc(struct ocfs2_dir_entry *dirent, + return OCFS2_DIRENT_ABORT; + } + ++static errcode_t ocfs2_find_entry_dx(ocfs2_filesys *fs, ++ struct ocfs2_dinode *di, ++ char *buf, ++ struct lookup_struct *ls) ++{ ++ char *dx_root_buf = NULL; ++ struct ocfs2_dx_root_block *dx_root; ++ struct ocfs2_dir_lookup_result lookup; ++ errcode_t ret; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf); ++ if (ret) ++ goto out; ++ ret = ocfs2_read_dx_root(fs, di->i_dx_root, dx_root_buf); ++ if (ret) ++ goto out; ++ dx_root = (struct ocfs2_dx_root_block *)dx_root_buf; ++ ++ memset(&lookup, 0, sizeof(struct ocfs2_dir_lookup_result)); ++ ocfs2_dx_dir_name_hash(fs, ls->name, ++ ls->len, &lookup.dl_hinfo); ++ ++ ret = ocfs2_dx_dir_search(fs, ls->name, ls->len, ++ dx_root, &lookup); ++ if (ret) ++ goto out; ++ ++ *ls->inode = lookup.dl_entry->inode; ++ ls->found++; ++ ret = 0; ++ ++out: ++ release_lookup_res(&lookup); ++ if (dx_root_buf) ++ ocfs2_free(&dx_root_buf); ++ return ret; ++} + + errcode_t ocfs2_lookup(ocfs2_filesys *fs, uint64_t dir, + const char *name, int namelen, char *buf, + uint64_t *inode) + { +- errcode_t retval; ++ errcode_t ret; + struct lookup_struct ls; ++ char *di_buf = NULL; ++ struct ocfs2_dinode *di; + + ls.name = name; + ls.len = namelen; + ls.inode = inode; + ls.found = 0; + +- retval = ocfs2_dir_iterate(fs, dir, 0, buf, lookup_proc, &ls); +- if (retval) +- return retval; ++ ret = ocfs2_malloc_block(fs->fs_io, &di_buf); ++ if (ret) ++ goto out; ++ ret = ocfs2_read_inode(fs, dir, di_buf); ++ if (ret) ++ goto out; ++ di = (struct ocfs2_dinode *)di_buf; ++ ++ if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) && ++ ocfs2_dir_indexed(di)) { ++ ret = ocfs2_find_entry_dx(fs, di, buf, &ls); ++ } else { ++ ret = ocfs2_dir_iterate(fs, dir, 0, buf, lookup_proc, &ls); ++ } ++ if (ret) ++ goto out; + +- return 
(ls.found) ? 0 : OCFS2_ET_FILE_NOT_FOUND; ++ ret = (ls.found) ? 0 : OCFS2_ET_FILE_NOT_FOUND; ++ ++out: ++ if(di_buf) ++ ocfs2_free(&di_buf); ++ return ret; + } + + +diff --git a/libocfs2/ocfs2_err.et b/libocfs2/ocfs2_err.et +index 13b03c4..1226681 100644 +--- a/libocfs2/ocfs2_err.et ++++ b/libocfs2/ocfs2_err.et +@@ -102,6 +102,9 @@ ec OCFS2_ET_DIR_CORRUPTED, + ec OCFS2_ET_NO_DIRECTORY, + "OCFS2 inode is not a directory" + ++ec OCFS2_ET_DIRENT_NOT_FOUND, ++ "Directory entry not found" ++ + ec OCFS2_ET_FILE_NOT_FOUND, + "File not found by ocfs2_lookup" + +@@ -189,6 +192,9 @@ ec OCFS2_ET_CORRUPT_QUOTA_FILE, + ec OCFS2_ET_CANNOT_DETERMINE_SECTOR_SIZE, + "Cannot determine sector size" + ++ec OCFS2_ET_DX_BALANCE_EMPTY_LEAF, ++ "Trying to rebalance empty leaf for indexed dir" ++ + ec OCFS2_ET_NONEMTY_QUOTA_HASH, + "Freeing non-empty quota hash" + +diff --git a/libocfs2/truncate.c b/libocfs2/truncate.c +index 1902366..7327253 100644 +--- a/libocfs2/truncate.c ++++ b/libocfs2/truncate.c +@@ -436,6 +436,22 @@ errcode_t ocfs2_xattr_tree_truncate(ocfs2_filesys *fs, + &ctxt, &changed); + } + ++ ++errcode_t ocfs2_dir_indexed_tree_truncate(ocfs2_filesys *fs, ++ struct ocfs2_dx_root_block *dx_root) ++{ ++ struct truncate_ctxt ctxt; ++ ++ memset(&ctxt, 0, sizeof (struct truncate_ctxt)); ++ ctxt.new_i_clusters = dx_root->dr_clusters; ++ ctxt.new_size_in_clusters = 0; ++ ++ return ocfs2_extent_iterate_dx_root(fs, dx_root, ++ OCFS2_EXTENT_FLAG_DEPTH_TRAVERSE, ++ NULL, truncate_iterate, &ctxt); ++} ++ ++ + #ifdef DEBUG_EXE + #include + #include +diff --git a/libocfs2/unlink.c b/libocfs2/unlink.c +index 286ef59..9ebba38 100644 +--- a/libocfs2/unlink.c ++++ b/libocfs2/unlink.c +@@ -30,6 +30,7 @@ + #define _LARGEFILE64_SOURCE + + #include ++#include + + #include "ocfs2/ocfs2.h" + +@@ -46,6 +47,7 @@ struct link_struct { + #pragma argsused + #endif + static int unlink_proc(struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +@@ -66,6 +68,161 @@ 
static int unlink_proc(struct ocfs2_dir_entry *dirent, + return OCFS2_DIRENT_ABORT|OCFS2_DIRENT_CHANGED; + } + ++static errcode_t ocfs2_unlink_el(ocfs2_filesys *fs, ++ uint64_t dir, ++ const char *name, ++ uint64_t ino, ++ int flags) ++{ ++ errcode_t ret; ++ struct link_struct ls; ++ ++ ls.name = name; ++ ls.namelen = name ? strlen(name) : 0; ++ ls.inode = ino; ++ ls.flags = 0; ++ ls.done = 0; ++ ++ ret = ocfs2_dir_iterate(fs, dir, 0, 0, unlink_proc, &ls); ++ if (ret) ++ goto out; ++ ++ if (!ls.done) ++ ret = OCFS2_ET_DIR_NO_SPACE; ++out: ++ return ret; ++} ++ ++static errcode_t __ocfs2_delete_entry(ocfs2_filesys *fs, ++ struct ocfs2_dir_entry *de_del, ++ char *dir_buf) ++{ ++ struct ocfs2_dir_entry *de, *pde; ++ int offset = 0; ++ errcode_t ret = 0; ++ ++ pde = NULL; ++ de = (struct ocfs2_dir_entry *)dir_buf; ++ ++ while( offset < fs->fs_blocksize) { ++ if (!ocfs2_check_dir_entry(fs, de, dir_buf, offset)) { ++ ret = OCFS2_ET_DIR_CORRUPTED; ++ goto out; ++ } ++ ++ if (de == de_del) { ++ if (pde) ++ pde->rec_len += de->rec_len; ++ else ++ de->inode = 0; ++ ++ goto out; ++ } ++ if (de->rec_len <= 0) { ++ ret = OCFS2_ET_DIR_CORRUPTED; ++ goto out; ++ } ++ pde = de; ++ offset += de->rec_len; ++ de = (struct ocfs2_dir_entry *)((char *)de + de->rec_len); ++ } ++ ++out: ++ return ret; ++} ++ ++static errcode_t ocfs2_unlink_dx(ocfs2_filesys *fs, ++ uint64_t dir, ++ const char *name, ++ uint64_t ino, ++ int flags) ++{ ++ char *di_buf = NULL, *dx_root_buf = NULL; ++ struct ocfs2_dinode *di; ++ struct ocfs2_dx_root_block *dx_root; ++ struct ocfs2_dx_entry_list *entry_list; ++ struct ocfs2_dir_block_trailer *trailer; ++ int write_dx_leaf = 0; ++ int add_to_free_list = 0; ++ int max_rec_len = 0; ++ struct ocfs2_dir_lookup_result lookup; ++ errcode_t ret; ++ ++ assert(name); ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &di_buf); ++ if (ret) ++ goto out; ++ ret = ocfs2_read_inode(fs, dir, di_buf); ++ if (ret) ++ goto out; ++ di = (struct ocfs2_dinode *)di_buf; ++ ++ ret = 
ocfs2_malloc_block(fs->fs_io, &dx_root_buf); ++ if (ret) ++ goto out; ++ ret = ocfs2_read_dx_root(fs, di->i_dx_root, dx_root_buf); ++ if (ret) ++ goto out; ++ dx_root = (struct ocfs2_dx_root_block *)dx_root_buf; ++ ++ memset(&lookup, 0, sizeof(struct ocfs2_dir_lookup_result)); ++ ret= ocfs2_dx_dir_search(fs, name, strlen(name), dx_root, &lookup); ++ if (ret) ++ goto out; ++ ++ trailer = ocfs2_dir_trailer_from_block(fs, lookup.dl_leaf); ++ if (trailer->db_free_rec_len == 0) ++ add_to_free_list = 1; ++ ++ ret = __ocfs2_delete_entry(fs, lookup.dl_entry, lookup.dl_leaf); ++ if (ret) ++ goto out; ++ ++ max_rec_len = ocfs2_find_max_rec_len(fs, lookup.dl_leaf); ++ trailer->db_free_rec_len = max_rec_len; ++ if (add_to_free_list) { ++ trailer->db_free_next = dx_root->dr_free_blk; ++ dx_root->dr_free_blk = lookup.dl_leaf_blkno; ++ } ++ ++ ret = ocfs2_write_dir_block(fs, di, ++ lookup.dl_leaf_blkno, lookup.dl_leaf); ++ if (ret) ++ goto out; ++ ++ if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) ++ entry_list = &dx_root->dr_entries; ++ else { ++ entry_list = &(lookup.dl_dx_leaf->dl_list); ++ write_dx_leaf = 1; ++ } ++ ++ ocfs2_dx_list_remove_entry(entry_list, ++ lookup.dl_dx_entry_idx); ++ ++ if (write_dx_leaf) { ++ ret = ocfs2_write_dx_leaf(fs, lookup.dl_dx_leaf_blkno, lookup.dl_dx_leaf); ++ if (ret) ++ goto out; ++ } ++ ++ dx_root->dr_num_entries --; ++ ret = ocfs2_write_dx_root(fs, di->i_dx_root, dx_root_buf); ++ if (ret) ++ goto out; ++ ret = ocfs2_write_inode(fs, di->i_blkno, di_buf); ++ ++out: ++ release_lookup_res(&lookup); ++ if (dx_root_buf) ++ ocfs2_free(&dx_root_buf); ++ if (di_buf) ++ ocfs2_free(&di_buf); ++ ++ return ret; ++} ++ + #ifdef __TURBOC__ + #pragma argsused + #endif +@@ -73,22 +230,30 @@ errcode_t ocfs2_unlink(ocfs2_filesys *fs, uint64_t dir, + const char *name, uint64_t ino, + int flags) + { +- errcode_t retval; +- struct link_struct ls; ++ errcode_t ret; ++ char *di_buf = NULL; ++ struct ocfs2_dinode *di; + + if (!(fs->fs_flags & OCFS2_FLAG_RW)) + return 
OCFS2_ET_RO_FILESYS; + +- ls.name = name; +- ls.namelen = name ? strlen(name) : 0; +- ls.inode = ino; +- ls.flags = 0; +- ls.done = 0; ++ ret = ocfs2_malloc_block(fs->fs_io, &di_buf); ++ if (ret) ++ goto out; ++ ret = ocfs2_read_inode(fs, dir, di_buf); ++ if (ret) ++ goto out; ++ di = (struct ocfs2_dinode *)di_buf; + +- retval = ocfs2_dir_iterate(fs, dir, 0, 0, unlink_proc, &ls); +- if (retval) +- return retval; ++ if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) && ++ (ocfs2_dir_indexed(di))) ++ ret = ocfs2_unlink_dx(fs, dir, name, ino, flags); ++ else ++ ret = ocfs2_unlink_el(fs, dir, name, ino, flags); + +- return (ls.done) ? 0 : OCFS2_ET_DIR_NO_SPACE; ++out: ++ if (di_buf) ++ ocfs2_free(&di_buf); ++ return ret; + } + +diff --git a/sizetest/sizetest.c b/sizetest/sizetest.c +index e01e800..a68e6c3 100644 +--- a/sizetest/sizetest.c ++++ b/sizetest/sizetest.c +@@ -144,7 +144,7 @@ static void print_ocfs2_super_block(void) + SHOW_OFFSET(struct ocfs2_super_block, s_cluster_info); + SHOW_OFFSET(struct ocfs2_super_block, s_xattr_inline_size); + SHOW_OFFSET(struct ocfs2_super_block, s_reserved0); +- SHOW_OFFSET(struct ocfs2_super_block, s_reserved1); ++ SHOW_OFFSET(struct ocfs2_super_block, s_dx_seed); + SHOW_OFFSET(struct ocfs2_super_block, s_reserved2); + + END_TYPE(struct ocfs2_super_block); +-- +1.7.0.2 + diff --git a/0008-dx_dirs-fsck.ocfs2-support.patch b/0008-dx_dirs-fsck.ocfs2-support.patch new file mode 100644 index 0000000..9a475cb --- /dev/null +++ b/0008-dx_dirs-fsck.ocfs2-support.patch @@ -0,0 +1,415 @@ +From 2bbbdef8534534b12cf3dd3ad9db6a1ebd74d0a3 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Sun, 11 Apr 2010 16:10:05 +0800 +Subject: [PATCH 08/30] dx_dirs: fsck.ocfs2 support + +This patch does a basic indexed dirs support in fsck.ocfs2. + +During pass2, if a directory block is changed, and indexed dirs in +enabled, the indexed tree of this directory will be +truncate, then rebuild with the modified directory data. 
All the +modified directories' inode numbers are recored in a +rb-tree, when all directories get scanned, truncate and rebuild the +directories whose inode recorded in the rb-tree. + +Signed-off-by: Coly Li +Signed-off-by: Mark Fasheh +--- + fsck.ocfs2/dirblocks.c | 125 ++++++++++++++++++++++++++++++++++++++- + fsck.ocfs2/include/dirblocks.h | 4 + + fsck.ocfs2/pass1.c | 54 +++++++++++++++++ + fsck.ocfs2/pass1b.c | 4 +- + fsck.ocfs2/pass2.c | 48 +++++++++++++++- + fsck.ocfs2/pass3.c | 1 + + fsck.ocfs2/pass4.c | 1 + + 7 files changed, 229 insertions(+), 8 deletions(-) + +diff --git a/fsck.ocfs2/dirblocks.c b/fsck.ocfs2/dirblocks.c +index 1fd5560..085dd1f 100644 +--- a/fsck.ocfs2/dirblocks.c ++++ b/fsck.ocfs2/dirblocks.c +@@ -34,6 +34,7 @@ + #include "fsck.h" + #include "dirblocks.h" + #include "util.h" ++#include "extent.h" + + errcode_t o2fsck_add_dir_block(o2fsck_dirblocks *db, uint64_t ino, + uint64_t blkno, uint64_t blkcount) +@@ -43,11 +44,9 @@ errcode_t o2fsck_add_dir_block(o2fsck_dirblocks *db, uint64_t ino, + o2fsck_dirblock_entry *dbe, *tmp_dbe; + errcode_t ret = 0; + +- dbe = calloc(1, sizeof(*dbe)); +- if (dbe == NULL) { +- ret = OCFS2_ET_NO_MEMORY; ++ ret = ocfs2_malloc0(sizeof(o2fsck_dirblock_entry), &dbe); ++ if (ret) + goto out; +- } + + dbe->e_ino = ino; + dbe->e_blkno = blkno; +@@ -134,6 +133,73 @@ static int try_to_cache(ocfs2_filesys *fs, struct rb_node *node, + return cached_blocks; + } + ++uint64_t o2fsck_search_reidx_dir(struct rb_root *root, uint64_t dino) ++{ ++ struct rb_node *node = root->rb_node; ++ o2fsck_dirblock_entry *dbe; ++ ++ while (node) { ++ dbe = rb_entry(node, o2fsck_dirblock_entry, e_node); ++ ++ if (dino < dbe->e_ino) ++ node = node->rb_left; ++ else if (dino > dbe->e_ino) ++ node = node->rb_right; ++ else ++ return dbe->e_ino; ++ } ++ return 0; ++} ++ ++static errcode_t o2fsck_add_reidx_dir_ino(struct rb_root *root, uint64_t dino) ++{ ++ struct rb_node **p = &root->rb_node; ++ struct rb_node *parent = NULL; ++ 
o2fsck_dirblock_entry *dp, *tmp_dp; ++ errcode_t ret = 0; ++ ++ ret = ocfs2_malloc0(sizeof (o2fsck_dirblock_entry), &dp); ++ if (ret) ++ goto out; ++ ++ dp->e_ino = dino; ++ ++ while(*p) ++ { ++ parent = *p; ++ tmp_dp = rb_entry(parent, o2fsck_dirblock_entry, e_node); ++ ++ if (dp->e_ino < tmp_dp->e_ino) ++ p = &(*p)->rb_left; ++ else if (dp->e_ino > tmp_dp->e_ino) ++ p = &(*p)->rb_right; ++ else { ++ ret = OCFS2_ET_INTERNAL_FAILURE; ++ ocfs2_free(&dp); ++ goto out; ++ } ++ } ++ ++ rb_link_node(&dp->e_node, parent, p); ++ rb_insert_color(&dp->e_node, root); ++ ++out: ++ return ret; ++} ++ ++errcode_t o2fsck_try_add_reidx_dir(struct rb_root *root, uint64_t dino) ++{ ++ errcode_t ret = 0; ++ uint64_t ino; ++ ino = o2fsck_search_reidx_dir(root, dino); ++ if (ino) ++ goto out; ++ ret = o2fsck_add_reidx_dir_ino(root, dino); ++ ++out: ++ return ret; ++} ++ + void o2fsck_dir_block_iterate(o2fsck_state *ost, dirblock_iterator func, + void *priv_data) + { +@@ -174,3 +240,54 @@ void o2fsck_dir_block_iterate(o2fsck_state *ost, dirblock_iterator func, + if (pre_cache_buf) + ocfs2_free(&pre_cache_buf); + } ++ ++static errcode_t ocfs2_rebuild_indexed_dir(ocfs2_filesys *fs, uint64_t ino) ++{ ++ errcode_t ret = 0; ++ char *di_buf = NULL; ++ struct ocfs2_dinode *di; ++ ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &di_buf); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_read_inode(fs, ino, di_buf); ++ if (ret) ++ goto out; ++ di = (struct ocfs2_dinode *)di_buf; ++ ++ /* do not rebuild indexed tree for inline directory */ ++ if (di->i_dyn_features & OCFS2_INLINE_DATA_FL) ++ goto out; ++ ++ ret = ocfs2_dx_dir_truncate(fs, ino); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_dx_dir_build(fs, ino); ++out: ++ if (di_buf) ++ ocfs2_free(&di_buf); ++ return ret; ++} ++ ++ ++errcode_t o2fsck_rebuild_indexed_dirs(ocfs2_filesys *fs, struct rb_root *root) ++{ ++ struct rb_node *node; ++ o2fsck_dirblock_entry *dbe; ++ uint64_t ino; ++ errcode_t ret = 0; ++ ++ for (node = rb_first(root); node; node = 
rb_next(node)) { ++ dbe = rb_entry(node, o2fsck_dirblock_entry, e_node); ++ ino = dbe->e_ino; ++ ret = ocfs2_rebuild_indexed_dir(fs, ino); ++ if (ret) ++ goto out; ++ } ++out: ++ return ret; ++} ++ +diff --git a/fsck.ocfs2/include/dirblocks.h b/fsck.ocfs2/include/dirblocks.h +index 7b3a2e9..f85974f 100644 +--- a/fsck.ocfs2/include/dirblocks.h ++++ b/fsck.ocfs2/include/dirblocks.h +@@ -48,6 +48,10 @@ struct _o2fsck_state; + void o2fsck_dir_block_iterate(struct _o2fsck_state *ost, dirblock_iterator func, + void *priv_data); + ++uint64_t o2fsck_search_reidx_dir(struct rb_root *root, uint64_t dino); ++errcode_t o2fsck_try_add_reidx_dir(struct rb_root *root, uint64_t dino); ++errcode_t o2fsck_rebuild_indexed_dirs(ocfs2_filesys *fs, struct rb_root *root); ++errcode_t o2fsck_check_dir_index(struct _o2fsck_state *ost, struct ocfs2_dinode *di); + + #endif /* __O2FSCK_DIRBLOCKS_H__ */ + +diff --git a/fsck.ocfs2/pass1.c b/fsck.ocfs2/pass1.c +index 00f3d54..b53b908 100644 +--- a/fsck.ocfs2/pass1.c ++++ b/fsck.ocfs2/pass1.c +@@ -781,6 +781,53 @@ static int clear_block(ocfs2_filesys *fs, + return 0; + } + ++ ++static errcode_t o2fsck_check_dx_dir(o2fsck_state *ost, struct ocfs2_dinode *di) ++{ ++ errcode_t ret = 0; ++ char *buf = NULL; ++ struct ocfs2_dx_root_block *dx_root; ++ ocfs2_filesys *fs = ost->ost_fs; ++ struct extent_info ei = {0,}; ++ int changed = 0; ++ ++ if (!ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super))) ++ goto out; ++ ++ if (!ocfs2_dir_indexed(di)) ++ goto out; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &buf); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_read_dx_root(fs, (uint64_t)di->i_dx_root, buf); ++ if (ret) ++ goto out; ++ ++ dx_root = (struct ocfs2_dx_root_block *)buf; ++ if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) ++ goto out; ++ ++ ret = check_el(ost, &ei, di->i_blkno, &dx_root->dr_list, ++ ocfs2_extent_recs_per_dx_root(fs->fs_blocksize), ++ &changed); ++ if (ret) ++ goto out; ++ ++ if (changed) { ++ ret = ocfs2_write_dx_root(fs, 
(uint64_t)di->i_dx_root, (char *)dx_root); ++ if (ret) ++ com_err(whoami, ret, "while writing an updated " ++ "dx_root block at %"PRIu64" for inode %"PRIu64, ++ (uint64_t)di->i_dx_root, (uint64_t)di->i_blkno); ++ } ++out: ++ if (buf) ++ ocfs2_free(&buf); ++ return ret; ++} ++ + /* + * this verifies i_size and i_clusters for inodes that use i_list to + * reference extents of data. +@@ -836,6 +883,13 @@ static errcode_t o2fsck_check_blocks(ocfs2_filesys *fs, o2fsck_state *ost, + goto out; + } + ++ ret = o2fsck_check_dx_dir(ost, di); ++ if (ret) { ++ com_err(whoami, ret, "while iterating over the dir indexed " ++ "tree for directory inode %"PRIu64, (uint64_t)di->i_blkno); ++ goto out; ++ } ++ + if (S_ISLNK(di->i_mode)) + check_link_data(&vb); + +diff --git a/fsck.ocfs2/pass1b.c b/fsck.ocfs2/pass1b.c +index 0ea87b4..3ca1d7d 100644 +--- a/fsck.ocfs2/pass1b.c ++++ b/fsck.ocfs2/pass1b.c +@@ -902,8 +902,8 @@ static void name_inode(struct dir_scan_context *scan, + pass1c_warn(OCFS2_ET_NO_MEMORY); + } + +-static int walk_iterate(struct ocfs2_dir_entry *de, int offset, +- int blocksize, char *buf, void *priv_data) ++static int walk_iterate(struct ocfs2_dir_entry *de, uint64_t blocknr, ++ int offset, int blocksize, char *buf, void *priv_data) + { + struct dir_scan_context *scan = priv_data; + +diff --git a/fsck.ocfs2/pass2.c b/fsck.ocfs2/pass2.c +index d61c501..58efcd4 100644 +--- a/fsck.ocfs2/pass2.c ++++ b/fsck.ocfs2/pass2.c +@@ -36,6 +36,7 @@ + #include + + #include "ocfs2/ocfs2.h" ++#include "ocfs2/kernel-rbtree.h" + + #include "dirparents.h" + #include "icount.h" +@@ -70,6 +71,7 @@ struct dirblock_data { + errcode_t ret; + o2fsck_strings strings; + uint64_t last_ino; ++ struct rb_root re_idx_dirs; + }; + + static int dirent_has_dots(struct ocfs2_dir_entry *dirent, int num_dots) +@@ -833,10 +835,11 @@ next: + } + + if (ocfs2_dir_has_trailer(dd->fs, di)) +- fix_dir_trailer(dd->ost, dbe, ++ fix_dir_trailer(dd->ost, ++ dbe, + ocfs2_dir_trailer_from_block(dd->fs, + 
dd->dirblock_buf), +- &ret_flags); ++ &ret_flags); + + if (ret_flags & OCFS2_DIRENT_CHANGED) { + if (di->i_dyn_features & OCFS2_INLINE_DATA_FL) { +@@ -851,15 +854,47 @@ next: + com_err(whoami, ret, "while writing dir block %"PRIu64, + dbe->e_blkno); + dd->ost->ost_write_error = 1; ++ goto out; ++ } ++ ++ if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(dd->fs->fs_super)) && ++ !(di->i_dyn_features & OCFS2_INLINE_DATA_FL) && ++ (di->i_dyn_features & OCFS2_INDEXED_DIR_FL)) { ++ ret = o2fsck_try_add_reidx_dir(&dd->re_idx_dirs, dbe->e_ino); ++ if (ret) { ++ com_err(whoami, ret, "while adding block for " ++ "directory inode %"PRIu64" to rebuild " ++ "dir index", dbe->e_ino); ++ goto out; ++ } + } + } + ++ /* truncate invalid indexed tree */ ++ if ((!ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(dd->fs->fs_super)))&& ++ di->i_dyn_features & OCFS2_INDEXED_DIR_FL ) { ++ /* ignore the return value */ ++ ocfs2_dx_dir_truncate(dd->fs, dbe->e_ino); ++ } ++ + out: + if (ret) + dd->ret = ret; + return ret_flags; + } + ++static void release_re_idx_dirs_rbtree(struct rb_root * root) ++{ ++ struct rb_node *node; ++ o2fsck_dirblock_entry *dp; ++ ++ while ((node = rb_first(root)) != NULL) { ++ dp = rb_entry(node, o2fsck_dirblock_entry, e_node); ++ rb_erase(&dp->e_node, root); ++ ocfs2_free(&dp); ++ } ++} ++ + errcode_t o2fsck_pass2(o2fsck_state *ost) + { + o2fsck_dir_parent *dp; +@@ -868,6 +903,7 @@ errcode_t o2fsck_pass2(o2fsck_state *ost) + .ost = ost, + .fs = ost->ost_fs, + .last_ino = 0, ++ .re_idx_dirs = RB_ROOT, + }; + + printf("Pass 2: Checking directory entries.\n"); +@@ -905,6 +941,14 @@ errcode_t o2fsck_pass2(o2fsck_state *ost) + dp->dp_dirent = ost->ost_fs->fs_sysdir_blkno; + + o2fsck_dir_block_iterate(ost, pass2_dir_block_iterate, &dd); ++ ++ if (dd.re_idx_dirs.rb_node) { ++ ret = o2fsck_rebuild_indexed_dirs(ost->ost_fs, &dd.re_idx_dirs); ++ if (ret) ++ com_err(whoami, ret, "while rebuild indexed dirs."); ++ } ++ release_re_idx_dirs_rbtree(&dd.re_idx_dirs); ++ + 
o2fsck_strings_free(&dd.strings); + out: + if (dd.dirblock_buf) +diff --git a/fsck.ocfs2/pass3.c b/fsck.ocfs2/pass3.c +index 457f312..94d9fbd 100644 +--- a/fsck.ocfs2/pass3.c ++++ b/fsck.ocfs2/pass3.c +@@ -193,6 +193,7 @@ struct fix_dot_dot_args { + }; + + static int fix_dot_dot_dirent(struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +diff --git a/fsck.ocfs2/pass4.c b/fsck.ocfs2/pass4.c +index d713d13..5ca4f17 100644 +--- a/fsck.ocfs2/pass4.c ++++ b/fsck.ocfs2/pass4.c +@@ -101,6 +101,7 @@ out: + } + + static int replay_orphan_iterate(struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +-- +1.7.0.2 + diff --git a/0009-dx_dirs-add-blocknr-in-callback-of-ocfs2_dir_iterate.patch b/0009-dx_dirs-add-blocknr-in-callback-of-ocfs2_dir_iterate.patch new file mode 100644 index 0000000..5eafc78 --- /dev/null +++ b/0009-dx_dirs-add-blocknr-in-callback-of-ocfs2_dir_iterate.patch @@ -0,0 +1,217 @@ +From c797d07a1021d5c3a5541236739dabb5dfd4c620 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Sun, 11 Apr 2010 16:10:06 +0800 +Subject: [PATCH 09/30] dx_dirs: add 'blocknr' in callback of ocfs2_dir_iterate() + +In order to make ocfs2_dx_dir_insert() easier to set the dir block +number to a dx record, a new parameter 'uint64_t +blocknr' is added into the call back function parameter of +ocfs2_dir_iterate(). This patch includes (part of) the +related changes which are not mentioned in previous patches. 
+ +Signed-off-by: Coly Li +Signed-off-by: Mark Fasheh +--- + debugfs.ocfs2/dump.c | 6 +++--- + debugfs.ocfs2/find_inode_paths.c | 3 ++- + debugfs.ocfs2/include/dump.h | 2 +- + debugfs.ocfs2/utils.c | 4 ++-- + extras/find_hardlinks.c | 1 + + extras/find_inode_paths.c | 1 + + fswreck/dir.c | 3 +++ + ocfs2console/ocfs2interface/ocfs2module.c | 1 + + tunefs.ocfs2/feature_quota.c | 3 ++- + tunefs.ocfs2/op_list_sparse_files.c | 4 ++-- + tunefs.ocfs2/op_set_slot_count.c | 10 ++++++---- + 11 files changed, 24 insertions(+), 14 deletions(-) + +diff --git a/debugfs.ocfs2/dump.c b/debugfs.ocfs2/dump.c +index 88ec430..d55fc0e 100644 +--- a/debugfs.ocfs2/dump.c ++++ b/debugfs.ocfs2/dump.c +@@ -461,8 +461,8 @@ void dump_group_descriptor (FILE *out, struct ocfs2_group_desc *grp, + * dump_dir_entry() + * + */ +-int dump_dir_entry (struct ocfs2_dir_entry *rec, int offset, int blocksize, +- char *buf, void *priv_data) ++int dump_dir_entry (struct ocfs2_dir_entry *rec, uint64_t blocknr, int offset, ++ int blocksize, char *buf, void *priv_data) + { + list_dir_opts *ls = (list_dir_opts *)priv_data; + char tmp = rec->name[rec->name_len]; +@@ -544,7 +544,7 @@ void dump_dir_block(FILE *out, char *buf) + return; + } + +- dump_dir_entry(dirent, offset, gbls.fs->fs_blocksize, NULL, ++ dump_dir_entry(dirent, 0, offset, gbls.fs->fs_blocksize, NULL, + &ls_opts); + offset += dirent->rec_len; + } +diff --git a/debugfs.ocfs2/find_inode_paths.c b/debugfs.ocfs2/find_inode_paths.c +index e2d0e7d..cf9e88a 100644 +--- a/debugfs.ocfs2/find_inode_paths.c ++++ b/debugfs.ocfs2/find_inode_paths.c +@@ -38,7 +38,8 @@ struct walk_path { + uint64_t *inode; + }; + +-static int walk_tree_func(struct ocfs2_dir_entry *dentry, int offset, ++static int walk_tree_func(struct ocfs2_dir_entry *dentry, ++ uint64_t blocknr, int offset, + int blocksize, char *buf, void *priv_data) + { + errcode_t ret; +diff --git a/debugfs.ocfs2/include/dump.h b/debugfs.ocfs2/include/dump.h +index 79b10b3..ae7b34a 100644 +--- 
a/debugfs.ocfs2/include/dump.h ++++ b/debugfs.ocfs2/include/dump.h +@@ -50,7 +50,7 @@ void dump_extent_list (FILE *out, struct ocfs2_extent_list *ext); + void dump_chain_list (FILE *out, struct ocfs2_chain_list *cl); + void dump_extent_block (FILE *out, struct ocfs2_extent_block *blk); + void dump_group_descriptor (FILE *out, struct ocfs2_group_desc *grp, int index); +-int dump_dir_entry (struct ocfs2_dir_entry *rec, int offset, int blocksize, ++int dump_dir_entry (struct ocfs2_dir_entry *rec, uint64_t blocknr, int offset, int blocksize, + char *buf, void *priv_data); + void dump_dx_root (FILE *out, struct ocfs2_dx_root_block *dx_root); + void dump_dx_leaf (FILE *out, struct ocfs2_dx_leaf *dx_leaf); +diff --git a/debugfs.ocfs2/utils.c b/debugfs.ocfs2/utils.c +index 6107d9e..2c5b588 100644 +--- a/debugfs.ocfs2/utils.c ++++ b/debugfs.ocfs2/utils.c +@@ -674,8 +674,8 @@ bail: + * Copyright (C) 1994 Theodore Ts'o. This file may be redistributed + * under the terms of the GNU Public License. + */ +-static int rdump_dirent(struct ocfs2_dir_entry *rec, int offset, int blocksize, +- char *buf, void *priv_data) ++static int rdump_dirent(struct ocfs2_dir_entry *rec, uint64_t blocknr, ++ int offset, int blocksize, char *buf, void *priv_data) + { + rdump_opts *rd = (rdump_opts *)priv_data; + char tmp = rec->name[rec->name_len]; +diff --git a/extras/find_hardlinks.c b/extras/find_hardlinks.c +index b70f58b..2e1f697 100644 +--- a/extras/find_hardlinks.c ++++ b/extras/find_hardlinks.c +@@ -55,6 +55,7 @@ struct walk_path { + }; + + static int walk_tree_func(struct ocfs2_dir_entry *dentry, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +diff --git a/extras/find_inode_paths.c b/extras/find_inode_paths.c +index 1725b40..b9ad920 100644 +--- a/extras/find_inode_paths.c ++++ b/extras/find_inode_paths.c +@@ -53,6 +53,7 @@ struct walk_path { + }; + + static int walk_tree_func(struct ocfs2_dir_entry *dentry, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, 
+diff --git a/fswreck/dir.c b/fswreck/dir.c +index 66309fd..eb8d0f2 100644 +--- a/fswreck/dir.c ++++ b/fswreck/dir.c +@@ -112,6 +112,7 @@ static int corrupt_match_dirent(struct dirent_corrupt_struct *dcs, + } + + static int rename_dirent_proc(struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +@@ -157,6 +158,7 @@ static int rename_dirent(ocfs2_filesys *fs, uint64_t dir, + } + + static int corrupt_dirent_ino_proc(struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +@@ -200,6 +202,7 @@ static int corrupt_dirent_ino(ocfs2_filesys *fs, uint64_t dir, + } + + static int corrupt_dirent_reclen_proc(struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +diff --git a/ocfs2console/ocfs2interface/ocfs2module.c b/ocfs2console/ocfs2interface/ocfs2module.c +index bd3139d..18ad04c 100644 +--- a/ocfs2console/ocfs2interface/ocfs2module.c ++++ b/ocfs2console/ocfs2interface/ocfs2module.c +@@ -735,6 +735,7 @@ typedef struct + + static int + walk_dirs (struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, + int offset, + int blocksize, + char *buf, +diff --git a/tunefs.ocfs2/feature_quota.c b/tunefs.ocfs2/feature_quota.c +index 2da3cbe..08d7770 100644 +--- a/tunefs.ocfs2/feature_quota.c ++++ b/tunefs.ocfs2/feature_quota.c +@@ -165,7 +165,8 @@ struct remove_quota_files_ctxt { + }; + + static int remove_quota_files_iterate(struct ocfs2_dir_entry *dirent, +- int offset, int blocksize, char *buf, ++ uint64_t blocknr, int offset, ++ int blocksize, char *buf, + void *priv_data) + { + struct remove_quota_files_ctxt *ctxt = priv_data; +diff --git a/tunefs.ocfs2/op_list_sparse_files.c b/tunefs.ocfs2/op_list_sparse_files.c +index 2f81d36..3127876 100644 +--- a/tunefs.ocfs2/op_list_sparse_files.c ++++ b/tunefs.ocfs2/op_list_sparse_files.c +@@ -290,8 +290,8 @@ bail: + } + + static int list_sparse_func(struct ocfs2_dir_entry *dirent, +- int offset, int blocksize, +- 
char *buf, void *priv_data) ++ uint64_t blocknr, int offset, ++ int blocksize, char *buf, void *priv_data) + { + errcode_t ret; + char *di_buf = NULL; +diff --git a/tunefs.ocfs2/op_set_slot_count.c b/tunefs.ocfs2/op_set_slot_count.c +index 46ce2de..cdd4f1a 100644 +--- a/tunefs.ocfs2/op_set_slot_count.c ++++ b/tunefs.ocfs2/op_set_slot_count.c +@@ -699,8 +699,9 @@ static errcode_t truncate_orphan_dir(ocfs2_filesys *fs, + return ret; + } + +-static int remove_slot_iterate(struct ocfs2_dir_entry *dirent, int offset, +- int blocksize, char *buf, void *priv_data) ++static int remove_slot_iterate(struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, int offset, int blocksize, ++ char *buf, void *priv_data) + { + struct remove_slot_ctxt *ctxt = + (struct remove_slot_ctxt *)priv_data; +@@ -783,8 +784,9 @@ bail: + return ret; + } + +-static int orphan_iterate(struct ocfs2_dir_entry *dirent, int offset, +- int blocksize, char *buf, void *priv_data) ++static int orphan_iterate(struct ocfs2_dir_entry *dirent, ++ uint64_t blocknr, int offset, int blocksize, ++ char *buf, void *priv_data) + { + int *has_orphan = (int *)priv_data; + +-- +1.7.0.2 + diff --git a/0010-dx_dirs-add-disable-indexed-dirs-support-in-tunefs.o.patch b/0010-dx_dirs-add-disable-indexed-dirs-support-in-tunefs.o.patch new file mode 100644 index 0000000..b43ee08 --- /dev/null +++ b/0010-dx_dirs-add-disable-indexed-dirs-support-in-tunefs.o.patch @@ -0,0 +1,261 @@ +From 88ed9a6f3d2bb06220c9188ad333b668b3936268 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Sun, 11 Apr 2010 16:10:07 +0800 +Subject: [PATCH 10/30] dx_dirs: add disable indexed-dirs support in tunefs.ocfs2 + +This patch truncates all directories' indexed tree if '--fs-features +noindexed-tree' option is given. The indexed dirs +related flags on directory inodes and superblock are cleared too. 
+ +Signed-off-by: Coly Li +Signed-off-by: Mark Fasheh +--- + tunefs.ocfs2/feature_indexed_dirs.c | 192 ++++++++++++++++++++++++++++++++++- + tunefs.ocfs2/o2ne_err.et | 9 ++ + 2 files changed, 199 insertions(+), 2 deletions(-) + +diff --git a/tunefs.ocfs2/feature_indexed_dirs.c b/tunefs.ocfs2/feature_indexed_dirs.c +index 368eb87..e9f87fb 100644 +--- a/tunefs.ocfs2/feature_indexed_dirs.c ++++ b/tunefs.ocfs2/feature_indexed_dirs.c +@@ -6,7 +6,7 @@ + * ocfs2 tune utility for enabling and disabling the directory indexing + * feature. + * +- * Copyright (C) 2009 Novell. All rights reserved. ++ * Copyright (C) 2009, 2010 Novell. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public +@@ -30,6 +30,17 @@ + + #include "libocfs2ne.h" + ++struct dx_dirs_inode { ++ struct list_head list; ++ uint64_t ino; ++}; ++ ++struct dx_dirs_context { ++ errcode_t ret; ++ uint64_t dx_dirs_nr; ++ struct list_head inodes; ++ struct tools_progress *prog; ++}; + + static int enable_indexed_dirs(ocfs2_filesys *fs, int flags) + { +@@ -71,6 +82,183 @@ out: + return ret; + } + ++static errcode_t dx_dir_iterate(ocfs2_filesys *fs, struct ocfs2_dinode *di, ++ void *user_data) ++{ ++ errcode_t ret = 0; ++ struct dx_dirs_inode *dx_di = NULL; ++ struct dx_dirs_context *ctxt= (struct dx_dirs_context *)user_data; ++ ++ if (!S_ISDIR(di->i_mode)) ++ goto bail; ++ ++ if (!(di->i_dyn_features & OCFS2_INDEXED_DIR_FL)) ++ goto bail; ++ ++ ret = ocfs2_malloc0(sizeof(struct dx_dirs_inode), &dx_di); ++ if (ret) { ++ ret = TUNEFS_ET_NO_MEMORY; ++ goto bail; ++ } ++ ++ dx_di->ino = di->i_blkno; ++ ctxt->dx_dirs_nr ++; ++ list_add_tail(&dx_di->list, &ctxt->inodes); ++ ++ tools_progress_step(ctxt->prog, 1); ++ ++bail: ++ return ret; ++} ++ ++ ++static errcode_t find_indexed_dirs(ocfs2_filesys *fs, ++ struct dx_dirs_context *ctxt) ++{ ++ errcode_t ret; ++ ++ ctxt->prog = tools_progress_start("Scanning filesystem", "scanning", 
0); ++ if (!ctxt->prog) { ++ ret = TUNEFS_ET_NO_MEMORY; ++ goto bail; ++ } ++ ++ ret = tunefs_foreach_inode(fs, dx_dir_iterate, ctxt); ++ if (ret) { ++ if (ret != TUNEFS_ET_NO_MEMORY) ++ ret = TUNEFS_ET_DX_DIRS_SCAN_FAILED; ++ goto bail; ++ } ++ ++ verbosef(VL_APP, ++ "We have %lu indexed %s to truncate.\n", ++ ctxt->dx_dirs_nr, ++ (ctxt->dx_dirs_nr > 1)?"directories":"directory"); ++ ++bail: ++ if (ctxt->prog) ++ tools_progress_stop(ctxt->prog); ++ ++ return ret; ++} ++ ++static errcode_t clean_indexed_dirs(ocfs2_filesys *fs, ++ struct dx_dirs_context *ctxt) ++{ ++ errcode_t ret = 0; ++ struct list_head *pos; ++ struct dx_dirs_inode *dx_di; ++ struct tools_progress *prog; ++ uint64_t dirs_truncated = 0; ++ ++ prog = tools_progress_start("Truncating indexed dirs", "truncating", ++ ctxt->dx_dirs_nr); ++ if (!prog) { ++ ret = TUNEFS_ET_NO_MEMORY; ++ goto bail; ++ } ++ ++ list_for_each(pos, &ctxt->inodes) { ++ dx_di = list_entry(pos, struct dx_dirs_inode, list); ++ ++ ret = ocfs2_dx_dir_truncate(fs, dx_di->ino); ++ if (ret) { ++ verbosef(VL_APP, ++ "Truncate directory (ino \"%lu\") failed.", ++ dx_di->ino); ++ ret = TUNEFS_ET_DX_DIRS_TRUNCATE_FAILED; ++ goto bail; ++ } ++ dirs_truncated ++; ++ tools_progress_step(prog, 1); ++ } ++ ++bail: ++ tools_progress_stop(prog); ++ verbosef(VL_APP, ++ "\"%lu\" from \"%lu\" indexed %s truncated.", ++ dirs_truncated, ctxt->dx_dirs_nr, ++ (dirs_truncated <= 1) ? 
"directory is" : "directories are"); ++ ++ return ret; ++} ++ ++static void release_dx_dirs_context(struct dx_dirs_context *ctxt) ++{ ++ struct list_head *pos, *n; ++ struct dx_dirs_inode *dx_di; ++ ++ list_for_each_safe(pos, n, &ctxt->inodes) { ++ dx_di = list_entry(pos, struct dx_dirs_inode, list); ++ list_del(&dx_di->list); ++ ocfs2_free(&dx_di); ++ } ++} ++ ++static int disable_indexed_dirs(ocfs2_filesys *fs, int flags) ++{ ++ errcode_t ret = 0; ++ struct ocfs2_super_block *super = OCFS2_RAW_SB(fs->fs_super); ++ struct dx_dirs_context ctxt; ++ struct tools_progress *prog = NULL; ++ ++ if (!ocfs2_supports_indexed_dirs(super)) { ++ verbosef(VL_APP, ++ "Directory indexing feature is not enabled; " ++ "nothing to disable\n"); ++ goto out; ++ } ++ ++ if (!tools_interact("Disabling the directory indexing feature on " ++ "device \"%s\"? ", ++ fs->fs_devname)) ++ goto out; ++ ++ prog = tools_progress_start("Disable directory indexing", "no dir idx", 2); ++ if (!prog) { ++ ret = TUNEFS_ET_NO_MEMORY; ++ tcom_err(ret, "while initializing the progress display"); ++ goto out; ++ } ++ ++ memset(&ctxt, 0, sizeof (struct dx_dirs_context)); ++ INIT_LIST_HEAD(&ctxt.inodes); ++ ret = find_indexed_dirs(fs, &ctxt); ++ if (ret) { ++ tcom_err(ret, "while scanning indexed directories"); ++ goto out_cleanup; ++ } ++ ++ tools_progress_step(prog, 1); ++ ++ tunefs_block_signals(); ++ ret = clean_indexed_dirs(fs, &ctxt); ++ if (ret) { ++ tcom_err(ret, "while truncate indexed directories"); ++ } ++ ++ /* We already touched file system, must disable dx dirs flag here. ++ * fsck.ocfs2 will handle the orphan indexed trees. 
*/ ++ OCFS2_CLEAR_INCOMPAT_FEATURE(super, ++ OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS); ++ ret = ocfs2_write_super(fs); ++ tunefs_unblock_signals(); ++ ++ if (ret) { ++ ret = TUNEFS_ET_IO_WRITE_FAILED; ++ tcom_err(ret, "while writing super block"); ++ } ++ ++ tools_progress_step(prog, 1); ++out_cleanup: ++ release_dx_dirs_context(&ctxt); ++out: ++ if (prog) ++ tools_progress_stop(prog); ++ ++ return ret; ++} ++ + /* + * TUNEFS_FLAG_ALLOCATION because disabling will want to dealloc + * blocks. +@@ -79,7 +267,7 @@ DEFINE_TUNEFS_FEATURE_INCOMPAT(indexed_dirs, + OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, + TUNEFS_FLAG_RW | TUNEFS_FLAG_ALLOCATION, + enable_indexed_dirs, +- NULL); ++ disable_indexed_dirs); + + #ifdef DEBUG_EXE + int main(int argc, char *argv[]) +diff --git a/tunefs.ocfs2/o2ne_err.et b/tunefs.ocfs2/o2ne_err.et +index 20031a5..c2f700b 100644 +--- a/tunefs.ocfs2/o2ne_err.et ++++ b/tunefs.ocfs2/o2ne_err.et +@@ -85,4 +85,13 @@ ec TUNEFS_ET_ONLINE_NOT_SUPPORTED, + ec TUNEFS_ET_CLUSTER_SKIPPED, + "Cluster stack initialization was skipped" + ++ec TUNEFS_ET_DX_DIRS_SCAN_FAILED, ++ "Scanning inodes for directory indexing failed" ++ ++ec TUNEFS_ET_IO_WRITE_FAILED, ++ "Write I/O failed" ++ ++ec TUNEFS_ET_DX_DIRS_TRUNCATE_FAILED, ++ "Truncate directory indexed tree failed" ++ + end +-- +1.7.0.2 + diff --git a/0011-dx_dirs-build-indexed-trees-when-enabling-indexed-di.patch b/0011-dx_dirs-build-indexed-trees-when-enabling-indexed-di.patch new file mode 100644 index 0000000..a1e03b4 --- /dev/null +++ b/0011-dx_dirs-build-indexed-trees-when-enabling-indexed-di.patch @@ -0,0 +1,139 @@ +From 28f5939111c09de057750fb30ce40ade8bd2b8ef Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Sun, 11 Apr 2010 16:10:08 +0800 +Subject: [PATCH 11/30] dx_dirs: build indexed trees when enabling indexed-dirs + +Previos enable indexed-dirs implementation only set superblock flag, +does not build indexed trees for existed directories. 
+This patch tries to build indexed trees for directories when enabling +indexed-dirs. + +Signed-off-by: Coly Li +Signed-off-by: Mark Fasheh +--- + tunefs.ocfs2/feature_indexed_dirs.c | 67 +++++++++++++++++++++++++++++++---- + tunefs.ocfs2/o2ne_err.et | 3 ++ + 2 files changed, 63 insertions(+), 7 deletions(-) + +diff --git a/tunefs.ocfs2/feature_indexed_dirs.c b/tunefs.ocfs2/feature_indexed_dirs.c +index e9f87fb..cfa0dd0 100644 +--- a/tunefs.ocfs2/feature_indexed_dirs.c ++++ b/tunefs.ocfs2/feature_indexed_dirs.c +@@ -42,11 +42,46 @@ struct dx_dirs_context { + struct tools_progress *prog; + }; + ++/* ++ * If an indexed-dirs disabled directory has an indexed tree, ++ * this tree is unreliable. it must be truncated and rebuilt. ++ */ ++static errcode_t build_dx_dir(ocfs2_filesys *fs, struct ocfs2_dinode *di, ++ void *user_data) ++{ ++ errcode_t ret = 0; ++ struct dx_dirs_context *ctxt = (struct dx_dirs_context *)user_data; ++ ++ if (!S_ISDIR(di->i_mode)) ++ goto bail; ++ ++ if (di->i_dyn_features & OCFS2_INDEXED_DIR_FL) { ++ verbosef(VL_APP, ++ "Directory inode %llu already has an indexed tree, " ++ "rebuild the indexed tree.\n", di->i_blkno); ++ ret = ocfs2_dx_dir_truncate(fs, di->i_blkno); ++ if (ret) { ++ ret = TUNEFS_ET_DX_DIRS_TRUNCATE_FAILED; ++ tcom_err(ret, "while rebulid indexed tree"); ++ } ++ } ++ ret = ocfs2_dx_dir_build(fs, di->i_blkno); ++ if (ret) { ++ ret = TUNEFS_ET_DX_DIRS_BUILD_FAILED; ++ tcom_err(ret, "while enable indexed-dirs"); ++ } ++ ++bail: ++ tools_progress_step(ctxt->prog, 1); ++ return ret; ++} ++ + static int enable_indexed_dirs(ocfs2_filesys *fs, int flags) + { + errcode_t ret = 0; + struct ocfs2_super_block *super = OCFS2_RAW_SB(fs->fs_super); +- struct tools_progress *prog; ++ struct tools_progress *prog = NULL; ++ struct dx_dirs_context ctxt; + + if (ocfs2_supports_indexed_dirs(super)) { + verbosef(VL_APP, +@@ -55,30 +90,48 @@ static int enable_indexed_dirs(ocfs2_filesys *fs, int flags) + goto out; + } + +- + if (!tools_interact("Enable
the directory indexing feature on " + "device \"%s\"? ", + fs->fs_devname)) + goto out; + +- prog = tools_progress_start("Enable directory indexing", "dir idx", 1); ++ prog = tools_progress_start("Enable directory indexing", "dir idx", 2); + if (!prog) { + ret = TUNEFS_ET_NO_MEMORY; + tcom_err(ret, "while initializing the progress display"); + goto out; + } + ++ memset(&ctxt, 0, sizeof(struct dx_dirs_context)); ++ ctxt.prog = tools_progress_start("Building indexed trees", "building", 0); ++ if (!ctxt.prog) { ++ ret = TUNEFS_ET_NO_MEMORY; ++ goto out; ++ } ++ + OCFS2_SET_INCOMPAT_FEATURE(super, + OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS); ++ + tunefs_block_signals(); + ret = ocfs2_write_super(fs); +- tunefs_unblock_signals(); +- if (ret) ++ if (ret) { ++ ret = TUNEFS_ET_IO_WRITE_FAILED; + tcom_err(ret, "while writing out the superblock"); +- ++ goto unblock_out; ++ } + tools_progress_step(prog, 1); +- tools_progress_stop(prog); ++ ret = tunefs_foreach_inode(fs, build_dx_dir, &ctxt); ++ if (ret) ++ tcom_err(ret, "while building indexed trees"); ++unblock_out: ++ tunefs_unblock_signals(); ++ tools_progress_step(prog, 1); ++ if (ctxt.prog) ++ tools_progress_stop(ctxt.prog); + out: ++ if (prog) ++ tools_progress_stop(prog); ++ + return ret; + } + +diff --git a/tunefs.ocfs2/o2ne_err.et b/tunefs.ocfs2/o2ne_err.et +index c2f700b..3561d8c 100644 +--- a/tunefs.ocfs2/o2ne_err.et ++++ b/tunefs.ocfs2/o2ne_err.et +@@ -94,4 +94,7 @@ ec TUNEFS_ET_IO_WRITE_FAILED, + ec TUNEFS_ET_DX_DIRS_TRUNCATE_FAILED, + "Truncate directory indexed tree failed" + ++ec TUNEFS_ET_DX_DIRS_BUILD_FAILED, ++ "Build directory indexed tree failed" ++ + end +-- +1.7.0.2 + diff --git a/0012-dx_dirs-fix-return-value-of-walk_dirblock-when-enabl.patch b/0012-dx_dirs-fix-return-value-of-walk_dirblock-when-enabl.patch new file mode 100644 index 0000000..c1fa84b --- /dev/null +++ b/0012-dx_dirs-fix-return-value-of-walk_dirblock-when-enabl.patch @@ -0,0 +1,32 @@ +From 183b0b3a12396e838dcb9c1e7dc0423eb3a4fbd5 Mon Sep 17 
00:00:00 2001 +From: Coly Li +Date: Sun, 11 Apr 2010 16:10:09 +0800 +Subject: [PATCH 12/30] dx_dirs: fix return value of walk_dirblock() when enable metaecc + +If there is no error, the return value 'ret' might be used with +an undefined initial value. This patch fixes the error by initializing +it to 0. + +Reported-by: Vit Pelcak +Signed-off-by: Coly Li +Signed-off-by: Mark Fasheh +--- + tunefs.ocfs2/feature_metaecc.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/tunefs.ocfs2/feature_metaecc.c b/tunefs.ocfs2/feature_metaecc.c +index c4de8be..799b404 100644 +--- a/tunefs.ocfs2/feature_metaecc.c ++++ b/tunefs.ocfs2/feature_metaecc.c +@@ -135,7 +135,7 @@ static errcode_t walk_dirblock(ocfs2_filesys *fs, + struct tunefs_trailer_context *tc, + struct tunefs_trailer_dirblock *db) + { +- errcode_t ret; ++ errcode_t ret = 0; + struct ocfs2_dir_entry *dirent, *prev = NULL; + unsigned int real_rec_len; + unsigned int offset = 0; +-- +1.7.0.2 + diff --git a/0013-dx_dirs-try-to-install-dir-trailers-when-enable-inde.patch b/0013-dx_dirs-try-to-install-dir-trailers-when-enable-inde.patch new file mode 100644 index 0000000..05e6492 --- /dev/null +++ b/0013-dx_dirs-try-to-install-dir-trailers-when-enable-inde.patch @@ -0,0 +1,131 @@ +From 947d4450eed4a18299d7b14b828a69533eb4a2ed Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Sun, 11 Apr 2010 16:10:10 +0800 +Subject: [PATCH 13/30] dx_dirs: try to install dir trailers when enable indexed-dirs + +If metaecc feature is not enabled, previous indexed-dirs patches in +ocfs2-tools do not install dir trailers and move the dir entries which +lie in the trailer area. This patch tries to install dir trailers when +enabling the indexed-dirs feature.
+ +Signed-off-by: Coly Li +Signed-off-by: Mark Fasheh +--- + tunefs.ocfs2/feature_indexed_dirs.c | 8 ++++++++ + tunefs.ocfs2/feature_metaecc.c | 25 +++---------------------- + tunefs.ocfs2/libocfs2ne.h | 25 +++++++++++++++++++++++++ + tunefs.ocfs2/o2ne_err.et | 3 +++ + 4 files changed, 39 insertions(+), 22 deletions(-) + +diff --git a/tunefs.ocfs2/feature_indexed_dirs.c b/tunefs.ocfs2/feature_indexed_dirs.c +index cfa0dd0..c26780e 100644 +--- a/tunefs.ocfs2/feature_indexed_dirs.c ++++ b/tunefs.ocfs2/feature_indexed_dirs.c +@@ -65,6 +65,14 @@ static errcode_t build_dx_dir(ocfs2_filesys *fs, struct ocfs2_dinode *di, + tcom_err(ret, "while rebulid indexed tree"); + } + } ++ ++ ret = tunefs_install_dir_trailer(fs, di, NULL); ++ if (ret) { ++ ret = TUNEFS_ET_INSTALL_DIR_TRAILER_FAILED; ++ tcom_err(ret, "while enable indexed-dirs"); ++ goto bail; ++ } ++ + ret = ocfs2_dx_dir_build(fs, di->i_blkno); + if (ret) { + ret = TUNEFS_ET_DX_DIRS_BUILD_FAILED; +diff --git a/tunefs.ocfs2/feature_metaecc.c b/tunefs.ocfs2/feature_metaecc.c +index 799b404..e6b48b5 100644 +--- a/tunefs.ocfs2/feature_metaecc.c ++++ b/tunefs.ocfs2/feature_metaecc.c +@@ -68,25 +68,6 @@ struct tunefs_trailer_dirblock { + struct ocfs2_dir_entry *db_last; + }; + +-/* A directory inode we're adding trailers to */ +-struct tunefs_trailer_context { +- struct list_head d_list; +- uint64_t d_blkno; /* block number of the dir */ +- struct ocfs2_dinode *d_di; /* The directory's inode */ +- struct list_head d_dirblocks; /* List of its dirblocks */ +- uint64_t d_bytes_needed; /* How many new bytes will +- cover the dirents we are moving +- to make way for trailers */ +- uint64_t d_blocks_needed; /* How many blocks covers +- d_bytes_needed */ +- char *d_new_blocks; /* Buffer of new blocks to fill */ +- char *d_cur_block; /* Which block we're filling in +- d_new_blocks */ +- struct ocfs2_dir_entry *d_next_dirent; /* Next dentry to use */ +- errcode_t d_err; /* Any processing error during +- iteration of the directory 
*/ +-}; +- + static void tunefs_trailer_context_free(struct tunefs_trailer_context *tc) + { + struct tunefs_trailer_dirblock *db; +@@ -529,9 +510,9 @@ out: + } + + +-static errcode_t tunefs_install_dir_trailer(ocfs2_filesys *fs, +- struct ocfs2_dinode *di, +- struct tunefs_trailer_context *tc) ++errcode_t tunefs_install_dir_trailer(ocfs2_filesys *fs, ++ struct ocfs2_dinode *di, ++ struct tunefs_trailer_context *tc) + { + errcode_t ret = 0; + struct tunefs_trailer_context *our_tc = NULL; +diff --git a/tunefs.ocfs2/libocfs2ne.h b/tunefs.ocfs2/libocfs2ne.h +index a8e1e5e..18380be 100644 +--- a/tunefs.ocfs2/libocfs2ne.h ++++ b/tunefs.ocfs2/libocfs2ne.h +@@ -281,5 +281,30 @@ errcode_t tunefs_feature_run(ocfs2_filesys *master_fs, + int tunefs_feature_main(int argc, char *argv[], struct tunefs_feature *feat); + int tunefs_op_main(int argc, char *argv[], struct tunefs_operation *op); + ++/* A directory inode we're adding trailers to */ ++struct tunefs_trailer_context { ++ struct list_head d_list; ++ uint64_t d_blkno; /* block number of the dir */ ++ struct ocfs2_dinode *d_di; /* The directory's inode */ ++ struct list_head d_dirblocks; /* List of its dirblocks */ ++ uint64_t d_bytes_needed; /* How many new bytes will ++ cover the dirents we are moving ++ to make way for trailers */ ++ uint64_t d_blocks_needed; /* How many blocks covers ++ d_bytes_needed */ ++ char *d_new_blocks; /* Buffer of new blocks to fill */ ++ char *d_cur_block; /* Which block we're filling in ++ d_new_blocks */ ++ struct ocfs2_dir_entry *d_next_dirent; /* Next dentry to use */ ++ errcode_t d_err; /* Any processing error during ++ iteration of the directory */ ++}; ++ ++/* ++ * called from feature_metaecc.c and feature_indexed_dirs.c ++ * to install dir trailers ++ */ ++errcode_t tunefs_install_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di, ++ struct tunefs_trailer_context *tc); + + #endif /* _LIBTUNEFS_H */ +diff --git a/tunefs.ocfs2/o2ne_err.et b/tunefs.ocfs2/o2ne_err.et +index 
3561d8c..955c338 100644 +--- a/tunefs.ocfs2/o2ne_err.et ++++ b/tunefs.ocfs2/o2ne_err.et +@@ -97,4 +97,7 @@ ec TUNEFS_ET_DX_DIRS_TRUNCATE_FAILED, + ec TUNEFS_ET_DX_DIRS_BUILD_FAILED, + "Build directory indexed tree failed" + ++ec TUNEFS_ET_INSTALL_DIR_TRAILER_FAILED, ++ "Install directory trailer failed" ++ + end +-- +1.7.0.2 + diff --git a/0014-dx_dirs-add-an-initial-man-page-entry-for-indexed-di.patch b/0014-dx_dirs-add-an-initial-man-page-entry-for-indexed-di.patch new file mode 100644 index 0000000..3477997 --- /dev/null +++ b/0014-dx_dirs-add-an-initial-man-page-entry-for-indexed-di.patch @@ -0,0 +1,32 @@ +From 0fe34f26335f1d2f10550b2e12f65f6f24f39a6f Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Sun, 11 Apr 2010 16:10:11 +0800 +Subject: [PATCH 14/30] dx_dirs: add an initial man page entry for indexed-dirs + +This patch adds an initial man page entry for the indexed-dirs feature +string. + +Signed-off-by: Coly Li +Signed-off-by: Mark Fasheh +--- + mkfs.ocfs2/mkfs.ocfs2.8.in | 4 ++++ + 1 files changed, 4 insertions(+), 0 deletions(-) + +diff --git a/mkfs.ocfs2/mkfs.ocfs2.8.in b/mkfs.ocfs2/mkfs.ocfs2.8.in +index c7a7888..a148133 100644 +--- a/mkfs.ocfs2/mkfs.ocfs2.8.in ++++ b/mkfs.ocfs2/mkfs.ocfs2.8.in +@@ -175,6 +175,10 @@ and number of inodes (files, directories, symbolic links) each group owns. It is + to limit the maximum amount of space or inodes user can have. See a documentation of + quota-tools package for more details. + .RE ++.TP ++\fBindexed-dirs\fR ++Enable directory indexing support. With this feature enabled, the file system creates an indexed tree for non-inline directory entries. For large scale directories, directory entry lookup performance from the indexed tree is faster than from the legacy directory blocks.
++.RE + + .TP + \fB\-\-fs\-feature\-level=\fR\fR\fIfeature\-level\fR +-- +1.7.0.2 + diff --git a/0015-fsck.ocfs2-prompt-before-truncating-an-invalid-dir-i.patch b/0015-fsck.ocfs2-prompt-before-truncating-an-invalid-dir-i.patch new file mode 100644 index 0000000..54ea8e5 --- /dev/null +++ b/0015-fsck.ocfs2-prompt-before-truncating-an-invalid-dir-i.patch @@ -0,0 +1,48 @@ +From ed44744701bd4eeaf5ee0fcce9190cdf36b5b21c Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Mon, 19 Apr 2010 16:25:48 -0700 +Subject: [PATCH 15/30] fsck.ocfs2: prompt before truncating an invalid dir index + +Signed-off-by: Mark Fasheh +--- + fsck.ocfs2/fsck.ocfs2.checks.8.in | 6 ++++++ + fsck.ocfs2/pass2.c | 6 +++++- + 2 files changed, 11 insertions(+), 1 deletions(-) + +diff --git a/fsck.ocfs2/fsck.ocfs2.checks.8.in b/fsck.ocfs2/fsck.ocfs2.checks.8.in +index 05561ae..5cda023 100644 +--- a/fsck.ocfs2/fsck.ocfs2.checks.8.in ++++ b/fsck.ocfs2/fsck.ocfs2.checks.8.in +@@ -1055,6 +1055,12 @@ but fsck has already found quota limits for this user / group. + + Answering yes will use new values of limits for the user / group. + ++.SS "IV_DX_TREE" ++A directory index was found on an inode but that feature is not enabled on the ++file system. ++ ++Answering yes will truncate the invalid index. ++ + .SH "SEE ALSO" + .BR fsck.ocfs2(8) + +diff --git a/fsck.ocfs2/pass2.c b/fsck.ocfs2/pass2.c +index 58efcd4..b999761 100644 +--- a/fsck.ocfs2/pass2.c ++++ b/fsck.ocfs2/pass2.c +@@ -874,7 +874,11 @@ next: + if ((!ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(dd->fs->fs_super)))&& + di->i_dyn_features & OCFS2_INDEXED_DIR_FL ) { + /* ignore the return value */ +- ocfs2_dx_dir_truncate(dd->fs, dbe->e_ino); ++ if (prompt(dd->ost, PY, PR_IV_DX_TREE, "A directory index was " ++ "found on inode %"PRIu64" but the indexing feature " ++ "is not set on the fs. 
Truncate the invalid index?", ++ dbe->e_ino)) ++ ocfs2_dx_dir_truncate(dd->fs, dbe->e_ino); + } + + out: +-- +1.7.0.2 + diff --git a/0016-tunefs.ocfs2-move-o2ne_add_tailers-into-libocfs2ne.c.patch b/0016-tunefs.ocfs2-move-o2ne_add_tailers-into-libocfs2ne.c.patch new file mode 100644 index 0000000..e300e4d --- /dev/null +++ b/0016-tunefs.ocfs2-move-o2ne_add_tailers-into-libocfs2ne.c.patch @@ -0,0 +1,1152 @@ +From 2484ca19295ad347b4b6197345ac0663888361ac Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Mon, 19 Apr 2010 17:17:44 -0700 +Subject: [PATCH 16/30] tunefs.ocfs2: move o2ne_add_tailers() into libocfs2ne.c + +The prototype is already in libocfs2ne.h - this function is shared between +feature_metaecc.c and feature_indexed_dirs.c so it should be in a shared c +file as well. + +Signed-off-by: Mark Fasheh +--- + tunefs.ocfs2/feature_metaecc.c | 547 ---------------------------------------- + tunefs.ocfs2/libocfs2ne.c | 509 +++++++++++++++++++++++++++++++++++++ + tunefs.ocfs2/libocfs2ne.h | 39 +++ + 3 files changed, 548 insertions(+), 547 deletions(-) + +diff --git a/tunefs.ocfs2/feature_metaecc.c b/tunefs.ocfs2/feature_metaecc.c +index e6b48b5..c9c2709 100644 +--- a/tunefs.ocfs2/feature_metaecc.c ++++ b/tunefs.ocfs2/feature_metaecc.c +@@ -33,553 +33,6 @@ + + + +-/* A dirblock we have to add a trailer to */ +-struct tunefs_trailer_dirblock { +- struct list_head db_list; +- uint64_t db_blkno; +- char *db_buf; +- +- /* +- * These require a little explanation. They point to +- * ocfs2_dir_entry structures inside db_buf. +- * +- * db_last entry we're going to *keep*. If the last entry in the +- * dirblock has enough extra rec_len to allow the trailer, db_last +- * points to it. We will shorten its rec_len and insert the +- * trailer. +- * +- * However, if the last entry in the dirblock cannot be truncated, +- * db_move points to the entry we have to move out, and db_last +- * points to the entry before that - the last entry we're keeping +- * in this dirblock. 
+- * +- * Examples: +- * +- * - The last entry in the dirblock has a name_len of 1 and a +- * rec_len of 128. We can easily change the rec_len to 64 and +- * insert the trailer. db_last points to this entry. +- * +- * - The last entry in the dirblock has a name_len of 1 and a +- * rec_len of 48. The previous entry has a name_len of 1 and a +- * rec_len of 32. We have to move the last entry out. The +- * second-to-last entry can have its rec_len truncated to 16, so +- * we put it in db_last. +- */ +- struct ocfs2_dir_entry *db_last; +-}; +- +-static void tunefs_trailer_context_free(struct tunefs_trailer_context *tc) +-{ +- struct tunefs_trailer_dirblock *db; +- struct list_head *n, *pos; +- +- if (!list_empty(&tc->d_list)) +- list_del(&tc->d_list); +- +- list_for_each_safe(pos, n, &tc->d_dirblocks) { +- db = list_entry(pos, struct tunefs_trailer_dirblock, db_list); +- list_del(&db->db_list); +- ocfs2_free(&db->db_buf); +- ocfs2_free(&db); +- } +- +- ocfs2_free(&tc); +-} +- +-/* +- * We're calculating how many bytes we need to add to make space for +- * the dir trailers. But we need to make sure that the added directory +- * blocks also have room for a trailer. +- */ +-static void add_bytes_needed(ocfs2_filesys *fs, +- struct tunefs_trailer_context *tc, +- unsigned int rec_len) +-{ +- unsigned int toff = ocfs2_dir_trailer_blk_off(fs); +- unsigned int block_offset = tc->d_bytes_needed % fs->fs_blocksize; +- +- /* +- * If the current byte offset would put us into a trailer, push +- * it out to the start of the next block. Remember, dirents have +- * to be at least 16 bytes, which is why we check against the +- * smallest rec_len. 
+- */ +- if ((block_offset + rec_len) > (toff - OCFS2_DIR_REC_LEN(1))) +- tc->d_bytes_needed += fs->fs_blocksize - block_offset; +- +- tc->d_bytes_needed += rec_len; +- tc->d_blocks_needed = +- ocfs2_blocks_in_bytes(fs, tc->d_bytes_needed); +-} +- +-static errcode_t walk_dirblock(ocfs2_filesys *fs, +- struct tunefs_trailer_context *tc, +- struct tunefs_trailer_dirblock *db) +-{ +- errcode_t ret = 0; +- struct ocfs2_dir_entry *dirent, *prev = NULL; +- unsigned int real_rec_len; +- unsigned int offset = 0; +- unsigned int toff = ocfs2_dir_trailer_blk_off(fs); +- +- while (offset < fs->fs_blocksize) { +- dirent = (struct ocfs2_dir_entry *) (db->db_buf + offset); +- if (((offset + dirent->rec_len) > fs->fs_blocksize) || +- (dirent->rec_len < 8) || +- ((dirent->rec_len % 4) != 0) || +- (((dirent->name_len & 0xFF)+8) > dirent->rec_len)) { +- ret = OCFS2_ET_DIR_CORRUPTED; +- break; +- } +- +- real_rec_len = dirent->inode ? +- OCFS2_DIR_REC_LEN(dirent->name_len) : +- OCFS2_DIR_REC_LEN(1); +- if ((offset + real_rec_len) <= toff) +- goto next; +- +- /* +- * The first time through, we store off the last dirent +- * before the trailer. 
+- */ +- if (!db->db_last) +- db->db_last = prev; +- +- /* Only live dirents need to be moved */ +- if (dirent->inode) { +- verbosef(VL_DEBUG, +- "Will move dirent %.*s out of " +- "directory block %"PRIu64" to make way " +- "for the trailer\n", +- dirent->name_len, dirent->name, +- db->db_blkno); +- add_bytes_needed(fs, tc, real_rec_len); +- } +- +-next: +- prev = dirent; +- offset += dirent->rec_len; +- } +- +- /* There were no dirents across the boundary */ +- if (!db->db_last) +- db->db_last = prev; +- +- return ret; +-} +- +-static int dirblock_scan_iterate(ocfs2_filesys *fs, uint64_t blkno, +- uint64_t bcount, uint16_t ext_flags, +- void *priv_data) +-{ +- errcode_t ret = 0; +- struct tunefs_trailer_dirblock *db = NULL; +- struct tunefs_trailer_context *tc = priv_data; +- +- ret = ocfs2_malloc0(sizeof(struct tunefs_trailer_dirblock), &db); +- if (ret) +- goto out; +- +- ret = ocfs2_malloc_block(fs->fs_io, &db->db_buf); +- if (ret) +- goto out; +- +- db->db_blkno = blkno; +- +- verbosef(VL_DEBUG, +- "Reading dinode %"PRIu64" dirblock %"PRIu64" at block " +- "%"PRIu64"\n", +- tc->d_di->i_blkno, bcount, blkno); +- ret = ocfs2_read_dir_block(fs, tc->d_di, blkno, db->db_buf); +- if (ret) +- goto out; +- +- ret = walk_dirblock(fs, tc, db); +- if (ret) +- goto out; +- +- list_add_tail(&db->db_list, &tc->d_dirblocks); +- db = NULL; +- +-out: +- if (db) { +- if (db->db_buf) +- ocfs2_free(&db->db_buf); +- ocfs2_free(&db); +- } +- +- if (ret) { +- tc->d_err = ret; +- return OCFS2_BLOCK_ABORT; +- } +- +- return 0; +-} +- +-static errcode_t tunefs_prepare_dir_trailer(ocfs2_filesys *fs, +- struct ocfs2_dinode *di, +- struct tunefs_trailer_context **tc_ret) +-{ +- errcode_t ret = 0; +- struct tunefs_trailer_context *tc = NULL; +- +- if (ocfs2_dir_has_trailer(fs, di)) +- goto out; +- +- ret = ocfs2_malloc0(sizeof(struct tunefs_trailer_context), &tc); +- if (ret) +- goto out; +- +- tc->d_blkno = di->i_blkno; +- tc->d_di = di; +- INIT_LIST_HEAD(&tc->d_list); +- 
INIT_LIST_HEAD(&tc->d_dirblocks); +- +- ret = ocfs2_block_iterate_inode(fs, tc->d_di, 0, +- dirblock_scan_iterate, tc); +- if (!ret) +- ret = tc->d_err; +- if (ret) +- goto out; +- +- *tc_ret = tc; +- tc = NULL; +- +-out: +- if (tc) +- tunefs_trailer_context_free(tc); +- +- return ret; +-} +- +-/* +- * We are hand-coding the directory expansion because we're going to +- * build the new directory blocks ourselves. We can't just use +- * ocfs2_expand_dir() and ocfs2_link(), because we're moving around +- * entries. +- */ +-static errcode_t expand_dir_if_needed(ocfs2_filesys *fs, +- struct ocfs2_dinode *di, +- uint64_t blocks_needed) +-{ +- errcode_t ret = 0; +- uint64_t used_blocks, total_blocks; +- uint32_t clusters_needed; +- +- /* This relies on the fact that i_size of a directory is a +- * multiple of blocksize */ +- used_blocks = ocfs2_blocks_in_bytes(fs, di->i_size); +- total_blocks = ocfs2_clusters_to_blocks(fs, di->i_clusters); +- if ((used_blocks + blocks_needed) <= total_blocks) +- goto out; +- +- clusters_needed = +- ocfs2_clusters_in_blocks(fs, +- (used_blocks + blocks_needed) - +- total_blocks); +- ret = ocfs2_extend_allocation(fs, di->i_blkno, clusters_needed); +- if (ret) +- goto out; +- +- /* Pick up changes to the inode */ +- ret = ocfs2_read_inode(fs, di->i_blkno, (char *)di); +- +-out: +- return ret; +-} +- +-static void shift_dirent(ocfs2_filesys *fs, +- struct tunefs_trailer_context *tc, +- struct ocfs2_dir_entry *dirent) +-{ +- /* Using the real rec_len */ +- unsigned int rec_len = OCFS2_DIR_REC_LEN(dirent->name_len); +- unsigned int offset, remain; +- +- /* +- * If the current byte offset would put us into a trailer, push +- * it out to the start of the next block. Remember, dirents have +- * to be at least 16 bytes, which is why we check against the +- * smallest rec_len. 
+- */ +- if (rec_len > (tc->d_next_dirent->rec_len - OCFS2_DIR_REC_LEN(1))) { +- tc->d_cur_block += fs->fs_blocksize; +- tc->d_next_dirent = (struct ocfs2_dir_entry *)tc->d_cur_block; +- } +- +- assert(ocfs2_blocks_in_bytes(fs, +- tc->d_cur_block - tc->d_new_blocks) < +- tc->d_blocks_needed); +- +- offset = (char *)(tc->d_next_dirent) - tc->d_cur_block; +- remain = tc->d_next_dirent->rec_len - rec_len; +- +- memcpy(tc->d_cur_block + offset, dirent, rec_len); +- tc->d_next_dirent->rec_len = rec_len; +- +- verbosef(VL_DEBUG, +- "Installed dirent %.*s at offset %u of new block " +- "%"PRIu64", rec_len %u\n", +- tc->d_next_dirent->name_len, tc->d_next_dirent->name, +- offset, +- ocfs2_blocks_in_bytes(fs, tc->d_cur_block - tc->d_new_blocks), +- rec_len); +- +- +- offset += rec_len; +- tc->d_next_dirent = +- (struct ocfs2_dir_entry *)(tc->d_cur_block + offset); +- tc->d_next_dirent->rec_len = remain; +- +- verbosef(VL_DEBUG, +- "New block %"PRIu64" has its last dirent at %u, with %u " +- "bytes left\n", +- ocfs2_blocks_in_bytes(fs, tc->d_cur_block - tc->d_new_blocks), +- offset, remain); +-} +- +-static errcode_t fixup_dirblock(ocfs2_filesys *fs, +- struct tunefs_trailer_context *tc, +- struct tunefs_trailer_dirblock *db) +-{ +- errcode_t ret = 0; +- struct ocfs2_dir_entry *dirent; +- unsigned int real_rec_len; +- unsigned int offset; +- unsigned int toff = ocfs2_dir_trailer_blk_off(fs); +- +- /* +- * db_last is the last dirent we're *keeping*. So we need to +- * move out every valid dirent *after* db_last. +- * +- * tunefs_prepare_dir_trailer() should have calculated this +- * correctly. 
+- */ +- offset = ((char *)db->db_last) - db->db_buf; +- offset += db->db_last->rec_len; +- while (offset < fs->fs_blocksize) { +- dirent = (struct ocfs2_dir_entry *) (db->db_buf + offset); +- if (((offset + dirent->rec_len) > fs->fs_blocksize) || +- (dirent->rec_len < 8) || +- ((dirent->rec_len % 4) != 0) || +- (((dirent->name_len & 0xFF)+8) > dirent->rec_len)) { +- ret = OCFS2_ET_DIR_CORRUPTED; +- break; +- } +- +- real_rec_len = dirent->inode ? +- OCFS2_DIR_REC_LEN(dirent->name_len) : +- OCFS2_DIR_REC_LEN(1); +- +- assert((offset + real_rec_len) > toff); +- +- /* Only live dirents need to be moved */ +- if (dirent->inode) { +- verbosef(VL_DEBUG, +- "Moving dirent %.*s out of directory " +- "block %"PRIu64" to make way for the " +- "trailer\n", +- dirent->name_len, dirent->name, +- db->db_blkno); +- shift_dirent(fs, tc, dirent); +- } +- +- offset += dirent->rec_len; +- } +- +- /* +- * Now that we've moved any dirents out of the way, we need to +- * fix up db_last and install the trailer. +- */ +- offset = ((char *)db->db_last) - db->db_buf; +- verbosef(VL_DEBUG, +- "Last valid dirent of directory block %"PRIu64" " +- "(\"%.*s\") is %u bytes in. 
Setting rec_len to %u and " +- "installing the trailer\n", +- db->db_blkno, db->db_last->name_len, db->db_last->name, +- offset, toff - offset); +- db->db_last->rec_len = toff - offset; +- ocfs2_init_dir_trailer(fs, tc->d_di, db->db_blkno, db->db_buf); +- +- return ret; +-} +- +-static errcode_t run_dirblocks(ocfs2_filesys *fs, +- struct tunefs_trailer_context *tc) +-{ +- errcode_t ret = 0; +- struct list_head *pos; +- struct tunefs_trailer_dirblock *db; +- +- list_for_each(pos, &tc->d_dirblocks) { +- db = list_entry(pos, struct tunefs_trailer_dirblock, db_list); +- ret = fixup_dirblock(fs, tc, db); +- if (ret) +- break; +- } +- +- return ret; +-} +- +-static errcode_t write_dirblocks(ocfs2_filesys *fs, +- struct tunefs_trailer_context *tc) +-{ +- errcode_t ret = 0; +- struct list_head *pos; +- struct tunefs_trailer_dirblock *db; +- +- list_for_each(pos, &tc->d_dirblocks) { +- db = list_entry(pos, struct tunefs_trailer_dirblock, db_list); +- ret = ocfs2_write_dir_block(fs, tc->d_di, db->db_blkno, +- db->db_buf); +- if (ret) { +- verbosef(VL_DEBUG, +- "Error writing dirblock %"PRIu64"\n", +- db->db_blkno); +- break; +- } +- } +- +- return ret; +-} +- +-static errcode_t init_new_dirblocks(ocfs2_filesys *fs, +- struct tunefs_trailer_context *tc) +-{ +- int i; +- errcode_t ret; +- uint64_t blkno; +- uint64_t orig_block = ocfs2_blocks_in_bytes(fs, tc->d_di->i_size); +- ocfs2_cached_inode *cinode; +- char *blockptr; +- struct ocfs2_dir_entry *first; +- +- ret = ocfs2_read_cached_inode(fs, tc->d_blkno, &cinode); +- if (ret) +- goto out; +- assert(!memcmp(tc->d_di, cinode->ci_inode, fs->fs_blocksize)); +- +- for (i = 0; i < tc->d_blocks_needed; i++) { +- ret = ocfs2_extent_map_get_blocks(cinode, orig_block + i, +- 1, &blkno, NULL, NULL); +- if (ret) +- goto out; +- blockptr = tc->d_new_blocks + (i * fs->fs_blocksize); +- memset(blockptr, 0, fs->fs_blocksize); +- first = (struct ocfs2_dir_entry *)blockptr; +- first->rec_len = ocfs2_dir_trailer_blk_off(fs); +- 
ocfs2_init_dir_trailer(fs, tc->d_di, blkno, blockptr); +- } +- +-out: +- return ret; +-} +- +-static errcode_t write_new_dirblocks(ocfs2_filesys *fs, +- struct tunefs_trailer_context *tc) +-{ +- int i; +- errcode_t ret; +- uint64_t blkno; +- uint64_t orig_block = ocfs2_blocks_in_bytes(fs, tc->d_di->i_size); +- ocfs2_cached_inode *cinode; +- char *blockptr; +- +- ret = ocfs2_read_cached_inode(fs, tc->d_blkno, &cinode); +- if (ret) +- goto out; +- assert(!memcmp(tc->d_di, cinode->ci_inode, fs->fs_blocksize)); +- +- for (i = 0; i < tc->d_blocks_needed; i++) { +- ret = ocfs2_extent_map_get_blocks(cinode, orig_block + i, +- 1, &blkno, NULL, NULL); +- if (ret) +- goto out; +- blockptr = tc->d_new_blocks + (i * fs->fs_blocksize); +- ret = ocfs2_write_dir_block(fs, tc->d_di, blkno, blockptr); +- if (ret) { +- verbosef(VL_DEBUG, +- "Error writing dirblock %"PRIu64"\n", +- blkno); +- goto out; +- } +- } +- +-out: +- return ret; +-} +- +- +-errcode_t tunefs_install_dir_trailer(ocfs2_filesys *fs, +- struct ocfs2_dinode *di, +- struct tunefs_trailer_context *tc) +-{ +- errcode_t ret = 0; +- struct tunefs_trailer_context *our_tc = NULL; +- +- if (!tc) { +- ret = tunefs_prepare_dir_trailer(fs, di, &our_tc); +- if (ret) +- goto out; +- tc = our_tc; +- } +- +- if (tc->d_di != di) { +- ret = OCFS2_ET_INVALID_ARGUMENT; +- goto out; +- } +- +- if (tc->d_blocks_needed) { +- ret = ocfs2_malloc_blocks(fs->fs_io, tc->d_blocks_needed, +- &tc->d_new_blocks); +- if (ret) +- goto out; +- +- tc->d_cur_block = tc->d_new_blocks; +- +- ret = expand_dir_if_needed(fs, di, tc->d_blocks_needed); +- if (ret) +- goto out; +- +- ret = init_new_dirblocks(fs, tc); +- if (ret) +- goto out; +- tc->d_next_dirent = (struct ocfs2_dir_entry *)tc->d_cur_block; +- verbosef(VL_DEBUG, "t_next_dirent has rec_len of %u\n", +- tc->d_next_dirent->rec_len); +- } +- +- ret = run_dirblocks(fs, tc); +- if (ret) +- goto out; +- +- /* +- * We write in a specific order. 
We write any new dirblocks first +- * so that they are on disk. Then we write the new i_size in the +- * inode. If we crash at this point, the directory has duplicate +- * entries but no lost entries. fsck can clean it up. Finally, we +- * write the modified dirblocks with trailers. +- */ +- if (tc->d_blocks_needed) { +- ret = write_new_dirblocks(fs, tc); +- if (ret) +- goto out; +- +- di->i_size += ocfs2_blocks_to_bytes(fs, tc->d_blocks_needed); +- ret = ocfs2_write_inode(fs, di->i_blkno, (char *)di); +- if (ret) +- goto out; +- } +- +- ret = write_dirblocks(fs, tc); +- +-out: +- if (our_tc) +- tunefs_trailer_context_free(our_tc); +- return ret; +-} +- +- + /* + * Since we have to scan the inodes in our first pass to find directories + * that need trailers, we might as well store them off and avoid reading +diff --git a/tunefs.ocfs2/libocfs2ne.c b/tunefs.ocfs2/libocfs2ne.c +index 174fef0..824214b 100644 +--- a/tunefs.ocfs2/libocfs2ne.c ++++ b/tunefs.ocfs2/libocfs2ne.c +@@ -577,6 +577,515 @@ out: + return ret; + } + ++void tunefs_trailer_context_free(struct tunefs_trailer_context *tc) ++{ ++ struct tunefs_trailer_dirblock *db; ++ struct list_head *n, *pos; ++ ++ if (!list_empty(&tc->d_list)) ++ list_del(&tc->d_list); ++ ++ list_for_each_safe(pos, n, &tc->d_dirblocks) { ++ db = list_entry(pos, struct tunefs_trailer_dirblock, db_list); ++ list_del(&db->db_list); ++ ocfs2_free(&db->db_buf); ++ ocfs2_free(&db); ++ } ++ ++ ocfs2_free(&tc); ++} ++ ++/* ++ * We're calculating how many bytes we need to add to make space for ++ * the dir trailers. But we need to make sure that the added directory ++ * blocks also have room for a trailer. 
++ */ ++static void add_bytes_needed(ocfs2_filesys *fs, ++ struct tunefs_trailer_context *tc, ++ unsigned int rec_len) ++{ ++ unsigned int toff = ocfs2_dir_trailer_blk_off(fs); ++ unsigned int block_offset = tc->d_bytes_needed % fs->fs_blocksize; ++ ++ /* ++ * If the current byte offset would put us into a trailer, push ++ * it out to the start of the next block. Remember, dirents have ++ * to be at least 16 bytes, which is why we check against the ++ * smallest rec_len. ++ */ ++ if ((block_offset + rec_len) > (toff - OCFS2_DIR_REC_LEN(1))) ++ tc->d_bytes_needed += fs->fs_blocksize - block_offset; ++ ++ tc->d_bytes_needed += rec_len; ++ tc->d_blocks_needed = ++ ocfs2_blocks_in_bytes(fs, tc->d_bytes_needed); ++} ++ ++static errcode_t walk_dirblock(ocfs2_filesys *fs, ++ struct tunefs_trailer_context *tc, ++ struct tunefs_trailer_dirblock *db) ++{ ++ errcode_t ret = 0; ++ struct ocfs2_dir_entry *dirent, *prev = NULL; ++ unsigned int real_rec_len; ++ unsigned int offset = 0; ++ unsigned int toff = ocfs2_dir_trailer_blk_off(fs); ++ ++ while (offset < fs->fs_blocksize) { ++ dirent = (struct ocfs2_dir_entry *) (db->db_buf + offset); ++ if (((offset + dirent->rec_len) > fs->fs_blocksize) || ++ (dirent->rec_len < 8) || ++ ((dirent->rec_len % 4) != 0) || ++ (((dirent->name_len & 0xFF)+8) > dirent->rec_len)) { ++ ret = OCFS2_ET_DIR_CORRUPTED; ++ break; ++ } ++ ++ real_rec_len = dirent->inode ? ++ OCFS2_DIR_REC_LEN(dirent->name_len) : ++ OCFS2_DIR_REC_LEN(1); ++ if ((offset + real_rec_len) <= toff) ++ goto next; ++ ++ /* ++ * The first time through, we store off the last dirent ++ * before the trailer. 
++ */ ++ if (!db->db_last) ++ db->db_last = prev; ++ ++ /* Only live dirents need to be moved */ ++ if (dirent->inode) { ++ verbosef(VL_DEBUG, ++ "Will move dirent %.*s out of " ++ "directory block %"PRIu64" to make way " ++ "for the trailer\n", ++ dirent->name_len, dirent->name, ++ db->db_blkno); ++ add_bytes_needed(fs, tc, real_rec_len); ++ } ++ ++next: ++ prev = dirent; ++ offset += dirent->rec_len; ++ } ++ ++ /* There were no dirents across the boundary */ ++ if (!db->db_last) ++ db->db_last = prev; ++ ++ return ret; ++} ++ ++static int dirblock_scan_iterate(ocfs2_filesys *fs, uint64_t blkno, ++ uint64_t bcount, uint16_t ext_flags, ++ void *priv_data) ++{ ++ errcode_t ret = 0; ++ struct tunefs_trailer_dirblock *db = NULL; ++ struct tunefs_trailer_context *tc = priv_data; ++ ++ ret = ocfs2_malloc0(sizeof(struct tunefs_trailer_dirblock), &db); ++ if (ret) ++ goto out; ++ ++ ret = ocfs2_malloc_block(fs->fs_io, &db->db_buf); ++ if (ret) ++ goto out; ++ ++ db->db_blkno = blkno; ++ ++ verbosef(VL_DEBUG, ++ "Reading dinode %"PRIu64" dirblock %"PRIu64" at block " ++ "%"PRIu64"\n", ++ tc->d_di->i_blkno, bcount, blkno); ++ ret = ocfs2_read_dir_block(fs, tc->d_di, blkno, db->db_buf); ++ if (ret) ++ goto out; ++ ++ ret = walk_dirblock(fs, tc, db); ++ if (ret) ++ goto out; ++ ++ list_add_tail(&db->db_list, &tc->d_dirblocks); ++ db = NULL; ++ ++out: ++ if (db) { ++ if (db->db_buf) ++ ocfs2_free(&db->db_buf); ++ ocfs2_free(&db); ++ } ++ ++ if (ret) { ++ tc->d_err = ret; ++ return OCFS2_BLOCK_ABORT; ++ } ++ ++ return 0; ++} ++ ++errcode_t tunefs_prepare_dir_trailer(ocfs2_filesys *fs, ++ struct ocfs2_dinode *di, ++ struct tunefs_trailer_context **tc_ret) ++{ ++ errcode_t ret = 0; ++ struct tunefs_trailer_context *tc = NULL; ++ ++ if (ocfs2_dir_has_trailer(fs, di)) ++ goto out; ++ ++ ret = ocfs2_malloc0(sizeof(struct tunefs_trailer_context), &tc); ++ if (ret) ++ goto out; ++ ++ tc->d_blkno = di->i_blkno; ++ tc->d_di = di; ++ INIT_LIST_HEAD(&tc->d_list); ++ 
INIT_LIST_HEAD(&tc->d_dirblocks); ++ ++ ret = ocfs2_block_iterate_inode(fs, tc->d_di, 0, ++ dirblock_scan_iterate, tc); ++ if (!ret) ++ ret = tc->d_err; ++ if (ret) ++ goto out; ++ ++ *tc_ret = tc; ++ tc = NULL; ++ ++out: ++ if (tc) ++ tunefs_trailer_context_free(tc); ++ ++ return ret; ++} ++ ++/* ++ * We are hand-coding the directory expansion because we're going to ++ * build the new directory blocks ourselves. We can't just use ++ * ocfs2_expand_dir() and ocfs2_link(), because we're moving around ++ * entries. ++ */ ++static errcode_t expand_dir_if_needed(ocfs2_filesys *fs, ++ struct ocfs2_dinode *di, ++ uint64_t blocks_needed) ++{ ++ errcode_t ret = 0; ++ uint64_t used_blocks, total_blocks; ++ uint32_t clusters_needed; ++ ++ /* This relies on the fact that i_size of a directory is a ++ * multiple of blocksize */ ++ used_blocks = ocfs2_blocks_in_bytes(fs, di->i_size); ++ total_blocks = ocfs2_clusters_to_blocks(fs, di->i_clusters); ++ if ((used_blocks + blocks_needed) <= total_blocks) ++ goto out; ++ ++ clusters_needed = ++ ocfs2_clusters_in_blocks(fs, ++ (used_blocks + blocks_needed) - ++ total_blocks); ++ ret = ocfs2_extend_allocation(fs, di->i_blkno, clusters_needed); ++ if (ret) ++ goto out; ++ ++ /* Pick up changes to the inode */ ++ ret = ocfs2_read_inode(fs, di->i_blkno, (char *)di); ++ ++out: ++ return ret; ++} ++ ++static void shift_dirent(ocfs2_filesys *fs, ++ struct tunefs_trailer_context *tc, ++ struct ocfs2_dir_entry *dirent) ++{ ++ /* Using the real rec_len */ ++ unsigned int rec_len = OCFS2_DIR_REC_LEN(dirent->name_len); ++ unsigned int offset, remain; ++ ++ /* ++ * If the current byte offset would put us into a trailer, push ++ * it out to the start of the next block. Remember, dirents have ++ * to be at least 16 bytes, which is why we check against the ++ * smallest rec_len. 
++ */ ++ if (rec_len > (tc->d_next_dirent->rec_len - OCFS2_DIR_REC_LEN(1))) { ++ tc->d_cur_block += fs->fs_blocksize; ++ tc->d_next_dirent = (struct ocfs2_dir_entry *)tc->d_cur_block; ++ } ++ ++ assert(ocfs2_blocks_in_bytes(fs, ++ tc->d_cur_block - tc->d_new_blocks) < ++ tc->d_blocks_needed); ++ ++ offset = (char *)(tc->d_next_dirent) - tc->d_cur_block; ++ remain = tc->d_next_dirent->rec_len - rec_len; ++ ++ memcpy(tc->d_cur_block + offset, dirent, rec_len); ++ tc->d_next_dirent->rec_len = rec_len; ++ ++ verbosef(VL_DEBUG, ++ "Installed dirent %.*s at offset %u of new block " ++ "%"PRIu64", rec_len %u\n", ++ tc->d_next_dirent->name_len, tc->d_next_dirent->name, ++ offset, ++ ocfs2_blocks_in_bytes(fs, tc->d_cur_block - tc->d_new_blocks), ++ rec_len); ++ ++ ++ offset += rec_len; ++ tc->d_next_dirent = ++ (struct ocfs2_dir_entry *)(tc->d_cur_block + offset); ++ tc->d_next_dirent->rec_len = remain; ++ ++ verbosef(VL_DEBUG, ++ "New block %"PRIu64" has its last dirent at %u, with %u " ++ "bytes left\n", ++ ocfs2_blocks_in_bytes(fs, tc->d_cur_block - tc->d_new_blocks), ++ offset, remain); ++} ++ ++static errcode_t fixup_dirblock(ocfs2_filesys *fs, ++ struct tunefs_trailer_context *tc, ++ struct tunefs_trailer_dirblock *db) ++{ ++ errcode_t ret = 0; ++ struct ocfs2_dir_entry *dirent; ++ unsigned int real_rec_len; ++ unsigned int offset; ++ unsigned int toff = ocfs2_dir_trailer_blk_off(fs); ++ ++ /* ++ * db_last is the last dirent we're *keeping*. So we need to ++ * move out every valid dirent *after* db_last. ++ * ++ * tunefs_prepare_dir_trailer() should have calculated this ++ * correctly. 
++ */ ++ offset = ((char *)db->db_last) - db->db_buf; ++ offset += db->db_last->rec_len; ++ while (offset < fs->fs_blocksize) { ++ dirent = (struct ocfs2_dir_entry *) (db->db_buf + offset); ++ if (((offset + dirent->rec_len) > fs->fs_blocksize) || ++ (dirent->rec_len < 8) || ++ ((dirent->rec_len % 4) != 0) || ++ (((dirent->name_len & 0xFF)+8) > dirent->rec_len)) { ++ ret = OCFS2_ET_DIR_CORRUPTED; ++ break; ++ } ++ ++ real_rec_len = dirent->inode ? ++ OCFS2_DIR_REC_LEN(dirent->name_len) : ++ OCFS2_DIR_REC_LEN(1); ++ ++ assert((offset + real_rec_len) > toff); ++ ++ /* Only live dirents need to be moved */ ++ if (dirent->inode) { ++ verbosef(VL_DEBUG, ++ "Moving dirent %.*s out of directory " ++ "block %"PRIu64" to make way for the " ++ "trailer\n", ++ dirent->name_len, dirent->name, ++ db->db_blkno); ++ shift_dirent(fs, tc, dirent); ++ } ++ ++ offset += dirent->rec_len; ++ } ++ ++ /* ++ * Now that we've moved any dirents out of the way, we need to ++ * fix up db_last and install the trailer. ++ */ ++ offset = ((char *)db->db_last) - db->db_buf; ++ verbosef(VL_DEBUG, ++ "Last valid dirent of directory block %"PRIu64" " ++ "(\"%.*s\") is %u bytes in. 
Setting rec_len to %u and " ++ "installing the trailer\n", ++ db->db_blkno, db->db_last->name_len, db->db_last->name, ++ offset, toff - offset); ++ db->db_last->rec_len = toff - offset; ++ ocfs2_init_dir_trailer(fs, tc->d_di, db->db_blkno, db->db_buf); ++ ++ return ret; ++} ++ ++static errcode_t run_dirblocks(ocfs2_filesys *fs, ++ struct tunefs_trailer_context *tc) ++{ ++ errcode_t ret = 0; ++ struct list_head *pos; ++ struct tunefs_trailer_dirblock *db; ++ ++ list_for_each(pos, &tc->d_dirblocks) { ++ db = list_entry(pos, struct tunefs_trailer_dirblock, db_list); ++ ret = fixup_dirblock(fs, tc, db); ++ if (ret) ++ break; ++ } ++ ++ return ret; ++} ++ ++static errcode_t write_dirblocks(ocfs2_filesys *fs, ++ struct tunefs_trailer_context *tc) ++{ ++ errcode_t ret = 0; ++ struct list_head *pos; ++ struct tunefs_trailer_dirblock *db; ++ ++ list_for_each(pos, &tc->d_dirblocks) { ++ db = list_entry(pos, struct tunefs_trailer_dirblock, db_list); ++ ret = ocfs2_write_dir_block(fs, tc->d_di, db->db_blkno, ++ db->db_buf); ++ if (ret) { ++ verbosef(VL_DEBUG, ++ "Error writing dirblock %"PRIu64"\n", ++ db->db_blkno); ++ break; ++ } ++ } ++ ++ return ret; ++} ++ ++static errcode_t init_new_dirblocks(ocfs2_filesys *fs, ++ struct tunefs_trailer_context *tc) ++{ ++ int i; ++ errcode_t ret; ++ uint64_t blkno; ++ uint64_t orig_block = ocfs2_blocks_in_bytes(fs, tc->d_di->i_size); ++ ocfs2_cached_inode *cinode; ++ char *blockptr; ++ struct ocfs2_dir_entry *first; ++ ++ ret = ocfs2_read_cached_inode(fs, tc->d_blkno, &cinode); ++ if (ret) ++ goto out; ++ assert(!memcmp(tc->d_di, cinode->ci_inode, fs->fs_blocksize)); ++ ++ for (i = 0; i < tc->d_blocks_needed; i++) { ++ ret = ocfs2_extent_map_get_blocks(cinode, orig_block + i, ++ 1, &blkno, NULL, NULL); ++ if (ret) ++ goto out; ++ blockptr = tc->d_new_blocks + (i * fs->fs_blocksize); ++ memset(blockptr, 0, fs->fs_blocksize); ++ first = (struct ocfs2_dir_entry *)blockptr; ++ first->rec_len = ocfs2_dir_trailer_blk_off(fs); ++ 
ocfs2_init_dir_trailer(fs, tc->d_di, blkno, blockptr); ++ } ++ ++out: ++ return ret; ++} ++ ++static errcode_t write_new_dirblocks(ocfs2_filesys *fs, ++ struct tunefs_trailer_context *tc) ++{ ++ int i; ++ errcode_t ret; ++ uint64_t blkno; ++ uint64_t orig_block = ocfs2_blocks_in_bytes(fs, tc->d_di->i_size); ++ ocfs2_cached_inode *cinode; ++ char *blockptr; ++ ++ ret = ocfs2_read_cached_inode(fs, tc->d_blkno, &cinode); ++ if (ret) ++ goto out; ++ assert(!memcmp(tc->d_di, cinode->ci_inode, fs->fs_blocksize)); ++ ++ for (i = 0; i < tc->d_blocks_needed; i++) { ++ ret = ocfs2_extent_map_get_blocks(cinode, orig_block + i, ++ 1, &blkno, NULL, NULL); ++ if (ret) ++ goto out; ++ blockptr = tc->d_new_blocks + (i * fs->fs_blocksize); ++ ret = ocfs2_write_dir_block(fs, tc->d_di, blkno, blockptr); ++ if (ret) { ++ verbosef(VL_DEBUG, ++ "Error writing dirblock %"PRIu64"\n", ++ blkno); ++ goto out; ++ } ++ } ++ ++out: ++ return ret; ++} ++ ++errcode_t tunefs_install_dir_trailer(ocfs2_filesys *fs, ++ struct ocfs2_dinode *di, ++ struct tunefs_trailer_context *tc) ++{ ++ errcode_t ret = 0; ++ struct tunefs_trailer_context *our_tc = NULL; ++ ++ if (!tc) { ++ ret = tunefs_prepare_dir_trailer(fs, di, &our_tc); ++ if (ret) ++ goto out; ++ tc = our_tc; ++ } ++ ++ if (tc->d_di != di) { ++ ret = OCFS2_ET_INVALID_ARGUMENT; ++ goto out; ++ } ++ ++ if (tc->d_blocks_needed) { ++ ret = ocfs2_malloc_blocks(fs->fs_io, tc->d_blocks_needed, ++ &tc->d_new_blocks); ++ if (ret) ++ goto out; ++ ++ tc->d_cur_block = tc->d_new_blocks; ++ ++ ret = expand_dir_if_needed(fs, di, tc->d_blocks_needed); ++ if (ret) ++ goto out; ++ ++ ret = init_new_dirblocks(fs, tc); ++ if (ret) ++ goto out; ++ tc->d_next_dirent = (struct ocfs2_dir_entry *)tc->d_cur_block; ++ verbosef(VL_DEBUG, "t_next_dirent has rec_len of %u\n", ++ tc->d_next_dirent->rec_len); ++ } ++ ++ ret = run_dirblocks(fs, tc); ++ if (ret) ++ goto out; ++ ++ /* ++ * We write in a specific order. 
We write any new dirblocks first ++ * so that they are on disk. Then we write the new i_size in the ++ * inode. If we crash at this point, the directory has duplicate ++ * entries but no lost entries. fsck can clean it up. Finally, we ++ * write the modified dirblocks with trailers. ++ */ ++ if (tc->d_blocks_needed) { ++ ret = write_new_dirblocks(fs, tc); ++ if (ret) ++ goto out; ++ ++ di->i_size += ocfs2_blocks_to_bytes(fs, tc->d_blocks_needed); ++ ret = ocfs2_write_inode(fs, di->i_blkno, (char *)di); ++ if (ret) ++ goto out; ++ } ++ ++ ret = write_dirblocks(fs, tc); ++ ++out: ++ if (our_tc) ++ tunefs_trailer_context_free(our_tc); ++ return ret; ++} + + /* + * Starting, opening, closing, and exiting. +diff --git a/tunefs.ocfs2/libocfs2ne.h b/tunefs.ocfs2/libocfs2ne.h +index 18380be..250ba76 100644 +--- a/tunefs.ocfs2/libocfs2ne.h ++++ b/tunefs.ocfs2/libocfs2ne.h +@@ -300,11 +300,50 @@ struct tunefs_trailer_context { + iteration of the directory */ + }; + ++/* A dirblock we have to add a trailer to */ ++struct tunefs_trailer_dirblock { ++ struct list_head db_list; ++ uint64_t db_blkno; ++ char *db_buf; ++ ++ /* ++ * These require a little explanation. They point to ++ * ocfs2_dir_entry structures inside db_buf. ++ * ++ * db_last entry we're going to *keep*. If the last entry in the ++ * dirblock has enough extra rec_len to allow the trailer, db_last ++ * points to it. We will shorten its rec_len and insert the ++ * trailer. ++ * ++ * However, if the last entry in the dirblock cannot be truncated, ++ * db_move points to the entry we have to move out, and db_last ++ * points to the entry before that - the last entry we're keeping ++ * in this dirblock. ++ * ++ * Examples: ++ * ++ * - The last entry in the dirblock has a name_len of 1 and a ++ * rec_len of 128. We can easily change the rec_len to 64 and ++ * insert the trailer. db_last points to this entry. ++ * ++ * - The last entry in the dirblock has a name_len of 1 and a ++ * rec_len of 48. 
The previous entry has a name_len of 1 and a ++ * rec_len of 32. We have to move the last entry out. The ++ * second-to-last entry can have its rec_len truncated to 16, so ++ * we put it in db_last. ++ */ ++ struct ocfs2_dir_entry *db_last; ++}; ++ + /* + * called from feature_metaecc.c and feature_indexed_dirs.c + * to install dir trailers + */ ++errcode_t tunefs_prepare_dir_trailer(ocfs2_filesys *fs, ++ struct ocfs2_dinode *di, ++ struct tunefs_trailer_context **tc_ret); + errcode_t tunefs_install_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di, + struct tunefs_trailer_context *tc); ++void tunefs_trailer_context_free(struct tunefs_trailer_context *tc); + + #endif /* _LIBTUNEFS_H */ +-- +1.7.0.2 + diff --git a/0017-dx_dirs-add-check-for-invalid-slot-in-ocfs2_new_dx_r.patch b/0017-dx_dirs-add-check-for-invalid-slot-in-ocfs2_new_dx_r.patch new file mode 100644 index 0000000..1576742 --- /dev/null +++ b/0017-dx_dirs-add-check-for-invalid-slot-in-ocfs2_new_dx_r.patch @@ -0,0 +1,30 @@ +From e618ad9a6cafae5351f87ae0601d3b16ec9af96a Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Mon, 19 Apr 2010 21:36:37 -0700 +Subject: [PATCH 17/30] dx_dirs: add check for invalid slot in ocfs2_new_dx_root() + +This can happen in the case of a system inode, such as the root or orphan +directories. 
+ +Signed-off-by: Mark Fasheh +--- + libocfs2/alloc.c | 3 +++ + 1 files changed, 3 insertions(+), 0 deletions(-) + +diff --git a/libocfs2/alloc.c b/libocfs2/alloc.c +index 84f3b05..7f85a34 100644 +--- a/libocfs2/alloc.c ++++ b/libocfs2/alloc.c +@@ -658,6 +658,9 @@ errcode_t ocfs2_new_dx_root(ocfs2_filesys *fs, + goto out; + + slot = di->i_suballoc_slot; ++ if (slot == (uint16_t)OCFS2_INVALID_SLOT) ++ slot = 0; ++ + ret = ocfs2_load_allocator(fs, EXTENT_ALLOC_SYSTEM_INODE, + slot, &fs->fs_eb_allocs[slot]); + if (ret) +-- +1.7.0.2 + diff --git a/0018-mkfs.ocfs2-create-root-and-orphan-directories-as-ind.patch b/0018-mkfs.ocfs2-create-root-and-orphan-directories-as-ind.patch new file mode 100644 index 0000000..637a56c --- /dev/null +++ b/0018-mkfs.ocfs2-create-root-and-orphan-directories-as-ind.patch @@ -0,0 +1,137 @@ +From 88d139c22a91b17ff451a50e37d002d8714748f3 Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Mon, 19 Apr 2010 22:26:47 -0700 +Subject: [PATCH 18/30] mkfs.ocfs2: create root and orphan directories as indexed + +If the indexed dirs feature is enabled but the inline directories feature is +for some reason disabled, we'll create sub-optimal (non-indexed) root and +orphan directories. It's easy however at the end of mkfs.ocfs2 to simply +index these. 
+ +Signed-off-by: Mark Fasheh +--- + mkfs.ocfs2/mkfs.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++---- + mkfs.ocfs2/mkfs.h | 1 + + 2 files changed, 62 insertions(+), 6 deletions(-) + +diff --git a/mkfs.ocfs2/mkfs.c b/mkfs.ocfs2/mkfs.c +index b43a9ba..5507872 100644 +--- a/mkfs.ocfs2/mkfs.c ++++ b/mkfs.ocfs2/mkfs.c +@@ -82,6 +82,7 @@ static AllocGroup * initialize_alloc_group(State *s, const char *name, + uint64_t blkno, + uint16_t chain, uint16_t cpg, + uint16_t bpc); ++static void index_system_dirs(State *s, ocfs2_filesys *fs); + static void create_lost_found_dir(State *s, ocfs2_filesys *fs); + static void format_journals(State *s, ocfs2_filesys *fs); + static void format_slotmap(State *s, ocfs2_filesys *fs); +@@ -436,12 +437,6 @@ static void finish_normal_format(State *s) + printf("done\n"); + + if (!s->quiet) +- printf("Writing lost+found: "); +- create_lost_found_dir(s, fs); +- if (!s->quiet) +- printf("done\n"); +- +- if (!s->quiet) + printf("Formatting quota files: "); + + format_quota_files(s, fs); +@@ -449,6 +444,24 @@ static void finish_normal_format(State *s) + if (!s->quiet) + printf("done\n"); + ++ if (s->dx_dirs && !s->inline_data) { ++ /* ++ * We want to do this after quota, but before adding ++ * any new entries to directories. 
++ */ ++ if (!s->quiet) ++ printf("Indexing system directories: "); ++ index_system_dirs(s, fs); ++ if (!s->quiet) ++ printf("done\n"); ++ } ++ ++ if (!s->quiet) ++ printf("Writing lost+found: "); ++ create_lost_found_dir(s, fs); ++ if (!s->quiet) ++ printf("done\n"); ++ + ocfs2_close(fs); + } + +@@ -1085,6 +1098,10 @@ get_state(int argc, char **argv) + s->no_backup_super = 0; + else + s->no_backup_super = 1; ++ if (s->feature_flags.opt_incompat & OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) ++ s->dx_dirs = 1; ++ else ++ s->dx_dirs = 0; + + + /* Here if the user set these flags explicitly, we will use them and +@@ -2751,6 +2768,44 @@ clear_both_ends(State *s) + return ; + } + ++static void index_system_dirs(State *s, ocfs2_filesys *fs) ++{ ++ errcode_t ret; ++ int i, num_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots; ++ uint64_t orphan_dir_blkno; ++ ++ ++ /* Start with the root directory */ ++ ret = ocfs2_dx_dir_build(fs, fs->fs_root_blkno); ++ if (ret) { ++ com_err(s->progname, ret, "while indexing root directory"); ++ goto bail; ++ } ++ ++ for (i = 0; i < num_slots; i++) { ++ ret = ocfs2_lookup_system_inode(fs, ORPHAN_DIR_SYSTEM_INODE, ++ i, &orphan_dir_blkno); ++ if (ret) { ++ com_err(s->progname, ret, ++ "while looking up orphan dir %d for indexing", ++ i); ++ goto bail; ++ } ++ ++ ret = ocfs2_dx_dir_build(fs, orphan_dir_blkno); ++ if (ret) { ++ com_err(s->progname, ret, "while indexing orphan dir %d", i); ++ goto bail; ++ } ++ } ++ ++ return; ++ ++bail: ++ clear_both_ends(s); ++ exit(1); ++} ++ + static void create_lost_found_dir(State *s, ocfs2_filesys *fs) + { + errcode_t ret; +diff --git a/mkfs.ocfs2/mkfs.h b/mkfs.ocfs2/mkfs.h +index b702f00..c3aecd6 100644 +--- a/mkfs.ocfs2/mkfs.h ++++ b/mkfs.ocfs2/mkfs.h +@@ -188,6 +188,7 @@ struct _State { + int mount; + int no_backup_super; + int inline_data; ++ int dx_dirs; + int dry_run; + + uint32_t blocksize; +-- +1.7.0.2 + diff --git a/0019-libocfs2-fix-flag-check-in-ocfs2_init_dir.patch 
b/0019-libocfs2-fix-flag-check-in-ocfs2_init_dir.patch new file mode 100644 index 0000000..74e12f3 --- /dev/null +++ b/0019-libocfs2-fix-flag-check-in-ocfs2_init_dir.patch @@ -0,0 +1,26 @@ +From 912f3e698ed20eb14daad38ef79b106a30d39a02 Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Tue, 20 Apr 2010 10:19:54 -0700 +Subject: [PATCH 19/30] libocfs2: fix flag check in ocfs2_init_dir() + +Signed-off-by: Mark Fasheh +--- + libocfs2/expanddir.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/libocfs2/expanddir.c b/libocfs2/expanddir.c +index ec05b74..eb18260 100644 +--- a/libocfs2/expanddir.c ++++ b/libocfs2/expanddir.c +@@ -238,7 +238,7 @@ errcode_t ocfs2_init_dir(ocfs2_filesys *fs, + * directory to extent in ocfs2_expand_dir() + */ + if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) && +- (!cinode->ci_inode->i_dyn_features & OCFS2_INLINE_DATA_FL)) { ++ !(cinode->ci_inode->i_dyn_features & OCFS2_INLINE_DATA_FL)) { + ret = ocfs2_dx_dir_build(fs, dir); + if (ret) + goto bail; +-- +1.7.0.2 + diff --git a/0020-libocfs2-fix-ocfs2_init_dir-to-retain-indexed-flag.patch b/0020-libocfs2-fix-ocfs2_init_dir-to-retain-indexed-flag.patch new file mode 100644 index 0000000..65a625f --- /dev/null +++ b/0020-libocfs2-fix-ocfs2_init_dir-to-retain-indexed-flag.patch @@ -0,0 +1,40 @@ +From 30a19b42c6da181fa3f96123041fb20e69d065d9 Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Tue, 20 Apr 2010 10:20:24 -0700 +Subject: [PATCH 20/30] libocfs2: fix ocfs2_init_dir() to retain indexed flag + +We were re-using the out of date 'cached inode' later in the function after +ocfs2_dx_dir_build() (which updates and writes out the inode internally). + +As a result, ocfs2_init_dir() was accidentally clearing +OCFS2_INDEXED_DIR_FL. Fix this by refreshing the cache after the call to +ocfs2_dx_dir_build(). 
+ +Signed-off-by: Mark Fasheh +--- + libocfs2/expanddir.c | 9 +++++++++ + 1 files changed, 9 insertions(+), 0 deletions(-) + +diff --git a/libocfs2/expanddir.c b/libocfs2/expanddir.c +index eb18260..a81cfbe 100644 +--- a/libocfs2/expanddir.c ++++ b/libocfs2/expanddir.c +@@ -242,6 +242,15 @@ errcode_t ocfs2_init_dir(ocfs2_filesys *fs, + ret = ocfs2_dx_dir_build(fs, dir); + if (ret) + goto bail; ++ ++ /* ++ * Re-read the 'cached inode' as ocfs2_dx_dir_build() ++ * may have written out changes which won't be ++ * reflected in our copy. ++ */ ++ ret = ocfs2_read_cached_inode(fs, dir, &cinode); ++ if (ret) ++ goto bail; + } + + /* set link count of the parent */ +-- +1.7.0.2 + diff --git a/0021-fsck.ocfs2-verify-dirent-dx-entry-linkages.patch b/0021-fsck.ocfs2-verify-dirent-dx-entry-linkages.patch new file mode 100644 index 0000000..173913c --- /dev/null +++ b/0021-fsck.ocfs2-verify-dirent-dx-entry-linkages.patch @@ -0,0 +1,92 @@ +From 4c1cf61779ee71c828134d956f5779e272a3195e Mon Sep 17 00:00:00 2001 +From: Mark Fasheh +Date: Fri, 23 Apr 2010 23:09:05 -0700 +Subject: [PATCH 21/30] fsck.ocfs2: verify dirent -> dx entry linkages + +During pass2 we can trivially do a lookup on dirents while walking the +directory tree. This will help us make sure that an index entry exists for +each dirent. If an entry is not found, the users is prompted and the parent +directory will be marked for an index rebuild. + +Signed-off-by: Mark Fasheh +--- + fsck.ocfs2/fsck.ocfs2.checks.8.in | 7 +++++++ + fsck.ocfs2/pass2.c | 37 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 44 insertions(+), 0 deletions(-) + +diff --git a/fsck.ocfs2/fsck.ocfs2.checks.8.in b/fsck.ocfs2/fsck.ocfs2.checks.8.in +index 5cda023..cfbb12e 100644 +--- a/fsck.ocfs2/fsck.ocfs2.checks.8.in ++++ b/fsck.ocfs2/fsck.ocfs2.checks.8.in +@@ -1061,6 +1061,13 @@ file system. + + Answering yes will truncate the invalid index. + ++.SS "DX_LOOKUP_FAILED" ++A directory entry is missing an entry in the directory index. 
Not ++found in directory index. The missing index entry will cause lookups ++on this name to fail. ++ ++Answering yes will rebuild the directory index, restoring the missing entry. ++ + .SH "SEE ALSO" + .BR fsck.ocfs2(8) + +diff --git a/fsck.ocfs2/pass2.c b/fsck.ocfs2/pass2.c +index b999761..e03bd4e 100644 +--- a/fsck.ocfs2/pass2.c ++++ b/fsck.ocfs2/pass2.c +@@ -648,6 +648,39 @@ out: + return ret; + } + ++static errcode_t fix_dirent_index(o2fsck_dirblock_entry *dbe, ++ struct dirblock_data *dd, ++ struct ocfs2_dir_entry *dirent, ++ unsigned int *flags) ++{ ++ errcode_t ret = 0; ++ struct ocfs2_dinode *di = (struct ocfs2_dinode *)dd->inoblock_buf; ++ uint64_t ino; ++ ++ if (!ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(dd->fs->fs_super))) ++ goto out; ++ ++ if (di->i_dyn_features & OCFS2_INDEXED_DIR_FL) { ++ ret = ocfs2_lookup(dd->fs, dbe->e_ino, dirent->name, ++ dirent->name_len, NULL, &ino); ++ if (ret) { ++ if (ret != OCFS2_ET_FILE_NOT_FOUND) ++ goto out; ++ ret = 0; ++ ++ if (prompt(dd->ost, PY, PR_DX_LOOKUP_FAILED, ++ "Directory inode %"PRIu64" is missing " ++ "an index entry for child inode %"PRIu64 ++ "\n. 
Repair this by rebuilding the " ++ "directory index?", dbe->e_ino, (uint64_t)dirent->inode)) ++ *flags |= OCFS2_DIRENT_CHANGED; ++ goto out; ++ } ++ } ++out: ++ return ret; ++} ++ + static int corrupt_dirent_lengths(struct ocfs2_dir_entry *dirent, int left) + { + if ((dirent->rec_len >= OCFS2_DIR_REC_LEN(1)) && +@@ -805,6 +838,10 @@ static unsigned pass2_dir_block_iterate(o2fsck_dirblock_entry *dbe, + if (dirent->inode == 0) + goto next; + ++ ret = fix_dirent_index(dbe, dd, dirent, &ret_flags); ++ if (ret) ++ goto out; ++ + verbosef("dirent %.*s refs ino %"PRIu64"\n", dirent->name_len, + dirent->name, (uint64_t)dirent->inode); + o2fsck_icount_delta(dd->ost->ost_icount_refs, dirent->inode, 1); +-- +1.7.0.2 + diff --git a/0022-dx_dirs-stop-iterate-dir-entries-for-I-O-error.patch b/0022-dx_dirs-stop-iterate-dir-entries-for-I-O-error.patch new file mode 100644 index 0000000..332a769 --- /dev/null +++ b/0022-dx_dirs-stop-iterate-dir-entries-for-I-O-error.patch @@ -0,0 +1,106 @@ +From 24b059e1e75a0ff5dabb8a6dfdc09e82d488c244 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 26 Apr 2010 22:34:27 +0800 +Subject: [PATCH 22/30] dx_dirs: stop iterate dir entries for I/O error + +Callback dx_iterator() may encounter an I/O error when calling +ocfs2_read_dx_leaf(). The caller of dx_iterator is extent_iterate_el(), +which does not accept error code other than OCFS2_EXTENT_ERROR and +OCFS2_EXTENT_ABORT. The result is, dir entries iteration can not stop +if there is an I/O error happens in dx_iterator(). + +This patch add 'errcode_t err' member to struct dx_iterator_data, if +error returned from ocfs2_read_dx_leaf(), the error code will be +stored here, then dx_iterator() returns OCFS2_EXTENT_ERROR to make +extent_iterate_el() quit. + +Thanks to Tao Ma for catching this error. 
+ +Signed-off-by: Coly Li +Cc: Mark Fasheh +Cc: Tao Ma +--- + libocfs2/dir_iterate.c | 22 +++++++++++++++------- + libocfs2/extents.c | 2 +- + 2 files changed, 16 insertions(+), 8 deletions(-) + +diff --git a/libocfs2/dir_iterate.c b/libocfs2/dir_iterate.c +index 8a3f5a9..d044bb8 100644 +--- a/libocfs2/dir_iterate.c ++++ b/libocfs2/dir_iterate.c +@@ -320,6 +320,7 @@ struct dx_iterator_data { + void *dx_priv_data; + char *leaf_buf; + struct ocfs2_dx_root_block *dx_root; ++ errcode_t err; + }; + + static int dx_iterator(ocfs2_filesys *fs, +@@ -330,7 +331,7 @@ static int dx_iterator(ocfs2_filesys *fs, + int ref_recno, + void *priv_data) + { +- int ret, i; ++ int err, i; + struct ocfs2_dx_leaf *dx_leaf; + struct dx_iterator_data *iter = priv_data; + uint64_t blkno, count; +@@ -339,9 +340,11 @@ static int dx_iterator(ocfs2_filesys *fs, + + blkno = rec->e_blkno; + for (i = 0; i < count; i++) { +- ret = ocfs2_read_dx_leaf(fs, blkno, iter->leaf_buf); +- if (ret) +- return ret; ++ err = ocfs2_read_dx_leaf(fs, blkno, iter->leaf_buf); ++ if (err) { ++ iter->err = err; ++ return OCFS2_EXTENT_ERROR; ++ } + + dx_leaf = (struct ocfs2_dx_leaf *)iter->leaf_buf; + iter->dx_func(fs, &dx_leaf->dl_list, iter->dx_root, dx_leaf, +@@ -387,8 +390,7 @@ extern errcode_t ocfs2_dx_entries_iterate(ocfs2_filesys *fs, + dx_root = (struct ocfs2_dx_root_block *)buf; + + if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) { +- func(fs, &dx_root->dr_entries, dx_root, NULL, priv_data); +- ret = 0; ++ ret = func(fs, &dx_root->dr_entries, dx_root, NULL, priv_data); + goto out; + } + +@@ -404,10 +406,16 @@ extern errcode_t ocfs2_dx_entries_iterate(ocfs2_filesys *fs, + data.dx_priv_data = priv_data; + data.leaf_buf = leaf_buf; + data.dx_root = dx_root; ++ data.err = 0; + ret = ocfs2_extent_iterate_dx_root(fs, dx_root, + OCFS2_EXTENT_FLAG_DATA_ONLY, eb_buf, + dx_iterator, &data); +- ++ /* dx_iterator may set the error code for non-extents-related ++ * errors. 
If the error code is set by dx_iterator, no matter ++ * what ocfs2_extent_iterate_dx_root() returns, we should take ++ * data.err as returned error code. */ ++ if (data.err) ++ ret = data.err; + out: + if (buf) + ocfs2_free(&buf); +diff --git a/libocfs2/extents.c b/libocfs2/extents.c +index 8c322b1..bb233f0 100644 +--- a/libocfs2/extents.c ++++ b/libocfs2/extents.c +@@ -470,7 +470,7 @@ errcode_t ocfs2_extent_iterate_inode(ocfs2_filesys *fs, + uint64_t ref_blkno, + int ref_recno, + void *priv_data), +- void *priv_data) ++ void *priv_data) + { + int i; + int iret = 0; +-- +1.7.0.2 + diff --git a/0023-dx_dirs-check-callback-iter-dx_func-return-value-in-.patch b/0023-dx_dirs-check-callback-iter-dx_func-return-value-in-.patch new file mode 100644 index 0000000..41e90d0 --- /dev/null +++ b/0023-dx_dirs-check-callback-iter-dx_func-return-value-in-.patch @@ -0,0 +1,40 @@ +From d4bbb81a8e6870155eb939a1f9d6def456fa3b91 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 26 Apr 2010 22:35:02 +0800 +Subject: [PATCH 23/30] dx_dirs: check callback iter->dx_func() return value in dx_iterator() + +This patch makes dx_iterator() check returned value of callback +iter->dx_func(). If an error returned from the callback, dx_iterator() +returns OCFS2_EXTENT_ERROR to stop the iteration. + +Thanks to Tao Ma for catching the error. 
+ +Signed-off-by: Coly Li +Cc: Mark Fasheh +Cc: Tao Ma +--- + libocfs2/dir_iterate.c | 7 ++++++- + 1 files changed, 6 insertions(+), 1 deletions(-) + +diff --git a/libocfs2/dir_iterate.c b/libocfs2/dir_iterate.c +index d044bb8..9f2ff7e 100644 +--- a/libocfs2/dir_iterate.c ++++ b/libocfs2/dir_iterate.c +@@ -347,8 +347,13 @@ static int dx_iterator(ocfs2_filesys *fs, + } + + dx_leaf = (struct ocfs2_dx_leaf *)iter->leaf_buf; +- iter->dx_func(fs, &dx_leaf->dl_list, iter->dx_root, dx_leaf, ++ err = iter->dx_func(fs, &dx_leaf->dl_list, iter->dx_root, dx_leaf, + iter->dx_priv_data); ++ /* callback dx_func() is defined by users, the return value does not ++ * follow libocfs2 error codes. Don't touch iter->err and just stop ++ * the iteration here.*/ ++ if (err) ++ return OCFS2_EXTENT_ERROR; + + blkno++; + } +-- +1.7.0.2 + diff --git a/0024-dx_dirs-remove-unncessary-return-value-assignment.patch b/0024-dx_dirs-remove-unncessary-return-value-assignment.patch new file mode 100644 index 0000000..27ec80e --- /dev/null +++ b/0024-dx_dirs-remove-unncessary-return-value-assignment.patch @@ -0,0 +1,45 @@ +From c74da336c1e4eb2fac311c9e6ea8ef8fe9b0a6c1 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 26 Apr 2010 22:35:27 +0800 +Subject: [PATCH 24/30] dx_dirs: remove unncessary return value assignment + +This patch removes unnecessary return value assigned in +ocfs2_dx_entries_iterate() and ocfs2_dx_frees_iterate(). + +Thanks for Tao Ma for catching this. 
+ +Signed-off-by: Coly Li +Cc: Mark Fasheh +Cc: Tao Ma +--- + libocfs2/dir_iterate.c | 3 --- + 1 files changed, 0 insertions(+), 3 deletions(-) + +diff --git a/libocfs2/dir_iterate.c b/libocfs2/dir_iterate.c +index 9f2ff7e..835305a 100644 +--- a/libocfs2/dir_iterate.c ++++ b/libocfs2/dir_iterate.c +@@ -378,7 +378,6 @@ extern errcode_t ocfs2_dx_entries_iterate(ocfs2_filesys *fs, + struct dx_iterator_data data; + + if (!S_ISDIR(dir->i_mode) && !ocfs2_dir_indexed(dir)) { +- ret = 0; + goto out; + } + +@@ -448,12 +447,10 @@ extern errcode_t ocfs2_dx_frees_iterate(ocfs2_filesys *fs, + struct ocfs2_dir_block_trailer *trailer; + + if (!S_ISDIR(dir->i_mode) || !(ocfs2_dir_indexed(dir))) { +- ret = 0; + goto out; + } + + if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) { +- ret = 0; + goto out; + } + +-- +1.7.0.2 + diff --git a/0025-dx_dirs-unifiy-feature-string-of-indexed-dirs.patch b/0025-dx_dirs-unifiy-feature-string-of-indexed-dirs.patch new file mode 100644 index 0000000..7bb6a31 --- /dev/null +++ b/0025-dx_dirs-unifiy-feature-string-of-indexed-dirs.patch @@ -0,0 +1,33 @@ +From 1e82ab1d1407d4578eda91214e843b5433a92d6f Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 26 Apr 2010 22:35:58 +0800 +Subject: [PATCH 25/30] dx_dirs: unifiy feature string of indexed-dirs + +This patch changes the indexed-dirs fn_name of ocfs2_feature_name array +from IndexedDirs to indexed-dirs. Which unitifies fn_name displayed in +debugfs.ocfs2 to feature string 'indexed-dirs' used in mkfs.ocfs2 and +tunefs.ocfs2. 
+ +Signed-off-by: Coly Li +Cc: Mark Fasheh +Cc: Tao Ma +--- + libocfs2/feature_string.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/libocfs2/feature_string.c b/libocfs2/feature_string.c +index 9f395c6..83fec9a 100644 +--- a/libocfs2/feature_string.c ++++ b/libocfs2/feature_string.c +@@ -254,7 +254,7 @@ static struct feature_name ocfs2_feature_names[] = { + .fn_flag = {0, OCFS2_FEATURE_INCOMPAT_XATTR, 0}, + }, + { +- .fn_name = "IndexedDirs", ++ .fn_name = "indexed-dirs", + .fn_flag = {0, OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, 0}, + }, + { +-- +1.7.0.2 + diff --git a/0026-dx_dirs-Improve-information-displayed-by-dump_dx_roo.patch b/0026-dx_dirs-Improve-information-displayed-by-dump_dx_roo.patch new file mode 100644 index 0000000..90e5519 --- /dev/null +++ b/0026-dx_dirs-Improve-information-displayed-by-dump_dx_roo.patch @@ -0,0 +1,33 @@ +From a3a8830774c41b90285d77ccc04a035c944080e9 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 26 Apr 2010 22:36:38 +0800 +Subject: [PATCH 26/30] dx_dirs: Improve information displayed by dump_dx_root() + +If dr->dr_suballoc_slot is OCFS2_INVALID_SLOT (which should not happen), +should not display "Global", because there is not "Global" conception +for dx root allocation slot. This patch fixes the display by +"Invalid Slot". 
+ +Signed-off-by: Coly Li +Cc: Mark Fasheh +Cc: Tao Ma +--- + debugfs.ocfs2/dump.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/debugfs.ocfs2/dump.c b/debugfs.ocfs2/dump.c +index d55fc0e..6ad202c 100644 +--- a/debugfs.ocfs2/dump.c ++++ b/debugfs.ocfs2/dump.c +@@ -593,7 +593,7 @@ void dump_dx_root(FILE *out, struct ocfs2_dx_root_block *dr) + (uint64_t)dr->dr_dir_blkno); + + if (dr->dr_suballoc_slot == (uint16_t)OCFS2_INVALID_SLOT) +- strcpy(tmp_str, "Global"); ++ strcpy(tmp_str, "Invalid Slot"); + else + sprintf(tmp_str, "%d", dr->dr_suballoc_slot); + fprintf(out, "\tSub Alloc Slot: %s Sub Alloc Bit: %u " +-- +1.7.0.2 + diff --git a/0027-dx_dirs-stop-iteration-of-dir-trailer-initialization.patch b/0027-dx_dirs-stop-iteration-of-dir-trailer-initialization.patch new file mode 100644 index 0000000..20856b7 --- /dev/null +++ b/0027-dx_dirs-stop-iteration-of-dir-trailer-initialization.patch @@ -0,0 +1,103 @@ +From cb9471a5f73c69858d9dd35ea90b86476e65e4ca Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 26 Apr 2010 22:37:05 +0800 +Subject: [PATCH 27/30] dx_dirs: stop iteration of dir trailer initialization for I/O error + +Callback dir_trailer_func() may encounter malloc or I/O error, these +errors can not return to its caller directly. This patch add a member +'errcode_t err' to struct trailer_ctxt, which can catch the error. By +this fix, dir_trailer_func() can return OCFS2_EXTENT_ERROR to stop dir +iteration immediately and can return the REAL error as well. + +Thanks to Tao Ma catches this error. 
+ +Signed-off-by: Coly Li +Cc: Mark Fasheh +Cc: Tao Ma +--- + libocfs2/dir_indexed.c | 37 +++++++++++++++++++++++++++++-------- + 1 files changed, 29 insertions(+), 8 deletions(-) + +diff --git a/libocfs2/dir_indexed.c b/libocfs2/dir_indexed.c +index 9cae3d0..5f3db95 100644 +--- a/libocfs2/dir_indexed.c ++++ b/libocfs2/dir_indexed.c +@@ -121,6 +121,7 @@ int ocfs2_find_max_rec_len(ocfs2_filesys *fs, char *buf) + struct trailer_ctxt { + struct ocfs2_dx_root_block *dx_root; + struct ocfs2_dinode *di; ++ errcode_t err; + }; + + /* make sure the space for trailer is reserved */ +@@ -170,8 +171,8 @@ static int dir_trailer_func(ocfs2_filesys *fs, + struct ocfs2_dinode *di = ctxt->di; + struct ocfs2_dx_root_block *dx_root = ctxt->dx_root; + struct ocfs2_dir_block_trailer *trailer; +- int max_rec_len = 0; +- errcode_t ret = 0; ++ int max_rec_len = 0, ret = 0; ++ errcode_t err; + char *blk = NULL; + + ret = ocfs2_malloc_block(fs->fs_io, &blk); +@@ -180,12 +181,20 @@ static int dir_trailer_func(ocfs2_filesys *fs, + + /* here we don't trust trailer, cannot use + * ocfs2_read_dir_block() */ +- ret = ocfs2_read_blocks(fs, blkno, 1, blk); +- if (ret) ++ err = ocfs2_read_blocks(fs, blkno, 1, blk); ++ if (err) { ++ ctxt->err = err; ++ ret = OCFS2_EXTENT_ERROR; + goto out; +- ret = ocfs2_check_dir_trailer_space(fs, di, blkno, blk); +- if (ret) ++ } ++ ++ err = ocfs2_check_dir_trailer_space(fs, di, blkno, blk); ++ if (err) { ++ ctxt->err = err; ++ ret = OCFS2_EXTENT_ERROR; + goto out; ++ } ++ + ocfs2_init_dir_trailer(fs, di, blkno, blk); + max_rec_len = ocfs2_find_max_rec_len(fs, blk); + trailer = ocfs2_dir_trailer_from_block(fs, blk); +@@ -198,7 +207,12 @@ static int dir_trailer_func(ocfs2_filesys *fs, + + /* comput trailer->db_check here, after writes out, + * trailer is trustable */ +- ret = ocfs2_write_dir_block(fs, di, blkno, blk); ++ err = ocfs2_write_dir_block(fs, di, blkno, blk); ++ if (err) { ++ ctxt->err = err; ++ ret = OCFS2_EXTENT_ERROR; ++ } ++ + out: + if (blk) + 
ocfs2_free(&blk); +@@ -219,9 +233,16 @@ static errcode_t ocfs2_init_dir_trailers(ocfs2_filesys *fs, + + ctxt.di = di; + ctxt.dx_root = dx_root; +- ++ ctxt.err = 0; + ret = ocfs2_block_iterate_inode(fs, di, + 0, dir_trailer_func, &ctxt); ++ ++ /* callback dir_trailer_func() may have error which can not ++ * return to its caller directly. If dir_trailer_func() sets ++ * error in ctxt.err, we should take this REAL error other ++ * than the value returned by ocfs2_block_iterate_inode(). */ ++ if (ctxt.err) ++ ret = ctxt.err; + out: + return ret; + } +-- +1.7.0.2 + diff --git a/0028-dx_dirs-stop-dx-insert-iteration-for-callback-error.patch b/0028-dx_dirs-stop-dx-insert-iteration-for-callback-error.patch new file mode 100644 index 0000000..d4698a4 --- /dev/null +++ b/0028-dx_dirs-stop-dx-insert-iteration-for-callback-error.patch @@ -0,0 +1,141 @@ +From 1f49857cc5fa914ac0d3577f841b398421ca01a0 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 26 Apr 2010 22:37:32 +0800 +Subject: [PATCH 28/30] dx_dirs: stop dx insert iteration for callback error + +Callback ocfs2_dx_dir_insert() may encounter memory alloc or I/O error. +These kind of errors can not return to caller of the callback directly, +so the dir block iteration for dx insert can not stop immediately when +such errors occure. + +This patch adds a member 'errcode_t err' to struct dx_insert_ctxt, which +can catch the error and permit ocfs2_dx_dir_insert() returns +OCFS2_EXTENT_ERROR to it's caller to stop the iteration immediately. 
+ +Signed-off-by: Coly Li +Cc: Mark Fasheh +Cc: Tao Ma +--- + libocfs2/dir_indexed.c | 61 +++++++++++++++++++++++++++++------------------ + 1 files changed, 38 insertions(+), 23 deletions(-) + +diff --git a/libocfs2/dir_indexed.c b/libocfs2/dir_indexed.c +index 5f3db95..08c43b7 100644 +--- a/libocfs2/dir_indexed.c ++++ b/libocfs2/dir_indexed.c +@@ -269,6 +269,7 @@ struct dx_insert_ctxt { + uint64_t dir_blkno; + uint64_t dx_root_blkno; + ocfs2_filesys *fs; ++ errcode_t err; + }; + + +@@ -1069,7 +1070,8 @@ static int ocfs2_dx_dir_insert(struct ocfs2_dir_entry *dentry, + char *buf, + void *priv_data) + { +- errcode_t ret = 0; ++ int ret = 0; ++ errcode_t err; + char *dx_buf = NULL; + char *dx_leaf_buf = NULL; + struct ocfs2_dx_root_block *dx_root = NULL; +@@ -1081,17 +1083,17 @@ static int ocfs2_dx_dir_insert(struct ocfs2_dir_entry *dentry, + uint64_t dx_root_blkno = ctxt->dx_root_blkno; + int write_dx_leaf = 0; + +- ret = ocfs2_malloc_block(fs->fs_io, &dx_buf); +- if (ret) +- goto out; ++ err = ocfs2_malloc_block(fs->fs_io, &dx_buf); ++ if (err) ++ goto set_err; + +- ret = ocfs2_malloc_block(fs->fs_io, &dx_leaf_buf); +- if (ret) +- goto out; ++ err = ocfs2_malloc_block(fs->fs_io, &dx_leaf_buf); ++ if (err) ++ goto set_err; + +- ret = ocfs2_read_dx_root(fs, dx_root_blkno, dx_buf); +- if (ret) +- goto out; ++ err = ocfs2_read_dx_root(fs, dx_root_blkno, dx_buf); ++ if (err) ++ goto set_err; + + dx_root = (struct ocfs2_dx_root_block *)dx_buf; + memset(&lookup, 0, sizeof(struct ocfs2_dir_lookup_result)); +@@ -1104,19 +1106,21 @@ static int ocfs2_dx_dir_insert(struct ocfs2_dir_entry *dentry, + goto insert_into_entries; + } else { + /* root block is full, expand it to an extent */ +- ret = ocfs2_expand_inline_dx_root(fs, dx_root); +- if (ret) +- goto out; ++ err = ocfs2_expand_inline_dx_root(fs, dx_root); ++ if (err) ++ goto set_err; + } + } + +- ret = ocfs2_find_dir_space_dx(fs, dx_root, ++ err = ocfs2_find_dir_space_dx(fs, dx_root, + dentry->name, dentry->name_len, 
&lookup); +- if (ret) +- goto out; +- ret = ocfs2_read_dx_leaf(fs, lookup.dl_dx_leaf_blkno, dx_leaf_buf); +- if (ret) +- goto out; ++ if (err) ++ goto set_err; ++ ++ err = ocfs2_read_dx_leaf(fs, lookup.dl_dx_leaf_blkno, dx_leaf_buf); ++ if (err) ++ goto set_err; ++ + dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf; + entry_list = &dx_leaf->dl_list; + write_dx_leaf = 1; +@@ -1124,12 +1128,18 @@ static int ocfs2_dx_dir_insert(struct ocfs2_dir_entry *dentry, + insert_into_entries: + ocfs2_dx_entry_list_insert(entry_list, &lookup.dl_hinfo, blocknr); + if (write_dx_leaf) { +- ret = ocfs2_write_dx_leaf(fs, dx_leaf->dl_blkno, dx_leaf); +- if (ret) +- goto out; ++ err = ocfs2_write_dx_leaf(fs, dx_leaf->dl_blkno, dx_leaf); ++ if (err) ++ goto set_err; + } + dx_root->dr_num_entries += 1; +- ret = ocfs2_write_dx_root(fs, dx_root_blkno, dx_buf); ++ err = ocfs2_write_dx_root(fs, dx_root_blkno, dx_buf); ++ if (!err) ++ goto out; ++ ++set_err: ++ ctxt->err = err; ++ ret = OCFS2_EXTENT_ERROR; + out: + if (dx_leaf_buf) + ocfs2_free(&dx_leaf_buf); +@@ -1256,8 +1266,13 @@ errcode_t ocfs2_dx_dir_build(ocfs2_filesys *fs, + ctxt.dir_blkno = dir; + ctxt.dx_root_blkno = dr_blkno; + ctxt.fs = fs; ++ ctxt.err = 0; + ret = ocfs2_dir_iterate(fs, dir, 0, NULL, + ocfs2_dx_dir_insert, &ctxt); ++ if (ctxt.err) ++ ret = ctxt.err; ++ if (ret) ++ goto out; + + /* check quota for dx_leaf */ + ret = ocfs2_read_dx_root(fs, dr_blkno, dx_buf); +-- +1.7.0.2 + diff --git a/0029-dx_dirs-set-OCFS2_INDEXED_DIR_FL-after-indexed-tree-.patch b/0029-dx_dirs-set-OCFS2_INDEXED_DIR_FL-after-indexed-tree-.patch new file mode 100644 index 0000000..c141072 --- /dev/null +++ b/0029-dx_dirs-set-OCFS2_INDEXED_DIR_FL-after-indexed-tree-.patch @@ -0,0 +1,63 @@ +From 2dff67994c3d6015b2b989d79a00c2cd10669ed0 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 26 Apr 2010 22:37:59 +0800 +Subject: [PATCH 29/30] dx_dirs: set OCFS2_INDEXED_DIR_FL after indexed tree gets built + +In ocfs2_dx_dir_build(), current code set 
OCFS2_INDEXED_DIR_FL to +di->i_dyn_features before inserting dir entries into the indexed tree. +If there is any error during the insertion, the corresponded dirent will +be lost from the indexed tree. Though this error can be checked and +fixed in fsck.ocfs2, it should be fixed. + +This patch modifies to set OCFS2_INDEXED_DIR_FL after +ocfs2_dir_iterate() returns successfully. If ocfs2_dir_iterate() returns +with error, ocfs2_dx_dir_build() will return error and stop to build the +indexed tree for a specific directory. In this case, no dirent will be +losted. + +Thanks to Tao Ma to catch this. + +Signed-off-by: Coly Li +Cc: Mark Fasheh +Cc: Tao Ma +--- + libocfs2/dir_indexed.c | 9 +++++++-- + 1 files changed, 7 insertions(+), 2 deletions(-) + +diff --git a/libocfs2/dir_indexed.c b/libocfs2/dir_indexed.c +index 08c43b7..eb872fd 100644 +--- a/libocfs2/dir_indexed.c ++++ b/libocfs2/dir_indexed.c +@@ -1254,7 +1254,6 @@ errcode_t ocfs2_dx_dir_build(ocfs2_filesys *fs, + dx_root->dr_entries.de_count = ocfs2_dx_entries_per_root(fs->fs_blocksize); + + di->i_dx_root = dr_blkno; +- di->i_dyn_features |= OCFS2_INDEXED_DIR_FL; + + ret = ocfs2_write_dx_root(fs, dr_blkno, dx_buf); + if (ret) +@@ -1274,14 +1273,20 @@ errcode_t ocfs2_dx_dir_build(ocfs2_filesys *fs, + if (ret) + goto out; + +- /* check quota for dx_leaf */ + ret = ocfs2_read_dx_root(fs, dr_blkno, dx_buf); + if (ret) + goto out; + ret = ocfs2_read_inode(fs, dir, di_buf); + if (ret) + goto out; ++ /* set inode to use indexed-dirs */ ++ di->i_dyn_features |= OCFS2_INDEXED_DIR_FL; + ++ ret = ocfs2_write_inode(fs, dir, di_buf); ++ if(ret) ++ goto out; ++ ++ /* check quota for dx_leaf */ + change = ocfs2_clusters_to_bytes(fs, + dx_root->dr_clusters); + uid = di->i_uid; +-- +1.7.0.2 + diff --git a/0030-dx_dirs-fix-ocfs2_swap_dx_entry_list-for-big-endian.patch b/0030-dx_dirs-fix-ocfs2_swap_dx_entry_list-for-big-endian.patch new file mode 100644 index 0000000..cc6a561 --- /dev/null +++ 
b/0030-dx_dirs-fix-ocfs2_swap_dx_entry_list-for-big-endian.patch @@ -0,0 +1,97 @@ +From 8cf3a61039b0bda46d8824e50c3989eae83b9a1a Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Mon, 26 Apr 2010 22:38:31 +0800 +Subject: [PATCH 30/30] dx_dirs: fix ocfs2_swap_dx_entry_list() for big endian + +As Tao Ma suggested, current ocfs2_swap_dx_entry_list() is +buggy for big endian hardware, because after dl_list->de_count +swapped, it is referenced in the following loop. + +This patch fixes this bug with adding an 'int to_cpu' argument, also +modifies other routines who call ocfs2_swap_dx_entry_list(). + +Signed-off-by: Coly Li +Cc: Mark Fasheh +Cc: Tao Ma +--- + libocfs2/dirblock.c | 23 ++++++++++++++--------- + 1 files changed, 14 insertions(+), 9 deletions(-) + +diff --git a/libocfs2/dirblock.c b/libocfs2/dirblock.c +index c22d843..e128d73 100644 +--- a/libocfs2/dirblock.c ++++ b/libocfs2/dirblock.c +@@ -245,29 +245,34 @@ static void ocfs2_swap_dx_entry(struct ocfs2_dx_entry *dx_entry) + dx_entry->dx_dirent_blk = bswap_64(dx_entry->dx_dirent_blk); + } + +-static void ocfs2_swap_dx_entry_list(struct ocfs2_dx_entry_list *dl_list) ++/* called for big endian */ ++static void ocfs2_swap_dx_entry_list(struct ocfs2_dx_entry_list *dl_list, int to_cpu) + { + int i; + +- dl_list->de_count = bswap_16(dl_list->de_count); +- dl_list->de_num_used = bswap_16(dl_list->de_num_used); ++ if (to_cpu) ++ dl_list->de_count = bswap_16(dl_list->de_count); + + for (i = 0; i < dl_list->de_count; i++) + ocfs2_swap_dx_entry(&dl_list->de_entries[i]); ++ dl_list->de_num_used = bswap_16(dl_list->de_num_used); ++ ++ if (!to_cpu) ++ dl_list->de_count = bswap_16(dl_list->de_count); + } + + static void ocfs2_swap_dx_entry_list_to_cpu(struct ocfs2_dx_entry_list *dl_list) + { + if (cpu_is_little_endian) + return; +- ocfs2_swap_dx_entry_list(dl_list); ++ ocfs2_swap_dx_entry_list(dl_list, 1); + } + + static void ocfs2_swap_dx_entry_list_from_cpu(struct ocfs2_dx_entry_list *dl_list) + { + if (cpu_is_little_endian) 
+ return; +- ocfs2_swap_dx_entry_list(dl_list); ++ ocfs2_swap_dx_entry_list(dl_list, 0); + } + + static void ocfs2_swap_dx_root_to_cpu(ocfs2_filesys *fs, +@@ -384,26 +389,26 @@ out: + return ret; + } + +-static void ocfs2_swap_dx_leaf(struct ocfs2_dx_leaf *dx_leaf) ++static void ocfs2_swap_dx_leaf(struct ocfs2_dx_leaf *dx_leaf, int to_cpu) + { + dx_leaf->dl_blkno = bswap_64(dx_leaf->dl_blkno); + dx_leaf->dl_fs_generation = bswap_64(dx_leaf->dl_fs_generation); + +- ocfs2_swap_dx_entry_list(&dx_leaf->dl_list); ++ ocfs2_swap_dx_entry_list(&dx_leaf->dl_list, to_cpu); + } + + static void ocfs2_swap_dx_leaf_to_cpu(struct ocfs2_dx_leaf *dx_leaf) + { + if (cpu_is_little_endian) + return; +- ocfs2_swap_dx_leaf(dx_leaf); ++ ocfs2_swap_dx_leaf(dx_leaf, 1); + } + + static void ocfs2_swap_dx_leaf_from_cpu(struct ocfs2_dx_leaf *dx_leaf) + { + if (cpu_is_little_endian) + return; +- ocfs2_swap_dx_leaf(dx_leaf); ++ ocfs2_swap_dx_leaf(dx_leaf, 0); + } + + errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block, +-- +1.7.0.2 + diff --git a/0031-dx_dirs-enable-metaecc-and-indexed-dirs-support-as-d.patch b/0031-dx_dirs-enable-metaecc-and-indexed-dirs-support-as-d.patch new file mode 100644 index 0000000..acdc56d --- /dev/null +++ b/0031-dx_dirs-enable-metaecc-and-indexed-dirs-support-as-d.patch @@ -0,0 +1,87 @@ +From 76e095ae3d132828bbb70bad68c428101d3652a9 Mon Sep 17 00:00:00 2001 +From: Coly Li +Date: Sun, 11 Apr 2010 00:03:33 +0800 +Subject: [PATCH 15/15] dx_dirs v11: enable metaecc and indexed-dirs support as default features + +metaecc feature enables ECC checking for meta data, which helps the file +system consistency. indexed-dirs feature enables indexed tree for +directories, which improves lookup performance for large scale +directories. + +This patch enables metaecc and indexed-dirs support as default features. 
+ +Signed-off-by: Coly Li +Cc: Mark Fasheh +--- + libocfs2/feature_string.c | 18 ++++++++++++++---- + mkfs.ocfs2/mkfs.ocfs2.8.in | 2 +- + 2 files changed, 15 insertions(+), 5 deletions(-) + +diff --git a/libocfs2/feature_string.c b/libocfs2/feature_string.c +index 0974fb6..b2413bf 100644 +--- a/libocfs2/feature_string.c ++++ b/libocfs2/feature_string.c +@@ -76,7 +76,9 @@ static ocfs2_fs_options feature_level_defaults[] = { + {OCFS2_FEATURE_COMPAT_BACKUP_SB | OCFS2_FEATURE_COMPAT_JBD2_SB, + OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC | + OCFS2_FEATURE_INCOMPAT_INLINE_DATA | +- OCFS2_FEATURE_INCOMPAT_XATTR, ++ OCFS2_FEATURE_INCOMPAT_XATTR | ++ OCFS2_FEATURE_INCOMPAT_META_ECC | ++ OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, + OCFS2_FEATURE_RO_COMPAT_UNWRITTEN}, /* OCFS2_FEATURE_LEVEL_DEFAULT */ + + {OCFS2_FEATURE_COMPAT_BACKUP_SB | OCFS2_FEATURE_COMPAT_JBD2_SB, +@@ -100,25 +102,33 @@ static ocfs2_fs_options mkfstypes_features_defaults[] = { + {OCFS2_FEATURE_COMPAT_BACKUP_SB | OCFS2_FEATURE_COMPAT_JBD2_SB, + OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC | + OCFS2_FEATURE_INCOMPAT_INLINE_DATA | +- OCFS2_FEATURE_INCOMPAT_XATTR, ++ OCFS2_FEATURE_INCOMPAT_XATTR | ++ OCFS2_FEATURE_INCOMPAT_META_ECC | ++ OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, + OCFS2_FEATURE_RO_COMPAT_UNWRITTEN}, /* OCFS2_MKFSTYPE_DEFAULT */ + + {OCFS2_FEATURE_COMPAT_BACKUP_SB | OCFS2_FEATURE_COMPAT_JBD2_SB, + OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC | + OCFS2_FEATURE_INCOMPAT_INLINE_DATA | +- OCFS2_FEATURE_INCOMPAT_XATTR, ++ OCFS2_FEATURE_INCOMPAT_XATTR | ++ OCFS2_FEATURE_INCOMPAT_META_ECC | ++ OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, + OCFS2_FEATURE_RO_COMPAT_UNWRITTEN}, /* OCFS2_MKFSTYPE_DATAFILES */ + + {OCFS2_FEATURE_COMPAT_BACKUP_SB | OCFS2_FEATURE_COMPAT_JBD2_SB, + OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC | + OCFS2_FEATURE_INCOMPAT_INLINE_DATA | +- OCFS2_FEATURE_INCOMPAT_XATTR, ++ OCFS2_FEATURE_INCOMPAT_XATTR | ++ OCFS2_FEATURE_INCOMPAT_META_ECC | ++ OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS, + OCFS2_FEATURE_RO_COMPAT_UNWRITTEN}, /* 
OCFS2_MKFSTYPE_MAIL */ + + {OCFS2_FEATURE_COMPAT_BACKUP_SB | OCFS2_FEATURE_COMPAT_JBD2_SB, + OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC | + OCFS2_FEATURE_INCOMPAT_INLINE_DATA | + OCFS2_FEATURE_INCOMPAT_XATTR | ++ OCFS2_FEATURE_INCOMPAT_META_ECC | ++ OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS | + OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE, + OCFS2_FEATURE_RO_COMPAT_UNWRITTEN}, /* OCFS2_MKFSTYPE_VMSTORE */ + }; +diff --git a/mkfs.ocfs2/mkfs.ocfs2.8.in b/mkfs.ocfs2/mkfs.ocfs2.8.in +index a148133..2fd7a70 100644 +--- a/mkfs.ocfs2/mkfs.ocfs2.8.in ++++ b/mkfs.ocfs2/mkfs.ocfs2.8.in +@@ -191,7 +191,7 @@ Chooses fewer features but ensures that the file system can be mounted from olde + .RS 1.2i + .TP + \fBdefault\fR +-The default feature set tries to strike a balance between providing new features and maintaining compatibility with relatively recent versions of \fIOCFS2\fR. It currently enables \fBsparse\fR, \fBunwritten\fR \fBinline-data\fR and \fBxattr\fR. It also enables \fBrefcount\fR for the \fIvmstore\fR volumes. ++The default feature set tries to strike a balance between providing new features and maintaining compatibility with relatively recent versions of \fIOCFS2\fR. It currently enables \fBsparse\fR, \fBunwritten\fR, \fBinline-data\fR, \fBxattr\fR, \fBmetaecc\fR, and \fBindexed-dirs\fR. It also enables \fBrefcount\fR for the \fIvmstore\fR volumes.
+ .RE + .RS 1.2i + .TP +-- +1.7.0.2 + diff --git a/bug-470741-debug_start_failures.patch b/bug-470741-debug_start_failures.patch new file mode 100644 index 0000000..e05aa27 --- /dev/null +++ b/bug-470741-debug_start_failures.patch @@ -0,0 +1,39 @@ +Index: ocfs2-tools/libo2cb/o2cb_err.et +=================================================================== +--- ocfs2-tools.orig/libo2cb/o2cb_err.et ++++ ocfs2-tools/libo2cb/o2cb_err.et +@@ -33,6 +33,12 @@ ec O2CB_ET_IO, + ec O2CB_ET_SERVICE_UNAVAILABLE, + "Unable to access cluster service" + ++ec O2CB_ET_SERVICE_HANDSHAKE_UNAVAILABLE, ++ "Unable to access cluster service due to bad handshake" ++ ++ec O2CB_ET_SERVICE_DEV_UNAVAILABLE, ++ "Unable to access cluster service device" ++ + ec O2CB_ET_INTERNAL_FAILURE, + "Internal logic failure" + +Index: ocfs2-tools/libo2cb/o2cb_abi.c +=================================================================== +--- ocfs2-tools.orig/libo2cb/o2cb_abi.c ++++ ocfs2-tools/libo2cb/o2cb_abi.c +@@ -2095,7 +2095,7 @@ static errcode_t o2cb_control_handshake( + if (ret != 0) + err = O2CB_ET_IO; + else if (!found) +- err = O2CB_ET_SERVICE_UNAVAILABLE; /* no match */ ++ err = O2CB_ET_SERVICE_HANDSHAKE_UNAVAILABLE; /* no match */ + break; + } + +@@ -2152,7 +2152,7 @@ errcode_t o2cb_control_open(unsigned int + case ENOTDIR: + case ENOENT: + case EISDIR: +- err = O2CB_ET_SERVICE_UNAVAILABLE; ++ err = O2CB_ET_SERVICE_DEV_UNAVAILABLE; + break; + + case EACCES: diff --git a/bug-543119-o2dlm.patch b/bug-543119-o2dlm.patch new file mode 100644 index 0000000..ffaf551 --- /dev/null +++ b/bug-543119-o2dlm.patch @@ -0,0 +1,12 @@ +diff -rup ocfs2-tools.orig//libo2dlm/o2dlm.c ocfs2-tools/libo2dlm/o2dlm.c +--- ocfs2-tools.orig//libo2dlm/o2dlm.c 2008-10-27 01:10:50.000000000 +0800 ++++ ocfs2-tools/libo2dlm/o2dlm.c 2009-11-03 16:49:38.000000000 +0800 +@@ -713,7 +713,7 @@ static errcode_t load_fsdlm(struct o2dlm + goto out; + } + +- ctxt->ct_lib_handle = dlopen("libdlm_lt.so", ++ ctxt->ct_lib_handle =
dlopen("libdlm_lt.so.3", + RTLD_NOW | RTLD_LOCAL); + if (!ctxt->ct_lib_handle) + goto out; diff --git a/bug-585080-handle-symbolic-link.patch b/bug-585080-handle-symbolic-link.patch new file mode 100644 index 0000000..abde0b6 --- /dev/null +++ b/bug-585080-handle-symbolic-link.patch @@ -0,0 +1,47 @@ +This is a simple patch which fix the issue that tunefs.ocfs2 online +resize can't handle symbolic link of a device file. For example, in +the LVM using scenario, '/dev/vg1/lv1' and '/dev/mapper/vg1-lv1' are +the same device, '/dev/vg1/lv1' is just a symbolic link to +'/dev/mapper/vg1-lv1'. But if we try to do online resize like +'tunefs.ocfs2 -S /dev/vg1/lv1', it fails. + +Signed-off-by: Jiaju Zhang +--- + ocfs2_controld/mount.c | 20 +++++++++++++++++--- + 1 files changed, 17 insertions(+), 3 deletions(-) + +diff --git a/ocfs2_controld/mount.c b/ocfs2_controld/mount.c +--- a/ocfs2_controld/mount.c ++++ b/ocfs2_controld/mount.c +@@ -260,13 +260,27 @@ static void add_service(struct mountgroup *mg, const char *device, + const char *service, int ci, int fd) + { + struct service *ms; ++ struct stat st1, st2; + +- log_debug("Adding service %s to device %s uuid %s", ++ log_debug("Adding service \"%s\" to device \"%s\" uuid \"%s\"", + service, device, mg->mg_uuid); + +- if (strcmp(mg->mg_device, device)) { ++ if (stat(mg->mg_device, &st1)) { ++ fill_error(mg, errno, "Failed to stat device \"%s\": %s", ++ mg->mg_device, strerror(errno)); ++ return; ++ } ++ ++ if (stat(device, &st2)) { ++ fill_error(mg, errno, "Failed to stat device \"%s\": %s", ++ device, strerror(errno)); ++ return; ++ } ++ ++ if (st1.st_rdev != st2.st_rdev) { + fill_error(mg, EINVAL, +- "Trying to mount fs %s on device %s, but it is already mounted from device %s", ++ "Trying to mount fs \"%s\" on device \"%s\", " ++ "but it is already mounted from device \"%s\"", + mg->mg_uuid, device, mg->mg_device); + return; + } + diff --git a/change-quotafile-names.diff b/change-quotafile-names.diff deleted file mode 
100644 index d8866c2..0000000 --- a/change-quotafile-names.diff +++ /dev/null @@ -1,30 +0,0 @@ -diff --git a/include/ocfs2-kernel/ocfs2_fs.h b/include/ocfs2-kernel/ocfs2_fs.h -index d27b098..b29abdc 100644 ---- a/include/ocfs2-kernel/ocfs2_fs.h -+++ b/include/ocfs2-kernel/ocfs2_fs.h -@@ -354,8 +354,8 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { - [JOURNAL_SYSTEM_INODE] = { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 }, - [LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 }, - [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 }, -- [LOCAL_USER_QUOTA_SYSTEM_INODE] = { "aquota%04d.user", OCFS2_QUOTA_FL, S_IFREG | 0644 }, -- [LOCAL_GROUP_QUOTA_SYSTEM_INODE] = { "aquota%04d.group", OCFS2_QUOTA_FL, S_IFREG | 0644 }, -+ [LOCAL_USER_QUOTA_SYSTEM_INODE] = { "aquota.user:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 }, -+ [LOCAL_GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group:%04d", OCFS2_QUOTA_FL, S_IFREG | 0644 }, - }; - - /* Parameter passed from mount.ocfs2 to module */ -diff --git a/mkfs.ocfs2/mkfs.c b/mkfs.ocfs2/mkfs.c -index 9fe2ac2..441df68 100644 ---- a/mkfs.ocfs2/mkfs.c -+++ b/mkfs.ocfs2/mkfs.c -@@ -104,8 +104,8 @@ static SystemFileInfo system_files[] = { - { "journal:%04d", SFI_JOURNAL, 0, S_IFREG | 0644 }, - { "local_alloc:%04d", SFI_LOCAL_ALLOC, 0, S_IFREG | 0644 }, - { "truncate_log:%04d", SFI_TRUNCATE_LOG, 0, S_IFREG | 0644 }, -- { "aquota%04d.user", SFI_QUOTA, 0, S_IFREG | 0644 }, -- { "aquota%04d.group", SFI_QUOTA, 0, S_IFREG | 0644 }, -+ { "aquota.user:%04d", SFI_QUOTA, 0, S_IFREG | 0644 }, -+ { "aquota.group:%04d", SFI_QUOTA, 0, S_IFREG | 0644 }, - }; - - struct fs_type_translation { diff --git a/debug-ocfs2_hb_ctl.patch b/debug-ocfs2_hb_ctl.patch new file mode 100644 index 0000000..6108267 --- /dev/null +++ b/debug-ocfs2_hb_ctl.patch @@ -0,0 +1,104 @@ +Index: ocfs2-tools/ocfs2_hb_ctl/ocfs2_hb_ctl.c 
+=================================================================== +--- ocfs2-tools.orig/ocfs2_hb_ctl/ocfs2_hb_ctl.c ++++ ocfs2-tools/ocfs2_hb_ctl/ocfs2_hb_ctl.c +@@ -39,6 +39,7 @@ + #include + #include + #include ++#include + + #include "ocfs2/ocfs2.h" + +@@ -137,6 +138,7 @@ static errcode_t get_desc(const char *de + if (!region_desc->r_name || !region_desc->r_device_name) + err = OCFS2_ET_NO_MEMORY; + } else { ++ syslog(LOG_INFO, "filled heartbeat desc, err: %d\n", (int)err); + region_desc->r_name = NULL; + region_desc->r_device_name = NULL; + goto out_close; +@@ -157,6 +159,7 @@ static errcode_t get_desc(const char *de + err = OCFS2_ET_NO_MEMORY; + } + } else { ++ syslog(LOG_INFO, "filled cluster desc, err: %d\n", (int)err); + cluster_desc->c_stack = NULL; + cluster_desc->c_cluster = NULL; + } +@@ -202,9 +205,9 @@ static errcode_t compare_dev(const char + + /* Any problem with getting the descriptor is NOT FOUND */ + err = OCFS2_ET_FILE_NOT_FOUND; +- if (get_desc(device)) ++ if (get_desc(device)) { + goto out; +- ++ } + if (!strcmp(region_desc->r_name, hbo->uuid_str)) { + hbo->dev_str = device; + err = 0; +@@ -381,12 +384,19 @@ static errcode_t stop_heartbeat(struct h + { + errcode_t err = 0; + +- if (!hbo->dev_str) ++ if (!hbo->dev_str) { + err = lookup_dev(hbo); ++ if (err) ++ syslog(LOG_INFO, "looked up device, ret: %d\n", ++ (int)err); ++ } + if (!err) { + region_desc->r_persist = 1; /* hb_ctl is for reals */ + region_desc->r_service = hbo->service; + err = o2cb_group_leave(cluster_desc, region_desc); ++ if (err) ++ syslog(LOG_INFO, "left group - err: %d\n", ++ (int)err); + } + + return err; +@@ -536,14 +546,42 @@ static void print_usage(int err) + fprintf(output, " %s -h\n", progname); + } + ++static int ++hack_enable_coredumps(void) ++{ ++ struct rlimit rlim; ++ ++ if (getrlimit(RLIMIT_CORE, &rlim) < 0) { ++ return -1; ++ } ++ ++ rlim.rlim_cur = rlim.rlim_max = RLIM_INFINITY; ++ setrlimit(RLIMIT_CORE, &rlim); ++ (void)chdir("/var/lib/openais"); ++ return 0; ++} ++ ++ +
int main(int argc, char **argv) + { + errcode_t err = 0; + int ret = 0; ++ int i; + struct hb_ctl_options hbo = { + .action = HB_ACTION_UNKNOWN, + }; + char hbuuid[33]; ++ char tmp[1024]; ++ ++ openlog("ocfs2_hb_ctl", LOG_CONS|LOG_NDELAY|LOG_PID, LOG_KERN); ++ sprintf(tmp, "ocfs2_hb_ctl"); ++ for (i = 0; i < argc; i++) { ++ strncat(tmp, " ", sizeof(tmp) - strlen(tmp) - 1); ++ strncat(tmp, argv[i], sizeof(tmp) - strlen(tmp) - 1); ++ } ++ syslog(LOG_INFO, "%s\n", tmp); ++ ++ hack_enable_coredumps(); + + setbuf(stdout, NULL); + setbuf(stderr, NULL); diff --git a/extra-debug.patch b/extra-debug.patch new file mode 100644 index 0000000..1ecb28f --- /dev/null +++ b/extra-debug.patch @@ -0,0 +1,187 @@ +--- + ocfs2_controld/ckpt.c | 2 - + ocfs2_controld/main.c | 53 ++++++++++++++++++++++++++++++++++------ + ocfs2_controld/mount.c | 2 + + ocfs2_controld/ocfs2_controld.h | 1 + ocfs2_controld/pacemaker.c | 2 - + 5 files changed, 51 insertions(+), 9 deletions(-) + +Index: ocfs2_controld/ckpt.c +=================================================================== +--- a/ocfs2_controld/ckpt.c.orig ++++ b/ocfs2_controld/ckpt.c +@@ -381,7 +381,7 @@ static int call_section_read(struct ckpt + + /* -ENOENT is a clean error for the caller to handle */ + if (rc == -ENOENT) { +- log_debug("Checkpoint \"%.*s\" does not have a " ++ log_error("Checkpoint \"%.*s\" does not have a " + "section named \"%s\"", + handle->ch_name.length, + handle->ch_name.value, name); +Index: ocfs2_controld/main.c +=================================================================== +--- a/ocfs2_controld/main.c.orig ++++ b/ocfs2_controld/main.c +@@ -73,7 +73,7 @@ static int time_to_die = 0; + static int sigpipe_write_fd; + + char *prog_name; +-int daemon_debug_opt; ++int daemon_debug_opt = 0; + char daemon_debug_buf[1024]; + char dump_buf[DUMP_SIZE]; + int dump_point; +@@ -994,8 +994,7 @@ static void lockfile(void) + fd = open(LOCKFILE_NAME, O_CREAT|O_WRONLY, + S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); + if (fd < 0) { +- fprintf(stderr, "cannot
open/create lock file %s\n", +- LOCKFILE_NAME); ++ log_error("cannot open/create lock file %s", LOCKFILE_NAME); + exit(EXIT_FAILURE); + } + +@@ -1006,13 +1005,13 @@ static void lockfile(void) + + error = fcntl(fd, F_SETLK, &lock); + if (error) { +- fprintf(stderr, "ocfs2_controld is already running\n"); ++ log_error("ocfs2_controld is already running"); + exit(EXIT_FAILURE); + } + + error = ftruncate(fd, 0); + if (error) { +- fprintf(stderr, "cannot clear lock file %s\n", LOCKFILE_NAME); ++ log_error("cannot clear lock file %s", LOCKFILE_NAME); + exit(EXIT_FAILURE); + } + +@@ -1020,7 +1019,7 @@ static void lockfile(void) + + error = write(fd, buf, strlen(buf)); + if (error <= 0) { +- fprintf(stderr, "cannot write lock file %s\n", LOCKFILE_NAME); ++ log_error("cannot write lock file %s", LOCKFILE_NAME); + exit(EXIT_FAILURE); + } + } +@@ -1030,13 +1029,13 @@ static void daemonize(void) + int fd; + pid_t pid = fork(); + if (pid < 0) { ++ log_error("main: cannot fork"); + perror("main: cannot fork"); + exit(EXIT_FAILURE); + } + if (pid) + exit(EXIT_SUCCESS); + setsid(); +- chdir("/"); + umask(0); + close(0); + close(1); +@@ -1107,6 +1106,7 @@ static void decode_arguments(int argc, c + break; + + default: ++ log_error("unknown option: %c\n", optchar); + fprintf(stderr, "unknown option: %c\n", optchar); + exit(EXIT_FAILURE); + break; +@@ -1144,12 +1144,53 @@ static void set_scheduler(void) + } + } + ++#include ++#include ++ ++static int ++hack_enable_coredumps(void) ++{ ++ int rc; ++ struct rlimit rlim; ++ int doenable = 1; ++ ++ if ((rc = getrlimit(RLIMIT_CORE, &rlim)) < 0) { ++ int errsave = errno; ++ log_error("Cannot get current core limit value. %d", errsave); ++ errno = errsave; ++ return rc; ++ } ++ if (rlim.rlim_max == 0 && geteuid() == 0) { ++ rlim.rlim_max = RLIM_INFINITY; ++ } ++ ++ rlim.rlim_cur = (doenable ? 
rlim.rlim_max : 0); ++ ++ if (doenable && rlim.rlim_max == 0) { ++ log_error("Not possible to enable core dumps (rlim_max is 0)"); ++ } ++ ++ if ((rc = setrlimit(RLIMIT_CORE, &rlim)) < 0) { ++ int errsave = errno; ++ log_error("Unable to enable core dumps: %d", errsave); ++ errno = errsave; ++ return rc; ++ } ++ chdir("/var/lib/openais"); ++ log_debug("Core dumps enabled: /var/lib/openais"); ++ return 0; ++} ++ + int main(int argc, char **argv) + { + errcode_t err; + prog_name = argv[0]; + const char *stack = NULL; + ++ decode_arguments(argc, argv); ++ ++ hack_enable_coredumps(); ++ + init_mounts(); + + initialize_o2cb_error_table(); +@@ -1165,13 +1206,11 @@ int main(int argc, char **argv) + return 1; + } + if (strcmp(stack, stackname)) { +- fprintf(stderr, "%s: This daemon supports the \"%s\" stack, but the \"%s\" stack is in use\n", +- prog_name, stackname, stack); ++ log_error("%s: This daemon supports the \"%s\" stack, but the \"%s\" stack is in use", ++ prog_name, stackname, stack); + return 1; + } + +- decode_arguments(argc, argv); +- + if (!daemon_debug_opt) + daemonize(); + +Index: ocfs2_controld/mount.c +=================================================================== +--- a/ocfs2_controld/mount.c.orig ++++ b/ocfs2_controld/mount.c +@@ -176,6 +176,8 @@ static void notify_mount_client(struct m + else + mg->mg_mount_notified = 1; + ++ log_debug("Notified client: %d", mg->mg_mount_notified); ++ + /* + * XXX If we failed to notify the client, what can we do? I'm + * guessing that our main loop will get POLLHUP and we'll clean +Index: ocfs2_controld/ocfs2_controld.h +=================================================================== +--- a/ocfs2_controld/ocfs2_controld.h.orig ++++ b/ocfs2_controld/ocfs2_controld.h +@@ -60,6 +60,7 @@ do { \ + #define log_error(fmt, args...) 
\ + do { \ + log_debug(fmt, ##args); \ ++ fprintf(stderr, fmt "\n", ##args); \ + syslog(LOG_ERR, fmt, ##args); \ + } while (0) + diff --git a/force-debug.patch b/force-debug.patch new file mode 100644 index 0000000..ba2ae2f --- /dev/null +++ b/force-debug.patch @@ -0,0 +1,28 @@ +--- + ocfs2_controld/ocfs2_controld.h | 7 +++++++ + 1 file changed, 7 insertions(+) + +Index: ocfs2_controld/ocfs2_controld.h +=================================================================== +--- a/ocfs2_controld/ocfs2_controld.h.orig 2008-10-26 18:10:50.000000000 +0100 ++++ b/ocfs2_controld/ocfs2_controld.h 2009-03-10 22:22:52.719197000 +0100 +@@ -44,12 +44,19 @@ extern void daemon_dump_save(void); + + #define log_debug(fmt, args...) \ + do { \ ++ syslog(LOG_DEBUG, fmt, ##args); \ + snprintf(daemon_debug_buf, 1023, "%ld %s@%d: " fmt "\n", \ + time(NULL), __FUNCTION__, __LINE__, ##args); \ + if (daemon_debug_opt) fprintf(stderr, "%s", daemon_debug_buf); \ + daemon_dump_save(); \ + } while (0) + ++#define log_info(fmt, args...) \ ++do { \ ++ log_debug(fmt, ##args); \ ++ syslog(LOG_INFO, fmt, ##args); \ ++} while (0) ++ + #define log_error(fmt, args...) \ + do { \ + log_debug(fmt, ##args); \ diff --git a/o2cb.ocf b/o2cb.ocf index 02c6b30..784ca3d 100644 --- a/o2cb.ocf +++ b/o2cb.ocf @@ -168,19 +168,18 @@ bringup_daemon() kill_daemon() { status_daemon; rc=$? - if [ $rc != $OCF_SUCCESS ]; then - return $rc + if [ $rc == $OCF_NOT_RUNNING ]; then + return 0 fi ocf_log info "Stopping `basename "$DAEMON"`" killproc "$DAEMON" - while [ $rc = $OCF_NOT_RUNNING ]; do - sleep 1 + while [ $rc != $OCF_NOT_RUNNING ]; do + sleep 1 status_daemon; rc=$? 
done - - return $OCF_SUCCESS + return 0 } # diff --git a/ocfs2-devel.diff b/ocfs2-devel.diff index 5be63e8..727ab22 100644 --- a/ocfs2-devel.diff +++ b/ocfs2-devel.diff @@ -1,33 +1,6 @@ ---- ocfs2-tools/ocfs2_controld/main.c -+++ ocfs2-tools/ocfs2_controld/main.c -@@ -1027,6 +1027,7 @@ - - static void daemonize(void) - { -+ int fd; - pid_t pid = fork(); - if (pid < 0) { - perror("main: cannot fork"); -@@ -1040,6 +1041,18 @@ - close(0); - close(1); - close(2); -+ fd = open("/dev/null", O_RDWR); -+ if (fd >= 0) { -+ /* dup2 to 0 / 1 / 2 (stdin / stdout / stderr) */ -+ dup2(fd, STDIN_FILENO); /* 0 */ -+ dup2(fd, STDOUT_FILENO); /* 1 */ -+ dup2(fd, STDERR_FILENO); /* 2 */ -+ -+ /* Should be 0, but just in case it isn't... */ -+ if (fd > 2) { -+ close(fd); -+ } -+ } - openlog("ocfs2_controld", LOG_PID, LOG_DAEMON); - - lockfile(); ---- ocfs2-tools/vendor/common/o2cb.init.sh +Index: ocfs2-tools/vendor/common/o2cb.init.sh +=================================================================== +--- ocfs2-tools.orig/vendor/common/o2cb.init.sh +++ ocfs2-tools/vendor/common/o2cb.init.sh @@ -8,7 +8,8 @@ # Provides: o2cb @@ -39,7 +12,7 @@ # Default-Start: 2 3 5 # Default-Stop: # Short-Description: Load O2CB cluster services at system boot. -@@ -653,7 +654,7 @@ +@@ -653,7 +654,7 @@ status_filesystem() status_daemon() { @@ -48,16 +21,7 @@ echo -n "Checking for control daemon: " if [ -n "$(pidofproc "$DAEMON")" ] then -@@ -667,7 +668,7 @@ - - bringup_daemon() - { -- DAEMON="/sbin/ocfs2_controld.${O2CB_STACK}" -+ DAEMON="/usr/sbin/ocfs2_controld.${O2CB_STACK}" - echo -n "Starting $(basename "$DAEMON"): " - start_daemon "$DAEMON" - [ $? 
!= 0 ] && return 1 -@@ -682,6 +683,7 @@ +@@ -682,6 +683,7 @@ bringup_daemon() fi sleep 1 done @@ -65,7 +29,7 @@ return 0 } -@@ -689,7 +691,7 @@ +@@ -689,7 +691,7 @@ bringup_daemon() kill_daemon() { SIGNAL="$1" @@ -74,7 +38,9 @@ status_daemon >/dev/null 2>&1 || return 2 ---- ocfs2-tools/vendor/common/ocfs2.init +Index: ocfs2-tools/vendor/common/ocfs2.init +=================================================================== +--- ocfs2-tools.orig/vendor/common/ocfs2.init +++ ocfs2-tools/vendor/common/ocfs2.init @@ -8,9 +8,9 @@ ### BEGIN INIT INFO diff --git a/ocfs2-mount-gcc45.patch b/ocfs2-mount-gcc45.patch deleted file mode 100644 index f26fb38..0000000 --- a/ocfs2-mount-gcc45.patch +++ /dev/null @@ -1,21 +0,0 @@ -Index: mount.ocfs2/mount.ocfs2.c -=================================================================== ---- mount.ocfs2/mount.ocfs2.c.orig -+++ mount.ocfs2/mount.ocfs2.c -@@ -261,7 +261,7 @@ int main(int argc, char **argv) - char *extra = NULL; - int dev_ro = 0; - char *hbstr = NULL; -- char stackstr[strlen(OCFS2_CLUSTER_STACK_ARG) + OCFS2_STACK_LABEL_LEN + 1] = ""; -+ char stackstr[strlen(OCFS2_CLUSTER_STACK_ARG) + OCFS2_STACK_LABEL_LEN + 1]; - ocfs2_filesys *fs = NULL; - struct o2cb_cluster_desc cluster; - struct o2cb_region_desc desc; -@@ -269,6 +269,7 @@ int main(int argc, char **argv) - int hb_started = 0; - struct stat statbuf; - -+ stackstr[0] = '\0'; - initialize_ocfs_error_table(); - initialize_o2dl_error_table(); - initialize_o2cb_error_table(); diff --git a/ocfs2-tools-static-glib.diff b/ocfs2-o2cb_ctl-static-glibc.diff similarity index 58% rename from ocfs2-tools-static-glib.diff rename to ocfs2-o2cb_ctl-static-glibc.diff index a89c421..bfa4a22 100644 --- a/ocfs2-tools-static-glib.diff +++ b/ocfs2-o2cb_ctl-static-glibc.diff @@ -1,7 +1,8 @@ -diff -ru ocfs2-tools/o2cb_ctl/Makefile ocfs2-tools.fix/o2cb_ctl/Makefile ---- ocfs2-tools/o2cb_ctl/Makefile 2007-11-13 04:19:54.000000000 -0500 -+++ ocfs2-tools.fix/o2cb_ctl/Makefile 2008-02-19 
15:00:40.000000000 -0500 -@@ -49,6 +49,6 @@ +Index: ocfs2-tools/o2cb_ctl/Makefile +=================================================================== +--- ocfs2-tools.orig/o2cb_ctl/Makefile ++++ ocfs2-tools/o2cb_ctl/Makefile +@@ -40,6 +40,6 @@ o2cb_config_CPPFLAGS = $(GLIB_CFLAGS) -D o2cb_ctl_CPPFLAGS = $(GLIB_CFLAGS) -DG_DISABLE_DEPRECATED o2cb_ctl: $(O2CB_CTL_OBJS) $(LIBOCFS2_DEPS) $(LIBO2CB_DEPS) diff --git a/ocfs2-quota.diff b/ocfs2-quota.diff deleted file mode 100644 index 9b11bd7..0000000 --- a/ocfs2-quota.diff +++ /dev/null @@ -1,795 +0,0 @@ ->From 4220f907aba2afa2c045f26f0d9afe465aea6863 Mon Sep 17 00:00:00 2001 -From: Jan Kara -Date: Tue, 14 Oct 2008 15:44:31 +0200 -Subject: [PATCH] Implement quota support to mkfs and add a few auxiliary functions for quotas -into libocfs2. - -Signed-off-by: Jan Kara ---- - Preamble.make | 3 +- - include/ocfs2-kernel/ocfs2_fs.h | 114 +++++++++++++++++- - include/ocfs2/ocfs2.h | 25 ++++ - libocfs2/Makefile | 1 + - libocfs2/feature_string.c | 22 +++- - libocfs2/quota.c | 261 +++++++++++++++++++++++++++++++++++++++ - mkfs.ocfs2/mkfs.c | 140 +++++++++++++++++++++- - mkfs.ocfs2/mkfs.h | 1 + - o2image/Makefile | 2 +- - 9 files changed, 563 insertions(+), 6 deletions(-) - create mode 100644 libocfs2/quota.c - -diff --git a/include/ocfs2-kernel/ocfs2_fs.h b/include/ocfs2-kernel/ocfs2_fs.h -index e454099..d27b098 100644 ---- a/include/ocfs2-kernel/ocfs2_fs.h -+++ b/include/ocfs2-kernel/ocfs2_fs.h -@@ -92,7 +92,9 @@ - | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ - | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ - | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK) --#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN -+#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ -+ | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ -+ | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) - - /* - * Heartbeat-only devices are missing journals and other files. 
The -@@ -159,6 +161,12 @@ - */ - #define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 0x0001 - -+/* -+ * Maintain quota information for this filesystem -+ */ -+#define OCFS2_FEATURE_RO_COMPAT_USRQUOTA 0x0002 -+#define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA 0x0004 -+ - /* The byte offset of the first backup block will be 1G. - * The following will be 4G, 16G, 64G, 256G and 1T. - */ -@@ -188,6 +196,7 @@ - #define OCFS2_HEARTBEAT_FL (0x00000200) /* Heartbeat area */ - #define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ - #define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ -+#define OCFS2_QUOTA_FL (0x00001000) /* Quota file */ - - /* - * Flags on ocfs2_dinode.i_dyn_features -@@ -311,6 +320,8 @@ enum { - #define OCFS2_FIRST_ONLINE_SYSTEM_INODE SLOT_MAP_SYSTEM_INODE - HEARTBEAT_SYSTEM_INODE, - GLOBAL_BITMAP_SYSTEM_INODE, -+ USER_QUOTA_SYSTEM_INODE, -+ GROUP_QUOTA_SYSTEM_INODE, - #define OCFS2_LAST_GLOBAL_SYSTEM_INODE GLOBAL_BITMAP_SYSTEM_INODE - ORPHAN_DIR_SYSTEM_INODE, - EXTENT_ALLOC_SYSTEM_INODE, -@@ -318,6 +329,8 @@ enum { - JOURNAL_SYSTEM_INODE, - LOCAL_ALLOC_SYSTEM_INODE, - TRUNCATE_LOG_SYSTEM_INODE, -+ LOCAL_USER_QUOTA_SYSTEM_INODE, -+ LOCAL_GROUP_QUOTA_SYSTEM_INODE, - NUM_SYSTEM_INODES - }; - -@@ -331,6 +344,8 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { - [SLOT_MAP_SYSTEM_INODE] = { "slot_map", 0, S_IFREG | 0644 }, - [HEARTBEAT_SYSTEM_INODE] = { "heartbeat", OCFS2_HEARTBEAT_FL, S_IFREG | 0644 }, - [GLOBAL_BITMAP_SYSTEM_INODE] = { "global_bitmap", 0, S_IFREG | 0644 }, -+ [USER_QUOTA_SYSTEM_INODE] = { "aquota.user", OCFS2_QUOTA_FL, S_IFREG | 0644 }, -+ [GROUP_QUOTA_SYSTEM_INODE] = { "aquota.group", OCFS2_QUOTA_FL, S_IFREG | 0644 }, - - /* Slot-specific system inodes (one copy per slot) */ - [ORPHAN_DIR_SYSTEM_INODE] = { "orphan_dir:%04d", 0, S_IFDIR | 0755 }, -@@ -338,7 +353,9 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { - [INODE_ALLOC_SYSTEM_INODE] = { "inode_alloc:%04d", OCFS2_BITMAP_FL | 
OCFS2_CHAIN_FL, S_IFREG | 0644 }, - [JOURNAL_SYSTEM_INODE] = { "journal:%04d", OCFS2_JOURNAL_FL, S_IFREG | 0644 }, - [LOCAL_ALLOC_SYSTEM_INODE] = { "local_alloc:%04d", OCFS2_BITMAP_FL | OCFS2_LOCAL_ALLOC_FL, S_IFREG | 0644 }, -- [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 } -+ [TRUNCATE_LOG_SYSTEM_INODE] = { "truncate_log:%04d", OCFS2_DEALLOC_FL, S_IFREG | 0644 }, -+ [LOCAL_USER_QUOTA_SYSTEM_INODE] = { "aquota%04d.user", OCFS2_QUOTA_FL, S_IFREG | 0644 }, -+ [LOCAL_GROUP_QUOTA_SYSTEM_INODE] = { "aquota%04d.group", OCFS2_QUOTA_FL, S_IFREG | 0644 }, - }; - - /* Parameter passed from mount.ocfs2 to module */ -@@ -713,6 +730,99 @@ struct ocfs2_group_desc - /*40*/ __u8 bg_bitmap[0]; - }; - -+/* Magic numbers and known versions for local quota files */ -+#define OCFS2_LOCAL_QMAGICS {\ -+ 0x0cf524c0, /* USRQUOTA */ \ -+ 0x0cf524c1 /* GRPQUOTA */ \ -+} -+ -+#define OCFS2_LOCAL_QVERSIONS {\ -+ 0, \ -+ 0, \ -+} -+ -+/* Magic numbers and known versions for global quota files */ -+#define OCFS2_GLOBAL_QMAGICS {\ -+ 0x0cf52470, /* USRQUOTA */ \ -+ 0x0cf52471 /* GRPQUOTA */ \ -+} -+ -+#define OCFS2_GLOBAL_QVERSIONS {\ -+ 0, \ -+ 0, \ -+} -+ -+/* How many bytes to we reserve in each quota file block for our internal -+ * purposes? E.g. checksums... 
*/ -+#define OCFS2_QBLK_RESERVED_SPACE 8 -+ -+/* Generic header of all quota files */ -+struct ocfs2_disk_dqheader { -+ __le32 dqh_magic; /* Magic number identifying file */ -+ __le32 dqh_version; /* Quota format version */ -+}; -+ -+/* Quota flags in dqinfo header */ -+#define OLQF_CLEAN 0x0001 /* Quota file is empty (this should be after\ -+ * quota has been cleanly turned off) */ -+ -+#define OCFS2_LOCAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader)) -+ -+/* Information header of local quota file (immediately follows the generic -+ * header) */ -+struct ocfs2_local_disk_dqinfo { -+ __le32 dqi_flags; /* Flags for quota file */ -+ __le32 dqi_chunks; /* Number of chunks of quota structures -+ * with a bitmap */ -+ __le32 dqi_blocks; /* Number of blocks allocated for quota file */ -+}; -+ -+/* Header of one chunk of a quota file */ -+struct ocfs2_local_disk_chunk { -+ __le32 dqc_free; /* Number of free entries in the bitmap */ -+ uint8_t dqc_bitmap[0]; /* Bitmap of entries in the corresponding -+ * chunk of quota file */ -+}; -+ -+#define OCFS2_GLOBAL_INFO_OFF (sizeof(struct ocfs2_disk_dqheader)) -+ -+/* Information header of global quota file (immediately follows the generic -+ * header) */ -+struct ocfs2_global_disk_dqinfo { -+ __le32 dqi_bgrace; -+ __le32 dqi_igrace; -+ __le32 dqi_syncms; -+ __le32 dqi_blocks; -+ __le32 dqi_free_blk; -+ __le32 dqi_free_entry; -+}; -+ -+/* Header of leaf tree block */ -+struct ocfs2_disk_dqdbheader { -+ __le32 dqdh_next_free; /* Number of next block with free entry */ -+ __le32 dqdh_prev_free; /* Number of previous block with free entry */ -+ __le16 dqdh_entries; /* Number of valid entries in block */ -+ __le16 dqdh_pad1; -+ __le32 dqdh_pad2; -+}; -+ -+/* Structure with global user / group information. We reserve some space -+ * for future use. 
*/ -+struct ocfs2_global_disk_dqblk { -+ __le32 dqb_id; /* ID the structure belongs to */ -+ __le32 dqb_use_count; /* Number of nodes having reference to this structure */ -+ __le64 dqb_ihardlimit; /* absolute limit on allocated inodes */ -+ __le64 dqb_isoftlimit; /* preferred inode limit */ -+ __le64 dqb_curinodes; /* current # allocated inodes */ -+ __le64 dqb_bhardlimit; /* absolute limit on disk space */ -+ __le64 dqb_bsoftlimit; /* preferred limit on disk space */ -+ __le64 dqb_curspace; /* current space occupied */ -+ __le64 dqb_btime; /* time limit for excessive disk use */ -+ __le64 dqb_itime; /* time limit for excessive inode use */ -+ __le64 dqb_pad1; -+ __le64 dqb_pad2; -+}; -+ - #ifdef __KERNEL__ - static inline int ocfs2_fast_symlink_chars(struct super_block *sb) - { -diff --git a/include/ocfs2/ocfs2.h b/include/ocfs2/ocfs2.h -index 68ba4f5..cabae04 100644 ---- a/include/ocfs2/ocfs2.h -+++ b/include/ocfs2/ocfs2.h -@@ -134,6 +134,16 @@ typedef struct _ocfs2_dir_scan ocfs2_dir_scan; - typedef struct _ocfs2_bitmap ocfs2_bitmap; - typedef struct _ocfs2_devices ocfs2_devices; - -+#define MAXQUOTAS 2 -+ -+struct _ocfs2_quota_info { -+ uint32_t dqi_bgrace; -+ uint32_t dqi_igrace; -+ uint32_t dqi_syncms; -+}; -+ -+typedef struct _ocfs2_quota_info ocfs2_quota_info; -+ - struct _ocfs2_filesys { - char *fs_devname; - uint32_t fs_flags; -@@ -160,6 +170,8 @@ struct _ocfs2_filesys { - struct o2dlm_ctxt *fs_dlm_ctxt; - struct ocfs2_image_state *ost; - -+ ocfs2_quota_info qinfo[MAXQUOTAS]; -+ - /* Reserved for the use of the calling application. 
*/ - void *fs_private; - }; -@@ -557,6 +569,19 @@ errcode_t ocfs2_meta_lock(ocfs2_filesys *fs, ocfs2_cached_inode *inode, - - errcode_t ocfs2_meta_unlock(ocfs2_filesys *fs, ocfs2_cached_inode *ci); - -+/* Quota operations */ -+void ocfs2_swap_quota_header(struct ocfs2_disk_dqheader *header); -+void ocfs2_swap_quota_local_info(struct ocfs2_local_disk_dqinfo *info); -+void ocfs2_swap_quota_chunk_header(struct ocfs2_local_disk_chunk *chunk); -+void ocfs2_swap_quota_global_info(struct ocfs2_global_disk_dqinfo *info); -+void ocfs2_swap_quota_global_dqblk(struct ocfs2_global_disk_dqblk *dqblk); -+void ocfs2_swap_quota_leaf_block_header(struct ocfs2_disk_dqdbheader *bheader); -+errcode_t ocfs2_create_local_quota_file(ocfs2_filesys *fs, int type, -+ uint64_t blkno); -+int ocfs2_qtree_depth(int blocksize); -+errcode_t ocfs2_create_global_quota_file(ocfs2_filesys *fs, int type, -+ uint64_t blkno); -+ - /* Low level */ - void ocfs2_swap_slot_map(struct ocfs2_slot_map *sm, int num_slots); - void ocfs2_swap_slot_map_extended(struct ocfs2_slot_map_extended *se, -diff --git a/libocfs2/Makefile b/libocfs2/Makefile -index 446c8b4..18cf2ee 100644 ---- a/libocfs2/Makefile -+++ b/libocfs2/Makefile -@@ -72,6 +72,7 @@ CFILES = \ - lockid.c \ - backup_super.c \ - feature_string.c\ -+ quota.c \ - image.c - - HFILES = \ -diff --git a/libocfs2/feature_string.c b/libocfs2/feature_string.c -index 7b1f93e..00c4f26 100644 ---- a/libocfs2/feature_string.c -+++ b/libocfs2/feature_string.c -@@ -73,7 +73,9 @@ static ocfs2_fs_options feature_level_defaults[] = { - OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC | - OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP | - OCFS2_FEATURE_INCOMPAT_INLINE_DATA, -- OCFS2_FEATURE_RO_COMPAT_UNWRITTEN}, /* OCFS2_FEATURE_LEVEL_MAX_FEATURES */ -+ OCFS2_FEATURE_RO_COMPAT_UNWRITTEN | -+ OCFS2_FEATURE_RO_COMPAT_USRQUOTA | -+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }, /* OCFS2_FEATURE_LEVEL_MAX_FEATURES */ - }; - - /* These are the features we support in mkfs/tunefs via --fs-features */ -@@ 
-110,6 +112,16 @@ static struct fs_feature_flags ocfs2_supported_features[] = { - {0, OCFS2_FEATURE_INCOMPAT_INLINE_DATA, 0}, - }, - { -+ "usrquota", -+ {0, 0, OCFS2_FEATURE_RO_COMPAT_USRQUOTA}, -+ {0, 0, OCFS2_FEATURE_RO_COMPAT_USRQUOTA}, -+ }, -+ { -+ "grpquota", -+ {0, 0, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}, -+ {0, 0, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}, -+ }, -+ { - NULL, - {0, 0, 0}, - {0, 0, 0} -@@ -161,6 +173,14 @@ static struct feature_name ocfs2_feature_names[] = { - .fn_flag = {0, 0, OCFS2_FEATURE_RO_COMPAT_UNWRITTEN}, - }, - { -+ .fn_name = "UserQuota", -+ .fn_flag = {0, 0, OCFS2_FEATURE_RO_COMPAT_USRQUOTA}, -+ }, -+ { -+ .fn_name = "GroupQuota", -+ .fn_flag = {0, 0, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA}, -+ }, -+ { - .fn_name = "InlineData", - .fn_flag = {0, OCFS2_FEATURE_INCOMPAT_INLINE_DATA, 0}, - }, -diff --git a/libocfs2/quota.c b/libocfs2/quota.c -new file mode 100644 -index 0000000..8670ae0 ---- /dev/null -+++ b/libocfs2/quota.c -@@ -0,0 +1,261 @@ -+/* -*- mode: c; c-basic-offset: 8; -*- -+ * vim: noexpandtab sw=8 ts=8 sts=0: -+ * -+ * quota.c -+ * -+ * Quota operations for the OCFS2 userspace library. -+ * -+ * Copyright (C) 2008 Novell. All rights reserved. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public -+ * License, version 2, as published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public -+ * License along with this program; if not, write to the -+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330, -+ * Boston, MA 021110-1307, USA. 
-+ */ -+ -+#include -+ -+#include "ocfs2/byteorder.h" -+#include "ocfs2/ocfs2.h" -+ -+void ocfs2_swap_quota_header(struct ocfs2_disk_dqheader *header) -+{ -+ if (cpu_is_little_endian) -+ return; -+ header->dqh_magic = bswap_32(header->dqh_magic); -+ header->dqh_version = bswap_32(header->dqh_version); -+} -+ -+void ocfs2_swap_quota_local_info(struct ocfs2_local_disk_dqinfo *info) -+{ -+ if (cpu_is_little_endian) -+ return; -+ info->dqi_flags = bswap_32(info->dqi_flags); -+ info->dqi_chunks = bswap_32(info->dqi_chunks); -+ info->dqi_blocks = bswap_32(info->dqi_blocks); -+} -+ -+void ocfs2_swap_quota_chunk_header(struct ocfs2_local_disk_chunk *chunk) -+{ -+ if (cpu_is_little_endian) -+ return; -+ chunk->dqc_free = bswap_32(chunk->dqc_free); -+} -+ -+void ocfs2_swap_quota_global_info(struct ocfs2_global_disk_dqinfo *info) -+{ -+ if (cpu_is_little_endian) -+ return; -+ info->dqi_bgrace = bswap_32(info->dqi_bgrace); -+ info->dqi_igrace = bswap_32(info->dqi_igrace); -+ info->dqi_syncms = bswap_32(info->dqi_syncms); -+ info->dqi_blocks = bswap_32(info->dqi_blocks); -+ info->dqi_free_blk = bswap_32(info->dqi_free_blk); -+ info->dqi_free_entry = bswap_32(info->dqi_free_entry); -+} -+ -+void ocfs2_swap_quota_global_dqblk(struct ocfs2_global_disk_dqblk *dqblk) -+{ -+ if (cpu_is_little_endian) -+ return; -+ dqblk->dqb_id = bswap_32(dqblk->dqb_id); -+ dqblk->dqb_use_count = bswap_32(dqblk->dqb_use_count); -+ dqblk->dqb_ihardlimit = bswap_64(dqblk->dqb_ihardlimit); -+ dqblk->dqb_isoftlimit = bswap_64(dqblk->dqb_isoftlimit); -+ dqblk->dqb_curinodes = bswap_64(dqblk->dqb_curinodes); -+ dqblk->dqb_bhardlimit = bswap_64(dqblk->dqb_bhardlimit); -+ dqblk->dqb_bsoftlimit = bswap_64(dqblk->dqb_bsoftlimit); -+ dqblk->dqb_curspace = bswap_64(dqblk->dqb_curspace); -+ dqblk->dqb_btime = bswap_64(dqblk->dqb_btime); -+ dqblk->dqb_itime = bswap_64(dqblk->dqb_itime); -+} -+ -+void ocfs2_swap_quota_leaf_block_header(struct ocfs2_disk_dqdbheader *bheader) -+{ -+ if (cpu_is_little_endian) -+ 
return; -+ bheader->dqdh_next_free = bswap_32(bheader->dqdh_next_free); -+ bheader->dqdh_prev_free = bswap_32(bheader->dqdh_prev_free); -+ bheader->dqdh_entries = bswap_16(bheader->dqdh_entries); -+} -+ -+errcode_t ocfs2_create_local_quota_file(ocfs2_filesys *fs, int type, -+ uint64_t blkno) -+{ -+ ocfs2_cached_inode *ci = NULL; -+ struct ocfs2_dinode *di; -+ struct ocfs2_disk_dqheader *header; -+ struct ocfs2_local_disk_dqinfo *info; -+ unsigned int magics[] = OCFS2_LOCAL_QMAGICS; -+ int versions[] = OCFS2_LOCAL_QVERSIONS; -+ char *buf = NULL; -+ unsigned int written; -+ errcode_t err; -+ -+ err = ocfs2_read_cached_inode(fs, blkno, &ci); -+ if (err) -+ goto out; -+ -+ if (!(ci->ci_inode->i_flags & OCFS2_VALID_FL) || -+ !(ci->ci_inode->i_flags & OCFS2_SYSTEM_FL) || -+ !(ci->ci_inode->i_flags & OCFS2_QUOTA_FL)) { -+ err = OCFS2_ET_INTERNAL_FAILURE; -+ goto out; -+ } -+ di = ci->ci_inode; -+ -+ /* We need at least two blocks */ -+ err = ocfs2_extend_allocation(fs, blkno, (2 * fs->fs_blocksize + -+ fs->fs_clustersize - 1) / fs->fs_clustersize); -+ if (err) -+ goto out; -+ di->i_size = 2 * fs->fs_blocksize; -+ di->i_mtime = time(NULL); -+ err = ocfs2_write_inode(fs, blkno, (char *)di); -+ if (err) -+ goto out; -+ -+ err = ocfs2_malloc_blocks(fs->fs_io, fs->fs_blocksize * 2, &buf); -+ if (err) -+ goto out; -+ memset(buf, 0, 2 * fs->fs_blocksize); -+ -+ header = (struct ocfs2_disk_dqheader *)buf; -+ header->dqh_magic = magics[type]; -+ header->dqh_version = versions[type]; -+ ocfs2_swap_quota_header(header); -+ -+ info = (struct ocfs2_local_disk_dqinfo *)(buf + OCFS2_LOCAL_INFO_OFF); -+ info->dqi_chunks = 1; -+ info->dqi_blocks = 2; -+ info->dqi_flags = OLQF_CLEAN; -+ ocfs2_swap_quota_local_info(info); -+ -+ /* There are no free chunks because there are no blocks allocated for -+ * them yet. 
So chunk header is all-zero and needs no initialization */ -+ -+ err = ocfs2_file_write(ci, buf, 2 * fs->fs_blocksize, 0, &written); -+ if (!err && written != 2 * fs->fs_blocksize) { -+ err = OCFS2_ET_INTERNAL_FAILURE; -+ goto out; -+ } -+out: -+ if (ci) -+ ocfs2_free_cached_inode(fs, ci); -+ if (buf) -+ ocfs2_free(&buf); -+ return err; -+} -+ -+int ocfs2_qtree_depth(int blocksize) -+{ -+ unsigned int epb = (blocksize - OCFS2_QBLK_RESERVED_SPACE) >> 2; -+ unsigned long long entries = epb; -+ int i; -+ -+ for (i = 1; entries < (1ULL << 32); i++) -+ entries *= epb; -+ return i; -+} -+ -+errcode_t ocfs2_create_global_quota_file(ocfs2_filesys *fs, int type, -+ uint64_t blkno) -+{ -+ ocfs2_cached_inode *ci = NULL; -+ struct ocfs2_dinode *di; -+ char *buf = NULL; -+ struct ocfs2_disk_dqheader *header; -+ struct ocfs2_global_disk_dqinfo *info; -+ struct ocfs2_global_disk_dqblk *dqblk; -+ struct ocfs2_disk_dqdbheader *bheader; -+ uint32_t *treeblk; -+ unsigned int magics[] = OCFS2_GLOBAL_QMAGICS; -+ int versions[] = OCFS2_GLOBAL_QVERSIONS; -+ errcode_t err; -+ int blocks = ocfs2_qtree_depth(fs->fs_blocksize) + 2; -+ unsigned int written; -+ int i; -+ -+ err = ocfs2_read_cached_inode(fs, blkno, &ci); -+ if (err) -+ goto out; -+ -+ if (!(ci->ci_inode->i_flags & OCFS2_VALID_FL) || -+ !(ci->ci_inode->i_flags & OCFS2_SYSTEM_FL) || -+ !(ci->ci_inode->i_flags & OCFS2_QUOTA_FL)) { -+ err = OCFS2_ET_INTERNAL_FAILURE; -+ goto out; -+ } -+ di = ci->ci_inode; -+ -+ err = ocfs2_extend_allocation(fs, blkno, (blocks * fs->fs_blocksize + -+ fs->fs_clustersize - 1) / fs->fs_clustersize); -+ if (err) -+ goto out; -+ di->i_size = blocks * fs->fs_blocksize; -+ di->i_mtime = time(NULL); -+ err = ocfs2_write_inode(fs, blkno, (char *)di); -+ if (err) -+ goto out; -+ err = ocfs2_malloc_blocks(fs->fs_io, fs->fs_blocksize * blocks, &buf); -+ if (err) -+ goto out; -+ memset(buf, 0, fs->fs_blocksize * blocks); -+ -+ header = (struct ocfs2_disk_dqheader *)buf; -+ header->dqh_magic = magics[type]; -+ 
header->dqh_version = versions[type]; -+ ocfs2_swap_quota_header(header); -+ -+ info = (struct ocfs2_global_disk_dqinfo *)(buf + OCFS2_GLOBAL_INFO_OFF); -+ info->dqi_bgrace = fs->qinfo[type].dqi_bgrace; -+ info->dqi_igrace = fs->qinfo[type].dqi_igrace; -+ info->dqi_syncms = fs->qinfo[type].dqi_syncms; -+ info->dqi_blocks = blocks; -+ info->dqi_free_blk = 0; -+ info->dqi_free_entry = blocks - 1; -+ ocfs2_swap_quota_global_info(info); -+ -+ /* FIXME: This should be split into a separate function that is able to -+ * add blocks etc. Usage information can then be properly specified by -+ * the caller. */ -+ /* Create quota structure for root user */ -+ for (i = 0; i < ocfs2_qtree_depth(fs->fs_blocksize); i++) { -+ treeblk = (uint32_t *)(buf + -+ ((i + 1) * fs->fs_blocksize)); -+ *treeblk = cpu_to_le32(i + 2); -+ } -+ -+ bheader = (struct ocfs2_disk_dqdbheader *)(buf + -+ ((i + 1) * fs->fs_blocksize)); -+ bheader->dqdh_entries = 1; -+ ocfs2_swap_quota_leaf_block_header(bheader); -+ -+ dqblk = (struct ocfs2_global_disk_dqblk *)(buf + -+ ((i + 1) * fs->fs_blocksize) + -+ sizeof(struct ocfs2_disk_dqdbheader)); -+ dqblk->dqb_curinodes = 2; -+ dqblk->dqb_curspace = 2 * fs->fs_blocksize; -+ ocfs2_swap_quota_global_dqblk(dqblk); -+ -+ err = ocfs2_file_write(ci, buf, blocks * fs->fs_blocksize, 0, &written); -+ if (!err && written != blocks * fs->fs_blocksize) { -+ err = OCFS2_ET_INTERNAL_FAILURE; -+ goto out; -+ } -+out: -+ if (ci) -+ ocfs2_free_cached_inode(fs, ci); -+ if (buf) -+ ocfs2_free(&buf); -+ return err; -+} -diff --git a/mkfs.ocfs2/mkfs.c b/mkfs.ocfs2/mkfs.c -index aad166b..9fe2ac2 100644 ---- a/mkfs.ocfs2/mkfs.c -+++ b/mkfs.ocfs2/mkfs.c -@@ -96,12 +96,16 @@ static SystemFileInfo system_files[] = { - { "slot_map", SFI_OTHER, 1, S_IFREG | 0644 }, - { "heartbeat", SFI_HEARTBEAT, 1, S_IFREG | 0644 }, - { "global_bitmap", SFI_CLUSTER, 1, S_IFREG | 0644 }, -+ { "aquota.user", SFI_QUOTA, 1, S_IFREG | 0644 }, -+ { "aquota.group", SFI_QUOTA, 1, S_IFREG | 0644 }, - { 
"orphan_dir:%04d", SFI_OTHER, 0, S_IFDIR | 0755 }, - { "extent_alloc:%04d", SFI_CHAIN, 0, S_IFREG | 0644 }, - { "inode_alloc:%04d", SFI_CHAIN, 0, S_IFREG | 0644 }, - { "journal:%04d", SFI_JOURNAL, 0, S_IFREG | 0644 }, - { "local_alloc:%04d", SFI_LOCAL_ALLOC, 0, S_IFREG | 0644 }, -- { "truncate_log:%04d", SFI_TRUNCATE_LOG, 0, S_IFREG | 0644 } -+ { "truncate_log:%04d", SFI_TRUNCATE_LOG, 0, S_IFREG | 0644 }, -+ { "aquota%04d.user", SFI_QUOTA, 0, S_IFREG | 0644 }, -+ { "aquota%04d.group", SFI_QUOTA, 0, S_IFREG | 0644 }, - }; - - struct fs_type_translation { -@@ -184,6 +188,23 @@ static int hb_dev_skip(State *s, int system_inode) - return ret; - } - -+/* Should we skip this inode because of features enabled / disabled? */ -+static int feature_skip(State *s, int system_inode) -+{ -+ switch (system_inode) { -+ case USER_QUOTA_SYSTEM_INODE: -+ case LOCAL_USER_QUOTA_SYSTEM_INODE: -+ return !(s->feature_flags.opt_ro_compat & -+ OCFS2_FEATURE_RO_COMPAT_USRQUOTA); -+ case GROUP_QUOTA_SYSTEM_INODE: -+ case LOCAL_GROUP_QUOTA_SYSTEM_INODE: -+ return !(s->feature_flags.opt_ro_compat & -+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA); -+ default: -+ return 0; -+ } -+} -+ - static inline uint32_t system_dir_bytes_needed(State *s) - { - int each = OCFS2_DIR_REC_LEN(SYSTEM_FILE_NAME_MAX); -@@ -191,6 +212,108 @@ static inline uint32_t system_dir_bytes_needed(State *s) - return each * sys_blocks_needed(s->initial_slots); - } - -+static void format_quota_files(State *s) -+{ -+ errcode_t ret; -+ ocfs2_filesys *fs = NULL; -+ char fname[SYSTEM_FILE_NAME_MAX]; -+ uint64_t blkno; -+ int i; -+ -+ ret = ocfs2_open(s->device_name, OCFS2_FLAG_RW, 0, 0, &fs); -+ if (ret) { -+ com_err(s->progname, ret, "while opening new file system"); -+ exit(1); -+ } -+ -+ /* Write correct data into quota files */ -+ if (!feature_skip(s, USER_QUOTA_SYSTEM_INODE)) { -+ fs->qinfo[0].dqi_syncms = 10000; -+ fs->qinfo[0].dqi_bgrace = 604800; /* 1 week */ -+ fs->qinfo[0].dqi_igrace = 604800; /* 1 week */ -+ snprintf(fname, 
sizeof(fname), -+ ocfs2_system_inodes[USER_QUOTA_SYSTEM_INODE].si_name); -+ ret = ocfs2_lookup(fs, fs->fs_sysdir_blkno, fname, -+ strlen(fname), NULL, &blkno); -+ if (ret) { -+ com_err(s->progname, ret, -+ "while looking up quota filename \"%.*s\"", -+ (int)strlen(fname), fname); -+ goto error; -+ } -+ ret = ocfs2_create_global_quota_file(fs, 0, blkno); -+ if (ret) { -+ com_err(s->progname, ret, "while creating global user " -+ "quota file"); -+ goto error; -+ } -+ for (i = 0; i < s->initial_slots; ++i) { -+ snprintf(fname, sizeof(fname), -+ ocfs2_system_inodes[ -+ LOCAL_USER_QUOTA_SYSTEM_INODE].si_name, i); -+ ret = ocfs2_lookup(fs, fs->fs_sysdir_blkno, fname, -+ strlen(fname), NULL, &blkno); -+ if (ret) { -+ com_err(s->progname, ret, -+ "while looking up quota filename " -+ "\"%.*s\"", (int)strlen(fname), fname); -+ goto error; -+ } -+ ret = ocfs2_create_local_quota_file(fs, 0, blkno); -+ if (ret) { -+ com_err(s->progname, ret, "while creating " -+ "local user quota file"); -+ goto error; -+ } -+ } -+ } -+ if (!feature_skip(s, GROUP_QUOTA_SYSTEM_INODE)) { -+ fs->qinfo[1].dqi_syncms = 10000; -+ fs->qinfo[1].dqi_bgrace = 604800; /* 1 week */ -+ fs->qinfo[1].dqi_igrace = 604800; /* 1 week */ -+ snprintf(fname, sizeof(fname), -+ ocfs2_system_inodes[GROUP_QUOTA_SYSTEM_INODE].si_name); -+ ret = ocfs2_lookup(fs, fs->fs_sysdir_blkno, fname, -+ strlen(fname), NULL, &blkno); -+ if (ret) { -+ com_err(s->progname, ret, -+ "while looking up quota filename \"%.*s\"", -+ (int)strlen(fname), fname); -+ goto error; -+ } -+ ret = ocfs2_create_global_quota_file(fs, 1, blkno); -+ if (ret) { -+ com_err(s->progname, ret, "while creating global group " -+ "quota file"); -+ goto error; -+ } -+ for (i = 0; i < s->initial_slots; ++i) { -+ snprintf(fname, sizeof(fname), -+ ocfs2_system_inodes[ -+ LOCAL_GROUP_QUOTA_SYSTEM_INODE].si_name, i); -+ ret = ocfs2_lookup(fs, fs->fs_sysdir_blkno, fname, -+ strlen(fname), NULL, &blkno); -+ if (ret) { -+ com_err(s->progname, ret, -+ "while looking up 
quota filename " -+ "\"%.*s\"", (int)strlen(fname), fname); -+ goto error; -+ } -+ ret = ocfs2_create_local_quota_file(fs, 1, blkno); -+ if (ret) { -+ com_err(s->progname, ret, "while creating " -+ "local group quota file"); -+ goto error; -+ } -+ } -+ } -+ ocfs2_close(fs); -+ return; -+error: -+ clear_both_ends(s); -+ exit(1); -+} -+ - int - main(int argc, char **argv) - { -@@ -355,6 +478,8 @@ main(int argc, char **argv) - for (i = 0; i < NUM_SYSTEM_INODES; i++) { - if (hb_dev_skip(s, i)) - continue; -+ if (feature_skip(s, i)) -+ continue; - - num = (system_files[i].global) ? 1 : s->initial_slots; - for (j = 0; j < num; j++) { -@@ -413,6 +538,8 @@ main(int argc, char **argv) - for (i = 0; i < NUM_SYSTEM_INODES; i++) { - if (hb_dev_skip(s, i)) - continue; -+ if (feature_skip(s, i)) -+ continue; - - num = system_files[i].global ? 1 : s->initial_slots; - for (j = 0; j < num; j++) { -@@ -496,6 +623,14 @@ main(int argc, char **argv) - - if (!s->quiet) - printf("done\n"); -+ -+ if (!s->quiet) -+ printf("Formatting quota files: "); -+ -+ format_quota_files(s); -+ -+ if (!s->quiet) -+ printf("done\n"); - } - - close_device(s); -@@ -2304,6 +2439,9 @@ init_record(State *s, SystemFileDiskRecord *rec, int type, int mode) - case SFI_TRUNCATE_LOG: - rec->flags |= OCFS2_DEALLOC_FL; - break; -+ case SFI_QUOTA: -+ rec->flags |= OCFS2_QUOTA_FL; -+ break; - case SFI_OTHER: - break; - } -diff --git a/mkfs.ocfs2/mkfs.h b/mkfs.ocfs2/mkfs.h -index bd8ac45..969e4df 100644 ---- a/mkfs.ocfs2/mkfs.h -+++ b/mkfs.ocfs2/mkfs.h -@@ -96,6 +96,7 @@ enum { - SFI_HEARTBEAT, - SFI_CHAIN, - SFI_TRUNCATE_LOG, -+ SFI_QUOTA, - SFI_OTHER - }; - -diff --git a/o2image/Makefile b/o2image/Makefile -index 0259161..b50201a 100644 ---- a/o2image/Makefile -+++ b/o2image/Makefile -@@ -32,6 +32,6 @@ OBJS = $(subst .c,.o,$(CFILES)) - DIST_FILES = $(CFILES) $(HFILES) o2image.8.in - - o2image: $(OBJS) $(LIBOCFS2_DEPS) -- $(LINK) $(GLIB_LIBS) $(LIBOCFS2_LIBS) $(COM_ERR_LIBS) -+ $(LINK) -static $(GLIB_LIBS) 
$(LIBOCFS2_LIBS) $(COM_ERR_LIBS) -lpthread - - include $(TOPDIR)/Postamble.make --- -1.5.2.4 - diff --git a/ocfs2-tools-dynamic-com-err.patch b/ocfs2-tools-dynamic-com-err.patch deleted file mode 100644 index 9dc8f95..0000000 --- a/ocfs2-tools-dynamic-com-err.patch +++ /dev/null @@ -1,12 +0,0 @@ -Index: o2image/Makefile -=================================================================== ---- o2image/Makefile.orig -+++ o2image/Makefile -@@ -32,6 +32,6 @@ OBJS = $(subst .c,.o,$(CFILES)) - DIST_FILES = $(CFILES) $(HFILES) o2image.8.in - - o2image: $(OBJS) $(LIBOCFS2_DEPS) -- $(LINK) -static $(GLIB_LIBS) $(LIBOCFS2_LIBS) $(COM_ERR_LIBS) -lpthread -+ $(LINK) -static $(GLIB_LIBS) $(LIBOCFS2_LIBS) -Wl,-Bdynamic $(COM_ERR_LIBS) -lpthread - - include $(TOPDIR)/Postamble.make diff --git a/ocfs2-tools-glibc210.diff b/ocfs2-tools-glibc210.diff deleted file mode 100644 index 84d711a..0000000 --- a/ocfs2-tools-glibc210.diff +++ /dev/null @@ -1,12 +0,0 @@ -Index: ocfs2-tools/ocfs2_controld/main.c -=================================================================== ---- ocfs2-tools.orig/ocfs2_controld/main.c -+++ ocfs2-tools/ocfs2_controld/main.c -@@ -34,6 +34,7 @@ - #include - #include - #include -+#include - #include - - #include "ocfs2-kernel/kernel-list.h" diff --git a/ocfs2-tools.changes b/ocfs2-tools.changes index 481892e..eb3e438 100644 --- a/ocfs2-tools.changes +++ b/ocfs2-tools.changes @@ -1,29 +1,242 @@ ------------------------------------------------------------------- -Sun Apr 18 16:10:28 UTC 2010 - crrodriguez@opensuse.org +Mon Jun 14 15:19:45 UTC 2010 - lmb@novell.com -- Requires timezone +- ocfs2_controld.pcmk: compilation issue fixed. ------------------------------------------------------------------- -Fri Apr 9 07:25:41 UTC 2010 - aj@suse.de +Mon Apr 26 14:43:04 UTC 2010 - coyli@novell.com -- Fix build. 
+- fixes for indexed-dirs + - 0016-tunefs.ocfs2-move-o2ne_add_tailers-into-libocfs2ne.c.patch + - 0017-dx_dirs-add-check-for-invalid-slot-in-ocfs2_new_dx_r.patch + - 0018-mkfs.ocfs2-create-root-and-orphan-directories-as-ind.patch + - 0019-libocfs2-fix-flag-check-in-ocfs2_init_dir.patch + - 0020-libocfs2-fix-ocfs2_init_dir-to-retain-indexed-flag.patch + - 0021-fsck.ocfs2-verify-dirent-dx-entry-linkages.patch + - 0022-dx_dirs-stop-iterate-dir-entries-for-I-O-error.patch + - 0023-dx_dirs-check-callback-iter-dx_func-return-value-in-.patch + - 0024-dx_dirs-remove-unncessary-return-value-assignment.patch + - 0025-dx_dirs-unifiy-feature-string-of-indexed-dirs.patch + - 0026-dx_dirs-Improve-information-displayed-by-dump_dx_roo.patch + - 0027-dx_dirs-stop-iteration-of-dir-trailer-initialization.patch + - 0028-dx_dirs-stop-dx-insert-iteration-for-callback-error.patch + - 0029-dx_dirs-set-OCFS2_INDEXED_DIR_FL-after-indexed-tree-.patch + - 0030-dx_dirs-fix-ocfs2_swap_dx_entry_list-for-big-endian.patch ------------------------------------------------------------------- -Wed Sep 30 08:03:29 UTC 2009 - aj@suse.de +Sun Apr 11 20:06:09 CST 2010 - coyli@novell.com -- Do not require ocfs-kmp for openSUSE 11.2. +- update indexed-dirs patches to v11 patches + - a bug fix in fsck.ocfs2/pass2.c + - extra check whether the directory trailer overwrites valid + directory items. ------------------------------------------------------------------- -Mon Jul 13 16:57:52 CEST 2009 - coolo@novell.com +Thu Apr 8 17:19:21 UTC 2010 - lmb@novell.com -- fix build with e2fsprogs -- fix build with glibc 2.10 -- fix build with as-needed +- Add reflink user-space utility to expose the copy-on-write links in + OCFS2 for all platforms (bnc#594979). 
------------------------------------------------------------------- -Thu Apr 16 01:40:48 CEST 2009 - ro@suse.de +Thu Mar 25 19:17:55 UTC 2010 - coyli@novell.com -- buildfix: refresh patches +- 0015-dx_dirs-enable-metaecc-and-indexed-dirs-support-as-d.patch: + enable metaecc and indexed-dirs support as default features. +- ocfs2-tools-static-glib.diff: rename to ocfs2-o2cb_ctl-static-glibc.diff, + don't explicitly link pthread anymore. +- update ocfs2-tools.tar.bz2 to upstream (0a1c1970): + - mkfs.ocfs2: enable xattr support as a default feature + - Fsck.ocfs2: Correctly check i_size for directory in a sparse + filesystem. + - fsck.ocfs2: Freshen up some fsck messages + - mkfs.ocfs2: Add dry-run option + - mkfs.ocfs2: Allocate groups to the extent allocator + - libocfs2: Add ocfs2_grow_chain_allocator() + - libocfs2: ocfs2_chain_add_group() adds new groups to the next + logical chain + - mounted.ocfs2: Add the stack/local to the output + - mounted.ocfs2: Display uuid in all caps + - mkfs.ocfs2: Tweak the feature compatibility list in manpage + - mkfs.ocfs2: Print fs features during make + +------------------------------------------------------------------- +Fri Mar 12 06:32:34 UTC 2010 - coyli@novell.com + +- 0014-dx_dirs-man-page.diff: add an initial man page entry for + indexed-dirs feature. + +------------------------------------------------------------------- +Thu Mar 11 16:41:09 UTC 2010 - coyli@novell.com + +- bug-585080-handle-symbolic-link.patch: handle symbolic link device + file for tunefs.ocfs2 online resize (bnc#585080) + +------------------------------------------------------------------- +Thu Mar 11 02:50:57 UTC 2010 - coyli@novell.com + +- version-1.4.3.patch: keep package version as 1.4.3 for SLES11 HAE SP1. 
+ +------------------------------------------------------------------- +Thu Mar 11 01:04:34 UTC 2010 - coyli@novell.com + +- 0013-dx_dirs-try-to-install-dir-trailers-when-enable-inde.patch: try + to install dir trailers when enable indexed-dirs feature while + metaecc features is not enabled. The fix can avoid ocfs2_dx_dir_build() + in ocfs2-tools to overwrite existed dir entries lying in trailer area + by moving them into other dir blocks. No BNC associated. + +------------------------------------------------------------------- +Fri Mar 5 09:13:30 UTC 2010 - coyli@novell.com + +- update indexed-dirs patches for + - remove the metaecc dependence code for indexed dirs. (bnc#584578) + - indexed dirs does not depend on metaecc, remove all + incorrect dependence code in mkfs.ocfs2 and tunefs.ocfs2 (bnc#585418) + - code clean up in mkfs.ocfs2 +- back port ocfs2-tools version 1.6 to SP1 ocfs2-tools package. + +------------------------------------------------------------------- +Wed Mar 3 09:51:32 UTC 2010 - coyli@novell.com + +- add 0012-dx_dirs-fix-for-tunefs-metaecc-and-indexed-dirs.patch + (bnc#584578) + +------------------------------------------------------------------- +Mon Feb 22 18:40:29 UTC 2010 - coyli@novell.com + +- update indexed dirs patches for fate#300315 + +------------------------------------------------------------------- +Fri Feb 5 10:25:07 UTC 2010 - coyli@novell.com + +- delete 64bytes_lvb_len_in_libo2dlm.diff (bnc#573460) + +------------------------------------------------------------------- +Fri Jan 29 01:01:49 UTC 2010 - coyli@novell.com + +- update indexed dirs patches. 
+ +------------------------------------------------------------------- +Thu Jan 28 21:01:02 UTC 2010 - coyli@novell.com + +- compiling fix + +------------------------------------------------------------------- +Thu Jan 28 20:52:10 UTC 2010 - coyli@novell.com + +- add disable indexed-dirs support in tunefs.ocfs2 (fate#300315) +- more fixes for previous indexed-dirs patches + +------------------------------------------------------------------- +Thu Jan 28 04:07:37 UTC 2010 - coyli@novell.com + +- add the first version of indexed dir support (fate#300315) + +------------------------------------------------------------------- +Mon Jan 25 16:23:47 UTC 2010 - lmb@novell.com + +- ocfs2-tools requires openais for the user-space cluster stack. + +------------------------------------------------------------------- +Wed Dec 9 09:47:31 UTC 2009 - "lmb@novell.com" + +- Fix ocfs2-tools build (bnc#561625). + +------------------------------------------------------------------- +Tue Dec 8 14:42:05 UTC 2009 - "lmb@novell.com" + +- Add libcorosync-devel build dependency. + +------------------------------------------------------------------- +Tue Dec 8 06:44:41 UTC 2009 - coyli@novell.com + +- 64bytes_lvb_len_in_libo2dlm.diff: use 64 bytes lvb len ((bnc#515645)) + +------------------------------------------------------------------- +Thu Nov 26 06:17:33 UTC 2009 - coyli@novell.com + +- change version number to 1.4.3 + +------------------------------------------------------------------- +Thu Nov 26 06:12:43 UTC 2009 - coyli@novell.com + +- update ocfs2-tools-1.4.3 to SLES11SP1 + +------------------------------------------------------------------- +Tue Nov 3 16:34:52 UTC 2009 - coyli@novell.com + +- bug-543119-o2dlm.patch: load libdlm_lt.so.3 explicitly in o2dlm.c (bnc#543119) +- drop bug-543119_0001-Option-to-skip-cluster-checking-in-tunefs.ocfs2.patch since + bnc#543119 is a dlopen() issue, and this patch is not accepted by upstream. 
+ +------------------------------------------------------------------- +Wed Oct 14 09:20:32 UTC 2009 - "lmb@novell.com" + +- Implement a -F/--force option to bypass the cluster stack in + tunefs.ocfs2 (bnc#543119). + +------------------------------------------------------------------- +Thu Sep 10 15:57:38 CEST 2009 - coly.li@suse.de + +- fix kill_deamon() from o2cb.ocf, now when the deamon to be killed + is not running, kill_deamon() will return SUCESS. Thanks to Dejan. + +------------------------------------------------------------------- +Tue Aug 4 20:34:25 CST 2009 - coly.li@suse.de + +- add quota patches from Jan Kara, full support for enabling, disabling + and checking quotas. + +------------------------------------------------------------------- +Tue Jul 28 21:26:27 CST 2009 - coly.li@suse.de + +- Update code to upstream 1.4.2, to support xattr and acl in sles11 + (bnc#499278), thanks to Goldwyn Rodrigues. + +------------------------------------------------------------------- +Mon Mar 30 12:45:00 CEST 2009 - lmb@suse.de + +- bug-478794-ocfs2_hb_ctl.diff: Fix crash on parsing corrupt file + systems (bnc#478794) +- debug-ocfs2_hb_ctl.patch: Better error logging. + +------------------------------------------------------------------- +Fri Mar 20 12:17:02 CET 2009 - abeekhof@suse.de + +- Fix the logging of calls to crm_terminate_member_no_mainloop() + +------------------------------------------------------------------- +Tue Mar 17 23:25:00 CET 2009 - lmb@suse.de + +- ocfs2_controld.pcmk crashed immediately on x86-64 (bnc#486104) + +------------------------------------------------------------------- +Mon Mar 16 14:45:00 CET 2009 - lmb@suse.de + +- ocfs2_controld-retry-470741.diff: removed. +- bug-470741_000{1,2,3}-ocfs2_controld.patch: retry most CKPT operations + forever, and recover from partially installed checkpoints + (bnc#470741). 
+ +------------------------------------------------------------------- +Thu Mar 12 14:34:26 CET 2009 - abeekhof@suse.de + +- Use a Pacemaker library function for kicking nodes form the cluster + +------------------------------------------------------------------- +Wed Mar 11 11:09:11 CET 2009 - abeekhof@suse.de + +- Add some extra logging for bnc#484028 + +------------------------------------------------------------------- +Tue Mar 10 22:25:33 CET 2009 - abeekhof@suse.de + +- Allow debug message to been seen in syslog for bnc#484028 + +------------------------------------------------------------------- +Thu Mar 05 09:00:00 CET 2009 - lmb@suse.de + +- Adjust packaging split. ------------------------------------------------------------------- Fri Feb 27 03:53:53 CET 2009 - coyli@suse.de diff --git a/ocfs2-tools.spec b/ocfs2-tools.spec index de18bd0..ab2fa69 100644 --- a/ocfs2-tools.spec +++ b/ocfs2-tools.spec @@ -1,5 +1,5 @@ # -# spec file for package ocfs2-tools (Version 1.4.1) +# spec file for package ocfs2-tools (Version 1.4.3) # # Copyright (c) 2010 SUSE LINUX Products GmbH, Nuernberg, Germany. # @@ -19,37 +19,63 @@ Name: ocfs2-tools -BuildRequires: e2fsprogs-devel glib2-devel libcom_err-devel libdlm-devel libopenais-devel libpacemaker-devel libxml2-devel libxslt-devel python-devel python-gtk-devel readline-devel update-desktop-files +BuildRequires: e2fsprogs-devel glib2-devel libcorosync-devel libdlm-devel libopenais-devel libpacemaker-devel libxml2-devel libxslt-devel python-devel python-gtk-devel readline-devel update-desktop-files Summary: Oracle Cluster File System 2 Core Tools -Version: 1.4.1 -Release: 25 -License: GPLv2+ +Version: 1.4.3 +Release: 0. 
+License: GPL v2 or later Group: System/Filesystems Source: ocfs2-tools.tar.bz2 Source1: o2cb.ocf -Patch5: ocfs2-tools-static-glib.diff -Patch6: ocfs2console-display-fix.diff -Patch7: ocfs2-tools-glibc210.diff -Patch99: ocfs2-devel.diff -Patch100: ocfs2-quota.diff -Patch101: change-quotafile-names.diff -Patch102: ocfs2_controld-pacemaker.diff -Patch103: ocfs2console-extraoption-fix.diff -Patch104: ocfs2console-pop-display-warning.diff -Patch105: ocfs2_controld-retry-470741.diff -Patch106: tunefs.ocfs2.8.in_more_options.diff -Patch107: ocfs2-mount-gcc45.patch -Patch108: ocfs2-tools-dynamic-com-err.patch +Source2: reflink.tar.bz2 +Patch101: ocfs2-o2cb_ctl-static-glibc.diff +Patch102: force-debug.patch +Patch103: extra-debug.patch +Patch104: debug-ocfs2_hb_ctl.patch +Patch105: ocfs2_controld-pacemaker.diff +Patch106: bug-470741-debug_start_failures.patch +Patch107: ocfs2-devel.diff +Patch201: bug-543119-o2dlm.patch +Patch202: bug-585080-handle-symbolic-link.patch +Patch301: 0001-dx_dirs-Add-library-support-for-directory-indexing.patch +Patch302: 0002-dx_dirs-debugfs.ocfs2-support.patch +Patch303: 0003-dx_dirs-mkfs.ocfs2-support.patch +Patch304: 0004-dx_dirs-Add-tunefs.ocfs2-feature-for-indexed-directo.patch +Patch305: 0005-dx_dirs-Update-for-dr_num_entries.patch +Patch306: 0006-dx_dirs-add-missing-ocfs2_filesys-fs-parameter.patch +Patch307: 0007-dx_dirs-more-library-support-for-directory-indexing.patch +Patch308: 0008-dx_dirs-fsck.ocfs2-support.patch +Patch309: 0009-dx_dirs-add-blocknr-in-callback-of-ocfs2_dir_iterate.patch +Patch310: 0010-dx_dirs-add-disable-indexed-dirs-support-in-tunefs.o.patch +Patch311: 0011-dx_dirs-build-indexed-trees-when-enabling-indexed-di.patch +Patch312: 0012-dx_dirs-fix-return-value-of-walk_dirblock-when-enabl.patch +Patch313: 0013-dx_dirs-try-to-install-dir-trailers-when-enable-inde.patch +Patch314: 0014-dx_dirs-add-an-initial-man-page-entry-for-indexed-di.patch +Patch315: 0015-fsck.ocfs2-prompt-before-truncating-an-invalid-dir-i.patch 
+Patch316: 0016-tunefs.ocfs2-move-o2ne_add_tailers-into-libocfs2ne.c.patch +Patch317: 0017-dx_dirs-add-check-for-invalid-slot-in-ocfs2_new_dx_r.patch +Patch318: 0018-mkfs.ocfs2-create-root-and-orphan-directories-as-ind.patch +Patch319: 0019-libocfs2-fix-flag-check-in-ocfs2_init_dir.patch +Patch320: 0020-libocfs2-fix-ocfs2_init_dir-to-retain-indexed-flag.patch +Patch321: 0021-fsck.ocfs2-verify-dirent-dx-entry-linkages.patch +Patch322: 0022-dx_dirs-stop-iterate-dir-entries-for-I-O-error.patch +Patch323: 0023-dx_dirs-check-callback-iter-dx_func-return-value-in-.patch +Patch324: 0024-dx_dirs-remove-unncessary-return-value-assignment.patch +Patch325: 0025-dx_dirs-unifiy-feature-string-of-indexed-dirs.patch +Patch326: 0026-dx_dirs-Improve-information-displayed-by-dump_dx_roo.patch +Patch327: 0027-dx_dirs-stop-iteration-of-dir-trailer-initialization.patch +Patch328: 0028-dx_dirs-stop-dx-insert-iteration-for-callback-error.patch +Patch329: 0029-dx_dirs-set-OCFS2_INDEXED_DIR_FL-after-indexed-tree-.patch +Patch330: 0030-dx_dirs-fix-ocfs2_swap_dx_entry_list-for-big-endian.patch +Patch331: 0031-dx_dirs-enable-metaecc-and-indexed-dirs-support-as-d.patch +Patch401: version-1.4.3.patch +Patch402: reflink-no-syscall.patch Url: http://oss.oracle.com/projects/ocfs2-tools/ Requires: net-tools, modutils, e2fsprogs, /sbin/chkconfig, glib2 >= 2.2.3 PreReq: %insserv_prereq %fillup_prereq AutoReqProv: on BuildRoot: %{_tmppath}/%{name}-%{version}-build -Requires: libdlm timezone -%if %suse_version < 1120 -# There's no separate kmp for openSUSE 11.2 -Requires: ocfs2-kmp -%endif +Requires: libdlm ocfs2-kmp openais %description OCFS is the Oracle Cluster File System. 
@@ -64,7 +90,7 @@ Authors: Oracle Corporation %package -n ocfs2console -License: GPLv2+ +License: GPL v2 or later Summary: Oracle Cluster Filesystem 2 GUI tools Group: System/Filesystems Obsoletes: ocfs2-support @@ -84,7 +110,7 @@ Authors: Oracle Corporation %package devel -License: GPLv2+ +License: GPL v2 or later Summary: Oracle Cluster File System 2 Development files Group: System/Filesystems Requires: ocfs2-tools = %{version}, libcom_err, libcom_err-devel @@ -102,7 +128,7 @@ Authors: Oracle Corporation %package o2cb -License: GPLv2+ +License: GPL v2 or later Summary: Oracle Cluster File System 2 Core Tools Group: System/Filesystems Requires: ocfs2-tools = %{version} @@ -120,34 +146,66 @@ Authors: Oracle Corporation %prep -%setup -n %{name} -%patch5 -p1 -%patch6 -p1 -%patch7 -p1 -%patch99 -p1 -%patch100 -p1 +%setup -n %{name} -a 2 %patch101 -p1 %patch102 -p1 %patch103 -p1 %patch104 -p1 %patch105 -p1 %patch106 -p1 -%patch107 -p0 -%patch108 -p0 +%patch107 -p1 +%patch201 -p1 +%patch202 -p1 +%patch301 -p1 +%patch302 -p1 +%patch303 -p1 +%patch304 -p1 +%patch305 -p1 +%patch306 -p1 +%patch307 -p1 +%patch308 -p1 +%patch309 -p1 +%patch310 -p1 +%patch311 -p1 +%patch312 -p1 +%patch313 -p1 +%patch314 -p1 +%patch315 -p1 +%patch316 -p1 +%patch317 -p1 +%patch318 -p1 +%patch319 -p1 +%patch320 -p1 +%patch321 -p1 +%patch322 -p1 +%patch323 -p1 +%patch324 -p1 +%patch325 -p1 +%patch326 -p1 +%patch327 -p1 +%patch328 -p1 +%patch329 -p1 +%patch330 -p1 +%patch331 -p1 +%patch401 -p1 +%patch402 -p0 %build -# disable as-needed to fix configure checks -export SUSE_ASNEEDED=0 %{?suse_update_config:%{suse_update_config -f}} export CFLAGS="${CFLAGS} ${RPM_OPT_FLAGS}" export PROJECT="ocfs2-tools" autoreconf -fi -I /usr/share/aclocal -%configure --disable-debug --prefix=/usr --mandir=%{_mandir} \ - --enable-dynamic-fsck=yes --enable-dynamic-ctl=yes +%configure --disable-debug --prefix=/usr --mandir=%{_mandir} --enable-dynamic-fsck=yes --enable-dynamic-ctl=yes make OPTS="$RPM_OPT_FLAGS" +cd 
reflink +%configure --prefix=/usr --mandir=%{_mandir} +make reflink +cd .. + %install mkdir -p $RPM_BUILD_ROOT/sbin +mkdir -p $RPM_BUILD_ROOT/usr/bin mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/init.d mkdir -p ${RPM_BUILD_ROOT}/var/adm/fillup-templates mkdir -p $RPM_BUILD_ROOT%{_sysconfdir}/udev/rules.d/ @@ -161,6 +219,11 @@ cp -f vendor/common/o2cb.sysconfig ${RPM_BUILD_ROOT}/var/adm/fillup-templates/sy ln -sf ..%{_sysconfdir}/init.d/o2cb $RPM_BUILD_ROOT/sbin/rco2cb ln -sf ..%{_sysconfdir}/init.d/ocfs2 $RPM_BUILD_ROOT/sbin/rcocfs2 make DESTDIR="$RPM_BUILD_ROOT" install + +cd reflink +make DESTDIR="$RPM_BUILD_ROOT" install +cd .. + mv $RPM_BUILD_ROOT/{,/usr}/sbin/o2image mv $RPM_BUILD_ROOT/{,/usr}/sbin/debugfs.ocfs2 mv $RPM_BUILD_ROOT/{,/usr}/sbin/ocfs2_controld.pcmk @@ -196,9 +259,11 @@ fi /sbin/mounted.ocfs2 /sbin/tunefs.ocfs2 /sbin/mount.ocfs2 +/sbin/ocfs2_hb_ctl %{_sbindir}/o2image %{_sbindir}/debugfs.ocfs2 %{_sbindir}/ocfs2_controld.pcmk +%{_bindir}/reflink %config %{_sysconfdir}/udev/rules.d/51-ocfs2.rules %{_mandir}/man8/debugfs.ocfs2.8* %{_mandir}/man8/fsck.ocfs2.8* @@ -206,9 +271,10 @@ fi %{_mandir}/man8/mkfs.ocfs2.8* %{_mandir}/man8/tunefs.ocfs2.8* %{_mandir}/man8/mounted.ocfs2.8* +%{_mandir}/man8/ocfs2_hb_ctl.8* %{_mandir}/man8/mount.ocfs2.8* %{_mandir}/man8/o2image.8.gz -%doc README.O2CB COPYING CREDITS MAINTAINERS +%doc COPYING CREDITS MAINTAINERS %doc documentation/users_guide.txt %dir /usr/lib/ocf %dir /usr/lib/ocf/resource.d @@ -226,13 +292,12 @@ fi /sbin/rco2cb /sbin/rcocfs2 /sbin/o2cb_ctl -/sbin/ocfs2_hb_ctl %{_sysconfdir}/init.d/o2cb %{_sysconfdir}/init.d/ocfs2 %{_mandir}/man8/o2cb_ctl.8* -%{_mandir}/man8/ocfs2_hb_ctl.8* /var/adm/fillup-templates/sysconfig.o2cb %{_mandir}/man7/o2cb.7.gz +%doc README.O2CB %files devel %defattr(-,root,root) diff --git a/ocfs2-tools.tar.bz2 b/ocfs2-tools.tar.bz2 index d24712a..69a5265 100644 --- a/ocfs2-tools.tar.bz2 +++ b/ocfs2-tools.tar.bz2 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:147650748b00a969091b68886cfbaf1dea85e6912f709a76ab599f00ad576ece -size 1103130 +oid sha256:c86b31db7cd2df1f711b977462c47889241a28b907de081c666ed8db7baccf29 +size 1206555 diff --git a/ocfs2_controld-pacemaker.diff b/ocfs2_controld-pacemaker.diff index 9ed314e..3c529d0 100644 --- a/ocfs2_controld-pacemaker.diff +++ b/ocfs2_controld-pacemaker.diff @@ -1,487 +1,26 @@ -diff --git a/ocfs2_controld/pacemaker.c b/ocfs2_controld/pacemaker.c ---- a/ocfs2_controld/pacemaker.c -+++ b/ocfs2_controld/pacemaker.c -@@ -20,20 +20,31 @@ - #include - #include - --#include --#include --#include -+#include -+ -+#include -+ -+/* heartbeat support is irrelevant here */ -+#undef SUPPORT_HEARTBEAT -+#define SUPPORT_HEARTBEAT 0 -+ -+#include -+#include -+#include -+#include -+#include +Index: ocfs2-tools/ocfs2_controld/pacemaker.c +=================================================================== +--- ocfs2-tools.orig/ocfs2_controld/pacemaker.c ++++ ocfs2-tools/ocfs2_controld/pacemaker.c +@@ -31,6 +31,8 @@ + #include + #include + #include +#include +#include #include "ocfs2-kernel/kernel-list.h" #include "o2cb/o2cb.h" - - #include "ocfs2_controld.h" - --#include --#include --#include - #include - -+#define log_printf(level, format, args...) syslog(level, "%s:%d " format "\n", __FILE__, __LINE__, ##args) -+ - int our_nodeid = 0; - static int pcmk_ci; - static int stonithd_ci; -@@ -44,29 +55,57 @@ const char *stackname = "pcmk"; - extern int ais_fd_async; - char *local_node_uname = NULL; - --int kill_stack_node(int nodeid) --{ -- int error = 1; -- stonith_ops_t st_op; -- char *target = nodeid2name(nodeid); -- -- log_debug("killing node %d (aka. 
%s)", nodeid, target); -- -- if(target) { -- st_op.timeout = 150; -- st_op.node_uuid = NULL; -- st_op.private_data = NULL; -- st_op.node_name = target; -- st_op.optype = POWEROFF; -- -- error = stonithd_node_fence(&st_op); -- } -+static IPC_Channel *attrd = NULL; - -- if (error) -- log_debug("Unable to kill node %d, %d %d", nodeid, error, -- errno); -+static void attrd_deadfn(int ci) -+{ -+ log_printf(LOG_ERR, "Lost connection to attrd"); -+ attrd = NULL; -+ return; -+} - -- return error; -+int kill_stack_node(int nodeid) -+{ -+ gboolean rc = FALSE; -+ xmlNode *update = NULL; -+ time_t now = time(NULL); -+ crm_node_t *node = crm_get_peer(nodeid, NULL); -+ -+ if(node == NULL || node->uname == NULL) { -+ log_printf(LOG_ERR, "%s: Don't know how to kick node %d/%p", __FUNCTION__, nodeid, node); -+ return -1; -+ } -+ -+ if(attrd == NULL) { -+ log_printf(LOG_INFO, "Connecting to attrd..."); -+ attrd = init_client_ipc_comms_nodispatch(T_ATTRD); -+ if(attrd) { -+ connection_add(attrd->ops->get_recv_select_fd(attrd), NULL, attrd_deadfn); -+ } -+ } -+ -+ if(attrd != NULL) { -+ update = create_xml_node(NULL, __FUNCTION__); -+ crm_xml_add(update, F_TYPE, T_ATTRD); -+ crm_xml_add(update, F_ORIG, crm_system_name); -+ -+ crm_xml_add(update, F_ATTRD_TASK, "update"); -+ crm_xml_add(update, F_ATTRD_SECTION, XML_CIB_TAG_STATUS); -+ crm_xml_add(update, F_ATTRD_ATTRIBUTE, "terminate"); -+ crm_xml_add_int(update, F_ATTRD_VALUE, now); -+ crm_xml_add(update, F_ATTRD_HOST, node->uname); -+ -+ rc = send_ipc_message(attrd, update); -+ free_xml(update); -+ } -+ -+ if(rc) { -+ log_printf(LOG_INFO, "Requested that node %d/%s be kicked from the cluster", nodeid, node->uname); -+ return 1; -+ } -+ -+ log_printf(LOG_ERR, "Could not kick node %d/%s from the cluster", nodeid, node->uname); -+ return 0; - } - - char *nodeid2name(int nodeid) { -@@ -81,7 +120,7 @@ char *nodeid2name(int nodeid) { - int validate_cluster(const char *cluster) - { - if (!clustername) { -- log_error("Trying to validate 
before pacemaker is alive"); -+ log_printf(LOG_ERR, "Trying to validate before pacemaker is alive"); - return 0; - } - -@@ -94,12 +133,12 @@ int validate_cluster(const char *cluster) - int get_clustername(const char **cluster) - { - if (!clustername) { -- log_error("Trying to validate before pacemaker is alive"); -+ log_printf(LOG_ERR, "Trying to validate before pacemaker is alive"); - return -EIO; - } - - if (!cluster) { -- log_error("NULL passed!"); -+ log_printf(LOG_ERR, "NULL passed!"); - return -EINVAL; - } - -@@ -110,316 +149,36 @@ int get_clustername(const char **cluster) - static void dead_pcmk(int ci) - { - if (ci != pcmk_ci) { -- log_error("Unknown connection %d", ci); -+ log_printf(LOG_ERR, "Unknown connection %d", ci); - return; - } - -- log_error("pacemaker connection died"); -+ log_printf(LOG_ERR, "pacemaker connection died"); - shutdown_daemon(); - connection_dead(ci); - } - -+extern void terminate_ais_connection(void); -+ - void exit_stack(void) - { -- log_debug("closing stonithd connection"); -- stonithd_signoff(); -- - log_debug("closing pacemaker connection"); -- if (ais_fd_async) { -- close(ais_fd_async); -- ais_fd_async = 0; -- } -- if (ais_fd_sync) { -- close(ais_fd_sync); -- ais_fd_sync = 0; -- } -+ terminate_ais_connection(); - } - - static void process_pcmk(int ci) - { -- /* ci ::= client number */ -- char *data = NULL; -- char *uncompressed = NULL; -- AIS_Message *msg = NULL; -- SaAisErrorT rc = SA_AIS_OK; -- mar_res_header_t *header = NULL; -- static int header_len = sizeof(mar_res_header_t); -- -- header = malloc(header_len); -- memset(header, 0, header_len); -- -- errno = 0; -- rc = saRecvRetry(ais_fd_async, header, header_len); -- if (rc != SA_AIS_OK) { -- cl_perror("Receiving message header failed: (%d) %s", rc, -- ais_error2text(rc)); -- goto bail; -- } else if(header->size == header_len) { -- log_error("Empty message: id=%d, size=%d, error=%d, header_len=%d", -- header->id, header->size, header->error, header_len); -- goto done; -- 
} else if(header->size == 0 || header->size < header_len) { -- log_error("Mangled header: size=%d, header=%d, error=%d", -- header->size, header_len, header->error); -- goto done; -- } else if(header->error != 0) { -- log_error("Header contined error: %d", header->error); -- } -- -- header = realloc(header, header->size); -- /* Use a char* so we can store the remainder into an offset */ -- data = (char*)header; -- -- errno = 0; -- rc = saRecvRetry(ais_fd_async, data+header_len, header->size - header_len); -- msg = (AIS_Message*)data; -- -- if (rc != SA_AIS_OK) { -- cl_perror("Receiving message body failed: (%d) %s", rc, ais_error2text(rc)); -- goto bail; -- } -- -- data = msg->data; -- if(msg->is_compressed && msg->size > 0) { -- int rc = BZ_OK; -- unsigned int new_size = msg->size; -- -- if (check_message_sanity(msg, NULL) == FALSE) -- goto badmsg; -- -- log_debug("Decompressing message data"); -- uncompressed = malloc(new_size); -- memset(uncompressed, 0, new_size); -- -- rc = BZ2_bzBuffToBuffDecompress( -- uncompressed, &new_size, data, msg->compressed_size, -- 1, 0); -- -- if(rc != BZ_OK) { -- log_error("Decompression failed: %d", rc); -- goto badmsg; -- } -- -- CRM_ASSERT(rc == BZ_OK); -- CRM_ASSERT(new_size == msg->size); -- -- data = uncompressed; -- -- } else if(check_message_sanity(msg, data) == FALSE) { -- goto badmsg; -- -- } else if(safe_str_eq("identify", data)) { -- int pid = getpid(); -- char *pid_s = crm_itoa(pid); -- -- send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais); -- crm_free(pid_s); -- goto done; -- } -- -- if (msg->header.id == crm_class_members) { -- xmlNode *xml = string2xml(data); -- -- if(xml != NULL) { -- const char *value = crm_element_value(xml, "id"); -- if(value) -- crm_peer_seq = crm_int_helper(value, NULL); -- -- log_debug("Updating membership %llu", crm_peer_seq); -- /* crm_log_xml_info(xml, __PRETTY_FUNCTION__); */ -- xml_child_iter(xml, node, crm_update_ais_node(node, crm_peer_seq)); -- crm_calculate_quorum(); -- free_xml(xml); 
-- } else { -- log_error("Invalid peer update: %s", data); -- } -- } else { -- log_error("Unexpected AIS message type: %d", msg->header.id); -- } -- --done: -- free(uncompressed); -- free(msg); -- return; -- --badmsg: -- log_error("Invalid message (id=%d, dest=%s:%s, from=%s:%s.%d):" -- " min=%d, total=%d, size=%d, bz2_size=%d", -- msg->id, ais_dest(&(msg->host)), msg_type2text(msg->host.type), -- ais_dest(&(msg->sender)), msg_type2text(msg->sender.type), -- msg->sender.pid, (int)sizeof(AIS_Message), -- msg->header.size, msg->size, msg->compressed_size); -- free(uncompressed); -- free(msg); -- return; -- --bail: -- log_error("AIS connection failed"); -- return; --} -- --static void dead_stonithd(int ci) --{ -- if (ci != stonithd_ci) { -- log_error("Unknown connection %d", ci); -- return; -- } -- -- log_error("stonithd connection died"); -- shutdown_daemon(); -- connection_dead(ci); --} -- --static void process_stonithd(int ci) --{ -- IPC_Channel *stonithd_ch = stonithd_input_IPC_channel(); -- -- while (stonithd_op_result_ready()) { -- if (stonithd_ch->ch_status != IPC_CONNECT) { -- /* The message which was pending for us is that -- * the IPC status is now IPC_DISCONNECT */ -- break; -- } -- -- if (ST_FAIL == stonithd_receive_ops_result(FALSE)) { -- log_error("stonithd_receive_ops_result() failed"); -- } -- } -- -- if (stonithd_ch->ch_status != IPC_CONNECT) -- dead_stonithd(stonithd_ci); --} -- --static void result_stonithd(stonith_ops_t *op) --{ -- if (op == NULL) { -- log_error("Called with a NULL op!"); -- return; -- } -- -- log_debug("Stonithd result: call=%d, optype=%d, node_name=%s, result=%d, node_list=%s, action=%s", -- op->call_id, op->optype, op->node_name, op->op_result, -- (char *)op->node_list, op->private_data); -- -- switch(op->op_result) { -- case STONITH_SUCCEEDED: -- break; -- case STONITH_CANNOT: -- case STONITH_TIMEOUT: -- case STONITH_GENERIC: -- log_error("Stonith of %s failed (%d)", -- op->node_name, op->op_result); -- break; -- default: -- 
log_error("Unsupported action result: %d", op->op_result); -- } --} -- --static gboolean setup_stonith(void) --{ -- int lpc = 0; -- int rc = ST_OK; -- int stonithd_fd; -- const char *reason = NULL; -- IPC_Channel *stonithd_ch = NULL; -- -- for(lpc = 0; lpc < 30; lpc++) { -- log_debug("Attempting connection to fencing daemon..."); -- -- sleep(1); -- rc = stonithd_signon("ocfs2-tools"); -- if(rc == ST_OK) -- break; -- -- log_error("Sign-in failed: pausing and trying again in 2s..."); -- sleep(1); -- } -- -- if(rc != ST_OK) { -- reason = "Sign-in failed"; -- goto bail; -- } -- -- rc = stonithd_set_stonith_ops_callback(result_stonithd); -- if(rc != ST_OK) { -- reason = "Setup failed"; -- goto bail; -- } -- -- stonithd_ch = stonithd_input_IPC_channel(); -- if(stonithd_ch == NULL) { -- reason = "No connection"; -- goto bail; -- } -- stonithd_fd = stonithd_ch->ops->get_recv_select_fd(stonithd_ch); -- if(stonithd_ch <= 0) { -- reason = "No fd"; -- goto bail; -- } -- -- stonithd_ci = connection_add(stonithd_fd, process_stonithd, -- dead_stonithd); -- if (stonithd_ci < 0) { -- log_error("Unable to add stonithd client: %s", -- strerror(-stonithd_ci)); -- goto bail; -- } -- -- return TRUE; -- --bail: -- log_error("Unable to add stonithd client: %s", reason); -- return FALSE; -+ ais_dispatch(ais_fd_async, NULL); - } +@@ -152,10 +154,10 @@ static void process_pcmk(int ci) int setup_stack(void) { -- int retries = 0; -- int pid; -- char *pid_s; -- int rc = SA_AIS_OK; -- struct utsname name; -- - crm_log_init("ocfs2_controld", LOG_INFO, FALSE, TRUE, 0, NULL); -- crm_peer_init(); +- crm_log_init("ocfs2_controld", LOG_INFO, FALSE, TRUE, 0, NULL); ++ crm_log_init("ocfs2_controld", LOG_INFO, FALSE, TRUE, 0, NULL, FALSE); -- if (local_node_uname == NULL) { -- if (uname(&name) < 0) { -- cl_perror("uname(2) call failed"); -- exit(100); -+ if(init_ais_connection(NULL, NULL, NULL, &local_node_uname, &our_nodeid) == FALSE) { -+ log_printf(LOG_ERR, "Connection to our AIS plugin (%d) failed", 
CRM_SERVICE); -+ return -1; - } -- local_node_uname = crm_strdup(name.nodename); -- log_debug("Local node name: %s", local_node_uname); -- } -- --retry: -- log_debug("Creating connection to our AIS plugin"); -- rc = saServiceConnect (&ais_fd_sync, &ais_fd_async, CRM_SERVICE); -- if (rc != SA_AIS_OK) -- log_error("Connection to our AIS plugin (%d) failed: %s (%d)", -- CRM_SERVICE, ais_error2text(rc), rc); -- -- switch(rc) { -- case SA_AIS_OK: -- break; -- case SA_AIS_ERR_TRY_AGAIN: -- if(retries < 30) { -- sleep(1); -- retries++; -- goto retry; -- } -- log_error("Retry count exceeded"); -- return 0; -- default: -- return 0; -- } -- -- log_debug("AIS connection established"); -- -- pid = getpid(); -- pid_s = crm_itoa(pid); -- send_ais_text(0, pid_s, TRUE, NULL, crm_msg_ais); -- crm_free(pid_s); + if(init_ais_connection(NULL, NULL, NULL, &local_node_uname, &our_nodeid) == FALSE) { +- log_error("Connection to our AIS plugin (%d) failed", CRM_SERVICE); ++ log_error("Connection to our AIS plugin (CRM) failed"); + return -1; + } - /* Sign up for membership updates */ - send_ais_text(crm_class_notify, "true", TRUE, NULL, crm_msg_ais); -@@ -427,14 +186,11 @@ retry: - /* Requesting the current list of known nodes */ - send_ais_text(crm_class_members, __FUNCTION__, TRUE, NULL, crm_msg_ais); - -- our_nodeid = get_ais_nodeid(); -- log_debug("Local node id: %d", our_nodeid); -- - pcmk_ci = connection_add(ais_fd_async, process_pcmk, dead_pcmk); -- if (pcmk_ci >= 0 && setup_stonith()) -+ if (pcmk_ci >= 0) - return ais_fd_async; - -- log_error("Unable to add pacemaker client: %s", strerror(-pcmk_ci)); -+ log_printf(LOG_ERR, "Unable to add pacemaker client: %s", strerror(-pcmk_ci)); - exit_stack(); - return pcmk_ci; - } diff --git a/ocfs2_controld-retry-470741.diff b/ocfs2_controld-retry-470741.diff deleted file mode 100644 index 9305d2b..0000000 --- a/ocfs2_controld-retry-470741.diff +++ /dev/null @@ -1,22 +0,0 @@ -From: lmb@suse.de -References: bnc#470741 - -ocfs2_controld.pcmk 
does not retry often enough for some situations, and -thus fails to start. This tentative patch "masks" this by simply -retrying much more often. - -Index: ocfs2-tools/ocfs2_controld/ckpt.c -=================================================================== ---- ocfs2-tools.orig/ocfs2_controld/ckpt.c -+++ ocfs2-tools/ocfs2_controld/ckpt.c -@@ -32,8 +32,8 @@ - * A tentative retry is something we don't want to spend a lot of time on; - * it works or we error. A serious retry we really want to complete. - */ --#define TENTATIVE_RETRY_TRIES 2 --#define SERIOUS_RETRY_TRIES 5 -+#define TENTATIVE_RETRY_TRIES 20 -+#define SERIOUS_RETRY_TRIES 50 - - - diff --git a/ocfs2console-display-fix.diff b/ocfs2console-display-fix.diff deleted file mode 100644 index ebfd5a6..0000000 --- a/ocfs2console-display-fix.diff +++ /dev/null @@ -1,41 +0,0 @@ -From: Coly Li -Subject: fix ocfs2console crashing when DISPLAY is not correctly set -References: bnc#448523 -Author: Joel Becker - -When DISPLAY is not correctly set, this patch makes ocfs2console report error -mesage and exit, other than a ugly crash. - -Signed-off-by: Joel Becker -Signed-off-by: Coly Li ---- -diff --git a/ocfs2console/ocfs2console b/ocfs2console/ocfs2console -index 77ab623..17dcfc5 100755 ---- a/ocfs2console/ocfs2console -+++ b/ocfs2console/ocfs2console -@@ -3,14 +3,23 @@ - from ocfs2interface.about import process_args - nodeconf = process_args() - -+import warnings -+warnings.filterwarnings("error") -+ - try: - import gtk --except RuntimeError: -+except Exception, e: - import sys -- print >>sys.stderr, '''ERROR: ocfs2console needs an X11 display. -+ if str(e).lower().find('display') == -1: -+ print >>sys.stderr, '''ERROR: Unable to initialize the windowing -+system: %s\n''' % e -+ else: -+ print >>sys.stderr, '''ERROR: ocfs2console needs an X11 display. 
- Make sure a proper setup for your display environment exists.\n''' - sys.exit(1) - -+warnings.resetwarnings() -+ - if nodeconf: - from ocfs2interface.nodeconfig import node_config - node_config() diff --git a/ocfs2console-extraoption-fix.diff b/ocfs2console-extraoption-fix.diff deleted file mode 100644 index 8add6f2..0000000 --- a/ocfs2console-extraoption-fix.diff +++ /dev/null @@ -1,29 +0,0 @@ -From: Coly Li -Subject: ocfs2console: remove unsupported option when calling tunefs.ocfs2 -References: bnc#472353 -commit 1adbec9a9d409f3f2c127ddbff8c4504e2d126ed -Author: Coly Li -Date: Sun Feb 8 12:20:05 2009 +0800 - - ocfs2console: remove unsupported option when calling tunefs.ocfs2 - - ocfs2interface/tune.py call tunefs.ocfs2 with -x option, which is not supported in tunefs.ocfs2. - This patch remove this unsupported option from ocfs2console. - - Signed-off-by: Coly Li - Cc: Joel Becker - Signed-off-by: Joel Becker - -diff --git a/ocfs2console/ocfs2interface/tune.py b/ocfs2console/ocfs2interface/tune.py -index c76dbaa..0e514bd 100644 ---- a/ocfs2console/ocfs2interface/tune.py -+++ b/ocfs2console/ocfs2interface/tune.py -@@ -24,7 +24,7 @@ from process import Process - - from fswidgets import NumSlots, VolumeLabel - --base_command = ('tunefs.ocfs2', '-x') -+base_command = ('tunefs.ocfs2',) - - class TuneVolumeLabel(VolumeLabel): - def __init__(self, device=None): diff --git a/ocfs2console-pop-display-warning.diff b/ocfs2console-pop-display-warning.diff deleted file mode 100644 index b1bfb2e..0000000 --- a/ocfs2console-pop-display-warning.diff +++ /dev/null @@ -1,33 +0,0 @@ -From: Coly Li -Subject: ocfs2console: Whoops, don't reset all warnings, just pop the DISPLAY one -References: bnc#448523 -Author: Hu Ziming - -comment from patch by Joel: - We turn warnings into error while calling 'import gtk' to handle a pygtk - bug with an invalid DISPLAY. After we return from 'import gtk', we were - calling warnings.resetwarnings() to get back to the normal state. 
- However, it looks like that clears more warning filters than the one we - added. Let's fix this. -comment from bugzilla by Joel: - Ziming Hu's patch for fixing the warning filters is now upstream. I regret to - say that since I hand-coded it from memory, I forgot to credit him with the - discovery before committing. That credit will live here. - -Signed-off-by: Hu Ziming -Signed-off-by: Joel Becker -Signed-off-by: Coly Li ---- -diff -ru ocfs2-tools/ocfs2console/ocfs2console ocfs2-tools-new/ocfs2console/ocfs2console ---- ocfs2-tools/ocfs2console/ocfs2console 2009-02-17 06:06:57.910766000 +0100 -+++ ocfs2-tools-new/ocfs2console/ocfs2console 2009-02-17 06:09:32.935591000 +0100 -@@ -17,8 +17,7 @@ - print >>sys.stderr, '''ERROR: ocfs2console needs an X11 display. - Make sure a proper setup for your display environment exists.\n''' - sys.exit(1) -- --warnings.resetwarnings() -+warnings.filters.pop(0) - - if nodeconf: - from ocfs2interface.nodeconfig import node_config diff --git a/reflink-no-syscall.patch b/reflink-no-syscall.patch new file mode 100644 index 0000000..73cc7a8 --- /dev/null +++ b/reflink-no-syscall.patch @@ -0,0 +1,57 @@ +Index: reflink/coreutils-6.9/src/ln.c +=================================================================== +--- reflink.orig/coreutils-6.9/src/ln.c ++++ reflink/coreutils-6.9/src/ln.c +@@ -152,52 +152,6 @@ target_directory_operand (char const *fi + return is_a_dir; + } + +-#ifndef HAVE_REFLINK +-# ifndef HAVE_REFLINKAT +-# include +- +-# ifndef SYS_reflinkat +-# ifdef __i386__ +-# define __NR_reflinkat 337 +-# endif +-# ifdef __x86_64__ +-# define __NR_reflinkat 299 +-# endif +-# define SYS_reflinkat __NR_reflinkat +-# endif /* SYS_reflinkat */ +- +-# ifdef SYS_reflinkat +-static int reflinkat(int olddirfd, const char *oldpath, +- int newdirfd, const char *newpath, +- int preserve, int flags) +-{ +- return syscall(SYS_reflinkat, olddirfd, oldpath, newdirfd, newpath, +- preserve, flags); +-} +-# else +-static int reflinkat(int olddirfd, 
const char *oldpath, +- int newdirfd, const char *newpath, +- int preserve, int flags) +-{ +- return -ENOSYS; +-} +-# endif /* SYS_reflinkat */ +-# endif /* HAVE_REFLINKAT */ +- +-/* +- * We've now defined reflinkat of some sort. We can use it to build +- * reflink. +- */ +-# ifndef AT_FDCWD +-# define AT_FDCWD -100 +-# endif +-static int reflink(const char *oldpath, const char *newpath, +- int preserve) +-{ +- return reflinkat(AT_FDCWD, oldpath, AT_FDCWD, newpath, preserve, 0); +-} +-#endif /* HAVE_REFLINK */ +- + #ifndef REFLINK_ATTR_NONE + # define REFLINK_ATTR_NONE 0 + #endif diff --git a/reflink.tar.bz2 b/reflink.tar.bz2 new file mode 100644 index 0000000..2f4fa50 --- /dev/null +++ b/reflink.tar.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d197e3c716ab12213332d9787375fecda0ede55508ca1e71aec9922e8c124ba5 +size 5167807 diff --git a/tunefs.ocfs2.8.in_more_options.diff b/tunefs.ocfs2.8.in_more_options.diff deleted file mode 100644 index 66e1a52..0000000 --- a/tunefs.ocfs2.8.in_more_options.diff +++ /dev/null @@ -1,43 +0,0 @@ -commit f5939cb6da32352a782e36507b5bef8a8194b26d -Author: Coly Li -Date: Tue Feb 24 09:43:04 2009 +0800 - - tunefs.ocfs2.8: add info for -y,-n,--update-cluster-stack - - The manual for tunefs.ocfs2.8.in does not have information for -y, -n and - --update-cluster-stack options. This patch adds these info to tuneofs.ocfs2 - manual. - - Signed-off-by: Coly Li - Signed-off-by: Mark Fasheh - -diff --git a/tunefs.ocfs2/tunefs.ocfs2.8.in b/tunefs.ocfs2/tunefs.ocfs2.8.in -index 7a9b66b..b49a00f 100644 ---- a/tunefs.ocfs2/tunefs.ocfs2.8.in -+++ b/tunefs.ocfs2/tunefs.ocfs2.8.in -@@ -90,6 +90,14 @@ Verbose mode. - Show version and exit. - - .TP -+\fB\-y, \-\-yes\fR -+Alway answer Yes in interactive command line. -+ -+.TP -+\fB\-n, \-\-no\fR -+Always answer No in interactive command line. -+ -+.TP - \fB\-\-backup\-super\fR - Backs up the superblock to fixed offsets (1G, 4G, 16G, 64G, 256G and 1T) - on disk. 
This option is useful for users to backup the superblock on volumes -@@ -102,6 +110,10 @@ facility. - Lists the files having holes. This option is useful when disabling the \fIsparse\fR feature. - - .TP -+\fB\-\-update-cluster-stack\fR -+Updating on-disk cluster information to match the running cluster. -+ -+.TP - \fIblocks-count\fR - During resize, \fBtunefs.ocfs2\fR automatically determines the size of the given - device and grows the file system such that it uses all of the available space on diff --git a/version-1.4.3.patch b/version-1.4.3.patch new file mode 100644 index 0000000..39eabb8 --- /dev/null +++ b/version-1.4.3.patch @@ -0,0 +1,18 @@ +This patch keeps ocfs2-tools version as 1.4.3, until we decide to update the package version. + +Signed-off-by: Coly Li +--- +diff -ur ocfs2-tools/configure.in ocfs2-tools-new//configure.in +--- ocfs2-tools/configure.in 2010-02-20 01:21:46.000000000 +0800 ++++ ocfs2-tools-new//configure.in 2010-03-11 09:38:01.000000000 +0800 +@@ -8,8 +8,8 @@ + + # Adjust these for the software version. + MAJOR_VERSION=1 +-MINOR_VERSION=6 +-MICRO_VERSION=0 ++MINOR_VERSION=4 ++MICRO_VERSION=3 + EXTRA_VERSION= + + DIST_VERSION=$MAJOR_VERSION.$MINOR_VERSION.$MICRO_VERSION