From 583be25c5f5ea6ec34b4ec499df12c450d457778 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 11 Apr 2010 16:10:04 +0800 Subject: [PATCH 07/30] dx_dirs: more library support for directory indexing This patch adds more library support for indexed dirs, - dx_root alloc/delete - dx_leaf alloc/delete - dx_root read/write - dx_leaf read/write - indexed tree insert/truncate - dx_root extent tree operations - indexed dir support in ocfs2_lookup_system_inode(), ocfs2_init_dir(), ocfs2_link(), ocfs2_unlink(), ocfs2_lookup(). With this patch, indexed dirs support in fsck.ocfs2 is possible. Signed-off-by: Coly Li Signed-off-by: Mark Fasheh --- include/ocfs2-kernel/ocfs2_fs.h | 29 + include/ocfs2/ocfs2.h | 55 ++- libocfs2/Makefile | 3 +- libocfs2/alloc.c | 96 +++ libocfs2/dir_indexed.c | 1461 +++++++++++++++++++++++++++++++++++++++ libocfs2/dir_iterate.c | 16 +- libocfs2/dir_iterate.h | 1 + libocfs2/dirblock.c | 195 +++++- libocfs2/expanddir.c | 18 +- libocfs2/extent_tree.c | 53 ++ libocfs2/extent_tree.h | 5 + libocfs2/inode.c | 3 + libocfs2/link.c | 9 + libocfs2/lookup.c | 71 ++- libocfs2/ocfs2_err.et | 6 + libocfs2/truncate.c | 16 + libocfs2/unlink.c | 187 +++++- sizetest/sizetest.c | 2 +- 18 files changed, 2165 insertions(+), 61 deletions(-) create mode 100644 libocfs2/dir_indexed.c diff --git a/include/ocfs2-kernel/ocfs2_fs.h b/include/ocfs2-kernel/ocfs2_fs.h index f3c2450..6fc63e8 100644 --- a/include/ocfs2-kernel/ocfs2_fs.h +++ b/include/ocfs2-kernel/ocfs2_fs.h @@ -1406,6 +1406,35 @@ static inline int ocfs2_extent_recs_per_eb(int blocksize) return size / sizeof(struct ocfs2_extent_rec); } +static inline int ocfs2_dx_entries_per_leaf(int blocksize) +{ + int size; + size = blocksize - + offsetof(struct ocfs2_dx_leaf, dl_list.de_entries); + + return size / sizeof(struct ocfs2_dx_entry); +} + +static inline int ocfs2_dx_entries_per_root(int blocksize) +{ + int size; + + size = blocksize - + offsetof(struct ocfs2_dx_root_block, dr_entries.de_entries); + + return size / sizeof(struct ocfs2_dx_entry); +} + +static inline int ocfs2_extent_recs_per_dx_root(int blocksize) +{ + int size; + + size = blocksize - + offsetof(struct ocfs2_dx_root_block, dr_list.l_recs); + + return size / sizeof(struct ocfs2_extent_rec); +} + static inline int ocfs2_local_alloc_size(int blocksize) { int size; diff --git a/include/ocfs2/ocfs2.h b/include/ocfs2/ocfs2.h index 332d4bd..465dc58 100644 --- a/include/ocfs2/ocfs2.h +++ b/include/ocfs2/ocfs2.h @@ -254,6 +254,22 @@ struct _ocfs2_quota_hash { ocfs2_cached_dquot **hash; }; +struct ocfs2_dx_hinfo { + uint32_t major_hash; + uint32_t minor_hash; +}; + +struct ocfs2_dir_lookup_result { + struct ocfs2_dx_hinfo dl_hinfo; /* name hash results */ + char * dl_leaf; /* unindexed block buffer */ + uint64_t dl_leaf_blkno; /* blk number of dl_leaf */ + struct ocfs2_dir_entry * dl_entry; /* dirent pointed into dl_leaf */ + struct ocfs2_dx_leaf * dl_dx_leaf; /* indexed block buffer */ + uint64_t dl_dx_leaf_blkno; /* blk number of dl_dx_leaf */ + struct ocfs2_dx_entry * dl_dx_entry; /* indexed entry pointed to dl_dx_leaf */ + int dl_dx_entry_idx; /* index of dl_dx_entry in entries list */ +}; + typedef struct _ocfs2_quota_hash ocfs2_quota_hash; errcode_t ocfs2_malloc(unsigned long size, void *ptr); @@ -332,7 +348,8 @@ errcode_t ocfs2_read_inode(ocfs2_filesys *fs, uint64_t blkno, errcode_t ocfs2_write_inode(ocfs2_filesys *fs, uint64_t blkno, char *inode_buf); errcode_t ocfs2_check_directory(ocfs2_filesys *fs, uint64_t dir); - +int ocfs2_check_dir_entry(ocfs2_filesys *fs, struct ocfs2_dir_entry *de, + char *dir_buf, unsigned int offset); errcode_t ocfs2_read_cached_inode(ocfs2_filesys *fs, uint64_t blkno, ocfs2_cached_inode **ret_ci); errcode_t ocfs2_write_cached_inode(ocfs2_filesys *fs, @@ -478,7 +495,7 @@ errcode_t ocfs2_read_dx_root(ocfs2_filesys *fs, uint64_t block, errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block, void *buf); int ocfs2_dir_indexed(struct ocfs2_dinode *di); - +errcode_t ocfs2_dx_dir_truncate(ocfs2_filesys *fs, uint64_t dir); errcode_t ocfs2_dir_iterate2(ocfs2_filesys *fs, uint64_t dir, int flags, @@ -486,6 +503,7 @@ errcode_t ocfs2_dir_iterate2(ocfs2_filesys *fs, int (*func)(uint64_t dir, int entry, struct ocfs2_dir_entry *dirent, + uint64_t blocknr, int offset, int blocksize, char *buf, @@ -496,6 +514,7 @@ extern errcode_t ocfs2_dir_iterate(ocfs2_filesys *fs, int flags, char *block_buf, int (*func)(struct ocfs2_dir_entry *dirent, + uint64_t blocknr, int offset, int blocksize, char *buf, @@ -675,7 +694,10 @@ errcode_t ocfs2_new_inode(ocfs2_filesys *fs, uint64_t *ino, int mode); errcode_t ocfs2_new_system_inode(ocfs2_filesys *fs, uint64_t *ino, int mode, int flags); errcode_t ocfs2_delete_inode(ocfs2_filesys *fs, uint64_t ino); errcode_t ocfs2_new_extent_block(ocfs2_filesys *fs, uint64_t *blkno); +errcode_t ocfs2_new_dx_root(ocfs2_filesys *fs, struct ocfs2_dinode *di, uint64_t *dr_blkno); errcode_t ocfs2_delete_extent_block(ocfs2_filesys *fs, uint64_t blkno); +errcode_t ocfs2_delete_dx_root(ocfs2_filesys *fs, uint64_t dr_blkno); + /* * Allocate the blocks and insert them to the file. * only i_clusters of dinode will be updated accordingly, i_size not changed. @@ -1347,6 +1369,10 @@ static inline int ocfs2_refcount_tree(struct ocfs2_super_block *osb) #define OCFS2_BLOCK_ABORT 0x02 #define OCFS2_BLOCK_ERROR 0x04 + +#define OCFS2_IS_VALID_DX_ROOT(ptr) \ + (!strcmp((char *)(ptr)->dr_signature, OCFS2_DX_ROOT_SIGNATURE)) + /* * Block iterate flags * @@ -1487,5 +1513,30 @@ errcode_t ocfs2_extent_iterate_xattr(ocfs2_filesys *fs, void *priv_data, int *changed); errcode_t ocfs2_delete_xattr_block(ocfs2_filesys *fs, uint64_t blkno); +errcode_t ocfs2_dir_indexed_tree_truncate(ocfs2_filesys *fs, + struct ocfs2_dx_root_block *dx_root); +errcode_t ocfs2_write_dx_root(ocfs2_filesys *fs, uint64_t block, char *buf); +errcode_t ocfs2_write_dx_leaf(ocfs2_filesys *fs, uint64_t block, void *buf); +errcode_t ocfs2_dx_dir_build(ocfs2_filesys *fs, uint64_t dir); +errcode_t ocfs2_dx_dir_insert_entry(ocfs2_filesys *fs, uint64_t dir, const char *name, + uint64_t ino, uint64_t blkno); +int ocfs2_search_dirblock(ocfs2_filesys *fs, char *dir_buf, + const char *name, int namelen, unsigned int bytes, + struct ocfs2_dir_entry **res_dir); +void ocfs2_dx_dir_name_hash(ocfs2_filesys *fs, const char *name, + int len, struct ocfs2_dx_hinfo *hinfo); +errcode_t ocfs2_dx_dir_lookup(ocfs2_filesys *fs, struct ocfs2_dx_root_block *dx_root, + struct ocfs2_extent_list *el, struct ocfs2_dx_hinfo *hinfo, + uint32_t *ret_cpos, uint64_t *ret_phys_blkno); +errcode_t ocfs2_dx_dir_search(ocfs2_filesys *fs, const char *name, + int namelen, struct ocfs2_dx_root_block *dx_root, + struct ocfs2_dir_lookup_result *res); +void release_lookup_res(struct ocfs2_dir_lookup_result *res); +int ocfs2_find_max_rec_len(ocfs2_filesys *fs, char *buf); +void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list, int index); +int ocfs2_is_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di, unsigned long de_off); + + + #endif /* _FILESYS_H */ diff --git a/libocfs2/Makefile b/libocfs2/Makefile index 622b89e..cff5061 100644 --- a/libocfs2/Makefile +++ b/libocfs2/Makefile @@ -77,7 +77,8 @@ CFILES = \ image.c \ xattr.c \ extent_tree.c \ - refcount.c + refcount.c \ + dir_indexed.c HFILES = \ bitmap.h \ diff --git a/libocfs2/alloc.c b/libocfs2/alloc.c index 842ebdb..84f3b05 100644 --- a/libocfs2/alloc.c +++ b/libocfs2/alloc.c @@ -622,6 +622,102 @@ out: return ret; } +/* only initiate part of dx_root: + * dr_subllaoc_slot + * dr_sbualloc_bit + * dr_fs_generation + * dr_blkno + * dr_flags + */ +static void init_dx_root(ocfs2_filesys *fs, + struct ocfs2_dx_root_block *dx_root, + int slot, uint64_t gd_blkno, uint64_t dr_blkno) +{ + + memset(dx_root, 0, fs->fs_blocksize); + strcpy((char *)dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE); + dx_root->dr_suballoc_slot = slot; + dx_root->dr_suballoc_bit = (uint16_t)(dr_blkno - gd_blkno); + dx_root->dr_fs_generation = fs->fs_super->i_fs_generation; + dx_root->dr_blkno = dr_blkno; + dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE; +} + +errcode_t ocfs2_new_dx_root(ocfs2_filesys *fs, + struct ocfs2_dinode *di, + uint64_t *dr_blkno) +{ + errcode_t ret; + char *buf = NULL; + uint64_t gd_blkno; + struct ocfs2_dx_root_block *dx_root; + int slot; + + ret = ocfs2_malloc_block(fs->fs_io, &buf); + if (ret) + goto out; + + slot = di->i_suballoc_slot; + ret = ocfs2_load_allocator(fs, EXTENT_ALLOC_SYSTEM_INODE, + slot, &fs->fs_eb_allocs[slot]); + if (ret) + goto out; + + ret = ocfs2_chain_alloc_with_io(fs, fs->fs_eb_allocs[slot], + &gd_blkno, dr_blkno); + if (ret == OCFS2_ET_BIT_NOT_FOUND) { + ret = ocfs2_chain_add_group(fs, fs->fs_eb_allocs[slot]); + if (ret) + goto out; + ret = ocfs2_chain_alloc_with_io(fs, fs->fs_eb_allocs[slot], + &gd_blkno, dr_blkno); + if (ret) + goto out; + } else if (ret) + goto out; + + dx_root = (struct ocfs2_dx_root_block *)buf; + init_dx_root(fs, dx_root, slot, gd_blkno, *dr_blkno); + + ret = ocfs2_write_dx_root(fs, *dr_blkno, (char *)dx_root); +out: + if (buf) + ocfs2_free(&buf); + return ret; +} + +errcode_t ocfs2_delete_dx_root(ocfs2_filesys *fs, uint64_t dr_blkno) +{ + errcode_t ret; + char *buf = NULL; + struct ocfs2_dx_root_block *dx_root; + int slot; + + ret = ocfs2_malloc_block(fs->fs_io, &buf); + if (ret) + goto out; + + ret = ocfs2_read_dx_root(fs, dr_blkno, buf); + if (ret) + goto out; + + dx_root = (struct ocfs2_dx_root_block *)buf; + slot = dx_root->dr_suballoc_slot; + + ret = ocfs2_load_allocator(fs, EXTENT_ALLOC_SYSTEM_INODE, slot, + &fs->fs_eb_allocs[slot]); + if (ret) + goto out; + + ret = ocfs2_chain_free_with_io(fs, fs->fs_eb_allocs[slot], dr_blkno); + +out: + if (buf) + ocfs2_free(&buf); + + return ret; +} + /* XXX what to do about local allocs? * XXX Well, we shouldn't use local allocs to allocate, as we are * userspace and we have the entire bitmap in memory. However, this diff --git a/libocfs2/dir_indexed.c b/libocfs2/dir_indexed.c new file mode 100644 index 0000000..9cae3d0 --- /dev/null +++ b/libocfs2/dir_indexed.c @@ -0,0 +1,1461 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * Copyright (C) 2009, 2010 Novell. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License, version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include "ocfs2_err.h" +#include "extent_tree.h" + + +errcode_t ocfs2_dx_dir_truncate(ocfs2_filesys *fs, + uint64_t dir) +{ + struct ocfs2_dx_root_block *dx_root; + char *dx_root_buf = NULL, *di_buf = NULL; + struct ocfs2_dinode *di; + uint64_t dx_root_blk; + errcode_t ret = 0; + + ret = ocfs2_malloc_block(fs->fs_io, &di_buf); + if (ret) + goto out; + ret = ocfs2_read_inode(fs, dir, di_buf); + if (ret) + goto out; + di = (struct ocfs2_dinode *)di_buf; + + /* we have to trust i_dyn_features */ + if (!S_ISDIR(di->i_mode) || + !ocfs2_dir_indexed(di) || + di->i_dyn_features & OCFS2_INLINE_DATA_FL) + goto out; + + dx_root_blk = di->i_dx_root; + + di->i_dyn_features &= ~OCFS2_INDEXED_DIR_FL; + di->i_dx_root = 0; + + /* update inode firstly */ + ret = ocfs2_write_inode(fs, di->i_blkno, (char *)di); + if (ret) + goto out; + + /* inode is updated, the rested errors are not fatal */ + ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf); + if (ret) + goto out; + ret = ocfs2_read_dx_root(fs, dx_root_blk, dx_root_buf); + if (ret) + goto out; + dx_root = (struct ocfs2_dx_root_block *)dx_root_buf; + + if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) + goto remove_index; + + ret = ocfs2_dir_indexed_tree_truncate(fs, dx_root); + + /* + * even ocfs2_dir_indexed_tree_truncate() failed, + * we still want to call ocfs2_delete_dx_root(). + */ + +remove_index: + ret = ocfs2_delete_dx_root(fs, dx_root->dr_blkno); +out: + if (di_buf) + ocfs2_free(&di_buf); + if (dx_root_buf) + ocfs2_free(&dx_root_buf); + return ret; +} + +static unsigned int ocfs2_figure_dirent_hole(struct ocfs2_dir_entry *de) +{ + unsigned int hole; + + if (de->inode == 0) + hole = de->rec_len; + else + hole = de->rec_len - OCFS2_DIR_REC_LEN(de->name_len); + + return hole; +} + +int ocfs2_find_max_rec_len(ocfs2_filesys *fs, char *buf) +{ + int size, this_hole, largest_hole = 0; + char *de_buf, *limit; + struct ocfs2_dir_entry *de; + + size = ocfs2_dir_trailer_blk_off(fs); + limit = buf + size; + de_buf = buf; + de = (struct ocfs2_dir_entry *)de_buf; + do { + this_hole = ocfs2_figure_dirent_hole(de); + if (this_hole > largest_hole) + largest_hole = this_hole; + + de_buf += de->rec_len; + de = (struct ocfs2_dir_entry *)de_buf; + } while (de_buf < limit); + + if (largest_hole >= OCFS2_DIR_MIN_REC_LEN) + return largest_hole; + return 0; +} + +struct trailer_ctxt { + struct ocfs2_dx_root_block *dx_root; + struct ocfs2_dinode *di; +}; + +/* make sure the space for trailer is reserved */ +static errcode_t ocfs2_check_dir_trailer_space(ocfs2_filesys *fs, + struct ocfs2_dinode *di, + uint64_t blkno, + char *blk) +{ + errcode_t ret = 0; + struct ocfs2_dir_entry *dirent; + unsigned int offset = 0; + unsigned int toff = ocfs2_dir_trailer_blk_off(fs); + unsigned int real_rec_len = 0; + + while(offset < fs->fs_blocksize) { + dirent = (struct ocfs2_dir_entry *)(blk + offset); + if (!ocfs2_check_dir_entry(fs, dirent, blk, offset)) { + ret = OCFS2_ET_DIR_CORRUPTED; + break; + } + + real_rec_len = dirent->inode ? + OCFS2_DIR_REC_LEN(dirent->name_len) : + OCFS2_DIR_REC_LEN(1); + if ((offset + real_rec_len) <= toff) + goto next; + + if (dirent->inode) { + ret = OCFS2_ET_DIR_NO_SPACE; + break; + } +next: + offset += dirent->rec_len; + } + +out: + return ret; +} + +static int dir_trailer_func(ocfs2_filesys *fs, + uint64_t blkno, + uint64_t bcount, + uint16_t ext_flags, + void *priv_data) +{ + struct trailer_ctxt *ctxt = (struct trailer_ctxt *)priv_data; + struct ocfs2_dinode *di = ctxt->di; + struct ocfs2_dx_root_block *dx_root = ctxt->dx_root; + struct ocfs2_dir_block_trailer *trailer; + int max_rec_len = 0; + errcode_t ret = 0; + char *blk = NULL; + + ret = ocfs2_malloc_block(fs->fs_io, &blk); + if (ret) + goto out; + + /* here we don't trust trailer, cannot use + * ocfs2_read_dir_block() */ + ret = ocfs2_read_blocks(fs, blkno, 1, blk); + if (ret) + goto out; + ret = ocfs2_check_dir_trailer_space(fs, di, blkno, blk); + if (ret) + goto out; + ocfs2_init_dir_trailer(fs, di, blkno, blk); + max_rec_len = ocfs2_find_max_rec_len(fs, blk); + trailer = ocfs2_dir_trailer_from_block(fs, blk); + trailer->db_free_rec_len = max_rec_len; + + if (max_rec_len) { + trailer->db_free_next = dx_root->dr_free_blk; + dx_root->dr_free_blk = blkno; + } + + /* comput trailer->db_check here, after writes out, + * trailer is trustable */ + ret = ocfs2_write_dir_block(fs, di, blkno, blk); +out: + if (blk) + ocfs2_free(&blk); + return ret; +} + +static errcode_t ocfs2_init_dir_trailers(ocfs2_filesys *fs, + struct ocfs2_dinode *di, + struct ocfs2_dx_root_block *dx_root) +{ + errcode_t ret = 0; + struct trailer_ctxt ctxt; + + if (di->i_dyn_features & OCFS2_INLINE_DATA_FL) { + ret = OCFS2_ET_INODE_NOT_VALID; + goto out; + } + + ctxt.di = di; + ctxt.dx_root = dx_root; + + ret = ocfs2_block_iterate_inode(fs, di, + 0, dir_trailer_func, &ctxt); +out: + return ret; +} + +static void ocfs2_dx_entry_list_insert(struct ocfs2_dx_entry_list *entry_list, + struct ocfs2_dx_hinfo *hinfo, + uint64_t dirent_blk) +{ + int i; + struct ocfs2_dx_entry *dx_entry; + + i = entry_list->de_num_used; + dx_entry = &entry_list->de_entries[i]; + + memset(dx_entry, 0, sizeof(struct ocfs2_dx_entry)); + dx_entry->dx_major_hash = hinfo->major_hash; + dx_entry->dx_minor_hash = hinfo->minor_hash; + dx_entry->dx_dirent_blk = dirent_blk; + + entry_list->de_num_used += 1; +} + +struct dx_insert_ctxt { + uint64_t dir_blkno; + uint64_t dx_root_blkno; + ocfs2_filesys *fs; +}; + + +inline static int ocfs2_inline_dx_has_space(struct ocfs2_dx_root_block *dx_root) +{ + struct ocfs2_dx_entry_list *entry_list; + + entry_list = &dx_root->dr_entries; + + if (entry_list->de_num_used >= entry_list->de_count) + return 0; + + return 1; +} + +static struct ocfs2_dx_leaf **ocfs2_dx_dir_alloc_leaves(ocfs2_filesys *fs, + int *ret_num_leaves) +{ + errcode_t num_dx_leaves = ocfs2_clusters_to_blocks(fs, 1); + char **dx_leaves_buf = NULL; + + dx_leaves_buf = calloc(num_dx_leaves, sizeof (void *)); + if (dx_leaves_buf && ret_num_leaves) + *ret_num_leaves = num_dx_leaves; + + return (struct ocfs2_dx_leaf **)dx_leaves_buf; +} + +static errcode_t ocfs2_dx_dir_format_cluster(ocfs2_filesys *fs, + struct ocfs2_dx_leaf **dx_leaves, + int num_dx_leaves, + uint64_t start_blk) +{ + errcode_t ret; + int i; + struct ocfs2_dx_leaf *dx_leaf; + char *blk; + + for (i = 0; i < num_dx_leaves; i++) { + ret = ocfs2_malloc_block(fs->fs_io, &blk); + if (ret) + goto out; + + dx_leaves[i] = (struct ocfs2_dx_leaf *)blk; + dx_leaf = (struct ocfs2_dx_leaf *)blk; + + memset(dx_leaf, 0, fs->fs_blocksize); + strcpy((char *)dx_leaf->dl_signature, OCFS2_DX_LEAF_SIGNATURE); + dx_leaf->dl_fs_generation = fs->fs_super->i_fs_generation; + dx_leaf->dl_blkno = start_blk + i; + dx_leaf->dl_list.de_count = ocfs2_dx_entries_per_leaf(fs->fs_blocksize); + + ret = ocfs2_write_dx_leaf(fs, dx_leaf->dl_blkno, dx_leaf); + if (ret) + goto out; + } + ret = 0; +out: + return ret; +} + +static inline unsigned int __ocfs2_dx_dir_hash_idx(ocfs2_filesys *fs, + uint32_t minor_hash) +{ + unsigned int cbits, bbits, dx_mask; + + cbits = OCFS2_RAW_SB(fs->fs_super)->s_clustersize_bits; + bbits = OCFS2_RAW_SB(fs->fs_super)->s_blocksize_bits; + dx_mask = (1 << (cbits - bbits)) -1; + + return (minor_hash & dx_mask); +} + +static inline unsigned int ocfs2_dx_dir_hash_idx(ocfs2_filesys *fs, + struct ocfs2_dx_hinfo *hinfo) +{ + return __ocfs2_dx_dir_hash_idx(fs, hinfo->minor_hash); +} + +static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf, + struct ocfs2_dx_entry *dx_new_entry) +{ + int i; + + i = dx_leaf->dl_list.de_num_used; + dx_leaf->dl_list.de_entries[i] = *dx_new_entry; + + dx_leaf->dl_list.de_num_used += 1; +} + +static errcode_t ocfs2_expand_inline_dx_root(ocfs2_filesys *fs, + struct ocfs2_dx_root_block *dx_root) +{ + errcode_t ret; + int num_dx_leaves, i, j; + uint64_t start_blkno = 0; + uint32_t clusters_found = 0; + struct ocfs2_dx_leaf **dx_leaves = NULL; + struct ocfs2_dx_leaf *target_leaf; + struct ocfs2_dx_entry_list *entry_list; + struct ocfs2_extent_tree et; + struct ocfs2_dx_entry *dx_entry; + + dx_leaves = ocfs2_dx_dir_alloc_leaves(fs, &num_dx_leaves); + if (!dx_leaves) { + ret = OCFS2_ET_NO_MEMORY; + goto out; + } + + ret = ocfs2_new_clusters(fs, 1, 1, &start_blkno, &clusters_found); + if (ret) + goto out; + assert(clusters_found == 1); + ret = ocfs2_dx_dir_format_cluster(fs, dx_leaves, + num_dx_leaves, start_blkno); + if (ret) + goto out; + + /* + * Transfer the entries from inline dx_root into the appropriate + * block + */ + entry_list = &dx_root->dr_entries; + + for (i = 0; i < entry_list->de_num_used; i++) { + dx_entry = &entry_list->de_entries[i]; + j = __ocfs2_dx_dir_hash_idx(fs, dx_entry->dx_minor_hash); + target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[j]; + ocfs2_dx_dir_leaf_insert_tail(target_leaf, dx_entry); + } + + /* + * Write out all leaves. + * If ocfs2_write_dx_leaf() failed, since dx_root is not cleared + * yet, and the leaves are not inserted into indexed tree yet, + * this cluster will be recoganized as orphan in blocks scan of + * fsck.ocfs2 + */ + for (i = 0; i < num_dx_leaves; i ++) { + target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[i]; + ret = ocfs2_write_dx_leaf(fs, target_leaf->dl_blkno, + target_leaf); + if (ret) + goto out; + } + + dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE; + memset(&dx_root->dr_list, 0, fs->fs_blocksize - + offsetof(struct ocfs2_dx_root_block, dr_list)); + dx_root->dr_list.l_count = + ocfs2_extent_recs_per_dx_root(fs->fs_blocksize); + + /* This should never fail considering we start with an empty + * dx_root */ + ocfs2_init_dx_root_extent_tree(&et, fs, (char *)dx_root, dx_root->dr_blkno); + ret = ocfs2_tree_insert_extent(fs, &et, 0, start_blkno, 1, 0); + if (ret) + goto out; + +out: + return ret; +} + +static errcode_t ocfs2_dx_dir_lookup_rec(ocfs2_filesys *fs, + struct ocfs2_dx_root_block *dx_root, + struct ocfs2_extent_list *el, + uint32_t major_hash, + uint32_t *ret_cpos, + uint64_t *ret_phys_blkno, + unsigned int *ret_clen) +{ + errcode_t ret = 0; + int i, found; + struct ocfs2_extent_block *eb; + struct ocfs2_extent_rec *rec = NULL; + char *eb_buf = NULL; + + if (el->l_tree_depth) { + ret = ocfs2_tree_find_leaf(fs, + &dx_root->dr_list, + dx_root->dr_blkno, + (char *)dx_root, + major_hash, &eb_buf); + if (ret) + goto out; + + eb = (struct ocfs2_extent_block *)eb_buf; + el = &eb->h_list; + + if (el->l_tree_depth) { + ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK; + goto out; + } + } + + found = 0; + for (i = el->l_next_free_rec - 1; i >= 0; i--) { + rec = &el->l_recs[i]; + if (rec->e_cpos <= major_hash) { + found = 1; + break; + } + } + if (!found) { + ret = OCFS2_ET_CORRUPT_EXTENT_BLOCK; + goto out; + } + + if (ret_phys_blkno) + *ret_phys_blkno = rec->e_blkno; + if (ret_cpos) + *ret_cpos = rec->e_cpos; + if (ret_clen) + *ret_clen = rec->e_leaf_clusters; + +out: + if (eb_buf) + ocfs2_free(&eb_buf); + return ret; +} + +errcode_t ocfs2_dx_dir_lookup(ocfs2_filesys *fs, + struct ocfs2_dx_root_block *dx_root, + struct ocfs2_extent_list *el, + struct ocfs2_dx_hinfo *hinfo, + uint32_t *ret_cpos, + uint64_t *ret_phys_blkno) +{ + errcode_t ret = 0; + unsigned int cend = 0, clen = 0; + uint32_t cpos = 0; + uint64_t blkno = 0; + uint32_t name_hash = hinfo->major_hash; + + ret = ocfs2_dx_dir_lookup_rec(fs, dx_root, el, + name_hash, &cpos, &blkno, &clen); + if (ret) + goto out; + cend = cpos + clen; + if (name_hash >= cend) { + blkno += ocfs2_clusters_to_blocks(fs, clen - 1); + cpos += clen - 1; + } else { + blkno += ocfs2_clusters_to_blocks(fs, name_hash - cpos); + cpos = name_hash; + } + + blkno += ocfs2_dx_dir_hash_idx(fs, hinfo); + + if (ret_phys_blkno) + *ret_phys_blkno = blkno; + if (ret_cpos) + *ret_cpos = cpos; + +out: + return ret; +} + +static int dx_leaf_sort_cmp(const void *a, const void *b) +{ + const struct ocfs2_dx_entry *e1 = a; + const struct ocfs2_dx_entry *e2 = b; + uint32_t major_hash1 = e1->dx_major_hash; + uint32_t major_hash2 = e2->dx_major_hash; + uint32_t minor_hash1 = e1->dx_minor_hash; + uint32_t minor_hash2 = e2->dx_minor_hash; + + if (major_hash1 > major_hash2) + return 1; + if (major_hash1 < major_hash2) + return -1; + + /* it is not strictly necessary to sort by minor */ + if (minor_hash1 > minor_hash2) + return 1; + if (minor_hash1 < minor_hash2) + return -1; + return 0; +} + +static void dx_leaf_sort_swap(void *a, void *b, int size) +{ + struct ocfs2_dx_entry *e1 = a; + struct ocfs2_dx_entry *e2 = b; + struct ocfs2_dx_entry tmp; + + assert(size == sizeof (struct ocfs2_dx_entry)); + + tmp = *e1; + *e1 = *e2; + *e2 = tmp; +} + +static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf) +{ + struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; + int i, num = dl_list->de_num_used; + + for (i = 0; i < (num - 1); i++) { + if (dl_list->de_entries[i].dx_major_hash != + dl_list->de_entries[i + 1].dx_major_hash) + return 0; + } + return 1; +} + +/* + * Find the optimal value to split this leaf on. This expects the leaf + * entries to be in sorted order. + * + * leaf_cpos is the cpos of the leaf we're splitting. insert_hash is + * the hash we want to insert. + * + * This function is only concerned with the major hash - that which + * determines which cluster an item belongs to. + */ +static int ocfs2_dx_dir_find_leaf_split(struct ocfs2_dx_leaf *dx_leaf, + uint32_t leaf_cpos, + uint32_t insert_hash, + uint32_t *split_hash) +{ + struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; + int i, num_used = dl_list->de_num_used; + int allsame; + + /* + * There's a couple rare, but nasty corner cases we have to + * check for here. All of them involve a leaf where all value + * have the same hash, which is what we look for first. + * + * Most of the time, all of the above is false, and we simply + * pick the median value for a split. + */ + allsame = ocfs2_dx_leaf_same_major(dx_leaf); + if (allsame) { + uint32_t val = dl_list->de_entries[0].dx_major_hash; + if (val == insert_hash) { + /* + * No matter where we would choose to split, + * the new entry would want to occupy the same + * block as these. Since there's no space left + * in their existing block, we know there + * won't be space after the split. + */ + return OCFS2_ET_DIR_NO_SPACE; + } + + if (val == leaf_cpos) { + /* + * Because val is the same as leaf_cpos (which + * is the smallest value this leaf can have), + * yet is not equal to insert_hash, then we + * know that insert_hash *must* be larger than + * val (and leaf_cpos). At least cpos+1 in value. + * + * We also know then, that there cannot be an + * adjacent extent (otherwise we'd be looking + * at it). Choosing this value gives us a + * chance to get some continguousness. + */ + *split_hash = leaf_cpos + 1; + return 0; + } + + if (val > insert_hash) { + /* + * val can not be the same as insert_hash, and + * also must be larger than leaf_cpos. Also, + * we know that there can't be a leaf between + * cpos and val, otherwise the entries with + * hash 'val' would be there. + */ + *split_hash = val; + return 0; + } + + *split_hash = insert_hash; + return 0; + } + + /* + * Since the records are sorted and the checks above + * guaranteed that not all records in this block are the same, + * we simple travel forward, from the median, and pick the 1st + * record whose value is larger than leaf_cpos. + */ + for (i = (num_used /2); i < num_used; i++) { + if (dl_list->de_entries[i].dx_major_hash > leaf_cpos) + break; + } + + assert(i < num_used); /* Should be impossible */ + *split_hash = dl_list->de_entries[i].dx_major_hash; + return 0; +} + +static errcode_t ocfs2_read_dx_leaves(ocfs2_filesys *fs, + uint64_t start, + int num, + struct ocfs2_dx_leaf **dx_leaves) +{ + errcode_t ret; + int i; + struct ocfs2_dx_leaf *dx_leaf; + for (i = 0; i < num; i++) { + assert(!dx_leaves[i]); + ret = ocfs2_malloc_block(fs->fs_io, (char **)&dx_leaf); + if (ret) + goto bail; + ret = ocfs2_read_dx_leaf(fs, start + i, (char *)dx_leaf); + if (ret) + goto bail; + dx_leaves[i] = dx_leaf; + } + goto out; + +bail: + for (; i >= 0; i--) { + if (dx_leaves[i]) + ocfs2_free(&dx_leaves[i]); + } +out: + return ret; +} + +static errcode_t __ocfs2_dx_dir_new_cluster(ocfs2_filesys *fs, + uint32_t cpos, + struct ocfs2_dx_leaf **dx_leaves, + int num_dx_leaves, + uint64_t *ret_phys_blkno) +{ + errcode_t ret; + uint32_t num; + uint64_t phys; + + ret = ocfs2_new_clusters(fs, 1, 1, &phys, &num); + if (ret) + goto out; + assert(num == 1); + ret = ocfs2_dx_dir_format_cluster(fs, dx_leaves, + num_dx_leaves, phys); + if (ret) + goto out; + + *ret_phys_blkno = phys; + +out: + return ret; +} + +static errcode_t ocfs2_dx_dir_new_cluster(ocfs2_filesys *fs, + struct ocfs2_extent_tree *et, + uint32_t cpos, + uint64_t *phys_blocknr, + struct ocfs2_dx_leaf **dx_leaves, + int num_dx_leaves) +{ + errcode_t ret; + uint64_t blkno; + ret = __ocfs2_dx_dir_new_cluster(fs, cpos, dx_leaves, + num_dx_leaves, &blkno); + if (ret) + goto out; + + *phys_blocknr = blkno; + ret = ocfs2_tree_insert_extent(fs, et, cpos, blkno, 1, 0); + +out: + return ret; +} + + +static errcode_t ocfs2_dx_dir_transfer_leaf(ocfs2_filesys *fs, + uint32_t split_hash, + struct ocfs2_dx_leaf *tmp_dx_leaf, + struct ocfs2_dx_leaf **orig_dx_leaves, + uint64_t orig_dx_leaves_blkno, + struct ocfs2_dx_leaf **new_dx_leaves, + uint64_t new_dx_leaves_blkno, + int num_dx_leaves) +{ + errcode_t ret; + int i, j, num_used; + uint32_t major_hash; + struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf; + struct ocfs2_dx_entry_list *orig_list, *new_list, *tmp_list; + struct ocfs2_dx_entry *dx_entry; + + tmp_list = &tmp_dx_leaf->dl_list; + + for (i = 0; i < num_dx_leaves; i++) { + orig_dx_leaf = orig_dx_leaves[i]; + orig_list = &orig_dx_leaf->dl_list; + new_dx_leaf = new_dx_leaves[i]; + new_list = &new_dx_leaf->dl_list; + + num_used = orig_list->de_num_used; + + memcpy(tmp_dx_leaf, orig_dx_leaf, fs->fs_blocksize); + tmp_list->de_num_used = 0; + memset(&tmp_list->de_entries, 0, + sizeof(struct ocfs2_dx_entry) * num_used); + + for (j = 0; j < num_used; j++) { + dx_entry = &orig_list->de_entries[j]; + major_hash = dx_entry->dx_major_hash; + if (major_hash >= split_hash) + ocfs2_dx_dir_leaf_insert_tail(new_dx_leaf, + dx_entry); + else + ocfs2_dx_dir_leaf_insert_tail(tmp_dx_leaf, + dx_entry); + } + memcpy(orig_dx_leaf, tmp_dx_leaf, fs->fs_blocksize); + + ret = ocfs2_write_dx_leaf(fs, orig_dx_leaves_blkno + i, + (char *)orig_dx_leaf); + if (ret) + goto out; + ret = ocfs2_write_dx_leaf(fs, new_dx_leaves_blkno + i, + (char *)new_dx_leaf); + if (ret) + goto out; + } + +out: + return ret; +} + +static int ocfs2_dx_dir_free_leaves(ocfs2_filesys *fs, + struct ocfs2_dx_leaf **dx_leaves) +{ + int i, num; + + num = ocfs2_clusters_to_blocks(fs, 1); + for (i = 0; i < num; i++) { + if (dx_leaves[i]) + ocfs2_free(&dx_leaves[i]); + } + free(dx_leaves); + return 0; +} + +/* from Linux kernel lib/sort.c */ +static void ocfs2_sort(void *base, size_t num, size_t size, + int (*cmp_func)(const void *, const void *), + void (*swap_func)(void *, void *, int size)) +{ + /* pre-scale counters for performance */ + int i = (num/2 - 1) * size, n = num * size, c, r; + + /* heapify */ + for (; i >= 0; i -= size) { + for (r = i; r * 2 + size < n; r = c) { + c = r * 2 + size; + if (c < n - size && + cmp_func(base + c, base + c + size) < 0) + c += size; + if (cmp_func(base + r, base + c) >= 0) + break; + swap_func(base + r, base + c, size); + } + } + + /* sort */ + for (i = n - size; i > 0; i -= size) { + swap_func(base, base + i, size); + for (r = 0; r * 2 + size < i; r = c) { + c = r * 2 + size; + if (c < i - size && + cmp_func(base + c, base + c + size) < 0) + c += size; + if (cmp_func(base + r, base + c) >= 0) + break; + swap_func(base + r, base + c, size); + } + } + +} + +static errcode_t ocfs2_dx_dir_rebalance(ocfs2_filesys *fs, + struct ocfs2_dx_root_block *dx_root, + struct ocfs2_dx_leaf *dx_leaf, + struct ocfs2_dx_hinfo *hinfo, + uint32_t leaf_cpos, + uint64_t leaf_blkno) +{ + struct ocfs2_extent_tree et; + struct ocfs2_dx_leaf **orig_dx_leaves = NULL; + struct ocfs2_dx_leaf **new_dx_leaves = NULL; + struct ocfs2_dx_leaf *tmp_dx_leaf = NULL; + uint32_t insert_hash = hinfo->major_hash; + uint32_t split_hash, cpos; + uint64_t orig_leaves_start, new_leaves_start; + errcode_t ret; + int num_used, num_dx_leaves; + + ocfs2_init_dx_root_extent_tree(&et, fs, (char *)dx_root, dx_root->dr_blkno); + + if (dx_root->dr_clusters == UINT_MAX) { + ret = OCFS2_ET_DIR_NO_SPACE; + goto out; + } + + num_used = dx_leaf->dl_list.de_num_used; + if (num_used < dx_leaf->dl_list.de_count) { + ret = OCFS2_ET_DX_BALANCE_EMPTY_LEAF; + goto out; + } + + orig_dx_leaves = ocfs2_dx_dir_alloc_leaves(fs, &num_dx_leaves); + if (!orig_dx_leaves) { + ret = OCFS2_ET_NO_MEMORY; + goto out; + } + + new_dx_leaves = ocfs2_dx_dir_alloc_leaves(fs, NULL); + if (!new_dx_leaves) { + ret = OCFS2_ET_NO_MEMORY; + goto out; + } + + ocfs2_sort(dx_leaf->dl_list.de_entries, num_used, + sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp, + dx_leaf_sort_swap); + + ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, + insert_hash, &split_hash); + if (ret) + goto out; + + ret = ocfs2_malloc_block(fs->fs_io, (char **)(&tmp_dx_leaf)); + if (ret) + goto out; + orig_leaves_start = ocfs2_blocks_to_clusters(fs, leaf_blkno); + ret = ocfs2_read_dx_leaves(fs, orig_leaves_start, num_dx_leaves, + orig_dx_leaves); + if (ret) + goto out; + + cpos = split_hash; + ret = ocfs2_dx_dir_new_cluster(fs, &et, cpos, &new_leaves_start, + new_dx_leaves, num_dx_leaves); + if (ret) + goto out; + ret = ocfs2_dx_dir_transfer_leaf(fs, split_hash, tmp_dx_leaf, + orig_dx_leaves, orig_leaves_start, + new_dx_leaves, new_leaves_start, + num_dx_leaves); + +out: + if (tmp_dx_leaf) + ocfs2_free((char **)(&tmp_dx_leaf)); + + if (orig_dx_leaves) + ocfs2_dx_dir_free_leaves(fs, orig_dx_leaves); + if (new_dx_leaves) + ocfs2_dx_dir_free_leaves(fs, new_dx_leaves); + return ret; +} + +static errcode_t ocfs2_find_dir_space_dx(ocfs2_filesys *fs, + struct ocfs2_dx_root_block *dx_root, + const char *name, int namelen, + struct ocfs2_dir_lookup_result *lookup) +{ + errcode_t ret; + int rebalanced = 0; + struct ocfs2_dx_leaf *dx_leaf; + char *dx_leaf_buf = NULL; + uint64_t blkno; + uint32_t leaf_cpos; + + ret = ocfs2_malloc_block(fs->fs_io, &dx_leaf_buf); + if (ret) + goto out; + +restart_search: + ret = ocfs2_dx_dir_lookup(fs, dx_root, &dx_root->dr_list, + &lookup->dl_hinfo, &leaf_cpos, &blkno); + if (ret) + goto out; + ret = ocfs2_read_dx_leaf(fs, blkno, dx_leaf_buf); + if (ret) + goto out; + dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf; + if (dx_leaf->dl_list.de_num_used >= dx_leaf->dl_list.de_count) { + if (rebalanced) { + /* + * Rebalancing should have provided us with + * space in an appropriate leaf. + */ + ret = OCFS2_ET_DIR_NO_SPACE; + goto out; + } + + ret = ocfs2_dx_dir_rebalance(fs, dx_root, dx_leaf, + &lookup->dl_hinfo, leaf_cpos, blkno); + if (ret) + goto out; + rebalanced = 1; + goto restart_search; + } + lookup->dl_dx_leaf_blkno = blkno; + +out: + if (dx_leaf_buf) + ocfs2_free(&dx_leaf_buf); + return ret; +} + +/* + * Hashing code adapted from ext3 + */ +#define DELTA 0x9E3779B9 + +static void TEA_transform(uint32_t buf[4], uint32_t const in[]) +{ + uint32_t sum = 0; + uint32_t b0 = buf[0], b1 = buf[1]; + uint32_t a = in[0], b = in[1], c = in[2], d = in[3]; + int n = 16; + + do { + sum += DELTA; + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); + } while (--n); + + buf[0] += b0; + buf[1] += b1; +} + +static void str2hashbuf(const char *msg, int len, uint32_t *buf, int num) +{ + uint32_t pad, val; + int i; + + pad = (uint32_t)len | ((uint32_t)len << 8); + pad |= pad << 16; + + val = pad; + if (len > (num * 4)) + len = num * 4; + for (i = 0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + val = msg[i] + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num --; + } + } + if (--num >= 0) + *buf++ = val; + while(--num >= 0) + *buf++ = pad; +} + +void ocfs2_dx_dir_name_hash(ocfs2_filesys *fs, + const char *name, + int len, + struct ocfs2_dx_hinfo *hinfo) +{ + const char *p; + uint32_t in[8], buf[4]; + + /* + * XXX: Is this really necessary, if the index is never looked + * at by readdir? Is a hash value of '0' a bad idea ? + */ + if ((len == 1 && !strncmp(".", name, 1)) || + (len == 2 && !strncmp("..", name, 2))) { + buf[0] = buf[1] = 0; + goto out; + } + + memcpy(buf, OCFS2_RAW_SB(fs->fs_super)->s_dx_seed, sizeof(buf)); + + p = name; + while(len > 0) { + str2hashbuf(p, len, in, 4); + TEA_transform(buf, in); + len -= 16; + p += 16; + } + +out: + hinfo->major_hash = buf[0]; + hinfo->minor_hash = buf[1]; +} + +static int ocfs2_dx_dir_insert(struct ocfs2_dir_entry *dentry, + uint64_t blocknr, + int offset, + int blocksize, + char *buf, + void *priv_data) +{ + errcode_t ret = 0; + char *dx_buf = NULL; + char *dx_leaf_buf = NULL; + struct ocfs2_dx_root_block *dx_root = NULL; + struct ocfs2_dx_leaf *dx_leaf = NULL; + struct ocfs2_dir_lookup_result lookup; + struct ocfs2_dx_entry_list *entry_list; + struct dx_insert_ctxt *ctxt = (struct dx_insert_ctxt *)priv_data; + ocfs2_filesys *fs = ctxt->fs; + uint64_t dx_root_blkno = ctxt->dx_root_blkno; + int write_dx_leaf = 0; + + ret = ocfs2_malloc_block(fs->fs_io, &dx_buf); + if (ret) + goto out; + + ret = ocfs2_malloc_block(fs->fs_io, &dx_leaf_buf); + if (ret) + goto out; + + ret = ocfs2_read_dx_root(fs, dx_root_blkno, dx_buf); + if (ret) + goto out; + + dx_root = (struct ocfs2_dx_root_block *)dx_buf; + memset(&lookup, 0, sizeof(struct ocfs2_dir_lookup_result)); + ocfs2_dx_dir_name_hash(fs, dentry->name, + dentry->name_len, &lookup.dl_hinfo); + + if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) { + if (ocfs2_inline_dx_has_space(dx_root)) { + entry_list = &dx_root->dr_entries; + goto insert_into_entries; + } else { + /* root block is full, expand it to an extent */ + ret = ocfs2_expand_inline_dx_root(fs, dx_root); + if (ret) + goto out; + } + } + + ret = ocfs2_find_dir_space_dx(fs, dx_root, + dentry->name, dentry->name_len, &lookup); + if (ret) + goto out; + ret = ocfs2_read_dx_leaf(fs, lookup.dl_dx_leaf_blkno, dx_leaf_buf); + if (ret) + goto out; + dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf; + entry_list = &dx_leaf->dl_list; + write_dx_leaf = 1; + +insert_into_entries: + ocfs2_dx_entry_list_insert(entry_list, &lookup.dl_hinfo, blocknr); + if (write_dx_leaf) { + ret = ocfs2_write_dx_leaf(fs, dx_leaf->dl_blkno, dx_leaf); + if (ret) + goto out; + } + dx_root->dr_num_entries += 1; + ret = ocfs2_write_dx_root(fs, dx_root_blkno, dx_buf); +out: + if (dx_leaf_buf) + ocfs2_free(&dx_leaf_buf); + if (dx_buf) + ocfs2_free(&dx_buf); + return ret; +} + +errcode_t ocfs2_dx_dir_insert_entry(ocfs2_filesys *fs, uint64_t dir, const char *name, + uint64_t ino, uint64_t blkno) +{ + struct ocfs2_dir_entry dummy_de; + struct dx_insert_ctxt dummy_ctxt; + char *di_buf = NULL; + struct ocfs2_dinode *di; + errcode_t ret = 0; + + if (!ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super))) + goto out; + + assert(name); + memset(&dummy_de, 0, sizeof(struct ocfs2_dir_entry)); + memcpy(dummy_de.name, name, strlen(name)); + dummy_de.name_len = strlen(name); + + ret = ocfs2_malloc_block(fs->fs_io, &di_buf); + if (ret) + goto out; + ret = ocfs2_read_inode(fs, dir, di_buf); + if (ret) + goto out; + di = (struct ocfs2_dinode *)di_buf; + + if (!(di->i_dyn_features & OCFS2_INDEXED_DIR_FL)) + goto out; + + memset(&dummy_ctxt, 0, sizeof(struct dx_insert_ctxt)); + dummy_ctxt.dir_blkno = dir; + dummy_ctxt.fs = fs; + dummy_ctxt.dx_root_blkno = di->i_dx_root; + + ret = ocfs2_dx_dir_insert(&dummy_de, blkno, 0, + fs->fs_blocksize, NULL, &dummy_ctxt); +out: + if (di_buf) + ocfs2_free(&di_buf); + return ret; +} + + +/* + * This function overwite the indexed dir attribute of + * the given inode. The caller should make sure the dir's + * indexed tree is truncated. + * Currently tunefs.ocfs2 is the only user, before calling + * this function, tunefs.ocfs2 makes sure there is space + * for directory trailer. So directory entry moves here. + */ +errcode_t ocfs2_dx_dir_build(ocfs2_filesys *fs, + uint64_t dir) +{ + errcode_t ret = 0, err; + uint64_t dr_blkno; + char *dx_buf = NULL, *di_buf = NULL; + struct ocfs2_dinode *di; + struct ocfs2_dx_root_block *dx_root; + struct dx_insert_ctxt ctxt; + ocfs2_quota_hash *usrhash = NULL, *grphash = NULL; + uint32_t uid, gid; + long long change; + + ret = ocfs2_load_fs_quota_info(fs); + if (ret) + goto out; + + ret = ocfs2_init_quota_change(fs, &usrhash, &grphash); + if (ret) + goto out; + + ret = ocfs2_malloc_block(fs->fs_io, &di_buf); + if (ret) + goto out; + + ret = ocfs2_read_inode(fs, dir, di_buf); + if (ret) + goto out; + di = (struct ocfs2_dinode *)di_buf; + + if ((ocfs2_dir_indexed(di)) || + (di->i_dyn_features & OCFS2_INLINE_DATA_FL)) + goto out; + + ret = ocfs2_new_dx_root(fs, di, &dr_blkno); + if (ret) + goto out; + + ret = ocfs2_malloc_block(fs->fs_io, &dx_buf); + if (ret) + goto out; + + ret = ocfs2_read_dx_root(fs, dr_blkno, dx_buf); + if (ret) + goto out; + dx_root = (struct ocfs2_dx_root_block *)dx_buf; + + ret = ocfs2_init_dir_trailers(fs, di, dx_root); + if (ret) + goto out; + + dx_root->dr_dir_blkno = di->i_blkno; + dx_root->dr_num_entries = 0; + dx_root->dr_entries.de_count = ocfs2_dx_entries_per_root(fs->fs_blocksize); + + di->i_dx_root = dr_blkno; + di->i_dyn_features |= OCFS2_INDEXED_DIR_FL; + + ret = ocfs2_write_dx_root(fs, dr_blkno, dx_buf); + if (ret) + goto out; + ret = ocfs2_write_inode(fs, dir, di_buf); + if (ret) + goto out; + + ctxt.dir_blkno = dir; + ctxt.dx_root_blkno = dr_blkno; + ctxt.fs = fs; + ret = ocfs2_dir_iterate(fs, dir, 0, NULL, + ocfs2_dx_dir_insert, &ctxt); + + /* check quota for dx_leaf */ + ret = ocfs2_read_dx_root(fs, dr_blkno, dx_buf); + if (ret) + goto out; + ret = ocfs2_read_inode(fs, dir, di_buf); + if (ret) + goto out; + + change = ocfs2_clusters_to_bytes(fs, + dx_root->dr_clusters); + uid = di->i_uid; + gid = di->i_gid; + + ret = ocfs2_apply_quota_change(fs, usrhash, grphash, + uid, gid, change, 0); + if (ret) { + /* exceed quota, truncate the indexed tree */ + ret = ocfs2_dx_dir_truncate(fs, dir); + } + +out: + err = ocfs2_finish_quota_change(fs, usrhash, grphash); + if (!ret) + ret = err; + + if (di_buf) + ocfs2_free(&di_buf); + if (dx_buf) + ocfs2_free(&dx_buf); + + return ret; +} + +void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list, + int index) +{ + int num_used = entry_list->de_num_used; + if (num_used == 1 || index == (num_used - 1)) + goto clear; + + memmove(&entry_list->de_entries[index], + &entry_list->de_entries[index + 1], + (num_used - index - 1)*sizeof(struct ocfs2_dx_entry)); +clear: + num_used --; + memset(&entry_list->de_entries[num_used], 0, + sizeof(struct ocfs2_dx_entry)); + entry_list->de_num_used = num_used; +} + +static int ocfs2_match(int len, + const char *name, + struct ocfs2_dir_entry *de) +{ + if (len != de->name_len) + return 0; + if (!de->inode) + return 0; + return !memcmp((char *)name, de->name, len); +} + +int ocfs2_check_dir_entry(ocfs2_filesys *fs, + struct ocfs2_dir_entry *de, + char *dir_buf, + unsigned int offset) +{ + int rlen = de->rec_len; + int ret = 1; + + if ((rlen < OCFS2_DIR_REC_LEN(1)) || + (rlen % 4 != 0) || + (rlen < OCFS2_DIR_REC_LEN(de->name_len)) || + (((char *)de - dir_buf) > fs->fs_blocksize)) + ret = 0; + + return ret; +} + +int ocfs2_search_dirblock(ocfs2_filesys *fs, + char *dir_buf, + const char *name, + int namelen, + unsigned int bytes, + struct ocfs2_dir_entry **res_dir) +{ + struct ocfs2_dir_entry *de; + char *dlimit, *de_buf; + int de_len, offset = 0; + int ret = 0; + + de_buf = (char *)dir_buf; + dlimit = de_buf + bytes; + + while(de_buf < dlimit) { + de = (struct ocfs2_dir_entry *)de_buf; + + if ((de_buf + namelen <= dlimit) && + ocfs2_match(namelen, name, de)) { + if (!ocfs2_check_dir_entry(fs, de, dir_buf, offset)) { + ret = -1; + goto out; + } + if (res_dir) + *res_dir = de; + ret = 1; + goto out; + } + + de_len = de->rec_len; + if (de_len <= 0) { + ret = -1; + goto out; + } + de_buf += de_len; + offset += de_len; + } +out: + return ret; +} + +errcode_t ocfs2_dx_dir_search(ocfs2_filesys *fs, + const char *name, + int namelen, + struct ocfs2_dx_root_block *dx_root, + struct ocfs2_dir_lookup_result *lookup) +{ + errcode_t ret; + char *di_buf = NULL, *dir_buf = NULL, *dx_leaf_buf = NULL; + struct ocfs2_dx_entry_list *entry_list; + struct ocfs2_dx_leaf *dx_leaf; + struct ocfs2_dx_entry *dx_entry; + struct ocfs2_dir_entry *dir_ent; + uint32_t leaf_cpos; + uint64_t blkno; + int i, found; + + if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) + entry_list = &dx_root->dr_entries; + else { + ret = ocfs2_dx_dir_lookup(fs, dx_root, &dx_root->dr_list, + &lookup->dl_hinfo, &leaf_cpos, &blkno); + if (ret) + goto out; + + ret = ocfs2_malloc_block(fs->fs_io, &dx_leaf_buf); + if (ret) + goto out; + + ret = ocfs2_read_dx_leaf(fs, blkno, dx_leaf_buf); + if (ret) + goto out; + dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf; + entry_list = &dx_leaf->dl_list; + } + + assert(entry_list->de_count > 0); + assert(entry_list->de_num_used > 0); + assert(dx_root->dr_num_entries > 0); + + ret = ocfs2_malloc_block(fs->fs_io, &dir_buf); + if (ret) + goto out; + + found = 0; + for (i = 0; i < entry_list->de_num_used; i++) { + dx_entry = &entry_list->de_entries[i]; + if ((lookup->dl_hinfo.major_hash != dx_entry->dx_major_hash) || + (lookup->dl_hinfo.minor_hash != dx_entry->dx_minor_hash)) + continue; + + ret = ocfs2_read_blocks(fs, dx_entry->dx_dirent_blk, 1, dir_buf); + if (ret) + goto out; + + found = ocfs2_search_dirblock(fs, dir_buf, name, namelen, + fs->fs_blocksize, &dir_ent); + if (found == 1) + break; + + if (found == -1) { + ret = OCFS2_ET_DIR_CORRUPTED; + goto out; + } + } + + if (found <= 0) { + ret = OCFS2_ET_DIRENT_NOT_FOUND; + goto out; + } + + lookup->dl_leaf = dir_buf; + lookup->dl_leaf_blkno = dx_entry->dx_dirent_blk; + lookup->dl_entry = dir_ent; + lookup->dl_dx_entry = dx_entry; + lookup->dl_dx_entry_idx = i; + if (!(dx_root->dr_flags & OCFS2_DX_FLAG_INLINE)) { + lookup->dl_dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf; + lookup->dl_dx_leaf_blkno = blkno; + } + ret = 0; +out: + if (di_buf) + ocfs2_free(&di_buf); + if (ret) { + if (dir_buf) + ocfs2_free(&dir_buf); + if (dx_leaf_buf) + ocfs2_free(&dx_leaf_buf); + } + return ret; +} + +void release_lookup_res(struct ocfs2_dir_lookup_result *res) +{ + if (res->dl_leaf) + ocfs2_free(&res->dl_leaf); + if (res->dl_dx_leaf) + ocfs2_free(&res->dl_dx_leaf); +} + + diff --git a/libocfs2/dir_iterate.c b/libocfs2/dir_iterate.c index 1064d9f..8a3f5a9 100644 --- a/libocfs2/dir_iterate.c +++ b/libocfs2/dir_iterate.c @@ -68,6 +68,7 @@ errcode_t ocfs2_dir_iterate2(ocfs2_filesys *fs, int (*func)(uint64_t dir, int entry, struct ocfs2_dir_entry *dirent, + uint64_t blocknr, int offset, int blocksize, char *buf, @@ -131,6 +132,7 @@ out: struct xlate { int (*func)(struct ocfs2_dir_entry *dirent, + uint64_t blocknr, int offset, int blocksize, char *buf, @@ -140,12 +142,12 @@ struct xlate { static int xlate_func(uint64_t dir, int entry, - struct ocfs2_dir_entry *dirent, int offset, - int blocksize, char *buf, void *priv_data) + struct ocfs2_dir_entry *dirent, uint64_t blocknr, + int offset, int blocksize, char *buf, void *priv_data) { struct xlate *xl = (struct xlate *) priv_data; - return (*xl->func)(dirent, offset, blocksize, buf, xl->real_private); + return (*xl->func)(dirent, blocknr, offset, blocksize, buf, xl->real_private); } extern errcode_t ocfs2_dir_iterate(ocfs2_filesys *fs, @@ -153,6 +155,7 @@ extern errcode_t ocfs2_dir_iterate(ocfs2_filesys *fs, int flags, char *block_buf, int (*func)(struct ocfs2_dir_entry *dirent, + uint64_t blocknr, int offset, int blocksize, char *buf, @@ -169,6 +172,7 @@ extern errcode_t ocfs2_dir_iterate(ocfs2_filesys *fs, } static int ocfs2_process_dir_entry(ocfs2_filesys *fs, + uint64_t blocknr, unsigned int offset, int entry, int *changed, @@ -203,7 +207,7 @@ static int ocfs2_process_dir_entry(ocfs2_filesys *fs, ret = (ctx->func)(ctx->dir, (next_real_entry > offset) ? OCFS2_DIRENT_DELETED_FILE : entry, - dirent, offset, + dirent, blocknr, offset, fs->fs_blocksize, ctx->buf, ctx->priv_data); if (entry < OCFS2_DIRENT_OTHER_FILE) @@ -250,7 +254,7 @@ static int ocfs2_inline_dir_iterate(ocfs2_filesys *fs, entry = OCFS2_DIRENT_DOT_FILE; - ret = ocfs2_process_dir_entry(fs, offset, entry, &changed, + ret = ocfs2_process_dir_entry(fs, di->i_blkno, offset, entry, &changed, &do_abort, ctx); if (ret) return ret; @@ -291,7 +295,7 @@ int ocfs2_process_dir_block(ocfs2_filesys *fs, if (ctx->errcode) return OCFS2_BLOCK_ABORT; - ret = ocfs2_process_dir_entry(fs, offset, entry, &changed, + ret = ocfs2_process_dir_entry(fs, blocknr, offset, entry, &changed, &do_abort, ctx); if (ret) return ret; diff --git a/libocfs2/dir_iterate.h b/libocfs2/dir_iterate.h index 2d9587b..7f44702 100644 --- a/libocfs2/dir_iterate.h +++ b/libocfs2/dir_iterate.h @@ -35,6 +35,7 @@ struct dir_context { int (*func)(uint64_t dir, int entry, struct ocfs2_dir_entry *dirent, + uint64_t blocknr, int offset, int blocksize, char *buf, diff --git a/libocfs2/dirblock.c b/libocfs2/dirblock.c index 06a1b64..c22d843 100644 --- a/libocfs2/dirblock.c +++ b/libocfs2/dirblock.c @@ -33,7 +33,6 @@ #include "ocfs2/byteorder.h" #include "ocfs2/ocfs2.h" - unsigned int ocfs2_dir_trailer_blk_off(ocfs2_filesys *fs) { return fs->fs_blocksize - sizeof(struct ocfs2_dir_block_trailer); @@ -79,6 +78,15 @@ int ocfs2_skip_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di, return 1; } +/* + * We are sure there is prepared space for the trailer, no directory + * entry will overlap with the trailer: + * - if we rebuild the indexed tree for a directory, no dir entry + * will overwrite the trailer's space. + * - if we build the indexed tree by tunefs.ocfs2, it will enable + * meta ecc feature before enable indexed dirs feature. Which + * means space for each trailer is well prepared already. + */ void ocfs2_init_dir_trailer(ocfs2_filesys *fs, struct ocfs2_dinode *di, uint64_t blkno, void *buf) { @@ -105,9 +113,9 @@ static void ocfs2_swap_dir_entry(struct ocfs2_dir_entry *dirent) static errcode_t ocfs2_swap_dir_entries_direction(void *buf, uint64_t bytes, int to_cpu) { - char *p, *end; + char *p, *end; struct ocfs2_dir_entry *dirent; - unsigned int name_len, rec_len; + unsigned int name_len, rec_len; errcode_t retval = 0; p = (char *) buf; @@ -160,8 +168,8 @@ void ocfs2_swap_dir_trailer(struct ocfs2_dir_block_trailer *trailer) errcode_t ocfs2_read_dir_block(ocfs2_filesys *fs, struct ocfs2_dinode *di, uint64_t block, void *buf) { - errcode_t retval; - int end = fs->fs_blocksize; + errcode_t retval; + int end = fs->fs_blocksize; struct ocfs2_dir_block_trailer *trailer = NULL; retval = ocfs2_read_blocks(fs, block, 1, buf); @@ -197,9 +205,9 @@ out: errcode_t ocfs2_write_dir_block(ocfs2_filesys *fs, struct ocfs2_dinode *di, uint64_t block, void *inbuf) { - errcode_t retval; - char *buf = NULL; - int end = fs->fs_blocksize; + errcode_t retval; + char *buf = NULL; + int end = fs->fs_blocksize; struct ocfs2_dir_block_trailer *trailer = NULL; retval = ocfs2_malloc_block(fs->fs_io, &buf); @@ -230,28 +238,36 @@ out: return retval; } -static void ocfs2_swap_dx_entry_to_cpu(struct ocfs2_dx_entry *dx_entry) +static void ocfs2_swap_dx_entry(struct ocfs2_dx_entry *dx_entry) { - if (cpu_is_little_endian) - return; - dx_entry->dx_major_hash = bswap_32(dx_entry->dx_major_hash); dx_entry->dx_minor_hash = bswap_32(dx_entry->dx_minor_hash); dx_entry->dx_dirent_blk = bswap_64(dx_entry->dx_dirent_blk); } -static void ocfs2_swap_dx_entry_list_to_cpu(struct ocfs2_dx_entry_list *dl_list) +static void ocfs2_swap_dx_entry_list(struct ocfs2_dx_entry_list *dl_list) { int i; - if (cpu_is_little_endian) - return; - dl_list->de_count = bswap_16(dl_list->de_count); dl_list->de_num_used = bswap_16(dl_list->de_num_used); for (i = 0; i < dl_list->de_count; i++) - ocfs2_swap_dx_entry_to_cpu(&dl_list->de_entries[i]); + ocfs2_swap_dx_entry(&dl_list->de_entries[i]); +} + +static void ocfs2_swap_dx_entry_list_to_cpu(struct ocfs2_dx_entry_list *dl_list) +{ + if (cpu_is_little_endian) + return; + ocfs2_swap_dx_entry_list(dl_list); +} + +static void ocfs2_swap_dx_entry_list_from_cpu(struct ocfs2_dx_entry_list *dl_list) +{ + if (cpu_is_little_endian) + return; + ocfs2_swap_dx_entry_list(dl_list); } static void ocfs2_swap_dx_root_to_cpu(ocfs2_filesys *fs, @@ -276,45 +292,124 @@ static void ocfs2_swap_dx_root_to_cpu(ocfs2_filesys *fs, ocfs2_swap_extent_list_to_cpu(fs, dx_root, &dx_root->dr_list); } +static void ocfs2_swap_dx_root_from_cpu(ocfs2_filesys *fs, + struct ocfs2_dx_root_block *dx_root) +{ + if (cpu_is_little_endian) + return; + + dx_root->dr_suballoc_slot = bswap_16(dx_root->dr_suballoc_slot); + dx_root->dr_suballoc_bit = bswap_16(dx_root->dr_suballoc_bit); + dx_root->dr_fs_generation = bswap_32(dx_root->dr_fs_generation); + dx_root->dr_blkno = bswap_64(dx_root->dr_blkno); + dx_root->dr_last_eb_blk = bswap_64(dx_root->dr_last_eb_blk); + dx_root->dr_clusters = bswap_32(dx_root->dr_clusters); + dx_root->dr_dir_blkno = bswap_64(dx_root->dr_dir_blkno); + dx_root->dr_num_entries = bswap_32(dx_root->dr_num_entries); + dx_root->dr_free_blk = bswap_64(dx_root->dr_free_blk); + + if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) + ocfs2_swap_dx_entry_list_from_cpu(&dx_root->dr_entries); + else + ocfs2_swap_extent_list_from_cpu(fs, dx_root, &dx_root->dr_list); +} + +/* XXX: should use the errcode_t return value */ errcode_t ocfs2_read_dx_root(ocfs2_filesys *fs, uint64_t block, void *buf) { - errcode_t ret; + errcode_t ret; struct ocfs2_dx_root_block *dx_root; + char *dx_root_buf = NULL; - ret = ocfs2_read_blocks(fs, block, 1, buf); + ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf); if (ret) - return ret; + goto out; - dx_root = (struct ocfs2_dx_root_block *)buf; - ret = ocfs2_validate_meta_ecc(fs, buf, &dx_root->dr_check); + ret = ocfs2_read_blocks(fs, block, 1, dx_root_buf); if (ret) - return ret; + goto out; + + dx_root = (struct ocfs2_dx_root_block *)dx_root_buf; + ret = ocfs2_validate_meta_ecc(fs, dx_root_buf, &dx_root->dr_check); + if (ret) + goto out; if (memcmp(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE, - strlen(OCFS2_DX_ROOT_SIGNATURE))) - return OCFS2_ET_DIR_CORRUPTED; + strlen(OCFS2_DX_ROOT_SIGNATURE))) { + ret = OCFS2_ET_DIR_CORRUPTED; + goto out; + } ocfs2_swap_dx_root_to_cpu(fs, dx_root); + memcpy(buf, dx_root_buf, fs->fs_blocksize); + ret = 0; +out: + if (dx_root_buf) + ocfs2_free(&dx_root_buf); + return ret; +} - return 0; +errcode_t ocfs2_write_dx_root(ocfs2_filesys *fs, uint64_t block, + char *buf) +{ + errcode_t ret; + char *dx_root_buf = NULL; + struct ocfs2_dx_root_block *dx_root; + + if (!(fs->fs_flags & OCFS2_FLAG_RW)) + return OCFS2_ET_RO_FILESYS; + + if ((block < OCFS2_SUPER_BLOCK_BLKNO) || + (block > fs->fs_blocks)) + return OCFS2_ET_BAD_BLKNO; + + ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf); + if (ret) + goto out; + + memcpy(dx_root_buf, buf, fs->fs_blocksize); + + dx_root = (struct ocfs2_dx_root_block *)dx_root_buf; + ocfs2_swap_dx_root_from_cpu(fs, dx_root); + + ocfs2_compute_meta_ecc(fs, dx_root_buf, &dx_root->dr_check); + ret = io_write_block(fs->fs_io, block, 1, dx_root_buf); + if (!ret) + fs->fs_flags |= OCFS2_FLAG_CHANGED; + +out: + if (dx_root_buf) + ocfs2_free(&dx_root_buf); + return ret; +} + +static void ocfs2_swap_dx_leaf(struct ocfs2_dx_leaf *dx_leaf) +{ + dx_leaf->dl_blkno = bswap_64(dx_leaf->dl_blkno); + dx_leaf->dl_fs_generation = bswap_64(dx_leaf->dl_fs_generation); + + ocfs2_swap_dx_entry_list(&dx_leaf->dl_list); } static void ocfs2_swap_dx_leaf_to_cpu(struct ocfs2_dx_leaf *dx_leaf) { if (cpu_is_little_endian) return; + ocfs2_swap_dx_leaf(dx_leaf); +} - dx_leaf->dl_blkno = bswap_64(dx_leaf->dl_blkno); - dx_leaf->dl_fs_generation = bswap_64(dx_leaf->dl_fs_generation); - - ocfs2_swap_dx_entry_list_to_cpu(&dx_leaf->dl_list); +static void ocfs2_swap_dx_leaf_from_cpu(struct ocfs2_dx_leaf *dx_leaf) +{ + if (cpu_is_little_endian) + return; + ocfs2_swap_dx_leaf(dx_leaf); } errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block, void *buf) { - errcode_t ret; + errcode_t ret; struct ocfs2_dx_leaf *dx_leaf; ret = ocfs2_read_blocks(fs, block, 1, buf); @@ -335,6 +430,42 @@ errcode_t ocfs2_read_dx_leaf(ocfs2_filesys *fs, uint64_t block, return 0; } +errcode_t ocfs2_write_dx_leaf(ocfs2_filesys *fs, uint64_t block, + void *buf) +{ + errcode_t ret; + char *dx_leaf_buf = NULL; + struct ocfs2_dx_leaf *dx_leaf; + + if (!(fs->fs_flags & OCFS2_FLAG_RW)) + return OCFS2_ET_RO_FILESYS; + + if ((block < OCFS2_SUPER_BLOCK_BLKNO) || + (block > fs->fs_blocks)) + return OCFS2_ET_BAD_BLKNO; + + ret = ocfs2_malloc_block(fs->fs_io, &dx_leaf_buf); + if (ret) + goto out; + + memcpy(dx_leaf_buf, buf, fs->fs_blocksize); + dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_buf; + ocfs2_swap_dx_leaf_from_cpu(dx_leaf); + + ocfs2_compute_meta_ecc(fs, dx_leaf_buf, &dx_leaf->dl_check); + ret = io_write_block(fs->fs_io, block, 1, dx_leaf_buf); + + if (ret) + goto out; + + fs->fs_flags |= OCFS2_FLAG_CHANGED; + +out: + if (dx_leaf_buf) + ocfs2_free(&dx_leaf_buf); + return ret; +} + int ocfs2_dir_indexed(struct ocfs2_dinode *di) { if (di->i_dyn_features & OCFS2_INDEXED_DIR_FL) @@ -345,7 +476,7 @@ int ocfs2_dir_indexed(struct ocfs2_dinode *di) /* * Only use this when we already know the directory is indexed. */ -int __ocfs2_is_dir_trailer(ocfs2_filesys *fs, unsigned long de_off) +static int __ocfs2_is_dir_trailer(ocfs2_filesys *fs, unsigned long de_off) { if (de_off == ocfs2_dir_trailer_blk_off(fs)) return 1; diff --git a/libocfs2/expanddir.c b/libocfs2/expanddir.c index 6e9a192..ec05b74 100644 --- a/libocfs2/expanddir.c +++ b/libocfs2/expanddir.c @@ -69,6 +69,10 @@ errcode_t ocfs2_expand_dir(ocfs2_filesys *fs, if (ocfs2_support_inline_data(OCFS2_RAW_SB(fs->fs_super)) && cinode->ci_inode->i_dyn_features & OCFS2_INLINE_DATA_FL) { ret = ocfs2_convert_inline_data_to_extents(cinode); + if ((ret == 0) && + ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super))) { + ret = ocfs2_dx_dir_build(fs, dir); + } goto bail; } @@ -228,6 +232,18 @@ errcode_t ocfs2_init_dir(ocfs2_filesys *fs, goto bail; } + /* + * Only build indexed tree if the directory is initiated as non-inline. + * Otherwise, the indexed tree will be build when convert the inlined + * directory to extent in ocfs2_expand_dir() + */ + if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) && + (!cinode->ci_inode->i_dyn_features & OCFS2_INLINE_DATA_FL)) { + ret = ocfs2_dx_dir_build(fs, dir); + if (ret) + goto bail; + } + /* set link count of the parent */ ret = ocfs2_read_inode(fs, parent_dir, buf); if (ret) @@ -243,8 +259,6 @@ errcode_t ocfs2_init_dir(ocfs2_filesys *fs, /* update the inode */ ret = ocfs2_write_cached_inode(fs, cinode); - if (ret) - goto bail; bail: if (buf) diff --git a/libocfs2/extent_tree.c b/libocfs2/extent_tree.c index fe1be7a..3cbdcf8 100644 --- a/libocfs2/extent_tree.c +++ b/libocfs2/extent_tree.c @@ -153,6 +153,49 @@ static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = { .eo_fill_root_el = ocfs2_xattr_value_fill_root_el, }; +static void ocfs2_dx_root_set_last_eb_blk (struct ocfs2_extent_tree *et, + uint64_t blkno) +{ + struct ocfs2_dx_root_block *dx_root = et->et_object; + dx_root->dr_last_eb_blk = blkno; +} + +static uint64_t ocfs2_dx_root_get_last_eb_blk (struct ocfs2_extent_tree *et) +{ + struct ocfs2_dx_root_block *dx_root = et->et_object; + return dx_root->dr_last_eb_blk; +} + +static void ocfs2_dx_root_update_clusters(struct ocfs2_extent_tree *et, + uint32_t clusters) +{ + struct ocfs2_dx_root_block *dx_root = et->et_object; + dx_root->dr_clusters += clusters; +} + +static int ocfs2_dx_root_sanity_check(struct ocfs2_extent_tree *et) +{ + struct ocfs2_dx_root_block *dx_root = (struct ocfs2_dx_root_block *)et->et_object; + assert(OCFS2_IS_VALID_DX_ROOT(dx_root)); + + return 0; +} + +static void ocfs2_dx_root_fill_root_el (struct ocfs2_extent_tree *et) +{ + struct ocfs2_dx_root_block *dx_root = et->et_object; + + et->et_root_el = &dx_root->dr_list; +} + +static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = { + .eo_set_last_eb_blk = ocfs2_dx_root_set_last_eb_blk, + .eo_get_last_eb_blk = ocfs2_dx_root_get_last_eb_blk, + .eo_update_clusters = ocfs2_dx_root_update_clusters, + .eo_sanity_check = ocfs2_dx_root_sanity_check, + .eo_fill_root_el = ocfs2_dx_root_fill_root_el, +}; + static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, ocfs2_filesys *fs, char *buf, @@ -202,6 +245,15 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, xv, &ocfs2_xattr_value_et_ops); } +void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, + ocfs2_filesys *fs, + char *buf, uint64_t blkno) +{ + __ocfs2_init_extent_tree(et, fs, buf, blkno, + ocfs2_write_dx_root, + buf, &ocfs2_dx_root_et_ops); +} + static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et, uint64_t new_last_eb_blk) { @@ -4182,3 +4234,4 @@ out: ocfs2_free_path(path); return ret; } + diff --git a/libocfs2/extent_tree.h b/libocfs2/extent_tree.h index b7513dd..3f9b0dc 100644 --- a/libocfs2/extent_tree.h +++ b/libocfs2/extent_tree.h @@ -123,6 +123,9 @@ void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, char *buf, uint64_t blkno, ocfs2_root_write_func write, struct ocfs2_xattr_value_root *xv); +void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, + ocfs2_filesys *fs, + char *buf, uint64_t blkno); errcode_t ocfs2_tree_insert_extent(ocfs2_filesys *fs, struct ocfs2_extent_tree *et, uint32_t cpos, uint64_t c_blkno, @@ -135,6 +138,8 @@ int ocfs2_change_extent_flag(ocfs2_filesys *fs, int ocfs2_remove_extent(ocfs2_filesys *fs, struct ocfs2_extent_tree *et, uint32_t cpos, uint32_t len); + + /* * Structures which describe a path through a btree, and functions to * manipulate them. diff --git a/libocfs2/inode.c b/libocfs2/inode.c index 95419f4..fd88dc8 100644 --- a/libocfs2/inode.c +++ b/libocfs2/inode.c @@ -139,6 +139,9 @@ static void ocfs2_swap_inode_second(struct ocfs2_dinode *di) sb->s_uuid_hash = bswap_32(sb->s_uuid_hash); sb->s_first_cluster_group = bswap_64(sb->s_first_cluster_group); sb->s_xattr_inline_size = bswap_16(sb->s_xattr_inline_size); + sb->s_dx_seed[0] = bswap_32(sb->s_dx_seed[0]); + sb->s_dx_seed[1] = bswap_32(sb->s_dx_seed[1]); + sb->s_dx_seed[2] = bswap_32(sb->s_dx_seed[2]); } else if (di->i_flags & OCFS2_LOCAL_ALLOC_FL) { struct ocfs2_local_alloc *la = &di->id2.i_lab; diff --git a/libocfs2/link.c b/libocfs2/link.c index c89471d..310c24e 100644 --- a/libocfs2/link.c +++ b/libocfs2/link.c @@ -43,10 +43,12 @@ struct link_struct { of the block. This handles the directory trailer if it exists */ + int blkno; struct ocfs2_dinode *sb; }; static int link_proc(struct ocfs2_dir_entry *dirent, + uint64_t blocknr, int offset, int blocksize, char *buf, @@ -101,6 +103,7 @@ static int link_proc(struct ocfs2_dir_entry *dirent, strncpy(dirent->name, ls->name, ls->namelen); dirent->file_type = ls->flags; + ls->blkno = blocknr; ls->done++; return OCFS2_DIRENT_ABORT|OCFS2_DIRENT_CHANGED; } @@ -172,6 +175,12 @@ errcode_t ocfs2_link(ocfs2_filesys *fs, uint64_t dir, const char *name, retval = OCFS2_ET_INTERNAL_FAILURE; } + if (ls.done) { + if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) && + (di->i_dyn_features & OCFS2_INDEXED_DIR_FL)) + retval = ocfs2_dx_dir_insert_entry(fs, dir, ls.name, + ls.inode, ls.blkno); + } out_free: ocfs2_free(&buf); diff --git a/libocfs2/lookup.c b/libocfs2/lookup.c index 374113c..dac83c0 100644 --- a/libocfs2/lookup.c +++ b/libocfs2/lookup.c @@ -30,6 +30,7 @@ #include #include +#include #include "ocfs2/ocfs2.h" @@ -45,6 +46,7 @@ struct lookup_struct { #pragma argsused #endif static int lookup_proc(struct ocfs2_dir_entry *dirent, + uint64_t blocknr, int offset, int blocksize, char *buf, @@ -61,24 +63,81 @@ static int lookup_proc(struct ocfs2_dir_entry *dirent, return OCFS2_DIRENT_ABORT; } +static errcode_t ocfs2_find_entry_dx(ocfs2_filesys *fs, + struct ocfs2_dinode *di, + char *buf, + struct lookup_struct *ls) +{ + char *dx_root_buf = NULL; + struct ocfs2_dx_root_block *dx_root; + struct ocfs2_dir_lookup_result lookup; + errcode_t ret; + + ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf); + if (ret) + goto out; + ret = ocfs2_read_dx_root(fs, di->i_dx_root, dx_root_buf); + if (ret) + goto out; + dx_root = (struct ocfs2_dx_root_block *)dx_root_buf; + + memset(&lookup, 0, sizeof(struct ocfs2_dir_lookup_result)); + ocfs2_dx_dir_name_hash(fs, ls->name, + ls->len, &lookup.dl_hinfo); + + ret = ocfs2_dx_dir_search(fs, ls->name, ls->len, + dx_root, &lookup); + if (ret) + goto out; + + *ls->inode = lookup.dl_entry->inode; + ls->found++; + ret = 0; + +out: + release_lookup_res(&lookup); + if (dx_root_buf) + ocfs2_free(&dx_root_buf); + return ret; +} errcode_t ocfs2_lookup(ocfs2_filesys *fs, uint64_t dir, const char *name, int namelen, char *buf, uint64_t *inode) { - errcode_t retval; + errcode_t ret; struct lookup_struct ls; + char *di_buf = NULL; + struct ocfs2_dinode *di; ls.name = name; ls.len = namelen; ls.inode = inode; ls.found = 0; - retval = ocfs2_dir_iterate(fs, dir, 0, buf, lookup_proc, &ls); - if (retval) - return retval; + ret = ocfs2_malloc_block(fs->fs_io, &di_buf); + if (ret) + goto out; + ret = ocfs2_read_inode(fs, dir, di_buf); + if (ret) + goto out; + di = (struct ocfs2_dinode *)di_buf; + + if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) && + ocfs2_dir_indexed(di)) { + ret = ocfs2_find_entry_dx(fs, di, buf, &ls); + } else { + ret = ocfs2_dir_iterate(fs, dir, 0, buf, lookup_proc, &ls); + } + if (ret) + goto out; - return (ls.found) ? 0 : OCFS2_ET_FILE_NOT_FOUND; + ret = (ls.found) ? 0 : OCFS2_ET_FILE_NOT_FOUND; + +out: + if(di_buf) + ocfs2_free(&di_buf); + return ret; } diff --git a/libocfs2/ocfs2_err.et b/libocfs2/ocfs2_err.et index 13b03c4..1226681 100644 --- a/libocfs2/ocfs2_err.et +++ b/libocfs2/ocfs2_err.et @@ -102,6 +102,9 @@ ec OCFS2_ET_DIR_CORRUPTED, ec OCFS2_ET_NO_DIRECTORY, "OCFS2 inode is not a directory" +ec OCFS2_ET_DIRENT_NOT_FOUND, + "Directory entry not found" + ec OCFS2_ET_FILE_NOT_FOUND, "File not found by ocfs2_lookup" @@ -189,6 +192,9 @@ ec OCFS2_ET_CORRUPT_QUOTA_FILE, ec OCFS2_ET_CANNOT_DETERMINE_SECTOR_SIZE, "Cannot determine sector size" +ec OCFS2_ET_DX_BALANCE_EMPTY_LEAF, + "Trying to rebalance empty leaf for indexed dir" + ec OCFS2_ET_NONEMTY_QUOTA_HASH, "Freeing non-empty quota hash" diff --git a/libocfs2/truncate.c b/libocfs2/truncate.c index 1902366..7327253 100644 --- a/libocfs2/truncate.c +++ b/libocfs2/truncate.c @@ -436,6 +436,22 @@ errcode_t ocfs2_xattr_tree_truncate(ocfs2_filesys *fs, &ctxt, &changed); } + +errcode_t ocfs2_dir_indexed_tree_truncate(ocfs2_filesys *fs, + struct ocfs2_dx_root_block *dx_root) +{ + struct truncate_ctxt ctxt; + + memset(&ctxt, 0, sizeof (struct truncate_ctxt)); + ctxt.new_i_clusters = dx_root->dr_clusters; + ctxt.new_size_in_clusters = 0; + + return ocfs2_extent_iterate_dx_root(fs, dx_root, + OCFS2_EXTENT_FLAG_DEPTH_TRAVERSE, + NULL, truncate_iterate, &ctxt); +} + + #ifdef DEBUG_EXE #include #include diff --git a/libocfs2/unlink.c b/libocfs2/unlink.c index 286ef59..9ebba38 100644 --- a/libocfs2/unlink.c +++ b/libocfs2/unlink.c @@ -30,6 +30,7 @@ #define _LARGEFILE64_SOURCE #include +#include #include "ocfs2/ocfs2.h" @@ -46,6 +47,7 @@ struct link_struct { #pragma argsused #endif static int unlink_proc(struct ocfs2_dir_entry *dirent, + uint64_t blocknr, int offset, int blocksize, char *buf, @@ -66,6 +68,161 @@ static int unlink_proc(struct ocfs2_dir_entry *dirent, return OCFS2_DIRENT_ABORT|OCFS2_DIRENT_CHANGED; } +static errcode_t ocfs2_unlink_el(ocfs2_filesys *fs, + uint64_t dir, + const char *name, + uint64_t ino, + int flags) +{ + errcode_t ret; + struct link_struct ls; + + ls.name = name; + ls.namelen = name ? strlen(name) : 0; + ls.inode = ino; + ls.flags = 0; + ls.done = 0; + + ret = ocfs2_dir_iterate(fs, dir, 0, 0, unlink_proc, &ls); + if (ret) + goto out; + + if (!ls.done) + ret = OCFS2_ET_DIR_NO_SPACE; +out: + return ret; +} + +static errcode_t __ocfs2_delete_entry(ocfs2_filesys *fs, + struct ocfs2_dir_entry *de_del, + char *dir_buf) +{ + struct ocfs2_dir_entry *de, *pde; + int offset = 0; + errcode_t ret = 0; + + pde = NULL; + de = (struct ocfs2_dir_entry *)dir_buf; + + while( offset < fs->fs_blocksize) { + if (!ocfs2_check_dir_entry(fs, de, dir_buf, offset)) { + ret = OCFS2_ET_DIR_CORRUPTED; + goto out; + } + + if (de == de_del) { + if (pde) + pde->rec_len += de->rec_len; + else + de->inode = 0; + + goto out; + } + if (de->rec_len <= 0) { + ret = OCFS2_ET_DIR_CORRUPTED; + goto out; + } + pde = de; + offset += de->rec_len; + de = (struct ocfs2_dir_entry *)((char *)de + de->rec_len); + } + +out: + return ret; +} + +static errcode_t ocfs2_unlink_dx(ocfs2_filesys *fs, + uint64_t dir, + const char *name, + uint64_t ino, + int flags) +{ + char *di_buf = NULL, *dx_root_buf = NULL; + struct ocfs2_dinode *di; + struct ocfs2_dx_root_block *dx_root; + struct ocfs2_dx_entry_list *entry_list; + struct ocfs2_dir_block_trailer *trailer; + int write_dx_leaf = 0; + int add_to_free_list = 0; + int max_rec_len = 0; + struct ocfs2_dir_lookup_result lookup; + errcode_t ret; + + assert(name); + + ret = ocfs2_malloc_block(fs->fs_io, &di_buf); + if (ret) + goto out; + ret = ocfs2_read_inode(fs, dir, di_buf); + if (ret) + goto out; + di = (struct ocfs2_dinode *)di_buf; + + ret = ocfs2_malloc_block(fs->fs_io, &dx_root_buf); + if (ret) + goto out; + ret = ocfs2_read_dx_root(fs, di->i_dx_root, dx_root_buf); + if (ret) + goto out; + dx_root = (struct ocfs2_dx_root_block *)dx_root_buf; + + memset(&lookup, 0, sizeof(struct ocfs2_dir_lookup_result)); + ret= ocfs2_dx_dir_search(fs, name, strlen(name), dx_root, &lookup); + if (ret) + goto out; + + trailer = ocfs2_dir_trailer_from_block(fs, lookup.dl_leaf); + if (trailer->db_free_rec_len == 0) + add_to_free_list = 1; + + ret = __ocfs2_delete_entry(fs, lookup.dl_entry, lookup.dl_leaf); + if (ret) + goto out; + + max_rec_len = ocfs2_find_max_rec_len(fs, lookup.dl_leaf); + trailer->db_free_rec_len = max_rec_len; + if (add_to_free_list) { + trailer->db_free_next = dx_root->dr_free_blk; + dx_root->dr_free_blk = lookup.dl_leaf_blkno; + } + + ret = ocfs2_write_dir_block(fs, di, + lookup.dl_leaf_blkno, lookup.dl_leaf); + if (ret) + goto out; + + if (dx_root->dr_flags & OCFS2_DX_FLAG_INLINE) + entry_list = &dx_root->dr_entries; + else { + entry_list = &(lookup.dl_dx_leaf->dl_list); + write_dx_leaf = 1; + } + + ocfs2_dx_list_remove_entry(entry_list, + lookup.dl_dx_entry_idx); + + if (write_dx_leaf) { + ret = ocfs2_write_dx_leaf(fs, lookup.dl_dx_leaf_blkno, lookup.dl_dx_leaf); + if (ret) + goto out; + } + + dx_root->dr_num_entries --; + ret = ocfs2_write_dx_root(fs, di->i_dx_root, dx_root_buf); + if (ret) + goto out; + ret = ocfs2_write_inode(fs, di->i_blkno, di_buf); + +out: + release_lookup_res(&lookup); + if (dx_root_buf) + ocfs2_free(&dx_root_buf); + if (di_buf) + ocfs2_free(&di_buf); + + return ret; +} + #ifdef __TURBOC__ #pragma argsused #endif @@ -73,22 +230,30 @@ errcode_t ocfs2_unlink(ocfs2_filesys *fs, uint64_t dir, const char *name, uint64_t ino, int flags) { - errcode_t retval; - struct link_struct ls; + errcode_t ret; + char *di_buf = NULL; + struct ocfs2_dinode *di; if (!(fs->fs_flags & OCFS2_FLAG_RW)) return OCFS2_ET_RO_FILESYS; - ls.name = name; - ls.namelen = name ? strlen(name) : 0; - ls.inode = ino; - ls.flags = 0; - ls.done = 0; + ret = ocfs2_malloc_block(fs->fs_io, &di_buf); + if (ret) + goto out; + ret = ocfs2_read_inode(fs, dir, di_buf); + if (ret) + goto out; + di = (struct ocfs2_dinode *)di_buf; - retval = ocfs2_dir_iterate(fs, dir, 0, 0, unlink_proc, &ls); - if (retval) - return retval; + if (ocfs2_supports_indexed_dirs(OCFS2_RAW_SB(fs->fs_super)) && + (ocfs2_dir_indexed(di))) + ret = ocfs2_unlink_dx(fs, dir, name, ino, flags); + else + ret = ocfs2_unlink_el(fs, dir, name, ino, flags); - return (ls.done) ? 0 : OCFS2_ET_DIR_NO_SPACE; +out: + if (di_buf) + ocfs2_free(&di_buf); + return ret; } diff --git a/sizetest/sizetest.c b/sizetest/sizetest.c index e01e800..a68e6c3 100644 --- a/sizetest/sizetest.c +++ b/sizetest/sizetest.c @@ -144,7 +144,7 @@ static void print_ocfs2_super_block(void) SHOW_OFFSET(struct ocfs2_super_block, s_cluster_info); SHOW_OFFSET(struct ocfs2_super_block, s_xattr_inline_size); SHOW_OFFSET(struct ocfs2_super_block, s_reserved0); - SHOW_OFFSET(struct ocfs2_super_block, s_reserved1); + SHOW_OFFSET(struct ocfs2_super_block, s_dx_seed); SHOW_OFFSET(struct ocfs2_super_block, s_reserved2); END_TYPE(struct ocfs2_super_block); -- 1.7.0.2