From 84064263d0128536113ae7ced9ad2f0d17f7663e Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Wed, 15 Jun 2016 13:37:55 -0700 Subject: [PATCH 2/3] btrfs-progs: btrfsck: verify qgroups above level 0 At the moment we only check subvolume quota groups (level 0). With this patch we can check groups above 0, thus verifying the entire qgroup hierarchy on a file system. The accounting portion of this patch is modeled after the kernel - we are essentially reimplementing the 'quota rescan' case here. Most other sections of this code went unchanged, in particular the root counting works independently of the accounting. Signed-off-by: Mark Fasheh --- qgroup-verify.c | 309 +++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 252 insertions(+), 57 deletions(-) diff --git a/qgroup-verify.c b/qgroup-verify.c index 7b78504..23f2961 100644 --- a/qgroup-verify.c +++ b/qgroup-verify.c @@ -35,7 +35,8 @@ /*#define QGROUP_VERIFY_DEBUG*/ static unsigned long tot_extents_scanned = 0; -static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive); +struct qgroup_count; +static struct qgroup_count *find_count(u64 qgroupid); struct qgroup_info { u64 referenced; @@ -54,6 +55,16 @@ struct qgroup_count { struct qgroup_info info; struct rb_node rb_node; + + struct list_head groups; /* Parents when we are a child group */ + + /* + * Children when we are a parent group (not currently used but + * maintained to mirror kernel handling of qgroups) + */ + struct list_head members; + + u64 cur_refcnt; }; static struct counts_tree { @@ -66,6 +77,39 @@ static struct counts_tree { static struct rb_root by_bytenr = RB_ROOT; /* + * Glue structure to represent the relations between qgroups. Mirrored + * from kernel. + */ +struct btrfs_qgroup_list { + struct list_head next_group; + struct list_head next_member; + struct qgroup_count *group; /* Parent group */ + struct qgroup_count *member; +}; + +/* Allow us to reset ref counts during accounting without zeroing each group. */ +static u64 qgroup_seq = 1ULL; + +static inline void update_cur_refcnt(struct qgroup_count *c) +{ + if (c->cur_refcnt < qgroup_seq) + c->cur_refcnt = qgroup_seq; + c->cur_refcnt++; +} + +static inline u64 group_get_cur_refcnt(struct qgroup_count *c) +{ + if (c->cur_refcnt < qgroup_seq) + return 0; + return c->cur_refcnt - qgroup_seq; +} + +static void inc_qgroup_seq(int root_count) +{ + qgroup_seq += root_count + 1; +} + +/* * List of interior tree blocks. We walk this list after loading the * extent tree to resolve implied refs. For each interior node we'll * place a shared ref in the ref tree against each child object. This @@ -296,9 +340,10 @@ static void find_parent_roots(struct ulist *roots, u64 parent) } do { - if (ref->root) - ulist_add(roots, ref->root, 0, 0); - else + if (ref->root) { + if (is_fstree(ref->root)) + ulist_add(roots, ref->root, 0, 0); + } else find_parent_roots(roots, ref->parent); node = rb_next(node); @@ -307,6 +352,114 @@ static void find_parent_roots(struct ulist *roots, u64 parent) } while (node && ref->bytenr == parent); } +static int account_one_extent(struct ulist *roots, u64 bytenr, u64 num_bytes) +{ + int ret; + u64 id, nr_roots, nr_refs; + struct qgroup_count *count; + struct ulist *counts = ulist_alloc(0); + struct ulist *tmp = ulist_alloc(0); + struct ulist_iterator uiter; + struct ulist_iterator tmp_uiter; + struct ulist_node *unode; + struct ulist_node *tmp_unode; + struct btrfs_qgroup_list *glist; + + if (!counts || !tmp) { + ulist_free(counts); + ulist_free(tmp); + return ENOMEM; + } + + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(roots, &uiter))) { + BUG_ON(unode->val == 0ULL); + + /* + * For each root, find their corresponding tracking group and + * add it to our qgroups list. + */ + count = find_count(unode->val); + if (!count) + continue; + + BUG_ON(!is_fstree(unode->val)); + ret = ulist_add(counts, count->qgroupid, ptr_to_u64(count), 0); + if (ret < 0) + goto out; + + /* + * Now we look for parents (and parents of those...). Use a tmp + * ulist here to avoid re-walking (and re-incrementing) our + * already added items on every loop iteration. + */ + ulist_reinit(tmp); + ret = ulist_add(tmp, count->qgroupid, ptr_to_u64(count), 0); + if (ret < 0) + goto out; + + ULIST_ITER_INIT(&tmp_uiter); + while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) { + /* Bump the refcount on a node every time we see it. */ + count = u64_to_ptr(tmp_unode->aux); + update_cur_refcnt(count); + + list_for_each_entry(glist, &count->groups, next_group) { + struct qgroup_count *parent; + parent = glist->group; + id = parent->qgroupid; + + BUG_ON(!count); + + ret = ulist_add(counts, id, ptr_to_u64(parent), + 0); + if (ret < 0) + goto out; + ret = ulist_add(tmp, id, ptr_to_u64(parent), + 0); + if (ret < 0) + goto out; + } + } + } + + /* + * Now that we have gathered up and counted all the groups, we + * can add bytes for this ref. + */ + nr_roots = roots->nnodes; + ULIST_ITER_INIT(&uiter); + while ((unode = ulist_next(counts, &uiter))) { + count = u64_to_ptr(unode->aux); + + nr_refs = group_get_cur_refcnt(count); + if (nr_refs) { + count->info.referenced += num_bytes; + count->info.referenced_compressed += num_bytes; + + if (nr_refs == nr_roots) { + count->info.exclusive += num_bytes; + count->info.exclusive_compressed += num_bytes; + } + } +#ifdef QGROUP_VERIFY_DEBUG + printf("account (%llu, %llu), qgroup %llu/%llu, rfer %llu," + " excl %llu, refs %llu, roots %llu\n", bytenr, num_bytes, + btrfs_qgroup_level(count->qgroupid), + btrfs_qgroup_subvid(count->qgroupid), + count->info.referenced, count->info.exclusive, nr_refs, + nr_roots); +#endif + } + + inc_qgroup_seq(roots->nnodes); + ret = 0; +out: + ulist_free(counts); + ulist_free(tmp); + return ret; +} + static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes, struct ulist *roots); /* @@ -318,18 +471,15 @@ static void print_subvol_info(u64 subvolid, u64 bytenr, u64 num_bytes, * - resolve all possible roots for shared refs, insert each * of those into ref_roots ulist (this is a recursive process) * - * - Walk ref_roots ulist, adding extent bytes to each qgroup count that - * cooresponds to a found root. + * - With all roots resolved we can account the ref - this is done in + * account_one_extent(). */ -static void account_all_refs(int do_qgroups, u64 search_subvol) +static int account_all_refs(int do_qgroups, u64 search_subvol) { - int exclusive; struct ref *ref; struct rb_node *node; u64 bytenr, num_bytes; struct ulist *roots = ulist_alloc(0); - struct ulist_iterator uiter; - struct ulist_node *unode; node = rb_first(&by_bytenr); while (node) { @@ -347,10 +497,14 @@ static void account_all_refs(int do_qgroups, u64 search_subvol) do { BUG_ON(ref->bytenr != bytenr); BUG_ON(ref->num_bytes != num_bytes); - if (ref->root) - ulist_add(roots, ref->root, 0, 0); - else + if (ref->root) { + if (is_fstree(ref->root)) { + if (ulist_add(roots, ref->root, 0, 0) < 0) + goto enomem; + } + } else { find_parent_roots(roots, ref->parent); + } /* * When we leave this inner loop, node is set @@ -362,29 +516,22 @@ static void account_all_refs(int do_qgroups, u64 search_subvol) ref = rb_entry(node, struct ref, bytenr_node); } while (node && ref->bytenr == bytenr); - /* - * Now that we have all roots, we can properly account - * this extent against the corresponding qgroups. - */ - if (roots->nnodes == 1) - exclusive = 1; - else - exclusive = 0; - if (search_subvol) print_subvol_info(search_subvol, bytenr, num_bytes, roots); - ULIST_ITER_INIT(&uiter); - while ((unode = ulist_next(roots, &uiter))) { - BUG_ON(unode->val == 0ULL); - /* We only want to account fs trees */ - if (is_fstree(unode->val) && do_qgroups) - add_bytes(unode->val, num_bytes, exclusive); - } + if (!do_qgroups) + continue; + + if (account_one_extent(roots, bytenr, num_bytes)) + goto enomem; } ulist_free(roots); + return 0; +enomem: + error("Out of memory while accounting refs for qgroups!\n"); + return -ENOMEM; } static u64 resolve_one_root(u64 bytenr) @@ -668,6 +815,8 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key, item->exclusive = btrfs_qgroup_info_exclusive(leaf, disk); item->exclusive_compressed = btrfs_qgroup_info_exclusive_compressed(leaf, disk); + INIT_LIST_HEAD(&c->groups); + INIT_LIST_HEAD(&c->members); if (insert_count(c)) { free(c); @@ -677,29 +826,30 @@ static struct qgroup_count *alloc_count(struct btrfs_disk_key *key, return c; } -static void add_bytes(u64 root_objectid, u64 num_bytes, int exclusive) +static int add_qgroup_relation(u64 memberid, u64 parentid) { - struct qgroup_count *count = find_count(root_objectid); - struct qgroup_info *qg; + struct qgroup_count *member; + struct qgroup_count *parent; + struct btrfs_qgroup_list *list; - BUG_ON(num_bytes < 4096); /* Random sanity check. */ + if (memberid > parentid) + return 0; - if (!count) - return; + member = find_count(memberid); + parent = find_count(parentid); + if (!member || !parent) + return -ENOENT; - qg = &count->info; + list = calloc(1, sizeof(*list)); + if (!list) + return -ENOMEM; - qg->referenced += num_bytes; - /* - * count of compressed bytes is unimplemented, so we do the - * same as kernel. - */ - qg->referenced_compressed += num_bytes; + list->group = parent; + list->member = member; + list_add_tail(&list->next_group, &member->groups); + list_add_tail(&list->next_member, &parent->members); - if (exclusive) { - qg->exclusive += num_bytes; - qg->exclusive_compressed += num_bytes; - } + return 0; } static void read_qgroup_status(struct btrfs_path *path, @@ -728,11 +878,18 @@ static int load_quota_info(struct btrfs_fs_info *info) struct btrfs_qgroup_info_item *item; struct qgroup_count *count; int i, nr; + int search_relations = 0; +loop: + /* + * Do 2 passes, the first allocates group counts and reads status + * items. The 2nd pass picks up relation items and glues them + * to their respective count structures. + */ btrfs_init_path(&path); key.offset = 0; - key.objectid = 0; + key.objectid = search_relations ? 0 : BTRFS_QGROUP_RELATION_KEY; key.type = 0; ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); @@ -749,17 +906,26 @@ static int load_quota_info(struct btrfs_fs_info *info) btrfs_item_key(leaf, &disk_key, i); btrfs_disk_key_to_cpu(&key, &disk_key); + if (search_relations) { + if (key.type == BTRFS_QGROUP_RELATION_KEY) { + ret = add_qgroup_relation(key.objectid, + key.offset); + if (ret) { + error("out of memory\n"); + goto out; + } + } + continue; + } + if (key.type == BTRFS_QGROUP_STATUS_KEY) { read_qgroup_status(&path, &counts); continue; } - if (key.type == BTRFS_QGROUP_RELATION_KEY) - printf("Ignoring qgroup relation key %llu\n", - key.objectid); /* - * Ignore: BTRFS_QGROUP_LIMIT_KEY, - * BTRFS_QGROUP_RELATION_KEY + * At this point, we can ignore anything that + * isn't a qgroup info. */ if (key.type != BTRFS_QGROUP_INFO_KEY) continue; @@ -791,6 +957,12 @@ static int load_quota_info(struct btrfs_fs_info *info) ret = 0; btrfs_release_path(&path); + + if (!search_relations) { + search_relations = 1; + goto loop; + } + out: return ret; } @@ -1035,6 +1207,11 @@ static void print_fields_signed(long long bytes, prefix, type, bytes, type, bytes_compressed); } +static inline int qgroup_printable(struct qgroup_count *c) +{ + return !!(c->subvol_exists || btrfs_qgroup_level(c->qgroupid)); +} + static int report_qgroup_difference(struct qgroup_count *count, int verbose) { int is_different; @@ -1045,9 +1222,10 @@ static int report_qgroup_difference(struct qgroup_count *count, int verbose) is_different = excl_diff || ref_diff; - if (verbose || (is_different && count->subvol_exists)) { - printf("Counts for qgroup id: %llu %s\n", - (unsigned long long)count->qgroupid, + if (verbose || (is_different && qgroup_printable(count))) { + printf("Counts for qgroup id: %llu/%llu %s\n", + btrfs_qgroup_level(count->qgroupid), + btrfs_qgroup_subvid(count->qgroupid), is_different ? "are different" : ""); print_fields(info->referenced, info->referenced_compressed, @@ -1099,10 +1277,27 @@ void free_qgroup_counts(void) { struct rb_node *node; struct qgroup_count *c; + struct btrfs_qgroup_list *glist, *tmpglist; + node = rb_first(&counts.root); while (node) { c = rb_entry(node, struct qgroup_count, rb_node); + + list_for_each_entry_safe(glist, tmpglist, &c->groups, + next_group) { + list_del(&glist->next_group); + list_del(&glist->next_member); + free(glist); + } + list_for_each_entry_safe(glist, tmpglist, &c->members, + next_group) { + list_del(&glist->next_group); + list_del(&glist->next_member); + free(glist); + } + node = rb_next(node); + rb_erase(&c->rb_node, &counts.root); free(c); } @@ -1143,7 +1338,7 @@ int qgroup_verify_all(struct btrfs_fs_info *info) goto out; } - account_all_refs(1, 0); + ret = account_all_refs(1, 0); out: /* @@ -1215,7 +1410,7 @@ int print_extent_state(struct btrfs_fs_info *info, u64 subvol) } printf("Offset\t\tLen\tRoot Refs\tRoots\n"); - account_all_refs(0, subvol); + ret = account_all_refs(0, subvol); out: free_tree_blocks(); -- 2.1.4