forked from pool/grub2
125 lines
4.4 KiB
Diff
125 lines
4.4 KiB
Diff
|
From 149df8b7bb86401693e1f064859de0a8906d97b7 Mon Sep 17 00:00:00 2001
|
||
|
From: Qu Wenruo <wqu@suse.com>
|
||
|
Date: Thu, 28 Oct 2021 17:44:57 +0800
|
||
|
Subject: [PATCH] fs/btrfs: Make extent item iteration to handle gaps
|
||
|
|
||
|
[BUG]
|
||
|
Grub btrfs implementation can't handle two very basic btrfs file
|
||
|
layouts:
|
||
|
|
||
|
1. Mixed inline/regualr extents
|
||
|
# mkfs.btrfs -f test.img
|
||
|
# mount test.img /mnt/btrfs
|
||
|
# xfs_io -f -c "pwrite 0 1k" -c "sync" -c "falloc 0 4k" \
|
||
|
-c "pwrite 4k 4k" /mnt/btrfs/file
|
||
|
# umount /mnt/btrfs
|
||
|
# ./grub-fstest ./grub-fstest --debug=btrfs ~/test.img hex "/file"
|
||
|
|
||
|
Such mixed inline/regular extents case is not recommended layout,
|
||
|
but all existing tools and kernel can handle it without problem
|
||
|
|
||
|
2. NO_HOLES feature
|
||
|
# mkfs.btrfs -f test.img -O no_holes
|
||
|
# mount test.img /mnt/btrfs
|
||
|
# xfs_io -f -c "pwrite 0 4k" -c "pwrite 8k 4k" /mnt/btrfs/file
|
||
|
# umount /mnt/btrfs
|
||
|
# ./grub-fstest ./grub-fstest --debug=btrfs ~/test.img hex "/file"
|
||
|
|
||
|
NO_HOLES feature is going to be the default mkfs feature in the incoming
|
||
|
v5.15 release, and kernel has support for it since v4.0.
|
||
|
|
||
|
[CAUSE]
|
||
|
The way GRUB btrfs code iterates through file extents relies on no gap
|
||
|
between extents.
|
||
|
|
||
|
If any gap is hit, then grub btrfs will error out, without any proper
|
||
|
reason to help debug the bug.
|
||
|
|
||
|
This is a bad assumption, since a long long time ago btrfs has a new
|
||
|
feature called NO_HOLES to allow btrfs to skip the padding hole extent
|
||
|
to reduce metadata usage.
|
||
|
|
||
|
The NO_HOLES feature is already stable since kernel v4.0 and is going to
|
||
|
be the default mkfs feature in the incoming v5.15 btrfs-progs release.
|
||
|
|
||
|
[FIX]
|
||
|
When there is a extent gap, instead of error out, just try next item.
|
||
|
|
||
|
This is still not ideal, as kernel/progs/U-boot all do the iteration
|
||
|
item by item, not relying on the file offset continuity.
|
||
|
|
||
|
But it will be way more time consuming to correct the whole behavior
|
||
|
than starting from scratch to build a proper designed btrfs module for GRUB.
|
||
|
|
||
|
Signed-off-by: Qu Wenruo <wqu@suse.com>
|
||
|
Reviewed-by: Daniel Kiper <daniel.kiper@oracle.com>
|
||
|
---
|
||
|
grub-core/fs/btrfs.c | 35 ++++++++++++++++++++++++++++++++---
|
||
|
1 file changed, 32 insertions(+), 3 deletions(-)
|
||
|
|
||
|
diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
|
||
|
index 9625bdf16..b8625197b 100644
|
||
|
--- a/grub-core/fs/btrfs.c
|
||
|
+++ b/grub-core/fs/btrfs.c
|
||
|
@@ -1506,6 +1506,7 @@ grub_btrfs_extent_read (struct grub_btrfs_data *data,
|
||
|
grub_size_t csize;
|
||
|
grub_err_t err;
|
||
|
grub_off_t extoff;
|
||
|
+ struct grub_btrfs_leaf_descriptor desc;
|
||
|
if (!data->extent || data->extstart > pos || data->extino != ino
|
||
|
|| data->exttree != tree || data->extend <= pos)
|
||
|
{
|
||
|
@@ -1518,7 +1519,7 @@ grub_btrfs_extent_read (struct grub_btrfs_data *data,
|
||
|
key_in.type = GRUB_BTRFS_ITEM_TYPE_EXTENT_ITEM;
|
||
|
key_in.offset = grub_cpu_to_le64 (pos);
|
||
|
err = lower_bound (data, &key_in, &key_out, tree,
|
||
|
- &elemaddr, &elemsize, NULL, 0);
|
||
|
+ &elemaddr, &elemsize, &desc, 0);
|
||
|
if (err)
|
||
|
return -1;
|
||
|
if (key_out.object_id != ino
|
||
|
@@ -1557,10 +1558,38 @@ grub_btrfs_extent_read (struct grub_btrfs_data *data,
|
||
|
PRIxGRUB_UINT64_T "\n",
|
||
|
grub_le_to_cpu64 (key_out.offset),
|
||
|
grub_le_to_cpu64 (data->extent->size));
|
||
|
+ /*
|
||
|
+ * The way of extent item iteration is pretty bad, it completely
|
||
|
+ * requires all extents are contiguous, which is not ensured.
|
||
|
+ *
|
||
|
+ * Features like NO_HOLE and mixed inline/regular extents can cause
|
||
|
+ * gaps between file extent items.
|
||
|
+ *
|
||
|
+ * The correct way is to follow kernel/U-boot to iterate item by
|
||
|
+ * item, without any assumption on the file offset continuity.
|
||
|
+ *
|
||
|
+ * Here we just manually skip to next item and re-do the verification.
|
||
|
+ *
|
||
|
+ * TODO: Rework the whole extent item iteration code, if not the
|
||
|
+ * whole btrfs implementation.
|
||
|
+ */
|
||
|
if (data->extend <= pos)
|
||
|
{
|
||
|
- grub_error (GRUB_ERR_BAD_FS, "extent not found");
|
||
|
- return -1;
|
||
|
+ err = next(data, &desc, &elemaddr, &elemsize, &key_out);
|
||
|
+ if (err < 0)
|
||
|
+ return -1;
|
||
|
+ /* No next item for the inode, we hit the end */
|
||
|
+ if (err == 0 || key_out.object_id != ino ||
|
||
|
+ key_out.type != GRUB_BTRFS_ITEM_TYPE_EXTENT_ITEM)
|
||
|
+ return pos - pos0;
|
||
|
+
|
||
|
+ csize = grub_le_to_cpu64(key_out.offset) - pos;
|
||
|
+ if (csize > len)
|
||
|
+ csize = len;
|
||
|
+ buf += csize;
|
||
|
+ pos += csize;
|
||
|
+ len -= csize;
|
||
|
+ continue;
|
||
|
}
|
||
|
}
|
||
|
csize = data->extend - pos;
|
||
|
--
|
||
|
2.31.1
|
||
|
|