From 5be20c4ea7d010fc5c85de9e7259af426e00537a6f2cbfd74343754fe7d83812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Chv=C3=A1tal?= Date: Thu, 31 Oct 2013 09:12:04 +0000 Subject: [PATCH] Accepting request 205320 from home:dsterba:branches:filesystems SR: a few fixes, aimed for 13.1 RC2 - fsck updates - more mkfs sanity checks - qgroup rescan wait OBS-URL: https://build.opensuse.org/request/show/205320 OBS-URL: https://build.opensuse.org/package/show/filesystems/btrfsprogs?expand=0&rev=144 --- ...it-the-csum_root-if-we-do-init-csum-.patch | 36 +++ ...rogs-Fix-getopt-on-arm-ppc-platforms.patch | 45 ++++ ...ix-duplicate-__-su-typedefs-on-ppc64.patch | 38 +++ ...-btrfs-progs-use-reentrant-localtime.patch | 61 +++++ ...t-have-to-report-ENOMEDIUM-error-dur.patch | 36 +++ ...ded-btrfs-quota-rescan-w-switch-wait.patch | 94 +++++++ ...progs-fix-qgroup-realloc-inheritance.patch | 36 +++ ...restore-command-leaving-corrupted-fi.patch | 111 ++++++++ ...d-write-to-the-disk-before-sure-to-c.patch | 253 ++++++++++++++++++ ...rror-if-device-for-mkfs-is-too-small.patch | 46 ++++ ...r-if-device-have-no-space-to-make-pr.patch | 91 +++++++ ...ulate-available-blocks-on-device-pro.patch | 233 ++++++++++++++++ ...-track-of-transid-failures-and-fix-t.patch | 228 ++++++++++++++++ btrfsprogs.changes | 7 + btrfsprogs.spec | 26 ++ local-version-override.patch | 2 +- 16 files changed, 1342 insertions(+), 1 deletion(-) create mode 100644 0050-Btrfs-progs-commit-the-csum_root-if-we-do-init-csum-.patch create mode 100644 0051-btrfs-progs-Fix-getopt-on-arm-ppc-platforms.patch create mode 100644 0052-btrfs-progs-fix-duplicate-__-su-typedefs-on-ppc64.patch create mode 100644 0053-btrfs-progs-use-reentrant-localtime.patch create mode 100644 0054-btrfs-progs-don-t-have-to-report-ENOMEDIUM-error-dur.patch create mode 100644 0055-Btrfs-progs-added-btrfs-quota-rescan-w-switch-wait.patch create mode 100644 0056-btrfs-progs-fix-qgroup-realloc-inheritance.patch create mode 100644 
0057-Btrfs-progs-fix-restore-command-leaving-corrupted-fi.patch create mode 100644 0058-btrfs-progs-avoid-write-to-the-disk-before-sure-to-c.patch create mode 100644 0059-btrfs-progs-error-if-device-for-mkfs-is-too-small.patch create mode 100644 0060-btrfs-progs-error-if-device-have-no-space-to-make-pr.patch create mode 100644 0061-btrfs-progs-calculate-available-blocks-on-device-pro.patch create mode 100644 0062-Btrfs-progs-keep-track-of-transid-failures-and-fix-t.patch diff --git a/0050-Btrfs-progs-commit-the-csum_root-if-we-do-init-csum-.patch b/0050-Btrfs-progs-commit-the-csum_root-if-we-do-init-csum-.patch new file mode 100644 index 0000000..873e37a --- /dev/null +++ b/0050-Btrfs-progs-commit-the-csum_root-if-we-do-init-csum-.patch @@ -0,0 +1,36 @@ +From bdccfd46b1f2ff668351790db42e8831ca4ec4b4 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Fri, 14 Jun 2013 14:25:54 -0400 +Subject: [PATCH 50/62] Btrfs-progs: commit the csum_root if we do + --init-csum-tree + +This is just an oddity with the commit stuff in btrfs-progs. It will just +update the generation of the root you call with, which in btrfsck case would +have been the fs_root. But because we didn't actually update the fs_root we +wouldn't have cow'ed the fs root and therefore the generation will not match the +node which will make the file system unmountable. Fix this by calling with the +csum_root which is the one we're messing with. 
Thanks, + +Signed-off-by: Josef Bacik +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +--- + cmds-check.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/cmds-check.c b/cmds-check.c +index 8015288318a2..dbb41e5a4d5b 100644 +--- a/cmds-check.c ++++ b/cmds-check.c +@@ -5800,7 +5800,7 @@ int cmd_check(int argc, char **argv) + return -EIO; + } + +- ret = btrfs_commit_transaction(trans, root); ++ ret = btrfs_commit_transaction(trans, info->csum_root); + if (ret) + exit(1); + goto out; +-- +1.8.3.1 + diff --git a/0051-btrfs-progs-Fix-getopt-on-arm-ppc-platforms.patch b/0051-btrfs-progs-Fix-getopt-on-arm-ppc-platforms.patch new file mode 100644 index 0000000..fc453e0 --- /dev/null +++ b/0051-btrfs-progs-Fix-getopt-on-arm-ppc-platforms.patch @@ -0,0 +1,45 @@ +From 892bfedb24519d95dbe3d5cdc44d26adbc1c93dc Mon Sep 17 00:00:00 2001 +From: David Sterba +Date: Tue, 9 Jul 2013 18:38:29 +0200 +Subject: [PATCH 51/62] btrfs-progs: Fix getopt on arm/ppc platforms + +(same as commit bb0eabc383e9a3fde7cdb02591ca88243f3e31fb) +There, 'char' is unsigned, so once assigned '-1' from getopt, it gets +the value 255. Then, it compared to '-1' gives false. 
+ +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +--- + btrfs-crc.c | 2 +- + cmds-device.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/btrfs-crc.c b/btrfs-crc.c +index e4cda4312700..1990534ad4d2 100644 +--- a/btrfs-crc.c ++++ b/btrfs-crc.c +@@ -34,7 +34,7 @@ void usage(void) + + int main(int argc, char **argv) + { +- char c; ++ int c; + unsigned long checksum = 0; + char *str; + char *buf; +diff --git a/cmds-device.c b/cmds-device.c +index 41e79d375ce4..9e7328b20a55 100644 +--- a/cmds-device.c ++++ b/cmds-device.c +@@ -294,7 +294,7 @@ static int cmd_dev_stats(int argc, char **argv) + int ret; + int fdmnt; + int i; +- char c; ++ int c; + int err = 0; + __u64 flags = 0; + +-- +1.8.3.1 + diff --git a/0052-btrfs-progs-fix-duplicate-__-su-typedefs-on-ppc64.patch b/0052-btrfs-progs-fix-duplicate-__-su-typedefs-on-ppc64.patch new file mode 100644 index 0000000..bb3b606 --- /dev/null +++ b/0052-btrfs-progs-fix-duplicate-__-su-typedefs-on-ppc64.patch @@ -0,0 +1,38 @@ +From 2fdbfac178348ec229db866bccec8dd0f23738ab Mon Sep 17 00:00:00 2001 +From: Michal Marek +Date: Tue, 9 Jul 2013 18:38:46 +0200 +Subject: [PATCH 52/62] btrfs-progs: fix duplicate __[su]* typedefs on ppc64 + +The header does attempt to avoid conflicts with +, but on ppc64, gets somehow +included by other headers. + +Include explicitly, so that +notices it. The proper fix would be to fix to not +use its own typedefs. + +Originally observed in btrfs-convert, put the include into kerncompat.h +to avoid future problems. 
+ +Signed-off-by: Michal Marek +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +--- + kerncompat.h | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kerncompat.h b/kerncompat.h +index 9c116b4fe841..6584818d0af3 100644 +--- a/kerncompat.h ++++ b/kerncompat.h +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + + #ifndef READ + #define READ 0 +-- +1.8.3.1 + diff --git a/0053-btrfs-progs-use-reentrant-localtime.patch b/0053-btrfs-progs-use-reentrant-localtime.patch new file mode 100644 index 0000000..a6eca85 --- /dev/null +++ b/0053-btrfs-progs-use-reentrant-localtime.patch @@ -0,0 +1,61 @@ +From 2fac6f99128560c5993a02d2de0cc3d8238f3b51 Mon Sep 17 00:00:00 2001 +From: David Sterba +Date: Tue, 9 Jul 2013 18:39:24 +0200 +Subject: [PATCH 53/62] btrfs-progs: use reentrant localtime + +localtime may return NULL (when an error is detected eg. after setting +tzname), followed by a segfault when the value is about to be used. +localtime_r works, does not set tzname and does not return NULL. 
+ +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +--- + btrfs-list.c | 10 ++++++---- + cmds-subvolume.c | 10 ++++++---- + 2 files changed, 12 insertions(+), 8 deletions(-) + +diff --git a/btrfs-list.c b/btrfs-list.c +index 4fab85882165..ea80bfeef2f1 100644 +--- a/btrfs-list.c ++++ b/btrfs-list.c +@@ -1337,10 +1337,12 @@ static void print_subvolume_column(struct root_info *subv, + printf("%llu", subv->top_id); + break; + case BTRFS_LIST_OTIME: +- if (subv->otime) +- strftime(tstr, 256, "%Y-%m-%d %X", +- localtime(&subv->otime)); +- else ++ if (subv->otime) { ++ struct tm tm; ++ ++ localtime_r(&subv->otime, &tm); ++ strftime(tstr, 256, "%Y-%m-%d %X", &tm); ++ } else + strcpy(tstr, "-"); + printf("%s", tstr); + break; +diff --git a/cmds-subvolume.c b/cmds-subvolume.c +index ccb476274240..faf05cab2ad9 100644 +--- a/cmds-subvolume.c ++++ b/cmds-subvolume.c +@@ -898,10 +898,12 @@ static int cmd_subvol_show(int argc, char **argv) + uuid_unparse(get_ri.puuid, uuidparse); + printf("\tParent uuid: \t\t%s\n", uuidparse); + +- if (get_ri.otime) +- strftime(tstr, 256, "%Y-%m-%d %X", +- localtime(&get_ri.otime)); +- else ++ if (get_ri.otime) { ++ struct tm tm; ++ ++ localtime_r(&get_ri.otime, &tm); ++ strftime(tstr, 256, "%Y-%m-%d %X", &tm); ++ } else + strcpy(tstr, "-"); + printf("\tCreation time: \t\t%s\n", tstr); + +-- +1.8.3.1 + diff --git a/0054-btrfs-progs-don-t-have-to-report-ENOMEDIUM-error-dur.patch b/0054-btrfs-progs-don-t-have-to-report-ENOMEDIUM-error-dur.patch new file mode 100644 index 0000000..9f30997 --- /dev/null +++ b/0054-btrfs-progs-don-t-have-to-report-ENOMEDIUM-error-dur.patch @@ -0,0 +1,36 @@ +From 3b167f3ea4e8b8ea292326924653d8862114626e Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Fri, 26 Jul 2013 01:35:30 +0800 +Subject: [PATCH 54/62] btrfs-progs: don't have to report ENOMEDIUM error + during open + +when we scan /proc/partitions the cdrom is scanned +as well, and we don't have to report ENOMEDIUM errors +against it. 
+ +Signed-off-by: Anand Jain +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +--- + utils.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/utils.c b/utils.c +index be0bfd5ecac2..6c1a96f2032f 100644 +--- a/utils.c ++++ b/utils.c +@@ -1414,8 +1414,9 @@ scan_again: + + fd = open(fullpath, O_RDONLY); + if (fd < 0) { +- fprintf(stderr, "failed to open %s: %s\n", +- fullpath, strerror(errno)); ++ if (errno != ENOMEDIUM) ++ fprintf(stderr, "failed to open %s: %s\n", ++ fullpath, strerror(errno)); + continue; + } + ret = btrfs_scan_one_device(fd, fullpath, &tmp_devices, +-- +1.8.3.1 + diff --git a/0055-Btrfs-progs-added-btrfs-quota-rescan-w-switch-wait.patch b/0055-Btrfs-progs-added-btrfs-quota-rescan-w-switch-wait.patch new file mode 100644 index 0000000..f4528d1 --- /dev/null +++ b/0055-Btrfs-progs-added-btrfs-quota-rescan-w-switch-wait.patch @@ -0,0 +1,94 @@ +From d74078b9e01ad6eab5ba4d951917c29a70e7be18 Mon Sep 17 00:00:00 2001 +From: Jan Schmidt +Date: Mon, 6 May 2013 21:15:18 +0200 +Subject: [PATCH 55/62] Btrfs-progs: added "btrfs quota rescan" -w switch + (wait) + +With -w one can wait for a rescan operation to finish. It can be used when +starting a rescan operation or later to wait for the currently running +rescan operation to finish. Waiting is interruptible. 
+ +Signed-off-by: Jan Schmidt +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +--- + cmds-quota.c | 19 +++++++++++++++++-- + ioctl.h | 1 + + 2 files changed, 18 insertions(+), 2 deletions(-) + +diff --git a/cmds-quota.c b/cmds-quota.c +index 2e2971a41df7..af98d6e71570 100644 +--- a/cmds-quota.c ++++ b/cmds-quota.c +@@ -93,10 +93,11 @@ static int cmd_quota_disable(int argc, char **argv) + } + + static const char * const cmd_quota_rescan_usage[] = { +- "btrfs quota rescan [-s] ", ++ "btrfs quota rescan [-sw] ", + "Trash all qgroup numbers and scan the metadata again with the current config.", + "", + "-s show status of a running rescan operation", ++ "-w wait for rescan operation to finish (can be already in progress)", + NULL + }; + +@@ -108,21 +109,30 @@ static int cmd_quota_rescan(int argc, char **argv) + char *path = NULL; + struct btrfs_ioctl_quota_rescan_args args; + int ioctlnum = BTRFS_IOC_QUOTA_RESCAN; ++ int wait_for_completion = 0; + + optind = 1; + while (1) { +- int c = getopt(argc, argv, "s"); ++ int c = getopt(argc, argv, "sw"); + if (c < 0) + break; + switch (c) { + case 's': + ioctlnum = BTRFS_IOC_QUOTA_RESCAN_STATUS; + break; ++ case 'w': ++ wait_for_completion = 1; ++ break; + default: + usage(cmd_quota_rescan_usage); + } + } + ++ if (ioctlnum != BTRFS_IOC_QUOTA_RESCAN && wait_for_completion) { ++ fprintf(stderr, "ERROR: -w cannot be used with -s\n"); ++ return 12; ++ } ++ + if (check_argc_exact(argc - optind, 1)) + usage(cmd_quota_rescan_usage); + +@@ -137,6 +147,11 @@ static int cmd_quota_rescan(int argc, char **argv) + + ret = ioctl(fd, ioctlnum, &args); + e = errno; ++ ++ if (wait_for_completion && (ret == 0 || e == EINPROGRESS)) { ++ ret = ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT, &args); ++ e = errno; ++ } + close(fd); + + if (ioctlnum == BTRFS_IOC_QUOTA_RESCAN) { +diff --git a/ioctl.h b/ioctl.h +index abe6dd4234d9..c260bbf6b4bb 100644 +--- a/ioctl.h ++++ b/ioctl.h +@@ -529,6 +529,7 @@ struct btrfs_ioctl_clone_range_args { + struct 
btrfs_ioctl_quota_rescan_args) + #define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \ + struct btrfs_ioctl_quota_rescan_args) ++#define BTRFS_IOC_QUOTA_RESCAN_WAIT _IO(BTRFS_IOCTL_MAGIC, 46) + #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \ + char[BTRFS_LABEL_SIZE]) + #define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \ +-- +1.8.3.1 + diff --git a/0056-btrfs-progs-fix-qgroup-realloc-inheritance.patch b/0056-btrfs-progs-fix-qgroup-realloc-inheritance.patch new file mode 100644 index 0000000..1f50e27 --- /dev/null +++ b/0056-btrfs-progs-fix-qgroup-realloc-inheritance.patch @@ -0,0 +1,36 @@ +From 85a8cc9ecfb9b61c5feaaf3ba861f27a2501691d Mon Sep 17 00:00:00 2001 +From: Zach Brown +Date: Wed, 14 Aug 2013 16:16:40 -0700 +Subject: [PATCH 56/62] btrfs-progs: fix qgroup realloc inheritance + +qgroup.c:82:23: warning: memcpy with byte count of 0 +qgroup.c:83:23: warning: memcpy with byte count of 0 + +The inheritance wasn't copying qgroups[] because a confused sizeof() +gave 0 byte memcpy()s. It's been like this for the year since it was +merged, so I guess this isn't a very important thing to do :). 
+ +Signed-off-by: Zach Brown +Reviewed-by: Arne Jansen +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +--- + qgroup.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/qgroup.c b/qgroup.c +index dafde12becf6..e860b309e77c 100644 +--- a/qgroup.c ++++ b/qgroup.c +@@ -74,7 +74,7 @@ int qgroup_inherit_realloc(struct btrfs_qgroup_inherit **inherit, int n, + + if (*inherit) { + struct btrfs_qgroup_inherit *i = *inherit; +- int s = sizeof(out->qgroups); ++ int s = sizeof(out->qgroups[0]); + + out->num_qgroups = i->num_qgroups; + out->num_ref_copies = i->num_ref_copies; +-- +1.8.3.1 + diff --git a/0057-Btrfs-progs-fix-restore-command-leaving-corrupted-fi.patch b/0057-Btrfs-progs-fix-restore-command-leaving-corrupted-fi.patch new file mode 100644 index 0000000..2ca8c64 --- /dev/null +++ b/0057-Btrfs-progs-fix-restore-command-leaving-corrupted-fi.patch @@ -0,0 +1,111 @@ +From 18d8ff57c3cd9ee31829b19fcd6ca57ed201720a Mon Sep 17 00:00:00 2001 +From: Filipe David Borba Manana +Date: Tue, 3 Sep 2013 12:19:58 +0100 +Subject: [PATCH 57/62] Btrfs-progs: fix restore command leaving corrupted + files + +When there are files that have parts shared with snapshots, the +restore command was incorrectly restoring them, as it was not +taking into account the offset and number of bytes fields from +the file extent item. Besides leaving the recovered file corrupt, +it was also inefficient as it read and wrote more data than needed +(with each extent copy overwriting portions of the one previously +written). + +The following steps show how to reproduce this corruption issue: + +$ mkfs.btrfs -f /dev/sdb3 +$ mount /dev/sdb3 /mnt/btrfs +$ perl -e '$d = "\x41" . 
("\x00" x (1024*1024+349)); open($f,">","/mnt/btrfs/foobar"); print $f $d; close($f);' +$ du -b /mnt/btrfs/foobar +1048926 /mnt/btrfs/foobar +$ md5sum /mnt/btrfs/foobar +f9f778f3a7410c40e4ed104a3a63c3c4 /mnt/btrfs/foobar + +$ btrfs subvolume snapshot /mnt/btrfs /mnt/btrfs/my_snap +$ perl -e 'open($f, "+<", "/mnt/btrfs/foobar"); seek($f, 4096, 0); print $f "\xff"; close($f);' +$ md5sum /mnt/btrfs/foobar +b983fcefd4622a03a78936484c40272b /mnt/btrfs/foobar +$ umount /mnt/btrfs + +$ btrfs restore /dev/sdb3 /tmp/copy +$ du -b /tmp/copy/foobar +1048926 /tmp/copy/foobar +$ md5sum /tmp/copy/foobar +88db338cbc1c44dfabae083f1ce642d5 /tmp/copy/foobar +$ od -t x1 -j 8192 -N 4 /tmp/copy/foobar +0020000 41 00 00 00 +0020004 +$ mount /dev/sdb3 /mnt/btrfs +$ od -t x1 -j 8192 -N 4 /mnt/btrfs/foobar +0020000 00 00 00 00 +0020004 +$ md5sum /mnt/btrfs/foobar +b983fcefd4622a03a78936484c40272b /mnt/btrfs/foobar + +Tested this change with zlib, lzo compression and file sizes larger +than 1GiB, and found no regression or other corruption issues (so far +at least). 
+ +Signed-off-by: Filipe David Borba Manana +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +--- + cmds-restore.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/cmds-restore.c b/cmds-restore.c +index e48df40320f6..9688599742d9 100644 +--- a/cmds-restore.c ++++ b/cmds-restore.c +@@ -272,6 +272,7 @@ static int copy_one_extent(struct btrfs_root *root, int fd, + u64 bytenr; + u64 ram_size; + u64 disk_size; ++ u64 num_bytes; + u64 length; + u64 size_left; + u64 dev_bytenr; +@@ -288,7 +289,9 @@ static int copy_one_extent(struct btrfs_root *root, int fd, + disk_size = btrfs_file_extent_disk_num_bytes(leaf, fi); + ram_size = btrfs_file_extent_ram_bytes(leaf, fi); + offset = btrfs_file_extent_offset(leaf, fi); +- size_left = disk_size; ++ num_bytes = btrfs_file_extent_num_bytes(leaf, fi); ++ size_left = num_bytes; ++ bytenr += offset; + + if (offset) + printf("offset is %Lu\n", offset); +@@ -296,7 +299,7 @@ static int copy_one_extent(struct btrfs_root *root, int fd, + if (disk_size == 0) + return 0; + +- inbuf = malloc(disk_size); ++ inbuf = malloc(size_left); + if (!inbuf) { + fprintf(stderr, "No memory\n"); + return -1; +@@ -351,8 +354,8 @@ again: + goto again; + + if (compress == BTRFS_COMPRESS_NONE) { +- while (total < ram_size) { +- done = pwrite(fd, inbuf+total, ram_size-total, ++ while (total < num_bytes) { ++ done = pwrite(fd, inbuf+total, num_bytes-total, + pos+total); + if (done < 0) { + ret = -1; +@@ -365,7 +368,7 @@ again: + goto out; + } + +- ret = decompress(inbuf, outbuf, disk_size, &ram_size, compress); ++ ret = decompress(inbuf, outbuf, num_bytes, &ram_size, compress); + if (ret) { + num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, + bytenr, length); +-- +1.8.3.1 + diff --git a/0058-btrfs-progs-avoid-write-to-the-disk-before-sure-to-c.patch b/0058-btrfs-progs-avoid-write-to-the-disk-before-sure-to-c.patch new file mode 100644 index 0000000..6efff89 --- /dev/null +++ 
b/0058-btrfs-progs-avoid-write-to-the-disk-before-sure-to-c.patch @@ -0,0 +1,253 @@ +From 6620f2caf5b50c28737eff1b522c22c153f9b1fa Mon Sep 17 00:00:00 2001 +From: Anand Jain +Date: Wed, 7 Aug 2013 20:11:25 +0800 +Subject: [PATCH 58/62] btrfs-progs: avoid write to the disk before sure to + create fs + +This patch provides fix for the following bug, + +When mkfs.btrfs fails the disks shouldn't be written. +------------ +btrfs fi show /dev/sdb +Label: none uuid: 60fb76f4-3b4d-4632-a7da-6a44dea5573d + Total devices 1 FS bytes used 24.00KiB + devid 1 size 2.00GiB used 20.00MiB path /dev/sdb + +mkfs.btrfs -dsingle -mraid1 /dev/sdb -f +:: +unable to create FS with metadata profile 16 (have 1 devices) + +btrfs fi show /dev/sdb +Label: none uuid: 2da2179d-ecb1-4a4e-a44d-e7613a08c18d + Total devices 1 FS bytes used 24.00KiB + devid 1 size 2.00GiB used 20.00MiB path /dev/sdb +------------- + +Signed-off-by: Anand Jain +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +--- + mkfs.c | 104 +++++++++++++++++++++++++--------------------------------------- + utils.c | 41 +++++++++++++++++++++++++ + utils.h | 2 ++ + 3 files changed, 84 insertions(+), 63 deletions(-) + +diff --git a/mkfs.c b/mkfs.c +index 26be20df90d9..8a68f8a9f762 100644 +--- a/mkfs.c ++++ b/mkfs.c +@@ -195,83 +195,28 @@ static int create_raid_groups(struct btrfs_trans_handle *trans, + int metadata_profile_opt, int mixed, int ssd) + { + u64 num_devices = btrfs_super_num_devices(root->fs_info->super_copy); +- u64 allowed = 0; +- u64 devices_for_raid = num_devices; + int ret; + +- /* +- * Set default profiles according to number of added devices. +- * For mixed groups defaults are single/single. +- */ +- if (!metadata_profile_opt && !mixed) { +- if (num_devices == 1 && ssd) +- printf("Detected a SSD, turning off metadata " +- "duplication. Mkfs with -m dup if you want to " +- "force metadata duplication.\n"); +- metadata_profile = (num_devices > 1) ? +- BTRFS_BLOCK_GROUP_RAID1 : (ssd) ? 
0: BTRFS_BLOCK_GROUP_DUP; +- } +- if (!data_profile_opt && !mixed) { +- data_profile = (num_devices > 1) ? +- BTRFS_BLOCK_GROUP_RAID0 : 0; /* raid0 or single */ +- } +- +- if (devices_for_raid > 4) +- devices_for_raid = 4; +- +- switch (devices_for_raid) { +- default: +- case 4: +- allowed |= BTRFS_BLOCK_GROUP_RAID10; +- case 3: +- allowed |= BTRFS_BLOCK_GROUP_RAID6; +- case 2: +- allowed |= BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | +- BTRFS_BLOCK_GROUP_RAID5; +- break; +- case 1: +- allowed |= BTRFS_BLOCK_GROUP_DUP; +- } +- +- if (metadata_profile & ~allowed) { +- fprintf(stderr, "unable to create FS with metadata " +- "profile %llu (have %llu devices)\n", metadata_profile, +- num_devices); +- exit(1); +- } +- if (data_profile & ~allowed) { +- fprintf(stderr, "unable to create FS with data " +- "profile %llu (have %llu devices)\n", data_profile, +- num_devices); +- exit(1); +- } +- +- /* allow dup'ed data chunks only in mixed mode */ +- if (!mixed && (data_profile & BTRFS_BLOCK_GROUP_DUP)) { +- fprintf(stderr, "dup for data is allowed only in mixed mode\n"); +- exit(1); +- } +- +- if (allowed & metadata_profile) { ++ if (metadata_profile) { + u64 meta_flags = BTRFS_BLOCK_GROUP_METADATA; + + ret = create_one_raid_group(trans, root, + BTRFS_BLOCK_GROUP_SYSTEM | +- (allowed & metadata_profile)); ++ metadata_profile); + BUG_ON(ret); + + if (mixed) + meta_flags |= BTRFS_BLOCK_GROUP_DATA; + + ret = create_one_raid_group(trans, root, meta_flags | +- (allowed & metadata_profile)); ++ metadata_profile); + BUG_ON(ret); + + } +- if (!mixed && num_devices > 1 && (allowed & data_profile)) { ++ if (!mixed && num_devices > 1 && data_profile) { + ret = create_one_raid_group(trans, root, + BTRFS_BLOCK_GROUP_DATA | +- (allowed & data_profile)); ++ data_profile); + BUG_ON(ret); + } + recow_roots(trans, root); +@@ -1362,14 +1307,48 @@ int main(int ac, char **av) + } + } + +- /* if we are here that means all devs are good to btrfsify */ + optind = saved_optind; + dev_cnt = 
ac - optind; + ++ file = av[optind++]; ++ ssd = is_ssd(file); ++ ++ /* ++ * Set default profiles according to number of added devices. ++ * For mixed groups defaults are single/single. ++ */ ++ if (!mixed) { ++ if (!metadata_profile_opt) { ++ if (dev_cnt == 1 && ssd) ++ printf("Detected a SSD, turning off metadata " ++ "duplication. Mkfs with -m dup if you want to " ++ "force metadata duplication.\n"); ++ ++ metadata_profile = (dev_cnt > 1) ? ++ BTRFS_BLOCK_GROUP_RAID1 : (ssd) ? ++ 0: BTRFS_BLOCK_GROUP_DUP; ++ } ++ if (!data_profile_opt) { ++ data_profile = (dev_cnt > 1) ? ++ BTRFS_BLOCK_GROUP_RAID0 : 0; /* raid0 or single */ ++ } ++ } else { ++ /* this is not needed but just for completeness */ ++ metadata_profile = 0; ++ data_profile = 0; ++ } ++ ++ ret = test_num_disk_vs_raid(metadata_profile, data_profile, ++ dev_cnt, mixed, estr); ++ if (ret) { ++ fprintf(stderr, "Error: %s\n", estr); ++ exit(1); ++ } ++ ++ /* if we are here that means all devs are good to btrfsify */ + printf("\nWARNING! - %s IS EXPERIMENTAL\n", BTRFS_BUILD_VERSION); + printf("WARNING! 
- see http://btrfs.wiki.kernel.org before using\n\n"); + +- file = av[optind++]; + dev_cnt--; + + if (!source_dir_set) { +@@ -1412,7 +1391,6 @@ int main(int ac, char **av) + dev_block_count = block_count; + } + +- ssd = is_ssd(file); + + if (mixed) { + if (metadata_profile != data_profile) { +diff --git a/utils.c b/utils.c +index 6c1a96f2032f..b2c6a06ffaa5 100644 +--- a/utils.c ++++ b/utils.c +@@ -1766,6 +1766,47 @@ out: + return ret; + } + ++int test_num_disk_vs_raid(u64 metadata_profile, u64 data_profile, ++ u64 dev_cnt, int mixed, char *estr) ++{ ++ size_t sz = 100; ++ u64 allowed = 0; ++ ++ switch (dev_cnt) { ++ default: ++ case 4: ++ allowed |= BTRFS_BLOCK_GROUP_RAID10; ++ case 3: ++ allowed |= BTRFS_BLOCK_GROUP_RAID6; ++ case 2: ++ allowed |= BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | ++ BTRFS_BLOCK_GROUP_RAID5; ++ break; ++ case 1: ++ allowed |= BTRFS_BLOCK_GROUP_DUP; ++ } ++ ++ if (metadata_profile & ~allowed) { ++ snprintf(estr, sz, "unable to create FS with metadata " ++ "profile %llu (have %llu devices)\n", ++ metadata_profile, dev_cnt); ++ return 1; ++ } ++ if (data_profile & ~allowed) { ++ snprintf(estr, sz, "unable to create FS with data " ++ "profile %llu (have %llu devices)\n", ++ metadata_profile, dev_cnt); ++ return 1; ++ } ++ ++ if (!mixed && (data_profile & BTRFS_BLOCK_GROUP_DUP)) { ++ snprintf(estr, sz, ++ "dup for data is allowed only in mixed mode"); ++ return 1; ++ } ++ return 0; ++} ++ + /* Check if disk is suitable for btrfs + * returns: + * 1: something is wrong, estr provides the error +diff --git a/utils.h b/utils.h +index 3c17e14b79cd..720601723413 100644 +--- a/utils.h ++++ b/utils.h +@@ -65,5 +65,7 @@ u64 btrfs_device_size(int fd, struct stat *st); + /* Helper to always get proper size of the destination string */ + #define strncpy_null(dest, src) __strncpy__null(dest, src, sizeof(dest)) + int test_dev_for_mkfs(char *file, int force_overwrite, char *estr); ++int test_num_disk_vs_raid(u64 metadata_profile, u64 data_profile, 
++ u64 dev_cnt, int mixed, char *estr); + + #endif +-- +1.8.3.1 + diff --git a/0059-btrfs-progs-error-if-device-for-mkfs-is-too-small.patch b/0059-btrfs-progs-error-if-device-for-mkfs-is-too-small.patch new file mode 100644 index 0000000..1ab5d5a --- /dev/null +++ b/0059-btrfs-progs-error-if-device-for-mkfs-is-too-small.patch @@ -0,0 +1,46 @@ +From 684d9a33f77967a9b50b597cece1b2b2e29d2b8d Mon Sep 17 00:00:00 2001 +From: Hidetoshi Seto +Date: Thu, 5 Sep 2013 15:53:34 +0900 +Subject: [PATCH 59/62] btrfs-progs: error if device for mkfs is too small + +Eric pointed out that mkfs abort if specified volume is too small: + + # truncate --size=2m testfile + # ./mkfs.btrfs testfile + : + SMALL VOLUME: forcing mixed metadata/data groups + mkfs.btrfs: volumes.c:852: btrfs_alloc_chunk: Assertion `!(ret)' failed. + Aborted (core dumped) + +As the first step to fix problems around there, let mkfs to report +error if the size of target volume is less than the size of the first +system block group, BTRFS_MKFS_SYSTEM_GROUP_SIZE (= 4MB). 
+ +Reported-by: Eric Sandeen +Signed-off-by: Hidetoshi Seto +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +--- + mkfs.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/mkfs.c b/mkfs.c +index 8a68f8a9f762..9a017c1dc95b 100644 +--- a/mkfs.c ++++ b/mkfs.c +@@ -1400,6 +1400,12 @@ int main(int ac, char **av) + } + } + ++ /* To create the first block group and chunk 0 in make_btrfs */ ++ if (dev_block_count < BTRFS_MKFS_SYSTEM_GROUP_SIZE) { ++ fprintf(stderr, "device is too small to make filesystem\n"); ++ exit(1); ++ } ++ + blocks[0] = BTRFS_SUPER_INFO_OFFSET; + for (i = 1; i < 7; i++) { + blocks[i] = BTRFS_SUPER_INFO_OFFSET + 1024 * 1024 + +-- +1.8.3.1 + diff --git a/0060-btrfs-progs-error-if-device-have-no-space-to-make-pr.patch b/0060-btrfs-progs-error-if-device-have-no-space-to-make-pr.patch new file mode 100644 index 0000000..a9364bf --- /dev/null +++ b/0060-btrfs-progs-error-if-device-have-no-space-to-make-pr.patch @@ -0,0 +1,91 @@ +From b11f9613e3b0be7e4b560419a4fec7d7d7264664 Mon Sep 17 00:00:00 2001 +From: Hidetoshi Seto +Date: Thu, 5 Sep 2013 15:55:08 +0900 +Subject: [PATCH 60/62] btrfs-progs: error if device have no space to make + primary chunks + +The previous patch works fine if the size of specified volume to mkfs +is less than 4MB. However usually btrfs requires more than 4MB to work, +and the minimum preferred size is depending on the raid setting etc. + +This patch let mkfs print error message if it cannot allocate one of +chunks should be there at first. + + [before] + # truncate --size=4500K testfile + # ./mkfs.btrfs -f testfile + : + SMALL VOLUME: forcing mixed metadata/data groups + mkfs.btrfs: mkfs.c:84: make_root_dir: Assertion `!(ret)' failed. 
+ Aborted (core dumped) + + [After] + # truncate --size=4500K testfile + # ./mkfs.btrfs -f testfile + : + SMALL VOLUME: forcing mixed metadata/data groups + no space to alloc data/metadata chunk + failed to setup the root directory + +TBD is calculate minimum size for setting and put it in the error +message to let user know how large amount of volume is required. + +Signed-off-by: Hidetoshi Seto +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +--- + mkfs.c | 17 +++++++++++++++++ + 1 file changed, 17 insertions(+) + +diff --git a/mkfs.c b/mkfs.c +index 9a017c1dc95b..f7105073a173 100644 +--- a/mkfs.c ++++ b/mkfs.c +@@ -81,6 +81,11 @@ static int make_root_dir(struct btrfs_root *root, int mixed) + &chunk_start, &chunk_size, + BTRFS_BLOCK_GROUP_METADATA | + BTRFS_BLOCK_GROUP_DATA); ++ if (ret == -ENOSPC) { ++ fprintf(stderr, ++ "no space to alloc data/metadata chunk\n"); ++ goto err; ++ } + BUG_ON(ret); + ret = btrfs_make_block_group(trans, root, 0, + BTRFS_BLOCK_GROUP_METADATA | +@@ -93,6 +98,10 @@ static int make_root_dir(struct btrfs_root *root, int mixed) + ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root, + &chunk_start, &chunk_size, + BTRFS_BLOCK_GROUP_METADATA); ++ if (ret == -ENOSPC) { ++ fprintf(stderr, "no space to alloc metadata chunk\n"); ++ goto err; ++ } + BUG_ON(ret); + ret = btrfs_make_block_group(trans, root, 0, + BTRFS_BLOCK_GROUP_METADATA, +@@ -110,6 +119,10 @@ static int make_root_dir(struct btrfs_root *root, int mixed) + ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root, + &chunk_start, &chunk_size, + BTRFS_BLOCK_GROUP_DATA); ++ if (ret == -ENOSPC) { ++ fprintf(stderr, "no space to alloc data chunk\n"); ++ goto err; ++ } + BUG_ON(ret); + ret = btrfs_make_block_group(trans, root, 0, + BTRFS_BLOCK_GROUP_DATA, +@@ -181,6 +194,10 @@ static int create_one_raid_group(struct btrfs_trans_handle *trans, + + ret = btrfs_alloc_chunk(trans, root->fs_info->extent_root, + &chunk_start, &chunk_size, type); ++ if (ret == -ENOSPC) { ++ 
fprintf(stderr, "not enough free space\n"); ++ exit(1); ++ } + BUG_ON(ret); + ret = btrfs_make_block_group(trans, root->fs_info->extent_root, 0, + type, BTRFS_FIRST_CHUNK_TREE_OBJECTID, +-- +1.8.3.1 + diff --git a/0061-btrfs-progs-calculate-available-blocks-on-device-pro.patch b/0061-btrfs-progs-calculate-available-blocks-on-device-pro.patch new file mode 100644 index 0000000..c98e09a --- /dev/null +++ b/0061-btrfs-progs-calculate-available-blocks-on-device-pro.patch @@ -0,0 +1,233 @@ +From db3c0b4f365acb5ee9fa7e37d440b2ef6ff5636c Mon Sep 17 00:00:00 2001 +From: Hidetoshi Seto +Date: Thu, 5 Sep 2013 15:57:19 +0900 +Subject: [PATCH 61/62] btrfs-progs: calculate available blocks on device + properly + +I found that mkfs.btrfs aborts when assigned multi volumes contain +a small volume: + + # parted /dev/sdf p + Model: LSI MegaRAID SAS RMB (scsi) + Disk /dev/sdf: 72.8GB + Sector size (logical/physical): 512B/512B + Partition Table: msdos + + Number Start End Size Type File system Flags + 1 32.3kB 72.4GB 72.4GB primary + 2 72.4GB 72.8GB 461MB primary + + # ./mkfs.btrfs -f /dev/sdf1 /dev/sdf2 + : + SMALL VOLUME: forcing mixed metadata/data groups + adding device /dev/sdf2 id 2 + mkfs.btrfs: volumes.c:852: btrfs_alloc_chunk: Assertion `!(ret)' failed. + Aborted (core dumped) + +This failure of btrfs_alloc_chunk was caused by following steps: + 1) since there is only small space in the small device, mkfs was + going to allocate a chunk from free space as much as available. + So mkfs called btrfs_alloc_chunk with + size = device->total_bytes - device->used_bytes. + 2) (According to the comment in source code, to avoid overwriting + superblock,) btrfs_alloc_chunk starts taking chunks at an offset + of 1MB. It means that the layout of a disk will be like: + [[1MB at beginning for sb][allocated chunks]* ... free space ... ] + and you can see that the available free space for allocation is: + avail = device->total_bytes - device->used_bytes - 1MB. 
+ 3) Therefore the free space is 1MB less than what was requested. + +>From further investigations I also found that this issue is easily +reproduced by using -A, --alloc-start option: + + # truncate --size=1G testfile + # ./mkfs.btrfs -A900M -f testfile + : + mkfs.btrfs: volumes.c:852: btrfs_alloc_chunk: Assertion `!(ret)' failed. + Aborted (core dumped) + +In this case there is only 100MB for allocation but btrfs_alloc_chunk +was going to allocate more than the 100MB. + +The root cause of both of the above problems is the same simple bug: +btrfs_alloc_chunk does not calculate available bytes properly even +though it checks how many devices have enough room for the +chunk to be allocated. + +So this patch introduces a new function btrfs_device_avail_bytes() +which returns the bytes available for allocation on the specified device. + +Signed-off-by: Hidetoshi Seto +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +--- + ctree.h | 8 +++++ + volumes.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- + 2 files changed, 106 insertions(+), 6 deletions(-) + +diff --git a/ctree.h b/ctree.h +index 0b0d701fe679..90be7abe9ebf 100644 +--- a/ctree.h ++++ b/ctree.h +@@ -811,6 +811,14 @@ struct btrfs_csum_item { + u8 csum; + } __attribute__ ((__packed__)); + ++/* ++ * We don't want to overwrite 1M at the beginning of device, even though ++ * there is our 1st superblock at 64k.
Some possible reasons: ++ * - the first 64k blank is useful for some boot loader/manager ++ * - the first 1M could be scratched by buggy partitioner or somesuch ++ */ ++#define BTRFS_BLOCK_RESERVED_1M_FOR_SUPER ((u64)1024 * 1024) ++ + /* tag for the radix tree of block groups in ram */ + #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) + #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) +diff --git a/volumes.c b/volumes.c +index 0ff22833d6be..e8d7f258dddb 100644 +--- a/volumes.c ++++ b/volumes.c +@@ -268,7 +268,7 @@ static int find_free_dev_extent(struct btrfs_trans_handle *trans, + struct btrfs_dev_extent *dev_extent = NULL; + u64 hole_size = 0; + u64 last_byte = 0; +- u64 search_start = 0; ++ u64 search_start = root->fs_info->alloc_start; + u64 search_end = device->total_bytes; + int ret; + int slot = 0; +@@ -283,10 +283,12 @@ static int find_free_dev_extent(struct btrfs_trans_handle *trans, + /* we don't want to overwrite the superblock on the drive, + * so we make sure to start at an offset of at least 1MB + */ +- search_start = max((u64)1024 * 1024, search_start); ++ search_start = max(BTRFS_BLOCK_RESERVED_1M_FOR_SUPER, search_start); + +- if (root->fs_info->alloc_start + num_bytes <= device->total_bytes) +- search_start = max(root->fs_info->alloc_start, search_start); ++ if (search_start >= search_end) { ++ ret = -ENOSPC; ++ goto error; ++ } + + key.objectid = device->devid; + key.offset = search_start; +@@ -660,6 +662,94 @@ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target) + return 64 * 1024; + } + ++/* ++ * btrfs_device_avail_bytes - count bytes available for alloc_chunk ++ * ++ * It is not equal to "device->total_bytes - device->bytes_used". ++ * We do not allocate any chunk in 1M at beginning of device, and not ++ * allowed to allocate any chunk before alloc_start if it is specified. ++ * So search holes from max(1M, alloc_start) to device->total_bytes. 
++ */ ++static int btrfs_device_avail_bytes(struct btrfs_trans_handle *trans, ++ struct btrfs_device *device, ++ u64 *avail_bytes) ++{ ++ struct btrfs_path *path; ++ struct btrfs_root *root = device->dev_root; ++ struct btrfs_key key; ++ struct btrfs_dev_extent *dev_extent = NULL; ++ struct extent_buffer *l; ++ u64 search_start = root->fs_info->alloc_start; ++ u64 search_end = device->total_bytes; ++ u64 extent_end = 0; ++ u64 free_bytes = 0; ++ int ret; ++ int slot = 0; ++ ++ search_start = max(BTRFS_BLOCK_RESERVED_1M_FOR_SUPER, search_start); ++ ++ path = btrfs_alloc_path(); ++ if (!path) ++ return -ENOMEM; ++ ++ key.objectid = device->devid; ++ key.offset = root->fs_info->alloc_start; ++ key.type = BTRFS_DEV_EXTENT_KEY; ++ ++ path->reada = 2; ++ ret = btrfs_search_slot(trans, root, &key, path, 0, 0); ++ if (ret < 0) ++ goto error; ++ ret = btrfs_previous_item(root, path, 0, key.type); ++ if (ret < 0) ++ goto error; ++ ++ while (1) { ++ l = path->nodes[0]; ++ slot = path->slots[0]; ++ if (slot >= btrfs_header_nritems(l)) { ++ ret = btrfs_next_leaf(root, path); ++ if (ret == 0) ++ continue; ++ if (ret < 0) ++ goto error; ++ break; ++ } ++ btrfs_item_key_to_cpu(l, &key, slot); ++ ++ if (key.objectid < device->devid) ++ goto next; ++ if (key.objectid > device->devid) ++ break; ++ if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) ++ goto next; ++ if (key.offset > search_end) ++ break; ++ if (key.offset > search_start) ++ free_bytes += key.offset - search_start; ++ ++ dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); ++ extent_end = key.offset + btrfs_dev_extent_length(l, ++ dev_extent); ++ if (extent_end > search_start) ++ search_start = extent_end; ++ if (search_start > search_end) ++ break; ++next: ++ path->slots[0]++; ++ cond_resched(); ++ } ++ ++ if (search_start < search_end) ++ free_bytes += search_end - search_start; ++ ++ *avail_bytes = free_bytes; ++ ret = 0; ++error: ++ btrfs_free_path(path); ++ return ret; ++} ++ + int btrfs_alloc_chunk(struct 
btrfs_trans_handle *trans, + struct btrfs_root *extent_root, u64 *start, + u64 *num_bytes, u64 type) +@@ -678,7 +768,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + u64 calc_size = 8 * 1024 * 1024; + u64 min_free; + u64 max_chunk_size = 4 * calc_size; +- u64 avail; ++ u64 avail = 0; + u64 max_avail = 0; + u64 percent_max; + int num_stripes = 1; +@@ -782,7 +872,9 @@ again: + /* build a private list of devices we will allocate from */ + while(index < num_stripes) { + device = list_entry(cur, struct btrfs_device, dev_list); +- avail = device->total_bytes - device->bytes_used; ++ ret = btrfs_device_avail_bytes(trans, device, &avail); ++ if (ret) ++ return ret; + cur = cur->next; + if (avail >= min_free) { + list_move_tail(&device->dev_list, &private_devs); +-- +1.8.3.1 + diff --git a/0062-Btrfs-progs-keep-track-of-transid-failures-and-fix-t.patch b/0062-Btrfs-progs-keep-track-of-transid-failures-and-fix-t.patch new file mode 100644 index 0000000..a6efe4e --- /dev/null +++ b/0062-Btrfs-progs-keep-track-of-transid-failures-and-fix-t.patch @@ -0,0 +1,228 @@ +From 174273941f266c5ba71da02cc4d71a95ca41bc20 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Tue, 1 Oct 2013 09:00:19 -0400 +Subject: [PATCH 62/62] Btrfs-progs: keep track of transid failures and fix + them if possible + +A user was reporting an issue with bad transid errors on his blocks. The thing +is that btrfs-progs will ignore transid failures for things like restore and +fsck so we can do a best effort to fix a users file system. So fsck can put +together a coherent view of the file system with stale blocks. So if everything +else is ok in the mind of fsck then we can recow these blocks to fix the +generation and the user can get their file system back. 
Thanks, + +Signed-off-by: Josef Bacik +Signed-off-by: David Sterba +Signed-off-by: Chris Mason +--- + cmds-check.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + ctree.c | 7 ++++++- + ctree.h | 1 + + disk-io.c | 15 +++++++++++++++ + extent_io.c | 2 ++ + extent_io.h | 2 ++ + 6 files changed, 84 insertions(+), 1 deletion(-) + +diff --git a/cmds-check.c b/cmds-check.c +index dbb41e5a4d5b..924aac08f350 100644 +--- a/cmds-check.c ++++ b/cmds-check.c +@@ -5675,6 +5675,47 @@ static int reinit_extent_tree(struct btrfs_fs_info *fs_info) + return btrfs_commit_transaction(trans, fs_info->extent_root); + } + ++static int recow_extent_buffer(struct btrfs_root *root, struct extent_buffer *eb) ++{ ++ struct btrfs_path *path; ++ struct btrfs_trans_handle *trans; ++ struct btrfs_key key; ++ int ret; ++ ++ printf("Recowing metadata block %llu\n", eb->start); ++ key.objectid = btrfs_header_owner(eb); ++ key.type = BTRFS_ROOT_ITEM_KEY; ++ key.offset = (u64)-1; ++ ++ root = btrfs_read_fs_root(root->fs_info, &key); ++ if (IS_ERR(root)) { ++ fprintf(stderr, "Couldn't find owner root %llu\n", ++ key.objectid); ++ return PTR_ERR(root); ++ } ++ ++ path = btrfs_alloc_path(); ++ if (!path) ++ return -ENOMEM; ++ ++ trans = btrfs_start_transaction(root, 1); ++ if (IS_ERR(trans)) { ++ btrfs_free_path(path); ++ return PTR_ERR(trans); ++ } ++ ++ path->lowest_level = btrfs_header_level(eb); ++ if (path->lowest_level) ++ btrfs_node_key_to_cpu(eb, &key, 0); ++ else ++ btrfs_item_key_to_cpu(eb, &key, 0); ++ ++ ret = btrfs_search_slot(trans, root, &key, path, 0, 1); ++ btrfs_commit_transaction(trans, root); ++ btrfs_free_path(path); ++ return ret; ++} ++ + static struct option long_options[] = { + { "super", 1, NULL, 's' }, + { "repair", 0, NULL, 0 }, +@@ -5826,6 +5867,23 @@ int cmd_check(int argc, char **argv) + + fprintf(stderr, "checking root refs\n"); + ret = check_root_refs(root, &root_cache); ++ if (ret) ++ goto out; ++ ++ while (repair && 
!list_empty(&root->fs_info->recow_ebs)) { ++ struct extent_buffer *eb; ++ ++ eb = list_first_entry(&root->fs_info->recow_ebs, ++ struct extent_buffer, recow); ++ ret = recow_extent_buffer(root, eb); ++ if (ret) ++ break; ++ } ++ ++ if (!list_empty(&root->fs_info->recow_ebs)) { ++ fprintf(stderr, "Transid errors in file system\n"); ++ ret = 1; ++ } + out: + free_root_recs_tree(&root_cache); + close_ctree(root); +diff --git a/ctree.c b/ctree.c +index 1a4f3f06f38a..e7ccfa03fb0e 100644 +--- a/ctree.c ++++ b/ctree.c +@@ -346,7 +346,8 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, + (unsigned long)btrfs_header_fsid(cow), + BTRFS_FSID_SIZE); + +- WARN_ON(btrfs_header_generation(buf) > trans->transid); ++ WARN_ON(!(buf->flags & EXTENT_BAD_TRANSID) && ++ btrfs_header_generation(buf) > trans->transid); + + update_ref_for_cow(trans, root, buf, cow); + +@@ -370,6 +371,10 @@ int __btrfs_cow_block(struct btrfs_trans_handle *trans, + btrfs_free_extent(trans, root, buf->start, buf->len, + 0, root->root_key.objectid, level, 1); + } ++ if (!list_empty(&buf->recow)) { ++ list_del_init(&buf->recow); ++ free_extent_buffer(buf); ++ } + free_extent_buffer(buf); + btrfs_mark_buffer_dirty(cow); + *cow_ret = cow; +diff --git a/ctree.h b/ctree.h +index 90be7abe9ebf..61cd93b91937 100644 +--- a/ctree.h ++++ b/ctree.h +@@ -952,6 +952,7 @@ struct btrfs_fs_info { + + struct btrfs_extent_ops *extent_ops; + struct list_head dirty_cowonly_roots; ++ struct list_head recow_ebs; + + struct btrfs_fs_devices *fs_devices; + struct list_head space_info; +diff --git a/disk-io.c b/disk-io.c +index 1b91de6fc90d..d97ff8c706c9 100644 +--- a/disk-io.c ++++ b/disk-io.c +@@ -180,6 +180,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, + (unsigned long long)parent_transid, + (unsigned long long)btrfs_header_generation(eb)); + if (ignore) { ++ eb->flags |= EXTENT_BAD_TRANSID; + printk("Ignoring transid failure\n"); + return 0; + } +@@ -274,6 +275,12 @@ struct extent_buffer 
*read_tree_block(struct btrfs_root *root, u64 bytenr, + csum_tree_block(root, eb, 1) == 0 && + verify_parent_transid(eb->tree, eb, parent_transid, ignore) + == 0) { ++ if (eb->flags & EXTENT_BAD_TRANSID && ++ list_empty(&eb->recow)) { ++ list_add_tail(&eb->recow, ++ &root->fs_info->recow_ebs); ++ eb->refs++; ++ } + btrfs_set_buffer_uptodate(eb); + return eb; + } +@@ -748,6 +755,7 @@ struct btrfs_fs_info *btrfs_new_fs_info(int writable, u64 sb_bytenr) + mutex_init(&fs_info->fs_mutex); + INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); + INIT_LIST_HEAD(&fs_info->space_info); ++ INIT_LIST_HEAD(&fs_info->recow_ebs); + + if (!writable) + fs_info->readonly = 1; +@@ -899,6 +907,13 @@ FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup); + + void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info) + { ++ while (!list_empty(&fs_info->recow_ebs)) { ++ struct extent_buffer *eb; ++ eb = list_first_entry(&fs_info->recow_ebs, ++ struct extent_buffer, recow); ++ list_del_init(&eb->recow); ++ free_extent_buffer(eb); ++ } + free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree); + extent_io_tree_cleanup(&fs_info->extent_cache); + extent_io_tree_cleanup(&fs_info->free_space_cache); +diff --git a/extent_io.c b/extent_io.c +index 464bd07e8d1a..398ee26b4a79 100644 +--- a/extent_io.c ++++ b/extent_io.c +@@ -585,6 +585,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, + eb->dev_bytenr = (u64)-1; + eb->cache_node.start = bytenr; + eb->cache_node.size = blocksize; ++ INIT_LIST_HEAD(&eb->recow); + + free_some_buffers(tree); + ret = insert_cache_extent(&tree->cache, &eb->cache_node); +@@ -608,6 +609,7 @@ void free_extent_buffer(struct extent_buffer *eb) + struct extent_io_tree *tree = eb->tree; + BUG_ON(eb->flags & EXTENT_DIRTY); + list_del_init(&eb->lru); ++ list_del_init(&eb->recow); + remove_cache_extent(&tree->cache, &eb->cache_node); + BUG_ON(tree->cache_size < eb->len); + tree->cache_size -= eb->len; +diff --git a/extent_io.h 
b/extent_io.h +index 2604dcef31e3..45080c2661ae 100644 +--- a/extent_io.h ++++ b/extent_io.h +@@ -39,6 +39,7 @@ + #define EXTENT_DEFRAG_DONE (1 << 7) + #define EXTENT_BUFFER_FILLED (1 << 8) + #define EXTENT_CSUM (1 << 9) ++#define EXTENT_BAD_TRANSID (1 << 10) + #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) + + #define BLOCK_GROUP_DATA EXTENT_WRITEBACK +@@ -72,6 +73,7 @@ struct extent_buffer { + u32 len; + struct extent_io_tree *tree; + struct list_head lru; ++ struct list_head recow; + int refs; + int flags; + int fd; +-- +1.8.3.1 + diff --git a/btrfsprogs.changes b/btrfsprogs.changes index e9706d0..7c2cbe8 100644 --- a/btrfsprogs.changes +++ b/btrfsprogs.changes @@ -1,3 +1,10 @@ +------------------------------------------------------------------- +Thu Oct 31 01:05:56 CET 2013 - dsterba@suse.cz + +- fsck updates +- more mkfs sanity checks +- qgroup rescan wait + ------------------------------------------------------------------- Fri Oct 4 20:16:02 UTC 2013 - rgoldwyn@suse.com diff --git a/btrfsprogs.spec b/btrfsprogs.spec index 2fcedeb..bbc10ab 100644 --- a/btrfsprogs.spec +++ b/btrfsprogs.spec @@ -35,6 +35,19 @@ Patch10: 0010-Btrfs-progs-make-btrfsck-a-hardlink-at-install-time.patch Patch12: 0012-libbtrfs-Set-SONAME-to-libbtrfs.so.0-instead-of-libb.patch Patch40: 0040-btrfs-progs-fix-loop-device-mount-checks.patch Patch1: btrfs-progs-mkfs-default-extref.diff +Patch50: 0050-Btrfs-progs-commit-the-csum_root-if-we-do-init-csum-.patch +Patch51: 0051-btrfs-progs-Fix-getopt-on-arm-ppc-platforms.patch +Patch52: 0052-btrfs-progs-fix-duplicate-__-su-typedefs-on-ppc64.patch +Patch53: 0053-btrfs-progs-use-reentrant-localtime.patch +Patch54: 0054-btrfs-progs-don-t-have-to-report-ENOMEDIUM-error-dur.patch +Patch55: 0055-Btrfs-progs-added-btrfs-quota-rescan-w-switch-wait.patch +Patch56: 0056-btrfs-progs-fix-qgroup-realloc-inheritance.patch +Patch57: 0057-Btrfs-progs-fix-restore-command-leaving-corrupted-fi.patch +Patch58: 
0058-btrfs-progs-avoid-write-to-the-disk-before-sure-to-c.patch +Patch59: 0059-btrfs-progs-error-if-device-for-mkfs-is-too-small.patch +Patch60: 0060-btrfs-progs-error-if-device-have-no-space-to-make-pr.patch +Patch61: 0061-btrfs-progs-calculate-available-blocks-on-device-pro.patch +Patch62: 0062-Btrfs-progs-keep-track-of-transid-failures-and-fix-t.patch Patch1000: local-version-override.patch Patch1001: btrfs-progs-use-IEEE1541-suffixes-for-sizes.patch Patch1002: btrfs-progs-add-man-page-for-btrfs-convert.patch @@ -83,6 +96,19 @@ build applications to interface with btrfs. %patch1001 -p1 %patch1002 -p1 %patch1003 -p1 +%patch50 -p1 +%patch51 -p1 +%patch52 -p1 +%patch53 -p1 +%patch54 -p1 +%patch55 -p1 +%patch56 -p1 +%patch57 -p1 +%patch58 -p1 +%patch59 -p1 +%patch60 -p1 +%patch61 -p1 +%patch62 -p1 %build make %{?_smp_mflags} CFLAGS="%{optflags}" all btrfs-convert \ diff --git a/local-version-override.patch b/local-version-override.patch index de3e2e4..6ab1385 100644 --- a/local-version-override.patch +++ b/local-version-override.patch @@ -7,7 +7,7 @@ Index: btrfs-progs-v0.19-116-g13eced9/version.sh # Released under the GNU GPLv2 -v="v0.20-rc1" -+v="v0.20-rc1+20130701" ++v="v0.20-rc1+20131031" which git &> /dev/null if [ $? == 0 -a -d .git ]; then