Accepting request 630806 from home:jeff_mahoney:branches:filesystems

- Update xfsprogs-mkfs-avoid-divide-by-zero-when-hardware-reports-opti.patch
  * Refresh to match upstream version.

- repair: shift inode back into place if corrupted by bad log
  replay (bsc#1105396).
  * Added 0001-repair-shift-inode-back-into-place-if-corrupted-by-b.patch

OBS-URL: https://build.opensuse.org/request/show/630806
OBS-URL: https://build.opensuse.org/package/show/filesystems/xfsprogs?expand=0&rev=75
This commit is contained in:
Dirk Mueller 2018-09-11 09:46:02 +00:00 committed by Git OBS Bridge
parent 313f81c462
commit c56022df67
4 changed files with 323 additions and 23 deletions

View File

@ -0,0 +1,273 @@
From 73ef905b37bbf4cd1b13f3aaa4da5457f793d1ac Mon Sep 17 00:00:00 2001
From: Jeffrey Mahoney <jeffm@suse.com>
Date: Tue, 21 Aug 2018 13:41:20 -0400
Subject: [PATCH] repair: shift inode back into place if corrupted by bad log
replay
References: bsc#1105396
SUSE kernels 3.12.74-60.64.40 through 3.12.74-60.64.99 contained
a regression in which the di_dmstate member of xfs_icdinode_t was
changed to an atomic_t. Since we only complain if an inode item is too large,
if a kernel with this patch applied mounted a file system with inode
items in the log formatted by a kernel without this patch, they would
be used but would be interpreted using the structure with the atomic_t.
As a result, the inode would be copied incorrectly, corrupting di_dmstate
and the members that follow it.
For v3 inodes, we can detect that the UUID is shifted forward
8 bytes and recover di_uuid, di_ino, di_crtime, di_pad2, di_cowextsize,
di_flags2, and di_lsn. The UUID and inode number being incorrect
will trip the verifier on iread, but the inode will have been flushed from
the log in a broken state.
di_changecount is lost entirely since half is overwritten by the CRC
being updated and the other half fell in a hole in the structure.
di_flags is lost entirely since it is overwritten by half of
the generation number. Half of the generation number is lost since
it falls in a hole in the structure.
For v2 inodes, the corruption is more limited but impossible to
detect beyond invalid flags being in use.
Without this fix, xfs_repair will clear the affected inodes, causing
big problems.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
---
repair/dinode.c | 184 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 178 insertions(+), 6 deletions(-)
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -2200,6 +2200,158 @@ _("would clear obsolete nlink field in v
return dirty;
}
+static int
+check_shifted_uuid(xfs_dinode_t *dino, xfs_mount_t *mp)
+{
+ uint64_t tmp64;
+ char tmpuuid[16];
+ uuid_t uuid;
+
+ tmp64 = be64_to_cpu(dino->di_ino);
+ memcpy(tmpuuid, &tmp64, sizeof(tmp64));
+ memcpy(tmpuuid + 8, &dino->di_uuid, 8);
+ memcpy(uuid, tmpuuid, 16);
+
+ return !platform_uuid_compare(&uuid, &mp->m_sb.sb_meta_uuid);
+}
+
+/*
+ * There was a kernel that would use incorrectly-formatted log items.
+ * If it recovered a dirty log, corrupted inodes would result.
+ * 12 bytes of the inode are completely unrecoverable. Those are
+ * documented below.
+ */
+static void
+repair_inode_with_bad_atomic(xfs_dinode_t *dino, xfs_mount_t *mp)
+{
+ xfs_dinode_t fixed;
+ uint64_t tmp64;
+ uint32_t tmp32;
+ char tmpuuid[16];
+ char *tmpptr;
+
+ uuid_t uuid;
+
+ tmp64 = be64_to_cpu(dino->di_ino);
+ memcpy(tmpuuid, &tmp64, sizeof(tmp64));
+ tmpptr = (char *)dino->di_uuid;
+ memcpy(tmpuuid + 8, tmpptr, 8);
+ memcpy(uuid, tmpuuid, 16);
+
+ memcpy(&fixed, dino, sizeof(fixed));
+ memcpy(&fixed.di_uuid, uuid, sizeof(uuid));
+
+ tmp32 = *(uint32_t *)&dino->di_pad2[4];
+ fixed.di_crtime.t_sec = cpu_to_be32(tmp32);
+ tmp32 = *(uint32_t *)&dino->di_pad2[8];
+ fixed.di_crtime.t_nsec = cpu_to_be32(tmp32);
+
+ tmp64 = be32_to_cpu(dino->di_crtime.t_nsec);
+ tmp64 <<= 32;
+ tmp64 |= be32_to_cpu(dino->di_crtime.t_sec);
+ fixed.di_ino = cpu_to_be64(tmp64);
+
+ tmp64 = be64_to_cpu(fixed.di_ino);
+
+ memcpy(fixed.di_pad2 + 8, dino->di_pad2, 4);
+
+ tmp32 = be32_to_cpu(dino->di_cowextsize);
+ memcpy(fixed.di_pad2 + 4, &tmp32, 4);
+
+ tmp64 = be64_to_cpu(dino->di_flags2);
+ tmp32 = tmp64 >> 32;
+ memcpy(fixed.di_pad2, &tmp32, 4);
+
+ fixed.di_cowextsize = cpu_to_be32(tmp64);
+ fixed.di_flags2 = dino->di_lsn;
+ fixed.di_lsn = dino->di_changecount;
+
+ /*
+ * This is lost entirely. Half falls in padding and half
+ * is overwritten by the CRC.
+ */
+ fixed.di_changecount = 0;
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ /*
+ * Half of the generation number is lost, but it's the high bits.
+ * Pick a high number and hope for the best.
+ */
+ tmp32 = 0xff000000;
+ tmp32 |= be16_to_cpu(dino->di_flags);
+ fixed.di_gen = cpu_to_be32(tmp32);
+#else
+ /*
+ * Half of the generation number is lost, but it's the low bits,
+ * so we can fake it.
+ */
+ tmp32 = be16_to_cpu(dino->di_flags) + 1;
+ tmp32 <<= 16;
+ fixed.di_gen = cpu_to_be32(tmp32);
+#endif
+
+ /*
+ * The flags are lost since the atomic_t was 32-bit and we
+ * only keep 16.
+ */
+ fixed.di_flags = 0;
+
+ memcpy(dino, &fixed, sizeof(*dino));
+ xfs_dinode_calc_crc(mp, dino);
+}
+
+static int
+process_dinode_int(xfs_mount_t *mp, xfs_dinode_t *dino, xfs_agnumber_t agno,
+ xfs_agino_t ino, int was_free, int *dirty, int *used,
+ int verify_mode, int uncertain, int ino_discovery,
+ int check_dups, int extra_attr_check, int *isa_dir,
+ xfs_ino_t *parent, int recurse);
+
+static int
+handle_malformed_inode(xfs_mount_t *mp, xfs_dinode_t *dino,
+ xfs_agnumber_t agno, xfs_agino_t ino, int was_free,
+ int *dirty, int *used, int verify_mode, int uncertain,
+ int ino_discovery, int check_dups, int extra_attr_check,
+ int *isa_dir, xfs_ino_t *parent)
+{
+ xfs_dinode_t save;
+ int retval;
+ xfs_ino_t lino = XFS_AGINO_TO_INO(mp, agno, ino);
+
+ if (!uncertain)
+ do_warn(_("malformed inode %" PRIu64 " found%c"),
+ lino, verify_mode ? '\n' : ',');
+
+ /*
+ * We can't just pass a local copy to verify since we need the
+ * data fork to check directories.
+ */
+ if (verify_mode || no_modify)
+ memcpy(&save, dino, sizeof(*dino));
+
+ repair_inode_with_bad_atomic(dino, mp);
+ retval = process_dinode_int(mp, dino, agno, ino, was_free, dirty,
+ used, verify_mode, uncertain, ino_discovery,
+ check_dups, extra_attr_check,
+ isa_dir, parent, 1);
+
+ if (verify_mode || no_modify) {
+ memcpy(dino, &save, sizeof(*dino));
+ *dirty = 0;
+ }
+
+ if (retval == 0 && !verify_mode) {
+ if (no_modify)
+ do_warn(_(" would repair\n"));
+ else {
+ do_warn(_(" repairing\n"));
+ *dirty = 1;
+ }
+ }
+
+ return retval;
+}
+
/*
* returns 0 if the inode is ok, 1 if the inode is corrupt
* check_dups can be set to 1 *only* when called by the
@@ -2224,7 +2376,8 @@ process_dinode_int(xfs_mount_t *mp,
* duplicate blocks */
int extra_attr_check, /* 1 == do attribute format and value checks */
int *isa_dir, /* out == 1 if inode is a directory */
- xfs_ino_t *parent) /* out -- parent if ino is a dir */
+ xfs_ino_t *parent, /* out -- parent if ino is a dir */
+ int recurse)
{
xfs_rfsblock_t totblocks = 0;
xfs_rfsblock_t atotblocks = 0;
@@ -2322,6 +2475,25 @@ process_dinode_int(xfs_mount_t *mp,
* memory and hence invalidated the CRC.
*/
if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ int good_uuid = 1;
+
+ if (platform_uuid_compare(&dino->di_uuid,
+ &mp->m_sb.sb_meta_uuid))
+ good_uuid = 0;
+
+ /*
+ * Only check to see if it's a malformed inode if it has
+ * a valid magic, crc, and version and an invalid uuid.
+ */
+ if (!good_uuid && !retval && !recurse &&
+ check_shifted_uuid(dino, mp))
+ return handle_malformed_inode(mp, dino, agno, ino,
+ was_free, dirty, used,
+ verify_mode, uncertain,
+ ino_discovery, check_dups,
+ extra_attr_check,
+ isa_dir, parent);
+
if (be64_to_cpu(dino->di_ino) != lino) {
if (!uncertain)
do_warn(
@@ -2332,8 +2504,7 @@ _("inode identifier %llu mismatch on ino
return 1;
goto clear_bad_out;
}
- if (platform_uuid_compare(&dino->di_uuid,
- &mp->m_sb.sb_meta_uuid)) {
+ if (!good_uuid) {
if (!uncertain)
do_warn(
_("UUID mismatch on inode %" PRIu64 "\n"), lino);
@@ -2878,7 +3049,8 @@ process_dinode(
#endif
return process_dinode_int(mp, dino, agno, ino, was_free, dirty, used,
verify_mode, uncertain, ino_discovery,
- check_dups, extra_attr_check, isa_dir, parent);
+ check_dups, extra_attr_check, isa_dir, parent,
+ 0);
}
/*
@@ -2905,7 +3077,7 @@ verify_dinode(
return process_dinode_int(mp, dino, agno, ino, 0, &dirty, &used,
verify_mode, uncertain, ino_discovery,
- check_dups, 0, &isa_dir, &parent);
+ check_dups, 0, &isa_dir, &parent, 0);
}
/*
@@ -2931,5 +3103,5 @@ verify_uncertain_dinode(
return process_dinode_int(mp, dino, agno, ino, 0, &dirty, &used,
verify_mode, uncertain, ino_discovery,
- check_dups, 0, &isa_dir, &parent);
+ check_dups, 0, &isa_dir, &parent, 0);
}

View File

@ -1,8 +1,11 @@
From: Jeffrey Mahoney <jeffm@suse.com>
From 99a3f52e337d9c785315e64a599755bc6f7b2118 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Wed, 1 Aug 2018 17:06:45 -0500
Subject: [PATCH] mkfs: avoid divide-by-zero when hardware reports optimal i/o
size as 0
Patch-mainline: Submitted to linux-xfs, 19 Jul 2018
References: bsc#1089777
Patch-mainline: v4.18.0-rc1
Git-commit: 99a3f52e337d9c785315e64a599755bc6f7b2118
Commit 051b4e37f5e (mkfs: factor AG alignment) factored out the
AG alignment code into a separate function. It got rid of
@ -10,36 +13,45 @@ redundant checks for dswidth != 0 since calc_stripe_factors was
supposed to guarantee that if dsunit is non-zero dswidth will be
as well. Unfortunately, there's hardware out there that reports its
optimal i/o size as larger than the maximum i/o size, which the kernel
treats as broken and zeros out the optimal i/o size. We'll accept
the multi-sector dsunit but have a zero dswidth and hit a divide-by-zero
in align_ag_geometry.
treats as broken and zeros out the optimal i/o size.
To resolve this we can check the topology before consuming it, default
to using the stripe unit as the stripe width, and warn the user about it.
To resolve this we can check the topology before consuming it, and
ignore the bad stripe geometry.
Fixes: 051b4e37f5e (mkfs: factor AG alignment)
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: Eric Sandeen <sandeen@redhat.com>
[sandeen: remove guessing heuristic, just warn and ignore bad data.]
Reviewed-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
---
mkfs/xfs_mkfs.c | 6 ++++++
1 file changed, 6 insertions(+)
mkfs/xfs_mkfs.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c
index a135e06e..35542e57 100644
--- a/mkfs/xfs_mkfs.c
+++ b/mkfs/xfs_mkfs.c
@@ -2295,6 +2295,12 @@ _("data stripe width (%d) must be a multiple of the data stripe unit (%d)\n"),
if (!dsunit) {
dsunit = ft->dsunit;
dswidth = ft->dswidth;
+ if (dsunit && dswidth == 0) {
+ fprintf(stderr,
+_("%s: Volume reports stripe unit of %d bytes but stripe width of 0. Using stripe width of %d bytes, which may not be optimal.\n"),
+ progname, dsunit << 9, dsunit << 9);
+ dswidth = dsunit;
+ }
use_dev = true;
} else {
/* check and warn is alignment is sub-optimal */
--
2.16.4
@@ -2292,11 +2292,20 @@ _("data stripe width (%d) must be a mult
/* if no stripe config set, use the device default */
if (!dsunit) {
- dsunit = ft->dsunit;
- dswidth = ft->dswidth;
- use_dev = true;
+ /* Ignore nonsense from device. XXX add more validation */
+ if (ft->dsunit && ft->dswidth == 0) {
+ fprintf(stderr,
+_("%s: Volume reports stripe unit of %d bytes and stripe width of 0, ignoring.\n"),
+ progname, BBTOB(ft->dsunit));
+ ft->dsunit = 0;
+ ft->dswidth = 0;
+ } else {
+ dsunit = ft->dsunit;
+ dswidth = ft->dswidth;
+ use_dev = true;
+ }
} else {
- /* check and warn is alignment is sub-optimal */
+ /* check and warn if user-specified alignment is sub-optimal */
if (ft->dsunit && ft->dsunit != dsunit) {
fprintf(stderr,
_("%s: Specified data stripe unit %d is not the same as the volume stripe unit %d\n"),

View File

@ -1,3 +1,16 @@
-------------------------------------------------------------------
Tue Aug 21 19:09:54 UTC 2018 - jeffm@suse.com
- Update xfsprogs-mkfs-avoid-divide-by-zero-when-hardware-reports-opti.patch
* Refresh to match upstream version.
-------------------------------------------------------------------
Tue Aug 21 19:08:50 UTC 2018 - jeffm@suse.com
- repair: shift inode back into place if corrupted by bad log
replay (bsc#1105396).
* Added 0001-repair-shift-inode-back-into-place-if-corrupted-by-b.patch
-------------------------------------------------------------------
Fri Jul 20 00:07:14 UTC 2018 - jeffm@suse.com

View File

@ -39,6 +39,7 @@ Source3: module-setup.sh.in
Source4: dracut-fsck-help.txt
Patch0: xfsprogs-docdir.diff
Patch1: xfsprogs-mkfs-avoid-divide-by-zero-when-hardware-reports-opti.patch
Patch2: 0001-repair-shift-inode-back-into-place-if-corrupted-by-b.patch
BuildRequires: autoconf
BuildRequires: automake
BuildRequires: libblkid-devel
@ -101,6 +102,7 @@ want to install xfsprogs.
%setup -q
%patch0 -p1
%patch1 -p1
%patch2 -p1
%build
aclocal -I m4