Anthony Iliopoulos
c3c895815b
- update to 5.16.0: - libxfs: remove kernel stubs from xfs_shared.h - debian: Generate .gitcensus instead of .census - libxfs changes merged from kernel 5.16 - mkfs: increase the min log size to 64MB when possible - xfs_scrub: retry items that are ok except for XFAIL - xfs_scrub: fix xfrog_scrub_metadata error reporting - mkfs: enable inobtcount and bigtime by default - mkfs: prevent corruption of suboption string values - mkfs: document sample configuration file location - mkfs: add configuration files for a few LTS kernels - mkfs: add a config file for x86_64 pmem filesystems - xfs_quota: don't exit on "project" cmd failure - xfs_repair: don't guess about failure reason in phase6 - xfs_repair: update 2ndary superblocks after upgrades - xfs_scrub: fix reporting if we can't open devices - xfs_scrub: report optional features in version - libxcmd: use emacs mode for command history editing - libfrog: always use the kernel GETFSMAP definitions - mkfs.xfs(8): fix default inode allocator description - xfs_quota(8): fix up dump and report documentation - xfs_quota(8): document units in limit command - misc: add a crc32c self test to mkfs and repair OBS-URL: https://build.opensuse.org/request/show/976483 OBS-URL: https://build.opensuse.org/package/show/filesystems/xfsprogs?expand=0&rev=113
281 lines
8.4 KiB
Diff
281 lines
8.4 KiB
Diff
From e2239ef552a48edd33740fec8a005a7ac12dcc80 Mon Sep 17 00:00:00 2001
|
|
From: Jeffrey Mahoney <jeffm@suse.com>
|
|
Date: Tue, 21 Aug 2018 13:41:20 -0400
|
|
Subject: [PATCH] repair: shift inode back into place if corrupted by bad log
|
|
replay
|
|
References: bsc#1105396
|
|
|
|
SUSE kernels 3.12.74-60.64.40 through 3.12.74-60.64.99 contained
|
|
a regression in which di_dmstate in xfs_icdinode_t was changed to be
|
|
an atomic_t. Since we only complain if an inode item is too large,
|
|
if a kernel containing that regression mounted a file system with inode
|
|
items in the log formatted by a kernel without it, those items would
|
|
be used but would be interpreted using the structure with the atomic_t.
|
|
|
|
As a result, the inode would be copied incorrectly, corrupting di_dmstate
|
|
and the members that follow it.
|
|
|
|
For v3 inodes, we can detect that the UUID is shifted forward
|
|
8 bytes and recover di_uuid, di_ino, di_crtime, di_pad2, di_cowextsize,
|
|
di_flags2, and di_lsn. The UUID and inode number being incorrect
|
|
will trip the verifier on iread, but it will have been flushed from the
|
|
log in a broken state.
|
|
|
|
di_changecount is lost entirely since half is overwritten by the CRC
|
|
being updated and the other half fell in a hole in the structure.
|
|
di_flags is lost entirely since it is overwritten by half of
|
|
the generation number. Half of the generation number is lost since
|
|
it falls in a hole in the structure.
|
|
|
|
For v2 inodes, the corruption is more limited but impossible to
|
|
detect beyond invalid flags being in use.
|
|
|
|
Without this fix, xfs_repair will clear the affected inodes, causing
|
|
big problems.
|
|
|
|
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
|
|
---
|
|
repair/dinode.c | 186 ++++++++++++++++++++++++++++++++++++++++++++++--
|
|
1 file changed, 180 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/repair/dinode.c b/repair/dinode.c
|
|
index 8ea919698d14..81238568ac8e 100644
|
|
--- a/repair/dinode.c
|
|
+++ b/repair/dinode.c
|
|
@@ -2239,6 +2239,160 @@ _("Bad extent size hint %u on inode %" PRIu64 ", "),
|
|
}
|
|
}
|
|
|
|
+static int /* nonzero when the UUID of this inode appears shifted forward by 8 bytes */
|
|
+check_shifted_uuid(struct xfs_dinode *dino, xfs_mount_t *mp)
|
|
+{
|
|
+ uint64_t tmp64;
|
|
+ char tmpuuid[16];
|
|
+ uuid_t uuid;
|
|
+ /* Candidate UUID: bytes 0-7 from di_ino (converted from big-endian), bytes 8-15 from the first 8 bytes of di_uuid. */
|
|
+ tmp64 = be64_to_cpu(dino->di_ino);
|
|
+ memcpy(tmpuuid, &tmp64, sizeof(tmp64));
|
|
+ memcpy(tmpuuid + 8, &dino->di_uuid, 8);
|
|
+ memcpy(uuid, tmpuuid, 16);
|
|
+ /* The compare result is negated, so a zero result from platform_uuid_compare() against sb_meta_uuid makes this function return 1. */
|
|
+ return !platform_uuid_compare(&uuid, &mp->m_sb.sb_meta_uuid);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * There was a kernel that replayed incorrectly-formatted inode log items
|
|
+ * from a dirty log, leaving inode fields shifted. Rebuild the inode core
|
|
+ * in a local copy, write it back over *dino and recompute the CRC.
|
|
+ * 12 bytes are completely unrecoverable; they are documented below.
|
|
+ */
|
|
+static void
|
|
+repair_inode_with_bad_atomic(struct xfs_dinode *dino, xfs_mount_t *mp)
|
|
+{
|
|
+ struct xfs_dinode fixed; /* rebuilt inode core, copied back over *dino at the end */
|
|
+ struct xfs_legacy_timestamp *lts; /* fixed.di_crtime viewed as a legacy timestamp */
|
|
+ uint64_t tmp64;
|
|
+ uint32_t tmp32;
|
|
+ char tmpuuid[16];
|
|
+ char *tmpptr;
|
|
+
|
|
+ uuid_t uuid;
|
|
+ /* Rebuild the UUID: bytes 0-7 from di_ino (converted from big-endian), bytes 8-15 from the first 8 bytes of di_uuid. */
|
|
+ tmp64 = be64_to_cpu(dino->di_ino);
|
|
+ memcpy(tmpuuid, &tmp64, sizeof(tmp64));
|
|
+ tmpptr = (char *)dino->di_uuid;
|
|
+ memcpy(tmpuuid + 8, tmpptr, 8);
|
|
+ memcpy(uuid, tmpuuid, 16);
|
|
+ /* Start from a copy of the current inode core, then overwrite each shifted field. */
|
|
+ memcpy(&fixed, dino, sizeof(fixed));
|
|
+ memcpy(&fixed.di_uuid, uuid, sizeof(uuid));
|
|
+ /* di_crtime: raw 32-bit words at di_pad2[4] and di_pad2[8], each stored through cpu_to_be32(). NOTE(review): the casts read a byte array through uint32_t *; memcpy would sidestep aliasing and alignment concerns. */
|
|
+ tmp32 = *(uint32_t *)&dino->di_pad2[4];
|
|
+ lts = (struct xfs_legacy_timestamp *)&(fixed.di_crtime);
|
|
+ lts->t_sec = cpu_to_be32(tmp32);
|
|
+ tmp32 = *(uint32_t *)&dino->di_pad2[8];
|
|
+ lts->t_nsec = cpu_to_be32(tmp32);
|
|
+ /* di_ino: the old crtime t_nsec becomes the high 32 bits and the old t_sec the low 32 bits. */
|
|
+ tmp64 = be32_to_cpu(((struct xfs_legacy_timestamp *)(&(dino->di_crtime)))->t_nsec);
|
|
+ tmp64 <<= 32;
|
|
+ tmp64 |= be32_to_cpu(((struct xfs_legacy_timestamp *)(&(dino->di_crtime)))->t_sec);
|
|
+ fixed.di_ino = cpu_to_be64(tmp64);
|
|
+
|
|
+ tmp64 = be64_to_cpu(fixed.di_ino); /* NOTE(review): tmp64 is reassigned below before it is read again */
|
|
+ /* di_pad2 is rebuilt in three 4-byte pieces. */
|
|
+ memcpy(fixed.di_pad2 + 8, dino->di_pad2, 4); /* bytes 8-11 <- old di_pad2 bytes 0-3 */
|
|
+
|
|
+ tmp32 = be32_to_cpu(dino->di_cowextsize);
|
|
+ memcpy(fixed.di_pad2 + 4, &tmp32, 4); /* bytes 4-7 <- old di_cowextsize in CPU order */
|
|
+
|
|
+ tmp64 = be64_to_cpu(dino->di_flags2);
|
|
+ tmp32 = tmp64 >> 32;
|
|
+ memcpy(fixed.di_pad2, &tmp32, 4); /* bytes 0-3 <- upper 32 bits of old di_flags2 */
|
|
+
|
|
+ fixed.di_cowextsize = cpu_to_be32(tmp64); /* lower 32 bits of old di_flags2 (tmp64 truncated) */
|
|
+ fixed.di_flags2 = dino->di_lsn; /* old di_lsn, copied without byte-order conversion */
|
|
+ fixed.di_lsn = dino->di_changecount; /* old di_changecount, copied without byte-order conversion */
|
|
+
|
|
+ /*
|
|
+ * This is lost entirely. Half falls in padding and half
|
|
+ * is overwritten by the CRC.
|
|
+ */
|
|
+ fixed.di_changecount = 0;
|
|
+
|
|
+#if __BYTE_ORDER == __LITTLE_ENDIAN
|
|
+ /*
|
|
+ * Half of the generation number is lost, but it's the high bits.
|
|
+ * Pick a high number and hope for the best.
|
|
+ */
|
|
+ tmp32 = 0xff000000;
|
|
+ tmp32 |= be16_to_cpu(dino->di_flags); /* low 16 bits come from the old di_flags */
|
|
+ fixed.di_gen = cpu_to_be32(tmp32);
|
|
+#else
|
|
+ /*
|
|
+ * Half of the generation number is lost, but it's the low bits,
|
|
+ * so we can fake it.
|
|
+ */
|
|
+ tmp32 = be16_to_cpu(dino->di_flags) + 1; /* old di_flags plus one ... */
|
|
+ tmp32 <<= 16; /* ... moved into the upper 16 bits */
|
|
+ fixed.di_gen = cpu_to_be32(tmp32);
|
|
+#endif
|
|
+
|
|
+ /*
|
|
+ * The flags are lost since the atomic_t was 32-bit and we
|
|
+ * only keep 16.
|
|
+ */
|
|
+ fixed.di_flags = 0;
|
|
+ /* Write the rebuilt core back over the caller's inode and recompute its CRC. */
|
|
+ memcpy(dino, &fixed, sizeof(*dino));
|
|
+ xfs_dinode_calc_crc(mp, dino);
|
|
+}
|
|
+
|
|
+static int /* forward declaration: handle_malformed_inode() below calls process_dinode_int() */
|
|
+process_dinode_int(xfs_mount_t *mp, struct xfs_dinode *dino, xfs_agnumber_t agno,
|
|
+ xfs_agino_t ino, int was_free, int *dirty, int *used,
|
|
+ int verify_mode, int uncertain, int ino_discovery,
|
|
+ int check_dups, int extra_attr_check, int *isa_dir,
|
|
+ xfs_ino_t *parent, int recurse);
|
|
+ /* Repair a shifted inode in place, re-run process_dinode_int() on it with recurse set to 1, and return the result of that call. */
|
|
+static int
|
|
+handle_malformed_inode(xfs_mount_t *mp, struct xfs_dinode *dino,
|
|
+ xfs_agnumber_t agno, xfs_agino_t ino, int was_free,
|
|
+ int *dirty, int *used, int verify_mode, int uncertain,
|
|
+ int ino_discovery, int check_dups, int extra_attr_check,
|
|
+ int *isa_dir, xfs_ino_t *parent)
|
|
+{
|
|
+ struct xfs_dinode save; /* original inode core; only filled in when verify_mode or no_modify is set */
|
|
+ int retval;
|
|
+ xfs_ino_t lino = XFS_AGINO_TO_INO(mp, agno, ino);
|
|
+ /* In verify mode the warning is a complete line; otherwise it ends with a comma and is finished near the end of this function. */
|
|
+ if (!uncertain)
|
|
+ do_warn(_("malformed inode %" PRIu64 " found%c"),
|
|
+ lino, verify_mode ? '\n' : ',');
|
|
+
|
|
+ /*
|
|
+ * We can't just pass a local copy to verify since we need the
|
|
+ * data fork to check directories.
|
|
+ */
|
|
+ if (verify_mode || no_modify)
|
|
+ memcpy(&save, dino, sizeof(*dino));
|
|
+ /* Fix the inode in place and re-check it; the final argument (recurse) is passed as 1 for this nested call. */
|
|
+ repair_inode_with_bad_atomic(dino, mp);
|
|
+ retval = process_dinode_int(mp, dino, agno, ino, was_free, dirty,
|
|
+ used, verify_mode, uncertain, ino_discovery,
|
|
+ check_dups, extra_attr_check,
|
|
+ isa_dir, parent, 1);
|
|
+ /* Verify-only and no-modify runs put the saved core back and clear *dirty. */
|
|
+ if (verify_mode || no_modify) {
|
|
+ memcpy(dino, &save, sizeof(*dino));
|
|
+ *dirty = 0;
|
|
+ }
|
|
+ /* If the re-check passed outside verify mode, finish the message and set *dirty unless no_modify is set. */
|
|
+ if (retval == 0 && !verify_mode) {
|
|
+ if (no_modify)
|
|
+ do_warn(_(" would repair\n"));
|
|
+ else {
|
|
+ do_warn(_(" repairing\n"));
|
|
+ *dirty = 1;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return retval;
|
|
+}
|
|
+
|
|
/*
|
|
* returns 0 if the inode is ok, 1 if the inode is corrupt
|
|
* check_dups can be set to 1 *only* when called by the
|
|
@@ -2263,7 +2417,8 @@ process_dinode_int(xfs_mount_t *mp,
|
|
* duplicate blocks */
|
|
int extra_attr_check, /* 1 == do attribute format and value checks */
|
|
int *isa_dir, /* out == 1 if inode is a directory */
|
|
- xfs_ino_t *parent) /* out -- parent if ino is a dir */
|
|
+ xfs_ino_t *parent, /* out -- parent if ino is a dir */
|
|
+ int recurse)
|
|
{
|
|
xfs_rfsblock_t totblocks = 0;
|
|
xfs_rfsblock_t atotblocks = 0;
|
|
@@ -2379,6 +2534,25 @@ process_dinode_int(xfs_mount_t *mp,
|
|
* memory and hence invalidated the CRC.
|
|
*/
|
|
if (xfs_has_crc(mp)) {
|
|
+ int good_uuid = 1;
|
|
+
|
|
+ if (platform_uuid_compare(&dino->di_uuid,
|
|
+ &mp->m_sb.sb_meta_uuid))
|
|
+ good_uuid = 0;
|
|
+
|
|
+ /*
|
|
+ * Only check to see if it's a malformed inode if it has
|
|
+ * a valid magic, crc, and version and an invalid uuid.
|
|
+ */
|
|
+ if (!good_uuid && !retval && !recurse &&
|
|
+ check_shifted_uuid(dino, mp))
|
|
+ return handle_malformed_inode(mp, dino, agno, ino,
|
|
+ was_free, dirty, used,
|
|
+ verify_mode, uncertain,
|
|
+ ino_discovery, check_dups,
|
|
+ extra_attr_check,
|
|
+ isa_dir, parent);
|
|
+
|
|
if (be64_to_cpu(dino->di_ino) != lino) {
|
|
if (!uncertain)
|
|
do_warn(
|
|
@@ -2389,8 +2563,7 @@ _("inode identifier %llu mismatch on inode %" PRIu64 "\n"),
|
|
return 1;
|
|
goto clear_bad_out;
|
|
}
|
|
- if (platform_uuid_compare(&dino->di_uuid,
|
|
- &mp->m_sb.sb_meta_uuid)) {
|
|
+ if (!good_uuid) {
|
|
if (!uncertain)
|
|
do_warn(
|
|
_("UUID mismatch on inode %" PRIu64 "\n"), lino);
|
|
@@ -2952,7 +3125,8 @@ process_dinode(
|
|
#endif
|
|
return process_dinode_int(mp, dino, agno, ino, was_free, dirty, used,
|
|
verify_mode, uncertain, ino_discovery,
|
|
- check_dups, extra_attr_check, isa_dir, parent);
|
|
+ check_dups, extra_attr_check, isa_dir, parent,
|
|
+ 0);
|
|
}
|
|
|
|
/*
|
|
@@ -2979,7 +3153,7 @@ verify_dinode(
|
|
|
|
return process_dinode_int(mp, dino, agno, ino, 0, &dirty, &used,
|
|
verify_mode, uncertain, ino_discovery,
|
|
- check_dups, 0, &isa_dir, &parent);
|
|
+ check_dups, 0, &isa_dir, &parent, 0);
|
|
}
|
|
|
|
/*
|
|
@@ -3005,5 +3179,5 @@ verify_uncertain_dinode(
|
|
|
|
return process_dinode_int(mp, dino, agno, ino, 0, &dirty, &used,
|
|
verify_mode, uncertain, ino_discovery,
|
|
- check_dups, 0, &isa_dir, &parent);
|
|
+ check_dups, 0, &isa_dir, &parent, 0);
|
|
}
|
|
--
|
|
2.36.1
|
|
|