229 lines
9.2 KiB
Diff
229 lines
9.2 KiB
Diff
|
From de2863739f2ea17d89d0e442379109f967b5919d Mon Sep 17 00:00:00 2001
|
||
|
From: David Teigland <teigland@redhat.com>
|
||
|
Date: Fri, 15 Jun 2018 11:42:10 -0500
|
||
|
Subject: [PATCH] scan: use full md filter when md 1.0 devices are present
|
||
|
|
||
|
The md filter can operate in two native modes:
|
||
|
- normal: reads only the start of each device
|
||
|
- full: reads both the start and end of each device
|
||
|
|
||
|
md 1.0 devices place the superblock at the end of the device,
|
||
|
so components of this version will only be identified and
|
||
|
excluded when lvm uses the full md filter.
|
||
|
|
||
|
Previously, the full md filter was only used in commands
|
||
|
that could write to the device. Now, the full md filter
|
||
|
is also applied when there is an md 1.0 device present
|
||
|
on the system. This means the 'pvs' command can avoid
|
||
|
displaying md 1.0 components (at the cost of doubling
|
||
|
the i/o to every device on the system.)
|
||
|
|
||
|
(The md filter can operate in a third mode, using udev,
|
||
|
but this is disabled by default because there have been
|
||
|
problems with reliability of the info returned from udev.)
|
||
|
---
|
||
|
lib/cache/lvmcache.c | 2 +-
|
||
|
lib/device/dev-md.c | 27 ++++++++++----
|
||
|
lib/device/dev-type.h | 1 +
|
||
|
lib/filters/filter-md.c | 74 +++++++++++++++++++-------------------
|
||
|
lib/label/label.c | 14 ++++++++
|
||
|
test/shell/pvcreate-md-fake-hdr.sh | 3 +-
|
||
|
6 files changed, 75 insertions(+), 46 deletions(-)
|
||
|
|
||
|
diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c
|
||
|
index 3e681a2ba..a2ee0cd43 100644
|
||
|
--- a/lib/cache/lvmcache.c
|
||
|
+++ b/lib/cache/lvmcache.c
|
||
|
@@ -998,7 +998,7 @@ int lvmcache_dev_is_unchosen_duplicate(struct device *dev)
|
||
|
* unused_duplicate_devs list, and restrict what we allow done with it.
|
||
|
*
|
||
|
* In the case of md components, we usually filter these out in filter-md,
|
||
|
- * but in the special case of md superblocks <= 1.0 where the superblock
|
||
|
+ * but in the special case of md superblock version 1.0 where the superblock
|
||
|
* is at the end of the device, filter-md doesn't always eliminate them
|
||
|
* first, so we eliminate them here.
|
||
|
*
|
||
|
diff --git a/lib/device/dev-md.c b/lib/device/dev-md.c
|
||
|
index f5a736fc2..7196dc007 100644
|
||
|
--- a/lib/device/dev-md.c
|
||
|
+++ b/lib/device/dev-md.c
|
||
|
@@ -142,13 +142,6 @@ static int _native_dev_is_md(struct device *dev, uint64_t *offset_found, int ful
|
||
|
* command if it should do a full check (cmd->use_full_md_check),
|
||
|
* and set it for commands that could possibly write to an md dev
|
||
|
* (pvcreate/vgcreate/vgextend).
|
||
|
- *
|
||
|
- * For old md versions with magic numbers at the end of devices,
|
||
|
- * the md dev components won't be filtered out here when full is 0,
|
||
|
- * so they will be scanned, and appear as duplicate PVs in lvmcache.
|
||
|
- * The md device itself will be chosen as the primary duplicate,
|
||
|
- * and the components are dropped from the list of duplicates in,
|
||
|
- * i.e. a kind of post-scan filtering.
|
||
|
*/
|
||
|
if (!full) {
|
||
|
sb_offset = 0;
|
||
|
@@ -414,6 +407,26 @@ unsigned long dev_md_stripe_width(struct dev_types *dt, struct device *dev)
|
||
|
return stripe_width_sectors;
|
||
|
}
|
||
|
|
||
|
+int dev_is_md_with_end_superblock(struct dev_types *dt, struct device *dev)
|
||
|
+{
|
||
|
+ char version_string[MD_MAX_SYSFS_SIZE];
|
||
|
+ const char *attribute = "metadata_version";
|
||
|
+ /* Returns 1 if md metadata_version is "1.0", 0 if not (or not md), -1 if it can't be read. */
|
||
|
+ if (MAJOR(dev->dev) != dt->md_major)
|
||
|
+ return 0;
|
||
|
+ /* Fetch the attribute as a single string; anything but one parsed item is a failure. */
|
||
|
+ if (_md_sysfs_attribute_scanf(dt, dev, attribute,
|
||
|
+ "%s", &version_string) != 1)
|
||
|
+ return -1;
|
||
|
+ /* NOTE(review): -1 is non-zero, so callers doing a plain truth test treat a failed read as a match. */
|
||
|
+ log_very_verbose("Device %s %s is %s.",
|
||
|
+ dev_name(dev), attribute, version_string);
|
||
|
+ /* NOTE(review): only "1.0" matches; md 0.90 superblocks also sit near the device end - confirm. */
|
||
|
+ if (!strcmp(version_string, "1.0"))
|
||
|
+ return 1;
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
#else
|
||
|
|
||
|
int dev_is_md(struct device *dev __attribute__((unused)),
|
||
|
diff --git a/lib/device/dev-type.h b/lib/device/dev-type.h
|
||
|
index 843e2545b..f629a0278 100644
|
||
|
--- a/lib/device/dev-type.h
|
||
|
+++ b/lib/device/dev-type.h
|
||
|
@@ -76,6 +76,7 @@ int wipe_known_signatures(struct cmd_context *cmd, struct device *dev, const cha
|
||
|
|
||
|
/* Type-specific device properties */
|
||
|
unsigned long dev_md_stripe_width(struct dev_types *dt, struct device *dev);
|
||
|
+int dev_is_md_with_end_superblock(struct dev_types *dt, struct device *dev);
|
||
|
|
||
|
/* Partitioning */
|
||
|
int major_max_partitions(struct dev_types *dt, int major);
|
||
|
diff --git a/lib/filters/filter-md.c b/lib/filters/filter-md.c
|
||
|
index ab97b5946..ad5b8e4e8 100644
|
||
|
--- a/lib/filters/filter-md.c
|
||
|
+++ b/lib/filters/filter-md.c
|
||
|
@@ -29,43 +29,43 @@
|
||
|
*
|
||
|
* (This is assuming lvm.conf md_component_detection=1.)
|
||
|
*
|
||
|
- * If lvm does *not* ignore the components, then lvm will read lvm
|
||
|
- * labels from the md dev and from the component devs, and will see
|
||
|
- * them all as duplicates of each other. LVM duplicate resolution
|
||
|
- * will then kick in and keep the md dev around to use and ignore
|
||
|
- * the components.
|
||
|
- *
|
||
|
- * It is better to exclude the components as early as possible during
|
||
|
- * lvm processing, ideally before lvm even looks for labels on the
|
||
|
- * components, so that duplicate resolution can be avoided. There are
|
||
|
- * a number of ways that md components can be excluded earlier than
|
||
|
- * the duplicate resolution phase:
|
||
|
- *
|
||
|
- * - When external_device_info_source="udev", lvm discovers a device is
|
||
|
- * an md component by asking udev during the initial filtering phase.
|
||
|
- * However, lvm's default is to not use udev for this. The
|
||
|
- * alternative is "native" detection in which lvm tries to detect
|
||
|
- * md components itself.
|
||
|
- *
|
||
|
- * - When using native detection, lvm's md filter looks for the md
|
||
|
- * superblock at the start of devices. It will see the md superblock
|
||
|
- * on the components, exclude them in the md filter, and avoid
|
||
|
- * handling them later in duplicate resolution.
|
||
|
- *
|
||
|
- * - When using native detection, lvm's md filter will not detect
|
||
|
- * components when the md device has an older superblock version that
|
||
|
- * places the superblock at the end of the device. This case will
|
||
|
- * fall back to duplicate resolution to exclude components.
|
||
|
- *
|
||
|
- * A variation of the description above occurs for lvm commands that
|
||
|
- * intend to create new PVs on devices (pvcreate, vgcreate, vgextend).
|
||
|
- * For these commands, the native md filter also reads the end of all
|
||
|
- * devices to check for the odd md superblocks.
|
||
|
- *
|
||
|
- * (The reason that external_device_info_source is not set to udev by
|
||
|
- * default is that there have be issues with udev not being promptly
|
||
|
- * or reliably updated about md state changes, causing the udev info
|
||
|
- * that lvm uses to be occasionally wrong.)
|
||
|
+ * If lvm does *not* ignore the components, then lvm may read lvm
|
||
|
+ * labels from the component devs and potentially the md dev,
|
||
|
+ * which can trigger duplicate detection, and/or cause lvm to display
|
||
|
+ * md components as PVs rather than ignoring them.
|
||
|
+ *
|
||
|
+ * If scanning md components causes duplicates to be seen, then
|
||
|
+ * the lvm duplicate resolution will exclude the components.
|
||
|
+ *
|
||
|
+ * The lvm md filter has three modes:
|
||
|
+ *
|
||
|
+ * 1. look for md superblock at the start of the device
|
||
|
+ * 2. look for md superblock at the start and end of the device
|
||
|
+ * 3. use udev to detect components
|
||
|
+ *
|
||
|
+ * mode 1 will not detect and exclude components of md devices
|
||
|
+ * that use superblock version 1.0 which is at the end of the device.
|
||
|
+ *
|
||
|
+ * mode 2 will detect these, but mode 2 doubles the i/o done by label
|
||
|
+ * scan, since there's a read at both the start and end of every device.
|
||
|
+ *
|
||
|
+ * mode 3 is used when external_device_info_source="udev". It does
|
||
|
+ * not require any io from lvm, but this mode is not used by default
|
||
|
+ * because there have been problems getting reliable info from udev.
|
||
|
+ *
|
||
|
+ * lvm uses mode 2 when:
|
||
|
+ *
|
||
|
+ * - the command is pvcreate/vgcreate/vgextend, which format new
|
||
|
+ * devices, and if the user ran these commands on a component
|
||
|
+ * device of an md device 1.0, then it would cause problems.
|
||
|
+ * FIXME: this would only really need to scan the end of the
|
||
|
+ * devices being formatted, not all devices.
|
||
|
+ *
|
||
|
+ * - it sees an md device on the system using version 1.0.
|
||
|
+ * The point of this is just to avoid displaying md components
|
||
|
+ * from the 'pvs' command.
|
||
|
+ * FIXME: the cost (double i/o) may not be worth the benefit
|
||
|
+ * (not showing md components).
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
diff --git a/lib/label/label.c b/lib/label/label.c
|
||
|
index 837033c4b..e76ddd4b2 100644
|
||
|
--- a/lib/label/label.c
|
||
|
+++ b/lib/label/label.c
|
||
|
@@ -856,6 +856,20 @@ int label_scan(struct cmd_context *cmd)
|
||
|
bcache_invalidate_fd(scan_bcache, dev->bcache_fd);
|
||
|
_scan_dev_close(dev);
|
||
|
}
|
||
|
+
|
||
|
+ /*
|
||
|
+ * When md devices exist that use the old superblock at the
|
||
|
+ * end of the device, then in order to detect and filter out
|
||
|
+ * the component devices of those md devs, we need to enable
|
||
|
+ * the full md filter which scans both the start and the end
|
||
|
+ * of every device. This doubles the amount of scanning i/o,
|
||
|
+ * which we want to avoid. FIXME: it may not be worth the
|
||
|
+ * cost of double i/o just to avoid displaying md component
|
||
|
+ * devs in 'pvs', which is a pretty harmless effect from a
|
||
|
+ * pretty uncommon situation.
|
||
|
+ */
|
||
|
+ if (dev_is_md_with_end_superblock(cmd->dev_types, dev))
|
||
|
+ cmd->use_full_md_check = 1;
|
||
|
};
|
||
|
dev_iter_destroy(iter);
|
||
|
|
||
|
diff --git a/test/shell/pvcreate-md-fake-hdr.sh b/test/shell/pvcreate-md-fake-hdr.sh
|
||
|
index b89fe4377..4c9ac7cbc 100644
|
||
|
--- a/test/shell/pvcreate-md-fake-hdr.sh
|
||
|
+++ b/test/shell/pvcreate-md-fake-hdr.sh
|
||
|
@@ -89,6 +89,7 @@ sleep 1
|
||
|
# (when mdadm supports repair)
|
||
|
if mdadm --action=repair "$mddev" ; then
|
||
|
sleep 1
|
||
|
+ pvscan -vvvv
|
||
|
# should be showing correctly PV3 & PV4
|
||
|
- pvs
|
||
|
+ pvs -vvvv "$dev3" "$dev4"
|
||
|
fi
|
||
|
--
|
||
|
2.12.3
|
||
|
|