204 lines
8.9 KiB
Diff
204 lines
8.9 KiB
Diff
|
xen-unstable commit 6ec48cf41b6656c98148380f39010063e62628c5
|
||
|
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
|
||
|
Date: Wed Apr 30 16:06:24 2014 +0100
|
||
|
Subject: libxl: introduce an option for disabling the non-O_DIRECT workaround
|
||
|
|
||
|
Document and implement a new option that permits disk backends which
|
||
|
would otherwise have to avoid O_DIRECT (because of the network memory
|
||
|
lifetime bug) to use it anyway. This is:
|
||
|
direct-io-safe in the xl domain disk config specification
|
||
|
direct_io_safe in the libxl disk API
|
||
|
direct-io-safe in the backend xenstore interface
|
||
|
|
||
|
Add a reference to xen/include/public/io/blkif.h in
|
||
|
docs/misc/vbd-interface.txt.
|
||
|
|
||
|
This change does not break ABI. Instead of adding a new member
|
||
|
direct_io_safe to struct libxl_device_disk the existing readwrite member
|
||
|
is reused.
|
||
|
|
||
|
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
|
||
|
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
|
||
|
Acked-by: Ian Campbell <ian.campbell@citrix.com>
|
||
|
Tested-by: Felipe Franciosi <felipe@paradoxo.org>
|
||
|
---
|
||
|
docs/misc/vbd-interface.txt | 6 +++++
|
||
|
docs/misc/xl-disk-configuration.txt | 38 ++++++++++++++++++++++++++++++++++++
|
||
|
tools/libxl/libxl.c | 2 +
|
||
|
tools/libxl/libxl.h | 11 ++++++++++
|
||
|
tools/libxl/libxlu_disk.c | 2 +
|
||
|
tools/libxl/libxlu_disk_i.h | 2 -
|
||
|
tools/libxl/libxlu_disk_l.l | 1
|
||
|
xen/include/public/io/blkif.h | 22 ++++++++++++++++++++
|
||
|
8 files changed, 83 insertions(+), 1 deletion(-)
|
||
|
|
||
|
Index: xen-4.4.0-testing/docs/misc/vbd-interface.txt
|
||
|
===================================================================
|
||
|
--- xen-4.4.0-testing.orig/docs/misc/vbd-interface.txt
|
||
|
+++ xen-4.4.0-testing/docs/misc/vbd-interface.txt
|
||
|
@@ -125,3 +125,9 @@ because they directly map the bottom 8 b
|
||
|
directly to the Linux guest's device number and throw away the rest;
|
||
|
they can crash due to minor number clashes. With these guests, the
|
||
|
workaround is not to supply problematic combinations of devices.
|
||
|
+
|
||
|
+
|
||
|
+Other frontend and backend options
|
||
|
+----------------------------------
|
||
|
+
|
||
|
+See xen/include/public/io/blkif.h for the full list of options.
|
||
|
Index: xen-4.4.0-testing/docs/misc/xl-disk-configuration.txt
|
||
|
===================================================================
|
||
|
--- xen-4.4.0-testing.orig/docs/misc/xl-disk-configuration.txt
|
||
|
+++ xen-4.4.0-testing/docs/misc/xl-disk-configuration.txt
|
||
|
@@ -178,6 +178,44 @@ information to be interpreted by the exe
|
||
|
These scripts are normally called "block-<script>".
|
||
|
|
||
|
|
||
|
+direct-io-safe
|
||
|
+--------------
|
||
|
+
|
||
|
+Description: Disables non-O_DIRECT workaround
|
||
|
+Supported values: absent, present
|
||
|
+Mandatory: No
|
||
|
+Default value: absent (workaround may be enabled)
|
||
|
+
|
||
|
+There is a memory lifetime bug in some driver domain (dom0) kernels
|
||
|
+which can cause crashes when using O_DIRECT. The bug occurs due to a
|
||
|
+mismatch between the backend-visible lifetime of pages used for the
|
||
|
+Xen PV network protocol and that expected by the backend kernel's
|
||
|
+networking subsystem. This can cause crashes when using certain
|
||
|
+backends with certain underlying storage.
|
||
|
+
|
||
|
+See:
|
||
|
+ http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
|
||
|
+
|
||
|
+For this reason, (this version of) the Xen libxl toolstack disables
|
||
|
+O_DIRECT when using the qemu-based Xen PV backend ("qdisk").
|
||
|
+
|
||
|
+However, this workaround has performance and scaling implications, and
|
||
|
+it is only necessary if the underlying device is a network filesystem.
|
||
|
+If the underlying device is not, then it is good to disable it; that
|
||
|
+is what this option is for.
|
||
|
+
|
||
|
+This option simply requests that the workaround be disabled. (However,
|
||
|
+not all backend versions which use the workaround understand this
|
||
|
+option, so this is on a best effort basis.)
|
||
|
+
|
||
|
+It's important to note that if you are storing the VM disk on a
|
||
|
+network filesystem or a network block device (NFS or ISCSI) it might
|
||
|
+not be safe to use this option. Otherwise specifying it is safe and
|
||
|
+can give better performance.
|
||
|
+
|
||
|
+If in the future the bug is fixed properly this option will then be
|
||
|
+silently ignored.
|
||
|
+
|
||
|
|
||
|
discard / no-discard
|
||
|
---------------
|
||
|
Index: xen-4.4.0-testing/tools/libxl/libxl.c
|
||
|
===================================================================
|
||
|
--- xen-4.4.0-testing.orig/tools/libxl/libxl.c
|
||
|
+++ xen-4.4.0-testing/tools/libxl/libxl.c
|
||
|
@@ -2480,6 +2480,8 @@ static void device_disk_add(libxl__egc *
|
||
|
flexarray_append(back, disk->readwrite ? "w" : "r");
|
||
|
flexarray_append(back, "device-type");
|
||
|
flexarray_append(back, disk->is_cdrom ? "cdrom" : "disk");
|
||
|
+ if ((disk->readwrite & ~LIBXL_HAVE_LIBXL_DEVICE_DISK_DIRECT_IO_SAFE_MASK) == LIBXL_HAVE_LIBXL_DEVICE_DISK_DIRECT_IO_SAFE_MAGIC)
|
||
|
+ flexarray_append_pair(back, "direct-io-safe", "1");
|
||
|
if ((disk->readwrite & ~LIBXL_HAVE_LIBXL_DEVICE_DISK_DISCARD_DISABLE_MASK) == LIBXL_HAVE_LIBXL_DEVICE_DISK_DISCARD_DISABLE_MAGIC)
|
||
|
flexarray_append_pair(back, "discard-enable", "0");
|
||
|
|
||
|
Index: xen-4.4.0-testing/tools/libxl/libxl.h
|
||
|
===================================================================
|
||
|
--- xen-4.4.0-testing.orig/tools/libxl/libxl.h
|
||
|
+++ xen-4.4.0-testing/tools/libxl/libxl.h
|
||
|
@@ -95,6 +95,17 @@
|
||
|
#define LIBXL_HAVE_BUILDINFO_EVENT_CHANNELS 1
|
||
|
|
||
|
/*
|
||
|
+ * The libxl_device_disk lacks some "cache" field, enabling directio
|
||
|
+ * is supported without breaking the ABI. This is done by overloading
|
||
|
+ * struct libxl_device_disk->readwrite:
|
||
|
+ * readwrite == 0: disk is readonly, no directio
|
||
|
+ * readwrite == 1: disk is readwrite, backend driver may enable directio
|
||
|
+ * readwrite == MAGIC: disk is readwrite, backend driver should use direct IO
|
||
|
+ */
|
||
|
+#define LIBXL_HAVE_LIBXL_DEVICE_DISK_DIRECT_IO_SAFE_MAGIC 0x00000600U
|
||
|
+#define LIBXL_HAVE_LIBXL_DEVICE_DISK_DIRECT_IO_SAFE_MASK 0xfffff0ffU
|
||
|
+
|
||
|
+/*
|
||
|
* The libxl_device_disk lacks discard_enable field, disabling discard
|
||
|
* is supported without breaking the ABI. This is done by overloading
|
||
|
* struct libxl_device_disk->readwrite:
|
||
|
Index: xen-4.4.0-testing/tools/libxl/libxlu_disk.c
|
||
|
===================================================================
|
||
|
--- xen-4.4.0-testing.orig/tools/libxl/libxlu_disk.c
|
||
|
+++ xen-4.4.0-testing/tools/libxl/libxlu_disk.c
|
||
|
@@ -80,6 +80,8 @@ int xlu_disk_parse(XLU_Config *cfg,
|
||
|
disk->format = LIBXL_DISK_FORMAT_EMPTY;
|
||
|
}
|
||
|
|
||
|
+ if (disk->readwrite && dpc.direct_io_safe)
|
||
|
+ disk->readwrite = (disk->readwrite & LIBXL_HAVE_LIBXL_DEVICE_DISK_DIRECT_IO_SAFE_MASK) | LIBXL_HAVE_LIBXL_DEVICE_DISK_DIRECT_IO_SAFE_MAGIC;
|
||
|
if (disk->readwrite && dpc.disable_discard)
|
||
|
disk->readwrite = (disk->readwrite & LIBXL_HAVE_LIBXL_DEVICE_DISK_DISCARD_DISABLE_MASK) | LIBXL_HAVE_LIBXL_DEVICE_DISK_DISCARD_DISABLE_MAGIC;
|
||
|
|
||
|
Index: xen-4.4.0-testing/tools/libxl/libxlu_disk_i.h
|
||
|
===================================================================
|
||
|
--- xen-4.4.0-testing.orig/tools/libxl/libxlu_disk_i.h
|
||
|
+++ xen-4.4.0-testing/tools/libxl/libxlu_disk_i.h
|
||
|
@@ -10,7 +10,7 @@ typedef struct {
|
||
|
void *scanner;
|
||
|
YY_BUFFER_STATE buf;
|
||
|
libxl_device_disk *disk;
|
||
|
- int access_set, disable_discard, had_depr_prefix;
|
||
|
+ int access_set, direct_io_safe, disable_discard, had_depr_prefix;
|
||
|
const char *spec;
|
||
|
} DiskParseContext;
|
||
|
|
||
|
Index: xen-4.4.0-testing/tools/libxl/libxlu_disk_l.l
|
||
|
===================================================================
|
||
|
--- xen-4.4.0-testing.orig/tools/libxl/libxlu_disk_l.l
|
||
|
+++ xen-4.4.0-testing/tools/libxl/libxlu_disk_l.l
|
||
|
@@ -173,6 +173,7 @@ backendtype=[^,]*,? { STRIP(','); setbac
|
||
|
|
||
|
vdev=[^,]*,? { STRIP(','); SAVESTRING("vdev", vdev, FROMEQUALS); }
|
||
|
script=[^,]*,? { STRIP(','); SAVESTRING("script", script, FROMEQUALS); }
|
||
|
+direct-io-safe,? { DPC->direct_io_safe = 1; }
|
||
|
discard,? { DPC->disable_discard = 0; }
|
||
|
no-discard,? { DPC->disable_discard = 1; }
|
||
|
|
||
|
Index: xen-4.4.0-testing/xen/include/public/io/blkif.h
|
||
|
===================================================================
|
||
|
--- xen-4.4.0-testing.orig/xen/include/public/io/blkif.h
|
||
|
+++ xen-4.4.0-testing/xen/include/public/io/blkif.h
|
||
|
@@ -97,6 +97,28 @@
|
||
|
*
|
||
|
* The type of the backing device/object.
|
||
|
*
|
||
|
+ *
|
||
|
+ * direct-io-safe
|
||
|
+ * Values: 0/1 (boolean)
|
||
|
+ * Default Value: 0
|
||
|
+ *
|
||
|
+ * The underlying storage is not affected by the direct IO memory
|
||
|
+ * lifetime bug. See:
|
||
|
+ * http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
|
||
|
+ *
|
||
|
+ * Therefore this option gives the backend permission to use
|
||
|
+ * O_DIRECT, notwithstanding that bug.
|
||
|
+ *
|
||
|
+ * That is, if this option is enabled, use of O_DIRECT is safe,
|
||
|
+ * in circumstances where we would normally have avoided it as a
|
||
|
+ * workaround for that bug. This option is not relevant for all
|
||
|
+ * backends, and even not necessarily supported for those for
|
||
|
+ * which it is relevant. A backend which knows that it is not
|
||
|
+ * affected by the bug can ignore this option.
|
||
|
+ *
|
||
|
+ * This option doesn't require a backend to use O_DIRECT, so it
|
||
|
+ * should not be used to try to control the caching behaviour.
|
||
|
+ *
|
||
|
*--------------------------------- Features ---------------------------------
|
||
|
*
|
||
|
* feature-barrier
|