Accepting request 52950 from Virtualization

Accepted submit request 52950 from user charlesa

OBS-URL: https://build.opensuse.org/request/show/52950
OBS-URL: https://build.opensuse.org/package/show/openSUSE:Factory/xen?expand=0&rev=104
Ruediger Oertel 2010-11-15 14:40:11 +00:00 committed by Git OBS Bridge
commit 213eeab11d
38 changed files with 2510 additions and 783 deletions

@@ -0,0 +1,55 @@
# HG changeset patch
# User Michael Young <m.a.young@durham.ac.uk>
# Date 1282234170 -3600
# Node ID 2940165380de2348e0ea3f628dea35750a2b4c8f
# Parent 60746a2c14a6cc123892f973fbdd6acb73251d39
tools/python: fix xm list for Python 2.7
This patch fixes
Unexpected error: <type 'exceptions.AttributeError'>
This is due to xmlrpc changes in Python 2.7. This patch should
fix it for both old and new versions.
Signed-off-by: Michael Young <m.a.young@durham.ac.uk>
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
Index: xen-4.0.1-testing/tools/python/xen/util/xmlrpcclient.py
===================================================================
--- xen-4.0.1-testing.orig/tools/python/xen/util/xmlrpcclient.py
+++ xen-4.0.1-testing/tools/python/xen/util/xmlrpcclient.py
@@ -22,6 +22,7 @@ import socket
import string
import xmlrpclib
from types import StringTypes
+from sys import hexversion
try:
@@ -54,7 +55,12 @@ class UnixTransport(xmlrpclib.Transport)
return xmlrpclib.Transport.request(self, host, '/RPC2',
request_body, verbose)
def make_connection(self, host):
- return HTTPUnix(self.__handler)
+ if hexversion < 0x02070000:
+ # python 2.6 or earlier
+ return HTTPUnix(self.__handler)
+ else:
+ # xmlrpclib.Transport changed in python 2.7
+ return HTTPUnixConnection(self.__handler)
# We need our own transport for HTTPS, because xmlrpclib.SafeTransport is
Index: xen-4.0.1-testing/tools/python/xen/util/xmlrpclib2.py
===================================================================
--- xen-4.0.1-testing.orig/tools/python/xen/util/xmlrpclib2.py
+++ xen-4.0.1-testing/tools/python/xen/util/xmlrpclib2.py
@@ -58,6 +58,9 @@ def stringify(value):
# some bugs in Keep-Alive handling and also enabled it by default
class XMLRPCRequestHandler(SimpleXMLRPCRequestHandler):
protocol_version = "HTTP/1.1"
+ # xend crashes in python 2.7 unless disable_nagle_algorithm = False
+ # it isn't used in earlier versions so it is harmless to set it generally
+ disable_nagle_algorithm = False
def __init__(self, hosts_allowed, request, client_address, server):
self.hosts_allowed = hosts_allowed

@@ -1,131 +0,0 @@
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1286028261 -3600
# Node ID 4beee577912215c734b79cb84bfe3fb20c1afbfc
# Parent aed9fd361340158daf2d7160d1b367478b6312d6
Vt-d: fix dom0 graphics problem on Lenovo T410.
References: bnc#643477
The patch is derived from a similar quirk in the Linux kernel by David
Woodhouse and Adam Jackson. It checks the VT enabling bit in the IGD GGC
register. If VT is not enabled correctly in the IGD, Xen does not
enable VT-d translation for the IGD VT-d engine. In the case where the
iommu boot parameter is set to force, Xen calls panic().
Signed-off-by: Allen Kay <allen.m.kay@intel.com>
jb: Simplified and switched operands of && in first if() added to
iommu_enable_translation().
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -46,6 +46,7 @@ LIST_HEAD(acpi_rmrr_units);
LIST_HEAD(acpi_atsr_units);
LIST_HEAD(acpi_rhsa_units);
+static u64 igd_drhd_address;
u8 dmar_host_address_width;
void dmar_scope_add_buses(struct dmar_scope *scope, u16 sec_bus, u16 sub_bus)
@@ -239,6 +240,11 @@ struct acpi_rhsa_unit * drhd_to_rhsa(str
return NULL;
}
+int is_igd_drhd(struct acpi_drhd_unit *drhd)
+{
+ return ( drhd->address == igd_drhd_address ? 1 : 0);
+}
+
/*
* Count number of devices in device scope. Do not include PCI sub
* hierarchies.
@@ -333,6 +339,15 @@ static int __init acpi_parse_dev_scope(v
if ( iommu_verbose )
dprintk(VTDPREFIX, " endpoint: %x:%x.%x\n",
bus, path->dev, path->fn);
+
+ if ( type == DMAR_TYPE )
+ {
+ struct acpi_drhd_unit *drhd = acpi_entry;
+
+ if ( (bus == 0) && (path->dev == 2) && (path->fn == 0) )
+ igd_drhd_address = drhd->address;
+ }
+
break;
case ACPI_DEV_IOAPIC:
--- a/xen/drivers/passthrough/vtd/dmar.h
+++ b/xen/drivers/passthrough/vtd/dmar.h
@@ -114,5 +114,6 @@ void *map_to_nocache_virt(int nr_iommus,
int vtd_hw_check(void);
void disable_pmr(struct iommu *iommu);
int is_usb_device(u8 bus, u8 devfn);
+int is_igd_drhd(struct acpi_drhd_unit *drhd);
#endif /* _DMAR_H_ */
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -688,10 +688,34 @@ static int iommu_set_root_entry(struct i
return 0;
}
-static void iommu_enable_translation(struct iommu *iommu)
+#define GGC 0x52
+#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
+static int is_igd_vt_enabled(void)
+{
+ unsigned short ggc;
+
+ /* integrated graphics on Intel platforms is located at 0:2.0 */
+ ggc = pci_conf_read16(0, 2, 0, GGC);
+ return ( ggc & GGC_MEMORY_VT_ENABLED ? 1 : 0 );
+}
+
+static void iommu_enable_translation(struct acpi_drhd_unit *drhd)
{
u32 sts;
unsigned long flags;
+ struct iommu *iommu = drhd->iommu;
+
+ if ( is_igd_drhd(drhd) && !is_igd_vt_enabled() )
+ {
+ if ( force_iommu )
+ panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose!\n");
+ else
+ {
+ dprintk(XENLOG_WARNING VTDPREFIX,
+ "BIOS did not enable IGD for VT properly. Disabling IGD VT-d engine.\n");
+ return;
+ }
+ }
if ( iommu_verbose )
dprintk(VTDPREFIX,
@@ -1178,7 +1202,6 @@ static int intel_iommu_domain_init(struc
static void intel_iommu_dom0_init(struct domain *d)
{
- struct iommu *iommu;
struct acpi_drhd_unit *drhd;
if ( !iommu_passthrough && !need_iommu(d) )
@@ -1194,8 +1217,7 @@ static void intel_iommu_dom0_init(struct
for_each_drhd_unit ( drhd )
{
- iommu = drhd->iommu;
- iommu_enable_translation(iommu);
+ iommu_enable_translation(drhd);
}
}
@@ -2163,7 +2185,7 @@ static void vtd_resume(void)
(u32) iommu_state[i][DMAR_FEUADDR_REG]);
spin_unlock_irqrestore(&iommu->register_lock, flags);
- iommu_enable_translation(iommu);
+ iommu_enable_translation(drhd);
}
}

22223-vtd-workarounds.patch
@@ -0,0 +1,719 @@
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1286028261 -3600
# Node ID 4beee577912215c734b79cb84bfe3fb20c1afbfc
# Parent aed9fd361340158daf2d7160d1b367478b6312d6
Vt-d: fix dom0 graphics problem on Lenovo T410.
References: bnc#643477
The patch is derived from a similar quirk in the Linux kernel by David
Woodhouse and Adam Jackson. It checks the VT enabling bit in the IGD GGC
register. If VT is not enabled correctly in the IGD, Xen does not
enable VT-d translation for the IGD VT-d engine. In the case where the
iommu boot parameter is set to force, Xen calls panic().
Signed-off-by: Allen Kay <allen.m.kay@intel.com>
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1288344554 -3600
# Node ID b48d8f27fca251c2df0222d195ffcb772d6a1128
# Parent 2d5e8f4ac43a120bbb5d4c52d08f6980848f0166
vtd: consolidate VT-d quirks into a single file quirks.c
Consolidate VT-d quirks into a single file - quirks.c. This includes
quirks to workaround OEM BIOS issue with VT-d enabling in IGD, Cantiga
VT-d buffer flush issue, Cantiga IGD Vt-d low power related errata,
and a quirk to workaround issues related to wifi direct assignment.
Signed-off-by: Allen Kay <allen.m.kay@intel.com>
Reviewed-by: Jan Beulich <JBeulich@novell.com>
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1288888517 0
# Node ID fedcd4cbcc1eb3e210628bdf95766ca0c400fc18
# Parent d508b18a68447f91cd879b79a498f06536d89f8e
[VTD] fix a typo and some minor cleanup of quirks.c
Fixed a typo for IGD_DEV define and some minor cleanup to ease future
enhancement.
Signed-off-by: Allen Kay <allen.m.kay@intel.com>
--- a/xen/drivers/passthrough/vtd/Makefile
+++ b/xen/drivers/passthrough/vtd/Makefile
@@ -6,3 +6,4 @@ obj-y += dmar.o
obj-y += utils.o
obj-y += qinval.o
obj-y += intremap.o
+obj-y += quirks.o
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -46,6 +46,7 @@ LIST_HEAD(acpi_rmrr_units);
LIST_HEAD(acpi_atsr_units);
LIST_HEAD(acpi_rhsa_units);
+static u64 igd_drhd_address;
u8 dmar_host_address_width;
void dmar_scope_add_buses(struct dmar_scope *scope, u16 sec_bus, u16 sub_bus)
@@ -239,6 +240,11 @@ struct acpi_rhsa_unit * drhd_to_rhsa(str
return NULL;
}
+int is_igd_drhd(struct acpi_drhd_unit *drhd)
+{
+ return drhd && (drhd->address == igd_drhd_address);
+}
+
/*
* Count number of devices in device scope. Do not include PCI sub
* hierarchies.
@@ -333,6 +339,15 @@ static int __init acpi_parse_dev_scope(v
if ( iommu_verbose )
dprintk(VTDPREFIX, " endpoint: %x:%x.%x\n",
bus, path->dev, path->fn);
+
+ if ( type == DMAR_TYPE )
+ {
+ struct acpi_drhd_unit *drhd = acpi_entry;
+
+ if ( (bus == 0) && (path->dev == 2) && (path->fn == 0) )
+ igd_drhd_address = drhd->address;
+ }
+
break;
case ACPI_DEV_IOAPIC:
--- a/xen/drivers/passthrough/vtd/dmar.h
+++ b/xen/drivers/passthrough/vtd/dmar.h
@@ -114,5 +114,6 @@ void *map_to_nocache_virt(int nr_iommus,
int vtd_hw_check(void);
void disable_pmr(struct iommu *iommu);
int is_usb_device(u8 bus, u8 devfn);
+int is_igd_drhd(struct acpi_drhd_unit *drhd);
#endif /* _DMAR_H_ */
--- a/xen/drivers/passthrough/vtd/extern.h
+++ b/xen/drivers/passthrough/vtd/extern.h
@@ -26,6 +26,7 @@
extern int qinval_enabled;
extern int ats_enabled;
+extern bool_t rwbf_quirk;
void print_iommu_regs(struct acpi_drhd_unit *drhd);
void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn);
@@ -35,6 +36,12 @@ int enable_qinval(struct iommu *iommu);
void disable_qinval(struct iommu *iommu);
int enable_intremap(struct iommu *iommu, int eim);
void disable_intremap(struct iommu *iommu);
+
+void iommu_flush_cache_entry(void *addr, unsigned int size);
+void iommu_flush_cache_page(void *addr, unsigned long npages);
+int iommu_alloc(struct acpi_drhd_unit *drhd);
+void iommu_free(struct acpi_drhd_unit *drhd);
+
int queue_invalidate_context(struct iommu *iommu,
u16 did, u16 source_id, u8 function_mask, u8 granu);
int queue_invalidate_iotlb(struct iommu *iommu,
@@ -44,19 +51,41 @@ int queue_invalidate_iec(struct iommu *i
int invalidate_sync(struct iommu *iommu);
int iommu_flush_iec_global(struct iommu *iommu);
int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx);
+void clear_fault_bits(struct iommu *iommu);
+
struct iommu * ioapic_to_iommu(unsigned int apic_id);
struct acpi_drhd_unit * ioapic_to_drhd(unsigned int apic_id);
struct acpi_drhd_unit * iommu_to_drhd(struct iommu *iommu);
struct acpi_rhsa_unit * drhd_to_rhsa(struct acpi_drhd_unit *drhd);
-void clear_fault_bits(struct iommu *iommu);
+struct acpi_drhd_unit * find_ats_dev_drhd(struct iommu *iommu);
+
int ats_device(int seg, int bus, int devfn);
int enable_ats_device(int seg, int bus, int devfn);
int disable_ats_device(int seg, int bus, int devfn);
int invalidate_ats_tcs(struct iommu *iommu);
+
int qinval_device_iotlb(struct iommu *iommu,
u32 max_invs_pend, u16 sid, u16 size, u64 addr);
int dev_invalidate_iotlb(struct iommu *iommu, u16 did,
u64 addr, unsigned int size_order, u64 type);
-struct acpi_drhd_unit * find_ats_dev_drhd(struct iommu *iommu);
+
+unsigned int get_cache_line_size(void);
+void cacheline_flush(char *);
+void flush_all_cache(void);
+
+u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages);
+void free_pgtable_maddr(u64 maddr);
+void *map_vtd_domain_page(u64 maddr);
+void unmap_vtd_domain_page(void *va);
+int domain_context_mapping_one(struct domain *domain, struct iommu *iommu,
+ u8 bus, u8 devfn);
+int domain_context_unmap_one(struct domain *domain, struct iommu *iommu,
+ u8 bus, u8 devfn);
+
+int is_igd_vt_enabled_quirk(void);
+void __init platform_quirks_init(void);
+void vtd_ops_preamble_quirk(struct iommu* iommu);
+void vtd_ops_postamble_quirk(struct iommu* iommu);
+void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map);
#endif // _VTD_EXTERN_H_
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -43,7 +43,6 @@
#endif
int nr_iommus;
-static bool_t rwbf_quirk;
static void setup_dom0_devices(struct domain *d);
static void setup_dom0_rmrr(struct domain *d);
@@ -481,16 +480,36 @@ static int inline iommu_flush_iotlb_glob
int flush_non_present_entry, int flush_dev_iotlb)
{
struct iommu_flush *flush = iommu_get_flush(iommu);
- return flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
+ int status;
+
+ /* apply platform specific errata workarounds */
+ vtd_ops_preamble_quirk(iommu);
+
+ status = flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
flush_non_present_entry, flush_dev_iotlb);
+
+ /* undo platform specific errata workarounds */
+ vtd_ops_postamble_quirk(iommu);
+
+ return status;
}
static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did,
int flush_non_present_entry, int flush_dev_iotlb)
{
struct iommu_flush *flush = iommu_get_flush(iommu);
- return flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
+ int status;
+
+ /* apply platform specific errata workarounds */
+ vtd_ops_preamble_quirk(iommu);
+
+ status = flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
flush_non_present_entry, flush_dev_iotlb);
+
+ /* undo platform specific errata workarounds */
+ vtd_ops_postamble_quirk(iommu);
+
+ return status;
}
static int inline get_alignment(u64 base, unsigned int size)
@@ -514,6 +533,7 @@ static int inline iommu_flush_iotlb_psi(
{
unsigned int align;
struct iommu_flush *flush = iommu_get_flush(iommu);
+ int status;
ASSERT(!(addr & (~PAGE_MASK_4K)));
ASSERT(pages > 0);
@@ -534,8 +554,16 @@ static int inline iommu_flush_iotlb_psi(
addr >>= PAGE_SHIFT_4K + align;
addr <<= PAGE_SHIFT_4K + align;
- return flush->iotlb(iommu, did, addr, align, DMA_TLB_PSI_FLUSH,
+ /* apply platform specific errata workarounds */
+ vtd_ops_preamble_quirk(iommu);
+
+ status = flush->iotlb(iommu, did, addr, align, DMA_TLB_PSI_FLUSH,
flush_non_present_entry, flush_dev_iotlb);
+
+ /* undo platform specific errata workarounds */
+ vtd_ops_postamble_quirk(iommu);
+
+ return status;
}
static void iommu_flush_all(void)
@@ -688,10 +716,26 @@ static int iommu_set_root_entry(struct i
return 0;
}
-static void iommu_enable_translation(struct iommu *iommu)
+static void iommu_enable_translation(struct acpi_drhd_unit *drhd)
{
u32 sts;
unsigned long flags;
+ struct iommu *iommu = drhd->iommu;
+
+ if ( is_igd_drhd(drhd) && !is_igd_vt_enabled_quirk() )
+ {
+ if ( force_iommu )
+ panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose!\n");
+ else
+ {
+ dprintk(XENLOG_WARNING VTDPREFIX,
+ "BIOS did not enable IGD for VT properly. Disabling IGD VT-d engine.\n");
+ return;
+ }
+ }
+
+ /* apply platform specific errata workarounds */
+ vtd_ops_preamble_quirk(iommu);
if ( iommu_verbose )
dprintk(VTDPREFIX,
@@ -705,6 +749,9 @@ static void iommu_enable_translation(str
(sts & DMA_GSTS_TES), sts);
spin_unlock_irqrestore(&iommu->register_lock, flags);
+ /* undo platform specific errata workarounds */
+ vtd_ops_postamble_quirk(iommu);
+
/* Disable PMRs when VT-d engine takes effect per spec definition */
disable_pmr(iommu);
}
@@ -714,6 +761,9 @@ static void iommu_disable_translation(st
u32 sts;
unsigned long flags;
+ /* apply platform specific errata workarounds */
+ vtd_ops_preamble_quirk(iommu);
+
spin_lock_irqsave(&iommu->register_lock, flags);
sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_TE));
@@ -722,6 +772,9 @@ static void iommu_disable_translation(st
IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl,
!(sts & DMA_GSTS_TES), sts);
spin_unlock_irqrestore(&iommu->register_lock, flags);
+
+ /* undo platform specific errata workarounds */
+ vtd_ops_postamble_quirk(iommu);
}
enum faulttype {
@@ -1065,6 +1118,7 @@ int __init iommu_alloc(struct acpi_drhd_
xfree(iommu);
return -ENOMEM;
}
+ iommu->intel->drhd = drhd;
iommu->reg = map_to_nocache_virt(nr_iommus, drhd->address);
iommu->index = nr_iommus++;
@@ -1178,7 +1232,6 @@ static int intel_iommu_domain_init(struc
static void intel_iommu_dom0_init(struct domain *d)
{
- struct iommu *iommu;
struct acpi_drhd_unit *drhd;
if ( !iommu_passthrough && !need_iommu(d) )
@@ -1194,12 +1247,11 @@ static void intel_iommu_dom0_init(struct
for_each_drhd_unit ( drhd )
{
- iommu = drhd->iommu;
- iommu_enable_translation(iommu);
+ iommu_enable_translation(drhd);
}
}
-static int domain_context_mapping_one(
+int domain_context_mapping_one(
struct domain *domain,
struct iommu *iommu,
u8 bus, u8 devfn)
@@ -1301,6 +1353,8 @@ static int domain_context_mapping_one(
unmap_vtd_domain_page(context_entries);
+ me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC);
+
return 0;
}
@@ -1382,7 +1436,7 @@ static int domain_context_mapping(struct
return ret;
}
-static int domain_context_unmap_one(
+int domain_context_unmap_one(
struct domain *domain,
struct iommu *iommu,
u8 bus, u8 devfn)
@@ -1430,6 +1484,8 @@ static int domain_context_unmap_one(
spin_unlock(&iommu->lock);
unmap_vtd_domain_page(context_entries);
+ me_wifi_quirk(domain, bus, devfn, UNMAP_ME_PHANTOM_FUNC);
+
return 0;
}
@@ -1928,19 +1984,6 @@ static void setup_dom0_rmrr(struct domai
spin_unlock(&pcidevs_lock);
}
-static void platform_quirks(void)
-{
- u32 id;
-
- /* Mobile 4 Series Chipset neglects to set RWBF capability. */
- id = pci_conf_read32(0, 0, 0, 0);
- if ( id == 0x2a408086 )
- {
- dprintk(XENLOG_INFO VTDPREFIX, "DMAR: Forcing write-buffer flush\n");
- rwbf_quirk = 1;
- }
-}
-
int intel_vtd_setup(void)
{
struct acpi_drhd_unit *drhd;
@@ -1949,7 +1992,7 @@ int intel_vtd_setup(void)
if ( list_empty(&acpi_drhd_units) )
return -ENODEV;
- platform_quirks();
+ platform_quirks_init();
irq_to_iommu = xmalloc_array(struct iommu*, nr_irqs);
BUG_ON(!irq_to_iommu);
@@ -2163,7 +2206,7 @@ static void vtd_resume(void)
(u32) iommu_state[i][DMAR_FEUADDR_REG]);
spin_unlock_irqrestore(&iommu->register_lock, flags);
- iommu_enable_translation(iommu);
+ iommu_enable_translation(drhd);
}
}
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -501,6 +501,7 @@ struct intel_iommu {
struct qi_ctrl qi_ctrl;
struct ir_ctrl ir_ctrl;
struct iommu_flush flush;
+ struct acpi_drhd_unit *drhd;
};
#endif
--- /dev/null
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Allen Kay <allen.m.kay@intel.com>
+ */
+
+#include <xen/irq.h>
+#include <xen/sched.h>
+#include <xen/xmalloc.h>
+#include <xen/domain_page.h>
+#include <xen/iommu.h>
+#include <asm/hvm/iommu.h>
+#include <xen/numa.h>
+#include <xen/softirq.h>
+#include <xen/time.h>
+#include <xen/pci.h>
+#include <xen/pci_regs.h>
+#include <xen/keyhandler.h>
+#include <asm/msi.h>
+#include <asm/irq.h>
+#include <mach_apic.h>
+#include "iommu.h"
+#include "dmar.h"
+#include "extern.h"
+#include "vtd.h"
+
+#define IOH_DEV 0
+#define IGD_DEV 2
+
+#define IGD_BAR_MASK 0xFFFFFFFFFFFF0000
+#define GGC 0x52
+#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
+
+#define IS_CTG(id) (id == 0x2a408086)
+#define IS_ILK(id) (id == 0x00408086 || id == 0x00448086 || id== 0x00628086 || id == 0x006A8086)
+#define IS_CPT(id) (id == 0x01008086 || id == 0x01048086)
+
+u32 ioh_id;
+u32 igd_id;
+bool_t rwbf_quirk;
+static int is_cantiga_b3;
+static u8 *igd_reg_va;
+
+/*
+ * QUIRK to workaround Xen boot issue on Calpella/Ironlake OEM BIOS
+ * not enabling VT-d properly in IGD. The workaround is to not enable
+ * IGD VT-d translation if VT is not enabled in IGD.
+ */
+int is_igd_vt_enabled_quirk(void)
+{
+ u16 ggc;
+
+ if ( !IS_ILK(ioh_id) )
+ return 1;
+
+ /* integrated graphics on Intel platforms is located at 0:2.0 */
+ ggc = pci_conf_read16(0, IGD_DEV, 0, GGC);
+ return ( ggc & GGC_MEMORY_VT_ENABLED ? 1 : 0 );
+}
+
+/*
+ * QUIRK to workaround cantiga VT-d buffer flush issue.
+ * The workaround is to force write buffer flush even if
+ * VT-d capability indicates it is not required.
+ */
+static void cantiga_b3_errata_init(void)
+{
+ u16 vid;
+ u8 did_hi, rid;
+
+ vid = pci_conf_read16(0, IGD_DEV, 0, 0);
+ if ( vid != 0x8086 )
+ return;
+
+ did_hi = pci_conf_read8(0, IGD_DEV, 0, 3);
+ rid = pci_conf_read8(0, IGD_DEV, 0, 8);
+
+ if ( (did_hi == 0x2A) && (rid == 0x7) )
+ is_cantiga_b3 = 1;
+}
+
+/*
+ * QUIRK to workaround Cantiga IGD VT-d low power errata.
+ * This errata impacts IGD assignment on Cantiga systems
+ * and can potentially cause VT-d operations to hang.
+ * The workaround is to access an IGD PCI config register
+ * to get IGD out of low power state before VT-d translation
+ * enable/disable and IOTLB flushes.
+ */
+
+/*
+ * map IGD MMIO+0x2000 page to allow Xen access to IGD 3D register.
+ */
+static void map_igd_reg(void)
+{
+ u64 igd_mmio, igd_reg;
+
+ if ( !is_cantiga_b3 || igd_reg_va != NULL )
+ return;
+
+ /* get IGD mmio address in PCI BAR */
+ igd_mmio = ((u64)pci_conf_read32(0, IGD_DEV, 0, 0x14) << 32) +
+ pci_conf_read32(0, IGD_DEV, 0, 0x10);
+
+ /* offset of IGD register we want to access is in 0x2000 range */
+ igd_reg = (igd_mmio & IGD_BAR_MASK) + 0x2000;
+
+ /* ioremap this physical page */
+ set_fixmap_nocache(FIX_IGD_MMIO, igd_reg);
+ igd_reg_va = (u8 *)fix_to_virt(FIX_IGD_MMIO);
+}
+
+/*
+ * force IGD to exit low power mode by accessing an IGD 3D register.
+ */
+static int cantiga_vtd_ops_preamble(struct iommu* iommu)
+{
+ struct intel_iommu *intel = iommu->intel;
+ struct acpi_drhd_unit *drhd = intel ? intel->drhd : NULL;
+
+ if ( !is_igd_drhd(drhd) || !is_cantiga_b3 )
+ return 0;
+
+ /*
+ * read IGD register at IGD MMIO + 0x20A4 to force IGD
+ * to exit low power state. Since map_igd_reg()
+ * already mapped page starting 0x2000, we just need to
+ * add page offset 0x0A4 to virtual address base.
+ */
+ return ( *((volatile int *)(igd_reg_va + 0x0A4)) );
+}
+
+/*
+ * call before VT-d translation enable and IOTLB flush operations.
+ */
+void vtd_ops_preamble_quirk(struct iommu* iommu)
+{
+ cantiga_vtd_ops_preamble(iommu);
+}
+
+/*
+ * call after VT-d translation enable and IOTLB flush operations.
+ */
+void vtd_ops_postamble_quirk(struct iommu* iommu)
+{
+ return;
+}
+
+/* initialize platform identification flags */
+void __init platform_quirks_init(void)
+{
+ ioh_id = pci_conf_read32(0, IOH_DEV, 0, 0);
+ igd_id = pci_conf_read32(0, IGD_DEV, 0, 0);
+
+ /* Mobile 4 Series Chipset neglects to set RWBF capability. */
+ if ( ioh_id == 0x2a408086 )
+ {
+ dprintk(XENLOG_INFO VTDPREFIX, "DMAR: Forcing write-buffer flush\n");
+ rwbf_quirk = 1;
+ }
+
+ /* initialize cantiga B3 identification */
+ cantiga_b3_errata_init();
+
+ /* ioremap IGD MMIO+0x2000 page */
+ map_igd_reg();
+}
+
+/*
+ * QUIRK to workaround wifi direct assignment issue. This issue
+ * impacts only cases where the Intel integrated wifi device is
+ * directly assigned to a guest.
+ *
+ * The workaround is to map ME phantom device 0:3.7 or 0:22.7
+ * to the ME vt-d engine if detect the user is trying to directly
+ * assigning Intel integrated wifi device to a guest.
+ */
+
+static void map_me_phantom_function(struct domain *domain, u32 dev, int map)
+{
+ struct acpi_drhd_unit *drhd;
+ struct pci_dev *pdev;
+
+ /* find the ME VT-d engine based on a real ME device */
+ pdev = pci_get_pdev(0, PCI_DEVFN(dev, 0));
+ drhd = acpi_find_matched_drhd_unit(pdev);
+
+ /* map or unmap ME phantom function */
+ if ( map )
+ domain_context_mapping_one(domain, drhd->iommu, 0,
+ PCI_DEVFN(dev, 7));
+ else
+ domain_context_unmap_one(domain, drhd->iommu, 0,
+ PCI_DEVFN(dev, 7));
+}
+
+void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map)
+{
+ u32 id;
+
+ id = pci_conf_read32(0, 0, 0, 0);
+ if ( IS_CTG(id) )
+ {
+ /* quit if ME does not exist */
+ if ( pci_conf_read32(0, 3, 0, 0) == 0xffffffff )
+ return;
+
+ /* if device is WLAN device, map ME phantom device 0:3.7 */
+ id = pci_conf_read32(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), 0);
+ switch (id)
+ {
+ case 0x42328086:
+ case 0x42358086:
+ case 0x42368086:
+ case 0x42378086:
+ case 0x423a8086:
+ case 0x423b8086:
+ case 0x423c8086:
+ case 0x423d8086:
+ map_me_phantom_function(domain, 3, map);
+ break;
+ default:
+ break;
+ }
+ }
+ else if ( IS_ILK(id) || IS_CPT(id) )
+ {
+ /* quit if ME does not exist */
+ if ( pci_conf_read32(0, 22, 0, 0) == 0xffffffff )
+ return;
+
+ /* if device is WLAN device, map ME phantom device 0:22.7 */
+ id = pci_conf_read32(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), 0);
+ switch (id)
+ {
+ case 0x00878086:
+ case 0x00898086:
+ case 0x00828086:
+ case 0x00858086:
+ case 0x42388086:
+ case 0x422b8086:
+ map_me_phantom_function(domain, 22, map);
+ break;
+ default:
+ break;
+ }
+
+ }
+}
--- a/xen/drivers/passthrough/vtd/vtd.h
+++ b/xen/drivers/passthrough/vtd/vtd.h
@@ -23,6 +23,9 @@
#include <xen/iommu.h>
+#define MAP_ME_PHANTOM_FUNC 1
+#define UNMAP_ME_PHANTOM_FUNC 0
+
/* Accomodate both IOAPIC and IOSAPIC. */
struct IO_xAPIC_route_entry {
__u32 vector : 8,
@@ -97,18 +100,4 @@ struct msi_msg_remap_entry {
u32 data; /* msi message data */
};
-unsigned int get_cache_line_size(void);
-void cacheline_flush(char *);
-void flush_all_cache(void);
-u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages);
-void free_pgtable_maddr(u64 maddr);
-void *map_vtd_domain_page(u64 maddr);
-void unmap_vtd_domain_page(void *va);
-
-void iommu_flush_cache_entry(void *addr, unsigned int size);
-void iommu_flush_cache_page(void *addr, unsigned long npages);
-
-int iommu_alloc(struct acpi_drhd_unit *drhd);
-void iommu_free(struct acpi_drhd_unit *drhd);
-
#endif // _VTD_H_
--- a/xen/drivers/passthrough/vtd/x86/vtd.c
+++ b/xen/drivers/passthrough/vtd/x86/vtd.c
@@ -27,6 +27,7 @@
#include "../iommu.h"
#include "../dmar.h"
#include "../vtd.h"
+#include "../extern.h"
/*
* iommu_inclusive_mapping: when set, all memory below 4GB is included in dom0
--- a/xen/include/asm-x86/fixmap.h
+++ b/xen/include/asm-x86/fixmap.h
@@ -52,6 +52,7 @@ enum fixed_addresses {
FIX_MSIX_IO_RESERV_BASE,
FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + FIX_MSIX_MAX_PAGES -1,
FIX_TBOOT_MAP_ADDRESS,
+ FIX_IGD_MMIO,
__end_of_fixed_addresses
};

@@ -0,0 +1,68 @@
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1286784105 -3600
# Node ID a1405385db77c7c81aac27bd88d6c4b2d90b1389
# Parent a33886146b45da46a5161a7ebed4d2f607642aee
x86: emulate MSR_IA32_UCODE_REV Intel access protocol
Intel requires a write of zeros (hence such writes now get silently
ignored) followed by a cpuid(1) followed by the actual read.
Includes some code redundancy elimination possible after the actual
change.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2268,6 +2268,14 @@ static int emulate_privileged_op(struct
if ( wrmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, eax, edx) != 0 )
goto fail;
break;
+ case MSR_IA32_UCODE_REV:
+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
+ goto fail;
+ if ( rdmsr_safe(regs->ecx, l, h) )
+ goto fail;
+ if ( l | h )
+ goto invalid;
+ break;
case MSR_IA32_MISC_ENABLE:
if ( rdmsr_safe(regs->ecx, l, h) )
goto invalid;
@@ -2375,16 +2383,21 @@ static int emulate_privileged_op(struct
regs->eax = regs->edx = 0;
break;
}
- if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) != 0 )
- goto fail;
- break;
+ goto rdmsr_normal;
+ case MSR_IA32_UCODE_REV:
+ BUILD_BUG_ON(MSR_IA32_UCODE_REV != MSR_AMD_PATCHLEVEL);
+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+ {
+ if ( wrmsr_safe(MSR_IA32_UCODE_REV, 0, 0) )
+ goto fail;
+ sync_core();
+ }
+ goto rdmsr_normal;
case MSR_IA32_MISC_ENABLE:
if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
goto fail;
regs->eax = guest_misc_enable(regs->eax);
break;
- case MSR_EFER:
- case MSR_AMD_PATCHLEVEL:
default:
if ( rdmsr_hypervisor_regs(regs->ecx, &val) )
{
@@ -2400,6 +2413,8 @@ static int emulate_privileged_op(struct
if ( rc )
goto rdmsr_writeback;
+ case MSR_EFER:
+ rdmsr_normal:
/* Everyone can read the MSR space. */
/* gdprintk(XENLOG_WARNING,"Domain attempted RDMSR %p.\n",
_p(regs->ecx));*/
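For context, the guest-side protocol this hunk emulates looks roughly like the sketch below. It is illustrative only and not part of the changeset; intel_ucode_rev is a hypothetical name, and the sequence must run in kernel mode (CPL0).

/* Illustrative sketch of the Intel microcode-revision read protocol that the
 * hunk above emulates: write zeros to IA32_UCODE_REV (MSR 0x8B), execute
 * CPUID leaf 1 as a serializing instruction, then read the MSR back; the
 * update signature is returned in EDX (bits 63:32 of the MSR). */
static inline unsigned int intel_ucode_rev(void)
{
    unsigned int lo, hi, eax = 1, ebx, ecx, edx;

    asm volatile ( "wrmsr" : : "c" (0x8b), "a" (0), "d" (0) );
    asm volatile ( "cpuid" : "+a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) );
    asm volatile ( "rdmsr" : "=a" (lo), "=d" (hi) : "c" (0x8b) );

    (void)lo; (void)ebx; (void)ecx; (void)edx;  /* only the high word matters */
    return hi;
}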

@@ -0,0 +1,89 @@
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1286784156 -3600
# Node ID eb964c4b4f31c6b7bdfe8504839c4acac776f506
# Parent a1405385db77c7c81aac27bd88d6c4b2d90b1389
x86-64: workaround for BIOSes wrongly enabling LAHF_LM feature indicator
This workaround is taken from Linux, and the main motivation (besides
such workarounds indeed belonging in the hypervisor rather than each
kernel) is to suppress the warnings in the Xen log each Linux guest
would cause due to the disallowed wrmsr.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -44,6 +44,47 @@ static inline void wrmsr_amd(unsigned in
);
}
+static inline int rdmsr_amd_safe(unsigned int msr, unsigned int *lo,
+ unsigned int *hi)
+{
+ int err;
+
+ asm volatile("1: rdmsr\n2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl %6,%2\n"
+ " jmp 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ __FIXUP_ALIGN "\n"
+ __FIXUP_WORD " 1b,3b\n"
+ ".previous\n"
+ : "=a" (*lo), "=d" (*hi), "=r" (err)
+ : "c" (msr), "D" (0x9c5a203a), "2" (0), "i" (-EFAULT));
+
+ return err;
+}
+
+static inline int wrmsr_amd_safe(unsigned int msr, unsigned int lo,
+ unsigned int hi)
+{
+ int err;
+
+ asm volatile("1: wrmsr\n2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl %6,%0\n"
+ " jmp 2b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
+ __FIXUP_ALIGN "\n"
+ __FIXUP_WORD " 1b,3b\n"
+ ".previous\n"
+ : "=r" (err)
+ : "c" (msr), "a" (lo), "d" (hi), "D" (0x9c5a203a),
+ "0" (0), "i" (-EFAULT));
+
+ return err;
+}
+
/*
* Mask the features and extended features returned by CPUID. Parameters are
* set from the boot line via two methods:
@@ -330,6 +371,24 @@ static void __devinit init_amd(struct cp
3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
clear_bit(0*32+31, c->x86_capability);
+#ifdef CONFIG_X86_64
+ if (c->x86 == 0xf && c->x86_model < 0x14
+ && cpu_has(c, X86_FEATURE_LAHF_LM)) {
+ /*
+ * Some BIOSes incorrectly force this feature, but only K8
+ * revision D (model = 0x14) and later actually support it.
+ * (AMD Erratum #110, docId: 25759).
+ */
+ unsigned int lo, hi;
+
+ clear_bit(X86_FEATURE_LAHF_LM, c->x86_capability);
+ if (!rdmsr_amd_safe(0xc001100d, &lo, &hi)) {
+ hi &= ~1;
+ wrmsr_amd_safe(0xc001100d, lo, hi);
+ }
+ }
+#endif
+
r = get_model_name(c);
switch(c->x86)

22280-kexec.patch
@@ -0,0 +1,71 @@
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1287922506 -3600
# Node ID d6e3cd10a9a6ab16d0cc772ee70b2ae99e8fac31
# Parent 2208a036f8d9d932de8ab0aa0206f5c57dba8728
x86/kexec: fix very old regression and make compatible with modern Linux
References: bnc#619122
c/s 13829 lost the (32-bit only) cpu_has_pae argument passed to the
primary kernel's stub (in the 32-bit Xen case only), and Linux
2.6.27/.30 (32-/64-bit) introduced a new argument (for KEXEC_JUMP)
which for now simply gets passed a hardcoded value.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/machine_kexec.c
+++ b/xen/arch/x86/machine_kexec.c
@@ -23,7 +23,11 @@
typedef void (*relocate_new_kernel_t)(
unsigned long indirection_page,
unsigned long *page_list,
- unsigned long start_address);
+ unsigned long start_address,
+#ifdef __i386__
+ unsigned int cpu_has_pae,
+#endif
+ unsigned int preserve_context);
extern int machine_kexec_get_xen(xen_kexec_range_t *range);
@@ -121,7 +125,11 @@ void machine_kexec(xen_kexec_image_t *im
rnk = (relocate_new_kernel_t) image->page_list[1];
(*rnk)(image->indirection_page, image->page_list,
- image->start_address);
+ image->start_address,
+#ifdef __i386__
+ 1 /* cpu_has_pae */,
+#endif
+ 0 /* preserve_context */);
}
}
--- a/xen/arch/x86/x86_64/compat_kexec.S
+++ b/xen/arch/x86/x86_64/compat_kexec.S
@@ -119,6 +119,7 @@ compatibility_mode:
movl %eax, %ss
/* Push arguments onto stack. */
+ pushl $0 /* 20(%esp) - preserve context */
pushl $1 /* 16(%esp) - cpu has pae */
pushl %ecx /* 12(%esp) - start address */
pushl %edx /* 8(%esp) - page list */
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -138,7 +138,6 @@
#define cpu_has_de boot_cpu_has(X86_FEATURE_DE)
#define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE)
#define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC)
-#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE)
#define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE)
#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT)
#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)
@@ -164,7 +163,6 @@
#define cpu_has_de 1
#define cpu_has_pse 1
#define cpu_has_tsc 1
-#define cpu_has_pae 1
#define cpu_has_pge 1
#define cpu_has_pat 1
#define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC)

@@ -0,0 +1,43 @@
# HG changeset patch
# User Juergen Gross <juergen.gross@ts.fujitsu.com>
# Date 1288264929 -3600
# Node ID dc66c13a29f9af67e0258f688bcd6330cf515383
# Parent 4ac03710fc320e8f76014ca27849da03b85dff9d
avoid numa placement of cpus with active cpupools
When using cpupools don't pin vcpus to numa nodes as this might
conflict with the cpupool definition.
numa placement should be handled by cpupool configuration instead.
Signed-off-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
Index: xen-4.0.1-testing/tools/python/xen/xend/XendCPUPool.py
===================================================================
--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendCPUPool.py
+++ xen-4.0.1-testing/tools/python/xen/xend/XendCPUPool.py
@@ -881,6 +881,11 @@ class XendCPUPool(XendBase):
lookup_pool = classmethod(lookup_pool)
+ def number_of_pools(cls):
+ return len(xc.cpupool_getinfo())
+
+ number_of_pools = classmethod(number_of_pools)
+
def _cpu_number_to_ref(cls, number):
node = XendNode.instance()
for cpu_ref in node.get_host_cpu_refs():
Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2790,7 +2790,7 @@ class XendDomainInfo:
return map(lambda x: x[0], sorted(enumerate(nodeload), key=lambda x:x[1]))
info = xc.physinfo()
- if info['nr_nodes'] > 1:
+ if info['nr_nodes'] > 1 and XendCPUPool.number_of_pools() < 2:
node_memory_list = info['node_to_memory']
needmem = self.image.getRequiredAvailableMemory(self.info['memory_dynamic_max']) / 1024
candidate_node_list = []

@@ -0,0 +1,22 @@
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1288371977 -3600
# Node ID 7afd8dd1d6cb97484d263311f3f0e6ab74d27ed3
# Parent 49803ac994f4094719ec2c3b67d82561a24ba293
VT-d: only scan secondary functions on multi-function devices
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -442,6 +442,10 @@ int __init scan_pci_devices(void)
spin_unlock(&pcidevs_lock);
return -EINVAL;
}
+
+ if ( !func && !(pci_conf_read8(bus, dev, func,
+ PCI_HEADER_TYPE) & 0x80) )
+ break;
}
}
}
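For context, the check added above relies on the standard PCI enumeration rule: bit 7 of function 0's header-type register marks a multi-function device, so functions 1-7 only need probing when it is set. A minimal, self-contained sketch of that idiom follows (illustrative only, not the actual Xen scan loop; scan_bus and the pci_cfg_read* helpers are hypothetical):

#include <stdint.h>

/* Assumed config-space accessors (hypothetical prototypes). */
uint8_t  pci_cfg_read8(unsigned bus, unsigned dev, unsigned func, unsigned reg);
uint16_t pci_cfg_read16(unsigned bus, unsigned dev, unsigned func, unsigned reg);

#define PCI_VENDOR_ID              0x00
#define PCI_HEADER_TYPE            0x0e
#define PCI_HEADER_TYPE_MULTI_FUNC 0x80

void scan_bus(unsigned bus)
{
    for (unsigned dev = 0; dev < 32; dev++) {
        for (unsigned func = 0; func < 8; func++) {
            if (pci_cfg_read16(bus, dev, func, PCI_VENDOR_ID) == 0xffff) {
                if (func == 0)
                    break;      /* empty slot: nothing to probe */
                continue;       /* this particular function is absent */
            }

            /* ... record bus:dev.func ... */

            /* Single-function device: bit 7 of function 0's header type is
             * clear, so functions 1..7 cannot exist and need not be read.
             * This is the early break the patch above adds to Xen. */
            if (func == 0 &&
                !(pci_cfg_read8(bus, dev, func, PCI_HEADER_TYPE) &
                  PCI_HEADER_TYPE_MULTI_FUNC))
                break;
        }
    }
}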

@@ -0,0 +1,52 @@
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1288772331 0
# Node ID 2dfba250c50bafac4e333d84450324daedf5ae2c
# Parent 16093532f384eee02518520662a38ad16915b063
VT-d: fix device assignment failure (regression from Xen c/s 19805:2f1fa2215e60)
References: bnc#647681
If the device at <secbus>:00.0 is the device the mapping operation was
initiated for, trying to map it a second time will fail, and hence
this second mapping attempt must be prevented (as was done prior to
said c/s).
While at it, simplify the code a little, too.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Acked-by: Weidong Han <weidong.han@intel.com>
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1402,23 +1402,16 @@ static int domain_context_mapping(struct
if ( find_upstream_bridge(&bus, &devfn, &secbus) < 1 )
break;
- /* PCIe to PCI/PCIx bridge */
- if ( pdev_type(bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE )
- {
- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
- if ( ret )
- return ret;
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
- /*
- * Devices behind PCIe-to-PCI/PCIx bridge may generate
- * different requester-id. It may originate from devfn=0
- * on the secondary bus behind the bridge. Map that id
- * as well.
- */
+ /*
+ * Devices behind PCIe-to-PCI/PCIx bridge may generate different
+ * requester-id. It may originate from devfn=0 on the secondary bus
+ * behind the bridge. Map that id as well if we didn't already.
+ */
+ if ( !ret && pdev_type(bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE &&
+ (secbus != pdev->bus || pdev->devfn != 0) )
ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0);
- }
- else /* Legacy PCI bridge */
- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
break;

@@ -0,0 +1,63 @@
# HG changeset patch
# User Jim Fehlig <jfehlig@novell.com>
# Date 1288301229 21600
# Branch xend-pci
# Node ID 461b9d3a643a2c67c961d9fc468a804891f3770d
# Parent 28a16074681582f1209c9077f870ccf44927133e
Fix pci passthru in xend interface used by libvirt
Attempting to define or create a domain whose XML config contains a
passthru PCI device fails with libvirt
xen84: # cat win2k8r2.xml
...
<hostdev mode='subsystem' type='pci' managed='no'>
<source>
<address domain='0x0000' bus='0x01' slot='0x00' function='0x0'/>
</source>
</hostdev>
...
xen84: # virsh create ./win2k8r2.xml
error: Failed to create domain from ./win2k8r2.xml
error: POST operation failed: xend_post: error from xen daemon:
(xend.err "Error creating domain: 'key'")
The PCI device config maintained by xend includes a 'key' field, which is
initialized by xm client when using that tool and traditional xen config
file. libvirt, which uses xend's sexpr-over-http interface (is that the
proper name for that interface??), does not initialize this field - and
shouldn't be expected to do so IMO. Clients should not be bothered with
xend's internal representation of a PCI device.
This patch populates the 'key' field within xend if it is uninitialized,
similar to current initialization of 'uuid' field. The 'vdevfn' field
suffers a similar problem if not (optionally) specified by user.
AFAICT, the xm client initializes 'vdevfn' to 0x100 if not specified so
I've done the same here.
Signed-off-by: Jim Fehlig <jfehlig@novell.com>
diff -r 28a160746815 -r 461b9d3a643a tools/python/xen/util/pci.py
--- a/tools/python/xen/util/pci.py Wed Oct 27 12:24:28 2010 +0100
+++ b/tools/python/xen/util/pci.py Thu Oct 28 15:27:09 2010 -0600
@@ -240,10 +240,16 @@
pci_dev_info = dict(pci_dev[1:])
if 'opts' in pci_dev_info:
pci_dev_info['opts'] = pci_opts_list_from_sxp(pci_dev)
- # append uuid to each pci device that does't already have one.
+ # If necessary, initialize uuid, key, and vdevfn for each pci device
if not pci_dev_info.has_key('uuid'):
- dpci_uuid = pci_dev_info.get('uuid', uuid.createString())
- pci_dev_info['uuid'] = dpci_uuid
+ pci_dev_info['uuid'] = uuid.createString()
+ if not pci_dev_info.has_key('key'):
+ pci_dev_info['key'] = "%02x:%02x.%x" % \
+ (int(pci_dev_info['bus'], 16),
+ int(pci_dev_info['slot'], 16),
+ int(pci_dev_info['func'], 16))
+ if not pci_dev_info.has_key('vdevfn'):
+ pci_dev_info['vdevfn'] = "0x%02x" % AUTO_PHP_SLOT
pci_devs.append(pci_dev_info)
dev_config['devs'] = pci_devs

bdrv_default_rwflag.patch
@@ -0,0 +1,32 @@
Subject: modify default read/write flag in bdrv_init.
Signed-off-by: Chunyan Liu <cyliu@novell.com>
Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/vl.c
===================================================================
--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/vl.c
+++ xen-4.0.1-testing/tools/ioemu-qemu-xen/vl.c
@@ -2612,6 +2612,8 @@ int drive_init(struct drive_opt *arg, in
strncpy(drives_table[nb_drives].serial, serial, sizeof(serial));
nb_drives++;
+ bdrv_flags = BDRV_O_RDWR;
+
switch(type) {
case IF_IDE:
case IF_SCSI:
@@ -2624,6 +2626,7 @@ int drive_init(struct drive_opt *arg, in
break;
case MEDIA_CDROM:
bdrv_set_type_hint(bdrv, BDRV_TYPE_CDROM);
+ bdrv_flags &= ~BDRV_O_RDWR;
break;
}
break;
@@ -2644,7 +2647,6 @@ int drive_init(struct drive_opt *arg, in
}
if (!file[0])
return -2;
- bdrv_flags = 0;
if (snapshot) {
bdrv_flags |= BDRV_O_SNAPSHOT;
cache = 2; /* always use write-back with snapshot */

@@ -2,7 +2,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
===================================================================
--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/xen_blktap.c
+++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
@@ -227,6 +227,7 @@ static int open_disk(struct td_state *s,
@@ -237,6 +237,7 @@ static int open_disk(struct td_state *s,
BlockDriver* drv;
char* devname;
static int devnumber = 0;
@ -10,7 +10,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
int i;
DPRINTF("Opening %s as blktap%d\n", path, devnumber);
@@ -249,7 +250,7 @@ static int open_disk(struct td_state *s,
@@ -259,7 +260,7 @@ static int open_disk(struct td_state *s,
DPRINTF("%s driver specified\n", drv ? drv->format_name : "No");
/* Open the image */

@@ -741,7 +741,7 @@ Index: xen-4.0.1-testing/tools/blktap/lib/blktaplib.h
===================================================================
--- xen-4.0.1-testing.orig/tools/blktap/lib/blktaplib.h
+++ xen-4.0.1-testing/tools/blktap/lib/blktaplib.h
@@ -220,6 +220,7 @@ typedef struct msg_pid {
@@ -225,6 +225,7 @@ typedef struct msg_pid {
#define DISK_TYPE_RAM 3
#define DISK_TYPE_QCOW 4
#define DISK_TYPE_QCOW2 5

@@ -46,8 +46,6 @@ case "$command" in
# load modules and start iscsid
/etc/init.d/open-iscsi status >/dev/null 2>&1 ||
{ /etc/init.d/open-iscsi start >/dev/null 2>&1; sleep 1; }
# list of targets on node
/sbin/iscsiadm -m discovery | sed "s/ .*//g" | while read line; do /sbin/iscsiadm -m discovery -t sendtargets -p $line; done >/dev/null
par=`xenstore-read $XENBUS_PATH/params` || true
TGTID=`echo $par | sed "s/\/\///g"`
while read rec uuid; do

@@ -2,7 +2,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
===================================================================
--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/xen_blktap.c
+++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
@@ -249,8 +249,11 @@ static int open_disk(struct td_state *s,
@@ -259,8 +259,11 @@ static int open_disk(struct td_state *s,
drv = blktap_drivers[i].drv;
DPRINTF("%s driver specified\n", drv ? drv->format_name : "No");

@@ -2,7 +2,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
===================================================================
--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/xen_blktap.c
+++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
@@ -362,6 +362,15 @@ static void qemu_send_responses(void* op
@@ -360,6 +360,15 @@ static void qemu_send_responses(void* op
}
/**
@ -18,7 +18,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
* Callback function for the IO message pipe. Reads requests from the ring
* and processes them (call qemu read/write functions).
*
@@ -380,6 +389,7 @@ static void handle_blktap_iomsg(void* pr
@@ -378,6 +387,7 @@ static void handle_blktap_iomsg(void* pr
blkif_t *blkif = s->blkif;
tapdev_info_t *info = s->ring_info;
int page_size = getpagesize();
@ -26,7 +26,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
struct aiocb_info *aiocb_info;
@@ -412,7 +422,7 @@ static void handle_blktap_iomsg(void* pr
@@ -410,7 +420,7 @@ static void handle_blktap_iomsg(void* pr
/* Don't allow writes on readonly devices */
if ((s->flags & TD_RDONLY) &&
@ -35,7 +35,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
blkif->pending_list[idx].status = BLKIF_RSP_ERROR;
goto send_response;
}
@@ -433,7 +443,7 @@ static void handle_blktap_iomsg(void* pr
@@ -431,7 +441,7 @@ static void handle_blktap_iomsg(void* pr
DPRINTF("Sector request failed:\n");
DPRINTF("%s request, idx [%d,%d] size [%llu], "
"sector [%llu,%llu]\n",
@ -44,7 +44,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
"WRITE" : "READ"),
idx,i,
(long long unsigned)
@@ -446,8 +456,14 @@ static void handle_blktap_iomsg(void* pr
@@ -444,8 +454,14 @@ static void handle_blktap_iomsg(void* pr
blkif->pending_list[idx].secs_pending += nsects;
@ -60,7 +60,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
case BLKIF_OP_WRITE:
aiocb_info = malloc(sizeof(*aiocb_info));
@@ -467,6 +483,10 @@ static void handle_blktap_iomsg(void* pr
@@ -465,6 +481,10 @@ static void handle_blktap_iomsg(void* pr
DPRINTF("ERROR: bdrv_write() == NULL\n");
goto send_response;
}

@@ -19,7 +19,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
===================================================================
--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/xen_blktap.c
+++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
@@ -220,9 +220,10 @@ static int map_new_dev(struct td_state *
@@ -230,9 +230,10 @@ static int map_new_dev(struct td_state *
return -1;
}
@ -31,7 +31,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
char* devname;
static int devnumber = 0;
int i;
@@ -232,7 +233,22 @@ static int open_disk(struct td_state *s,
@@ -242,7 +243,22 @@ static int open_disk(struct td_state *s,
bs = bdrv_new(devname);
free(devname);
@ -55,7 +55,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
fprintf(stderr, "Could not open image file %s\n", path);
return -ENOMEM;
}
@@ -527,7 +543,7 @@ static void handle_blktap_ctrlmsg(void*
@@ -525,7 +541,7 @@ static void handle_blktap_ctrlmsg(void*
s = state_init();
/*Open file*/

@@ -16,7 +16,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
===================================================================
--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/xen_blktap.c
+++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
@@ -258,6 +258,12 @@ static int open_disk(struct td_state *s,
@@ -268,6 +268,12 @@ static int open_disk(struct td_state *s,
s->size = bs->total_sectors;
s->sector_size = 512;
@ -28,7 +28,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
+
s->info = ((s->flags & TD_RDONLY) ? VDISK_READONLY : 0);
#ifndef QEMU_TOOL
return 0;
Index: xen-4.0.1-testing/tools/python/xen/xend/server/DevController.py
===================================================================
--- xen-4.0.1-testing.orig/tools/python/xen/xend/server/DevController.py

minios-fixups.patch
@@ -0,0 +1,40 @@
Index: xen-4.0.1-testing/extras/mini-os/netfront.c
===================================================================
--- xen-4.0.1-testing.orig/extras/mini-os/netfront.c
+++ xen-4.0.1-testing/extras/mini-os/netfront.c
@@ -25,8 +25,8 @@ DECLARE_WAIT_QUEUE_HEAD(netfront_queue);
-#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
-#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
+#define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
+#define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
#define GRANT_INVALID_REF 0
Index: xen-4.0.1-testing/extras/mini-os/lib/math.c
===================================================================
--- xen-4.0.1-testing.orig/extras/mini-os/lib/math.c
+++ xen-4.0.1-testing/extras/mini-os/lib/math.c
@@ -191,6 +191,7 @@ __qdivrem(uint64_t uq, uint64_t vq, uint
* and thus
* m = 4 - n <= 2
*/
+ tmp.ul[H] = tmp.ul[L] = 0;
tmp.uq = uq;
u[0] = 0;
u[1] = HHALF(tmp.ul[H]);
Index: xen-4.0.1-testing/extras/mini-os/arch/x86/mm.c
===================================================================
--- xen-4.0.1-testing.orig/extras/mini-os/arch/x86/mm.c
+++ xen-4.0.1-testing/extras/mini-os/arch/x86/mm.c
@@ -281,7 +281,7 @@ static void build_pagetable(unsigned lon
/*
* Mark portion of the address space read only.
*/
-extern void shared_info;
+extern char shared_info[PAGE_SIZE];
static void set_readonly(void *text, void *etext)
{
unsigned long start_address =

@@ -0,0 +1,379 @@
Subject: add the drive into drives_table[] only if the guest is using the PV driver
Now when blktapctrl asks qemu to add a device, it also sets a watch
on the xenstore backend state path of the device, e.g.
/local/domain/<domain-id>/device/vbd/<device-id>/state. When the
state changes to 4 (Connected), the guest is using the PV driver and is
ready, so the watch tells qemu to add the disk entry to
drives_table[]; otherwise the disk in qemu just stays open, not
showing up in drives_table[].
Signed-off-by: Li Dongyang <lidongyang@novell.com>
---
tools/blktap/drivers/blktapctrl.c | 81 +++++++++++++++++++++++++++++++++-
tools/blktap/lib/blkif.c | 23 ++++++++++
tools/blktap/lib/blktaplib.h | 5 ++
tools/blktap/lib/xenbus.c | 69 +++++++++++++++++++++++++++++
tools/ioemu-qemu-xen/hw/xen_blktap.c | 49 +++++++++++++++-----
5 files changed, 213 insertions(+), 14 deletions(-)
diff --git a/tools/blktap/drivers/blktapctrl.c b/tools/blktap/drivers/blktapctrl.c
index bcc3152..8b58e3e 100644
--- a/tools/blktap/drivers/blktapctrl.c
+++ b/tools/blktap/drivers/blktapctrl.c
@@ -381,7 +381,22 @@ static int write_msg(int fd, int msgtype, void *ptr, void *ptr2)
msg->cookie = blkif->cookie;
break;
-
+
+ case CTLMSG_ADDDEV:
+ DPRINTF("Write_msg called: CTLMSG_ADDDEV\n");
+
+ msglen = sizeof(msg_hdr_t);
+ buf = malloc(msglen);
+
+ /*Assign header fields*/
+ msg = (msg_hdr_t *)buf;
+ msg->type = CTLMSG_ADDDEV;
+ msg->len = msglen;
+ msg->drivertype = blkif->drivertype;
+ msg->cookie = blkif->cookie;
+
+ break;
+
default:
return -1;
}
@@ -476,6 +491,12 @@ static int read_msg(int fd, int msgtype, void *ptr)
DPRINTF("\tPID: [%d]\n",blkif->tappid);
}
break;
+
+ case CTLMSG_ADDDEV_RSP:
+ DPRINTF("Received CTLMSG_ADDDEV_RSP\n");
+ if (msgtype != CTLMSG_ADDDEV_RSP) ret = 0;
+ break;
+
default:
DPRINTF("UNKNOWN MESSAGE TYPE RECEIVED\n");
ret = 0;
@@ -758,6 +779,63 @@ static int unmap_blktapctrl(blkif_t *blkif)
return 0;
}
+static int blktapctrl_blkif_state(blkif_t *blkif, XenbusState state)
+{
+ struct disk_info *drivertype = NULL;
+
+ if (!blkif)
+ return -EINVAL;
+
+ switch (state)
+ {
+ case XenbusStateUnknown:
+ break;
+
+ case XenbusStateInitialising:
+ break;
+
+ case XenbusStateInitWait:
+ break;
+
+ case XenbusStateInitialised:
+ break;
+
+ case XenbusStateConnected:
+ drivertype = dtypes[blkif->drivertype];
+ if (drivertype->use_ioemu && blkif->state == CONNECTED) {
+ if (write_msg(blkif->fds[WRITE], CTLMSG_ADDDEV, blkif, NULL)
+ <=0) {
+ DPRINTF("Write_msg failed - CTLMSG_ADDDEV\n");
+ return -1;
+ }
+ if (read_msg(blkif->fds[READ], CTLMSG_ADDDEV_RSP, blkif) <= 0) {
+ DPRINTF("Read_msg failure - CTLMSG_ADDDEV\n");
+ return -1;
+ }
+ }
+
+ break;
+
+ case XenbusStateClosing:
+ break;
+
+ case XenbusStateClosed:
+ break;
+
+ case XenbusStateReconfiguring:
+ break;
+
+ case XenbusStateReconfigured:
+ break;
+
+ default:
+ DPRINTF("Unrecognized XenbusState %d\n", state);
+ return -1;
+ }
+
+ return 0;
+}
+
int open_ctrl_socket(char *devname)
{
int ret;
@@ -854,6 +932,7 @@ int main(int argc, char *argv[])
register_new_blkif_hook(blktapctrl_new_blkif);
register_new_devmap_hook(map_new_blktapctrl);
register_new_unmap_hook(unmap_blktapctrl);
+ register_blkif_state_hook(blktapctrl_blkif_state);
ctlfd = blktap_interface_open();
if (ctlfd < 0) {
diff --git a/tools/blktap/lib/blkif.c b/tools/blktap/lib/blkif.c
index 9a19596..11b63dc 100644
--- a/tools/blktap/lib/blkif.c
+++ b/tools/blktap/lib/blkif.c
@@ -89,6 +89,11 @@ void register_new_blkif_hook(int (*fn)(blkif_t *blkif))
{
new_blkif_hook = fn;
}
+static int (*blkif_state_hook)(blkif_t *blkif, XenbusState state) = NULL;
+void register_blkif_state_hook(int (*fn)(blkif_t *blkif, XenbusState state))
+{
+ blkif_state_hook = fn;
+}
int blkif_init(blkif_t *blkif, long int handle, long int pdev,
long int readonly)
@@ -179,6 +184,24 @@ void free_blkif(blkif_t *blkif)
}
}
+int blkif_handle_state(blkif_t *blkif, XenbusState state)
+{
+ if (blkif == NULL)
+ return -EINVAL;
+
+ if (blkif_state_hook == NULL)
+ {
+ DPRINTF("Probe handling blkif state, but no blkif_state_hook!\n");
+ return -1;
+ }
+ if (blkif_state_hook(blkif, state)!=0) {
+ DPRINTF("BLKIF: blkif_state_hook failed!\n");
+ return -1;
+ }
+
+ return 0;
+}
+
void __init_blkif(void)
{
memset(blkif_hash, 0, sizeof(blkif_hash));
diff --git a/tools/blktap/lib/blktaplib.h b/tools/blktap/lib/blktaplib.h
index 733b924..2a6a078 100644
--- a/tools/blktap/lib/blktaplib.h
+++ b/tools/blktap/lib/blktaplib.h
@@ -38,6 +38,7 @@
#include <xen/xen.h>
#include <xen/io/blkif.h>
#include <xen/io/ring.h>
+#include <xen/io/xenbus.h>
#include <xs.h>
#include <sys/types.h>
#include <unistd.h>
@@ -138,11 +139,13 @@ typedef struct blkif_info {
void register_new_devmap_hook(int (*fn)(blkif_t *blkif));
void register_new_unmap_hook(int (*fn)(blkif_t *blkif));
void register_new_blkif_hook(int (*fn)(blkif_t *blkif));
+void register_blkif_state_hook(int (*fn)(blkif_t *blkif, XenbusState state));
blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle);
blkif_t *alloc_blkif(domid_t domid);
int blkif_init(blkif_t *blkif, long int handle, long int pdev,
long int readonly);
void free_blkif(blkif_t *blkif);
+int blkif_handle_state(blkif_t *blkif, XenbusState state);
void __init_blkif(void);
typedef struct busy_state {
@@ -210,6 +213,8 @@ typedef struct msg_pid {
#define CTLMSG_CLOSE_RSP 8
#define CTLMSG_PID 9
#define CTLMSG_PID_RSP 10
+#define CTLMSG_ADDDEV 11
+#define CTLMSG_ADDDEV_RSP 12
/* disk driver types */
#define MAX_DISK_TYPES 20
diff --git a/tools/blktap/lib/xenbus.c b/tools/blktap/lib/xenbus.c
index 53db3c8..96f75a5 100644
--- a/tools/blktap/lib/xenbus.c
+++ b/tools/blktap/lib/xenbus.c
@@ -318,6 +318,72 @@ static int check_image(struct xs_handle *h, struct backend_info *be,
return 0;
}
+static void check_frontend_state(struct xs_handle *h, struct xenbus_watch *w,
+ const char *state_path_im)
+{
+ struct backend_info *be = NULL;
+ struct blkif *blkif = NULL;
+ char *fepath = NULL, *bepath = NULL;
+ XenbusState state;
+ int er, len;
+
+ len = strsep_len(state_path_im, '/', 6);
+ if (len < 0)
+ return;
+ if (!(fepath = malloc(len + 1)))
+ return;
+ memset(fepath, 0, len + 1);
+ strncpy(fepath, state_path_im, len);
+
+ er = xs_gather(h, fepath, "state", "%d", &state,
+ "backend", NULL, &bepath,
+ NULL);
+
+ if (er) {
+ DPRINTF("Error getting state [%s]\n", fepath);
+ goto free_fe;
+ }
+
+ be = be_lookup_be(bepath);
+ if (!be || !be->blkif)
+ goto free_fe;
+
+ blkif = be->blkif;
+ blkif_handle_state(blkif, state);
+
+free_fe:
+ if (fepath)
+ free(fepath);
+ if (bepath)
+ free(bepath);
+ return;
+}
+
+static int add_blockdevice_state_watch(struct xs_handle *h, const char *frontend)
+{
+ char *path = NULL;
+ struct xenbus_watch *vbd_watch;
+
+ if (asprintf(&path, frontend) == -1)
+ return -ENOMEM;
+ if (!(path = realloc(path, strlen(path) + strlen("/state") + 1)))
+ return -ENOMEM;
+ strcpy(path + strlen(path), "/state");
+
+ vbd_watch = (struct xenbus_watch *)malloc(sizeof(struct xenbus_watch));
+ if (!vbd_watch) {
+ DPRINTF("ERROR: unable to malloc vbd_watch [%s]\n", path);
+ return -EINVAL;
+ }
+ vbd_watch->node = path;
+ vbd_watch->callback = check_frontend_state;
+ if (register_xenbus_watch(h, vbd_watch) != 0) {
+ DPRINTF("ERROR: adding vbd probe watch %s\n", path);
+ return -EINVAL;
+ }
+ return 0;
+}
+
static void ueblktap_setup(struct xs_handle *h, char *bepath)
{
struct backend_info *be;
@@ -512,6 +578,9 @@ static void ueblktap_probe(struct xs_handle *h, struct xenbus_watch *w,
be->backpath = bepath;
be->frontpath = frontend;
+
+ if (add_blockdevice_state_watch(h, frontend) != 0)
+ goto free_be;
list_add(&be->list, &belist);
diff --git a/tools/ioemu-qemu-xen/hw/xen_blktap.c b/tools/ioemu-qemu-xen/hw/xen_blktap.c
index c2236fd..c925283 100644
--- a/tools/ioemu-qemu-xen/hw/xen_blktap.c
+++ b/tools/ioemu-qemu-xen/hw/xen_blktap.c
@@ -83,8 +83,18 @@ static void unmap_disk(struct td_state *s)
{
tapdev_info_t *info = s->ring_info;
fd_list_entry_t *entry;
+ int i;
bdrv_close(s->bs);
+#ifndef QEMU_TOOL
+ for (i = 0; i < MAX_DRIVES + 1; i++) {
+ if (drives_table[i].bdrv == s->bs) {
+ drives_table[i].bdrv = NULL;
+ nb_drives--;
+ break;
+ }
+ }
+#endif
if (info != NULL && info->mem > 0)
munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE);
@@ -244,18 +254,6 @@ static int open_disk(struct td_state *s, char *path, int readonly)
s->info = ((s->flags & TD_RDONLY) ? VDISK_READONLY : 0);
-#ifndef QEMU_TOOL
- for (i = 0; i < MAX_DRIVES + 1; i++) {
- if (drives_table[i].bdrv == NULL) {
- drives_table[i].bdrv = bs;
- drives_table[i].type = IF_BLKTAP;
- drives_table[i].bus = 0;
- drives_table[i].unit = 0;
- break;
- }
- }
-#endif
-
return 0;
}
@@ -496,7 +494,7 @@ static void handle_blktap_ctrlmsg(void* private)
msg_hdr_t *msg;
msg_newdev_t *msg_dev;
msg_pid_t *msg_pid;
- int ret = -1;
+ int ret = -1, i;
struct td_state *s = NULL;
fd_list_entry_t *entry;
@@ -622,6 +620,31 @@ static void handle_blktap_ctrlmsg(void* private)
len = write(write_fd, buf, msglen);
break;
+ case CTLMSG_ADDDEV:
+ s = get_state(msg->cookie);
+ if (s) {
+#ifndef QEMU_TOOL
+ for (i = 0; i < MAX_DRIVES + 1; i++) {
+ if (drives_table[i].bdrv == NULL) {
+ drives_table[i].bdrv = s->bs;
+ drives_table[i].type = IF_BLKTAP;
+ drives_table[i].bus = 0;
+ drives_table[i].unit = 0;
+ drives_table[i].used = 1;
+ nb_drives++;
+ break;
+ }
+ }
+#endif
+ }
+
+ memset(buf, 0x00, MSG_SIZE);
+ msglen = sizeof(msg_hdr_t);
+ msg->type = CTLMSG_ADDDEV_RSP;
+ msg->len = msglen;
+ len = write(write_fd, buf, msglen);
+ break;
+
default:
break;
}
stdvga-cache.patch Normal file
@ -0,0 +1,16 @@
Index: xen-4.0.1-testing/xen/arch/x86/hvm/stdvga.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/hvm/stdvga.c
+++ xen-4.0.1-testing/xen/arch/x86/hvm/stdvga.c
@@ -135,7 +135,10 @@ static int stdvga_outb(uint64_t addr, ui
/* When in standard vga mode, emulate here all writes to the vram buffer
* so we can immediately satisfy reads without waiting for qemu. */
- s->stdvga = (s->sr[7] == 0x00);
+ s->stdvga =
+ (s->sr[7] == 0x00) && /* standard vga mode */
+ (s->gr[6] == 0x05); /* misc graphics register w/ MemoryMapSelect=1
+ * 0xa0000-0xaffff (64k region), AlphaDis=1 */
if ( !prev_stdvga && s->stdvga )
{
stubdom.tar.bz2 Normal file
@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6215320ea160b7296e463abb60e3264fc35f88b47374ae3e054ba4352f6d43dc
size 16864982
@ -28,7 +28,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
fd_list_entry_t *fd_start = NULL;
extern char* get_snapshot_name(int devid);
@@ -547,6 +548,7 @@ static void handle_blktap_ctrlmsg(void*
@@ -545,6 +546,7 @@ static void handle_blktap_ctrlmsg(void*
/* Allocate the disk structs */
s = state_init();
@ -36,7 +36,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
/*Open file*/
if (s == NULL || open_disk(s, path, msg->drivertype, msg->readonly)) {
@@ -629,7 +631,8 @@ static void handle_blktap_ctrlmsg(void*
@@ -627,7 +629,8 @@ static void handle_blktap_ctrlmsg(void*
case CTLMSG_CLOSE:
s = get_state(msg->cookie);
if (s) unmap_disk(s);
@ -1,3 +1,109 @@
-------------------------------------------------------------------
Fri Nov 12 09:48:14 MST 2010 - carnold@novell.com
- Upstream patch for python 2.7 compatibility
22045-python27-compat.patch
-------------------------------------------------------------------
Thu Nov 11 18:44:48 CST 2010 - cyliu@novell.com
- bnc#641144 - FV Xen VM running windows or linux cannot write to
virtual floppy drive
bdrv_default_rwflag.patch
-------------------------------------------------------------------
Thu Nov 11 21:01:12 CET 2010 - ohering@suse.de
- fate#310510 - fix xenpaging
xenpaging.optimize_p2m_mem_paging_populate.patch
xenpaging.HVMCOPY_gfn_paged_out.patch
-------------------------------------------------------------------
Thu Nov 11 10:11:35 MST 2010 - carnold@novell.com
- bnc#649864 - automatic numa cpu placement of xen conflicts with
cpupools
22326-cpu-pools-numa-placement.patch
-------------------------------------------------------------------
Wed Nov 10 17:38:39 CET 2010 - ohering@suse.de
- fate#310510 - fix xenpaging
xenpaging.populate_only_if_paged.patch
- revert logic, populate needs to happen unconditionally
xenpaging.p2m_mem_paging_populate_if_p2m_ram_paged.patch
- invalidate current mfn only if gfn is not in flight or done
xenpaging.mem_event_check_ring-free_requests.patch
- print info only if 1 instead of 2 slots are free
xenpaging.guest_remove_page.patch
- check mfn before usage in resume function
xenpaging.machine_to_phys_mapping.patch
- check mfn before usage in resume function
-------------------------------------------------------------------
Tue Nov 9 10:00:48 MST 2010 - jfehlig@novell.com
- bnc#552115 - Remove target discovery in block-iscsi
modified block-iscsi script
-------------------------------------------------------------------
Mon Nov 8 13:11:02 MDT 2010 - jfehlig@novell.com
- bnc#649277 - Fix pci passthru in xend interface used by libvirt
22369-xend-pci-passthru-fix.patch
-------------------------------------------------------------------
Mon Nov 8 02:49:00 UTC 2010 - lidongyang@novell.com
- bnc#642078 - xm snapshot-create causes qemu-dm to SEGV
snapshot-without-pv-fix.patch
-------------------------------------------------------------------
Fri Nov 5 16:22:15 CET 2010 - ohering@suse.de
- fate#310510 - fix xenpaging
xenpaging.num_pages_equal_max_pages.patch
-------------------------------------------------------------------
Fri Nov 5 09:13:38 MDT 2010 - carnold@novell.com
- bnc#647681 - L3: Passthrough of certain PCI device broken after
SLES 11 to SP1 upgrade
- bnc#650871 - Regression in Xen PCI Passthrough
22348-vtd-check-secbus-devfn.patch
- Upstream patches from Jan
22223-vtd-workarounds.patch
22231-x86-pv-ucode-msr-intel.patch
22232-x86-64-lahf-lm-bios-workaround.patch
22280-kexec.patch
22337-vtd-scan-single-func.patch
-------------------------------------------------------------------
Wed Nov 3 16:26:10 MDT 2010 - carnold@novell.com
- bnc#497149 - SLES11 64bit Xen - SLES11 64bit HVM guest has
corrupt text console
stdvga-cache.patch
-------------------------------------------------------------------
Wed Nov 3 17:52:14 CET 2010 - ohering@suse.de
- fate#310510 - fix xenpaging
xenpaging.page_already_populated.patch
xenpaging.notify_policy_only_once.patch
xenpaging.guest_remove_page.patch
xenpaging.machine_to_phys_mapping.patch
remove xenpaging.memory_op.patch, retry loops are not needed
-------------------------------------------------------------------
Tue Nov 2 14:10:34 MDT 2010 - carnold@novell.com
- bnc#474789 - xen-tools 3.3 rpm misses pv-grub
- PV-GRUB replaces PyGrub to boot domU images safely: it runs the
regular grub inside the created domain itself and uses regular
domU facilities to read the disk / fetch files from network etc.;
it eventually loads the PV kernel and chain-boots it.
-------------------------------------------------------------------
Wed Oct 27 20:08:51 CEST 2010 - ohering@suse.de
@ -65,7 +171,6 @@ Mon Oct 11 08:59:35 MDT 2010 - carnold@novell.com
bnc#615206 - Xen kernel fails to boot with IO-APIC problem
bnc#640773 - Xen kernel crashing right after grub
bnc#643477 - issues with PCI hotplug/hotunplug to Xen driver domain
22223-vtd-igd-workaround.patch
22222-x86-timer-extint.patch
22214-x86-msr-misc-enable.patch
22213-x86-xsave-cpuid-check.patch
xen.spec
@ -25,44 +25,47 @@ ExclusiveArch: %ix86 x86_64
%define changeset 21326
%define xen_build_dir xen-4.0.1-testing
%define with_kmp 1
BuildRequires: LibVNCServer-devel
BuildRequires: SDL-devel
BuildRequires: automake
BuildRequires: bin86
BuildRequires: curl-devel
BuildRequires: dev86
BuildRequires: graphviz
BuildRequires: latex2html
BuildRequires: libjpeg-devel
BuildRequires: libxml2-devel
BuildRequires: ncurses-devel
BuildRequires: openssl
BuildRequires: openssl-devel
BuildRequires: pciutils-devel
BuildRequires: python-devel
BuildRequires: texinfo
BuildRequires: transfig
BuildRequires: LibVNCServer-devel
BuildRequires: SDL-devel
BuildRequires: automake
BuildRequires: bin86
BuildRequires: curl-devel
BuildRequires: dev86
BuildRequires: graphviz
BuildRequires: latex2html
BuildRequires: libjpeg-devel
BuildRequires: libxml2-devel
BuildRequires: ncurses-devel
BuildRequires: openssl
BuildRequires: openssl-devel
BuildRequires: pciutils-devel
BuildRequires: python-devel
BuildRequires: texinfo
BuildRequires: transfig
%if %suse_version <= 1110
BuildRequires: pmtools
BuildRequires: pmtools
%else
BuildRequires: acpica
BuildRequires: acpica
%endif
%if %suse_version >= 1030
BuildRequires: texlive
BuildRequires: texlive-latex
BuildRequires: texlive
BuildRequires: texlive-latex
%else
BuildRequires: te_ams
BuildRequires: te_latex
BuildRequires: tetex
BuildRequires: te_ams
BuildRequires: te_latex
BuildRequires: tetex
%endif
%ifarch x86_64
BuildRequires: glibc-32bit glibc-devel-32bit
BuildRequires: gcc-32bit
BuildRequires: gcc43-32bit
%define max_cpus 256
%define pae_enabled n
%else
%define max_cpus 32
%define pae_enabled y
%endif
BuildRequires: glibc-devel
%if %{?with_kmp}0
BuildRequires: kernel-source kernel-syms module-init-tools xorg-x11
%endif
@ -74,6 +77,7 @@ AutoReqProv: on
PreReq: %insserv_prereq %fillup_prereq
Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel)
Source0: xen-4.0.1-testing-src.tar.bz2
Source1: stubdom.tar.bz2
Source2: README.SuSE
Source3: boot.xen
Source4: boot.local.xenU
@ -121,25 +125,32 @@ Patch15: 21847-pscsi.patch
Patch16: 21866-xenapi.patch
Patch17: 21894-intel-unmask-cpuid.patch
Patch18: 22019-x86-cpuidle-online-check.patch
Patch19: 22051-x86-forced-EOI.patch
Patch20: 22067-x86-irq-domain.patch
Patch21: 22068-vtd-irte-RH-bit.patch
Patch22: 22071-ept-get-entry-lock.patch
Patch23: 22084-x86-xsave-off.patch
Patch24: 7410-qemu-alt-gr.patch
Patch25: 22135-heap-lock.patch
Patch26: 22148-serial-irq-dest.patch
Patch27: 22157-x86-debug-key-i.patch
Patch28: 22159-notify-evtchn-dying.patch
Patch29: 22160-Intel-C6-EOI.patch
Patch30: 22174-x86-pmtimer-accuracy.patch
Patch31: 22175-x86-irq-enter-exit.patch
Patch32: 22177-i386-irq-safe-map_domain_page.patch
Patch33: 22194-tmem-check-pv-mfn.patch
Patch34: 22213-x86-xsave-cpuid-check.patch
Patch35: 22214-x86-msr-misc-enable.patch
Patch36: 22222-x86-timer-extint.patch
Patch37: 22223-vtd-igd-workaround.patch
Patch19: 22045-python27-compat.patch
Patch20: 22051-x86-forced-EOI.patch
Patch21: 22067-x86-irq-domain.patch
Patch22: 22068-vtd-irte-RH-bit.patch
Patch23: 22071-ept-get-entry-lock.patch
Patch24: 22084-x86-xsave-off.patch
Patch25: 7410-qemu-alt-gr.patch
Patch26: 22135-heap-lock.patch
Patch27: 22148-serial-irq-dest.patch
Patch28: 22157-x86-debug-key-i.patch
Patch29: 22159-notify-evtchn-dying.patch
Patch30: 22160-Intel-C6-EOI.patch
Patch31: 22174-x86-pmtimer-accuracy.patch
Patch32: 22175-x86-irq-enter-exit.patch
Patch33: 22177-i386-irq-safe-map_domain_page.patch
Patch34: 22194-tmem-check-pv-mfn.patch
Patch35: 22213-x86-xsave-cpuid-check.patch
Patch36: 22214-x86-msr-misc-enable.patch
Patch37: 22222-x86-timer-extint.patch
Patch38: 22223-vtd-workarounds.patch
Patch39: 22231-x86-pv-ucode-msr-intel.patch
Patch40: 22232-x86-64-lahf-lm-bios-workaround.patch
Patch41: 22280-kexec.patch
Patch42: 22337-vtd-scan-single-func.patch
Patch43: 22348-vtd-check-secbus-devfn.patch
Patch44: 22369-xend-pci-passthru-fix.patch
# Our patches
Patch300: xen-config.diff
Patch301: xend-config.diff
@ -200,11 +211,13 @@ Patch371: domu-usb-controller.patch
Patch372: popen2-argument-fix.patch
Patch373: usb-list.patch
Patch374: xend-devid-or-name.patch
Patch375: 22326-cpu-pools-numa-placement.patch
# Patches for snapshot support
Patch400: snapshot-ioemu-save.patch
Patch401: snapshot-ioemu-restore.patch
Patch402: snapshot-ioemu-delete.patch
Patch403: snapshot-xend.patch
Patch404: snapshot-without-pv-fix.patch
# ioemu part of blktap patch series
Patch410: ioemu-blktap-fv-init.patch
Patch411: ioemu-blktap-image-format.patch
@ -230,6 +243,9 @@ Patch434: check_device_status.patch
Patch435: change_home_server.patch
Patch436: altgr_2.patch
Patch437: xenfb_32bpp.patch
Patch438: stdvga-cache.patch
Patch439: minios-fixups.patch
Patch440: bdrv_default_rwflag.patch
# Jim's domain lock patch
Patch450: xend-domain-lock.patch
# Hypervisor and PV driver Patches
@ -257,24 +273,31 @@ Patch704: hv_apic.patch
# Build patch
Patch999: tmp_build.patch
# FATE 310510
Patch10001: xenpaging.tools_xenpaging_cleanup.patch
Patch10002: xenpaging.pageout_policy.patch
Patch10003: xenpaging.xs_daemon_close.patch
Patch10004: xenpaging.get_paged_frame.patch
Patch10005: xenpaging.makefile.patch
Patch10010: xenpaging.policy_linear.patch
Patch10011: xenpaging.pagefile.patch
Patch10012: xenpaging.xenpaging_init.patch
Patch10013: xenpaging.mem_paging_tool_qemu_flush_cache.patch
Patch10014: xenpaging.memory_op.patch
Patch10015: xenpaging.populate_only_if_paged.patch
Patch10017: xenpaging.autostart.patch
Patch10018: xenpaging.signal_handling.patch
Patch10019: xenpaging.MRU_SIZE.patch
Patch10021: xenpaging.mem_event_check_ring-free_requests.patch
Patch10022: xenpaging.blacklist.patch
Patch10023: xenpaging.autostart_delay.patch
Patch10040: xenpaging.doc.patch
Patch10001: xenpaging.tools_xenpaging_cleanup.patch
Patch10002: xenpaging.pageout_policy.patch
Patch10003: xenpaging.xs_daemon_close.patch
Patch10004: xenpaging.get_paged_frame.patch
Patch10005: xenpaging.makefile.patch
Patch10010: xenpaging.policy_linear.patch
Patch10011: xenpaging.pagefile.patch
Patch10012: xenpaging.xenpaging_init.patch
Patch10013: xenpaging.mem_paging_tool_qemu_flush_cache.patch
Patch10014: xenpaging.machine_to_phys_mapping.patch
Patch10015: xenpaging.populate_only_if_paged.patch
Patch10017: xenpaging.autostart.patch
Patch10018: xenpaging.signal_handling.patch
Patch10019: xenpaging.MRU_SIZE.patch
Patch10020: xenpaging.guest_remove_page.patch
Patch10021: xenpaging.mem_event_check_ring-free_requests.patch
Patch10022: xenpaging.blacklist.patch
Patch10023: xenpaging.autostart_delay.patch
Patch10024: xenpaging.page_already_populated.patch
Patch10025: xenpaging.notify_policy_only_once.patch
Patch10026: xenpaging.num_pages_equal_max_pages.patch
Patch10027: xenpaging.p2m_mem_paging_populate_if_p2m_ram_paged.patch
Patch10028: xenpaging.HVMCOPY_gfn_paged_out.patch
Patch10029: xenpaging.optimize_p2m_mem_paging_populate.patch
Patch10040: xenpaging.doc.patch
Url: http://www.cl.cam.ac.uk/Research/SRG/netos/xen/
BuildRoot: %{_tmppath}/%{name}-%{version}-build
#%define pysite %(python -c "import distutils.sysconfig; print distutils.sysconfig.get_python_lib()")
@ -606,7 +629,7 @@ Authors:
Ian Pratt <ian.pratt@cl.cam.ac.uk>
%prep
%setup -q -n %xen_build_dir
%setup -q -n %xen_build_dir -a 1
%patch0 -p1
%patch1 -p1
%patch2 -p1
@ -645,6 +668,13 @@ Authors:
%patch35 -p1
%patch36 -p1
%patch37 -p1
%patch38 -p1
%patch39 -p1
%patch40 -p1
%patch41 -p1
%patch42 -p1
%patch43 -p1
%patch44 -p1
%patch300 -p1
%patch301 -p1
%patch302 -p1
@ -703,10 +733,12 @@ Authors:
%patch372 -p1
%patch373 -p1
%patch374 -p1
%patch375 -p1
%patch400 -p1
%patch401 -p1
%patch402 -p1
%patch403 -p1
%patch404 -p1
%patch410 -p1
%patch411 -p1
%patch412 -p1
@ -730,6 +762,9 @@ Authors:
%patch435 -p1
%patch436 -p1
%patch437 -p1
%patch438 -p1
%patch439 -p1
%patch440 -p1
%patch450 -p1
%patch500 -p1
%patch501 -p1
@ -766,11 +801,19 @@ Authors:
%patch10017 -p1
%patch10018 -p1
%patch10019 -p1
%patch10020 -p1
%patch10021 -p1
%patch10022 -p1
%patch10023 -p1
%patch10024 -p1
%patch10025 -p1
%patch10026 -p1
%patch10027 -p1
%patch10028 -p1
%patch10029 -p1
%patch10040 -p1
%build
XEN_EXTRAVERSION=%version-%release
XEN_EXTRAVERSION=${XEN_EXTRAVERSION#%{xvers}}
@ -857,6 +900,17 @@ done
mkdir -p $RPM_BUILD_ROOT/etc/modprobe.d
install -m644 %SOURCE19 $RPM_BUILD_ROOT/etc/modprobe.d/xen_pvdrivers.conf
%endif
# stubdom
make stubdom
make -C stubdom install \
DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} \
DOCDIR=%{_defaultdocdir}/xen INCDIR=%{_includedir}
mkdir -p $RPM_BUILD_ROOT/%{_defaultdocdir}/xen
%ifarch x86_64
ln -s /usr/lib/xen/boot/pv-grub-x86_32.gz $RPM_BUILD_ROOT/usr/lib/xen/boot/pv-grub-x86_32.gz
ln -s /usr/lib/xen/bin/stubdom-dm $RPM_BUILD_ROOT/usr/lib64/xen/bin/stubdom-dm
ln -s /usr/lib/xen/bin/stubdompath.sh $RPM_BUILD_ROOT/usr/lib64/xen/bin/stubdompath.sh
%endif
# docs
make -C docs install \
DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} \
@ -1077,6 +1131,12 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug
%{_datadir}/xen/qemu/*
%{_datadir}/xen/man/man1/*
%{_datadir}/xen/man/man8/*
/usr/lib/xen/bin/stubdom-dm
/usr/lib/xen/bin/stubdompath.sh
%ifarch x86_64
%{_libdir}/xen/bin/stubdom-dm
%{_libdir}/xen/bin/stubdompath.sh
%endif
%{_libdir}/xen/bin/qemu-dm
%ifarch x86_64
/usr/lib/xen/bin/qemu-dm
@ -1092,6 +1152,11 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug
%{_libdir}/python%{pyver}/site-packages/grub/*
#%pysite/fsimage.so
%{_libdir}/python%{pyver}/site-packages/fsimage.so
/usr/lib/xen/boot/ioemu-stubdom.gz
/usr/lib/xen/boot/pv-grub-x86_32.gz
%ifarch x86_64
/usr/lib/xen/boot/pv-grub-x86_64.gz
%endif
%files tools-domU
%defattr(-,root,root)
@ -0,0 +1,144 @@
Subject: xenpaging: handle HVMCOPY_gfn_paged_out in copy_from/to_user
copy_from_user_hvm can fail when __hvm_copy returns
HVMCOPY_gfn_paged_out for a referenced gfn, for example during a
guest's pagetable walk. This has to be handled. One hypercall that
failed this way was do_memory_op/XENMEM_decrease_reservation, which led
to a BUG_ON in balloon.c. Since do_memory_op already has restart
support for the hypercall, copy_from_guest reuses this existing retry
code. In addition, cleanup on error was added to increase_reservation
and populate_physmap.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
xen/arch/x86/hvm/hvm.c | 4 ++++
xen/common/memory.c | 43 ++++++++++++++++++++++++++++++++++++++-----
2 files changed, 42 insertions(+), 5 deletions(-)
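As an aside for readers unfamiliar with the restart machinery this description refers to: the sketch below is a self-contained model of the -EAGAIN flow, not Xen code. The struct, the *_model helpers and main() are invented stand-ins; only the "copy failed because the gfn is paged out, so re-queue the hypercall instead of failing it" logic mirrors the patch.

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for a guest-supplied hypercall argument; purely illustrative. */
struct reservation { unsigned long nr_extents; };

/* Models copy_from_user_hvm: 0 on success, -EAGAIN (returned through an
 * unsigned long, as in the real code) when the source gfn is paged out. */
static unsigned long copy_from_guest_model(struct reservation *dst,
                                           const struct reservation *src,
                                           int paged_out)
{
    if (paged_out)
        return (unsigned long)-EAGAIN;
    memcpy(dst, src, sizeof(*dst));
    return 0;
}

/* Models do_memory_op: on -EAGAIN the hypercall is not failed but re-queued
 * as a continuation (reported to the caller here for simplicity). */
static long do_memory_op_model(const struct reservation *guest_arg,
                               int paged_out)
{
    struct reservation r;
    unsigned long rc = copy_from_guest_model(&r, guest_arg, paged_out);

    if (rc) {
        if ((long)rc == -EAGAIN)
            return -EAGAIN;    /* real code: hypercall_create_continuation() */
        return -EFAULT;        /* genuine copy error */
    }
    return (long)r.nr_extents; /* normal processing */
}

int main(void)
{
    struct reservation arg = { .nr_extents = 4 };
    printf("paged out : %ld\n", do_memory_op_model(&arg, 1)); /* -EAGAIN */
    printf("available : %ld\n", do_memory_op_model(&arg, 0)); /* 4 */
    return 0;
}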
--- xen-4.0.1-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.0.1-testing/xen/arch/x86/hvm/hvm.c
@@ -1853,6 +1853,8 @@ unsigned long copy_to_user_hvm(void *to,
rc = hvm_copy_to_guest_virt_nofault((unsigned long)to, (void *)from,
len, 0);
+ if ( rc == HVMCOPY_gfn_paged_out )
+ return -EAGAIN;
return rc ? len : 0; /* fake a copy_to_user() return code */
}
@@ -1869,6 +1871,8 @@ unsigned long copy_from_user_hvm(void *t
#endif
rc = hvm_copy_from_guest_virt_nofault(to, (unsigned long)from, len, 0);
+ if ( rc == HVMCOPY_gfn_paged_out )
+ return -EAGAIN;
return rc ? len : 0; /* fake a copy_from_user() return code */
}
--- xen-4.0.1-testing.orig/xen/common/memory.c
+++ xen-4.0.1-testing/xen/common/memory.c
@@ -47,6 +47,7 @@ static void increase_reservation(struct
{
struct page_info *page;
unsigned long i;
+ unsigned long ctg_ret;
xen_pfn_t mfn;
struct domain *d = a->domain;
@@ -80,8 +81,14 @@ static void increase_reservation(struct
if ( !guest_handle_is_null(a->extent_list) )
{
mfn = page_to_mfn(page);
- if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
+ ctg_ret = __copy_to_guest_offset(a->extent_list, i, &mfn, 1);
+ if ( unlikely(ctg_ret) )
+ {
+ free_domheap_pages(page, a->extent_order);
+ if ( (long)ctg_ret == -EAGAIN )
+ a->preempted = 1;
goto out;
+ }
}
}
@@ -93,6 +100,7 @@ static void populate_physmap(struct memo
{
struct page_info *page;
unsigned long i, j;
+ unsigned long ctg_ret;
xen_pfn_t gpfn, mfn;
struct domain *d = a->domain;
@@ -111,8 +119,13 @@ static void populate_physmap(struct memo
goto out;
}
- if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
+ j = __copy_from_guest_offset(&gpfn, a->extent_list, i, 1);
+ if ( unlikely(j) )
+ {
+ if ( (long)j == -EAGAIN )
+ a->preempted = 1;
goto out;
+ }
if ( a->memflags & MEMF_populate_on_demand )
{
@@ -142,8 +155,17 @@ static void populate_physmap(struct memo
set_gpfn_from_mfn(mfn + j, gpfn + j);
/* Inform the domain of the new page's machine address. */
- if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
+ ctg_ret = __copy_to_guest_offset(a->extent_list, i, &mfn, 1);
+ if ( unlikely(ctg_ret) )
+ {
+ for ( j = 0; j < (1 << a->extent_order); j++ )
+ set_gpfn_from_mfn(mfn + j, INVALID_M2P_ENTRY);
+ guest_physmap_remove_page(d, gpfn, mfn, a->extent_order);
+ free_domheap_pages(page, a->extent_order);
+ if ( (long)ctg_ret == -EAGAIN )
+ a->preempted = 1;
goto out;
+ }
}
}
}
@@ -226,8 +248,13 @@ static void decrease_reservation(struct
goto out;
}
- if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
+ j = __copy_from_guest_offset(&gmfn, a->extent_list, i, 1);
+ if ( unlikely(j) )
+ {
+ if ( (long)j == -EAGAIN )
+ a->preempted = 1;
goto out;
+ }
if ( tb_init_done )
{
@@ -511,6 +538,7 @@ long do_memory_op(unsigned long cmd, XEN
int rc, op;
unsigned int address_bits;
unsigned long start_extent;
+ unsigned long cfg_ret;
struct xen_memory_reservation reservation;
struct memop_args args;
domid_t domid;
@@ -524,8 +552,13 @@ long do_memory_op(unsigned long cmd, XEN
case XENMEM_populate_physmap:
start_extent = cmd >> MEMOP_EXTENT_SHIFT;
- if ( copy_from_guest(&reservation, arg, 1) )
+ cfg_ret = copy_from_guest(&reservation, arg, 1);
+ if ( unlikely(cfg_ret) )
+ {
+ if ( (long)cfg_ret == -EAGAIN )
+ return hypercall_create_continuation(__HYPERVISOR_memory_op, "lh", cmd, arg);
return start_extent;
+ }
/* Is size too large for us to encode a continuation? */
if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
@ -14,10 +14,10 @@ Signed-off-by: Olaf Hering <olaf@aepfle.de>
tools/python/README.sxpcfg | 1
tools/python/xen/xend/XendConfig.py | 3 +
tools/python/xen/xend/XendDomainInfo.py | 6 ++
tools/python/xen/xend/image.py | 87 ++++++++++++++++++++++++++++++++
tools/python/xen/xend/image.py | 91 ++++++++++++++++++++++++++++++++
tools/python/xen/xm/create.py | 5 +
tools/python/xen/xm/xenapi_create.py | 1
8 files changed, 107 insertions(+)
8 files changed, 111 insertions(+)
Index: xen-4.0.1-testing/tools/examples/xmexample.hvm
===================================================================
@ -121,7 +121,7 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/image.py
rtc_timeoffset = int(vmConfig['platform'].get('rtc_timeoffset', 0))
if int(vmConfig['platform'].get('localtime', 0)):
if time.localtime(time.time())[8]:
@@ -392,6 +394,91 @@ class ImageHandler:
@@ -392,6 +394,95 @@ class ImageHandler:
sentinel_fifos_inuse[sentinel_path_fifo] = 1
self.sentinel_path_fifo = sentinel_path_fifo
@ -140,6 +140,10 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/image.py
+ self.xenpaging_logfile = "/var/log/xen/xenpaging-%s.log" % str(self.vm.info['name_label'])
+ logfile_mode = os.O_WRONLY|os.O_CREAT|os.O_APPEND|os.O_TRUNC
+ null = os.open("/dev/null", os.O_RDONLY)
+ try:
+ os.unlink(self.xenpaging_logfile)
+ except:
+ pass
+ logfd = os.open(self.xenpaging_logfile, logfile_mode, 0644)
+ sys.stderr.flush()
+ contract = osdep.prefork("%s:%d" % (self.vm.getName(), self.vm.getDomid()))
@ -11,8 +11,8 @@ TODO: find the correct place to remove the xenstore directory when the guest is
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/python/xen/xend/image.py | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
tools/python/xen/xend/image.py | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
--- xen-4.0.1-testing.orig/tools/python/xen/xend/image.py
+++ xen-4.0.1-testing/tools/python/xen/xend/image.py
@ -53,7 +53,7 @@ Signed-off-by: Olaf Hering <olaf@aepfle.de>
xenpaging_bin = auxbin.pathTo("xenpaging")
args = [xenpaging_bin]
args = args + ([ "%d" % self.vm.getDomid()])
@@ -430,6 +453,9 @@ class ImageHandler:
@@ -434,6 +457,9 @@ class ImageHandler:
except:
log.warn("chdir %s failed" % xenpaging_dir)
try:
@ -63,7 +63,15 @@ Signed-off-by: Olaf Hering <olaf@aepfle.de>
log.info("starting %s" % args)
os.execve(xenpaging_bin, args, env)
except Exception, e:
@@ -449,6 +475,8 @@ class ImageHandler:
@@ -449,10 +475,16 @@ class ImageHandler:
self.xenpaging_pid = xenpaging_pid
os.close(null)
os.close(logfd)
+ if self.xenpaging_delay == 0.0:
+ log.warn("waiting for xenpaging ...")
+ time.sleep(22)
+ log.warn("waiting for xenpaging done.")
def destroyXenPaging(self):
if self.xenpaging is None:
return
@ -0,0 +1,176 @@
Subject: xenpaging: drop paged pages in guest_remove_page
Simply drop paged-out pages in guest_remove_page(), and notify
xenpaging to drop its reference to the gfn.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/xenpaging/xenpaging.c | 17 +++++++---
xen/arch/x86/mm/p2m.c | 65 +++++++++++++++++++++++++++++++----------
xen/common/memory.c | 6 +++
xen/include/asm-x86/p2m.h | 4 ++
xen/include/public/mem_event.h | 1
5 files changed, 73 insertions(+), 20 deletions(-)
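A rough model of the pager-side dispatch this patch adds (the request struct and helpers below are simplified stand-ins, not the xenpaging sources; only the drop-versus-populate decision mirrors the change):

#include <stdint.h>
#include <stdio.h>

#define MEM_EVENT_FLAG_VCPU_PAUSED (1 << 0)
#define MEM_EVENT_FLAG_DROP_PAGE   (1 << 3)   /* value taken from the patch */

struct mem_event_request { uint64_t gfn; uint32_t flags; };

/* Stand-in for xenpaging_populate_page(); returns 0 on success. */
static int populate_page_model(uint64_t gfn)
{
    printf("populating gfn %#llx from the pagefile\n", (unsigned long long)gfn);
    return 0;
}

/* Models the main-loop change: dropped pages are acknowledged without a
 * page-in, everything else is populated as before. */
static int handle_request_model(const struct mem_event_request *req)
{
    if (req->flags & MEM_EVENT_FLAG_DROP_PAGE) {
        printf("dropping gfn %#llx, guest released it\n",
               (unsigned long long)req->gfn);
        return 0;
    }
    return populate_page_model(req->gfn);
}

int main(void)
{
    struct mem_event_request drop  = { .gfn = 0x37ff0,
                                       .flags = MEM_EVENT_FLAG_DROP_PAGE };
    struct mem_event_request fault = { .gfn = 0x1234,
                                       .flags = MEM_EVENT_FLAG_VCPU_PAUSED };
    handle_request_model(&drop);
    handle_request_model(&fault);
    return 0;
}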
--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c
@@ -598,12 +598,19 @@ int main(int argc, char *argv[])
goto out;
}
- /* Populate the page */
- rc = xenpaging_populate_page(paging, &req.gfn, fd, i);
- if ( rc != 0 )
+ if ( req.flags & MEM_EVENT_FLAG_DROP_PAGE )
{
- ERROR("Error populating page");
- goto out;
+ DPRINTF("Dropping page %"PRIx64" p2mt %x\n", req.gfn, req.p2mt);
+ }
+ else
+ {
+ /* Populate the page */
+ rc = xenpaging_populate_page(paging, &req.gfn, fd, i);
+ if ( rc != 0 )
+ {
+ ERROR("Error populating page");
+ goto out;
+ }
}
/* Prepare the response */
--- xen-4.0.1-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.0.1-testing/xen/arch/x86/mm/p2m.c
@@ -2000,12 +2000,15 @@ p2m_remove_page(struct domain *d, unsign
P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
- for ( i = 0; i < (1UL << page_order); i++ )
+ if ( mfn_valid(_mfn(mfn)) )
{
- mfn_return = d->arch.p2m->get_entry(d, gfn + i, &t, p2m_query);
- if ( !p2m_is_grant(t) )
- set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
- ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
+ for ( i = 0; i < (1UL << page_order); i++ )
+ {
+ mfn_return = d->arch.p2m->get_entry(d, gfn + i, &t, p2m_query);
+ if ( !p2m_is_grant(t) )
+ set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
+ ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
+ }
}
set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
}
@@ -2533,6 +2536,35 @@ int p2m_mem_paging_evict(struct domain *
return 0;
}
+void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
+{
+ struct vcpu *v = current;
+ mem_event_request_t req;
+ p2m_type_t p2mt;
+
+ memset(&req, 0, sizeof(req));
+
+ /* Check that there's space on the ring for this request */
+ if ( mem_event_check_ring(d) )
+ return;
+
+ gfn_to_mfn(d, gfn, &p2mt);
+ /* Pause domain */
+ if ( v->domain->domain_id == d->domain_id )
+ {
+ vcpu_pause_nosync(v);
+ req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
+ }
+
+ /* Send request to pager */
+ req.flags |= MEM_EVENT_FLAG_DROP_PAGE;
+ req.gfn = gfn;
+ req.p2mt = p2mt;
+ req.vcpu_id = v->vcpu_id;
+
+ mem_event_put_request(d, &req);
+}
+
void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
{
struct vcpu *v = current;
@@ -2597,17 +2629,20 @@ void p2m_mem_paging_resume(struct domain
/* Pull the response off the ring */
mem_event_get_response(d, &rsp);
- /* Fix p2m entry */
- mfn = gfn_to_mfn(d, rsp.gfn, &p2mt);
- if (mfn_valid(mfn))
+ if ( !( rsp.flags & MEM_EVENT_FLAG_DROP_PAGE ) )
{
- p2m_lock(d->arch.p2m);
- set_p2m_entry(d, rsp.gfn, mfn, 0, p2m_ram_rw);
- set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
- p2m_unlock(d->arch.p2m);
- } else {
- gdprintk(XENLOG_ERR, "invalid mfn %lx for gfn %lx p2mt %x\n",
- mfn_x(mfn), rsp.gfn, p2mt);
+ /* Fix p2m entry */
+ mfn = gfn_to_mfn(d, rsp.gfn, &p2mt);
+ if (mfn_valid(mfn))
+ {
+ p2m_lock(d->arch.p2m);
+ set_p2m_entry(d, rsp.gfn, mfn, 0, p2m_ram_rw);
+ set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
+ p2m_unlock(d->arch.p2m);
+ } else {
+ gdprintk(XENLOG_ERR, "invalid mfn %lx for gfn %lx p2mt %x\n",
+ mfn_x(mfn), rsp.gfn, p2mt);
+ }
}
/* Unpause domain */
--- xen-4.0.1-testing.orig/xen/common/memory.c
+++ xen-4.0.1-testing/xen/common/memory.c
@@ -162,6 +162,12 @@ int guest_remove_page(struct domain *d,
#ifdef CONFIG_X86
mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+ if ( unlikely(p2m_is_paging(p2mt)) )
+ {
+ guest_physmap_remove_page(d, gmfn, mfn, 0);
+ p2m_mem_paging_drop_page(d, gmfn);
+ return 1;
+ }
#else
mfn = gmfn_to_mfn(d, gmfn);
#endif
--- xen-4.0.1-testing.orig/xen/include/asm-x86/p2m.h
+++ xen-4.0.1-testing/xen/include/asm-x86/p2m.h
@@ -441,6 +441,8 @@ int set_shared_p2m_entry(struct domain *
int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn);
/* Evict a frame */
int p2m_mem_paging_evict(struct domain *d, unsigned long gfn);
+/* Tell xenpaging to drop a paged out frame */
+void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn);
/* Start populating a paged out frame */
void p2m_mem_paging_populate(struct domain *d, unsigned long gfn);
/* Prepare the p2m for paging a frame in */
@@ -448,6 +450,8 @@ int p2m_mem_paging_prep(struct domain *d
/* Resume normal operation (in case a domain was paused) */
void p2m_mem_paging_resume(struct domain *d);
#else
+static inline void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn)
+{ }
static inline void p2m_mem_paging_populate(struct domain *d, unsigned long gfn)
{ }
#endif
--- xen-4.0.1-testing.orig/xen/include/public/mem_event.h
+++ xen-4.0.1-testing/xen/include/public/mem_event.h
@@ -37,6 +37,7 @@
#define MEM_EVENT_FLAG_VCPU_PAUSED (1 << 0)
#define MEM_EVENT_FLAG_DOM_PAUSED (1 << 1)
#define MEM_EVENT_FLAG_OUT_OF_MEM (1 << 2)
+#define MEM_EVENT_FLAG_DROP_PAGE (1 << 3)
typedef struct mem_event_shared_page {
@ -0,0 +1,62 @@
Subject: xenpaging: update machine_to_phys_mapping during page-in and page-out
The machine_to_phys_mapping array needs updating during page-out.
If a page is gone, a call to get_gpfn_from_mfn will still return the old
gfn for an already paged-out page. This happens when the entire guest
ram is paged-out before xen_vga_populate_vram() runs. Then
XENMEM_populate_physmap is called with gfn 0xff000. A new page is
allocated with alloc_domheap_pages. This new page does not have a gfn
yet. However, in guest_physmap_add_entry() the passed mfn still maps to
an old gfn. This old gfn is paged-out and has no mfn anymore. As a
result, the ASSERT() triggers because p2m_is_ram() is true for
p2m_ram_paging* types.
If the machine_to_phys_mapping array is updated properly, both loops in
guest_physmap_add_entry() turn into no-ops for the new page and the
mfn/gfn mapping will be done at the end of the function.
The same thing needs to happen during a page-in.
If XENMEM_add_to_physmap is used with XENMAPSPACE_gmfn,
get_gpfn_from_mfn() will return an apparently valid gfn. As a result,
guest_physmap_remove_page() is called. The ASSERT in p2m_remove_page
triggers because the passed mfn does not match the old mfn for the
passed gfn.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
v2: check whether mfn is valid
xen/arch/x86/mm/p2m.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
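The invariant being restored can be shown with a toy machine-to-phys table (the array, the INVALID marker value and the helper names are illustrative assumptions; the two update points correspond to where set_gpfn_from_mfn() is added for evict and resume):

#include <stdio.h>

#define NR_MFNS            16
#define INVALID_M2P_ENTRY  (~0UL)

/* Toy machine_to_phys_mapping: indexed by mfn, stores the owning gfn. */
static unsigned long m2p[NR_MFNS];

/* Page-out: the gfn loses its mfn, so the reverse mapping must be
 * invalidated too (this is the set_gpfn_from_mfn() the patch adds). */
static void evict_model(unsigned long mfn)
{
    m2p[mfn] = INVALID_M2P_ENTRY;
}

/* Page-in: once the gfn points at a (new) mfn again, the reverse
 * mapping is re-established. */
static void resume_model(unsigned long mfn, unsigned long gfn)
{
    m2p[mfn] = gfn;
}

int main(void)
{
    unsigned long mfn = 3, gfn = 0xff000;

    resume_model(mfn, gfn);          /* initial mapping */
    evict_model(mfn);                /* page-out */
    /* A later lookup now reports "no gfn" instead of the stale one,
     * so guest_physmap_add_entry() treats the mfn as fresh. */
    printf("m2p[%lu] = %#lx\n", mfn, m2p[mfn]);
    resume_model(mfn, gfn);          /* page-in restores the mapping */
    printf("m2p[%lu] = %#lx\n", mfn, m2p[mfn]);
    return 0;
}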
--- xen-4.0.1-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.0.1-testing/xen/arch/x86/mm/p2m.c
@@ -2524,6 +2524,7 @@ int p2m_mem_paging_evict(struct domain *
/* Remove mapping from p2m table */
p2m_lock(d->arch.p2m);
set_p2m_entry(d, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged);
+ set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
p2m_unlock(d->arch.p2m);
/* Put the page back so it gets freed */
@@ -2598,9 +2599,16 @@ void p2m_mem_paging_resume(struct domain
/* Fix p2m entry */
mfn = gfn_to_mfn(d, rsp.gfn, &p2mt);
- p2m_lock(d->arch.p2m);
- set_p2m_entry(d, rsp.gfn, mfn, 0, p2m_ram_rw);
- p2m_unlock(d->arch.p2m);
+ if (mfn_valid(mfn))
+ {
+ p2m_lock(d->arch.p2m);
+ set_p2m_entry(d, rsp.gfn, mfn, 0, p2m_ram_rw);
+ set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn);
+ p2m_unlock(d->arch.p2m);
+ } else {
+ gdprintk(XENLOG_ERR, "invalid mfn %lx for gfn %lx p2mt %x\n",
+ mfn_x(mfn), rsp.gfn, p2mt);
+ }
/* Unpause domain */
if ( rsp.flags & MEM_EVENT_FLAG_VCPU_PAUSED )
@ -1,4 +1,4 @@
Subject: xenpaging: print info when free request slots drop below 3
Subject: xenpaging: print info when free request slots drop below 2
Add debugging aid to free request slots in the ring buffer.
It should not happen that the ring gets full, print info anyway if it happens.
@ -15,7 +15,7 @@ Signed-off-by: Olaf Hering <olaf@aepfle.de>
mem_event_ring_lock(d);
free_requests = RING_FREE_REQUESTS(&d->mem_event.front_ring);
+ if ( unlikely(free_requests < 3) )
+ if ( unlikely(free_requests < 2) )
+ {
+ gdprintk(XENLOG_INFO, "free request slots: %d\n", free_requests);
+ WARN_ON(free_requests == 0);
@ -1,456 +0,0 @@
Subject: xenpaging: handle paged-out pages in XENMEM_* commands
Fix these two warings:
(XEN) Assertion '__mfn_valid(mfn_x(omfn))' failed at p2m.c:2200
(XEN) memory.c:171:d1 Domain 1 page number 37ff0 invalid
Handle paged-out pages in xc_memory_op, guest_physmap_add_entry and
guest_remove_page. Use new do_xenmem_op_retry helper function.
In addition, export also xen/errno.h to hvmloader to get ENOENT define.
XENMEM_populate_physmap
populate_physmap
-> guest_physmap_add_entry
XENMEM_exchange
memory_exchange
-> guest_physmap_add_entry
XENMEM_add_to_physmap
guest_physmap_add_page
-> guest_physmap_add_entry
__gnttab_map_grant_ref
create_grant_host_mapping
create_grant_p2m_mapping
-> guest_physmap_add_entry
XENMEM_decrease_reservation
decrease_reservation
-> guest_remove_page
XENMEM_add_to_physmap
-> guest_remove_page
XENMEM_add_to_physmap
-> XENMAPSPACE_gmfn
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/firmware/hvmloader/hvmloader.c | 9 +++-
tools/firmware/hvmloader/util.c | 26 +++++++++++-
tools/include/Makefile | 1
tools/ioemu-qemu-xen/hw/vga.c | 5 +-
tools/libxc/xc_domain.c | 73 ++++++++++++++++++++++-------------
xen/arch/x86/mm.c | 26 ++++++++++--
xen/arch/x86/mm/p2m.c | 7 +++
xen/common/memory.c | 25 +++++++++++
8 files changed, 133 insertions(+), 39 deletions(-)
--- xen-4.0.1-testing.orig/tools/firmware/hvmloader/hvmloader.c
+++ xen-4.0.1-testing/tools/firmware/hvmloader/hvmloader.c
@@ -29,6 +29,7 @@
#include "pci_regs.h"
#include "e820.h"
#include "option_rom.h"
+#include <xen/errno.h>
#include <xen/version.h>
#include <xen/hvm/params.h>
#include <xen/memory.h>
@@ -306,13 +307,19 @@ static void pci_setup(void)
while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend )
{
struct xen_add_to_physmap xatp;
+ int rc;
if ( hvm_info->high_mem_pgend == 0 )
hvm_info->high_mem_pgend = 1ull << (32 - PAGE_SHIFT);
xatp.domid = DOMID_SELF;
xatp.space = XENMAPSPACE_gmfn;
xatp.idx = --hvm_info->low_mem_pgend;
xatp.gpfn = hvm_info->high_mem_pgend++;
- if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+ do {
+ rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp);
+ if ( rc == -ENOENT )
+ cpu_relax();
+ } while ( rc == -ENOENT );
+ if ( rc != 0 )
BUG();
}
--- xen-4.0.1-testing.orig/tools/firmware/hvmloader/util.c
+++ xen-4.0.1-testing/tools/firmware/hvmloader/util.c
@@ -23,6 +23,7 @@
#include "e820.h"
#include "hypercall.h"
#include <stdint.h>
+#include <xen/errno.h>
#include <xen/xen.h>
#include <xen/memory.h>
@@ -323,19 +324,27 @@ void *mem_alloc(uint32_t size, uint32_t
while ( (reserve >> PAGE_SHIFT) != (e >> PAGE_SHIFT) )
{
+ int rc;
reserve += PAGE_SIZE;
mfn = reserve >> PAGE_SHIFT;
/* Try to allocate a brand new page in the reserved area. */
if ( !over_allocated )
{
+ uint8_t delay = 0;
xmr.domid = DOMID_SELF;
xmr.mem_flags = 0;
xmr.extent_order = 0;
xmr.nr_extents = 1;
set_xen_guest_handle(xmr.extent_start, &mfn);
- if ( hypercall_memory_op(XENMEM_populate_physmap, &xmr) == 1 )
+ do {
+ rc = hypercall_memory_op(XENMEM_populate_physmap, &xmr);
+ if ( rc == 0 )
+ cpu_relax();
+ } while ( rc == 0 && ++delay );
+ if ( rc == 1 )
continue;
+ printf("%s: over_allocated\n", __func__);
over_allocated = 1;
}
@@ -353,7 +362,12 @@ void *mem_alloc(uint32_t size, uint32_t
xatp.domid = DOMID_SELF;
xatp.space = XENMAPSPACE_gmfn;
xatp.gpfn = mfn;
- if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+ do {
+ rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp);
+ if ( rc == -ENOENT )
+ cpu_relax();
+ } while ( rc == -ENOENT );
+ if ( rc != 0 )
BUG();
}
@@ -595,6 +609,7 @@ uint16_t get_cpu_mhz(void)
uint64_t cpu_khz;
uint32_t tsc_to_nsec_mul, version;
int8_t tsc_shift;
+ int rc;
static uint16_t cpu_mhz;
if ( cpu_mhz != 0 )
@@ -605,7 +620,12 @@ uint16_t get_cpu_mhz(void)
xatp.space = XENMAPSPACE_shared_info;
xatp.idx = 0;
xatp.gpfn = (unsigned long)shared_info >> 12;
- if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+ do {
+ rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp);
+ if ( rc == -ENOENT )
+ cpu_relax();
+ } while ( rc == -ENOENT );
+ if ( rc != 0 )
BUG();
/* Get a consistent snapshot of scale factor (multiplier and shift). */
--- xen-4.0.1-testing.orig/tools/include/Makefile
+++ xen-4.0.1-testing/tools/include/Makefile
@@ -12,6 +12,7 @@ xen/.dir:
@rm -rf xen
mkdir -p xen/libelf
ln -sf ../$(XEN_ROOT)/xen/include/public/COPYING xen
+ ln -sf ../$(XEN_ROOT)/xen/include/xen/errno.h xen
ln -sf $(addprefix ../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) xen
ln -sf $(addprefix ../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) xen
ln -sf ../xen-sys/$(XEN_OS) xen/sys
--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/vga.c
+++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/vga.c
@@ -2157,9 +2157,10 @@ void set_vram_mapping(void *opaque, unsi
for (i = 0; i < (end - begin) >> TARGET_PAGE_BITS; i++) {
xatp.idx = (s->vram_gmfn >> TARGET_PAGE_BITS) + i;
xatp.gpfn = (begin >> TARGET_PAGE_BITS) + i;
- rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp);
+ while ((rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp)) && errno == ENOENT)
+ usleep(1000);
if (rc) {
- fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %"PRI_xen_pfn" failed: %d\n", xatp.idx, xatp.gpfn, rc);
+ fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %"PRI_xen_pfn" failed: %d\n", xatp.idx, xatp.gpfn, errno);
return;
}
}
--- xen-4.0.1-testing.orig/tools/libxc/xc_domain.c
+++ xen-4.0.1-testing/tools/libxc/xc_domain.c
@@ -536,6 +536,46 @@ int xc_domain_get_tsc_info(int xc_handle
return rc;
}
+static int do_xenmem_op_retry(int xc_handle, int cmd, struct xen_memory_reservation *reservation, unsigned long nr_extents, xen_pfn_t *extent_start)
+{
+ int err = 0;
+ unsigned long count = nr_extents;
+ unsigned long delay = 0;
+ unsigned long start = 0;
+
+ fprintf(stderr, "%s: cmd %d count %lx\n",__func__,cmd,count);
+ while ( count && start < nr_extents )
+ {
+ set_xen_guest_handle(reservation->extent_start, extent_start + start);
+ reservation->nr_extents = count;
+
+ err = xc_memory_op(xc_handle, cmd, reservation);
+ if ( err == count )
+ {
+ err = 0;
+ break;
+ }
+
+ if ( err > count || err < 0 || delay > 1000 * 1000)
+ {
+ fprintf(stderr, "%s: %d err %x count %lx start %lx delay %lu/%lu\n",__func__,cmd,err,count,start,delay,delay/(1<<15));
+ err = -1;
+ break;
+ }
+
+ if ( err )
+ {
+ delay = 0;
+ start += err;
+ count -= err;
+ }
+
+ usleep(delay);
+ delay += 1 << 15; /* 31 iterations, 15 seconds */
+ }
+
+ return err;
+}
int xc_domain_memory_increase_reservation(int xc_handle,
uint32_t domid,
@@ -546,26 +586,18 @@ int xc_domain_memory_increase_reservatio
{
int err;
struct xen_memory_reservation reservation = {
- .nr_extents = nr_extents,
.extent_order = extent_order,
.mem_flags = mem_flags,
.domid = domid
};
- /* may be NULL */
- set_xen_guest_handle(reservation.extent_start, extent_start);
-
- err = xc_memory_op(xc_handle, XENMEM_increase_reservation, &reservation);
- if ( err == nr_extents )
- return 0;
-
- if ( err >= 0 )
+ err = do_xenmem_op_retry(xc_handle, XENMEM_increase_reservation, &reservation, nr_extents, extent_start);
+ if ( err < 0 )
{
DPRINTF("Failed allocation for dom %d: "
"%ld extents of order %d, mem_flags %x\n",
domid, nr_extents, extent_order, mem_flags);
errno = ENOMEM;
- err = -1;
}
return err;
@@ -579,14 +611,11 @@ int xc_domain_memory_decrease_reservatio
{
int err;
struct xen_memory_reservation reservation = {
- .nr_extents = nr_extents,
.extent_order = extent_order,
.mem_flags = 0,
.domid = domid
};
- set_xen_guest_handle(reservation.extent_start, extent_start);
-
if ( extent_start == NULL )
{
DPRINTF("decrease_reservation extent_start is NULL!\n");
@@ -594,16 +623,12 @@ int xc_domain_memory_decrease_reservatio
return -1;
}
- err = xc_memory_op(xc_handle, XENMEM_decrease_reservation, &reservation);
- if ( err == nr_extents )
- return 0;
-
- if ( err >= 0 )
+ err = do_xenmem_op_retry(xc_handle, XENMEM_decrease_reservation, &reservation, nr_extents, extent_start);
+ if ( err < 0 )
{
DPRINTF("Failed deallocation for dom %d: %ld extents of order %d\n",
domid, nr_extents, extent_order);
errno = EINVAL;
- err = -1;
}
return err;
@@ -618,23 +643,17 @@ int xc_domain_memory_populate_physmap(in
{
int err;
struct xen_memory_reservation reservation = {
- .nr_extents = nr_extents,
.extent_order = extent_order,
.mem_flags = mem_flags,
.domid = domid
};
- set_xen_guest_handle(reservation.extent_start, extent_start);
-
- err = xc_memory_op(xc_handle, XENMEM_populate_physmap, &reservation);
- if ( err == nr_extents )
- return 0;
- if ( err >= 0 )
+ err = do_xenmem_op_retry(xc_handle, XENMEM_populate_physmap, &reservation, nr_extents, extent_start);
+ if ( err < 0 )
{
DPRINTF("Failed allocation for dom %d: %ld extents of order %d\n",
domid, nr_extents, extent_order);
errno = EBUSY;
- err = -1;
}
return err;
--- xen-4.0.1-testing.orig/xen/arch/x86/mm.c
+++ xen-4.0.1-testing/xen/arch/x86/mm.c
@@ -3660,6 +3660,8 @@ static int create_grant_p2m_mapping(uint
p2mt = p2m_grant_map_rw;
rc = guest_physmap_add_entry(current->domain, addr >> PAGE_SHIFT,
frame, 0, p2mt);
+ if ( rc == -ENOENT )
+ return GNTST_eagain;
if ( rc )
return GNTST_general_error;
else
@@ -4315,17 +4317,25 @@ long arch_memory_op(int op, XEN_GUEST_HA
case XENMAPSPACE_gmfn:
{
p2m_type_t p2mt;
+ unsigned long tmp_mfn;
- xatp.idx = mfn_x(gfn_to_mfn_unshare(d, xatp.idx, &p2mt, 0));
+ tmp_mfn = mfn_x(gfn_to_mfn_unshare(d, xatp.idx, &p2mt, 0));
+ if ( unlikely(p2m_is_paging(p2mt)) )
+ {
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(d, xatp.idx);
+ rcu_unlock_domain(d);
+ return -ENOENT;
+ }
/* If the page is still shared, exit early */
if ( p2m_is_shared(p2mt) )
{
rcu_unlock_domain(d);
return -ENOMEM;
}
- if ( !get_page_from_pagenr(xatp.idx, d) )
+ if ( !get_page_from_pagenr(tmp_mfn, d) )
break;
- mfn = xatp.idx;
+ mfn = tmp_mfn;
page = mfn_to_page(mfn);
break;
}
@@ -4354,8 +4364,16 @@ long arch_memory_op(int op, XEN_GUEST_HA
/* Xen heap frames are simply unhooked from this phys slot. */
guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
else
+ {
/* Normal domain memory is freed, to avoid leaking memory. */
- guest_remove_page(d, xatp.gpfn);
+ rc = guest_remove_page(d, xatp.gpfn);
+ if ( rc == -ENOENT )
+ {
+ domain_unlock(d);
+ rcu_unlock_domain(d);
+ return rc;
+ }
+ }
}
/* Unmap from old location, if any. */
--- xen-4.0.1-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.0.1-testing/xen/arch/x86/mm/p2m.c
@@ -2186,6 +2186,13 @@ guest_physmap_add_entry(struct domain *d
P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
mfn + i, ogfn, gfn + i);
omfn = gfn_to_mfn_query(d, ogfn, &ot);
+ if ( unlikely(p2m_is_paging(ot)) )
+ {
+ p2m_unlock(d->arch.p2m);
+ if ( p2m_is_paged(ot) )
+ p2m_mem_paging_populate(d, ogfn);
+ return -ENOENT;
+ }
/* If we get here, we know the local domain owns the page,
so it can't have been grant mapped in. */
BUG_ON( p2m_is_grant(ot) );
--- xen-4.0.1-testing.orig/xen/common/memory.c
+++ xen-4.0.1-testing/xen/common/memory.c
@@ -95,6 +95,7 @@ static void populate_physmap(struct memo
unsigned long i, j;
xen_pfn_t gpfn, mfn;
struct domain *d = a->domain;
+ int rc;
if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
a->nr_extents-1) )
@@ -134,7 +135,12 @@ static void populate_physmap(struct memo
}
mfn = page_to_mfn(page);
- guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
+ rc = guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
+ if ( rc != 0 )
+ {
+ free_domheap_pages(page, a->extent_order);
+ goto out;
+ }
if ( !paging_mode_translate(d) )
{
@@ -162,6 +168,12 @@ int guest_remove_page(struct domain *d,
#ifdef CONFIG_X86
mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+ if ( unlikely(p2m_is_paging(p2mt)) )
+ {
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(d, gmfn);
+ return -ENOENT;
+ }
#else
mfn = gmfn_to_mfn(d, gmfn);
#endif
@@ -360,6 +372,13 @@ static long memory_exchange(XEN_GUEST_HA
/* Shared pages cannot be exchanged */
mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn + k, &p2mt, 0));
+ if ( p2m_is_paging(p2mt) )
+ {
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(d, gmfn);
+ rc = -ENOENT;
+ goto fail;
+ }
if ( p2m_is_shared(p2mt) )
{
rc = -ENOMEM;
@@ -456,7 +475,9 @@ static long memory_exchange(XEN_GUEST_HA
&gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);
mfn = page_to_mfn(page);
- guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
+ rc = guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
+ if ( rc == -ENOENT )
+ goto fail;
if ( !paging_mode_translate(d) )
{
@ -0,0 +1,55 @@
Subject: xenpaging: notify policy only on resume
If a page is requested more than once, the policy is also notified more
than once about the page-in. However, a page-in happens only once. Any
further resume will only unpause the other vcpu. The multiple
notifications will put the page into the MRU list multiple times and
will unlock
other already resumed pages too early. In the worst case, a page that
was just resumed can be evicted right away, causing a deadlock in the
guest.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/xenpaging/xenpaging.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
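A compact model of the behavioural change (policy and MRU handling are reduced to a counter; the notify_policy flag corresponds to the new xenpaging_resume_page() parameter, everything else is an invented stand-in):

#include <stdio.h>

static int mru_insertions;   /* stand-in for policy_notify_paged_in() work */

/* Models xenpaging_resume_page(): every resume unpauses the vcpu, but
 * only the resume that actually paged the gfn in notifies the policy. */
static void resume_page_model(unsigned long gfn, int notify_policy)
{
    if (notify_policy)
        mru_insertions++;            /* page enters the MRU list once */
    printf("gfn %#lx resumed (notify=%d)\n", gfn, notify_policy);
}

int main(void)
{
    /* First request for the gfn: real page-in, policy notified. */
    resume_page_model(0x1000, 1);
    /* Duplicate requests from other vcpus: unpause only. */
    resume_page_model(0x1000, 0);
    resume_page_model(0x1000, 0);
    printf("MRU insertions: %d (was 3 before the patch)\n", mru_insertions);
    return 0;
}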
--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c
@@ -377,7 +377,7 @@ int xenpaging_evict_page(xenpaging_t *pa
return ret;
}
-static int xenpaging_resume_page(xenpaging_t *paging, mem_event_response_t *rsp)
+static int xenpaging_resume_page(xenpaging_t *paging, mem_event_response_t *rsp, int notify_policy)
{
int ret;
@@ -387,7 +387,8 @@ static int xenpaging_resume_page(xenpagi
goto out;
/* Notify policy of page being paged in */
- policy_notify_paged_in(paging->mem_event.domain_id, rsp->gfn);
+ if ( notify_policy )
+ policy_notify_paged_in(paging->mem_event.domain_id, rsp->gfn);
/* Tell Xen page is ready */
ret = xc_mem_paging_resume(paging->xc_handle, paging->mem_event.domain_id,
@@ -619,7 +620,7 @@ int main(int argc, char *argv[])
rsp.vcpu_id = req.vcpu_id;
rsp.flags = req.flags;
- rc = xenpaging_resume_page(paging, &rsp);
+ rc = xenpaging_resume_page(paging, &rsp, 1);
if ( rc != 0 )
{
ERROR("Error resuming page");
@@ -648,7 +649,7 @@ int main(int argc, char *argv[])
rsp.vcpu_id = req.vcpu_id;
rsp.flags = req.flags;
- rc = xenpaging_resume_page(paging, &rsp);
+ rc = xenpaging_resume_page(paging, &rsp, 0);
if ( rc != 0 )
{
ERROR("Error resuming");
@ -0,0 +1,37 @@
Subject: xenpaging: allow negative num_pages and limit num_pages
Simplify the paging size argument. If a negative number is specified,
it means the entire guest memory should be paged out. This is useful
for debugging. Also limit num_pages to the guest's max_pages.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/xenpaging/xenpaging.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
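The resulting argument handling, as a standalone sketch (max_pages is a hard-coded stand-in for domain_info->max_pages and victim_t is simplified); note that the victims array is now allocated only after num_pages has been clamped, so its size always matches:

#include <stdio.h>
#include <stdlib.h>

typedef struct { unsigned long gfn; } victim_t;

int main(int argc, char *argv[])
{
    /* xenpaging takes the page count as its second positional argument. */
    int num_pages = (argc > 2) ? atoi(argv[2]) : -1;
    int max_pages = 1024;            /* stand-in for domain_info->max_pages */
    victim_t *victims;

    /* Negative or oversized requests mean "page out everything". */
    if (num_pages < 0 || num_pages > max_pages) {
        num_pages = max_pages;
        printf("setting num_pages to %d\n", num_pages);
    }

    /* Allocate the victims array only after clamping, so it always
     * matches the number of pages actually tracked. */
    victims = calloc(num_pages, sizeof(victim_t));
    if (!victims)
        return 1;

    printf("tracking %d victim slots\n", num_pages);
    free(victims);
    return 0;
}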
--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c
@@ -512,8 +512,6 @@ int main(int argc, char *argv[])
domain_id = atoi(argv[1]);
num_pages = atoi(argv[2]);
- victims = calloc(num_pages, sizeof(xenpaging_victim_t));
-
/* Seed random-number generator */
srand(time(NULL));
@@ -534,6 +532,13 @@ int main(int argc, char *argv[])
return 2;
}
+ if ( num_pages < 0 || num_pages > paging->domain_info->max_pages )
+ {
+ num_pages = paging->domain_info->max_pages;
+ DPRINTF("setting num_pages to %d\n", num_pages);
+ }
+ victims = calloc(num_pages, sizeof(xenpaging_victim_t));
+
/* ensure that if we get a signal, we'll do cleanup, then exit */
act.sa_handler = close_handler;
act.sa_flags = 0;
@ -0,0 +1,44 @@
Subject: xenpaging: optimize p2m_mem_paging_populate
p2m_mem_paging_populate will always put another request in the ring. To
reduce pressure on the ring, place only required requests in the ring.
If the gfn was already processed by another thread, and the current vcpu
does not need to be paused, p2m_mem_paging_resume will do nothing with
the request. Xenpaging will also drop the request if the vcpu does not
need a wakeup.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
xen/arch/x86/mm/p2m.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
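The queueing decision the patch introduces, isolated into a small model (the enum is a reduced stand-in for the real p2m types; the early return corresponds to the new else-if branch in p2m_mem_paging_populate):

#include <stdbool.h>
#include <stdio.h>

typedef enum { p2m_ram_rw, p2m_ram_paging_out, p2m_ram_paged,
               p2m_ram_paging_in, p2m_ram_paging_in_start } p2m_type_t;

/* Models the tail of p2m_mem_paging_populate(): a request is queued only
 * if the current vcpu had to be paused, or the gfn is genuinely out. */
static bool should_queue_request(p2m_type_t p2mt, bool vcpu_paused)
{
    if (vcpu_paused)
        return true;                       /* pager must wake this vcpu up */
    if (p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged)
        return true;                       /* page-in really needed */
    return false;                          /* already on its way back */
}

int main(void)
{
    printf("%d\n", should_queue_request(p2m_ram_paged, false));     /* 1 */
    printf("%d\n", should_queue_request(p2m_ram_paging_in, false)); /* 0: dropped */
    printf("%d\n", should_queue_request(p2m_ram_paging_in, true));  /* 1: wakeup */
    return 0;
}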
--- xen-4.0.1-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.0.1-testing/xen/arch/x86/mm/p2m.c
@@ -2571,12 +2571,12 @@ void p2m_mem_paging_populate(struct doma
mem_event_request_t req;
p2m_type_t p2mt;
- memset(&req, 0, sizeof(req));
-
/* Check that there's space on the ring for this request */
if ( mem_event_check_ring(d) )
return;
+ memset(&req, 0, sizeof(req));
+
/* Fix p2m mapping */
/* XXX: It seems inefficient to have this here, as it's only needed
* in one case (ept guest accessing paging out page) */
@@ -2594,6 +2594,11 @@ void p2m_mem_paging_populate(struct doma
vcpu_pause_nosync(v);
req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED;
}
+ else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
+ {
+ /* gfn is already on its way back and vcpu is not paused */
+ return;
+ }
/* Send request to pager */
req.gfn = gfn;
@ -0,0 +1,29 @@
Subject: xenpaging: when populating a page, check if populating is already in progress
p2m_mem_paging_populate can be called several times from different
vcpus. If the page is already in state p2m_ram_paging_in and has a new
valid mfn, invalidating this new mfn will cause trouble later when
p2m_mem_paging_resume sets the new gfn/mfn pair back to state
p2m_ram_rw.
Detect this situation and do not change the p2m state if the page is
still in the process of being paged out or is already being paged in.
In fact, p2m state p2m_ram_paged is the only state where the mfn type
can be invalidated.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
xen/arch/x86/mm/p2m.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
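The state check, isolated into a small model (the enum is again a reduced stand-in; before the patch the condition was p2mt != p2m_ram_paging_out, afterwards only p2m_ram_paged qualifies):

#include <stdbool.h>
#include <stdio.h>

typedef enum { p2m_ram_rw, p2m_ram_paging_out, p2m_ram_paged,
               p2m_ram_paging_in, p2m_ram_paging_in_start } p2m_type_t;

/* Old behaviour: invalidate the mfn for every state except "being paged
 * out", which wrongly hits pages already being paged back in. */
static bool invalidate_before(p2m_type_t p2mt)
{
    return p2mt != p2m_ram_paging_out;
}

/* New behaviour: only a fully paged-out page has no valid mfn to lose. */
static bool invalidate_after(p2m_type_t p2mt)
{
    return p2mt == p2m_ram_paged;
}

int main(void)
{
    p2m_type_t racy = p2m_ram_paging_in;  /* another vcpu already started page-in */
    printf("before: %d  after: %d\n",
           invalidate_before(racy), invalidate_after(racy)); /* before: 1  after: 0 */
    return 0;
}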
--- xen-4.0.1-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.0.1-testing/xen/arch/x86/mm/p2m.c
@@ -2581,7 +2581,7 @@ void p2m_mem_paging_populate(struct doma
/* XXX: It seems inefficient to have this here, as it's only needed
* in one case (ept guest accessing paging out page) */
gfn_to_mfn(d, gfn, &p2mt);
- if ( p2mt != p2m_ram_paging_out )
+ if ( p2mt == p2m_ram_paged )
{
p2m_lock(d->arch.p2m);
set_p2m_entry(d, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start);
@ -0,0 +1,23 @@
Subject: xenpaging: print p2mt for already paged-in pages
Add more debug output: print p2mt for pages that were requested more than once.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/xenpaging/xenpaging.c | 2 ++
1 file changed, 2 insertions(+)
--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c
@@ -632,8 +632,10 @@ int main(int argc, char *argv[])
else
{
DPRINTF("page already populated (domain = %d; vcpu = %d;"
+ " p2mt = %x;"
" gfn = %"PRIx64"; paused = %"PRId64")\n",
paging->mem_event.domain_id, req.vcpu_id,
+ req.p2mt,
req.gfn, req.flags & MEM_EVENT_FLAG_VCPU_PAUSED);
/* Tell Xen to resume the vcpu */
@ -1,114 +1,26 @@
Subject: xenpaging: populate only paged-out pages
Subject: xenpaging: populate paged-out pages unconditionally
populdate a paged-out page only once to reduce pressure in the ringbuffer.
Several cpus may still request a page at once. xenpaging can handle this.
Populate a page unconditionally to avoid missing a page-in request.
If the page is already in the process of being paged-in, then this vcpu
will be stopped and later resumed once the page content is usable again.
This matches other p2m_mem_paging_populate usage in the source tree.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
xen/arch/x86/hvm/emulate.c | 3 ++-
xen/arch/x86/hvm/hvm.c | 17 ++++++++++-------
xen/arch/x86/mm/guest_walk.c | 3 ++-
xen/arch/x86/mm/hap/guest_walk.c | 6 ++++--
4 files changed, 18 insertions(+), 11 deletions(-)
xen/common/grant_table.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
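The reverted policy, summarised as a standalone sketch (a model only; p2m_is_paging and the grant-table specifics are simplified assumptions): any paging state now triggers a populate request, and duplicate requests are left for the pager to sort out.

#include <stdbool.h>
#include <stdio.h>

typedef enum { p2m_ram_rw, p2m_ram_paging_out, p2m_ram_paged,
               p2m_ram_paging_in } p2m_type_t;

/* Simplified: the last three states count as "paging" in this model. */
static bool p2m_is_paging(p2m_type_t t)
{
    return t >= p2m_ram_paging_out;
}

static void populate(unsigned long gfn)
{
    printf("queue page-in request for gfn %#lx\n", gfn);
}

/* After the revert: any paging state triggers a populate request, so a
 * page-in can never be missed. */
static int get_paged_frame_model(unsigned long gfn, p2m_type_t t)
{
    if (p2m_is_paging(t)) {
        populate(gfn);
        return -1;   /* caller retries, e.g. GNTST_eagain in grant_table.c */
    }
    return 0;
}

int main(void)
{
    get_paged_frame_model(0x2000, p2m_ram_paging_in); /* populated unconditionally */
    return get_paged_frame_model(0x2000, p2m_ram_rw); /* 0: frame usable */
}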
--- xen-4.0.1-testing.orig/xen/arch/x86/hvm/emulate.c
+++ xen-4.0.1-testing/xen/arch/x86/hvm/emulate.c
@@ -65,7 +65,8 @@ static int hvmemul_do_io(
ram_mfn = gfn_to_mfn_unshare(current->domain, ram_gfn, &p2mt, 0);
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(curr->domain, ram_gfn);
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(curr->domain, ram_gfn);
return X86EMUL_RETRY;
}
if ( p2m_is_shared(p2mt) )
--- xen-4.0.1-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.0.1-testing/xen/arch/x86/hvm/hvm.c
@@ -291,7 +291,8 @@ static int hvm_set_ioreq_page(
return -EINVAL;
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(d, gmfn);
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(d, gmfn);
return -ENOENT;
}
if ( p2m_is_shared(p2mt) )
@@ -1324,7 +1325,8 @@ static void *hvm_map_entry(unsigned long
mfn = mfn_x(gfn_to_mfn_unshare(current->domain, gfn, &p2mt, 0));
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(current->domain, gfn);
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(current->domain, gfn);
return NULL;
}
if ( p2m_is_shared(p2mt) )
@@ -1723,7 +1725,8 @@ static enum hvm_copy_result __hvm_copy(
--- xen-4.0.1-testing.orig/xen/common/grant_table.c
+++ xen-4.0.1-testing/xen/common/grant_table.c
@@ -156,8 +156,7 @@ static int __get_paged_frame(unsigned lo
*frame = mfn_x(mfn);
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(curr->domain, gfn);
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(curr->domain, gfn);
return HVMCOPY_gfn_paged_out;
- if ( p2m_is_paged(p2mt) )
- p2m_mem_paging_populate(rd, gfn);
+ p2m_mem_paging_populate(rd, gfn);
rc = GNTST_eagain;
}
if ( p2m_is_shared(p2mt) )
@@ -3032,8 +3035,8 @@ long do_hvm_op(unsigned long op, XEN_GUE
mfn_t mfn = gfn_to_mfn(d, pfn, &t);
if ( p2m_is_paging(t) )
{
- p2m_mem_paging_populate(d, pfn);
-
+ if ( p2m_is_paged(t) )
+ p2m_mem_paging_populate(d, pfn);
rc = -EINVAL;
goto param_fail3;
}
@@ -3096,8 +3099,8 @@ long do_hvm_op(unsigned long op, XEN_GUE
mfn = gfn_to_mfn_unshare(d, pfn, &t, 0);
if ( p2m_is_paging(t) )
{
- p2m_mem_paging_populate(d, pfn);
-
+ if ( p2m_is_paged(t) )
+ p2m_mem_paging_populate(d, pfn);
rc = -EINVAL;
goto param_fail4;
}
--- xen-4.0.1-testing.orig/xen/arch/x86/mm/guest_walk.c
+++ xen-4.0.1-testing/xen/arch/x86/mm/guest_walk.c
@@ -96,7 +96,8 @@ static inline void *map_domain_gfn(struc
*mfn = gfn_to_mfn_unshare(d, gfn_x(gfn), p2mt, 0);
if ( p2m_is_paging(*p2mt) )
{
- p2m_mem_paging_populate(d, gfn_x(gfn));
+ if ( p2m_is_paged(*p2mt) )
+ p2m_mem_paging_populate(d, gfn_x(gfn));
*rc = _PAGE_PAGED;
return NULL;
--- xen-4.0.1-testing.orig/xen/arch/x86/mm/hap/guest_walk.c
+++ xen-4.0.1-testing/xen/arch/x86/mm/hap/guest_walk.c
@@ -49,7 +49,8 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
top_mfn = gfn_to_mfn_unshare(v->domain, cr3 >> PAGE_SHIFT, &p2mt, 0);
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(v->domain, cr3 >> PAGE_SHIFT);
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(v->domain, cr3 >> PAGE_SHIFT);
pfec[0] = PFEC_page_paged;
return INVALID_GFN;
@@ -81,7 +82,8 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
gfn_to_mfn_unshare(v->domain, gfn_x(gfn), &p2mt, 0);
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(v->domain, gfn_x(gfn));
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(v->domain, gfn_x(gfn));
pfec[0] = PFEC_page_paged;
return INVALID_GFN;
}