diff --git a/22045-python27-compat.patch b/22045-python27-compat.patch new file mode 100644 index 0000000..0b33d35 --- /dev/null +++ b/22045-python27-compat.patch @@ -0,0 +1,55 @@ +# HG changeset patch +# User Michael Young +# Date 1282234170 -3600 +# Node ID 2940165380de2348e0ea3f628dea35750a2b4c8f +# Parent 60746a2c14a6cc123892f973fbdd6acb73251d39 +tools/python: fix xm list for Python 2.7 + +This patch fixes + Unexpected error: +This is due to xmlrpc changes in Python 2.7. This patch should +fixe it for both old and new versions. + +Signed-off-by: Michael Young +Signed-off-by: Ian Jackson + +Index: xen-4.0.1-testing/tools/python/xen/util/xmlrpcclient.py +=================================================================== +--- xen-4.0.1-testing.orig/tools/python/xen/util/xmlrpcclient.py ++++ xen-4.0.1-testing/tools/python/xen/util/xmlrpcclient.py +@@ -22,6 +22,7 @@ import socket + import string + import xmlrpclib + from types import StringTypes ++from sys import hexversion + + + try: +@@ -54,7 +55,12 @@ class UnixTransport(xmlrpclib.Transport) + return xmlrpclib.Transport.request(self, host, '/RPC2', + request_body, verbose) + def make_connection(self, host): +- return HTTPUnix(self.__handler) ++ if hexversion < 0x02070000: ++ # python 2.6 or earlier ++ return HTTPUnix(self.__handler) ++ else: ++ # xmlrpclib.Transport changed in python 2.7 ++ return HTTPUnixConnection(self.__handler) + + + # We need our own transport for HTTPS, because xmlrpclib.SafeTransport is +Index: xen-4.0.1-testing/tools/python/xen/util/xmlrpclib2.py +=================================================================== +--- xen-4.0.1-testing.orig/tools/python/xen/util/xmlrpclib2.py ++++ xen-4.0.1-testing/tools/python/xen/util/xmlrpclib2.py +@@ -58,6 +58,9 @@ def stringify(value): + # some bugs in Keep-Alive handling and also enabled it by default + class XMLRPCRequestHandler(SimpleXMLRPCRequestHandler): + protocol_version = "HTTP/1.1" ++ # xend crashes in python 2.7 unless 
disable_nagle_algorithm = False ++ # it isn't used in earlier versions so it is harmless to set it generally ++ disable_nagle_algorithm = False + + def __init__(self, hosts_allowed, request, client_address, server): + self.hosts_allowed = hosts_allowed diff --git a/22223-vtd-igd-workaround.patch b/22223-vtd-igd-workaround.patch deleted file mode 100644 index cb9ab44..0000000 --- a/22223-vtd-igd-workaround.patch +++ /dev/null @@ -1,131 +0,0 @@ -# HG changeset patch -# User Keir Fraser -# Date 1286028261 -3600 -# Node ID 4beee577912215c734b79cb84bfe3fb20c1afbfc -# Parent aed9fd361340158daf2d7160d1b367478b6312d6 -Vt-d: fix dom0 graphics problem on Levnovo T410. -References: bnc#643477 - -The patch is derived from a similar quirk in Linux kernel by David -Woodhouse and Adam Jackson. It checks for VT enabling bit in IGD GGC -register. If VT is not enabled correctly in the IGD, Xen does not -enable VT-d translation for IGD VT-d engine. In case where iommu boot -parameter is set to force, Xen calls panic(). - -Signed-off-by: Allen Kay - -jb: Simplified and switched operands of && in first if() added to -iommu_enable_translation(). - ---- a/xen/drivers/passthrough/vtd/dmar.c -+++ b/xen/drivers/passthrough/vtd/dmar.c -@@ -46,6 +46,7 @@ LIST_HEAD(acpi_rmrr_units); - LIST_HEAD(acpi_atsr_units); - LIST_HEAD(acpi_rhsa_units); - -+static u64 igd_drhd_address; - u8 dmar_host_address_width; - - void dmar_scope_add_buses(struct dmar_scope *scope, u16 sec_bus, u16 sub_bus) -@@ -239,6 +240,11 @@ struct acpi_rhsa_unit * drhd_to_rhsa(str - return NULL; - } - -+int is_igd_drhd(struct acpi_drhd_unit *drhd) -+{ -+ return ( drhd->address == igd_drhd_address ? 1 : 0); -+} -+ - /* - * Count number of devices in device scope. Do not include PCI sub - * hierarchies. 
-@@ -333,6 +339,15 @@ static int __init acpi_parse_dev_scope(v - if ( iommu_verbose ) - dprintk(VTDPREFIX, " endpoint: %x:%x.%x\n", - bus, path->dev, path->fn); -+ -+ if ( type == DMAR_TYPE ) -+ { -+ struct acpi_drhd_unit *drhd = acpi_entry; -+ -+ if ( (bus == 0) && (path->dev == 2) && (path->fn == 0) ) -+ igd_drhd_address = drhd->address; -+ } -+ - break; - - case ACPI_DEV_IOAPIC: ---- a/xen/drivers/passthrough/vtd/dmar.h -+++ b/xen/drivers/passthrough/vtd/dmar.h -@@ -114,5 +114,6 @@ void *map_to_nocache_virt(int nr_iommus, - int vtd_hw_check(void); - void disable_pmr(struct iommu *iommu); - int is_usb_device(u8 bus, u8 devfn); -+int is_igd_drhd(struct acpi_drhd_unit *drhd); - - #endif /* _DMAR_H_ */ ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -688,10 +688,34 @@ static int iommu_set_root_entry(struct i - return 0; - } - --static void iommu_enable_translation(struct iommu *iommu) -+#define GGC 0x52 -+#define GGC_MEMORY_VT_ENABLED (0x8 << 8) -+static int is_igd_vt_enabled(void) -+{ -+ unsigned short ggc; -+ -+ /* integrated graphics on Intel platforms is located at 0:2.0 */ -+ ggc = pci_conf_read16(0, 2, 0, GGC); -+ return ( ggc & GGC_MEMORY_VT_ENABLED ? 1 : 0 ); -+} -+ -+static void iommu_enable_translation(struct acpi_drhd_unit *drhd) - { - u32 sts; - unsigned long flags; -+ struct iommu *iommu = drhd->iommu; -+ -+ if ( is_igd_drhd(drhd) && !is_igd_vt_enabled() ) -+ { -+ if ( force_iommu ) -+ panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose!\n"); -+ else -+ { -+ dprintk(XENLOG_WARNING VTDPREFIX, -+ "BIOS did not enable IGD for VT properly. 
Disabling IGD VT-d engine.\n"); -+ return; -+ } -+ } - - if ( iommu_verbose ) - dprintk(VTDPREFIX, -@@ -1178,7 +1202,6 @@ static int intel_iommu_domain_init(struc - - static void intel_iommu_dom0_init(struct domain *d) - { -- struct iommu *iommu; - struct acpi_drhd_unit *drhd; - - if ( !iommu_passthrough && !need_iommu(d) ) -@@ -1194,8 +1217,7 @@ static void intel_iommu_dom0_init(struct - - for_each_drhd_unit ( drhd ) - { -- iommu = drhd->iommu; -- iommu_enable_translation(iommu); -+ iommu_enable_translation(drhd); - } - } - -@@ -2163,7 +2185,7 @@ static void vtd_resume(void) - (u32) iommu_state[i][DMAR_FEUADDR_REG]); - spin_unlock_irqrestore(&iommu->register_lock, flags); - -- iommu_enable_translation(iommu); -+ iommu_enable_translation(drhd); - } - } - diff --git a/22223-vtd-workarounds.patch b/22223-vtd-workarounds.patch new file mode 100644 index 0000000..7f18d0f --- /dev/null +++ b/22223-vtd-workarounds.patch @@ -0,0 +1,719 @@ +# HG changeset patch +# User Keir Fraser +# Date 1286028261 -3600 +# Node ID 4beee577912215c734b79cb84bfe3fb20c1afbfc +# Parent aed9fd361340158daf2d7160d1b367478b6312d6 +Vt-d: fix dom0 graphics problem on Levnovo T410. +References: bnc#643477 + +The patch is derived from a similar quirk in Linux kernel by David +Woodhouse and Adam Jackson. It checks for VT enabling bit in IGD GGC +register. If VT is not enabled correctly in the IGD, Xen does not +enable VT-d translation for IGD VT-d engine. In case where iommu boot +parameter is set to force, Xen calls panic(). + +Signed-off-by: Allen Kay + +# HG changeset patch +# User Keir Fraser +# Date 1288344554 -3600 +# Node ID b48d8f27fca251c2df0222d195ffcb772d6a1128 +# Parent 2d5e8f4ac43a120bbb5d4c52d08f6980848f0166 +vtd: consolidate VT-d quirks into a single file quirks.c + +Consolidate VT-d quirks into a single file - quirks.c. 
This includes +quirks to workaround OEM BIOS issue with VT-d enabling in IGD, Cantiga +VT-d buffer flush issue, Cantiga IGD Vt-d low power related errata, +and a quirk to workaround issues related to wifi direct assignment. + +Signed-off-by: Allen Kay +Reviewed-by: Jan Beulich + +# HG changeset patch +# User Keir Fraser +# Date 1288888517 0 +# Node ID fedcd4cbcc1eb3e210628bdf95766ca0c400fc18 +# Parent d508b18a68447f91cd879b79a498f06536d89f8e +[VTD] fix a typo and some minor cleanup of quirks.c + +Fixed a typo for IGD_DEV define and some minor cleanup to ease future +enhancement. + +Signed-off-by: Allen Kay + +--- a/xen/drivers/passthrough/vtd/Makefile ++++ b/xen/drivers/passthrough/vtd/Makefile +@@ -6,3 +6,4 @@ obj-y += dmar.o + obj-y += utils.o + obj-y += qinval.o + obj-y += intremap.o ++obj-y += quirks.o +--- a/xen/drivers/passthrough/vtd/dmar.c ++++ b/xen/drivers/passthrough/vtd/dmar.c +@@ -46,6 +46,7 @@ LIST_HEAD(acpi_rmrr_units); + LIST_HEAD(acpi_atsr_units); + LIST_HEAD(acpi_rhsa_units); + ++static u64 igd_drhd_address; + u8 dmar_host_address_width; + + void dmar_scope_add_buses(struct dmar_scope *scope, u16 sec_bus, u16 sub_bus) +@@ -239,6 +240,11 @@ struct acpi_rhsa_unit * drhd_to_rhsa(str + return NULL; + } + ++int is_igd_drhd(struct acpi_drhd_unit *drhd) ++{ ++ return drhd && (drhd->address == igd_drhd_address); ++} ++ + /* + * Count number of devices in device scope. Do not include PCI sub + * hierarchies. 
+@@ -333,6 +339,15 @@ static int __init acpi_parse_dev_scope(v + if ( iommu_verbose ) + dprintk(VTDPREFIX, " endpoint: %x:%x.%x\n", + bus, path->dev, path->fn); ++ ++ if ( type == DMAR_TYPE ) ++ { ++ struct acpi_drhd_unit *drhd = acpi_entry; ++ ++ if ( (bus == 0) && (path->dev == 2) && (path->fn == 0) ) ++ igd_drhd_address = drhd->address; ++ } ++ + break; + + case ACPI_DEV_IOAPIC: +--- a/xen/drivers/passthrough/vtd/dmar.h ++++ b/xen/drivers/passthrough/vtd/dmar.h +@@ -114,5 +114,6 @@ void *map_to_nocache_virt(int nr_iommus, + int vtd_hw_check(void); + void disable_pmr(struct iommu *iommu); + int is_usb_device(u8 bus, u8 devfn); ++int is_igd_drhd(struct acpi_drhd_unit *drhd); + + #endif /* _DMAR_H_ */ +--- a/xen/drivers/passthrough/vtd/extern.h ++++ b/xen/drivers/passthrough/vtd/extern.h +@@ -26,6 +26,7 @@ + + extern int qinval_enabled; + extern int ats_enabled; ++extern bool_t rwbf_quirk; + + void print_iommu_regs(struct acpi_drhd_unit *drhd); + void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn); +@@ -35,6 +36,12 @@ int enable_qinval(struct iommu *iommu); + void disable_qinval(struct iommu *iommu); + int enable_intremap(struct iommu *iommu, int eim); + void disable_intremap(struct iommu *iommu); ++ ++void iommu_flush_cache_entry(void *addr, unsigned int size); ++void iommu_flush_cache_page(void *addr, unsigned long npages); ++int iommu_alloc(struct acpi_drhd_unit *drhd); ++void iommu_free(struct acpi_drhd_unit *drhd); ++ + int queue_invalidate_context(struct iommu *iommu, + u16 did, u16 source_id, u8 function_mask, u8 granu); + int queue_invalidate_iotlb(struct iommu *iommu, +@@ -44,19 +51,41 @@ int queue_invalidate_iec(struct iommu *i + int invalidate_sync(struct iommu *iommu); + int iommu_flush_iec_global(struct iommu *iommu); + int iommu_flush_iec_index(struct iommu *iommu, u8 im, u16 iidx); ++void clear_fault_bits(struct iommu *iommu); ++ + struct iommu * ioapic_to_iommu(unsigned int apic_id); + struct acpi_drhd_unit * 
ioapic_to_drhd(unsigned int apic_id); + struct acpi_drhd_unit * iommu_to_drhd(struct iommu *iommu); + struct acpi_rhsa_unit * drhd_to_rhsa(struct acpi_drhd_unit *drhd); +-void clear_fault_bits(struct iommu *iommu); ++struct acpi_drhd_unit * find_ats_dev_drhd(struct iommu *iommu); ++ + int ats_device(int seg, int bus, int devfn); + int enable_ats_device(int seg, int bus, int devfn); + int disable_ats_device(int seg, int bus, int devfn); + int invalidate_ats_tcs(struct iommu *iommu); ++ + int qinval_device_iotlb(struct iommu *iommu, + u32 max_invs_pend, u16 sid, u16 size, u64 addr); + int dev_invalidate_iotlb(struct iommu *iommu, u16 did, + u64 addr, unsigned int size_order, u64 type); +-struct acpi_drhd_unit * find_ats_dev_drhd(struct iommu *iommu); ++ ++unsigned int get_cache_line_size(void); ++void cacheline_flush(char *); ++void flush_all_cache(void); ++ ++u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages); ++void free_pgtable_maddr(u64 maddr); ++void *map_vtd_domain_page(u64 maddr); ++void unmap_vtd_domain_page(void *va); ++int domain_context_mapping_one(struct domain *domain, struct iommu *iommu, ++ u8 bus, u8 devfn); ++int domain_context_unmap_one(struct domain *domain, struct iommu *iommu, ++ u8 bus, u8 devfn); ++ ++int is_igd_vt_enabled_quirk(void); ++void __init platform_quirks_init(void); ++void vtd_ops_preamble_quirk(struct iommu* iommu); ++void vtd_ops_postamble_quirk(struct iommu* iommu); ++void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map); + + #endif // _VTD_EXTERN_H_ +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -43,7 +43,6 @@ + #endif + + int nr_iommus; +-static bool_t rwbf_quirk; + + static void setup_dom0_devices(struct domain *d); + static void setup_dom0_rmrr(struct domain *d); +@@ -481,16 +480,36 @@ static int inline iommu_flush_iotlb_glob + int flush_non_present_entry, int flush_dev_iotlb) + { + struct iommu_flush *flush = iommu_get_flush(iommu); +- return 
flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, ++ int status; ++ ++ /* apply platform specific errata workarounds */ ++ vtd_ops_preamble_quirk(iommu); ++ ++ status = flush->iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH, + flush_non_present_entry, flush_dev_iotlb); ++ ++ /* undo platform specific errata workarounds */ ++ vtd_ops_postamble_quirk(iommu); ++ ++ return status; + } + + static int inline iommu_flush_iotlb_dsi(struct iommu *iommu, u16 did, + int flush_non_present_entry, int flush_dev_iotlb) + { + struct iommu_flush *flush = iommu_get_flush(iommu); +- return flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, ++ int status; ++ ++ /* apply platform specific errata workarounds */ ++ vtd_ops_preamble_quirk(iommu); ++ ++ status = flush->iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH, + flush_non_present_entry, flush_dev_iotlb); ++ ++ /* undo platform specific errata workarounds */ ++ vtd_ops_postamble_quirk(iommu); ++ ++ return status; + } + + static int inline get_alignment(u64 base, unsigned int size) +@@ -514,6 +533,7 @@ static int inline iommu_flush_iotlb_psi( + { + unsigned int align; + struct iommu_flush *flush = iommu_get_flush(iommu); ++ int status; + + ASSERT(!(addr & (~PAGE_MASK_4K))); + ASSERT(pages > 0); +@@ -534,8 +554,16 @@ static int inline iommu_flush_iotlb_psi( + addr >>= PAGE_SHIFT_4K + align; + addr <<= PAGE_SHIFT_4K + align; + +- return flush->iotlb(iommu, did, addr, align, DMA_TLB_PSI_FLUSH, ++ /* apply platform specific errata workarounds */ ++ vtd_ops_preamble_quirk(iommu); ++ ++ status = flush->iotlb(iommu, did, addr, align, DMA_TLB_PSI_FLUSH, + flush_non_present_entry, flush_dev_iotlb); ++ ++ /* undo platform specific errata workarounds */ ++ vtd_ops_postamble_quirk(iommu); ++ ++ return status; + } + + static void iommu_flush_all(void) +@@ -688,10 +716,26 @@ static int iommu_set_root_entry(struct i + return 0; + } + +-static void iommu_enable_translation(struct iommu *iommu) ++static void iommu_enable_translation(struct acpi_drhd_unit *drhd) 
+ { + u32 sts; + unsigned long flags; ++ struct iommu *iommu = drhd->iommu; ++ ++ if ( is_igd_drhd(drhd) && !is_igd_vt_enabled_quirk() ) ++ { ++ if ( force_iommu ) ++ panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose!\n"); ++ else ++ { ++ dprintk(XENLOG_WARNING VTDPREFIX, ++ "BIOS did not enable IGD for VT properly. Disabling IGD VT-d engine.\n"); ++ return; ++ } ++ } ++ ++ /* apply platform specific errata workarounds */ ++ vtd_ops_preamble_quirk(iommu); + + if ( iommu_verbose ) + dprintk(VTDPREFIX, +@@ -705,6 +749,9 @@ static void iommu_enable_translation(str + (sts & DMA_GSTS_TES), sts); + spin_unlock_irqrestore(&iommu->register_lock, flags); + ++ /* undo platform specific errata workarounds */ ++ vtd_ops_postamble_quirk(iommu); ++ + /* Disable PMRs when VT-d engine takes effect per spec definition */ + disable_pmr(iommu); + } +@@ -714,6 +761,9 @@ static void iommu_disable_translation(st + u32 sts; + unsigned long flags; + ++ /* apply platform specific errata workarounds */ ++ vtd_ops_preamble_quirk(iommu); ++ + spin_lock_irqsave(&iommu->register_lock, flags); + sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); + dmar_writel(iommu->reg, DMAR_GCMD_REG, sts & (~DMA_GCMD_TE)); +@@ -722,6 +772,9 @@ static void iommu_disable_translation(st + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, dmar_readl, + !(sts & DMA_GSTS_TES), sts); + spin_unlock_irqrestore(&iommu->register_lock, flags); ++ ++ /* undo platform specific errata workarounds */ ++ vtd_ops_postamble_quirk(iommu); + } + + enum faulttype { +@@ -1065,6 +1118,7 @@ int __init iommu_alloc(struct acpi_drhd_ + xfree(iommu); + return -ENOMEM; + } ++ iommu->intel->drhd = drhd; + + iommu->reg = map_to_nocache_virt(nr_iommus, drhd->address); + iommu->index = nr_iommus++; +@@ -1178,7 +1232,6 @@ static int intel_iommu_domain_init(struc + + static void intel_iommu_dom0_init(struct domain *d) + { +- struct iommu *iommu; + struct acpi_drhd_unit *drhd; + + if ( !iommu_passthrough && !need_iommu(d) ) +@@ -1194,12 
+1247,11 @@ static void intel_iommu_dom0_init(struct + + for_each_drhd_unit ( drhd ) + { +- iommu = drhd->iommu; +- iommu_enable_translation(iommu); ++ iommu_enable_translation(drhd); + } + } + +-static int domain_context_mapping_one( ++int domain_context_mapping_one( + struct domain *domain, + struct iommu *iommu, + u8 bus, u8 devfn) +@@ -1301,6 +1353,8 @@ static int domain_context_mapping_one( + + unmap_vtd_domain_page(context_entries); + ++ me_wifi_quirk(domain, bus, devfn, MAP_ME_PHANTOM_FUNC); ++ + return 0; + } + +@@ -1382,7 +1436,7 @@ static int domain_context_mapping(struct + return ret; + } + +-static int domain_context_unmap_one( ++int domain_context_unmap_one( + struct domain *domain, + struct iommu *iommu, + u8 bus, u8 devfn) +@@ -1430,6 +1484,8 @@ static int domain_context_unmap_one( + spin_unlock(&iommu->lock); + unmap_vtd_domain_page(context_entries); + ++ me_wifi_quirk(domain, bus, devfn, UNMAP_ME_PHANTOM_FUNC); ++ + return 0; + } + +@@ -1928,19 +1984,6 @@ static void setup_dom0_rmrr(struct domai + spin_unlock(&pcidevs_lock); + } + +-static void platform_quirks(void) +-{ +- u32 id; +- +- /* Mobile 4 Series Chipset neglects to set RWBF capability. 
*/ +- id = pci_conf_read32(0, 0, 0, 0); +- if ( id == 0x2a408086 ) +- { +- dprintk(XENLOG_INFO VTDPREFIX, "DMAR: Forcing write-buffer flush\n"); +- rwbf_quirk = 1; +- } +-} +- + int intel_vtd_setup(void) + { + struct acpi_drhd_unit *drhd; +@@ -1949,7 +1992,7 @@ int intel_vtd_setup(void) + if ( list_empty(&acpi_drhd_units) ) + return -ENODEV; + +- platform_quirks(); ++ platform_quirks_init(); + + irq_to_iommu = xmalloc_array(struct iommu*, nr_irqs); + BUG_ON(!irq_to_iommu); +@@ -2163,7 +2206,7 @@ static void vtd_resume(void) + (u32) iommu_state[i][DMAR_FEUADDR_REG]); + spin_unlock_irqrestore(&iommu->register_lock, flags); + +- iommu_enable_translation(iommu); ++ iommu_enable_translation(drhd); + } + } + +--- a/xen/drivers/passthrough/vtd/iommu.h ++++ b/xen/drivers/passthrough/vtd/iommu.h +@@ -501,6 +501,7 @@ struct intel_iommu { + struct qi_ctrl qi_ctrl; + struct ir_ctrl ir_ctrl; + struct iommu_flush flush; ++ struct acpi_drhd_unit *drhd; + }; + + #endif +--- /dev/null ++++ b/xen/drivers/passthrough/vtd/quirks.c +@@ -0,0 +1,262 @@ ++/* ++ * Copyright (c) 2010, Intel Corporation. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms and conditions of the GNU General Public License, ++ * version 2, as published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ * more details. ++ * ++ * You should have received a copy of the GNU General Public License along with ++ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple ++ * Place - Suite 330, Boston, MA 02111-1307 USA. 
++ * ++ * Author: Allen Kay ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "iommu.h" ++#include "dmar.h" ++#include "extern.h" ++#include "vtd.h" ++ ++#define IOH_DEV 0 ++#define IGD_DEV 2 ++ ++#define IGD_BAR_MASK 0xFFFFFFFFFFFF0000 ++#define GGC 0x52 ++#define GGC_MEMORY_VT_ENABLED (0x8 << 8) ++ ++#define IS_CTG(id) (id == 0x2a408086) ++#define IS_ILK(id) (id == 0x00408086 || id == 0x00448086 || id== 0x00628086 || id == 0x006A8086) ++#define IS_CPT(id) (id == 0x01008086 || id == 0x01048086) ++ ++u32 ioh_id; ++u32 igd_id; ++bool_t rwbf_quirk; ++static int is_cantiga_b3; ++static u8 *igd_reg_va; ++ ++/* ++ * QUIRK to workaround Xen boot issue on Calpella/Ironlake OEM BIOS ++ * not enabling VT-d properly in IGD. The workaround is to not enabling ++ * IGD VT-d translation if VT is not enabled in IGD. ++ */ ++int is_igd_vt_enabled_quirk(void) ++{ ++ u16 ggc; ++ ++ if ( !IS_ILK(ioh_id) ) ++ return 1; ++ ++ /* integrated graphics on Intel platforms is located at 0:2.0 */ ++ ggc = pci_conf_read16(0, IGD_DEV, 0, GGC); ++ return ( ggc & GGC_MEMORY_VT_ENABLED ? 1 : 0 ); ++} ++ ++/* ++ * QUIRK to workaround cantiga VT-d buffer flush issue. ++ * The workaround is to force write buffer flush even if ++ * VT-d capability indicates it is not required. ++ */ ++static void cantiga_b3_errata_init(void) ++{ ++ u16 vid; ++ u8 did_hi, rid; ++ ++ vid = pci_conf_read16(0, IGD_DEV, 0, 0); ++ if ( vid != 0x8086 ) ++ return; ++ ++ did_hi = pci_conf_read8(0, IGD_DEV, 0, 3); ++ rid = pci_conf_read8(0, IGD_DEV, 0, 8); ++ ++ if ( (did_hi == 0x2A) && (rid == 0x7) ) ++ is_cantiga_b3 = 1; ++} ++ ++/* ++ * QUIRK to workaround Cantiga IGD VT-d low power errata. ++ * This errata impacts IGD assignment on Cantiga systems ++ * and can potentially cause VT-d operations to hang. 
++ * The workaround is to access an IGD PCI config register ++ * to get IGD out of low power state before VT-d translation ++ * enable/disable and IOTLB flushes. ++ */ ++ ++/* ++ * map IGD MMIO+0x2000 page to allow Xen access to IGD 3D register. ++ */ ++static void map_igd_reg(void) ++{ ++ u64 igd_mmio, igd_reg; ++ ++ if ( !is_cantiga_b3 || igd_reg_va != NULL ) ++ return; ++ ++ /* get IGD mmio address in PCI BAR */ ++ igd_mmio = ((u64)pci_conf_read32(0, IGD_DEV, 0, 0x14) << 32) + ++ pci_conf_read32(0, IGD_DEV, 0, 0x10); ++ ++ /* offset of IGD regster we want to access is in 0x2000 range */ ++ igd_reg = (igd_mmio & IGD_BAR_MASK) + 0x2000; ++ ++ /* ioremap this physical page */ ++ set_fixmap_nocache(FIX_IGD_MMIO, igd_reg); ++ igd_reg_va = (u8 *)fix_to_virt(FIX_IGD_MMIO); ++} ++ ++/* ++ * force IGD to exit low power mode by accessing a IGD 3D regsiter. ++ */ ++static int cantiga_vtd_ops_preamble(struct iommu* iommu) ++{ ++ struct intel_iommu *intel = iommu->intel; ++ struct acpi_drhd_unit *drhd = intel ? intel->drhd : NULL; ++ ++ if ( !is_igd_drhd(drhd) || !is_cantiga_b3 ) ++ return 0; ++ ++ /* ++ * read IGD register at IGD MMIO + 0x20A4 to force IGD ++ * to exit low power state. Since map_igd_reg() ++ * already mapped page starting 0x2000, we just need to ++ * add page offset 0x0A4 to virtual address base. ++ */ ++ return ( *((volatile int *)(igd_reg_va + 0x0A4)) ); ++} ++ ++/* ++ * call before VT-d translation enable and IOTLB flush operations. ++ */ ++void vtd_ops_preamble_quirk(struct iommu* iommu) ++{ ++ cantiga_vtd_ops_preamble(iommu); ++} ++ ++/* ++ * call after VT-d translation enable and IOTLB flush operations. ++ */ ++void vtd_ops_postamble_quirk(struct iommu* iommu) ++{ ++ return; ++} ++ ++/* initialize platform identification flags */ ++void __init platform_quirks_init(void) ++{ ++ ioh_id = pci_conf_read32(0, IOH_DEV, 0, 0); ++ igd_id = pci_conf_read32(0, IGD_DEV, 0, 0); ++ ++ /* Mobile 4 Series Chipset neglects to set RWBF capability. 
*/ ++ if ( ioh_id == 0x2a408086 ) ++ { ++ dprintk(XENLOG_INFO VTDPREFIX, "DMAR: Forcing write-buffer flush\n"); ++ rwbf_quirk = 1; ++ } ++ ++ /* initialize cantiga B3 identification */ ++ cantiga_b3_errata_init(); ++ ++ /* ioremap IGD MMIO+0x2000 page */ ++ map_igd_reg(); ++} ++ ++/* ++ * QUIRK to workaround wifi direct assignment issue. This issue ++ * impacts only cases where Intel integrated wifi device is directly ++ * is directly assigned to a guest. ++ * ++ * The workaround is to map ME phantom device 0:3.7 or 0:22.7 ++ * to the ME vt-d engine if detect the user is trying to directly ++ * assigning Intel integrated wifi device to a guest. ++ */ ++ ++static void map_me_phantom_function(struct domain *domain, u32 dev, int map) ++{ ++ struct acpi_drhd_unit *drhd; ++ struct pci_dev *pdev; ++ ++ /* find ME VT-d engine base on a real ME device */ ++ pdev = pci_get_pdev(0, PCI_DEVFN(dev, 0)); ++ drhd = acpi_find_matched_drhd_unit(pdev); ++ ++ /* map or unmap ME phantom function */ ++ if ( map ) ++ domain_context_mapping_one(domain, drhd->iommu, 0, ++ PCI_DEVFN(dev, 7)); ++ else ++ domain_context_unmap_one(domain, drhd->iommu, 0, ++ PCI_DEVFN(dev, 7)); ++} ++ ++void me_wifi_quirk(struct domain *domain, u8 bus, u8 devfn, int map) ++{ ++ u32 id; ++ ++ id = pci_conf_read32(0, 0, 0, 0); ++ if ( IS_CTG(id) ) ++ { ++ /* quit if ME does not exist */ ++ if ( pci_conf_read32(0, 3, 0, 0) == 0xffffffff ) ++ return; ++ ++ /* if device is WLAN device, map ME phantom device 0:3.7 */ ++ id = pci_conf_read32(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), 0); ++ switch (id) ++ { ++ case 0x42328086: ++ case 0x42358086: ++ case 0x42368086: ++ case 0x42378086: ++ case 0x423a8086: ++ case 0x423b8086: ++ case 0x423c8086: ++ case 0x423d8086: ++ map_me_phantom_function(domain, 3, map); ++ break; ++ default: ++ break; ++ } ++ } ++ else if ( IS_ILK(id) || IS_CPT(id) ) ++ { ++ /* quit if ME does not exist */ ++ if ( pci_conf_read32(0, 22, 0, 0) == 0xffffffff ) ++ return; ++ ++ /* if device is WLAN 
device, map ME phantom device 0:22.7 */ ++ id = pci_conf_read32(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), 0); ++ switch (id) ++ { ++ case 0x00878086: ++ case 0x00898086: ++ case 0x00828086: ++ case 0x00858086: ++ case 0x42388086: ++ case 0x422b8086: ++ map_me_phantom_function(domain, 22, map); ++ break; ++ default: ++ break; ++ } ++ ++ } ++} +--- a/xen/drivers/passthrough/vtd/vtd.h ++++ b/xen/drivers/passthrough/vtd/vtd.h +@@ -23,6 +23,9 @@ + + #include + ++#define MAP_ME_PHANTOM_FUNC 1 ++#define UNMAP_ME_PHANTOM_FUNC 0 ++ + /* Accomodate both IOAPIC and IOSAPIC. */ + struct IO_xAPIC_route_entry { + __u32 vector : 8, +@@ -97,18 +100,4 @@ struct msi_msg_remap_entry { + u32 data; /* msi message data */ + }; + +-unsigned int get_cache_line_size(void); +-void cacheline_flush(char *); +-void flush_all_cache(void); +-u64 alloc_pgtable_maddr(struct acpi_drhd_unit *drhd, unsigned long npages); +-void free_pgtable_maddr(u64 maddr); +-void *map_vtd_domain_page(u64 maddr); +-void unmap_vtd_domain_page(void *va); +- +-void iommu_flush_cache_entry(void *addr, unsigned int size); +-void iommu_flush_cache_page(void *addr, unsigned long npages); +- +-int iommu_alloc(struct acpi_drhd_unit *drhd); +-void iommu_free(struct acpi_drhd_unit *drhd); +- + #endif // _VTD_H_ +--- a/xen/drivers/passthrough/vtd/x86/vtd.c ++++ b/xen/drivers/passthrough/vtd/x86/vtd.c +@@ -27,6 +27,7 @@ + #include "../iommu.h" + #include "../dmar.h" + #include "../vtd.h" ++#include "../extern.h" + + /* + * iommu_inclusive_mapping: when set, all memory below 4GB is included in dom0 +--- a/xen/include/asm-x86/fixmap.h ++++ b/xen/include/asm-x86/fixmap.h +@@ -52,6 +52,7 @@ enum fixed_addresses { + FIX_MSIX_IO_RESERV_BASE, + FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + FIX_MSIX_MAX_PAGES -1, + FIX_TBOOT_MAP_ADDRESS, ++ FIX_IGD_MMIO, + __end_of_fixed_addresses + }; + diff --git a/22231-x86-pv-ucode-msr-intel.patch b/22231-x86-pv-ucode-msr-intel.patch new file mode 100644 index 0000000..3272c13 --- /dev/null +++ 
b/22231-x86-pv-ucode-msr-intel.patch @@ -0,0 +1,68 @@ +# HG changeset patch +# User Keir Fraser +# Date 1286784105 -3600 +# Node ID a1405385db77c7c81aac27bd88d6c4b2d90b1389 +# Parent a33886146b45da46a5161a7ebed4d2f607642aee +x86: emulate MSR_IA32_UCODE_REV Intel access protocol + +Intel requires a write of zeros (hence such writes now get silently +ignored) followed by a cpuid(1) followed by the actual read. + +Includes some code redundancy elimination possible after the actual +change. + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -2268,6 +2268,14 @@ static int emulate_privileged_op(struct + if ( wrmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, eax, edx) != 0 ) + goto fail; + break; ++ case MSR_IA32_UCODE_REV: ++ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) ++ goto fail; ++ if ( rdmsr_safe(regs->ecx, l, h) ) ++ goto fail; ++ if ( l | h ) ++ goto invalid; ++ break; + case MSR_IA32_MISC_ENABLE: + if ( rdmsr_safe(regs->ecx, l, h) ) + goto invalid; +@@ -2375,16 +2383,21 @@ static int emulate_privileged_op(struct + regs->eax = regs->edx = 0; + break; + } +- if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) != 0 ) +- goto fail; +- break; ++ goto rdmsr_normal; ++ case MSR_IA32_UCODE_REV: ++ BUILD_BUG_ON(MSR_IA32_UCODE_REV != MSR_AMD_PATCHLEVEL); ++ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ) ++ { ++ if ( wrmsr_safe(MSR_IA32_UCODE_REV, 0, 0) ) ++ goto fail; ++ sync_core(); ++ } ++ goto rdmsr_normal; + case MSR_IA32_MISC_ENABLE: + if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) ) + goto fail; + regs->eax = guest_misc_enable(regs->eax); + break; +- case MSR_EFER: +- case MSR_AMD_PATCHLEVEL: + default: + if ( rdmsr_hypervisor_regs(regs->ecx, &val) ) + { +@@ -2400,6 +2413,8 @@ static int emulate_privileged_op(struct + if ( rc ) + goto rdmsr_writeback; + ++ case MSR_EFER: ++ rdmsr_normal: + /* Everyone can read the MSR space. 
*/ + /* gdprintk(XENLOG_WARNING,"Domain attempted RDMSR %p.\n", + _p(regs->ecx));*/ diff --git a/22232-x86-64-lahf-lm-bios-workaround.patch b/22232-x86-64-lahf-lm-bios-workaround.patch new file mode 100644 index 0000000..a44be0a --- /dev/null +++ b/22232-x86-64-lahf-lm-bios-workaround.patch @@ -0,0 +1,89 @@ +# HG changeset patch +# User Keir Fraser +# Date 1286784156 -3600 +# Node ID eb964c4b4f31c6b7bdfe8504839c4acac776f506 +# Parent a1405385db77c7c81aac27bd88d6c4b2d90b1389 +x86-64: workaround for BIOSes wrongly enabling LAHF_LM feature indicator + +This workaround is taken from Linux, and the main motivation (besides +such workarounds indeed belonging in the hypervisor rather than each +kernel) is to suppress the warnings in the Xen log each Linux guest +would cause due to the disallowed wrmsr. + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -44,6 +44,47 @@ static inline void wrmsr_amd(unsigned in + ); + } + ++static inline int rdmsr_amd_safe(unsigned int msr, unsigned int *lo, ++ unsigned int *hi) ++{ ++ int err; ++ ++ asm volatile("1: rdmsr\n2:\n" ++ ".section .fixup,\"ax\"\n" ++ "3: movl %6,%2\n" ++ " jmp 2b\n" ++ ".previous\n" ++ ".section __ex_table,\"a\"\n" ++ __FIXUP_ALIGN "\n" ++ __FIXUP_WORD " 1b,3b\n" ++ ".previous\n" ++ : "=a" (*lo), "=d" (*hi), "=r" (err) ++ : "c" (msr), "D" (0x9c5a203a), "2" (0), "i" (-EFAULT)); ++ ++ return err; ++} ++ ++static inline int wrmsr_amd_safe(unsigned int msr, unsigned int lo, ++ unsigned int hi) ++{ ++ int err; ++ ++ asm volatile("1: wrmsr\n2:\n" ++ ".section .fixup,\"ax\"\n" ++ "3: movl %6,%0\n" ++ " jmp 2b\n" ++ ".previous\n" ++ ".section __ex_table,\"a\"\n" ++ __FIXUP_ALIGN "\n" ++ __FIXUP_WORD " 1b,3b\n" ++ ".previous\n" ++ : "=r" (err) ++ : "c" (msr), "a" (lo), "d" (hi), "D" (0x9c5a203a), ++ "0" (0), "i" (-EFAULT)); ++ ++ return err; ++} ++ + /* + * Mask the features and extended features returned by CPUID. 
Parameters are + * set from the boot line via two methods: +@@ -330,6 +371,24 @@ static void __devinit init_amd(struct cp + 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ + clear_bit(0*32+31, c->x86_capability); + ++#ifdef CONFIG_X86_64 ++ if (c->x86 == 0xf && c->x86_model < 0x14 ++ && cpu_has(c, X86_FEATURE_LAHF_LM)) { ++ /* ++ * Some BIOSes incorrectly force this feature, but only K8 ++ * revision D (model = 0x14) and later actually support it. ++ * (AMD Erratum #110, docId: 25759). ++ */ ++ unsigned int lo, hi; ++ ++ clear_bit(X86_FEATURE_LAHF_LM, c->x86_capability); ++ if (!rdmsr_amd_safe(0xc001100d, &lo, &hi)) { ++ hi &= ~1; ++ wrmsr_amd_safe(0xc001100d, lo, hi); ++ } ++ } ++#endif ++ + r = get_model_name(c); + + switch(c->x86) diff --git a/22280-kexec.patch b/22280-kexec.patch new file mode 100644 index 0000000..b0281fe --- /dev/null +++ b/22280-kexec.patch @@ -0,0 +1,71 @@ +# HG changeset patch +# User Keir Fraser +# Date 1287922506 -3600 +# Node ID d6e3cd10a9a6ab16d0cc772ee70b2ae99e8fac31 +# Parent 2208a036f8d9d932de8ab0aa0206f5c57dba8728 +x86/kexec: fix very old regression and make compatible with modern Linux +References: bnc#619122 + +c/s 13829 lost the (32-bit only) cpu_has_pae argument passed to the +primary kernel's stub (in the 32-bit Xen case only), and Linux +2.6.27/.30 (32-/64-bit) introduced a new argument (for KEXEC_JUMP) +which for now simply gets passed a hardcoded value. 
+ +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/machine_kexec.c ++++ b/xen/arch/x86/machine_kexec.c +@@ -23,7 +23,11 @@ + typedef void (*relocate_new_kernel_t)( + unsigned long indirection_page, + unsigned long *page_list, +- unsigned long start_address); ++ unsigned long start_address, ++#ifdef __i386__ ++ unsigned int cpu_has_pae, ++#endif ++ unsigned int preserve_context); + + extern int machine_kexec_get_xen(xen_kexec_range_t *range); + +@@ -121,7 +125,11 @@ void machine_kexec(xen_kexec_image_t *im + + rnk = (relocate_new_kernel_t) image->page_list[1]; + (*rnk)(image->indirection_page, image->page_list, +- image->start_address); ++ image->start_address, ++#ifdef __i386__ ++ 1 /* cpu_has_pae */, ++#endif ++ 0 /* preserve_context */); + } + } + +--- a/xen/arch/x86/x86_64/compat_kexec.S ++++ b/xen/arch/x86/x86_64/compat_kexec.S +@@ -119,6 +119,7 @@ compatibility_mode: + movl %eax, %ss + + /* Push arguments onto stack. */ ++ pushl $0 /* 20(%esp) - preserve context */ + pushl $1 /* 16(%esp) - cpu has pae */ + pushl %ecx /* 12(%esp) - start address */ + pushl %edx /* 8(%esp) - page list */ +--- a/xen/include/asm-x86/cpufeature.h ++++ b/xen/include/asm-x86/cpufeature.h +@@ -138,7 +138,6 @@ + #define cpu_has_de boot_cpu_has(X86_FEATURE_DE) + #define cpu_has_pse boot_cpu_has(X86_FEATURE_PSE) + #define cpu_has_tsc boot_cpu_has(X86_FEATURE_TSC) +-#define cpu_has_pae boot_cpu_has(X86_FEATURE_PAE) + #define cpu_has_pge boot_cpu_has(X86_FEATURE_PGE) + #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) + #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) +@@ -164,7 +163,6 @@ + #define cpu_has_de 1 + #define cpu_has_pse 1 + #define cpu_has_tsc 1 +-#define cpu_has_pae 1 + #define cpu_has_pge 1 + #define cpu_has_pat 1 + #define cpu_has_apic boot_cpu_has(X86_FEATURE_APIC) diff --git a/22326-cpu-pools-numa-placement.patch b/22326-cpu-pools-numa-placement.patch new file mode 100644 index 0000000..5daca4c --- /dev/null +++ b/22326-cpu-pools-numa-placement.patch @@ -0,0 +1,43 @@ 
+# HG changeset patch +# User Juergen Gross +# Date 1288264929 -3600 +# Node ID dc66c13a29f9af67e0258f688bcd6330cf515383 +# Parent 4ac03710fc320e8f76014ca27849da03b85dff9d +avoid numa placement of cpus with active cpupools + +When using cpupools don't pin vcpus to numa nodes as this might +conflict with the cpupool definition. +numa placement should be handled by cpupool configuration instead. + +Signed-off-by: Juergen Gross +Signed-off-by: Ian Jackson + +Index: xen-4.0.1-testing/tools/python/xen/xend/XendCPUPool.py +=================================================================== +--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendCPUPool.py ++++ xen-4.0.1-testing/tools/python/xen/xend/XendCPUPool.py +@@ -881,6 +881,11 @@ class XendCPUPool(XendBase): + lookup_pool = classmethod(lookup_pool) + + ++ def number_of_pools(cls): ++ return len(xc.cpupool_getinfo()) ++ ++ number_of_pools = classmethod(number_of_pools) ++ + def _cpu_number_to_ref(cls, number): + node = XendNode.instance() + for cpu_ref in node.get_host_cpu_refs(): +Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py +=================================================================== +--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py ++++ xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py +@@ -2790,7 +2790,7 @@ class XendDomainInfo: + return map(lambda x: x[0], sorted(enumerate(nodeload), key=lambda x:x[1])) + + info = xc.physinfo() +- if info['nr_nodes'] > 1: ++ if info['nr_nodes'] > 1 and XendCPUPool.number_of_pools() < 2: + node_memory_list = info['node_to_memory'] + needmem = self.image.getRequiredAvailableMemory(self.info['memory_dynamic_max']) / 1024 + candidate_node_list = [] diff --git a/22337-vtd-scan-single-func.patch b/22337-vtd-scan-single-func.patch new file mode 100644 index 0000000..a59267a --- /dev/null +++ b/22337-vtd-scan-single-func.patch @@ -0,0 +1,22 @@ +# HG changeset patch +# User Keir Fraser +# Date 1288371977 -3600 +# Node ID 
7afd8dd1d6cb97484d263311f3f0e6ab74d27ed3 +# Parent 49803ac994f4094719ec2c3b67d82561a24ba293 +VT-d: only scan secondary functions on multi-function devices + +Signed-off-by: Jan Beulich + +--- a/xen/drivers/passthrough/pci.c ++++ b/xen/drivers/passthrough/pci.c +@@ -442,6 +442,10 @@ int __init scan_pci_devices(void) + spin_unlock(&pcidevs_lock); + return -EINVAL; + } ++ ++ if ( !func && !(pci_conf_read8(bus, dev, func, ++ PCI_HEADER_TYPE) & 0x80) ) ++ break; + } + } + } diff --git a/22348-vtd-check-secbus-devfn.patch b/22348-vtd-check-secbus-devfn.patch new file mode 100644 index 0000000..d57f620 --- /dev/null +++ b/22348-vtd-check-secbus-devfn.patch @@ -0,0 +1,52 @@ +# HG changeset patch +# User Keir Fraser +# Date 1288772331 0 +# Node ID 2dfba250c50bafac4e333d84450324daedf5ae2c +# Parent 16093532f384eee02518520662a38ad16915b063 +VT-d: fix device assignment failure (regression from Xen c/s 19805:2f1fa2215e60) +References: bnc#647681 + +If the device at :00.0 is the device the mapping operation was +initiated for, trying to map it a second time will fail, and hence +this second mapping attempt must be prevented (as was done prior to +said c/s). + +While at it, simplify the code a little, too. + +Signed-off-by: Jan Beulich +Acked-by: Weidong Han + +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -1402,23 +1402,16 @@ static int domain_context_mapping(struct + if ( find_upstream_bridge(&bus, &devfn, &secbus) < 1 ) + break; + +- /* PCIe to PCI/PCIx bridge */ +- if ( pdev_type(bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE ) +- { +- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn); +- if ( ret ) +- return ret; ++ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn); + +- /* +- * Devices behind PCIe-to-PCI/PCIx bridge may generate +- * different requester-id. It may originate from devfn=0 +- * on the secondary bus behind the bridge. Map that id +- * as well. 
+- */ ++ /* ++ * Devices behind PCIe-to-PCI/PCIx bridge may generate different ++ * requester-id. It may originate from devfn=0 on the secondary bus ++ * behind the bridge. Map that id as well if we didn't already. ++ */ ++ if ( !ret && pdev_type(bus, devfn) == DEV_TYPE_PCIe2PCI_BRIDGE && ++ (secbus != pdev->bus || pdev->devfn != 0) ) + ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0); +- } +- else /* Legacy PCI bridge */ +- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn); + + break; + diff --git a/22369-xend-pci-passthru-fix.patch b/22369-xend-pci-passthru-fix.patch new file mode 100644 index 0000000..f4a8be6 --- /dev/null +++ b/22369-xend-pci-passthru-fix.patch @@ -0,0 +1,63 @@ +# HG changeset patch +# User Jim Fehlig +# Date 1288301229 21600 +# Branch xend-pci +# Node ID 461b9d3a643a2c67c961d9fc468a804891f3770d +# Parent 28a16074681582f1209c9077f870ccf44927133e +Fix pci passthru in xend interface used by libvirt + +Attempting to define or create a domain whose XML config contains a +passthru PCI device fails with libvirt + +xen84: # cat win2k8r2.xml +... + + +
+ + +... + +xen84: # virsh create ./win2k8r2.xml +error: Failed to create domain from ./win2k8r2.xml +error: POST operation failed: xend_post: error from xen daemon: + (xend.err "Error creating domain: 'key'") + +The PCI device config maintained by xend includes a 'key' field, which is +initialized by xm client when using that tool and traditional xen config +file. libvirt, which uses xend's sexpr-over-http interface (is that the +proper name for that interface??), does not initialize this field - and +shouldn't be expected to do so IMO. Clients should not be bothered with +xend's internal representation of a PCI device. + +This patch populates the 'key' field within xend if it is uninitialized, +similar to current initialization of 'uuid' field. The 'vdevfn' field +suffers a similar problem if not (optionally) specified by user. +AFAICT, the xm client initializes 'vdevfn' to 0x100 if not specified so +I've done the same here. + + Signed-off-by: Jim Fehlig + +diff -r 28a160746815 -r 461b9d3a643a tools/python/xen/util/pci.py +--- a/tools/python/xen/util/pci.py Wed Oct 27 12:24:28 2010 +0100 ++++ b/tools/python/xen/util/pci.py Thu Oct 28 15:27:09 2010 -0600 +@@ -240,10 +240,16 @@ + pci_dev_info = dict(pci_dev[1:]) + if 'opts' in pci_dev_info: + pci_dev_info['opts'] = pci_opts_list_from_sxp(pci_dev) +- # append uuid to each pci device that does't already have one. 
++ # If necessary, initialize uuid, key, and vdevfn for each pci device + if not pci_dev_info.has_key('uuid'): +- dpci_uuid = pci_dev_info.get('uuid', uuid.createString()) +- pci_dev_info['uuid'] = dpci_uuid ++ pci_dev_info['uuid'] = uuid.createString() ++ if not pci_dev_info.has_key('key'): ++ pci_dev_info['key'] = "%02x:%02x.%x" % \ ++ (int(pci_dev_info['bus'], 16), ++ int(pci_dev_info['slot'], 16), ++ int(pci_dev_info['func'], 16)) ++ if not pci_dev_info.has_key('vdevfn'): ++ pci_dev_info['vdevfn'] = "0x%02x" % AUTO_PHP_SLOT + pci_devs.append(pci_dev_info) + dev_config['devs'] = pci_devs + diff --git a/bdrv_default_rwflag.patch b/bdrv_default_rwflag.patch new file mode 100644 index 0000000..33a6956 --- /dev/null +++ b/bdrv_default_rwflag.patch @@ -0,0 +1,32 @@ +Subject: modify default read/write flag in bdrv_init. +Signed-off-by: Chunyan Liu + +Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/vl.c +=================================================================== +--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/vl.c ++++ xen-4.0.1-testing/tools/ioemu-qemu-xen/vl.c +@@ -2612,6 +2612,8 @@ int drive_init(struct drive_opt *arg, in + strncpy(drives_table[nb_drives].serial, serial, sizeof(serial)); + nb_drives++; + ++ bdrv_flags = BDRV_O_RDWR; ++ + switch(type) { + case IF_IDE: + case IF_SCSI: +@@ -2624,6 +2626,7 @@ int drive_init(struct drive_opt *arg, in + break; + case MEDIA_CDROM: + bdrv_set_type_hint(bdrv, BDRV_TYPE_CDROM); ++ bdrv_flags &= ~BDRV_O_RDWR; + break; + } + break; +@@ -2644,7 +2647,6 @@ int drive_init(struct drive_opt *arg, in + } + if (!file[0]) + return -2; +- bdrv_flags = 0; + if (snapshot) { + bdrv_flags |= BDRV_O_SNAPSHOT; + cache = 2; /* always use write-back with snapshot */ diff --git a/bdrv_open2_flags_2.patch b/bdrv_open2_flags_2.patch index 1fc9e2f..e5fbb73 100644 --- a/bdrv_open2_flags_2.patch +++ b/bdrv_open2_flags_2.patch @@ -2,7 +2,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c
=================================================================== --- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/xen_blktap.c +++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c -@@ -227,6 +227,7 @@ static int open_disk(struct td_state *s, +@@ -237,6 +237,7 @@ static int open_disk(struct td_state *s, BlockDriver* drv; char* devname; static int devnumber = 0; @@ -10,7 +10,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c int i; DPRINTF("Opening %s as blktap%d\n", path, devnumber); -@@ -249,7 +250,7 @@ static int open_disk(struct td_state *s, +@@ -259,7 +260,7 @@ static int open_disk(struct td_state *s, DPRINTF("%s driver specified\n", drv ? drv->format_name : "No"); /* Open the image */ diff --git a/blktap-pv-cdrom.patch b/blktap-pv-cdrom.patch index 2fec572..675b093 100644 --- a/blktap-pv-cdrom.patch +++ b/blktap-pv-cdrom.patch @@ -741,7 +741,7 @@ Index: xen-4.0.1-testing/tools/blktap/lib/blktaplib.h =================================================================== --- xen-4.0.1-testing.orig/tools/blktap/lib/blktaplib.h +++ xen-4.0.1-testing/tools/blktap/lib/blktaplib.h -@@ -220,6 +220,7 @@ typedef struct msg_pid { +@@ -225,6 +225,7 @@ typedef struct msg_pid { #define DISK_TYPE_RAM 3 #define DISK_TYPE_QCOW 4 #define DISK_TYPE_QCOW2 5 diff --git a/block-iscsi b/block-iscsi index 3f99c67..bc2ead6 100644 --- a/block-iscsi +++ b/block-iscsi @@ -46,8 +46,6 @@ case "$command" in # load modules and start iscsid /etc/init.d/open-iscsi status >/dev/null 2>&1 || { /etc/init.d/open-iscsi start >/dev/null 2>&1; sleep 1; } - # list of targets on node - /sbin/iscsiadm -m discovery | sed "s/ .*//g" | while read line; do /sbin/iscsiadm -m discovery -t sendtargets -p $line; done >/dev/null par=`xenstore-read $XENBUS_PATH/params` || true TGTID=`echo $par | sed "s/\/\///g"` while read rec uuid; do diff --git a/ioemu-bdrv-open-CACHE_WB.patch b/ioemu-bdrv-open-CACHE_WB.patch index 1350a2a..eb48cff 100644 --- a/ioemu-bdrv-open-CACHE_WB.patch +++ 
b/ioemu-bdrv-open-CACHE_WB.patch @@ -2,7 +2,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c =================================================================== --- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/xen_blktap.c +++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c -@@ -249,8 +249,11 @@ static int open_disk(struct td_state *s, +@@ -259,8 +259,11 @@ static int open_disk(struct td_state *s, drv = blktap_drivers[i].drv; DPRINTF("%s driver specified\n", drv ? drv->format_name : "No"); diff --git a/ioemu-blktap-barriers.patch b/ioemu-blktap-barriers.patch index 1c6f411..43d7dc5 100644 --- a/ioemu-blktap-barriers.patch +++ b/ioemu-blktap-barriers.patch @@ -2,7 +2,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c =================================================================== --- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/xen_blktap.c +++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c -@@ -362,6 +362,15 @@ static void qemu_send_responses(void* op +@@ -360,6 +360,15 @@ static void qemu_send_responses(void* op } /** @@ -18,7 +18,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c * Callback function for the IO message pipe. Reads requests from the ring * and processes them (call qemu read/write functions). 
* -@@ -380,6 +389,7 @@ static void handle_blktap_iomsg(void* pr +@@ -378,6 +387,7 @@ static void handle_blktap_iomsg(void* pr blkif_t *blkif = s->blkif; tapdev_info_t *info = s->ring_info; int page_size = getpagesize(); @@ -26,7 +26,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c struct aiocb_info *aiocb_info; -@@ -412,7 +422,7 @@ static void handle_blktap_iomsg(void* pr +@@ -410,7 +420,7 @@ static void handle_blktap_iomsg(void* pr /* Don't allow writes on readonly devices */ if ((s->flags & TD_RDONLY) && @@ -35,7 +35,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c blkif->pending_list[idx].status = BLKIF_RSP_ERROR; goto send_response; } -@@ -433,7 +443,7 @@ static void handle_blktap_iomsg(void* pr +@@ -431,7 +441,7 @@ static void handle_blktap_iomsg(void* pr DPRINTF("Sector request failed:\n"); DPRINTF("%s request, idx [%d,%d] size [%llu], " "sector [%llu,%llu]\n", @@ -44,7 +44,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c "WRITE" : "READ"), idx,i, (long long unsigned) -@@ -446,8 +456,14 @@ static void handle_blktap_iomsg(void* pr +@@ -444,8 +454,14 @@ static void handle_blktap_iomsg(void* pr blkif->pending_list[idx].secs_pending += nsects; @@ -60,7 +60,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c case BLKIF_OP_WRITE: aiocb_info = malloc(sizeof(*aiocb_info)); -@@ -467,6 +483,10 @@ static void handle_blktap_iomsg(void* pr +@@ -465,6 +481,10 @@ static void handle_blktap_iomsg(void* pr DPRINTF("ERROR: bdrv_write() == NULL\n"); goto send_response; } diff --git a/ioemu-blktap-image-format.patch b/ioemu-blktap-image-format.patch index 5aeb136..9fd0920 100644 --- a/ioemu-blktap-image-format.patch +++ b/ioemu-blktap-image-format.patch @@ -19,7 +19,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c =================================================================== --- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/xen_blktap.c +++ 
xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c -@@ -220,9 +220,10 @@ static int map_new_dev(struct td_state * +@@ -230,9 +230,10 @@ static int map_new_dev(struct td_state * return -1; } @@ -31,7 +31,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c char* devname; static int devnumber = 0; int i; -@@ -232,7 +233,22 @@ static int open_disk(struct td_state *s, +@@ -242,7 +243,22 @@ static int open_disk(struct td_state *s, bs = bdrv_new(devname); free(devname); @@ -55,7 +55,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c fprintf(stderr, "Could not open image file %s\n", path); return -ENOMEM; } -@@ -527,7 +543,7 @@ static void handle_blktap_ctrlmsg(void* +@@ -525,7 +541,7 @@ static void handle_blktap_ctrlmsg(void* s = state_init(); /*Open file*/ diff --git a/ioemu-blktap-zero-size.patch b/ioemu-blktap-zero-size.patch index 8a1ba56..2ec271e 100644 --- a/ioemu-blktap-zero-size.patch +++ b/ioemu-blktap-zero-size.patch @@ -16,7 +16,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c =================================================================== --- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/xen_blktap.c +++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c -@@ -258,6 +258,12 @@ static int open_disk(struct td_state *s, +@@ -268,6 +268,12 @@ static int open_disk(struct td_state *s, s->size = bs->total_sectors; s->sector_size = 512; @@ -28,7 +28,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c + s->info = ((s->flags & TD_RDONLY) ? 
VDISK_READONLY : 0); - #ifndef QEMU_TOOL + return 0; Index: xen-4.0.1-testing/tools/python/xen/xend/server/DevController.py =================================================================== --- xen-4.0.1-testing.orig/tools/python/xen/xend/server/DevController.py diff --git a/minios-fixups.patch b/minios-fixups.patch new file mode 100644 index 0000000..312b770 --- /dev/null +++ b/minios-fixups.patch @@ -0,0 +1,40 @@ +Index: xen-4.0.1-testing/extras/mini-os/netfront.c +=================================================================== +--- xen-4.0.1-testing.orig/extras/mini-os/netfront.c ++++ xen-4.0.1-testing/extras/mini-os/netfront.c +@@ -25,8 +25,8 @@ DECLARE_WAIT_QUEUE_HEAD(netfront_queue); + + + +-#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE) +-#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE) ++#define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE) ++#define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE) + #define GRANT_INVALID_REF 0 + + +Index: xen-4.0.1-testing/extras/mini-os/lib/math.c +=================================================================== +--- xen-4.0.1-testing.orig/extras/mini-os/lib/math.c ++++ xen-4.0.1-testing/extras/mini-os/lib/math.c +@@ -191,6 +191,7 @@ __qdivrem(uint64_t uq, uint64_t vq, uint + * and thus + * m = 4 - n <= 2 + */ ++ tmp.ul[H] = tmp.ul[L] = 0; + tmp.uq = uq; + u[0] = 0; + u[1] = HHALF(tmp.ul[H]); +Index: xen-4.0.1-testing/extras/mini-os/arch/x86/mm.c +=================================================================== +--- xen-4.0.1-testing.orig/extras/mini-os/arch/x86/mm.c ++++ xen-4.0.1-testing/extras/mini-os/arch/x86/mm.c +@@ -281,7 +281,7 @@ static void build_pagetable(unsigned lon + /* + * Mark portion of the address space read only. 
+ */ +-extern void shared_info; ++extern char shared_info[PAGE_SIZE]; + static void set_readonly(void *text, void *etext) + { + unsigned long start_address = diff --git a/snapshot-without-pv-fix.patch b/snapshot-without-pv-fix.patch new file mode 100644 index 0000000..05e8fac --- /dev/null +++ b/snapshot-without-pv-fix.patch @@ -0,0 +1,379 @@ +Subject: add the drive into drives_table[] only if guest is using PV driver + +now when blktapctrl asks qemu to add a device, it also set a watch +on the xenstore backend state path of the device, e.g. +/local/domain//device/vbd//state and when the +state changed to 4, that means guest is using the PV driver and it's +ready, so the watch will tell qemu to add the disk entry to +drives_table[], otherwise the disk in qemu will just stay opened,not +showing up in drives_table[]. + + +Signed-off-by: Li Dongyang +--- + tools/blktap/drivers/blktapctrl.c | 81 +++++++++++++++++++++++++++++++++- + tools/blktap/lib/blkif.c | 23 ++++++++++ + tools/blktap/lib/blktaplib.h | 5 ++ + tools/blktap/lib/xenbus.c | 69 +++++++++++++++++++++++++++++ + tools/ioemu-qemu-xen/hw/xen_blktap.c | 49 +++++++++++++++----- + 5 files changed, 213 insertions(+), 14 deletions(-) + +diff --git a/tools/blktap/drivers/blktapctrl.c b/tools/blktap/drivers/blktapctrl.c +index bcc3152..8b58e3e 100644 +--- a/tools/blktap/drivers/blktapctrl.c ++++ b/tools/blktap/drivers/blktapctrl.c +@@ -381,7 +381,22 @@ static int write_msg(int fd, int msgtype, void *ptr, void *ptr2) + msg->cookie = blkif->cookie; + + break; +- ++ ++ case CTLMSG_ADDDEV: ++ DPRINTF("Write_msg called: CTLMSG_ADDDEV\n"); ++ ++ msglen = sizeof(msg_hdr_t); ++ buf = malloc(msglen); ++ ++ /*Assign header fields*/ ++ msg = (msg_hdr_t *)buf; ++ msg->type = CTLMSG_ADDDEV; ++ msg->len = msglen; ++ msg->drivertype = blkif->drivertype; ++ msg->cookie = blkif->cookie; ++ ++ break; ++ + default: + return -1; + } +@@ -476,6 +491,12 @@ static int read_msg(int fd, int msgtype, void *ptr) + DPRINTF("\tPID: 
[%d]\n",blkif->tappid); + } + break; ++ ++ case CTLMSG_ADDDEV_RSP: ++ DPRINTF("Received CTLMSG_ADDDEV_RSP\n"); ++ if (msgtype != CTLMSG_ADDDEV_RSP) ret = 0; ++ break; ++ + default: + DPRINTF("UNKNOWN MESSAGE TYPE RECEIVED\n"); + ret = 0; +@@ -758,6 +779,63 @@ static int unmap_blktapctrl(blkif_t *blkif) + return 0; + } + ++static int blktapctrl_blkif_state(blkif_t *blkif, XenbusState state) ++{ ++ struct disk_info *drivertype = NULL; ++ ++ if (!blkif) ++ return -EINVAL; ++ ++ switch (state) ++ { ++ case XenbusStateUnknown: ++ break; ++ ++ case XenbusStateInitialising: ++ break; ++ ++ case XenbusStateInitWait: ++ break; ++ ++ case XenbusStateInitialised: ++ break; ++ ++ case XenbusStateConnected: ++ drivertype = dtypes[blkif->drivertype]; ++ if (drivertype->use_ioemu && blkif->state == CONNECTED) { ++ if (write_msg(blkif->fds[WRITE], CTLMSG_ADDDEV, blkif, NULL) ++ <=0) { ++ DPRINTF("Write_msg failed - CTLMSG_ADDDEV\n"); ++ return -1; ++ } ++ if (read_msg(blkif->fds[READ], CTLMSG_ADDDEV_RSP, blkif) <= 0) { ++ DPRINTF("Read_msg failure - CTLMSG_ADDDEV\n"); ++ return -1; ++ } ++ } ++ ++ break; ++ ++ case XenbusStateClosing: ++ break; ++ ++ case XenbusStateClosed: ++ break; ++ ++ case XenbusStateReconfiguring: ++ break; ++ ++ case XenbusStateReconfigured: ++ break; ++ ++ default: ++ DPRINTF("Unrecognized XenbusState %d\n", state); ++ return -1; ++ } ++ ++ return 0; ++} ++ + int open_ctrl_socket(char *devname) + { + int ret; +@@ -854,6 +932,7 @@ int main(int argc, char *argv[]) + register_new_blkif_hook(blktapctrl_new_blkif); + register_new_devmap_hook(map_new_blktapctrl); + register_new_unmap_hook(unmap_blktapctrl); ++ register_blkif_state_hook(blktapctrl_blkif_state); + + ctlfd = blktap_interface_open(); + if (ctlfd < 0) { +diff --git a/tools/blktap/lib/blkif.c b/tools/blktap/lib/blkif.c +index 9a19596..11b63dc 100644 +--- a/tools/blktap/lib/blkif.c ++++ b/tools/blktap/lib/blkif.c +@@ -89,6 +89,11 @@ void register_new_blkif_hook(int (*fn)(blkif_t *blkif)) + { + 
new_blkif_hook = fn; + } ++static int (*blkif_state_hook)(blkif_t *blkif, XenbusState state) = NULL; ++void register_blkif_state_hook(int (*fn)(blkif_t *blkif, XenbusState state)) ++{ ++ blkif_state_hook = fn; ++} + + int blkif_init(blkif_t *blkif, long int handle, long int pdev, + long int readonly) +@@ -179,6 +184,24 @@ void free_blkif(blkif_t *blkif) + } + } + ++int blkif_handle_state(blkif_t *blkif, XenbusState state) ++{ ++ if (blkif == NULL) ++ return -EINVAL; ++ ++ if (blkif_state_hook == NULL) ++ { ++ DPRINTF("Probe handling blkif state, but no blkif_state_hook!\n"); ++ return -1; ++ } ++ if (blkif_state_hook(blkif, state)!=0) { ++ DPRINTF("BLKIF: blkif_state_hook failed!\n"); ++ return -1; ++ } ++ ++ return 0; ++} ++ + void __init_blkif(void) + { + memset(blkif_hash, 0, sizeof(blkif_hash)); +diff --git a/tools/blktap/lib/blktaplib.h b/tools/blktap/lib/blktaplib.h +index 733b924..2a6a078 100644 +--- a/tools/blktap/lib/blktaplib.h ++++ b/tools/blktap/lib/blktaplib.h +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -138,11 +139,13 @@ typedef struct blkif_info { + void register_new_devmap_hook(int (*fn)(blkif_t *blkif)); + void register_new_unmap_hook(int (*fn)(blkif_t *blkif)); + void register_new_blkif_hook(int (*fn)(blkif_t *blkif)); ++void register_blkif_state_hook(int (*fn)(blkif_t *blkif, XenbusState state)); + blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); + blkif_t *alloc_blkif(domid_t domid); + int blkif_init(blkif_t *blkif, long int handle, long int pdev, + long int readonly); + void free_blkif(blkif_t *blkif); ++int blkif_handle_state(blkif_t *blkif, XenbusState state); + void __init_blkif(void); + + typedef struct busy_state { +@@ -210,6 +213,8 @@ typedef struct msg_pid { + #define CTLMSG_CLOSE_RSP 8 + #define CTLMSG_PID 9 + #define CTLMSG_PID_RSP 10 ++#define CTLMSG_ADDDEV 11 ++#define CTLMSG_ADDDEV_RSP 12 + + /* disk driver types */ + #define MAX_DISK_TYPES 20 +diff --git 
a/tools/blktap/lib/xenbus.c b/tools/blktap/lib/xenbus.c +index 53db3c8..96f75a5 100644 +--- a/tools/blktap/lib/xenbus.c ++++ b/tools/blktap/lib/xenbus.c +@@ -318,6 +318,72 @@ static int check_image(struct xs_handle *h, struct backend_info *be, + return 0; + } + ++static void check_frontend_state(struct xs_handle *h, struct xenbus_watch *w, ++ const char *state_path_im) ++{ ++ struct backend_info *be = NULL; ++ struct blkif *blkif = NULL; ++ char *fepath = NULL, *bepath = NULL; ++ XenbusState state; ++ int er, len; ++ ++ len = strsep_len(state_path_im, '/', 6); ++ if (len < 0) ++ return; ++ if (!(fepath = malloc(len + 1))) ++ return; ++ memset(fepath, 0, len + 1); ++ strncpy(fepath, state_path_im, len); ++ ++ er = xs_gather(h, fepath, "state", "%d", &state, ++ "backend", NULL, &bepath, ++ NULL); ++ ++ if (er) { ++ DPRINTF("Error getting state [%s]\n", fepath); ++ goto free_fe; ++ } ++ ++ be = be_lookup_be(bepath); ++ if (!be || !be->blkif) ++ goto free_fe; ++ ++ blkif = be->blkif; ++ blkif_handle_state(blkif, state); ++ ++free_fe: ++ if (fepath) ++ free(fepath); ++ if (bepath) ++ free(bepath); ++ return; ++} ++ ++static int add_blockdevice_state_watch(struct xs_handle *h, const char *frontend) ++{ ++ char *path = NULL; ++ struct xenbus_watch *vbd_watch; ++ ++ if (asprintf(&path, frontend) == -1) ++ return -ENOMEM; ++ if (!(path = realloc(path, strlen(path) + strlen("/state") + 1))) ++ return -ENOMEM; ++ strcpy(path + strlen(path), "/state"); ++ ++ vbd_watch = (struct xenbus_watch *)malloc(sizeof(struct xenbus_watch)); ++ if (!vbd_watch) { ++ DPRINTF("ERROR: unable to malloc vbd_watch [%s]\n", path); ++ return -EINVAL; ++ } ++ vbd_watch->node = path; ++ vbd_watch->callback = check_frontend_state; ++ if (register_xenbus_watch(h, vbd_watch) != 0) { ++ DPRINTF("ERROR: adding vbd probe watch %s\n", path); ++ return -EINVAL; ++ } ++ return 0; ++} ++ + static void ueblktap_setup(struct xs_handle *h, char *bepath) + { + struct backend_info *be; +@@ -512,6 +578,9 @@ static 
void ueblktap_probe(struct xs_handle *h, struct xenbus_watch *w, + + be->backpath = bepath; + be->frontpath = frontend; ++ ++ if (add_blockdevice_state_watch(h, frontend) != 0) ++ goto free_be; + + list_add(&be->list, &belist); + +diff --git a/tools/ioemu-qemu-xen/hw/xen_blktap.c b/tools/ioemu-qemu-xen/hw/xen_blktap.c +index c2236fd..c925283 100644 +--- a/tools/ioemu-qemu-xen/hw/xen_blktap.c ++++ b/tools/ioemu-qemu-xen/hw/xen_blktap.c +@@ -83,8 +83,18 @@ static void unmap_disk(struct td_state *s) + { + tapdev_info_t *info = s->ring_info; + fd_list_entry_t *entry; ++ int i; + + bdrv_close(s->bs); ++#ifndef QEMU_TOOL ++ for (i = 0; i < MAX_DRIVES + 1; i++) { ++ if (drives_table[i].bdrv == s->bs) { ++ drives_table[i].bdrv = NULL; ++ nb_drives--; ++ break; ++ } ++ } ++#endif + + if (info != NULL && info->mem > 0) + munmap(info->mem, getpagesize() * BLKTAP_MMAP_REGION_SIZE); +@@ -244,18 +254,6 @@ static int open_disk(struct td_state *s, char *path, int readonly) + + s->info = ((s->flags & TD_RDONLY) ? 
VDISK_READONLY : 0); + +-#ifndef QEMU_TOOL +- for (i = 0; i < MAX_DRIVES + 1; i++) { +- if (drives_table[i].bdrv == NULL) { +- drives_table[i].bdrv = bs; +- drives_table[i].type = IF_BLKTAP; +- drives_table[i].bus = 0; +- drives_table[i].unit = 0; +- break; +- } +- } +-#endif +- + return 0; + } + +@@ -496,7 +494,7 @@ static void handle_blktap_ctrlmsg(void* private) + msg_hdr_t *msg; + msg_newdev_t *msg_dev; + msg_pid_t *msg_pid; +- int ret = -1; ++ int ret = -1, i; + struct td_state *s = NULL; + fd_list_entry_t *entry; + +@@ -622,6 +620,31 @@ static void handle_blktap_ctrlmsg(void* private) + len = write(write_fd, buf, msglen); + break; + ++ case CTLMSG_ADDDEV: ++ s = get_state(msg->cookie); ++ if (s) { ++#ifndef QEMU_TOOL ++ for (i = 0; i < MAX_DRIVES + 1; i++) { ++ if (drives_table[i].bdrv == NULL) { ++ drives_table[i].bdrv = s->bs; ++ drives_table[i].type = IF_BLKTAP; ++ drives_table[i].bus = 0; ++ drives_table[i].unit = 0; ++ drives_table[i].used = 1; ++ nb_drives++; ++ break; ++ } ++ } ++#endif ++ } ++ ++ memset(buf, 0x00, MSG_SIZE); ++ msglen = sizeof(msg_hdr_t); ++ msg->type = CTLMSG_ADDDEV_RSP; ++ msg->len = msglen; ++ len = write(write_fd, buf, msglen); ++ break; ++ + default: + break; + } diff --git a/stdvga-cache.patch b/stdvga-cache.patch new file mode 100644 index 0000000..8fdddae --- /dev/null +++ b/stdvga-cache.patch @@ -0,0 +1,16 @@ +Index: xen-4.0.1-testing/xen/arch/x86/hvm/stdvga.c +=================================================================== +--- xen-4.0.1-testing.orig/xen/arch/x86/hvm/stdvga.c ++++ xen-4.0.1-testing/xen/arch/x86/hvm/stdvga.c +@@ -135,7 +135,10 @@ static int stdvga_outb(uint64_t addr, ui + + /* When in standard vga mode, emulate here all writes to the vram buffer + * so we can immediately satisfy reads without waiting for qemu. 
*/ +- s->stdvga = (s->sr[7] == 0x00); ++ s->stdvga = ++ (s->sr[7] == 0x00) && /* standard vga mode */ ++ (s->gr[6] == 0x05); /* misc graphics register w/ MemoryMapSelect=1 ++ * 0xa0000-0xaffff (64k region), AlphaDis=1 */ + + if ( !prev_stdvga && s->stdvga ) + { diff --git a/stubdom.tar.bz2 b/stubdom.tar.bz2 new file mode 100644 index 0000000..08029de --- /dev/null +++ b/stubdom.tar.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6215320ea160b7296e463abb60e3264fc35f88b47374ae3e054ba4352f6d43dc +size 16864982 diff --git a/tapdisk-ioemu-shutdown-fix.patch b/tapdisk-ioemu-shutdown-fix.patch index 55913ea..467490c 100644 --- a/tapdisk-ioemu-shutdown-fix.patch +++ b/tapdisk-ioemu-shutdown-fix.patch @@ -28,7 +28,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c fd_list_entry_t *fd_start = NULL; extern char* get_snapshot_name(int devid); -@@ -547,6 +548,7 @@ static void handle_blktap_ctrlmsg(void* +@@ -545,6 +546,7 @@ static void handle_blktap_ctrlmsg(void* /* Allocate the disk structs */ s = state_init(); @@ -36,7 +36,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/xen_blktap.c /*Open file*/ if (s == NULL || open_disk(s, path, msg->drivertype, msg->readonly)) { -@@ -629,7 +631,8 @@ static void handle_blktap_ctrlmsg(void* +@@ -627,7 +629,8 @@ static void handle_blktap_ctrlmsg(void* case CTLMSG_CLOSE: s = get_state(msg->cookie); if (s) unmap_disk(s); diff --git a/xen.changes b/xen.changes index 703954c..bef111a 100644 --- a/xen.changes +++ b/xen.changes @@ -1,3 +1,109 @@ +------------------------------------------------------------------- +Fri Nov 12 09:48:14 MST 2010 - carnold@novell.com + +- Upstream patch for python 2.7 compatibility + 22045-python27-compat.patch + +------------------------------------------------------------------- +Thu Nov 11 18:44:48 CST 2010 - cyliu@novell.com + +- bnc#641144 - FV Xen VM running windows or linux cannot write to + virtual floppy drive + bdrv_default_rwflag.patch + 
+------------------------------------------------------------------- +Thu Nov 11 21:01:12 CET 2010 - ohering@suse.de + +- fate#310510 - fix xenpaging + xenpaging.optimize_p2m_mem_paging_populate.patch + xenpaging.HVMCOPY_gfn_paged_out.patch + +------------------------------------------------------------------- +Thu Nov 11 10:11:35 MST 2010 - carnold@novell.com + +- bnc#649864 - automatic numa cpu placement of xen conflicts with + cpupools + 22326-cpu-pools-numa-placement.patch + +------------------------------------------------------------------- +Wed Nov 10 17:38:39 CET 2010 - ohering@suse.de + +- fate#310510 - fix xenpaging + xenpaging.populate_only_if_paged.patch + - revert logic, populate needs to happen unconditionally + xenpaging.p2m_mem_paging_populate_if_p2m_ram_paged.patch + - invalidate current mfn only if gfn is not in flight or done + xenpaging.mem_event_check_ring-free_requests.patch + - print info only if 1 instead of 2 slots are free + xenpaging.guest_remove_page.patch + - check mfn before usage in resume function + xenpaging.machine_to_phys_mapping.patch + - check mfn before usage in resume function + +------------------------------------------------------------------- +Tue Nov 9 10:00:48 MST 2010 - jfehlig@novell.com + +- bnc#552115 - Remove target discovery in block-iscsi + modified block-iscsi script + +------------------------------------------------------------------- +Mon Nov 8 13:11:02 MDT 2010 - jfehlig@novell.com + +- bnc#649277 - Fix pci passthru in xend interface used by libvirt + 22369-xend-pci-passthru-fix.patch + +------------------------------------------------------------------- +Mon Nov 8 02:49:00 UTC 2010 - lidongyang@novell.com + +- bnc#642078 - xm snapshot-create causes qemu-dm to SEGV + snapshot-without-pv-fix.patch + +------------------------------------------------------------------- +Fri Nov 5 16:22:15 CET 2010 - ohering@suse.de + +- fate#310510 - fix xenpaging + xenpaging.num_pages_equal_max_pages.patch + 
+------------------------------------------------------------------- +Fri Nov 5 09:13:38 MDT 2010 - carnold@novell.com + +- bnc#647681 - L3: Passthrough of certain PCI device broken after + SLES 11 to SP1 upgrade +- bnc#650871 - Regression in Xen PCI Passthrough + 22348-vtd-check-secbus-devfn.patch +- Upstream patches from Jan + 22223-vtd-workarounds.patch + 22231-x86-pv-ucode-msr-intel.patch + 22232-x86-64-lahf-lm-bios-workaround.patch + 22280-kexec.patch + 22337-vtd-scan-single-func.patch + +------------------------------------------------------------------- +Wed Nov 3 16:26:10 MDT 2010 - carnold@novell.com + +- bnc#497149 - SLES11 64bit Xen - SLES11 64bit HVM guest has + corrupt text console + stdvga-cache.patch + +------------------------------------------------------------------- +Wed Nov 3 17:52:14 CET 2010 - ohering@suse.de + +- fate#310510 - fix xenpaging + xenpaging.page_already_populated.patch + xenpaging.notify_policy_only_once.patch + xenpaging.guest_remove_page.patch + xenpaging.machine_to_phys_mapping.patch + remove xenpaging.memory_op.patch, retry loops are not needed + +------------------------------------------------------------------- +Tue Nov 2 14:10:34 MDT 2010 - carnold@novell.com + +- bnc#474789 - xen-tools 3.3 rpm misses pv-grub +- PV-GRUB replaces PyGrub to boot domU images safely: it runs the + regular grub inside the created domain itself and uses regular + domU facilities to read the disk / fetch files from network etc.; + it eventually loads the PV kernel and chain-boots it. 
+ ------------------------------------------------------------------- Wed Oct 27 20:08:51 CEST 2010 - ohering@suse.de @@ -65,7 +171,6 @@ Mon Oct 11 08:59:35 MDT 2010 - carnold@novell.com bnc#615206 - Xen kernel fails to boot with IO-APIC problem bnc#640773 - Xen kernel crashing right after grub bnc#643477 - issues with PCI hotplug/hotunplug to Xen driver domain - 22223-vtd-igd-workaround.patch 22222-x86-timer-extint.patch 22214-x86-msr-misc-enable.patch 22213-x86-xsave-cpuid-check.patch diff --git a/xen.spec b/xen.spec index c7cabb9..6277e33 100644 --- a/xen.spec +++ b/xen.spec @@ -25,44 +25,47 @@ ExclusiveArch: %ix86 x86_64 %define changeset 21326 %define xen_build_dir xen-4.0.1-testing %define with_kmp 1 -BuildRequires: LibVNCServer-devel -BuildRequires: SDL-devel -BuildRequires: automake -BuildRequires: bin86 -BuildRequires: curl-devel -BuildRequires: dev86 -BuildRequires: graphviz -BuildRequires: latex2html -BuildRequires: libjpeg-devel -BuildRequires: libxml2-devel -BuildRequires: ncurses-devel -BuildRequires: openssl -BuildRequires: openssl-devel -BuildRequires: pciutils-devel -BuildRequires: python-devel -BuildRequires: texinfo -BuildRequires: transfig +BuildRequires: LibVNCServer-devel +BuildRequires: SDL-devel +BuildRequires: automake +BuildRequires: bin86 +BuildRequires: curl-devel +BuildRequires: dev86 +BuildRequires: graphviz +BuildRequires: latex2html +BuildRequires: libjpeg-devel +BuildRequires: libxml2-devel +BuildRequires: ncurses-devel +BuildRequires: openssl +BuildRequires: openssl-devel +BuildRequires: pciutils-devel +BuildRequires: python-devel +BuildRequires: texinfo +BuildRequires: transfig %if %suse_version <= 1110 -BuildRequires: pmtools +BuildRequires: pmtools %else -BuildRequires: acpica +BuildRequires: acpica %endif %if %suse_version >= 1030 -BuildRequires: texlive -BuildRequires: texlive-latex +BuildRequires: texlive +BuildRequires: texlive-latex %else -BuildRequires: te_ams -BuildRequires: te_latex -BuildRequires: tetex +BuildRequires: 
te_ams +BuildRequires: te_latex +BuildRequires: tetex %endif %ifarch x86_64 BuildRequires: glibc-32bit glibc-devel-32bit +BuildRequires: gcc-32bit +BuildRequires: gcc43-32bit %define max_cpus 256 %define pae_enabled n %else %define max_cpus 32 %define pae_enabled y %endif +BuildRequires: glibc-devel %if %{?with_kmp}0 BuildRequires: kernel-source kernel-syms module-init-tools xorg-x11 %endif @@ -74,6 +77,7 @@ AutoReqProv: on PreReq: %insserv_prereq %fillup_prereq Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) Source0: xen-4.0.1-testing-src.tar.bz2 +Source1: stubdom.tar.bz2 Source2: README.SuSE Source3: boot.xen Source4: boot.local.xenU @@ -121,25 +125,32 @@ Patch15: 21847-pscsi.patch Patch16: 21866-xenapi.patch Patch17: 21894-intel-unmask-cpuid.patch Patch18: 22019-x86-cpuidle-online-check.patch -Patch19: 22051-x86-forced-EOI.patch -Patch20: 22067-x86-irq-domain.patch -Patch21: 22068-vtd-irte-RH-bit.patch -Patch22: 22071-ept-get-entry-lock.patch -Patch23: 22084-x86-xsave-off.patch -Patch24: 7410-qemu-alt-gr.patch -Patch25: 22135-heap-lock.patch -Patch26: 22148-serial-irq-dest.patch -Patch27: 22157-x86-debug-key-i.patch -Patch28: 22159-notify-evtchn-dying.patch -Patch29: 22160-Intel-C6-EOI.patch -Patch30: 22174-x86-pmtimer-accuracy.patch -Patch31: 22175-x86-irq-enter-exit.patch -Patch32: 22177-i386-irq-safe-map_domain_page.patch -Patch33: 22194-tmem-check-pv-mfn.patch -Patch34: 22213-x86-xsave-cpuid-check.patch -Patch35: 22214-x86-msr-misc-enable.patch -Patch36: 22222-x86-timer-extint.patch -Patch37: 22223-vtd-igd-workaround.patch +Patch19: 22045-python27-compat.patch +Patch20: 22051-x86-forced-EOI.patch +Patch21: 22067-x86-irq-domain.patch +Patch22: 22068-vtd-irte-RH-bit.patch +Patch23: 22071-ept-get-entry-lock.patch +Patch24: 22084-x86-xsave-off.patch +Patch25: 7410-qemu-alt-gr.patch +Patch26: 22135-heap-lock.patch +Patch27: 22148-serial-irq-dest.patch +Patch28: 22157-x86-debug-key-i.patch +Patch29: 22159-notify-evtchn-dying.patch +Patch30: 
22160-Intel-C6-EOI.patch +Patch31: 22174-x86-pmtimer-accuracy.patch +Patch32: 22175-x86-irq-enter-exit.patch +Patch33: 22177-i386-irq-safe-map_domain_page.patch +Patch34: 22194-tmem-check-pv-mfn.patch +Patch35: 22213-x86-xsave-cpuid-check.patch +Patch36: 22214-x86-msr-misc-enable.patch +Patch37: 22222-x86-timer-extint.patch +Patch38: 22223-vtd-workarounds.patch +Patch39: 22231-x86-pv-ucode-msr-intel.patch +Patch40: 22232-x86-64-lahf-lm-bios-workaround.patch +Patch41: 22280-kexec.patch +Patch42: 22337-vtd-scan-single-func.patch +Patch43: 22348-vtd-check-secbus-devfn.patch +Patch44: 22369-xend-pci-passthru-fix.patch # Our patches Patch300: xen-config.diff Patch301: xend-config.diff @@ -200,11 +211,13 @@ Patch371: domu-usb-controller.patch Patch372: popen2-argument-fix.patch Patch373: usb-list.patch Patch374: xend-devid-or-name.patch +Patch375: 22326-cpu-pools-numa-placement.patch # Patches for snapshot support Patch400: snapshot-ioemu-save.patch Patch401: snapshot-ioemu-restore.patch Patch402: snapshot-ioemu-delete.patch Patch403: snapshot-xend.patch +Patch404: snapshot-without-pv-fix.patch # ioemu part of blktap patch series Patch410: ioemu-blktap-fv-init.patch Patch411: ioemu-blktap-image-format.patch @@ -230,6 +243,9 @@ Patch434: check_device_status.patch Patch435: change_home_server.patch Patch436: altgr_2.patch Patch437: xenfb_32bpp.patch +Patch438: stdvga-cache.patch +Patch439: minios-fixups.patch +Patch440: bdrv_default_rwflag.patch # Jim's domain lock patch Patch450: xend-domain-lock.patch # Hypervisor and PV driver Patches @@ -257,24 +273,31 @@ Patch704: hv_apic.patch # Build patch Patch999: tmp_build.patch # FATE 310510 -Patch10001: xenpaging.tools_xenpaging_cleanup.patch -Patch10002: xenpaging.pageout_policy.patch -Patch10003: xenpaging.xs_daemon_close.patch -Patch10004: xenpaging.get_paged_frame.patch -Patch10005: xenpaging.makefile.patch -Patch10010: xenpaging.policy_linear.patch -Patch10011: xenpaging.pagefile.patch -Patch10012: 
xenpaging.xenpaging_init.patch -Patch10013: xenpaging.mem_paging_tool_qemu_flush_cache.patch -Patch10014: xenpaging.memory_op.patch -Patch10015: xenpaging.populate_only_if_paged.patch -Patch10017: xenpaging.autostart.patch -Patch10018: xenpaging.signal_handling.patch -Patch10019: xenpaging.MRU_SIZE.patch -Patch10021: xenpaging.mem_event_check_ring-free_requests.patch -Patch10022: xenpaging.blacklist.patch -Patch10023: xenpaging.autostart_delay.patch -Patch10040: xenpaging.doc.patch +Patch10001: xenpaging.tools_xenpaging_cleanup.patch +Patch10002: xenpaging.pageout_policy.patch +Patch10003: xenpaging.xs_daemon_close.patch +Patch10004: xenpaging.get_paged_frame.patch +Patch10005: xenpaging.makefile.patch +Patch10010: xenpaging.policy_linear.patch +Patch10011: xenpaging.pagefile.patch +Patch10012: xenpaging.xenpaging_init.patch +Patch10013: xenpaging.mem_paging_tool_qemu_flush_cache.patch +Patch10014: xenpaging.machine_to_phys_mapping.patch +Patch10015: xenpaging.populate_only_if_paged.patch +Patch10017: xenpaging.autostart.patch +Patch10018: xenpaging.signal_handling.patch +Patch10019: xenpaging.MRU_SIZE.patch +Patch10020: xenpaging.guest_remove_page.patch +Patch10021: xenpaging.mem_event_check_ring-free_requests.patch +Patch10022: xenpaging.blacklist.patch +Patch10023: xenpaging.autostart_delay.patch +Patch10024: xenpaging.page_already_populated.patch +Patch10025: xenpaging.notify_policy_only_once.patch +Patch10026: xenpaging.num_pages_equal_max_pages.patch +Patch10027: xenpaging.p2m_mem_paging_populate_if_p2m_ram_paged.patch +Patch10028: xenpaging.HVMCOPY_gfn_paged_out.patch +Patch10029: xenpaging.optimize_p2m_mem_paging_populate.patch +Patch10040: xenpaging.doc.patch Url: http://www.cl.cam.ac.uk/Research/SRG/netos/xen/ BuildRoot: %{_tmppath}/%{name}-%{version}-build #%define pysite %(python -c "import distutils.sysconfig; print distutils.sysconfig.get_python_lib()") @@ -606,7 +629,7 @@ Authors: Ian Pratt %prep -%setup -q -n %xen_build_dir +%setup -q -n 
%xen_build_dir -a 1 %patch0 -p1 %patch1 -p1 %patch2 -p1 @@ -645,6 +668,13 @@ Authors: %patch35 -p1 %patch36 -p1 %patch37 -p1 +%patch38 -p1 +%patch39 -p1 +%patch40 -p1 +%patch41 -p1 +%patch42 -p1 +%patch43 -p1 +%patch44 -p1 %patch300 -p1 %patch301 -p1 %patch302 -p1 @@ -703,10 +733,12 @@ Authors: %patch372 -p1 %patch373 -p1 %patch374 -p1 +%patch375 -p1 %patch400 -p1 %patch401 -p1 %patch402 -p1 %patch403 -p1 +%patch404 -p1 %patch410 -p1 %patch411 -p1 %patch412 -p1 @@ -730,6 +762,9 @@ Authors: %patch435 -p1 %patch436 -p1 %patch437 -p1 +%patch438 -p1 +%patch439 -p1 +%patch440 -p1 %patch450 -p1 %patch500 -p1 %patch501 -p1 @@ -766,11 +801,19 @@ Authors: %patch10017 -p1 %patch10018 -p1 %patch10019 -p1 +%patch10020 -p1 %patch10021 -p1 %patch10022 -p1 %patch10023 -p1 +%patch10024 -p1 +%patch10025 -p1 +%patch10026 -p1 +%patch10027 -p1 +%patch10028 -p1 +%patch10029 -p1 %patch10040 -p1 + %build XEN_EXTRAVERSION=%version-%release XEN_EXTRAVERSION=${XEN_EXTRAVERSION#%{xvers}} @@ -857,6 +900,17 @@ done mkdir -p $RPM_BUILD_ROOT/etc/modprobe.d install -m644 %SOURCE19 $RPM_BUILD_ROOT/etc/modprobe.d/xen_pvdrivers.conf %endif +# stubdom +make stubdom +make -C stubdom install \ + DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} \ + DOCDIR=%{_defaultdocdir}/xen INCDIR=%{_includedir} +mkdir -p $RPM_BUILD_ROOT/%{_defaultdocdir}/xen +%ifarch x86_64 +ln -s /usr/lib/xen/boot/pv-grub-x86_32.gz $RPM_BUILD_ROOT/usr/lib/xen/boot/pv-grub-x86_32.gz +ln -s /usr/lib/xen/bin/stubdom-dm $RPM_BUILD_ROOT/usr/lib64/xen/bin/stubdom-dm +ln -s /usr/lib/xen/bin/stubdompath.sh $RPM_BUILD_ROOT/usr/lib64/xen/bin/stubdompath.sh +%endif # docs make -C docs install \ DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} \ @@ -1077,6 +1131,12 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug %{_datadir}/xen/qemu/* %{_datadir}/xen/man/man1/* %{_datadir}/xen/man/man8/* +/usr/lib/xen/bin/stubdom-dm +/usr/lib/xen/bin/stubdompath.sh +%ifarch x86_64 +%{_libdir}/xen/bin/stubdom-dm +%{_libdir}/xen/bin/stubdompath.sh +%endif 
%{_libdir}/xen/bin/qemu-dm %ifarch x86_64 /usr/lib/xen/bin/qemu-dm @@ -1092,6 +1152,11 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug %{_libdir}/python%{pyver}/site-packages/grub/* #%pysite/fsimage.so %{_libdir}/python%{pyver}/site-packages/fsimage.so +/usr/lib/xen/boot/ioemu-stubdom.gz +/usr/lib/xen/boot/pv-grub-x86_32.gz +%ifarch x86_64 +/usr/lib/xen/boot/pv-grub-x86_64.gz +%endif %files tools-domU %defattr(-,root,root) diff --git a/xenpaging.HVMCOPY_gfn_paged_out.patch b/xenpaging.HVMCOPY_gfn_paged_out.patch new file mode 100644 index 0000000..a26d9b2 --- /dev/null +++ b/xenpaging.HVMCOPY_gfn_paged_out.patch @@ -0,0 +1,144 @@ +Subject: xenpaging: handle HVMCOPY_gfn_paged_out in copy_from/to_user + +copy_from_user_hvm can fail when __hvm_copy returns +HVMCOPY_gfn_paged_out for a referenced gfn, for example during a guest's +pagetable walk. This has to be handled in some way. One hypercall that +failed was do_memory_op/XENMEM_decrease_reservation which led to a +BUG_ON in balloon.c. Since do_memory_op already has restart support for +the hypercall, copy_from_guest uses this existing retry code. In +addition, cleanup on error was added to increase_reservation and +populate_physmap. + +Signed-off-by: Olaf Hering + +--- + xen/arch/x86/hvm/hvm.c | 4 ++++ + xen/common/memory.c | 43 ++++++++++++++++++++++++++++++++++++++----- + 2 files changed, 42 insertions(+), 5 deletions(-) + +--- xen-4.0.1-testing.orig/xen/arch/x86/hvm/hvm.c ++++ xen-4.0.1-testing/xen/arch/x86/hvm/hvm.c +@@ -1853,6 +1853,8 @@ unsigned long copy_to_user_hvm(void *to, + + rc = hvm_copy_to_guest_virt_nofault((unsigned long)to, (void *)from, + len, 0); ++ if ( rc == HVMCOPY_gfn_paged_out ) ++ return -EAGAIN; + return rc ? len : 0; /* fake a copy_to_user() return code */ + } + +@@ -1869,6 +1871,8 @@ unsigned long copy_from_user_hvm(void *t + #endif + + rc = hvm_copy_from_guest_virt_nofault(to, (unsigned long)from, len, 0); ++ if ( rc == HVMCOPY_gfn_paged_out ) ++ return -EAGAIN; + return rc ?
len : 0; /* fake a copy_from_user() return code */ + } + +--- xen-4.0.1-testing.orig/xen/common/memory.c ++++ xen-4.0.1-testing/xen/common/memory.c +@@ -47,6 +47,7 @@ static void increase_reservation(struct + { + struct page_info *page; + unsigned long i; ++ unsigned long ctg_ret; + xen_pfn_t mfn; + struct domain *d = a->domain; + +@@ -80,8 +81,14 @@ static void increase_reservation(struct + if ( !guest_handle_is_null(a->extent_list) ) + { + mfn = page_to_mfn(page); +- if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) ) ++ ctg_ret = __copy_to_guest_offset(a->extent_list, i, &mfn, 1); ++ if ( unlikely(ctg_ret) ) ++ { ++ free_domheap_pages(page, a->extent_order); ++ if ( (long)ctg_ret == -EAGAIN ) ++ a->preempted = 1; + goto out; ++ } + } + } + +@@ -93,6 +100,7 @@ static void populate_physmap(struct memo + { + struct page_info *page; + unsigned long i, j; ++ unsigned long ctg_ret; + xen_pfn_t gpfn, mfn; + struct domain *d = a->domain; + +@@ -111,8 +119,13 @@ static void populate_physmap(struct memo + goto out; + } + +- if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) ) ++ j = __copy_from_guest_offset(&gpfn, a->extent_list, i, 1); ++ if ( unlikely(j) ) ++ { ++ if ( (long)j == -EAGAIN ) ++ a->preempted = 1; + goto out; ++ } + + if ( a->memflags & MEMF_populate_on_demand ) + { +@@ -142,8 +155,17 @@ static void populate_physmap(struct memo + set_gpfn_from_mfn(mfn + j, gpfn + j); + + /* Inform the domain of the new page's machine address. 
*/ +- if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) ) ++ ctg_ret = __copy_to_guest_offset(a->extent_list, i, &mfn, 1); ++ if ( unlikely(ctg_ret) ) ++ { ++ for ( j = 0; j < (1 << a->extent_order); j++ ) ++ set_gpfn_from_mfn(mfn + j, INVALID_M2P_ENTRY); ++ guest_physmap_remove_page(d, gpfn, mfn, a->extent_order); ++ free_domheap_pages(page, a->extent_order); ++ if ( (long)ctg_ret == -EAGAIN ) ++ a->preempted = 1; + goto out; ++ } + } + } + } +@@ -226,8 +248,13 @@ static void decrease_reservation(struct + goto out; + } + +- if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) ) ++ j = __copy_from_guest_offset(&gmfn, a->extent_list, i, 1); ++ if ( unlikely(j) ) ++ { ++ if ( (long)j == -EAGAIN ) ++ a->preempted = 1; + goto out; ++ } + + if ( tb_init_done ) + { +@@ -511,6 +538,7 @@ long do_memory_op(unsigned long cmd, XEN + int rc, op; + unsigned int address_bits; + unsigned long start_extent; ++ unsigned long cfg_ret; + struct xen_memory_reservation reservation; + struct memop_args args; + domid_t domid; +@@ -524,8 +552,13 @@ long do_memory_op(unsigned long cmd, XEN + case XENMEM_populate_physmap: + start_extent = cmd >> MEMOP_EXTENT_SHIFT; + +- if ( copy_from_guest(&reservation, arg, 1) ) ++ cfg_ret = copy_from_guest(&reservation, arg, 1); ++ if ( unlikely(cfg_ret) ) ++ { ++ if ( (long)cfg_ret == -EAGAIN ) ++ return hypercall_create_continuation(__HYPERVISOR_memory_op, "lh", cmd, arg); + return start_extent; ++ } + + /* Is size too large for us to encode a continuation? 
*/ + if ( reservation.nr_extents > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) ) diff --git a/xenpaging.autostart.patch b/xenpaging.autostart.patch index cb5a509..28004c7 100644 --- a/xenpaging.autostart.patch +++ b/xenpaging.autostart.patch @@ -14,10 +14,10 @@ Signed-off-by: Olaf Hering tools/python/README.sxpcfg | 1 tools/python/xen/xend/XendConfig.py | 3 + tools/python/xen/xend/XendDomainInfo.py | 6 ++ - tools/python/xen/xend/image.py | 87 ++++++++++++++++++++++++++++++++ + tools/python/xen/xend/image.py | 91 ++++++++++++++++++++++++++++++++ tools/python/xen/xm/create.py | 5 + tools/python/xen/xm/xenapi_create.py | 1 - 8 files changed, 107 insertions(+) + 8 files changed, 111 insertions(+) Index: xen-4.0.1-testing/tools/examples/xmexample.hvm =================================================================== @@ -121,7 +121,7 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/image.py rtc_timeoffset = int(vmConfig['platform'].get('rtc_timeoffset', 0)) if int(vmConfig['platform'].get('localtime', 0)): if time.localtime(time.time())[8]: -@@ -392,6 +394,91 @@ class ImageHandler: +@@ -392,6 +394,95 @@ class ImageHandler: sentinel_fifos_inuse[sentinel_path_fifo] = 1 self.sentinel_path_fifo = sentinel_path_fifo @@ -140,6 +140,10 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/image.py + self.xenpaging_logfile = "/var/log/xen/xenpaging-%s.log" % str(self.vm.info['name_label']) + logfile_mode = os.O_WRONLY|os.O_CREAT|os.O_APPEND|os.O_TRUNC + null = os.open("/dev/null", os.O_RDONLY) ++ try: ++ os.unlink(self.xenpaging_logfile) ++ except: ++ pass + logfd = os.open(self.xenpaging_logfile, logfile_mode, 0644) + sys.stderr.flush() + contract = osdep.prefork("%s:%d" % (self.vm.getName(), self.vm.getDomid())) diff --git a/xenpaging.autostart_delay.patch b/xenpaging.autostart_delay.patch index d9c8835..0c28af0 100644 --- a/xenpaging.autostart_delay.patch +++ b/xenpaging.autostart_delay.patch @@ -11,8 +11,8 @@ TODO: find the correct place to remove the xenstore directory when the guest 
is Signed-off-by: Olaf Hering --- - tools/python/xen/xend/image.py | 28 ++++++++++++++++++++++++++++ - 1 file changed, 28 insertions(+) + tools/python/xen/xend/image.py | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) --- xen-4.0.1-testing.orig/tools/python/xen/xend/image.py +++ xen-4.0.1-testing/tools/python/xen/xend/image.py @@ -53,7 +53,7 @@ Signed-off-by: Olaf Hering xenpaging_bin = auxbin.pathTo("xenpaging") args = [xenpaging_bin] args = args + ([ "%d" % self.vm.getDomid()]) -@@ -430,6 +453,9 @@ class ImageHandler: +@@ -434,6 +457,9 @@ class ImageHandler: except: log.warn("chdir %s failed" % xenpaging_dir) try: @@ -63,7 +63,15 @@ Signed-off-by: Olaf Hering log.info("starting %s" % args) os.execve(xenpaging_bin, args, env) except Exception, e: -@@ -449,6 +475,8 @@ class ImageHandler: +@@ -449,10 +475,16 @@ class ImageHandler: + self.xenpaging_pid = xenpaging_pid + os.close(null) + os.close(logfd) ++ if self.xenpaging_delay == 0.0: ++ log.warn("waiting for xenpaging ...") ++ time.sleep(22) ++ log.warn("waiting for xenpaging done.") + def destroyXenPaging(self): if self.xenpaging is None: return diff --git a/xenpaging.guest_remove_page.patch b/xenpaging.guest_remove_page.patch new file mode 100644 index 0000000..e6f9c46 --- /dev/null +++ b/xenpaging.guest_remove_page.patch @@ -0,0 +1,176 @@ +Subject: xenpaging: drop paged pages in guest_remove_page + +Simply drop paged-pages in guest_remove_page(), and notify xenpaging to +drop reference to the gfn. 
+ +Signed-off-by: Olaf Hering +--- + tools/xenpaging/xenpaging.c | 17 +++++++--- + xen/arch/x86/mm/p2m.c | 65 +++++++++++++++++++++++++++++++---------- + xen/common/memory.c | 6 +++ + xen/include/asm-x86/p2m.h | 4 ++ + xen/include/public/mem_event.h | 1 + 5 files changed, 73 insertions(+), 20 deletions(-) + +--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c ++++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c +@@ -598,12 +598,19 @@ int main(int argc, char *argv[]) + goto out; + } + +- /* Populate the page */ +- rc = xenpaging_populate_page(paging, &req.gfn, fd, i); +- if ( rc != 0 ) ++ if ( req.flags & MEM_EVENT_FLAG_DROP_PAGE ) + { +- ERROR("Error populating page"); +- goto out; ++ DPRINTF("Dropping page %"PRIx64" p2mt %x\n", req.gfn, req.p2mt); ++ } ++ else ++ { ++ /* Populate the page */ ++ rc = xenpaging_populate_page(paging, &req.gfn, fd, i); ++ if ( rc != 0 ) ++ { ++ ERROR("Error populating page"); ++ goto out; ++ } + } + + /* Prepare the response */ +--- xen-4.0.1-testing.orig/xen/arch/x86/mm/p2m.c ++++ xen-4.0.1-testing/xen/arch/x86/mm/p2m.c +@@ -2000,12 +2000,15 @@ p2m_remove_page(struct domain *d, unsign + + P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn); + +- for ( i = 0; i < (1UL << page_order); i++ ) ++ if ( mfn_valid(_mfn(mfn)) ) + { +- mfn_return = d->arch.p2m->get_entry(d, gfn + i, &t, p2m_query); +- if ( !p2m_is_grant(t) ) +- set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY); +- ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) ); ++ for ( i = 0; i < (1UL << page_order); i++ ) ++ { ++ mfn_return = d->arch.p2m->get_entry(d, gfn + i, &t, p2m_query); ++ if ( !p2m_is_grant(t) ) ++ set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY); ++ ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) ); ++ } + } + set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid); + } +@@ -2533,6 +2536,35 @@ int p2m_mem_paging_evict(struct domain * + return 0; + } + ++void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn) ++{ ++ struct vcpu *v = 
current; ++ mem_event_request_t req; ++ p2m_type_t p2mt; ++ ++ memset(&req, 0, sizeof(req)); ++ ++ /* Check that there's space on the ring for this request */ ++ if ( mem_event_check_ring(d) ) ++ return; ++ ++ gfn_to_mfn(d, gfn, &p2mt); ++ /* Pause domain */ ++ if ( v->domain->domain_id == d->domain_id ) ++ { ++ vcpu_pause_nosync(v); ++ req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED; ++ } ++ ++ /* Send request to pager */ ++ req.flags |= MEM_EVENT_FLAG_DROP_PAGE; ++ req.gfn = gfn; ++ req.p2mt = p2mt; ++ req.vcpu_id = v->vcpu_id; ++ ++ mem_event_put_request(d, &req); ++} ++ + void p2m_mem_paging_populate(struct domain *d, unsigned long gfn) + { + struct vcpu *v = current; +@@ -2597,17 +2629,20 @@ void p2m_mem_paging_resume(struct domain + /* Pull the response off the ring */ + mem_event_get_response(d, &rsp); + +- /* Fix p2m entry */ +- mfn = gfn_to_mfn(d, rsp.gfn, &p2mt); +- if (mfn_valid(mfn)) ++ if ( !( rsp.flags & MEM_EVENT_FLAG_DROP_PAGE ) ) + { +- p2m_lock(d->arch.p2m); +- set_p2m_entry(d, rsp.gfn, mfn, 0, p2m_ram_rw); +- set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn); +- p2m_unlock(d->arch.p2m); +- } else { +- gdprintk(XENLOG_ERR, "invalid mfn %lx for gfn %lx p2mt %x\n", +- mfn_x(mfn), rsp.gfn, p2mt); ++ /* Fix p2m entry */ ++ mfn = gfn_to_mfn(d, rsp.gfn, &p2mt); ++ if (mfn_valid(mfn)) ++ { ++ p2m_lock(d->arch.p2m); ++ set_p2m_entry(d, rsp.gfn, mfn, 0, p2m_ram_rw); ++ set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn); ++ p2m_unlock(d->arch.p2m); ++ } else { ++ gdprintk(XENLOG_ERR, "invalid mfn %lx for gfn %lx p2mt %x\n", ++ mfn_x(mfn), rsp.gfn, p2mt); ++ } + } + + /* Unpause domain */ +--- xen-4.0.1-testing.orig/xen/common/memory.c ++++ xen-4.0.1-testing/xen/common/memory.c +@@ -162,6 +162,12 @@ int guest_remove_page(struct domain *d, + + #ifdef CONFIG_X86 + mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt)); ++ if ( unlikely(p2m_is_paging(p2mt)) ) ++ { ++ guest_physmap_remove_page(d, gmfn, mfn, 0); ++ p2m_mem_paging_drop_page(d, gmfn); ++ return 1; ++ } + #else + mfn = gmfn_to_mfn(d, gmfn); + 
#endif +--- xen-4.0.1-testing.orig/xen/include/asm-x86/p2m.h ++++ xen-4.0.1-testing/xen/include/asm-x86/p2m.h +@@ -441,6 +441,8 @@ int set_shared_p2m_entry(struct domain * + int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn); + /* Evict a frame */ + int p2m_mem_paging_evict(struct domain *d, unsigned long gfn); ++/* Tell xenpaging to drop a paged out frame */ ++void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn); + /* Start populating a paged out frame */ + void p2m_mem_paging_populate(struct domain *d, unsigned long gfn); + /* Prepare the p2m for paging a frame in */ +@@ -448,6 +450,8 @@ int p2m_mem_paging_prep(struct domain *d + /* Resume normal operation (in case a domain was paused) */ + void p2m_mem_paging_resume(struct domain *d); + #else ++static inline void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn) ++{ } + static inline void p2m_mem_paging_populate(struct domain *d, unsigned long gfn) + { } + #endif +--- xen-4.0.1-testing.orig/xen/include/public/mem_event.h ++++ xen-4.0.1-testing/xen/include/public/mem_event.h +@@ -37,6 +37,7 @@ + #define MEM_EVENT_FLAG_VCPU_PAUSED (1 << 0) + #define MEM_EVENT_FLAG_DOM_PAUSED (1 << 1) + #define MEM_EVENT_FLAG_OUT_OF_MEM (1 << 2) ++#define MEM_EVENT_FLAG_DROP_PAGE (1 << 3) + + + typedef struct mem_event_shared_page { diff --git a/xenpaging.machine_to_phys_mapping.patch b/xenpaging.machine_to_phys_mapping.patch new file mode 100644 index 0000000..ccd0bad --- /dev/null +++ b/xenpaging.machine_to_phys_mapping.patch @@ -0,0 +1,62 @@ +Subject: xenpaging: update machine_to_phys_mapping during page-in and page-out + +The machine_to_phys_mapping array needs updating during page-out. +If a page is gone, a call to get_gpfn_from_mfn will still return the old +gfn for an already paged-out page. This happens when the entire guest +ram is paged-out before xen_vga_populate_vram() runs. Then +XENMEM_populate_physmap is called with gfn 0xff000. 
A new page is +allocated with alloc_domheap_pages. This new page does not have a gfn +yet. However, in guest_physmap_add_entry() the passed mfn still maps to +an old gfn. This old gfn is paged-out and has no mfn anymore. As a +result, the ASSERT() triggers because p2m_is_ram() is true for +p2m_ram_paging* types. + +If the machine_to_phys_mapping array is updated properly, both loops in +guest_physmap_add_entry() turn into no-ops for the new page and the +mfn/gfn mapping will be done at the end of the function. + + +The same thing needs to happen during a page-in. +If XENMEM_add_to_physmap is used with XENMAPSPACE_gmfn, +get_gpfn_from_mfn() will return an apparently valid gfn. As a result, +guest_physmap_remove_page() is called. The ASSERT in p2m_remove_page +triggers because the passed mfn does not match the old mfn for the +passed gfn. + +Signed-off-by: Olaf Hering + +--- +v2: check whether mfn is valid + xen/arch/x86/mm/p2m.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- xen-4.0.1-testing.orig/xen/arch/x86/mm/p2m.c ++++ xen-4.0.1-testing/xen/arch/x86/mm/p2m.c +@@ -2524,6 +2524,7 @@ int p2m_mem_paging_evict(struct domain * + /* Remove mapping from p2m table */ + p2m_lock(d->arch.p2m); + set_p2m_entry(d, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paged); ++ set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY); + p2m_unlock(d->arch.p2m); + + /* Put the page back so it gets freed */ +@@ -2598,9 +2599,16 @@ void p2m_mem_paging_resume(struct domain + + /* Fix p2m entry */ + mfn = gfn_to_mfn(d, rsp.gfn, &p2mt); +- p2m_lock(d->arch.p2m); +- set_p2m_entry(d, rsp.gfn, mfn, 0, p2m_ram_rw); +- p2m_unlock(d->arch.p2m); ++ if (mfn_valid(mfn)) ++ { ++ p2m_lock(d->arch.p2m); ++ set_p2m_entry(d, rsp.gfn, mfn, 0, p2m_ram_rw); ++ set_gpfn_from_mfn(mfn_x(mfn), rsp.gfn); ++ p2m_unlock(d->arch.p2m); ++ } else { ++ gdprintk(XENLOG_ERR, "invalid mfn %lx for gfn %lx p2mt %x\n", ++ mfn_x(mfn), rsp.gfn, p2mt); ++ } + + /* Unpause domain */ + if ( rsp.flags &
MEM_EVENT_FLAG_VCPU_PAUSED ) diff --git a/xenpaging.mem_event_check_ring-free_requests.patch b/xenpaging.mem_event_check_ring-free_requests.patch index 727e437..8757d12 100644 --- a/xenpaging.mem_event_check_ring-free_requests.patch +++ b/xenpaging.mem_event_check_ring-free_requests.patch @@ -1,4 +1,4 @@ -Subject: xenpaging: print info when free request slots drop below 3 +Subject: xenpaging: print info when free request slots drop below 2 Add debugging aid to free request slots in the ring buffer. It should not happen that the ring gets full, print info anyway if it happens. @@ -15,7 +15,7 @@ Signed-off-by: Olaf Hering mem_event_ring_lock(d); free_requests = RING_FREE_REQUESTS(&d->mem_event.front_ring); -+ if ( unlikely(free_requests < 3) ) ++ if ( unlikely(free_requests < 2) ) + { + gdprintk(XENLOG_INFO, "free request slots: %d\n", free_requests); + WARN_ON(free_requests == 0); diff --git a/xenpaging.memory_op.patch b/xenpaging.memory_op.patch deleted file mode 100644 index a8eec85..0000000 --- a/xenpaging.memory_op.patch +++ /dev/null @@ -1,456 +0,0 @@ -Subject: xenpaging: handle paged-out pages in XENMEM_* commands - -Fix these two warings: -(XEN) Assertion '__mfn_valid(mfn_x(omfn))' failed at p2m.c:2200 -(XEN) memory.c:171:d1 Domain 1 page number 37ff0 invalid - -Handle paged-out pages in xc_memory_op, guest_physmap_add_entry and -guest_remove_page. Use new do_xenmem_op_retry helper function. -In addition, export also xen/errno.h to hvmloader to get ENOENT define. 
- - -XENMEM_populate_physmap - populate_physmap - -> guest_physmap_add_entry - -XENMEM_exchange - memory_exchange - -> guest_physmap_add_entry - -XENMEM_add_to_physmap - guest_physmap_add_page - -> guest_physmap_add_entry - -__gnttab_map_grant_ref - create_grant_host_mapping - create_grant_p2m_mapping - -> guest_physmap_add_entry - -XENMEM_decrease_reservation - decrease_reservation - -> guest_remove_page - -XENMEM_add_to_physmap - -> guest_remove_page - -XENMEM_add_to_physmap - -> XENMAPSPACE_gmfn - -Signed-off-by: Olaf Hering - ---- - tools/firmware/hvmloader/hvmloader.c | 9 +++- - tools/firmware/hvmloader/util.c | 26 +++++++++++- - tools/include/Makefile | 1 - tools/ioemu-qemu-xen/hw/vga.c | 5 +- - tools/libxc/xc_domain.c | 73 ++++++++++++++++++++++------------- - xen/arch/x86/mm.c | 26 ++++++++++-- - xen/arch/x86/mm/p2m.c | 7 +++ - xen/common/memory.c | 25 +++++++++++ - 8 files changed, 133 insertions(+), 39 deletions(-) - ---- xen-4.0.1-testing.orig/tools/firmware/hvmloader/hvmloader.c -+++ xen-4.0.1-testing/tools/firmware/hvmloader/hvmloader.c -@@ -29,6 +29,7 @@ - #include "pci_regs.h" - #include "e820.h" - #include "option_rom.h" -+#include - #include - #include - #include -@@ -306,13 +307,19 @@ static void pci_setup(void) - while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend ) - { - struct xen_add_to_physmap xatp; -+ int rc; - if ( hvm_info->high_mem_pgend == 0 ) - hvm_info->high_mem_pgend = 1ull << (32 - PAGE_SHIFT); - xatp.domid = DOMID_SELF; - xatp.space = XENMAPSPACE_gmfn; - xatp.idx = --hvm_info->low_mem_pgend; - xatp.gpfn = hvm_info->high_mem_pgend++; -- if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 ) -+ do { -+ rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp); -+ if ( rc == -ENOENT ) -+ cpu_relax(); -+ } while ( rc == -ENOENT ); -+ if ( rc != 0 ) - BUG(); - } - ---- xen-4.0.1-testing.orig/tools/firmware/hvmloader/util.c -+++ xen-4.0.1-testing/tools/firmware/hvmloader/util.c -@@ -23,6 +23,7 @@ - #include "e820.h" - 
#include "hypercall.h" - #include -+#include - #include - #include - -@@ -323,19 +324,27 @@ void *mem_alloc(uint32_t size, uint32_t - - while ( (reserve >> PAGE_SHIFT) != (e >> PAGE_SHIFT) ) - { -+ int rc; - reserve += PAGE_SIZE; - mfn = reserve >> PAGE_SHIFT; - - /* Try to allocate a brand new page in the reserved area. */ - if ( !over_allocated ) - { -+ uint8_t delay = 0; - xmr.domid = DOMID_SELF; - xmr.mem_flags = 0; - xmr.extent_order = 0; - xmr.nr_extents = 1; - set_xen_guest_handle(xmr.extent_start, &mfn); -- if ( hypercall_memory_op(XENMEM_populate_physmap, &xmr) == 1 ) -+ do { -+ rc = hypercall_memory_op(XENMEM_populate_physmap, &xmr); -+ if ( rc == 0 ) -+ cpu_relax(); -+ } while ( rc == 0 && ++delay ); -+ if ( rc == 1 ) - continue; -+ printf("%s: over_allocated\n", __func__); - over_allocated = 1; - } - -@@ -353,7 +362,12 @@ void *mem_alloc(uint32_t size, uint32_t - xatp.domid = DOMID_SELF; - xatp.space = XENMAPSPACE_gmfn; - xatp.gpfn = mfn; -- if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 ) -+ do { -+ rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp); -+ if ( rc == -ENOENT ) -+ cpu_relax(); -+ } while ( rc == -ENOENT ); -+ if ( rc != 0 ) - BUG(); - } - -@@ -595,6 +609,7 @@ uint16_t get_cpu_mhz(void) - uint64_t cpu_khz; - uint32_t tsc_to_nsec_mul, version; - int8_t tsc_shift; -+ int rc; - - static uint16_t cpu_mhz; - if ( cpu_mhz != 0 ) -@@ -605,7 +620,12 @@ uint16_t get_cpu_mhz(void) - xatp.space = XENMAPSPACE_shared_info; - xatp.idx = 0; - xatp.gpfn = (unsigned long)shared_info >> 12; -- if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 ) -+ do { -+ rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp); -+ if ( rc == -ENOENT ) -+ cpu_relax(); -+ } while ( rc == -ENOENT ); -+ if ( rc != 0 ) - BUG(); - - /* Get a consistent snapshot of scale factor (multiplier and shift). 
*/ ---- xen-4.0.1-testing.orig/tools/include/Makefile -+++ xen-4.0.1-testing/tools/include/Makefile -@@ -12,6 +12,7 @@ xen/.dir: - @rm -rf xen - mkdir -p xen/libelf - ln -sf ../$(XEN_ROOT)/xen/include/public/COPYING xen -+ ln -sf ../$(XEN_ROOT)/xen/include/xen/errno.h xen - ln -sf $(addprefix ../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) xen - ln -sf $(addprefix ../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) xen - ln -sf ../xen-sys/$(XEN_OS) xen/sys ---- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/vga.c -+++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/vga.c -@@ -2157,9 +2157,10 @@ void set_vram_mapping(void *opaque, unsi - for (i = 0; i < (end - begin) >> TARGET_PAGE_BITS; i++) { - xatp.idx = (s->vram_gmfn >> TARGET_PAGE_BITS) + i; - xatp.gpfn = (begin >> TARGET_PAGE_BITS) + i; -- rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp); -+ while ((rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp)) && errno == ENOENT) -+ usleep(1000); - if (rc) { -- fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %"PRI_xen_pfn" failed: %d\n", xatp.idx, xatp.gpfn, rc); -+ fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %"PRI_xen_pfn" failed: %d\n", xatp.idx, xatp.gpfn, errno); - return; - } - } ---- xen-4.0.1-testing.orig/tools/libxc/xc_domain.c -+++ xen-4.0.1-testing/tools/libxc/xc_domain.c -@@ -536,6 +536,46 @@ int xc_domain_get_tsc_info(int xc_handle - return rc; - } - -+static int do_xenmem_op_retry(int xc_handle, int cmd, struct xen_memory_reservation *reservation, unsigned long nr_extents, xen_pfn_t *extent_start) -+{ -+ int err = 0; -+ unsigned long count = nr_extents; -+ unsigned long delay = 0; -+ unsigned long start = 0; -+ -+ fprintf(stderr, "%s: cmd %d count %lx\n",__func__,cmd,count); -+ while ( count && start < nr_extents ) -+ { -+ set_xen_guest_handle(reservation->extent_start, extent_start + start); -+ reservation->nr_extents = count; -+ -+ err = xc_memory_op(xc_handle, cmd, reservation); -+ if ( err == count ) 
-+ { -+ err = 0; -+ break; -+ } -+ -+ if ( err > count || err < 0 || delay > 1000 * 1000) -+ { -+ fprintf(stderr, "%s: %d err %x count %lx start %lx delay %lu/%lu\n",__func__,cmd,err,count,start,delay,delay/(1<<15)); -+ err = -1; -+ break; -+ } -+ -+ if ( err ) -+ { -+ delay = 0; -+ start += err; -+ count -= err; -+ } -+ -+ usleep(delay); -+ delay += 1 << 15; /* 31 iterations, 15 seconds */ -+ } -+ -+ return err; -+} - - int xc_domain_memory_increase_reservation(int xc_handle, - uint32_t domid, -@@ -546,26 +586,18 @@ int xc_domain_memory_increase_reservatio - { - int err; - struct xen_memory_reservation reservation = { -- .nr_extents = nr_extents, - .extent_order = extent_order, - .mem_flags = mem_flags, - .domid = domid - }; - -- /* may be NULL */ -- set_xen_guest_handle(reservation.extent_start, extent_start); -- -- err = xc_memory_op(xc_handle, XENMEM_increase_reservation, &reservation); -- if ( err == nr_extents ) -- return 0; -- -- if ( err >= 0 ) -+ err = do_xenmem_op_retry(xc_handle, XENMEM_increase_reservation, &reservation, nr_extents, extent_start); -+ if ( err < 0 ) - { - DPRINTF("Failed allocation for dom %d: " - "%ld extents of order %d, mem_flags %x\n", - domid, nr_extents, extent_order, mem_flags); - errno = ENOMEM; -- err = -1; - } - - return err; -@@ -579,14 +611,11 @@ int xc_domain_memory_decrease_reservatio - { - int err; - struct xen_memory_reservation reservation = { -- .nr_extents = nr_extents, - .extent_order = extent_order, - .mem_flags = 0, - .domid = domid - }; - -- set_xen_guest_handle(reservation.extent_start, extent_start); -- - if ( extent_start == NULL ) - { - DPRINTF("decrease_reservation extent_start is NULL!\n"); -@@ -594,16 +623,12 @@ int xc_domain_memory_decrease_reservatio - return -1; - } - -- err = xc_memory_op(xc_handle, XENMEM_decrease_reservation, &reservation); -- if ( err == nr_extents ) -- return 0; -- -- if ( err >= 0 ) -+ err = do_xenmem_op_retry(xc_handle, XENMEM_decrease_reservation, &reservation, nr_extents, 
extent_start); -+ if ( err < 0 ) - { - DPRINTF("Failed deallocation for dom %d: %ld extents of order %d\n", - domid, nr_extents, extent_order); - errno = EINVAL; -- err = -1; - } - - return err; -@@ -618,23 +643,17 @@ int xc_domain_memory_populate_physmap(in - { - int err; - struct xen_memory_reservation reservation = { -- .nr_extents = nr_extents, - .extent_order = extent_order, - .mem_flags = mem_flags, - .domid = domid - }; -- set_xen_guest_handle(reservation.extent_start, extent_start); -- -- err = xc_memory_op(xc_handle, XENMEM_populate_physmap, &reservation); -- if ( err == nr_extents ) -- return 0; - -- if ( err >= 0 ) -+ err = do_xenmem_op_retry(xc_handle, XENMEM_populate_physmap, &reservation, nr_extents, extent_start); -+ if ( err < 0 ) - { - DPRINTF("Failed allocation for dom %d: %ld extents of order %d\n", - domid, nr_extents, extent_order); - errno = EBUSY; -- err = -1; - } - - return err; ---- xen-4.0.1-testing.orig/xen/arch/x86/mm.c -+++ xen-4.0.1-testing/xen/arch/x86/mm.c -@@ -3660,6 +3660,8 @@ static int create_grant_p2m_mapping(uint - p2mt = p2m_grant_map_rw; - rc = guest_physmap_add_entry(current->domain, addr >> PAGE_SHIFT, - frame, 0, p2mt); -+ if ( rc == -ENOENT ) -+ return GNTST_eagain; - if ( rc ) - return GNTST_general_error; - else -@@ -4315,17 +4317,25 @@ long arch_memory_op(int op, XEN_GUEST_HA - case XENMAPSPACE_gmfn: - { - p2m_type_t p2mt; -+ unsigned long tmp_mfn; - -- xatp.idx = mfn_x(gfn_to_mfn_unshare(d, xatp.idx, &p2mt, 0)); -+ tmp_mfn = mfn_x(gfn_to_mfn_unshare(d, xatp.idx, &p2mt, 0)); -+ if ( unlikely(p2m_is_paging(p2mt)) ) -+ { -+ if ( p2m_is_paged(p2mt) ) -+ p2m_mem_paging_populate(d, xatp.idx); -+ rcu_unlock_domain(d); -+ return -ENOENT; -+ } - /* If the page is still shared, exit early */ - if ( p2m_is_shared(p2mt) ) - { - rcu_unlock_domain(d); - return -ENOMEM; - } -- if ( !get_page_from_pagenr(xatp.idx, d) ) -+ if ( !get_page_from_pagenr(tmp_mfn, d) ) - break; -- mfn = xatp.idx; -+ mfn = tmp_mfn; - page = mfn_to_page(mfn); 
- break; - } -@@ -4354,8 +4364,16 @@ long arch_memory_op(int op, XEN_GUEST_HA - /* Xen heap frames are simply unhooked from this phys slot. */ - guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0); - else -+ { - /* Normal domain memory is freed, to avoid leaking memory. */ -- guest_remove_page(d, xatp.gpfn); -+ rc = guest_remove_page(d, xatp.gpfn); -+ if ( rc == -ENOENT ) -+ { -+ domain_unlock(d); -+ rcu_unlock_domain(d); -+ return rc; -+ } -+ } - } - - /* Unmap from old location, if any. */ ---- xen-4.0.1-testing.orig/xen/arch/x86/mm/p2m.c -+++ xen-4.0.1-testing/xen/arch/x86/mm/p2m.c -@@ -2186,6 +2186,13 @@ guest_physmap_add_entry(struct domain *d - P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", - mfn + i, ogfn, gfn + i); - omfn = gfn_to_mfn_query(d, ogfn, &ot); -+ if ( unlikely(p2m_is_paging(ot)) ) -+ { -+ p2m_unlock(d->arch.p2m); -+ if ( p2m_is_paged(ot) ) -+ p2m_mem_paging_populate(d, ogfn); -+ return -ENOENT; -+ } - /* If we get here, we know the local domain owns the page, - so it can't have been grant mapped in. 
*/ - BUG_ON( p2m_is_grant(ot) ); ---- xen-4.0.1-testing.orig/xen/common/memory.c -+++ xen-4.0.1-testing/xen/common/memory.c -@@ -95,6 +95,7 @@ static void populate_physmap(struct memo - unsigned long i, j; - xen_pfn_t gpfn, mfn; - struct domain *d = a->domain; -+ int rc; - - if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done, - a->nr_extents-1) ) -@@ -134,7 +135,12 @@ static void populate_physmap(struct memo - } - - mfn = page_to_mfn(page); -- guest_physmap_add_page(d, gpfn, mfn, a->extent_order); -+ rc = guest_physmap_add_page(d, gpfn, mfn, a->extent_order); -+ if ( rc != 0 ) -+ { -+ free_domheap_pages(page, a->extent_order); -+ goto out; -+ } - - if ( !paging_mode_translate(d) ) - { -@@ -162,6 +168,12 @@ int guest_remove_page(struct domain *d, - - #ifdef CONFIG_X86 - mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt)); -+ if ( unlikely(p2m_is_paging(p2mt)) ) -+ { -+ if ( p2m_is_paged(p2mt) ) -+ p2m_mem_paging_populate(d, gmfn); -+ return -ENOENT; -+ } - #else - mfn = gmfn_to_mfn(d, gmfn); - #endif -@@ -360,6 +372,13 @@ static long memory_exchange(XEN_GUEST_HA - - /* Shared pages cannot be exchanged */ - mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn + k, &p2mt, 0)); -+ if ( p2m_is_paging(p2mt) ) -+ { -+ if ( p2m_is_paged(p2mt) ) -+ p2m_mem_paging_populate(d, gmfn); -+ rc = -ENOENT; -+ goto fail; -+ } - if ( p2m_is_shared(p2mt) ) - { - rc = -ENOMEM; -@@ -456,7 +475,9 @@ static long memory_exchange(XEN_GUEST_HA - &gpfn, exch.out.extent_start, (i< + +--- + tools/xenpaging/xenpaging.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c ++++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c +@@ -377,7 +377,7 @@ int xenpaging_evict_page(xenpaging_t *pa + return ret; + } + +-static int xenpaging_resume_page(xenpaging_t *paging, mem_event_response_t *rsp) ++static int xenpaging_resume_page(xenpaging_t *paging, mem_event_response_t *rsp, int notify_policy) + { + int ret; + +@@ -387,7 +387,8 @@ static int 
xenpaging_resume_page(xenpagi + goto out; + + /* Notify policy of page being paged in */ +- policy_notify_paged_in(paging->mem_event.domain_id, rsp->gfn); ++ if ( notify_policy ) ++ policy_notify_paged_in(paging->mem_event.domain_id, rsp->gfn); + + /* Tell Xen page is ready */ + ret = xc_mem_paging_resume(paging->xc_handle, paging->mem_event.domain_id, +@@ -619,7 +620,7 @@ int main(int argc, char *argv[]) + rsp.vcpu_id = req.vcpu_id; + rsp.flags = req.flags; + +- rc = xenpaging_resume_page(paging, &rsp); ++ rc = xenpaging_resume_page(paging, &rsp, 1); + if ( rc != 0 ) + { + ERROR("Error resuming page"); +@@ -648,7 +649,7 @@ int main(int argc, char *argv[]) + rsp.vcpu_id = req.vcpu_id; + rsp.flags = req.flags; + +- rc = xenpaging_resume_page(paging, &rsp); ++ rc = xenpaging_resume_page(paging, &rsp, 0); + if ( rc != 0 ) + { + ERROR("Error resuming"); diff --git a/xenpaging.num_pages_equal_max_pages.patch b/xenpaging.num_pages_equal_max_pages.patch new file mode 100644 index 0000000..e7c8f8f --- /dev/null +++ b/xenpaging.num_pages_equal_max_pages.patch @@ -0,0 +1,37 @@ +Subject: xenpaging: allow negative num_pages and limit num_pages + +Simplify paging size argument. If a negative number is specified, it +means the entire guest memory should be paged out. This is useful for +debugging. Also limit num_pages to the guests max_pages. 
+ +Signed-off-by: Olaf Hering + +--- + tools/xenpaging/xenpaging.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c ++++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c +@@ -512,8 +512,6 @@ int main(int argc, char *argv[]) + domain_id = atoi(argv[1]); + num_pages = atoi(argv[2]); + +- victims = calloc(num_pages, sizeof(xenpaging_victim_t)); +- + /* Seed random-number generator */ + srand(time(NULL)); + +@@ -534,6 +532,13 @@ int main(int argc, char *argv[]) + return 2; + } + ++ if ( num_pages < 0 || num_pages > paging->domain_info->max_pages ) ++ { ++ num_pages = paging->domain_info->max_pages; ++ DPRINTF("setting num_pages to %d\n", num_pages); ++ } ++ victims = calloc(num_pages, sizeof(xenpaging_victim_t)); ++ + /* ensure that if we get a signal, we'll do cleanup, then exit */ + act.sa_handler = close_handler; + act.sa_flags = 0; diff --git a/xenpaging.optimize_p2m_mem_paging_populate.patch b/xenpaging.optimize_p2m_mem_paging_populate.patch new file mode 100644 index 0000000..21d1cee --- /dev/null +++ b/xenpaging.optimize_p2m_mem_paging_populate.patch @@ -0,0 +1,44 @@ +Subject: xenpaging: optimize p2m_mem_paging_populate + +p2m_mem_paging_populate will always put another request in the ring. To +reduce pressure on the ring, place only required requests in the ring. +If the gfn was already processed by another thread, and the current vcpu +does not need to be paused, p2m_mem_paging_resume will do nothing with +the request. And also xenpaging will drop the request if the vcpu does +not need a wakeup. 
+ +Signed-off-by: Olaf Hering + +--- + xen/arch/x86/mm/p2m.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- xen-4.0.1-testing.orig/xen/arch/x86/mm/p2m.c ++++ xen-4.0.1-testing/xen/arch/x86/mm/p2m.c +@@ -2571,12 +2571,12 @@ void p2m_mem_paging_populate(struct doma + mem_event_request_t req; + p2m_type_t p2mt; + +- memset(&req, 0, sizeof(req)); +- + /* Check that there's space on the ring for this request */ + if ( mem_event_check_ring(d) ) + return; + ++ memset(&req, 0, sizeof(req)); ++ + /* Fix p2m mapping */ + /* XXX: It seems inefficient to have this here, as it's only needed + * in one case (ept guest accessing paging out page) */ +@@ -2594,6 +2594,11 @@ void p2m_mem_paging_populate(struct doma + vcpu_pause_nosync(v); + req.flags |= MEM_EVENT_FLAG_VCPU_PAUSED; + } ++ else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged ) ++ { ++ /* gfn is already on its way back and vcpu is not paused */ ++ return; ++ } + + /* Send request to pager */ + req.gfn = gfn; diff --git a/xenpaging.p2m_mem_paging_populate_if_p2m_ram_paged.patch b/xenpaging.p2m_mem_paging_populate_if_p2m_ram_paged.patch new file mode 100644 index 0000000..0833c26 --- /dev/null +++ b/xenpaging.p2m_mem_paging_populate_if_p2m_ram_paged.patch @@ -0,0 +1,29 @@ +Subject: xenpaging: when populating a page, check if populating is already in progress + +p2m_mem_paging_populate can be called several times from different +vcpus. If the page is already in state p2m_ram_paging_in and has a new +valid mfn, invalidating this new mfn will cause trouble later if +p2m_mem_paging_resume will set the new gfn/mfn pair back to state +p2m_ram_rw. +Detect this situation and do not change the p2m state if the page is +still in the process of being paged-out or is already paged-in. +In fact, p2m state p2m_ram_paged is the only state where the mfn type +can be invalidated.
+ +Signed-off-by: Olaf Hering + +--- + xen/arch/x86/mm/p2m.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- xen-4.0.1-testing.orig/xen/arch/x86/mm/p2m.c ++++ xen-4.0.1-testing/xen/arch/x86/mm/p2m.c +@@ -2581,7 +2581,7 @@ void p2m_mem_paging_populate(struct doma + /* XXX: It seems inefficient to have this here, as it's only needed + * in one case (ept guest accessing paging out page) */ + gfn_to_mfn(d, gfn, &p2mt); +- if ( p2mt != p2m_ram_paging_out ) ++ if ( p2mt == p2m_ram_paged ) + { + p2m_lock(d->arch.p2m); + set_p2m_entry(d, gfn, _mfn(PAGING_MFN), 0, p2m_ram_paging_in_start); diff --git a/xenpaging.page_already_populated.patch b/xenpaging.page_already_populated.patch new file mode 100644 index 0000000..67be96a --- /dev/null +++ b/xenpaging.page_already_populated.patch @@ -0,0 +1,23 @@ +Subject: xenpaging: print p2mt for already paged-in pages + +Add more debug output, print p2mt for pages which were requested more than once. + +Signed-off-by: Olaf Hering + +--- + tools/xenpaging/xenpaging.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c ++++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c +@@ -632,8 +632,10 @@ int main(int argc, char *argv[]) + else + { + DPRINTF("page already populated (domain = %d; vcpu = %d;" ++ " p2mt = %x;" + " gfn = %"PRIx64"; paused = %"PRId64")\n", + paging->mem_event.domain_id, req.vcpu_id, ++ req.p2mt, + req.gfn, req.flags & MEM_EVENT_FLAG_VCPU_PAUSED); + + /* Tell Xen to resume the vcpu */ diff --git a/xenpaging.populate_only_if_paged.patch b/xenpaging.populate_only_if_paged.patch index 008f3e3..85c31f1 100644 --- a/xenpaging.populate_only_if_paged.patch +++ b/xenpaging.populate_only_if_paged.patch @@ -1,114 +1,26 @@ -Subject: xenpaging: populate only paged-out pages +Subject: xenpaging: populate paged-out pages unconditionally -populdate a paged-out page only once to reduce pressure in the ringbuffer. -Several cpus may still request a page at once. 
xenpaging can handle this. +Populate a page unconditionally to avoid missing a page-in request. +If the page is already in the process of being paged-in, this vcpu +will be stopped and later resumed once the page content is usable again. + +This matches other p2m_mem_paging_populate usage in the source tree. Signed-off-by: Olaf Hering --- - xen/arch/x86/hvm/emulate.c | 3 ++- - xen/arch/x86/hvm/hvm.c | 17 ++++++++++------- - xen/arch/x86/mm/guest_walk.c | 3 ++- - xen/arch/x86/mm/hap/guest_walk.c | 6 ++++-- - 4 files changed, 18 insertions(+), 11 deletions(-) + xen/common/grant_table.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) ---- xen-4.0.1-testing.orig/xen/arch/x86/hvm/emulate.c -+++ xen-4.0.1-testing/xen/arch/x86/hvm/emulate.c -@@ -65,7 +65,8 @@ static int hvmemul_do_io( - ram_mfn = gfn_to_mfn_unshare(current->domain, ram_gfn, &p2mt, 0); - if ( p2m_is_paging(p2mt) ) - { -- p2m_mem_paging_populate(curr->domain, ram_gfn); -+ if ( p2m_is_paged(p2mt) ) -+ p2m_mem_paging_populate(curr->domain, ram_gfn); - return X86EMUL_RETRY; - } - if ( p2m_is_shared(p2mt) ) ---- xen-4.0.1-testing.orig/xen/arch/x86/hvm/hvm.c -+++ xen-4.0.1-testing/xen/arch/x86/hvm/hvm.c -@@ -291,7 +291,8 @@ static int hvm_set_ioreq_page( - return -EINVAL; - if ( p2m_is_paging(p2mt) ) - { -- p2m_mem_paging_populate(d, gmfn); -+ if ( p2m_is_paged(p2mt) ) -+ p2m_mem_paging_populate(d, gmfn); - return -ENOENT; - } - if ( p2m_is_shared(p2mt) ) -@@ -1324,7 +1325,8 @@ static void *hvm_map_entry(unsigned long - mfn = mfn_x(gfn_to_mfn_unshare(current->domain, gfn, &p2mt, 0)); - if ( p2m_is_paging(p2mt) ) - { -- p2m_mem_paging_populate(current->domain, gfn); -+ if ( p2m_is_paged(p2mt) ) -+ p2m_mem_paging_populate(current->domain, gfn); - return NULL; - } - if ( p2m_is_shared(p2mt) ) -@@ -1723,7 +1725,8 @@ static enum hvm_copy_result __hvm_copy( - +--- xen-4.0.1-testing.orig/xen/common/grant_table.c ++++ xen-4.0.1-testing/xen/common/grant_table.c +@@ -156,8 +156,7 @@ static int
__get_paged_frame(unsigned lo + *frame = mfn_x(mfn); if ( p2m_is_paging(p2mt) ) { -- p2m_mem_paging_populate(curr->domain, gfn); -+ if ( p2m_is_paged(p2mt) ) -+ p2m_mem_paging_populate(curr->domain, gfn); - return HVMCOPY_gfn_paged_out; +- if ( p2m_is_paged(p2mt) ) +- p2m_mem_paging_populate(rd, gfn); ++ p2m_mem_paging_populate(rd, gfn); + rc = GNTST_eagain; } - if ( p2m_is_shared(p2mt) ) -@@ -3032,8 +3035,8 @@ long do_hvm_op(unsigned long op, XEN_GUE - mfn_t mfn = gfn_to_mfn(d, pfn, &t); - if ( p2m_is_paging(t) ) - { -- p2m_mem_paging_populate(d, pfn); -- -+ if ( p2m_is_paged(t) ) -+ p2m_mem_paging_populate(d, pfn); - rc = -EINVAL; - goto param_fail3; - } -@@ -3096,8 +3099,8 @@ long do_hvm_op(unsigned long op, XEN_GUE - mfn = gfn_to_mfn_unshare(d, pfn, &t, 0); - if ( p2m_is_paging(t) ) - { -- p2m_mem_paging_populate(d, pfn); -- -+ if ( p2m_is_paged(t) ) -+ p2m_mem_paging_populate(d, pfn); - rc = -EINVAL; - goto param_fail4; - } ---- xen-4.0.1-testing.orig/xen/arch/x86/mm/guest_walk.c -+++ xen-4.0.1-testing/xen/arch/x86/mm/guest_walk.c -@@ -96,7 +96,8 @@ static inline void *map_domain_gfn(struc - *mfn = gfn_to_mfn_unshare(d, gfn_x(gfn), p2mt, 0); - if ( p2m_is_paging(*p2mt) ) - { -- p2m_mem_paging_populate(d, gfn_x(gfn)); -+ if ( p2m_is_paged(*p2mt) ) -+ p2m_mem_paging_populate(d, gfn_x(gfn)); - - *rc = _PAGE_PAGED; - return NULL; ---- xen-4.0.1-testing.orig/xen/arch/x86/mm/hap/guest_walk.c -+++ xen-4.0.1-testing/xen/arch/x86/mm/hap/guest_walk.c -@@ -49,7 +49,8 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN - top_mfn = gfn_to_mfn_unshare(v->domain, cr3 >> PAGE_SHIFT, &p2mt, 0); - if ( p2m_is_paging(p2mt) ) - { -- p2m_mem_paging_populate(v->domain, cr3 >> PAGE_SHIFT); -+ if ( p2m_is_paged(p2mt) ) -+ p2m_mem_paging_populate(v->domain, cr3 >> PAGE_SHIFT); - - pfec[0] = PFEC_page_paged; - return INVALID_GFN; -@@ -81,7 +82,8 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN - gfn_to_mfn_unshare(v->domain, gfn_x(gfn), &p2mt, 0); - if ( p2m_is_paging(p2mt) ) - { -- 
p2m_mem_paging_populate(v->domain, gfn_x(gfn)); -+ if ( p2m_is_paged(p2mt) ) -+ p2m_mem_paging_populate(v->domain, gfn_x(gfn)); - - pfec[0] = PFEC_page_paged; - return INVALID_GFN; + }