diff --git a/18778-msi-irq-fix.patch b/18778-msi-irq-fix.patch index 996397c..32ef067 100644 --- a/18778-msi-irq-fix.patch +++ b/18778-msi-irq-fix.patch @@ -14,10 +14,35 @@ the entry_nr range check in __pci_enable_msix(). Signed-off-by: Jan Beulich -Index: xen-3.3.1-testing/xen/arch/x86/irq.c -=================================================================== ---- xen-3.3.1-testing.orig/xen/arch/x86/irq.c -+++ xen-3.3.1-testing/xen/arch/x86/irq.c +# HG changeset patch +# User Keir Fraser +# Date 1232549083 0 +# Node ID af1d9af1a993001bdfdb81d9af1af4fd4a9d3852 +# Parent 033945166a3a5f3078b1e583bc5e50871ef7e801 +x86: Fix unmaskable MSI handling. + +Signed-off-by: Keir Fraser + +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -1567,11 +1567,14 @@ static unsigned int startup_msi_vector(u + + static void ack_msi_vector(unsigned int vector) + { +- ack_APIC_irq(); ++ if ( msi_maskable_irq(irq_desc[vector].msi_desc) ) ++ ack_APIC_irq(); /* ACKTYPE_NONE */ + } + + static void end_msi_vector(unsigned int vector) + { ++ if ( !msi_maskable_irq(irq_desc[vector].msi_desc) ) ++ ack_APIC_irq(); /* ACKTYPE_EOI */ + } + + static void shutdown_msi_vector(unsigned int vector) +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c @@ -463,14 +463,19 @@ int pirq_acktype(struct domain *d, int i /* * Edge-triggered IO-APIC and LAPIC interrupts need no final @@ -41,10 +66,8 @@ Index: xen-3.3.1-testing/xen/arch/x86/irq.c * Level-triggered IO-APIC interrupts need to be acknowledged on the CPU * on which they were received. This is because we tickle the LAPIC to EOI. 
*/ -Index: xen-3.3.1-testing/xen/arch/x86/msi.c -=================================================================== ---- xen-3.3.1-testing.orig/xen/arch/x86/msi.c -+++ xen-3.3.1-testing/xen/arch/x86/msi.c +--- a/xen/arch/x86/msi.c ++++ b/xen/arch/x86/msi.c @@ -303,6 +303,13 @@ static void msix_flush_writes(unsigned i } } @@ -77,10 +100,8 @@ Index: xen-3.3.1-testing/xen/arch/x86/msi.c { spin_unlock(&pdev->lock); return -EINVAL; -Index: xen-3.3.1-testing/xen/include/asm-x86/msi.h -=================================================================== ---- xen-3.3.1-testing.orig/xen/include/asm-x86/msi.h -+++ xen-3.3.1-testing/xen/include/asm-x86/msi.h +--- a/xen/include/asm-x86/msi.h ++++ b/xen/include/asm-x86/msi.h @@ -97,6 +97,8 @@ struct msi_desc { int remap_index; /* index in interrupt remapping table */ }; diff --git a/18795-x86-ioapic-guest-write.patch b/18795-x86-ioapic-guest-write.patch index 1db847d..57924ce 100644 --- a/18795-x86-ioapic-guest-write.patch +++ b/18795-x86-ioapic-guest-write.patch @@ -7,11 +7,9 @@ x86: secure ioapic_guest_write() against FREE_TO_ASSIGN irq values Signed-off-by: Jan Beulich -Index: xen-3.3.1-testing/xen/arch/x86/io_apic.c -=================================================================== ---- xen-3.3.1-testing.orig/xen/arch/x86/io_apic.c -+++ xen-3.3.1-testing/xen/arch/x86/io_apic.c -@@ -2196,7 +2196,7 @@ int ioapic_guest_write(unsigned long phy +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -2199,7 +2199,7 @@ int ioapic_guest_write(unsigned long phy if ( new_rte.vector >= FIRST_DYNAMIC_VECTOR ) new_irq = vector_irq[new_rte.vector]; @@ -20,7 +18,7 @@ Index: xen-3.3.1-testing/xen/arch/x86/io_apic.c { if ( irq_desc[IO_APIC_VECTOR(old_irq)].action ) { -@@ -2208,7 +2208,7 @@ int ioapic_guest_write(unsigned long phy +@@ -2211,7 +2211,7 @@ int ioapic_guest_write(unsigned long phy remove_pin_at_irq(old_irq, apic, pin); } diff --git a/18879-cpufreq-params.patch b/18879-cpufreq-params.patch index 8d66d35..e0ce2dc 100644 
--- a/18879-cpufreq-params.patch +++ b/18879-cpufreq-params.patch @@ -21,6 +21,15 @@ depending on whether performance or power reduction is preferred, Signed-off-by: Jan Beulich +# HG changeset patch +# User Keir Fraser +# Date 1230557866 0 +# Node ID 4035ea96ae2fafba7a5a4c1e810aa7d591758e8c +# Parent 0af9fbf3f05306d4972cf05e4b6d7be2199a41cb +cpufreq: Fix a cpufreq cmdline parse bug, and change sample_rate unit + +Signed-off-by: Liu Jinsong + --- a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c +++ b/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c @@ -22,15 +22,22 @@ @@ -100,7 +109,7 @@ Signed-off-by: Jan Beulich + + if ( !strcmp(str, "rate") && val ) + { -+ usr_sampling_rate = simple_strtoull(val, NULL, 0); ++ usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1); + } + else if ( !strcmp(str, "threshold") && val ) + { diff --git a/vtd-error-handling.patch b/18887-vtd-error-handling.patch similarity index 87% rename from vtd-error-handling.patch rename to 18887-vtd-error-handling.patch index dc98afc..da219f7 100644 --- a/vtd-error-handling.patch +++ b/18887-vtd-error-handling.patch @@ -1,3 +1,16 @@ +# HG changeset patch +# User Keir Fraser +# Date 1228827329 0 +# Node ID 043aba2b67a195d2c2707f8fd0c05bbbf2078d2a +# Parent f7f8f44b9292a30707bd645739390ef3d0f22232 +VT-d: check return value of pirq_guest_bind() + +This eliminates a hypervisor crash when the respective domain dies or +gets the device hot removed.
+ +Signed-off-by: Jan Beulich +Reviewed-by: Weidong Han + --- a/xen/drivers/passthrough/io.c +++ b/xen/drivers/passthrough/io.c @@ -57,7 +57,7 @@ int pt_irq_create_bind_vtd( diff --git a/18905-x86-ioapic-boot-panic.patch b/18905-x86-ioapic-boot-panic.patch index 7de7eae..a8d7329 100644 --- a/18905-x86-ioapic-boot-panic.patch +++ b/18905-x86-ioapic-boot-panic.patch @@ -49,7 +49,7 @@ Signed-off-by: Thomas Gleixner /* * Expect a few ticks at least, to be sure some possible -@@ -1717,6 +1719,9 @@ static inline void check_timer(void) +@@ -1720,6 +1722,9 @@ static inline void check_timer(void) { int apic1, pin1, apic2, pin2; int vector; @@ -59,7 +59,7 @@ Signed-off-by: Thomas Gleixner /* * get/set the timer IRQ vector: -@@ -1758,6 +1763,7 @@ static inline void check_timer(void) +@@ -1761,6 +1766,7 @@ static inline void check_timer(void) */ unmask_IO_APIC_irq(0); if (timer_irq_works()) { @@ -67,7 +67,7 @@ Signed-off-by: Thomas Gleixner if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(apic1, pin1); return; -@@ -1775,6 +1781,7 @@ static inline void check_timer(void) +@@ -1778,6 +1784,7 @@ static inline void check_timer(void) */ setup_ExtINT_IRQ0_pin(apic2, pin2, vector); if (timer_irq_works()) { @@ -75,7 +75,7 @@ Signed-off-by: Thomas Gleixner printk("works.\n"); if (pin1 != -1) replace_pin_at_irq(0, apic1, pin1, apic2, pin2); -@@ -1802,6 +1809,7 @@ static inline void check_timer(void) +@@ -1805,6 +1812,7 @@ static inline void check_timer(void) enable_8259A_irq(0); if (timer_irq_works()) { @@ -83,7 +83,7 @@ Signed-off-by: Thomas Gleixner printk(" works.\n"); return; } -@@ -1817,6 +1825,8 @@ static inline void check_timer(void) +@@ -1820,6 +1828,8 @@ static inline void check_timer(void) unlock_ExtINT_logic(); diff --git a/18934-vtd-PCI-X-dev-assign.patch b/18934-vtd-PCI-X-dev-assign.patch new file mode 100644 index 0000000..15658a7 --- /dev/null +++ b/18934-vtd-PCI-X-dev-assign.patch @@ -0,0 +1,212 @@ +# HG changeset patch +# User Keir Fraser +# Date 1229694124 0 +# Node ID 
d238101c1832ba178bfc00a20b461fcebe21d5df +# Parent 8c35da364ab39605839869d8eb0ac9b831c370f0 +VT-d: Fix PCI-X device assignment + +When assigning a PCI device, the current code just maps its bridge and its +secondary bus number and devfn 0. This doesn't work for PCI-X device +assignment, because the request may be the source-id in the original +PCI-X transaction or the source-id provided by the bridge. It needs to +map the device itself, and its upstream bridges up to the PCIe-to-PCI/PCI-X +bridge. + +In addition, add descriptions for DEV_TYPE_PCIe_BRIDGE and +DEV_TYPE_PCI_BRIDGE for clarity. + +Signed-off-by: Weidong Han + +# HG changeset patch +# User Keir Fraser +# Date 1231154002 0 +# Node ID b3a9bc72624166a230da74c498154ae2cb45eacc +# Parent 9cc632cc6d400685679671b6bbc58dfe4c5e287e +vtd: avoid redundant context mapping + +After changeset 18934 (VT-d: Fix PCI-X device assignment), my assigned +PCI E1000 NIC doesn't work in guest. + +The NIC is 03:00.0. Its parent bridge is: 00:1e.0. +In domain_context_mapping(): + case DEV_TYPE_PCI: + After we domain_context_mapping_one() 03:00.0 and 00:1e.0, the + 'secbus' is 3 and 'bus' is 0, so we domain_context_mapping_one() + 03:00.0 again -- this redundant invocation returns -EINVAL because + we have created the mapping but haven't changed pdev->domain from + Dom0 to a new domain at this time and eventually the + XEN_DOMCTL_assign_device hypercall returns a failure. + +The attached patch detects this case and avoids the redundant +invocation.
+ +Signed-off-by: Dexuan Cui + +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -1155,8 +1155,8 @@ static int domain_context_mapping_one( + + enum { + DEV_TYPE_PCIe_ENDPOINT, +- DEV_TYPE_PCIe_BRIDGE, +- DEV_TYPE_PCI_BRIDGE, ++ DEV_TYPE_PCIe_BRIDGE, // PCIe root port, switch ++ DEV_TYPE_PCI_BRIDGE, // PCIe-to-PCI/PCIx bridge, PCI-to-PCI bridge + DEV_TYPE_PCI, + }; + +@@ -1170,7 +1170,8 @@ int pdev_type(u8 bus, u8 devfn) + class_device = pci_conf_read16(bus, d, f, PCI_CLASS_DEVICE); + if ( class_device == PCI_CLASS_BRIDGE_PCI ) + { +- pos = pci_find_next_cap(bus, devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP); ++ pos = pci_find_next_cap(bus, devfn, ++ PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP); + if ( !pos ) + return DEV_TYPE_PCI_BRIDGE; + creg = pci_conf_read16(bus, d, f, pos + PCI_EXP_FLAGS); +@@ -1219,9 +1220,9 @@ static int domain_context_mapping(struct + { + struct acpi_drhd_unit *drhd; + int ret = 0; +- u16 sec_bus, sub_bus, ob, odf; ++ u16 sec_bus, sub_bus; + u32 type; +- u8 secbus; ++ u8 secbus, secdevfn; + + drhd = acpi_find_matched_drhd_unit(bus, devfn); + if ( !drhd ) +@@ -1231,15 +1232,13 @@ static int domain_context_mapping(struct + switch ( type ) + { + case DEV_TYPE_PCIe_BRIDGE: ++ break; ++ + case DEV_TYPE_PCI_BRIDGE: + sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + PCI_SECONDARY_BUS); + sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), + PCI_SUBORDINATE_BUS); +- /*dmar_scope_add_buses(&drhd->scope, sec_bus, sub_bus);*/ +- +- if ( type == DEV_TYPE_PCIe_BRIDGE ) +- break; + + for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ ) + { +@@ -1258,26 +1257,28 @@ static int domain_context_mapping(struct + + case DEV_TYPE_PCI: + gdprintk(XENLOG_INFO VTDPREFIX, +- "domain_context_mapping:PCI: bdf = %x:%x.%x\n", ++ "domain_context_mapping:PCI: bdf = %x:%x.%x\n", + bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + +- ob = bus; odf = devfn; +- if ( !find_pcie_endpoint(&bus, &devfn, &secbus) ) ++ ret = 
domain_context_mapping_one(domain, drhd->iommu, bus, devfn); ++ if ( ret ) ++ break; ++ ++ secbus = bus; ++ secdevfn = devfn; ++ /* dependent devices mapping */ ++ while ( bus2bridge[bus].map ) + { +- gdprintk(XENLOG_WARNING VTDPREFIX, +- "domain_context_mapping:invalid\n"); +- break; ++ secbus = bus; ++ secdevfn = devfn; ++ devfn = bus2bridge[bus].devfn; ++ bus = bus2bridge[bus].bus; ++ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn); ++ if ( ret ) ++ return ret; + } + +- if ( ob != bus || odf != devfn ) +- gdprintk(XENLOG_INFO VTDPREFIX, +- "domain_context_mapping:map: " +- "bdf = %x:%x.%x -> %x:%x.%x\n", +- ob, PCI_SLOT(odf), PCI_FUNC(odf), +- bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); +- +- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn); +- if ( secbus != bus ) ++ if ( (secbus != bus) && (secdevfn != 0) ) + /* + * The source-id for transactions on non-PCIe buses seem + * to originate from devfn=0 on the secondary bus behind +@@ -1285,7 +1286,7 @@ static int domain_context_mapping(struct + * these scanarios is not particularly well documented + * anywhere. 
+ */ +- domain_context_mapping_one(domain, drhd->iommu, secbus, 0); ++ ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0); + break; + + default: +@@ -1339,10 +1340,9 @@ static int domain_context_unmap_one( + static int domain_context_unmap(struct domain *domain, u8 bus, u8 devfn) + { + struct acpi_drhd_unit *drhd; +- u16 sec_bus, sub_bus; + int ret = 0; + u32 type; +- u8 secbus; ++ u8 secbus, secdevfn; + + drhd = acpi_find_matched_drhd_unit(bus, devfn); + if ( !drhd ) +@@ -1353,24 +1353,39 @@ static int domain_context_unmap(struct d + { + case DEV_TYPE_PCIe_BRIDGE: + case DEV_TYPE_PCI_BRIDGE: +- sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), +- PCI_SECONDARY_BUS); +- sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn), +- PCI_SUBORDINATE_BUS); +- /*dmar_scope_remove_buses(&drhd->scope, sec_bus, sub_bus);*/ +- if ( DEV_TYPE_PCI_BRIDGE ) +- ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn); + break; + + case DEV_TYPE_PCIe_ENDPOINT: ++ gdprintk(XENLOG_INFO VTDPREFIX, ++ "domain_context_unmap:PCIe: bdf = %x:%x.%x\n", ++ bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn); + break; + + case DEV_TYPE_PCI: +- if ( find_pcie_endpoint(&bus, &devfn, &secbus) ) ++ gdprintk(XENLOG_INFO VTDPREFIX, ++ "domain_context_unmap:PCI: bdf = %x:%x.%x\n", ++ bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); ++ ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn); ++ if ( ret ) ++ break; ++ ++ secbus = bus; ++ secdevfn = devfn; ++ /* dependent devices unmapping */ ++ while ( bus2bridge[bus].map ) ++ { ++ secbus = bus; ++ secdevfn = devfn; ++ devfn = bus2bridge[bus].devfn; ++ bus = bus2bridge[bus].bus; + ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn); +- if ( bus != secbus ) +- domain_context_unmap_one(domain, drhd->iommu, secbus, 0); ++ if ( ret ) ++ return ret; ++ } ++ ++ if ( (secbus != bus) && (secdevfn != 0) ) ++ ret = domain_context_unmap_one(domain, 
drhd->iommu, secbus, 0); + break; + + default: diff --git a/18970-vmx-print-features.patch b/18970-vmx-print-features.patch new file mode 100644 index 0000000..03e9377 --- /dev/null +++ b/18970-vmx-print-features.patch @@ -0,0 +1,44 @@ +# HG changeset patch +# User Keir Fraser +# Date 1231156354 0 +# Node ID 2c5a2e99a1d69d635843955310488fbd5e1bcdd2 +# Parent d6889b3b64231dd4c2cd86ca6e66d0a4ef2d5dfc +vmx: Print advanced features during boot +Signed-off-by: Keir Fraser + +--- a/xen/arch/x86/hvm/vmx/vmcs.c ++++ b/xen/arch/x86/hvm/vmx/vmcs.c +@@ -55,6 +55,25 @@ static DEFINE_PER_CPU(struct list_head, + + static u32 vmcs_revision_id __read_mostly; + ++static void __init vmx_display_features(void) ++{ ++ int printed = 0; ++ ++ printk("VMX: Supported advanced features:\n"); ++ ++#define P(p,s) if ( p ) { printk(" - %s\n", s); printed = 1; } ++ P(cpu_has_vmx_virtualize_apic_accesses, "APIC MMIO access virtualisation"); ++ P(cpu_has_vmx_tpr_shadow, "APIC TPR shadow"); ++ P(cpu_has_vmx_ept, "Extended Page Tables (EPT)"); ++ P(cpu_has_vmx_vpid, "Virtual-Processor Identifiers (VPID)"); ++ P(cpu_has_vmx_vnmi, "Virtual NMI"); ++ P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap"); ++#undef P ++ ++ if ( !printed ) ++ printk(" - none\n"); ++} ++ + static u32 adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr) + { + u32 vmx_msr_low, vmx_msr_high, ctl = ctl_min | ctl_opt; +@@ -168,6 +187,7 @@ static void vmx_init_vmcs_config(void) + vmx_vmexit_control = _vmx_vmexit_control; + vmx_vmentry_control = _vmx_vmentry_control; + cpu_has_vmx_ins_outs_instr_info = !!(vmx_basic_msr_high & (1U<<22)); ++ vmx_display_features(); + } + else + { diff --git a/19009-x86_64-note-init-p2m.patch b/19009-x86_64-note-init-p2m.patch new file mode 100644 index 0000000..c7f8cdc --- /dev/null +++ b/19009-x86_64-note-init-p2m.patch @@ -0,0 +1,411 @@ +# HG changeset patch +# User Keir Fraser +# Date 1231414359 0 +# Node ID 292919f6123823916f1274f3d512794f72f3e903 +# Parent 
97f8d6453fdae1a865a3c875d7b712a494304fb0 +x86-64: guest directed placement of initial p->m map + +By adding another ELF note, the kernel can now direct the hypervisor +(for Dom0) and in the future also the tools (for DomU-s) to place the +initial phys->mach translation table at other than an address +immediately above the kernel/initrd images. This eliminates the size +restriction imposed on this table by Linux (the kernel loads above the +-2Gb boundary, and hence the entire initial mapping cannot reach or +even exceed 2Gb). + +There are a few items in this patch I'm not particularly happy with, +but couldn't think of a better solution: +- there is a hidden assumption that pages allocated for the domain are + put on the domain's page list sequentially +- the way backward compatibility is maintained is placing requirements + on the kernel side that make the code somewhat convoluted (because + it + needs to check where the map is actually placed in quite a few + places) +- code is there to use 1Gb mappings for the hypervisor created table, + but lacking a machine with 512G+ memory for immediate testing I + can't + verify this works; I know that 2Mb mappings work, and hence imply + that 1Gb ones would too (of course, if the kernel replaces the table + - like Linux does -, it cannot use 2Mb/1Gb mappings or even try to + re-use the page table entries, but I don't consider this a problem) + +Signed-off-by: Jan Beulich + +--- a/tools/include/xen-foreign/reference.size ++++ b/tools/include/xen-foreign/reference.size +@@ -1,7 +1,7 @@ + + structs | x86_32 x86_64 ia64 + +-start_info | 1104 1152 1152 ++start_info | 1112 1168 1168 + trap_info | 8 16 - + pt_fpreg | - - 16 + cpu_user_regs | 68 200 - +--- a/xen/arch/x86/domain_build.c ++++ b/xen/arch/x86/domain_build.c +@@ -341,6 +341,12 @@ int __init construct_dom0( + #endif + } + ++ if ( (parms.p2m_base != UNSET_ADDR) && elf_32bit(&elf) ) ++ { ++ printk(XENLOG_WARNING "P2M table base ignored\n"); ++ parms.p2m_base = UNSET_ADDR; 
++ } ++ + domain_set_alloc_bitsize(d); + + /* +@@ -359,6 +365,8 @@ int __init construct_dom0( + vphysmap_end = vphysmap_start + (nr_pages * (!is_pv_32on64_domain(d) ? + sizeof(unsigned long) : + sizeof(unsigned int))); ++ if ( parms.p2m_base != UNSET_ADDR ) ++ vphysmap_end = vphysmap_start; + vstartinfo_start = round_pgup(vphysmap_end); + vstartinfo_end = (vstartinfo_start + + sizeof(struct start_info) + +@@ -400,6 +408,11 @@ int __init construct_dom0( + /* Ensure that our low-memory 1:1 mapping covers the allocation. */ + page = alloc_domheap_pages(d, order, MEMF_bits(30)); + #else ++ if ( parms.p2m_base != UNSET_ADDR ) ++ { ++ vphysmap_start = parms.p2m_base; ++ vphysmap_end = vphysmap_start + nr_pages * sizeof(unsigned long); ++ } + page = alloc_domheap_pages(d, order, 0); + #endif + if ( page == NULL ) +@@ -749,8 +762,109 @@ int __init construct_dom0( + snprintf(si->magic, sizeof(si->magic), "xen-3.0-x86_%d%s", + elf_64bit(&elf) ? 64 : 32, parms.pae ? "p" : ""); + ++ count = d->tot_pages; ++#ifdef __x86_64__ ++ /* Set up the phys->machine table if not part of the initial mapping. */ ++ if ( parms.p2m_base != UNSET_ADDR ) ++ { ++ unsigned long va = vphysmap_start; ++ ++ if ( v_start <= vphysmap_end && vphysmap_start <= v_end ) ++ panic("DOM0 P->M table overlaps initial mapping"); ++ ++ while ( va < vphysmap_end ) ++ { ++ if ( d->tot_pages + ((round_pgup(vphysmap_end) - va) ++ >> PAGE_SHIFT) + 3 > nr_pages ) ++ panic("Dom0 allocation too small for initial P->M table.\n"); ++ ++ l4tab = l4start + l4_table_offset(va); ++ if ( !l4e_get_intpte(*l4tab) ) ++ { ++ page = alloc_domheap_pages(d, 0, 0); ++ if ( !page ) ++ break; ++ /* No mapping, PGC_allocated + page-table page. 
*/ ++ page->count_info = PGC_allocated | 2; ++ page->u.inuse.type_info = ++ PGT_l3_page_table | PGT_validated | 1; ++ clear_page(page_to_virt(page)); ++ *l4tab = l4e_from_page(page, L4_PROT); ++ } ++ l3tab = page_to_virt(l4e_get_page(*l4tab)); ++ l3tab += l3_table_offset(va); ++ if ( !l3e_get_intpte(*l3tab) ) ++ { ++ if ( cpu_has_page1gb && ++ !(va & ((1UL << L3_PAGETABLE_SHIFT) - 1)) && ++ vphysmap_end >= va + (1UL << L3_PAGETABLE_SHIFT) && ++ (page = alloc_domheap_pages(d, ++ L3_PAGETABLE_SHIFT - ++ PAGE_SHIFT, ++ 0)) != NULL ) ++ { ++ *l3tab = l3e_from_page(page, ++ L1_PROT|_PAGE_DIRTY|_PAGE_PSE); ++ va += 1UL << L3_PAGETABLE_SHIFT; ++ continue; ++ } ++ if ( (page = alloc_domheap_pages(d, 0, 0)) == NULL ) ++ break; ++ else ++ { ++ /* No mapping, PGC_allocated + page-table page. */ ++ page->count_info = PGC_allocated | 2; ++ page->u.inuse.type_info = ++ PGT_l2_page_table | PGT_validated | 1; ++ clear_page(page_to_virt(page)); ++ *l3tab = l3e_from_page(page, L3_PROT); ++ } ++ } ++ l2tab = page_to_virt(l3e_get_page(*l3tab)); ++ l2tab += l2_table_offset(va); ++ if ( !l2e_get_intpte(*l2tab) ) ++ { ++ if ( !(va & ((1UL << L2_PAGETABLE_SHIFT) - 1)) && ++ vphysmap_end >= va + (1UL << L2_PAGETABLE_SHIFT) && ++ (page = alloc_domheap_pages(d, ++ L2_PAGETABLE_SHIFT - ++ PAGE_SHIFT, ++ 0)) != NULL ) ++ { ++ *l2tab = l2e_from_page(page, ++ L1_PROT|_PAGE_DIRTY|_PAGE_PSE); ++ va += 1UL << L2_PAGETABLE_SHIFT; ++ continue; ++ } ++ if ( (page = alloc_domheap_pages(d, 0, 0)) == NULL ) ++ break; ++ else ++ { ++ /* No mapping, PGC_allocated + page-table page. 
*/ ++ page->count_info = PGC_allocated | 2; ++ page->u.inuse.type_info = ++ PGT_l1_page_table | PGT_validated | 1; ++ clear_page(page_to_virt(page)); ++ *l2tab = l2e_from_page(page, L2_PROT); ++ } ++ } ++ l1tab = page_to_virt(l2e_get_page(*l2tab)); ++ l1tab += l1_table_offset(va); ++ BUG_ON(l1e_get_intpte(*l1tab)); ++ page = alloc_domheap_pages(d, 0, 0); ++ if ( !page ) ++ break; ++ *l1tab = l1e_from_page(page, L1_PROT|_PAGE_DIRTY); ++ va += PAGE_SIZE; ++ va &= PAGE_MASK; ++ } ++ if ( !page ) ++ panic("Not enough RAM for DOM0 P->M table.\n"); ++ } ++#endif ++ + /* Write the phys->machine and machine->phys table entries. */ +- for ( pfn = 0; pfn < d->tot_pages; pfn++ ) ++ for ( pfn = 0; pfn < count; pfn++ ) + { + mfn = pfn + alloc_spfn; + #ifndef NDEBUG +@@ -764,6 +878,26 @@ int __init construct_dom0( + ((unsigned int *)vphysmap_start)[pfn] = mfn; + set_gpfn_from_mfn(mfn, pfn); + } ++ si->first_p2m_pfn = pfn; ++ si->nr_p2m_frames = d->tot_pages - count; ++ list_for_each_entry ( page, &d->page_list, list ) ++ { ++ mfn = page_to_mfn(page); ++ if ( get_gpfn_from_mfn(mfn) >= count ) ++ { ++ BUG_ON(is_pv_32bit_domain(d)); ++ if ( !page->u.inuse.type_info && ++ !get_page_and_type(page, d, PGT_writable_page) ) ++ BUG(); ++ ((unsigned long *)vphysmap_start)[pfn] = mfn; ++ set_gpfn_from_mfn(mfn, pfn); ++ ++pfn; ++#ifndef NDEBUG ++ ++alloc_epfn; ++#endif ++ } ++ } ++ BUG_ON(pfn != d->tot_pages); + while ( pfn < nr_pages ) + { + if ( (page = alloc_chunk(d, nr_pages - d->tot_pages)) == NULL ) +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -948,19 +948,44 @@ void put_page_from_l1e(l1_pgentry_t l1e, + } + + ++static void put_data_page( ++ struct page_info *page, int writeable) ++{ ++ if ( writeable ) ++ put_page_and_type(page); ++ else ++ { ++ BUG_ON((page->u.inuse.type_info & PGT_type_mask) == PGT_seg_desc_page ++ && (page->u.inuse.type_info & PGT_count_mask) != 0); ++ put_page(page); ++ } ++} ++ + /* + * NB. 
Virtual address 'l2e' maps to a machine address within frame 'pfn'. + * Note also that this automatically deals correctly with linear p.t.'s. + */ + static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) + { +- if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) && +- (l2e_get_pfn(l2e) != pfn) ) ++ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || (l2e_get_pfn(l2e) == pfn) ) ++ return 1; ++ ++ if ( unlikely(l2e_get_flags(l2e) & _PAGE_PSE) ) ++ { ++ unsigned long mfn = l2e_get_pfn(l2e); ++ int writeable = l2e_get_flags(l2e) & _PAGE_RW; ++ ++ ASSERT(!(mfn & ((1UL << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1))); ++ do { ++ put_data_page(mfn_to_page(mfn), writeable); ++ } while ( ++mfn & ((1UL << (L2_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1) ); ++ } ++ else + { + put_page_and_type(l2e_get_page(l2e)); +- return 0; + } +- return 1; ++ ++ return 0; + } + + static int __put_page_type(struct page_info *, int preemptible); +@@ -968,14 +993,28 @@ static int __put_page_type(struct page_i + static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, + int partial, int preemptible) + { +- if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && +- (l3e_get_pfn(l3e) != pfn) ) ++ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) ) ++ return 1; ++ ++#ifdef __x86_64__ ++ if ( unlikely(l3e_get_flags(l3e) & _PAGE_PSE) ) + { +- if ( unlikely(partial > 0) ) +- return __put_page_type(l3e_get_page(l3e), preemptible); +- return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); ++ unsigned long mfn = l3e_get_pfn(l3e); ++ int writeable = l3e_get_flags(l3e) & _PAGE_RW; ++ ++ ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1))); ++ do { ++ put_data_page(mfn_to_page(mfn), writeable); ++ } while ( ++mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1) ); ++ ++ return 0; + } +- return 1; ++#endif ++ ++ if ( unlikely(partial > 0) ) ++ return __put_page_type(l3e_get_page(l3e), preemptible); ++ ++ return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); + 
} + + #if CONFIG_PAGING_LEVELS >= 4 +--- a/xen/common/libelf/libelf-dominfo.c ++++ b/xen/common/libelf/libelf-dominfo.c +@@ -90,6 +90,7 @@ int elf_xen_parse_note(struct elf_binary + [XEN_ELFNOTE_ENTRY] = { "ENTRY", 0}, + [XEN_ELFNOTE_HYPERCALL_PAGE] = { "HYPERCALL_PAGE", 0}, + [XEN_ELFNOTE_VIRT_BASE] = { "VIRT_BASE", 0}, ++ [XEN_ELFNOTE_INIT_P2M] = { "INIT_P2M", 0}, + [XEN_ELFNOTE_PADDR_OFFSET] = { "PADDR_OFFSET", 0}, + [XEN_ELFNOTE_HV_START_LOW] = { "HV_START_LOW", 0}, + [XEN_ELFNOTE_XEN_VERSION] = { "XEN_VERSION", 1}, +@@ -164,6 +165,9 @@ int elf_xen_parse_note(struct elf_binary + case XEN_ELFNOTE_ENTRY: + parms->virt_entry = val; + break; ++ case XEN_ELFNOTE_INIT_P2M: ++ parms->p2m_base = val; ++ break; + case XEN_ELFNOTE_PADDR_OFFSET: + parms->elf_paddr_offset = val; + break; +@@ -392,6 +396,7 @@ static int elf_xen_addr_calc_check(struc + elf_msg(elf, " virt_kstart = 0x%" PRIx64 "\n", parms->virt_kstart); + elf_msg(elf, " virt_kend = 0x%" PRIx64 "\n", parms->virt_kend); + elf_msg(elf, " virt_entry = 0x%" PRIx64 "\n", parms->virt_entry); ++ elf_msg(elf, " p2m_base = 0x%" PRIx64 "\n", parms->p2m_base); + + if ( (parms->virt_kstart > parms->virt_kend) || + (parms->virt_entry < parms->virt_kstart) || +@@ -403,6 +408,15 @@ static int elf_xen_addr_calc_check(struc + return -1; + } + ++ if ( (parms->p2m_base != UNSET_ADDR) && ++ (parms->p2m_base >= parms->virt_kstart) && ++ (parms->p2m_base < parms->virt_kend) ) ++ { ++ elf_err(elf, "%s: ERROR: P->M table base is out of bounds.\n", ++ __FUNCTION__); ++ return -1; ++ } ++ + return 0; + } + +@@ -422,6 +436,7 @@ int elf_xen_parse(struct elf_binary *elf + parms->virt_entry = UNSET_ADDR; + parms->virt_hypercall = UNSET_ADDR; + parms->virt_hv_start_low = UNSET_ADDR; ++ parms->p2m_base = UNSET_ADDR; + parms->elf_paddr_offset = UNSET_ADDR; + + /* Find and parse elf notes. 
*/ +--- a/xen/include/public/elfnote.h ++++ b/xen/include/public/elfnote.h +@@ -162,9 +162,20 @@ + #define XEN_ELFNOTE_SUSPEND_CANCEL 14 + + /* ++ * The (non-default) location the initial phys-to-machine map should be ++ * placed at by the hypervisor (Dom0) or the tools (DomU). ++ * The kernel must be prepared for this mapping to be established using ++ * large pages, despite such otherwise not being available to guests. ++ * The kernel must also be able to handle the page table pages used for ++ * this mapping not being accessible through the initial mapping. ++ * (Only x86-64 supports this at present.) ++ */ ++#define XEN_ELFNOTE_INIT_P2M 15 ++ ++/* + * The number of the highest elfnote defined. + */ +-#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUSPEND_CANCEL ++#define XEN_ELFNOTE_MAX XEN_ELFNOTE_INIT_P2M + + /* + * System information exported through crash notes. +--- a/xen/include/public/libelf.h ++++ b/xen/include/public/libelf.h +@@ -232,6 +232,7 @@ struct elf_dom_parms { + uint64_t virt_entry; + uint64_t virt_hypercall; + uint64_t virt_hv_start_low; ++ uint64_t p2m_base; + uint64_t elf_paddr_offset; + uint32_t f_supported[XENFEAT_NR_SUBMAPS]; + uint32_t f_required[XENFEAT_NR_SUBMAPS]; +--- a/xen/include/public/xen.h ++++ b/xen/include/public/xen.h +@@ -513,6 +513,7 @@ typedef struct shared_info shared_info_t + * a. relocated kernel image + * b. initial ram disk [mod_start, mod_len] + * c. list of allocated page frames [mfn_list, nr_pages] ++ * (unless relocated due to XEN_ELFNOTE_INIT_P2M) + * d. start_info_t structure [register ESI (x86)] + * e. bootstrap page tables [pt_base, CR3 (x86)] + * f. bootstrap stack [register ESP (x86)] +@@ -554,6 +555,9 @@ struct start_info { + unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ + unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ + int8_t cmd_line[MAX_GUEST_CMDLINE]; ++ /* The pfn range here covers both page table and p->m table frames. 
*/ ++ unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */ ++ unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */ + }; + typedef struct start_info start_info_t; + diff --git a/19027-hvmloader-SMBIOS-dev-mem-boundary.patch b/19027-hvmloader-SMBIOS-dev-mem-boundary.patch new file mode 100644 index 0000000..2d2cc3f --- /dev/null +++ b/19027-hvmloader-SMBIOS-dev-mem-boundary.patch @@ -0,0 +1,27 @@ +# HG changeset patch +# User Keir Fraser +# Date 1231755835 0 +# Node ID 95d8788bf4be2e8b8d2b984e290f5e19eef1a16c +# Parent 59d511c4a8d8ba451afc6ebd88e049fa2addf9f5 +hvmloader: Fix SMBIOS memory device length boundary condition. + +dev_memsize ends up 0 when it shouldn't be on 16G boundary conditions. + +Signed-off-by: Bill Rieske +Signed-off-by: Keir Fraser + +diff -r 59d511c4a8d8 -r 95d8788bf4be tools/firmware/hvmloader/smbios.c +--- a/tools/firmware/hvmloader/smbios.c Mon Jan 12 10:17:12 2009 +0000 ++++ b/tools/firmware/hvmloader/smbios.c Mon Jan 12 10:23:55 2009 +0000 +@@ -118,8 +118,9 @@ write_smbios_tables(void *start, + do_struct(smbios_type_16_init(p, memsize, nr_mem_devs)); + for ( i = 0; i < nr_mem_devs; i++ ) + { +- uint32_t dev_memsize = ((i == (nr_mem_devs - 1)) +- ? 
(memsize & 0x3fff) : 0x4000); ++ uint32_t dev_memsize = 0x4000; /* all but last covers 16GB */ ++ if ( (i == (nr_mem_devs - 1)) && ((memsize & 0x3fff) != 0) ) ++ dev_memsize = memsize & 0x3fff; /* last dev is <16GB */ + do_struct(smbios_type_17_init(p, dev_memsize, i)); + do_struct(smbios_type_19_init(p, dev_memsize, i)); + do_struct(smbios_type_20_init(p, dev_memsize, i)); diff --git a/19032-amd-iommu-pointer-reset.patch b/19032-amd-iommu-pointer-reset.patch new file mode 100644 index 0000000..582a294 --- /dev/null +++ b/19032-amd-iommu-pointer-reset.patch @@ -0,0 +1,37 @@ +# HG changeset patch +# User Keir Fraser +# Date 1231859806 0 +# Node ID 73770182aee48e79a2caa441ad1013982deefddb +# Parent 1c6642adaeb204495c95ab86c8aee41587a22928 +AMD IOMMU: Reset tail and head pointer of cmd buffer and event log + +Reset the tail and the head pointers of command buffer and event log +to zero in case that iommu does not reset them after the base +addresses of those buffers are updated. + +Signed-off-by: Wei Wang + +--- a/xen/drivers/passthrough/amd/iommu_init.c ++++ b/xen/drivers/passthrough/amd/iommu_init.c +@@ -195,6 +195,10 @@ static void __init set_iommu_command_buf + IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK, + IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT, &entry); + writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); ++ ++ /*reset head and tail pointer */ ++ writel(0x0, iommu->mmio_base + IOMMU_CMD_BUFFER_HEAD_OFFSET); ++ writel(0x0, iommu->mmio_base + IOMMU_CMD_BUFFER_TAIL_OFFSET); + } + + static void __init register_iommu_exclusion_range(struct amd_iommu *iommu) +@@ -259,6 +263,10 @@ static void __init set_iommu_event_log_c + IOMMU_CONTROL_COMP_WAIT_INT_MASK, + IOMMU_CONTROL_COMP_WAIT_INT_SHIFT, &entry); + writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET); ++ ++ /*reset head and tail pointer */ ++ writel(0x0, iommu->mmio_base + IOMMU_EVENT_LOG_HEAD_OFFSET); ++ writel(0x0, iommu->mmio_base + IOMMU_EVENT_LOG_TAIL_OFFSET); + } + + static int 
amd_iommu_read_event_log(struct amd_iommu *iommu, u32 event[]) diff --git a/19035-MSI-X-proper-enable.patch b/19035-MSI-X-proper-enable.patch new file mode 100644 index 0000000..9945189 --- /dev/null +++ b/19035-MSI-X-proper-enable.patch @@ -0,0 +1,46 @@ +# HG changeset patch +# User Keir Fraser +# Date 1231862027 0 +# Node ID 4f6a2bbdff3fea7db13979ffccb1ae5403ca79c8 +# Parent b169db55faf38fda27985626284c9262aac09784 +x86: Call msix_set_enable for MSI-x interrupt + +For MSI-x, we should call msix_set_enable() instead of +msi_set_enable(). + +Signed-off-by: Jiang Yunhong + +--- a/xen/arch/x86/msi.c ++++ b/xen/arch/x86/msi.c +@@ -702,7 +702,7 @@ static void __pci_disable_msix(int vecto + + pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX); + control = pci_conf_read16(bus, slot, func, msix_control_reg(pos)); +- msi_set_enable(dev, 0); ++ msix_set_enable(dev, 0); + + BUG_ON(list_empty(&dev->msi_list)); + +@@ -798,11 +798,20 @@ int pci_restore_msi_state(struct pci_dev + return -EINVAL; + } + +- msi_set_enable(pdev, 0); ++ if ( entry->msi_attrib.type == PCI_CAP_ID_MSI ) ++ msi_set_enable(pdev, 0); ++ else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX ) ++ msix_set_enable(pdev, 0); ++ + write_msi_msg(entry, &entry->msg); + +- msi_set_enable(pdev, 1); + msi_set_mask_bit(vector, entry->msi_attrib.masked); ++ ++ if ( entry->msi_attrib.type == PCI_CAP_ID_MSI ) ++ msi_set_enable(pdev, 1); ++ else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX ) ++ msix_set_enable(pdev, 1); ++ + spin_unlock_irqrestore(&desc->lock, flags); + } + diff --git a/19038-x86-no-apic.patch b/19038-x86-no-apic.patch new file mode 100644 index 0000000..65ed50a --- /dev/null +++ b/19038-x86-no-apic.patch @@ -0,0 +1,135 @@ +# HG changeset patch +# User Keir Fraser +# Date 1231930540 0 +# Node ID 59274c49a0298fd73f60759c0842a293b5816057 +# Parent cc542ebe48539b9ca0534ca241209734234fdff2 +x86: restore ability to work on systems without APIC + +This got broken with the default-enabling of MSI. 
Apart from fixing +the base issue, the patch also addresses +- the 'i' command crashing where there is no IO-APIC, +- the 'i' command needlessly printing information for all 256 vectors + when the use of IO-APIC(s) is disabled, and +- the need to specify both "nolapic" and "noapic" when "nolapic" alone + should already have the intended effect. + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/apic.c ++++ b/xen/arch/x86/apic.c +@@ -40,7 +40,7 @@ + /* + * Knob to control our willingness to enable the local APIC. + */ +-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ ++static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ + + /* + * Debug level +@@ -719,7 +719,7 @@ static void apic_pm_activate(void) + static void __init lapic_disable(char *str) + { + enable_local_apic = -1; +- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); ++ setup_clear_cpu_cap(X86_FEATURE_APIC); + } + custom_param("nolapic", lapic_disable); + +@@ -852,6 +852,7 @@ void __init init_apic_mappings(void) + if (!smp_found_config && detect_init_APIC()) { + apic_phys = __pa(alloc_xenheap_page()); + clear_page(__va(apic_phys)); ++ msi_enable = 0; + } else + apic_phys = mp_lapic_addr; + +@@ -1280,8 +1281,10 @@ int __init APIC_init_uniprocessor (void) + if (enable_local_apic < 0) + clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + +- if (!smp_found_config && !cpu_has_apic) ++ if (!smp_found_config && !cpu_has_apic) { ++ msi_enable = 0; + return -1; ++ } + + /* + * Complain if the BIOS pretends there is one. 
+@@ -1290,6 +1293,7 @@ int __init APIC_init_uniprocessor (void) + printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); ++ msi_enable = 0; + return -1; + } + +--- a/xen/arch/x86/cpu/common.c ++++ b/xen/arch/x86/cpu/common.c +@@ -29,6 +29,14 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM + */ + u64 host_pat = 0x050100070406; + ++static unsigned int __cpuinitdata cleared_caps[NCAPINTS]; ++ ++void __init setup_clear_cpu_cap(unsigned int cap) ++{ ++ __clear_bit(cap, boot_cpu_data.x86_capability); ++ __set_bit(cap, cleared_caps); ++} ++ + static void default_init(struct cpuinfo_x86 * c) + { + /* Not much we can do here... */ +@@ -235,6 +243,7 @@ static void __init early_cpu_detect(void + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xF) << 4; + c->x86_mask = tfms & 15; ++ cap0 &= ~cleared_caps[0]; + if (cap0 & (1<<19)) + c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; + c->x86_capability[0] = cap0; /* Added for Xen bootstrap */ +@@ -395,6 +404,9 @@ void __cpuinit identify_cpu(struct cpuin + if (disable_pse) + clear_bit(X86_FEATURE_PSE, c->x86_capability); + ++ for (i = 0 ; i < NCAPINTS ; ++i) ++ c->x86_capability[i] &= ~cleared_caps[i]; ++ + /* If the model name is still unset, do table lookup. */ + if ( !c->x86_model_id[0] ) { + char *p; +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -87,7 +87,9 @@ int disable_timer_pin_1 __initdata; + + static struct irq_pin_list { + int apic, pin, next; +-} irq_2_pin[PIN_MAP_SIZE]; ++} irq_2_pin[PIN_MAP_SIZE] = { ++ [0 ... PIN_MAP_SIZE-1].pin = -1 ++}; + static int irq_2_pin_free_entry = NR_IRQS; + + int vector_irq[NR_VECTORS] __read_mostly = { +@@ -1020,11 +1022,6 @@ static void __init enable_IO_APIC(void) + int i, apic; + unsigned long flags; + +- for (i = 0; i < PIN_MAP_SIZE; i++) { +- irq_2_pin[i].pin = -1; +- irq_2_pin[i].next = 0; +- } +- + /* Initialise dynamic irq_2_pin free list. 
*/ + for (i = NR_IRQS; i < PIN_MAP_SIZE; i++) + irq_2_pin[i].next = i + 1; +--- a/xen/include/asm-x86/processor.h ++++ b/xen/include/asm-x86/processor.h +@@ -191,6 +191,7 @@ extern int phys_proc_id[NR_CPUS]; + extern int cpu_core_id[NR_CPUS]; + + extern void identify_cpu(struct cpuinfo_x86 *); ++extern void setup_clear_cpu_cap(unsigned int); + extern void print_cpu_info(struct cpuinfo_x86 *); + extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); + extern void dodgy_tsc(void); diff --git a/19039-x86-propagate-nolapic.patch b/19039-x86-propagate-nolapic.patch new file mode 100644 index 0000000..0698444 --- /dev/null +++ b/19039-x86-propagate-nolapic.patch @@ -0,0 +1,44 @@ +# HG changeset patch +# User Keir Fraser +# Date 1231930578 0 +# Node ID 3fb43f23f64ccc1687d1dc348a9eb454397d4887 +# Parent 59274c49a0298fd73f60759c0842a293b5816057 +x86: propagate disabled APIC state to Dom0 + +This in particular eliminates the need to specify "noapic" to Dom0 +when "noapic" is passed to Xen, provided the kernel side gets slightly +modified to make use of this information (Linux side patch cannot +easily be provided for the 2.6.18 tree, but this is being used by our +2.6.27-based one). + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -711,6 +711,8 @@ static void pv_cpuid(struct cpu_user_reg + { + /* Modify Feature Information. 
*/ + __clear_bit(X86_FEATURE_VME, &d); ++ if ( !cpu_has_apic ) ++ __clear_bit(X86_FEATURE_APIC % 32, &d); + __clear_bit(X86_FEATURE_PSE, &d); + __clear_bit(X86_FEATURE_PGE, &d); + __clear_bit(X86_FEATURE_MCE, &d); +@@ -742,6 +744,8 @@ static void pv_cpuid(struct cpu_user_reg + __clear_bit(X86_FEATURE_XTPR % 32, &c); + __clear_bit(X86_FEATURE_PDCM % 32, &c); + __clear_bit(X86_FEATURE_DCA % 32, &c); ++ if ( !cpu_has_apic ) ++ __clear_bit(X86_FEATURE_X2APIC % 32, &c); + __set_bit(X86_FEATURE_HYPERVISOR % 32, &c); + break; + case 0x80000001: +@@ -760,6 +764,8 @@ static void pv_cpuid(struct cpu_user_reg + __clear_bit(X86_FEATURE_RDTSCP % 32, &d); + + __clear_bit(X86_FEATURE_SVME % 32, &c); ++ if ( !cpu_has_apic ) ++ __clear_bit(X86_FEATURE_EXTAPICSPACE % 32, &c); + __clear_bit(X86_FEATURE_OSVW % 32, &c); + __clear_bit(X86_FEATURE_IBS % 32, &c); + __clear_bit(X86_FEATURE_SKINIT % 32, &c); diff --git a/19046-cross-bit-coredumping.patch b/19046-cross-bit-coredumping.patch new file mode 100644 index 0000000..b04c63f --- /dev/null +++ b/19046-cross-bit-coredumping.patch @@ -0,0 +1,391 @@ +# HG changeset patch +# User Keir Fraser +# Date 1232023062 0 +# Node ID ecf603780f560095c4316169c9473d040f216dfc +# Parent f6a455c9f01db586832c0eb98c14965c045e07ac +libxc: Support cross-bitness guest when core-dumping + +This patch allows core-dumping to work on a cross-bit host/guest +configuration, whereas previously that was not supported. It supports +both PV and FV guests. The core file format generated by the host, +needs to match that of the guest, so an alignment issue is addressed, +along with the p2m frame list handling being done according to the +guest size. 
+ +Signed-off-by: Bruce Rogers + +Index: xen-3.3.1-testing/tools/libxc/xc_core.c +=================================================================== +--- xen-3.3.1-testing.orig/tools/libxc/xc_core.c ++++ xen-3.3.1-testing/tools/libxc/xc_core.c +@@ -58,9 +58,6 @@ + /* number of pages to write at a time */ + #define DUMP_INCREMENT (4 * 1024) + +-/* Don't yet support cross-address-size core dump */ +-#define guest_width (sizeof (unsigned long)) +- + /* string table */ + struct xc_core_strtab { + char *strings; +@@ -240,7 +237,7 @@ xc_core_ehdr_init(Elf64_Ehdr *ehdr) + ehdr->e_ident[EI_ABIVERSION] = EV_CURRENT; + + ehdr->e_type = ET_CORE; +- ehdr->e_machine = ELF_ARCH_MACHINE; ++ /* e_machine will be filled in later */ + ehdr->e_version = EV_CURRENT; + ehdr->e_entry = 0; + ehdr->e_phoff = 0; +@@ -359,7 +356,8 @@ elfnote_dump_core_header( + } + + static int +-elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle) ++elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle, ++ unsigned int guest_width) + { + int sts; + struct elfnote elfnote; +@@ -371,6 +369,12 @@ elfnote_dump_xen_version(void *args, dum + elfnote.descsz = sizeof(xen_version); + elfnote.type = XEN_ELFNOTE_DUMPCORE_XEN_VERSION; + elfnote_fill_xen_version(xc_handle, &xen_version); ++ if (guest_width < sizeof(unsigned long)) ++ { ++ // 32 bit elf file format differs in pagesize's alignment ++ char *p = (char *)&xen_version.pagesize; ++ memmove(p - 4, p, sizeof(xen_version.pagesize)); ++ } + sts = dump_rtn(args, (char*)&elfnote, sizeof(elfnote)); + if ( sts != 0 ) + return sts; +@@ -396,6 +400,24 @@ elfnote_dump_format_version(void *args, + return dump_rtn(args, (char*)&format_version, sizeof(format_version)); + } + ++static int ++get_guest_width(int xc_handle, ++ uint32_t domid, ++ unsigned int *guest_width) ++{ ++ DECLARE_DOMCTL; ++ ++ memset(&domctl, 0, sizeof(domctl)); ++ domctl.domain = domid; ++ domctl.cmd = XEN_DOMCTL_get_address_size; ++ ++ if ( 
do_domctl(xc_handle, &domctl) != 0 ) ++ return 1; ++ ++ *guest_width = domctl.u.address_size.size / 8; ++ return 0; ++} ++ + int + xc_domain_dumpcore_via_callback(int xc_handle, + uint32_t domid, +@@ -403,7 +425,8 @@ xc_domain_dumpcore_via_callback(int xc_h + dumpcore_rtn_t dump_rtn) + { + xc_dominfo_t info; +- shared_info_t *live_shinfo = NULL; ++ shared_info_any_t *live_shinfo = NULL; ++ unsigned int guest_width; + + int nr_vcpus = 0; + char *dump_mem, *dump_mem_start = NULL; +@@ -437,6 +460,12 @@ xc_domain_dumpcore_via_callback(int xc_h + uint16_t strtab_idx; + struct xc_core_section_headers *sheaders = NULL; + Elf64_Shdr *shdr; ++ ++ if ( get_guest_width(xc_handle, domid, &guest_width) != 0 ) ++ { ++ PERROR("Could not get address size for domain"); ++ return sts; ++ } + + xc_core_arch_context_init(&arch_ctxt); + if ( (dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL ) +@@ -500,7 +529,7 @@ xc_domain_dumpcore_via_callback(int xc_h + goto out; + } + +- sts = xc_core_arch_map_p2m(xc_handle, &info, live_shinfo, ++ sts = xc_core_arch_map_p2m(xc_handle, guest_width, &info, live_shinfo, + &p2m, &p2m_size); + if ( sts != 0 ) + goto out; +@@ -676,6 +705,7 @@ xc_domain_dumpcore_via_callback(int xc_h + /* write out elf header */ + ehdr.e_shnum = sheaders->num; + ehdr.e_shstrndx = strtab_idx; ++ ehdr.e_machine = ELF_ARCH_MACHINE; + sts = dump_rtn(args, (char*)&ehdr, sizeof(ehdr)); + if ( sts != 0 ) + goto out; +@@ -697,7 +727,7 @@ xc_domain_dumpcore_via_callback(int xc_h + goto out; + + /* elf note section: xen version */ +- sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle); ++ sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle, guest_width); + if ( sts != 0 ) + goto out; + +@@ -757,9 +787,21 @@ xc_domain_dumpcore_via_callback(int xc_h + + if ( !auto_translated_physmap ) + { +- gmfn = p2m[i]; +- if ( gmfn == INVALID_P2M_ENTRY ) +- continue; ++ if ( guest_width >= sizeof(unsigned long) ) ++ { ++ if ( guest_width == sizeof(unsigned long) ) ++ gmfn = 
p2m[i]; ++ else ++ gmfn = ((uint64_t *)p2m)[i]; ++ if ( gmfn == INVALID_P2M_ENTRY ) ++ continue; ++ } ++ else ++ { ++ gmfn = ((uint32_t *)p2m)[i]; ++ if ( gmfn == (uint32_t)INVALID_P2M_ENTRY ) ++ continue; ++ } + + p2m_array[j].pfn = i; + p2m_array[j].gmfn = gmfn; +@@ -802,7 +844,7 @@ copy_done: + /* When live dump-mode (-L option) is specified, + * guest domain may reduce memory. pad with zero pages. + */ +- IPRINTF("j (%ld) != nr_pages (%ld)", j , nr_pages); ++ IPRINTF("j (%ld) != nr_pages (%ld)", j, nr_pages); + memset(dump_mem_start, 0, PAGE_SIZE); + for (; j < nr_pages; j++) { + sts = dump_rtn(args, dump_mem_start, PAGE_SIZE); +@@ -891,7 +933,7 @@ xc_domain_dumpcore(int xc_handle, + struct dump_args da; + int sts; + +- if ( (da.fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0 ) ++ if ( (da.fd = open(corename, O_CREAT|O_RDWR|O_TRUNC, S_IWUSR|S_IRUSR)) < 0 ) + { + PERROR("Could not open corefile %s", corename); + return -errno; +Index: xen-3.3.1-testing/tools/libxc/xc_core.h +=================================================================== +--- xen-3.3.1-testing.orig/tools/libxc/xc_core.h ++++ xen-3.3.1-testing/tools/libxc/xc_core.h +@@ -136,12 +136,12 @@ int xc_core_arch_auto_translated_physmap + struct xc_core_arch_context; + int xc_core_arch_memory_map_get(int xc_handle, + struct xc_core_arch_context *arch_ctxt, +- xc_dominfo_t *info, shared_info_t *live_shinfo, ++ xc_dominfo_t *info, shared_info_any_t *live_shinfo, + xc_core_memory_map_t **mapp, + unsigned int *nr_entries); +-int xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info, +- shared_info_t *live_shinfo, xen_pfn_t **live_p2m, +- unsigned long *pfnp); ++int xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, ++ xc_dominfo_t *info, shared_info_any_t *live_shinfo, ++ xen_pfn_t **live_p2m, unsigned long *pfnp); + + + #if defined (__i386__) || defined (__x86_64__) +Index: xen-3.3.1-testing/tools/libxc/xc_core_ia64.c 
+=================================================================== +--- xen-3.3.1-testing.orig/tools/libxc/xc_core_ia64.c ++++ xen-3.3.1-testing/tools/libxc/xc_core_ia64.c +@@ -270,7 +270,7 @@ old: + } + + int +-xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info, ++xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info, + shared_info_t *live_shinfo, xen_pfn_t **live_p2m, + unsigned long *pfnp) + { +Index: xen-3.3.1-testing/tools/libxc/xc_core_x86.c +=================================================================== +--- xen-3.3.1-testing.orig/tools/libxc/xc_core_x86.c ++++ xen-3.3.1-testing/tools/libxc/xc_core_x86.c +@@ -20,9 +20,25 @@ + + #include "xg_private.h" + #include "xc_core.h" ++#include "xc_e820.h" ++ ++#define GET_FIELD(_p, _f) ((guest_width==8) ? ((_p)->x64._f) : ((_p)->x32._f)) ++ ++#ifndef MAX ++#define MAX(_a, _b) ((_a) >= (_b) ? (_a) : (_b)) ++#endif ++ ++int ++xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt, ++ unsigned long pfn) ++{ ++ if ((pfn >= 0xa0 && pfn < 0xc0) /* VGA hole */ ++ || (pfn >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT) ++ && pfn < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */ ++ return 0; ++ return 1; ++} + +-/* Don't yet support cross-address-size core dump */ +-#define guest_width (sizeof (unsigned long)) + + static int nr_gpfns(int xc_handle, domid_t domid) + { +@@ -37,7 +53,7 @@ xc_core_arch_auto_translated_physmap(con + + int + xc_core_arch_memory_map_get(int xc_handle, struct xc_core_arch_context *unused, +- xc_dominfo_t *info, shared_info_t *live_shinfo, ++ xc_dominfo_t *info, shared_info_any_t *live_shinfo, + xc_core_memory_map_t **mapp, + unsigned int *nr_entries) + { +@@ -60,17 +76,22 @@ xc_core_arch_memory_map_get(int xc_handl + } + + int +-xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info, +- shared_info_t *live_shinfo, xen_pfn_t **live_p2m, ++xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info, ++ shared_info_any_t *live_shinfo, xen_pfn_t 
**live_p2m, + unsigned long *pfnp) + { + /* Double and single indirect references to the live P2M table */ + xen_pfn_t *live_p2m_frame_list_list = NULL; + xen_pfn_t *live_p2m_frame_list = NULL; ++ /* Copies of the above. */ ++ xen_pfn_t *p2m_frame_list_list = NULL; ++ xen_pfn_t *p2m_frame_list = NULL; ++ + uint32_t dom = info->domid; + unsigned long p2m_size = nr_gpfns(xc_handle, info->domid); + int ret = -1; + int err; ++ int i; + + if ( p2m_size < info->nr_pages ) + { +@@ -80,7 +101,7 @@ xc_core_arch_map_p2m(int xc_handle, xc_d + + live_p2m_frame_list_list = + xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, +- live_shinfo->arch.pfn_to_mfn_frame_list_list); ++ GET_FIELD(live_shinfo, arch.pfn_to_mfn_frame_list_list)); + + if ( !live_p2m_frame_list_list ) + { +@@ -88,9 +109,28 @@ xc_core_arch_map_p2m(int xc_handle, xc_d + goto out; + } + ++ /* Get a local copy of the live_P2M_frame_list_list */ ++ if ( !(p2m_frame_list_list = malloc(PAGE_SIZE)) ) ++ { ++ ERROR("Couldn't allocate p2m_frame_list_list array"); ++ goto out; ++ } ++ memcpy(p2m_frame_list_list, live_p2m_frame_list_list, PAGE_SIZE); ++ ++ /* Canonicalize guest's unsigned long vs ours */ ++ if ( guest_width > sizeof(unsigned long) ) ++ for ( i = 0; i < PAGE_SIZE/sizeof(unsigned long); i++ ) ++ if ( i < PAGE_SIZE/guest_width ) ++ p2m_frame_list_list[i] = ((uint64_t *)p2m_frame_list_list)[i]; ++ else ++ p2m_frame_list_list[i] = 0; ++ else if ( guest_width < sizeof(unsigned long) ) ++ for ( i = PAGE_SIZE/sizeof(unsigned long) - 1; i >= 0; i-- ) ++ p2m_frame_list_list[i] = ((uint32_t *)p2m_frame_list_list)[i]; ++ + live_p2m_frame_list = + xc_map_foreign_pages(xc_handle, dom, PROT_READ, +- live_p2m_frame_list_list, ++ p2m_frame_list_list, + P2M_FLL_ENTRIES); + + if ( !live_p2m_frame_list ) +@@ -99,8 +139,25 @@ xc_core_arch_map_p2m(int xc_handle, xc_d + goto out; + } + ++ /* Get a local copy of the live_P2M_frame_list */ ++ if ( !(p2m_frame_list = malloc(P2M_TOOLS_FL_SIZE)) ) ++ { ++ ERROR("Couldn't 
allocate p2m_frame_list array"); ++ goto out; ++ } ++ memset(p2m_frame_list, 0, P2M_TOOLS_FL_SIZE); ++ memcpy(p2m_frame_list, live_p2m_frame_list, P2M_GUEST_FL_SIZE); ++ ++ /* Canonicalize guest's unsigned long vs ours */ ++ if ( guest_width > sizeof(unsigned long) ) ++ for ( i = 0; i < P2M_FL_ENTRIES; i++ ) ++ p2m_frame_list[i] = ((uint64_t *)p2m_frame_list)[i]; ++ else if ( guest_width < sizeof(unsigned long) ) ++ for ( i = P2M_FL_ENTRIES - 1; i >= 0; i-- ) ++ p2m_frame_list[i] = ((uint32_t *)p2m_frame_list)[i]; ++ + *live_p2m = xc_map_foreign_pages(xc_handle, dom, PROT_READ, +- live_p2m_frame_list, ++ p2m_frame_list, + P2M_FL_ENTRIES); + + if ( !*live_p2m ) +@@ -122,6 +179,12 @@ out: + if ( live_p2m_frame_list ) + munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE); + ++ if ( p2m_frame_list_list ) ++ free(p2m_frame_list_list); ++ ++ if ( p2m_frame_list ) ++ free(p2m_frame_list); ++ + errno = err; + return ret; + } +Index: xen-3.3.1-testing/tools/libxc/xc_core_x86.h +=================================================================== +--- xen-3.3.1-testing.orig/tools/libxc/xc_core_x86.h ++++ xen-3.3.1-testing/tools/libxc/xc_core_x86.h +@@ -21,15 +21,8 @@ + #ifndef XC_CORE_X86_H + #define XC_CORE_X86_H + +-#if defined(__i386__) || defined(__x86_64__) + #define ELF_ARCH_DATA ELFDATA2LSB +-#if defined (__i386__) +-# define ELF_ARCH_MACHINE EM_386 +-#else +-# define ELF_ARCH_MACHINE EM_X86_64 +-#endif +-#endif /* __i386__ or __x86_64__ */ +- ++#define ELF_ARCH_MACHINE (guest_width == 8 ? 
EM_X86_64 : EM_386) + + struct xc_core_arch_context { + /* nothing */ +@@ -40,8 +33,10 @@ struct xc_core_arch_context { + #define xc_core_arch_context_get(arch_ctxt, ctxt, xc_handle, domid) \ + (0) + #define xc_core_arch_context_dump(arch_ctxt, args, dump_rtn) (0) +-#define xc_core_arch_gpfn_may_present(arch_ctxt, i) (1) + ++int ++xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt, ++ unsigned long pfn); + static inline int + xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt, + struct xc_core_section_headers *sheaders, diff --git a/19048-cross-bit-coredumping.patch b/19048-cross-bit-coredumping.patch new file mode 100644 index 0000000..2aeefc6 --- /dev/null +++ b/19048-cross-bit-coredumping.patch @@ -0,0 +1,37 @@ +# HG changeset patch +# User Keir Fraser +# Date 1232023248 0 +# Node ID 04f913ab2049bd0d8f13cdd72a487376d3909f87 +# Parent e98032a016d62c4ee09bb59ab9e0987c2563804a +dump-core: update the documentation + +Signed-off-by: Isaku Yamahata + +Index: xen-3.3.1-testing/docs/misc/dump-core-format.txt +=================================================================== +--- xen-3.3.1-testing.orig/docs/misc/dump-core-format.txt ++++ xen-3.3.1-testing/docs/misc/dump-core-format.txt +@@ -30,8 +30,13 @@ The elf header members are set as follow + e_ident[EI_OSABI] = ELFOSABI_SYSV = 0 + e_type = ET_CORE = 4 + ELFCLASS64 is always used independent of architecture. +-e_ident[EI_DATA] and e_flags are set according to the dumping system's +-architecture. Other members are set as usual. ++e_ident[EI_DATA] is set as follows ++ For x86 PV domain case, it is set according to the guest configuration ++ (i.e. if guest is 32bit it is set to EM_386 even when the dom0 is 64 bit.) ++ For other domain case (x86 HVM domain case and ia64 domain case), ++ it is set according to the dumping system's architecture. ++e_flags is set according to the dumping system's architecture. ++Other members are set as usual. 
+ + Sections + -------- +@@ -241,3 +246,7 @@ Currently only (major, minor) = (0, 1) i + The format version isn't bumped because analysis tools can distinguish it. + - .xen_ia64_mapped_regs section was made only for ia64 PV domain. + In case of IA64 HVM domain, this section doesn't exist. ++- elf header e_ident[EI_DATA] ++ On x86 PV domain case, it is set according to the guest configuration. ++ I.e. 32-on-64 case, the file will be set EM_386 instead of EM_X86_64. ++ This is the same as 32-on-32 case, so there is no impact on analysis tools. diff --git a/19051-cross-bit-coredumping.patch b/19051-cross-bit-coredumping.patch new file mode 100644 index 0000000..6f5c345 --- /dev/null +++ b/19051-cross-bit-coredumping.patch @@ -0,0 +1,24 @@ +# HG changeset patch +# User Keir Fraser +# Date 1232106411 0 +# Node ID 71e0b8adeb1f71d0055fabba0e97a4bdbf594c72 +# Parent 40d9d9ff435afee74431102e4e1ac6c7542649bd +ptrace_core: Handle FV cross-bitness. + +Signed-off-by: Bruce Rogers + +Index: xen-3.3.1-testing/tools/libxc/xc_ptrace_core.c +=================================================================== +--- xen-3.3.1-testing.orig/tools/libxc/xc_ptrace_core.c ++++ xen-3.3.1-testing/tools/libxc/xc_ptrace_core.c +@@ -540,7 +540,9 @@ xc_waitdomain_core_elf( + XEN_ELFNOTE_DUMPCORE_XEN_VERSION, + (void**)&xen_version) < 0) + goto out; +- if (xen_version->xen_version.pagesize != PAGE_SIZE) ++ /* shifted case covers 32 bit FV guest core file created on 64 bit Dom0 */ ++ if (xen_version->xen_version.pagesize != PAGE_SIZE && ++ (xen_version->xen_version.pagesize >> 32) != PAGE_SIZE) + goto out; + + /* .note.Xen: format_version */ diff --git a/19072-vmx-pat.patch b/19072-vmx-pat.patch new file mode 100644 index 0000000..9c62783 --- /dev/null +++ b/19072-vmx-pat.patch @@ -0,0 +1,143 @@ +Index: xen-3.3.1-testing/xen/arch/x86/hvm/vmx/vmcs.c +=================================================================== +--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/vmx/vmcs.c ++++ 
xen-3.3.1-testing/xen/arch/x86/hvm/vmx/vmcs.c +@@ -166,14 +166,15 @@ static void vmx_init_vmcs_config(void) + #endif + + min = VM_EXIT_ACK_INTR_ON_EXIT; +- opt = 0; ++ opt = VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT; + #ifdef __x86_64__ + min |= VM_EXIT_IA32E_MODE; + #endif + _vmx_vmexit_control = adjust_vmx_controls( + min, opt, MSR_IA32_VMX_EXIT_CTLS); + +- min = opt = 0; ++ min = 0; ++ opt = VM_ENTRY_LOAD_GUEST_PAT; + _vmx_vmentry_control = adjust_vmx_controls( + min, opt, MSR_IA32_VMX_ENTRY_CTLS); + +@@ -518,8 +519,6 @@ static int construct_vmcs(struct vcpu *v + + /* VMCS controls. */ + __vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control); +- __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control); +- __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control); + + v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control; + v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control; +@@ -533,9 +532,15 @@ static int construct_vmcs(struct vcpu *v + else + { + v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; ++ vmx_vmexit_control &= ~(VM_EXIT_SAVE_GUEST_PAT | ++ VM_EXIT_LOAD_HOST_PAT); ++ vmx_vmentry_control &= ~VM_ENTRY_LOAD_GUEST_PAT; + } + + __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control); ++ __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control); ++ __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control); ++ + if ( cpu_has_vmx_secondary_exec_control ) + __vmwrite(SECONDARY_VM_EXEC_CONTROL, + v->arch.hvm_vmx.secondary_exec_control); +@@ -557,6 +562,8 @@ static int construct_vmcs(struct vcpu *v + vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS); + vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP); + vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP); ++ if ( cpu_has_vmx_pat && paging_mode_hap(d) ) ++ vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT); + } + + /* I/O access bitmap. 
*/ +@@ -688,6 +695,21 @@ static int construct_vmcs(struct vcpu *v + __vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vmx.vpid); + } + ++ if ( cpu_has_vmx_pat && paging_mode_hap(d) ) ++ { ++ u64 host_pat, guest_pat; ++ ++ rdmsrl(MSR_IA32_CR_PAT, host_pat); ++ guest_pat = 0x7040600070406ULL; ++ ++ __vmwrite(HOST_PAT, host_pat); ++ __vmwrite(GUEST_PAT, guest_pat); ++#ifdef __i386__ ++ __vmwrite(HOST_PAT_HIGH, host_pat >> 32); ++ __vmwrite(GUEST_PAT_HIGH, guest_pat >> 32); ++#endif ++ } ++ + vmx_vmcs_exit(v); + + paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */ +@@ -968,6 +990,8 @@ void vmcs_dump_vcpu(struct vcpu *v) + vmx_dump_sel("LDTR", x86_seg_ldtr); + vmx_dump_sel("IDTR", x86_seg_idtr); + vmx_dump_sel("TR", x86_seg_tr); ++ printk("Guest PAT = 0x%08x%08x\n", ++ (uint32_t)vmr(GUEST_PAT_HIGH), (uint32_t)vmr(GUEST_PAT)); + x = (unsigned long long)vmr(TSC_OFFSET_HIGH) << 32; + x |= (uint32_t)vmr(TSC_OFFSET); + printk("TSC Offset = %016llx\n", x); +@@ -1006,6 +1030,8 @@ void vmcs_dump_vcpu(struct vcpu *v) + (unsigned long long)vmr(HOST_SYSENTER_ESP), + (int)vmr(HOST_SYSENTER_CS), + (unsigned long long)vmr(HOST_SYSENTER_EIP)); ++ printk("Host PAT = 0x%08x%08x\n", ++ (uint32_t)vmr(HOST_PAT_HIGH), (uint32_t)vmr(HOST_PAT)); + + printk("*** Control State ***\n"); + printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", +Index: xen-3.3.1-testing/xen/include/asm-x86/hvm/vmx/vmcs.h +=================================================================== +--- xen-3.3.1-testing.orig/xen/include/asm-x86/hvm/vmx/vmcs.h ++++ xen-3.3.1-testing/xen/include/asm-x86/hvm/vmx/vmcs.h +@@ -150,11 +150,14 @@ extern u32 vmx_pin_based_exec_control; + + #define VM_EXIT_IA32E_MODE 0x00000200 + #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 ++#define VM_EXIT_SAVE_GUEST_PAT 0x00040000 ++#define VM_EXIT_LOAD_HOST_PAT 0x00080000 + extern u32 vmx_vmexit_control; + + #define VM_ENTRY_IA32E_MODE 0x00000200 + #define VM_ENTRY_SMM 0x00000400 + #define VM_ENTRY_DEACT_DUAL_MONITOR 
0x00000800 ++#define VM_ENTRY_LOAD_GUEST_PAT 0x00004000 + extern u32 vmx_vmentry_control; + + #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +@@ -181,7 +184,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr + (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) + #define cpu_has_vmx_vpid \ + (vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID) +- ++#define cpu_has_vmx_pat \ ++ (vmx_vmentry_control & VM_ENTRY_LOAD_GUEST_PAT) + /* GUEST_INTERRUPTIBILITY_INFO flags. */ + #define VMX_INTR_SHADOW_STI 0x00000001 + #define VMX_INTR_SHADOW_MOV_SS 0x00000002 +@@ -232,6 +236,8 @@ enum vmcs_field { + VMCS_LINK_POINTER_HIGH = 0x00002801, + GUEST_IA32_DEBUGCTL = 0x00002802, + GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, ++ GUEST_PAT = 0x00002804, ++ GUEST_PAT_HIGH = 0x00002805, + GUEST_PDPTR0 = 0x0000280a, + GUEST_PDPTR0_HIGH = 0x0000280b, + GUEST_PDPTR1 = 0x0000280c, +@@ -240,6 +246,8 @@ enum vmcs_field { + GUEST_PDPTR2_HIGH = 0x0000280f, + GUEST_PDPTR3 = 0x00002810, + GUEST_PDPTR3_HIGH = 0x00002811, ++ HOST_PAT = 0x00002c00, ++ HOST_PAT_HIGH = 0x00002c01, + PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + EXCEPTION_BITMAP = 0x00004004, diff --git a/19079-snp_ctl-1.patch b/19079-snp_ctl-1.patch new file mode 100644 index 0000000..81d09cb --- /dev/null +++ b/19079-snp_ctl-1.patch @@ -0,0 +1,320 @@ +Index: xen-3.3.1-testing/xen/arch/x86/hvm/mtrr.c +=================================================================== +--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/mtrr.c ++++ xen-3.3.1-testing/xen/arch/x86/hvm/mtrr.c +@@ -702,12 +702,15 @@ HVM_REGISTER_SAVE_RESTORE(MTRR, hvm_save + 1, HVMSR_PER_VCPU); + + uint8_t epte_get_entry_emt( +- struct domain *d, unsigned long gfn, unsigned long mfn) ++ struct domain *d, unsigned long gfn, ++ unsigned long mfn, uint8_t *igmt, int direct_mmio) + { + uint8_t gmtrr_mtype, hmtrr_mtype; + uint32_t type; + struct vcpu *v = current; + ++ *igmt = 0; ++ + if ( (current->domain != d) && ((v = d->vcpu[0]) == 
NULL) ) + return MTRR_TYPE_WRBACK; + +@@ -723,6 +726,21 @@ uint8_t epte_get_entry_emt( + if ( hvm_get_mem_pinned_cacheattr(d, gfn, &type) ) + return type; + ++ if ( !iommu_enabled ) ++ { ++ *igmt = 1; ++ return MTRR_TYPE_WRBACK; ++ } ++ ++ if ( direct_mmio ) ++ return MTRR_TYPE_UNCACHABLE; ++ ++ if ( iommu_snoop ) ++ { ++ *igmt = 1; ++ return MTRR_TYPE_WRBACK; ++ } ++ + gmtrr_mtype = get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT)); + hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn << PAGE_SHIFT)); + return ((gmtrr_mtype <= hmtrr_mtype) ? gmtrr_mtype : hmtrr_mtype); +Index: xen-3.3.1-testing/xen/arch/x86/mm/hap/p2m-ept.c +=================================================================== +--- xen-3.3.1-testing.orig/xen/arch/x86/mm/hap/p2m-ept.c ++++ xen-3.3.1-testing/xen/arch/x86/mm/hap/p2m-ept.c +@@ -66,6 +66,7 @@ static int ept_set_middle_entry(struct d + list_add_tail(&pg->list, &d->arch.p2m->pages); + + ept_entry->emt = 0; ++ ept_entry->igmt = 0; + ept_entry->sp_avail = 0; + ept_entry->avail1 = 0; + ept_entry->mfn = page_to_mfn(pg); +@@ -114,9 +115,19 @@ static int ept_next_level(struct domain + } + } + ++/* The parameter need_modify_vtd_table in _ept_set_entry(): ++ * ++ * Usually VT-d page table is created during the p2m table creation time, ++ * and it only cares about the gfn to mfn translations, ++ * EPT table takes the same effect as the p2m table, so we create ++ * VT-d page table in ept_set_entry() function. ++ * But when EPT entires are modified not related to the gfn and mfn translations * We don't need to recreat the same VT-d page tables. ++ * The need_modify_vtd_table parameter is used for this. 
++ */ ++ + static int +-ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, +- unsigned int order, p2m_type_t p2mt) ++_ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, ++ unsigned int order, p2m_type_t p2mt, int need_modify_vtd_table) + { + ept_entry_t *table = NULL; + unsigned long gfn_remainder = gfn, offset = 0; +@@ -124,6 +135,8 @@ ept_set_entry(struct domain *d, unsigned + u32 index; + int i, rv = 0, ret = 0; + int walk_level = order / EPT_TABLE_ORDER; ++ int direct_mmio = (p2mt == p2m_mmio_direct); ++ uint8_t igmt = 0; + + /* we only support 4k and 2m pages now */ + +@@ -157,7 +170,9 @@ ept_set_entry(struct domain *d, unsigned + { + if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) ) + { +- ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn)); ++ ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn), ++ &igmt, direct_mmio); ++ ept_entry->igmt = igmt; + ept_entry->sp_avail = walk_level ? 1 : 0; + + if ( ret == GUEST_TABLE_SUPER_PAGE ) +@@ -208,7 +223,10 @@ ept_set_entry(struct domain *d, unsigned + { + split_ept_entry = split_table + i; + split_ept_entry->emt = epte_get_entry_emt(d, +- gfn-offset+i, split_mfn+i); ++ gfn-offset+i, split_mfn+i, ++ &igmt, direct_mmio); ++ split_ept_entry->igmt = igmt; ++ + split_ept_entry->sp_avail = 0; + + split_ept_entry->mfn = split_mfn+i; +@@ -223,7 +241,10 @@ ept_set_entry(struct domain *d, unsigned + + /* Set the destinated 4k page as normal */ + split_ept_entry = split_table + offset; +- split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn)); ++ split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn), ++ &igmt, direct_mmio); ++ split_ept_entry->igmt = igmt; ++ + split_ept_entry->mfn = mfn_x(mfn); + split_ept_entry->avail1 = p2mt; + ept_p2m_type_to_flags(split_ept_entry, p2mt); +@@ -246,7 +267,8 @@ out: + + /* Now the p2m table is not shared with vt-d page table */ + +- if ( iommu_enabled && is_hvm_domain(d) ) ++ if ( iommu_enabled && is_hvm_domain(d) ++ && need_modify_vtd_table 
) + { + if ( p2mt == p2m_ram_rw ) + { +@@ -273,6 +295,17 @@ out: + return rv; + } + ++static int ++ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn, ++ unsigned int order, p2m_type_t p2mt) ++{ ++ /* ept_set_entry() are called from set_entry(), ++ * We should always create VT-d page table acording ++ * to the gfn to mfn translations changes. ++ */ ++ return _ept_set_entry(d, gfn, mfn, order, p2mt, 1); ++} ++ + /* Read ept p2m entries */ + static mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t) + { +@@ -393,18 +426,30 @@ void ept_change_entry_emt_with_range(str + * Set emt for super page. + */ + order = EPT_TABLE_ORDER; +- ept_set_entry(d, gfn, _mfn(mfn), order, p2mt); ++ /* vmx_set_uc_mode() dont' touch the gfn to mfn ++ * translations, only modify the emt field of the EPT entries. ++ * so we need not modify the current VT-d page tables. ++ */ ++ _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0); + gfn += 0x1FF; + } + else + { +- /* change emt for partial entries of the 2m area */ +- ept_set_entry(d, gfn, _mfn(mfn), order, p2mt); ++ /* 1)change emt for partial entries of the 2m area. ++ * 2)vmx_set_uc_mode() dont' touch the gfn to mfn ++ * translations, only modify the emt field of the EPT entries. ++ * so we need not modify the current VT-d page tables. ++ */ ++ _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt,0); + gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF; + } + } +- else /* gfn assigned with 4k */ +- ept_set_entry(d, gfn, _mfn(mfn), order, p2mt); ++ else /* 1)gfn assigned with 4k ++ * 2)vmx_set_uc_mode() dont' touch the gfn to mfn ++ * translations, only modify the emt field of the EPT entries. ++ * so we need not modify the current VT-d page tables. 
++ */ ++ _ept_set_entry(d, gfn, _mfn(mfn), order, p2mt, 0); + } + } + +Index: xen-3.3.1-testing/xen/drivers/passthrough/iommu.c +=================================================================== +--- xen-3.3.1-testing.orig/xen/drivers/passthrough/iommu.c ++++ xen-3.3.1-testing/xen/drivers/passthrough/iommu.c +@@ -40,6 +40,7 @@ int iommu_enabled = 0; + int iommu_pv_enabled = 0; + int force_iommu = 0; + int iommu_passthrough = 0; ++int iommu_snoop = 0; + + static void __init parse_iommu_param(char *s) + { +Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/dmar.c +=================================================================== +--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/dmar.c ++++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/dmar.c +@@ -29,6 +29,7 @@ + #include + #include + #include "dmar.h" ++#include "iommu.h" + + int vtd_enabled = 1; + +@@ -508,6 +509,8 @@ static int __init acpi_parse_dmar(struct + int acpi_dmar_init(void) + { + int rc; ++ struct acpi_drhd_unit *drhd; ++ struct iommu *iommu; + + rc = -ENODEV; + if ( force_iommu ) +@@ -524,7 +527,20 @@ int acpi_dmar_init(void) + if ( list_empty(&acpi_drhd_units) ) + goto fail; + +- printk("Intel VT-d has been enabled\n"); ++ /* Given that all devices within guest use same io page table, ++ * enable snoop control only if all VT-d engines support it.
++ */ ++ iommu_snoop = 1; ++ for_each_drhd_unit ( drhd ) ++ { ++ iommu = drhd->iommu; ++ if ( !ecap_snp_ctl(iommu->ecap) ) { ++ iommu_snoop = 0; ++ break; ++ } ++ } ++ ++ printk("Intel VT-d has been enabled, snoop_control=%d.\n", iommu_snoop); + + return 0; + +Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/iommu.c +=================================================================== +--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/iommu.c ++++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/iommu.c +@@ -1495,6 +1495,11 @@ int intel_iommu_map_page( + pte_present = dma_pte_present(*pte); + dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K); + dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE); ++ ++ /* Set the SNP on leaf page table if Snoop Control available */ ++ if ( iommu_snoop ) ++ dma_set_pte_snp(*pte); ++ + iommu_flush_cache_entry(pte); + unmap_vtd_domain_page(page); + +Index: xen-3.3.1-testing/xen/drivers/passthrough/vtd/iommu.h +=================================================================== +--- xen-3.3.1-testing.orig/xen/drivers/passthrough/vtd/iommu.h ++++ xen-3.3.1-testing/xen/drivers/passthrough/vtd/iommu.h +@@ -104,6 +104,7 @@ + #define ecap_ext_intr(e) ((e >> 4) & 0x1) + #define ecap_cache_hints(e) ((e >> 5) & 0x1) + #define ecap_pass_thru(e) ((e >> 6) & 0x1) ++#define ecap_snp_ctl(e) ((e >> 7) & 0x1) + + /* IOTLB_REG */ + #define DMA_TLB_FLUSH_GRANU_OFFSET 60 +@@ -260,10 +261,12 @@ struct dma_pte { + }; + #define DMA_PTE_READ (1) + #define DMA_PTE_WRITE (2) ++#define DMA_PTE_SNP (1 << 11) + #define dma_clear_pte(p) do {(p).val = 0;} while(0) + #define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while(0) + #define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while(0) + #define dma_set_pte_superpage(p) do {(p).val |= (1 << 7);} while(0) ++#define dma_set_pte_snp(p) do {(p).val |= DMA_PTE_SNP;} while(0) + #define dma_set_pte_prot(p, prot) \ + do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) + #define 
dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +Index: xen-3.3.1-testing/xen/include/asm-x86/hvm/vmx/vmx.h +=================================================================== +--- xen-3.3.1-testing.orig/xen/include/asm-x86/hvm/vmx/vmx.h ++++ xen-3.3.1-testing/xen/include/asm-x86/hvm/vmx/vmx.h +@@ -33,7 +33,8 @@ typedef union { + u64 r : 1, + w : 1, + x : 1, +- emt : 4, ++ emt : 3, ++ igmt : 1, + sp_avail : 1, + avail1 : 4, + mfn : 45, +Index: xen-3.3.1-testing/xen/include/asm-x86/mtrr.h +=================================================================== +--- xen-3.3.1-testing.orig/xen/include/asm-x86/mtrr.h ++++ xen-3.3.1-testing/xen/include/asm-x86/mtrr.h +@@ -64,7 +64,7 @@ extern int mtrr_del_page(int reg, unsign + extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi); + extern u32 get_pat_flags(struct vcpu *v, u32 gl1e_flags, paddr_t gpaddr, + paddr_t spaddr); +-extern uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, unsigned long mfn); ++extern uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, unsigned long mfn, uint8_t *igmt, int direct_mmio); + extern void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn, + unsigned long end_gfn); + extern unsigned char pat_type_2_pte_flags(unsigned char pat_type); +Index: xen-3.3.1-testing/xen/include/xen/iommu.h +=================================================================== +--- xen-3.3.1-testing.orig/xen/include/xen/iommu.h ++++ xen-3.3.1-testing/xen/include/xen/iommu.h +@@ -32,6 +32,7 @@ extern int iommu_enabled; + extern int iommu_pv_enabled; + extern int force_iommu; + extern int iommu_passthrough; ++extern int iommu_snoop; + + #define domain_hvm_iommu(d) (&d->arch.hvm_domain.hvm_iommu) + diff --git a/blktap-error-handling.patch b/blktap-error-handling.patch new file mode 100644 index 0000000..92fcf66 --- /dev/null +++ b/blktap-error-handling.patch @@ -0,0 +1,155 @@ +Index: xen-3.3.1-testing/tools/blktap/drivers/blktapctrl.c 
+=================================================================== +--- xen-3.3.1-testing.orig/tools/blktap/drivers/blktapctrl.c ++++ xen-3.3.1-testing/tools/blktap/drivers/blktapctrl.c +@@ -662,9 +662,6 @@ static int blktapctrl_new_blkif(blkif_t + + DPRINTF("Received a poll for a new vbd\n"); + if ( ((blk=blkif->info) != NULL) && (blk->params != NULL) ) { +- if (blktap_interface_create(ctlfd, &major, &minor, blkif) < 0) +- return -1; +- + if (test_path(blk->params, &ptr, &type, &exist, &use_ioemu) != 0) { + DPRINTF("Error in blktap device string(%s).\n", + blk->params); +@@ -693,10 +690,6 @@ static int blktapctrl_new_blkif(blkif_t + blkif->fds[WRITE] = exist->fds[WRITE]; + } + +- add_disktype(blkif, type); +- blkif->major = major; +- blkif->minor = minor; +- + image = (image_t *)malloc(sizeof(image_t)); + blkif->prv = (void *)image; + blkif->ops = &tapdisk_ops; +@@ -720,11 +713,18 @@ static int blktapctrl_new_blkif(blkif_t + goto fail; + } + ++ if (blktap_interface_create(ctlfd, &major, &minor, blkif) < 0) ++ return -1; ++ ++ blkif->major = major; ++ blkif->minor = minor; ++ ++ add_disktype(blkif, type); ++ + } else return -1; + + return 0; + fail: +- ioctl(ctlfd, BLKTAP_IOCTL_FREEINTF, minor); + return -EINVAL; + } + +Index: xen-3.3.1-testing/tools/blktap/lib/xenbus.c +=================================================================== +--- xen-3.3.1-testing.orig/tools/blktap/lib/xenbus.c ++++ xen-3.3.1-testing/tools/blktap/lib/xenbus.c +@@ -48,6 +48,7 @@ + #include + #include + #include ++#include + #include "blktaplib.h" + #include "list.h" + #include "xs_api.h" +@@ -149,6 +150,37 @@ static int backend_remove(struct xs_hand + return 0; + } + ++static int check_image(struct backend_info *be, const char** errmsg) ++{ ++ const char *path; ++ int mode; ++ blkif_t *blkif = be->blkif; ++ blkif_info_t *info = blkif->info; ++ ++ /* Strip off the image type */ ++ path = strchr(info->params, ':'); ++ if (path == NULL) ++ path = info->params; ++ else ++ path++; ++ ++ /* 
Check if the image exists and access is permitted */ ++ mode = R_OK; ++ if (!be->readonly) ++ mode |= W_OK; ++ if (access(path, mode)) { ++ if (errno == ENOENT) ++ *errmsg = "File not found."; ++ else ++ *errmsg = "Insufficient file permissions."; ++ return -1; ++ } ++ ++ /* TODO Check that the image is not attached to a different VM */ ++ ++ return 0; ++} ++ + static void ueblktap_setup(struct xs_handle *h, char *bepath) + { + struct backend_info *be; +@@ -156,6 +188,7 @@ static void ueblktap_setup(struct xs_han + int len, er, deverr; + long int pdev = 0, handle; + blkif_info_t *blk; ++ const char* errmsg = NULL; + + be = be_lookup_be(bepath); + if (be == NULL) +@@ -211,6 +244,9 @@ static void ueblktap_setup(struct xs_han + be->pdev = pdev; + } + ++ if (check_image(be, &errmsg)) ++ goto fail; ++ + er = blkif_init(be->blkif, handle, be->pdev, be->readonly); + if (er != 0) { + DPRINTF("Unable to open device %s\n",blk->params); +@@ -246,12 +282,21 @@ static void ueblktap_setup(struct xs_han + } + + be->blkif->state = CONNECTED; ++ xs_printf(h, be->backpath, "hotplug-status", "connected"); ++ + DPRINTF("[SETUP] Complete\n\n"); + goto close; + + fail: +- if ( (be != NULL) && (be->blkif != NULL) ) ++ if (be) { ++ if (errmsg == NULL) ++ errmsg = "Setting up the backend failed. 
See the log " ++ "files in /var/log/xen/ for details."; ++ xs_printf(h, be->backpath, "hotplug-error", errmsg); ++ xs_printf(h, be->backpath, "hotplug-status", "error"); ++ + backend_remove(h, be); ++ } + close: + if (path) + free(path); +@@ -286,7 +331,8 @@ static void ueblktap_probe(struct xs_han + len = strsep_len(bepath, '/', 7); + if (len < 0) + goto free_be; +- bepath[len] = '\0'; ++ if (bepath[len] != '\0') ++ goto free_be; + + be = malloc(sizeof(*be)); + if (!be) { +Index: xen-3.3.1-testing/tools/examples/xen-backend.rules +=================================================================== +--- xen-3.3.1-testing.orig/tools/examples/xen-backend.rules ++++ xen-3.3.1-testing/tools/examples/xen-backend.rules +@@ -1,4 +1,3 @@ +-SUBSYSTEM=="xen-backend", KERNEL=="tap*", RUN+="/etc/xen/scripts/blktap $env{ACTION}" + SUBSYSTEM=="xen-backend", KERNEL=="vbd*", RUN+="/etc/xen/scripts/block $env{ACTION}" + SUBSYSTEM=="xen-backend", KERNEL=="vtpm*", RUN+="/etc/xen/scripts/vtpm $env{ACTION}" + SUBSYSTEM=="xen-backend", KERNEL=="vif*", ACTION=="online", RUN+="$env{script} online" diff --git a/supported_module.diff b/supported_module.diff index 2459055..1beafc6 100644 --- a/supported_module.diff +++ b/supported_module.diff @@ -6,8 +6,9 @@ Index: xen-3.3.1-testing/unmodified_drivers/linux-2.6/Module.supported =================================================================== --- /dev/null +++ xen-3.3.1-testing/unmodified_drivers/linux-2.6/Module.supported -@@ -0,0 +1,4 @@ +@@ -0,0 +1,5 @@ +xen-vbd +xen-platform-pci +xen-vnif +xenbus ++xen-balloon diff --git a/x86-show-page-walk-early.patch b/x86-show-page-walk-early.patch index 1626b59..b382e77 100644 --- a/x86-show-page-walk-early.patch +++ b/x86-show-page-walk-early.patch @@ -1,6 +1,6 @@ --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c -@@ -1267,6 +1267,7 @@ asmlinkage void do_early_page_fault(stru +@@ -1273,6 +1273,7 @@ asmlinkage void do_early_page_fault(stru unsigned long *stk = (unsigned long *)regs; 
printk("Early fatal page fault at %04x:%p (cr2=%p, ec=%04x)\n", regs->cs, _p(regs->eip), _p(cr2), regs->error_code); diff --git a/xen-ioapic-ack-default.diff b/xen-ioapic-ack-default.diff new file mode 100644 index 0000000..3c06a25 --- /dev/null +++ b/xen-ioapic-ack-default.diff @@ -0,0 +1,23 @@ +Change default IO-APIC ack mode for single IO-APIC systems to old-style. Jan + + +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -1354,7 +1354,7 @@ static unsigned int startup_level_ioapic + return 0; /* don't check for pending */ + } + +-int ioapic_ack_new = 1; ++int ioapic_ack_new = -1; + static void setup_ioapic_ack(char *s) + { + if ( !strcmp(s, "old") ) +@@ -1854,6 +1854,8 @@ void __init setup_IO_APIC(void) + else + io_apic_irqs = ~PIC_IRQS; + ++ if (ioapic_ack_new < 0) ++ ioapic_ack_new = (nr_ioapics > 1); + printk("ENABLING IO-APIC IRQs\n"); + printk(" -> Using %s ACK method\n", ioapic_ack_new ? "new" : "old"); + diff --git a/xen-updown.sh b/xen-updown.sh index 3fed189..6d19359 100644 --- a/xen-updown.sh +++ b/xen-updown.sh @@ -38,7 +38,7 @@ test "x$1" = "x-o" && shift DEBUG=no RUN_FROM_RC=no while [ $# -gt 0 ]; do - case $1 in + case $1 in debug) DEBUG=yes ;; rc) RUN_FROM_RC=yes ;; *) debug unknown option $1 ;; @@ -100,14 +100,20 @@ xm_list() # For the specified vm, return a list of vifs that are connected to $INTERFACE list_vifs() { - id=$1 - vifs=() - for vif in $(ls -1 "/sys/class/net/$INTERFACE/brif/"); do - tmp="`echo ${vif} | egrep "^(tap|vif)$id\..*"`" - if [ ! 
-z ${tmp} ]; then - vifs=(${vifs[@]} ${tmp}) - fi - done + id=$1 + vifs=() + for vif in $(ls -1 "/sys/class/net/$INTERFACE/brif/" 2>/dev/null) ; do + eval BRIDGE_PORTS="" `grep "^[[:space:]]*BRIDGE_PORTS=" \ + "/etc/sysconfig/network/ifcfg-$INTERFACE" 2>/dev/null` + for p in $BRIDGE_PORTS ; do + test "x$p" = "x$vif" && continue 2 + done + case $vif in + (tap${id}\.*|vif${id}\.*) + vifs=(${vifs[@]} ${vif}) + ;; + esac + done echo "${vifs[@]}" } @@ -126,19 +132,22 @@ case $SCRIPTNAME in *if-up.d*) exit_if_xend_not_running - for IF in $(ls -1 "${RUN_FILES_BASE}/xen/") ; do - . "${RUN_FILES_BASE}/xen/$INTERFACE" || continue - + if test -f "${RUN_FILES_BASE}/xen/$INTERFACE" ; then + . "${RUN_FILES_BASE}/xen/$INTERFACE" + for vif in ${VIFS}; do test -d "/sys/class/net/${vif}" || continue - if ! is_iface_up ${vif} ; then - ip link set dev ${vif} up - fi - brctl addif ${INTERFACE} ${vif} 2>&1 > /dev/null + test -d "/sys/class/net/${INTERFACE}/brif/${vif}" && \ + continue + if ! is_iface_up ${vif} ; then + ip link set dev ${vif} up || continue + fi + brctl addif ${INTERFACE} ${vif} &>/dev/null done - # remove sysconfig state - rm -f "${RUN_FILES_BASE}/xen/$INTERFACE" - done + + # remove sysconfig state + rm -f "${RUN_FILES_BASE}/xen/$INTERFACE" + fi ;; *if-down.d*) exit_if_xend_not_running diff --git a/xen.changes b/xen.changes index 1ef0ad5..65ec838 100644 --- a/xen.changes +++ b/xen.changes @@ -1,3 +1,61 @@ +------------------------------------------------------------------- +Fri Jan 23 11:47:31 MST 2009 - carnold@novell.com + +- Intel - Remove improper operating condition that results in a + machine check. 
+ 19072-vmx-pat.patch + 19079-snp_ctl-1.patch + +------------------------------------------------------------------- +Fri Jan 23 11:45:16 CET 2009 - kwolf@suse.de + +- bnc#465379 - Fix blktap error handling + blktap-error-handling.patch + +------------------------------------------------------------------- +Thu Jan 22 08:36:40 MST 2009 - carnold@novell.com + +- bnc#435219 - XEN pv-driver doesn't work + +------------------------------------------------------------------- +Thu Jan 22 14:17:00 CET 2009 - jbeulich@novell.com + +- Fix unmaskable MSI handling. + 18778-msi-irq-fix.patch + +------------------------------------------------------------------- +Wed Jan 21 16:33:16 MST 2009 - jfehlig@novell.com + +- bnc#467883 - Squelch output of xen-updown.sh sysconfig hook + script and don't save state of tap devices not belonging to Xen. + +------------------------------------------------------------------- +Wed Jan 21 08:15:31 MST 2009 - carnold@novell.com + +- bnc#467807 - Xen: IRQs stop working + xen-ioapic-ack-default.diff + +------------------------------------------------------------------- +Fri Jan 16 14:20:08 MST 2009 - carnold@novell.com + +- bnc#447178 - xm dump-core does not work for cross-bitness guest. 
+ 19046-cross-bit-coredumping.patch + 19048-cross-bit-coredumping.patch + 19051-cross-bit-coredumping.patch + +------------------------------------------------------------------- +Thu Jan 15 10:26:13 MST 2009 - brieske@novell.com + +- bnc#429637 - SSVP SMBIOS HCT Test failing + 19027-hvmloader-SMBIOS-dev-mem-boundary.patch + +------------------------------------------------------------------- +Wed Jan 14 08:22:08 MST 2009 - carnold@novell.com + +- bnc#460805 - Unable to boot with Xen kernel with IBM T42p / T41p + 19039-x86-propagate-nolapic.patch + 19038-x86-no-apic.patch + ------------------------------------------------------------------- Mon Jan 5 10:14:41 MST 2009 - carnold@novell.com diff --git a/xen.spec b/xen.spec index 3a76358..9990d90 100644 --- a/xen.spec +++ b/xen.spec @@ -1,5 +1,5 @@ # -# spec file for package xen (Version 3.3.1_18546_02) +# spec file for package xen (Version 3.3.1_18546_04) # # Copyright (c) 2009 SUSE LINUX Products GmbH, Nuernberg, Germany. # @@ -37,7 +37,7 @@ BuildRequires: glibc-32bit glibc-devel-32bit %if %{?with_kmp}0 BuildRequires: kernel-source kernel-syms module-init-tools xorg-x11 %endif -Version: 3.3.1_18546_02 +Version: 3.3.1_18546_04 Release: 1 License: GPL v2 only Group: System/Kernel @@ -126,12 +126,26 @@ Patch56: 18870-vtd-flush-per-device.patch Patch57: 18878-x86-cpufreq-less-verbose.patch Patch58: 18879-cpufreq-params.patch Patch59: 18880-x86-pirq-guest-bind-msg.patch -Patch60: 18943-amd-32bit-paging-limit.patch +Patch60: 18887-vtd-error-handling.patch Patch61: 18904-x86-local-irq.patch Patch62: 18905-x86-ioapic-boot-panic.patch Patch63: 18929-shadow-no-duplicates.patch Patch64: 18930-xenoprof-dunnington.patch -Patch65: 18937-S3-MSI.patch +Patch65: 18934-vtd-PCI-X-dev-assign.patch +Patch66: 18937-S3-MSI.patch +Patch67: 18943-amd-32bit-paging-limit.patch +Patch68: 18970-vmx-print-features.patch +Patch69: 19009-x86_64-note-init-p2m.patch +Patch70: 19027-hvmloader-SMBIOS-dev-mem-boundary.patch +Patch71: 
19032-amd-iommu-pointer-reset.patch +Patch72: 19035-MSI-X-proper-enable.patch +Patch73: 19038-x86-no-apic.patch +Patch74: 19039-x86-propagate-nolapic.patch +Patch75: 19046-cross-bit-coredumping.patch +Patch76: 19048-cross-bit-coredumping.patch +Patch77: 19051-cross-bit-coredumping.patch +Patch78: 19072-vmx-pat.patch +Patch79: 19079-snp_ctl-1.patch # Our patches Patch100: xen-config.diff Patch101: xend-config.diff @@ -149,6 +163,7 @@ Patch113: serial-split.patch Patch114: xen-xm-top-needs-root.diff Patch115: xen-tightvnc-args.diff Patch116: xen-max-free-mem.diff +Patch119: xen-ioapic-ack-default.diff Patch120: block-losetup-retry.diff Patch121: block-flags.diff Patch122: xen-hvm-default-bridge.diff @@ -196,6 +211,7 @@ Patch184: ioemu-blktap-barriers.patch Patch185: tapdisk-ioemu-logfile.patch Patch186: blktap-ioemu-close-fix.patch Patch187: ioemu-blktap-zero-size.patch +Patch188: blktap-error-handling.patch # Jim's domain lock patch Patch190: xend-domain-lock.patch # Patches from Jan @@ -212,7 +228,6 @@ Patch352: pvdrv_emulation_control.patch Patch353: blktap-pv-cdrom.patch Patch354: x86-cpufreq-report.patch Patch355: dom-print.patch -Patch356: vtd-error-handling.patch # novell_shim patches Patch400: hv_tools.patch Patch401: hv_xen_base.patch @@ -615,6 +630,20 @@ Authors: %patch63 -p1 %patch64 -p1 %patch65 -p1 +%patch66 -p1 +%patch67 -p1 +%patch68 -p1 +%patch69 -p1 +%patch70 -p1 +%patch71 -p1 +%patch72 -p1 +%patch73 -p1 +%patch74 -p1 +%patch75 -p1 +%patch76 -p1 +%patch77 -p1 +%patch78 -p1 +%patch79 -p1 %patch100 -p1 %patch101 -p1 %patch102 -p1 @@ -631,6 +660,7 @@ Authors: %patch114 -p1 %patch115 -p1 %patch116 -p1 +%patch119 -p1 %patch120 -p1 %patch121 -p1 %patch122 -p1 @@ -675,6 +705,7 @@ Authors: %patch185 -p1 %patch186 -p1 %patch187 -p1 +%patch188 -p1 %patch190 -p1 %patch240 -p1 %patch241 -p1 @@ -688,7 +719,6 @@ Authors: %patch353 -p1 %patch354 -p1 %patch355 -p1 -%patch356 -p1 # Don't use shim for now %ifarch x86_64 %patch400 -p1 @@ -1039,6 +1069,37 @@ rm -f 
$RPM_BUILD_ROOT/%{_libdir}/xen/bin/qemu-dm.debug /sbin/ldconfig %changelog +* Fri Jan 23 2009 carnold@novell.com +- Intel - Remove improper operating condition that results in a + machine check. + 19072-vmx-pat.patch + 19079-snp_ctl-1.patch +* Fri Jan 23 2009 kwolf@suse.de +- bnc#465379 - Fix blktap error handling + blktap-error-handling.patch +* Thu Jan 22 2009 carnold@novell.com +- bnc#435219 - XEN pv-driver doesn't work +* Thu Jan 22 2009 jbeulich@novell.com +- Fix unmaskable MSI handling. + 18778-msi-irq-fix.patch +* Wed Jan 21 2009 jfehlig@novell.com +- bnc#467883 - Squelch output of xen-updown.sh sysconfig hook + script and don't save state of tap devices not belonging to Xen. +* Wed Jan 21 2009 carnold@novell.com +- bnc#467807 - Xen: IRQs stop working + xen-ioapic-ack-default.diff +* Fri Jan 16 2009 carnold@novell.com +- bnc#447178 - xm dump-core does not work for cross-bitness guest. + 19046-cross-bit-coredumping.patch + 19048-cross-bit-coredumping.patch + 19051-cross-bit-coredumping.patch +* Thu Jan 15 2009 brieske@novell.com +- bnc#429637 - SSVP SMBIOS HCT Test failing + 19027-hvmloader-SMBIOS-dev-mem-boundary.patch +* Wed Jan 14 2009 carnold@novell.com +- bnc#460805 - Unable to boot with Xen kernel with IBM T42p / T41p + 19039-x86-propagate-nolapic.patch + 19038-x86-no-apic.patch * Mon Jan 05 2009 carnold@novell.com - bnc#435596 - dom0 S3 resume fails if disk drive is set as AHCI mode.