diff --git a/23725-pci-add-device.patch b/23725-pci-add-device.patch new file mode 100644 index 0000000..f710960 --- /dev/null +++ b/23725-pci-add-device.patch @@ -0,0 +1,174 @@ +# HG changeset patch +# User Jan Beulich +# Date 1311081248 -3600 +# Node ID 4dc6a9ba90d60fdf0cc0898fc9a8fe84ae9030fc +# Parent b3434f24b0827c5ef34e4b4a72893288e2ffbe40 +PCI: consolidate interface for adding devices + +The functionality of pci_add_device_ext() can be easily folded into +pci_add_device(), and eliminates the need to change two functions for +future adjustments. + +Signed-off-by: Jan Beulich + +--- a/xen/arch/ia64/xen/hypercall.c ++++ b/xen/arch/ia64/xen/hypercall.c +@@ -662,8 +662,8 @@ long do_physdev_op(int cmd, XEN_GUEST_HA + if ( copy_from_guest(&manage_pci, arg, 1) != 0 ) + break; + +- ret = pci_add_device(manage_pci.bus, manage_pci.devfn); +- break; ++ ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL); ++ break; + } + + case PHYSDEVOP_manage_pci_remove: { +@@ -695,10 +695,10 @@ long do_physdev_op(int cmd, XEN_GUEST_HA + pdev_info.is_virtfn = manage_pci_ext.is_virtfn; + pdev_info.physfn.bus = manage_pci_ext.physfn.bus; + pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn; +- ret = pci_add_device_ext(manage_pci_ext.bus, +- manage_pci_ext.devfn, +- &pdev_info); +- break; ++ ret = pci_add_device(manage_pci_ext.bus, ++ manage_pci_ext.devfn, ++ &pdev_info); ++ break; + } + + default: +--- a/xen/arch/x86/physdev.c ++++ b/xen/arch/x86/physdev.c +@@ -472,7 +472,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H + if ( copy_from_guest(&manage_pci, arg, 1) != 0 ) + break; + +- ret = pci_add_device(manage_pci.bus, manage_pci.devfn); ++ ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL); + break; + } + +@@ -509,9 +509,9 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H + pdev_info.is_virtfn = manage_pci_ext.is_virtfn; + pdev_info.physfn.bus = manage_pci_ext.physfn.bus; + pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn; +- ret = pci_add_device_ext(manage_pci_ext.bus, 
+- manage_pci_ext.devfn, +- &pdev_info); ++ ret = pci_add_device(manage_pci_ext.bus, ++ manage_pci_ext.devfn, ++ &pdev_info); + break; + } + +--- a/xen/drivers/passthrough/pci.c ++++ b/xen/drivers/passthrough/pci.c +@@ -142,16 +142,29 @@ void pci_enable_acs(struct pci_dev *pdev + pci_conf_write16(bus, dev, func, pos + PCI_ACS_CTRL, ctrl); + } + +-int pci_add_device(u8 bus, u8 devfn) ++int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info) + { + struct pci_dev *pdev; ++ const char *pdev_type; + int ret = -ENOMEM; + ++ if (!info) ++ pdev_type = "device"; ++ else if (info->is_extfn) ++ pdev_type = "extended function"; ++ else if (info->is_virtfn) ++ pdev_type = "virtual function"; ++ else ++ return -EINVAL; ++ + spin_lock(&pcidevs_lock); + pdev = alloc_pdev(bus, devfn); + if ( !pdev ) + goto out; + ++ if ( info ) ++ pdev->info = *info; ++ + ret = 0; + if ( !pdev->domain ) + { +@@ -169,8 +182,8 @@ int pci_add_device(u8 bus, u8 devfn) + + out: + spin_unlock(&pcidevs_lock); +- printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus, +- PCI_SLOT(devfn), PCI_FUNC(devfn)); ++ printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type, ++ bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + return ret; + } + +@@ -197,51 +210,6 @@ int pci_remove_device(u8 bus, u8 devfn) + return ret; + } + +-int pci_add_device_ext(u8 bus, u8 devfn, struct pci_dev_info *info) +-{ +- int ret; +- char *pdev_type; +- struct pci_dev *pdev; +- +- if (info->is_extfn) +- pdev_type = "Extended Function"; +- else if (info->is_virtfn) +- pdev_type = "Virtual Function"; +- else +- return -EINVAL; +- +- +- ret = -ENOMEM; +- spin_lock(&pcidevs_lock); +- pdev = alloc_pdev(bus, devfn); +- if ( !pdev ) +- goto out; +- +- pdev->info = *info; +- +- ret = 0; +- if ( !pdev->domain ) +- { +- pdev->domain = dom0; +- ret = iommu_add_device(pdev); +- if ( ret ) +- { +- pdev->domain = NULL; +- goto out; +- } +- +- list_add(&pdev->domain_list, &dom0->arch.pdev_list); +- pci_enable_acs(pdev); +- } +- +-out: +- 
spin_unlock(&pcidevs_lock); +- printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type, +- bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); +- +- return ret; +-} +- + static void pci_clean_dpci_irqs(struct domain *d) + { + struct hvm_irq_dpci *hvm_irq_dpci = NULL; +--- a/xen/include/xen/pci.h ++++ b/xen/include/xen/pci.h +@@ -86,9 +86,8 @@ struct pci_dev *pci_lock_pdev(int bus, i + struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn); + + void pci_release_devices(struct domain *d); +-int pci_add_device(u8 bus, u8 devfn); ++int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *); + int pci_remove_device(u8 bus, u8 devfn); +-int pci_add_device_ext(u8 bus, u8 devfn, struct pci_dev_info *info); + struct pci_dev *pci_get_pdev(int bus, int devfn); + struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn); + diff --git a/23746-vtd-cleanup-timers.patch b/23746-vtd-cleanup-timers.patch index 973af4e..36e3694 100644 --- a/23746-vtd-cleanup-timers.patch +++ b/23746-vtd-cleanup-timers.patch @@ -31,7 +31,7 @@ Signed-off-by: Tim Deegan --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c -@@ -252,9 +252,6 @@ static void pci_clean_dpci_irqs(struct d +@@ -220,9 +220,6 @@ static void pci_clean_dpci_irqs(struct d if ( !iommu_enabled ) return; diff --git a/23762-iommu-fault-bm-off.patch b/23762-iommu-fault-bm-off.patch new file mode 100644 index 0000000..c3933e4 --- /dev/null +++ b/23762-iommu-fault-bm-off.patch @@ -0,0 +1,72 @@ +References: bnc#712051, CVE-2011-3131 + +# HG changeset patch +# User Tim Deegan +# Date 1313144964 -3600 +# Node ID 537ed3b74b3f13267cfb3eb0e1483f432f3685cd +# Parent 1f08b380d4386cdd6714786a9163e5f51aecab5d +Passthrough: disable bus-mastering on any card that causes an IOMMU fault. + +This stops the card from raising back-to-back faults and live-locking +the CPU that handles them. 
+ +Signed-off-by: Tim Deegan +Acked-by: Wei Wang2 +Acked-by: Allen M Kay + +--- a/xen/drivers/passthrough/amd/iommu_init.c ++++ b/xen/drivers/passthrough/amd/iommu_init.c +@@ -462,7 +462,7 @@ static hw_irq_controller iommu_msi_type + + static void parse_event_log_entry(u32 entry[]) + { +- u16 domain_id, device_id; ++ u16 domain_id, device_id, bdf, cword; + u32 code; + u64 *addr; + char * event_str[] = {"ILLEGAL_DEV_TABLE_ENTRY", +@@ -497,6 +497,18 @@ static void parse_event_log_entry(u32 en + "%s: domain = %d, device id = 0x%04x, " + "fault address = 0x%"PRIx64"\n", + event_str[code-1], domain_id, device_id, *addr); ++ ++ /* Tell the device to stop DMAing; we can't rely on the guest to ++ * control it for us. */ ++ for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ ) ++ if ( get_dma_requestor_id(bdf) == device_id ) ++ { ++ cword = pci_conf_read16(PCI_BUS(bdf), PCI_SLOT(bdf), ++ PCI_FUNC(bdf), PCI_COMMAND); ++ pci_conf_write16(PCI_BUS(bdf), PCI_SLOT(bdf), ++ PCI_FUNC(bdf), PCI_COMMAND, ++ cword & ~PCI_COMMAND_MASTER); ++ } + } + else + { +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -887,7 +887,7 @@ static void iommu_page_fault(int irq, vo + while (1) + { + u8 fault_reason; +- u16 source_id; ++ u16 source_id, cword; + u32 data; + u64 guest_addr; + int type; +@@ -920,6 +920,14 @@ static void iommu_page_fault(int irq, vo + iommu_page_fault_do_one(iommu, type, fault_reason, + source_id, guest_addr); + ++ /* Tell the device to stop DMAing; we can't rely on the guest to ++ * control it for us. 
*/ ++ cword = pci_conf_read16(PCI_BUS(source_id), PCI_SLOT(source_id), ++ PCI_FUNC(source_id), PCI_COMMAND); ++ pci_conf_write16(PCI_BUS(source_id), PCI_SLOT(source_id), ++ PCI_FUNC(source_id), PCI_COMMAND, ++ cword & ~PCI_COMMAND_MASTER); ++ + fault_index++; + if ( fault_index > cap_num_fault_regs(iommu->cap) ) + fault_index = 0; diff --git a/23763-pci-multi-seg-x2apic-vtd-no-crash.patch b/23763-pci-multi-seg-x2apic-vtd-no-crash.patch new file mode 100644 index 0000000..81c06a6 --- /dev/null +++ b/23763-pci-multi-seg-x2apic-vtd-no-crash.patch @@ -0,0 +1,55 @@ +# HG changeset patch +# User Jan Beulich +# Date 1313226769 -3600 +# Node ID 8f647d409196f1d018f6284af03d1625cf8f93af +# Parent 537ed3b74b3f13267cfb3eb0e1483f432f3685cd +VT-d: don't reject valid DMAR/ATSR tables on systems with multiple PCI segments + +On multi-PCI-segment systems, each segment has to be expected to have +an include-all DRHD and an all-ports ATSR, so the firmware consistency +check incorrectly rejects valid configurations there (which is +particularly problematic when the firmware also pre-enabled x2apic +mode, as the system will panic in that case due to being unable to +enable interrupt remapping). Thus constrain the check to just segment +0 for now; once full multi-segment support is there (which I'm working +on), it can be revisited whether we'd want to track this per segment, +or whether we trust the firmware of such large systems. 
+ +Signed-off-by: Jan Beulich + +--- a/xen/drivers/passthrough/vtd/dmar.c ++++ b/xen/drivers/passthrough/vtd/dmar.c +@@ -427,13 +427,14 @@ acpi_parse_one_drhd(struct acpi_dmar_ent + if ( iommu_verbose ) + dprintk(VTDPREFIX, " flags: INCLUDE_ALL\n"); + /* Only allow one INCLUDE_ALL */ +- if ( include_all ) ++ if ( drhd->segment == 0 && include_all ) + { + dprintk(XENLOG_WARNING VTDPREFIX, + "Only one INCLUDE_ALL device scope is allowed\n"); + ret = -EINVAL; + } +- include_all = 1; ++ if ( drhd->segment == 0 ) ++ include_all = 1; + } + + if ( ret ) +@@ -633,13 +634,14 @@ acpi_parse_one_atsr(struct acpi_dmar_ent + if ( iommu_verbose ) + dprintk(VTDPREFIX, " flags: ALL_PORTS\n"); + /* Only allow one ALL_PORTS */ +- if ( all_ports ) ++ if ( atsr->segment == 0 && all_ports ) + { + dprintk(XENLOG_WARNING VTDPREFIX, + "Only one ALL_PORTS device scope is allowed\n"); + ret = -EINVAL; + } +- all_ports = 1; ++ if ( atsr->segment == 0 ) ++ all_ports = 1; + } + + if ( ret ) diff --git a/23765-x86-irq-vector-leak.patch b/23765-x86-irq-vector-leak.patch new file mode 100644 index 0000000..b9a49f5 --- /dev/null +++ b/23765-x86-irq-vector-leak.patch @@ -0,0 +1,29 @@ +# HG changeset patch +# User Andrew Cooper +# Date 1313226868 -3600 +# Node ID 68b903bb1b01b2a6ef9c6e8ead3be3c1c2208341 +# Parent 67b883402736ef1746cd6654da4c898f70f40723 +x86: IRQ fix incorrect logic in __clear_irq_vector + +In the old code, tmp_mask is the cpu_and of cfg->cpu_mask and +cpu_online_map. However, in the usual case of moving an IRQ from one +PCPU to another because the scheduler decides it's a good idea, +cfg->cpu_mask and cfg->old_cpu_mask do not intersect. This causes the +old cpu vector_irq table to keep the irq reference when it shouldn't. + +This leads to a resource leak if a domain is shut down while an irq has +a move pending, which results in Xen's create_irq() eventually failing +with -ENOSPC when all vector_irq tables are full of stale references. 
+ +Signed-off-by: Andrew Cooper + +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -190,6 +190,7 @@ static void __clear_irq_vector(int irq) + + if (likely(!cfg->move_in_progress)) + return; ++ cpus_and(tmp_mask, cfg->old_cpu_mask, cpu_online_map); + for_each_cpu_mask(cpu, tmp_mask) { + for (vector = FIRST_DYNAMIC_VECTOR; vector <= LAST_DYNAMIC_VECTOR; + vector++) { diff --git a/23766-x86-msi-vf-bars.patch b/23766-x86-msi-vf-bars.patch new file mode 100644 index 0000000..d745b76 --- /dev/null +++ b/23766-x86-msi-vf-bars.patch @@ -0,0 +1,295 @@ +# HG changeset patch +# User Jan Beulich +# Date 1313226898 -3600 +# Node ID 8d6edc3d26d26931f3732a2008fb4818bc7bab2d +# Parent 68b903bb1b01b2a6ef9c6e8ead3be3c1c2208341 +x86/PCI-MSI: properly determine VF BAR values + +As was discussed a couple of times on this list, SR-IOV virtual +functions have their BARs read as zero - the physical function's +SR-IOV capability structure must be consulted instead. The bogus +warnings people complained about are being eliminated with this +change. 
+ +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/msi.c ++++ b/xen/arch/x86/msi.c +@@ -522,12 +522,48 @@ static int msi_capability_init(struct pc + return 0; + } + +-static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir) ++static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir, int vf) + { + u8 limit; +- u32 addr; ++ u32 addr, base = PCI_BASE_ADDRESS_0, disp = 0; + +- switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f ) ++ if ( vf >= 0 ) ++ { ++ struct pci_dev *pdev = pci_get_pdev(bus, PCI_DEVFN(slot, func)); ++ unsigned int pos = pci_find_ext_capability(0, bus, ++ PCI_DEVFN(slot, func), ++ PCI_EXT_CAP_ID_SRIOV); ++ u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL); ++ u16 num_vf = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_NUM_VF); ++ u16 offset = pci_conf_read16(bus, slot, func, ++ pos + PCI_SRIOV_VF_OFFSET); ++ u16 stride = pci_conf_read16(bus, slot, func, ++ pos + PCI_SRIOV_VF_STRIDE); ++ ++ if ( !pdev || !pos || ++ !(ctrl & PCI_SRIOV_CTRL_VFE) || ++ !(ctrl & PCI_SRIOV_CTRL_MSE) || ++ !num_vf || !offset || (num_vf > 1 && !stride) || ++ bir >= PCI_SRIOV_NUM_BARS || ++ !pdev->vf_rlen[bir] ) ++ return 0; ++ base = pos + PCI_SRIOV_BAR; ++ vf -= PCI_BDF(bus, slot, func) + offset; ++ if ( vf < 0 || (vf && vf % stride) ) ++ return 0; ++ if ( stride ) ++ { ++ if ( vf % stride ) ++ return 0; ++ vf /= stride; ++ } ++ if ( vf >= num_vf ) ++ return 0; ++ BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS); ++ disp = vf * pdev->vf_rlen[bir]; ++ limit = PCI_SRIOV_NUM_BARS; ++ } ++ else switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f ) + { + case PCI_HEADER_TYPE_NORMAL: + limit = 6; +@@ -544,7 +580,7 @@ static u64 read_pci_mem_bar(u8 bus, u8 s + + if ( bir >= limit ) + return 0; +- addr = pci_conf_read32(bus, slot, func, PCI_BASE_ADDRESS_0 + bir * 4); ++ addr = pci_conf_read32(bus, slot, func, base + bir * 4); + if ( (addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO ) + return 0; + if ( 
(addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64 ) +@@ -552,11 +588,10 @@ static u64 read_pci_mem_bar(u8 bus, u8 s + addr &= PCI_BASE_ADDRESS_MEM_MASK; + if ( ++bir >= limit ) + return 0; +- return addr | +- ((u64)pci_conf_read32(bus, slot, func, +- PCI_BASE_ADDRESS_0 + bir * 4) << 32); ++ return addr + disp + ++ ((u64)pci_conf_read32(bus, slot, func, base + bir * 4) << 32); + } +- return addr & PCI_BASE_ADDRESS_MEM_MASK; ++ return (addr & PCI_BASE_ADDRESS_MEM_MASK) + disp; + } + + /** +@@ -629,11 +664,29 @@ static int msix_capability_init(struct p + + if ( !dev->msix_nr_entries ) + { ++ u8 pbus, pslot, pfunc; ++ int vf; + u64 pba_paddr; + u32 pba_offset; + ++ if ( !dev->info.is_virtfn ) ++ { ++ pbus = bus; ++ pslot = slot; ++ pfunc = func; ++ vf = -1; ++ } ++ else ++ { ++ pbus = dev->info.physfn.bus; ++ pslot = PCI_SLOT(dev->info.physfn.devfn); ++ pfunc = PCI_FUNC(dev->info.physfn.devfn); ++ vf = PCI_BDF2(dev->bus, dev->devfn); ++ } ++ + ASSERT(!dev->msix_used_entries); +- WARN_ON(msi->table_base != read_pci_mem_bar(bus, slot, func, bir)); ++ WARN_ON(msi->table_base != ++ read_pci_mem_bar(pbus, pslot, pfunc, bir, vf)); + + dev->msix_nr_entries = nr_entries; + dev->msix_table.first = PFN_DOWN(table_paddr); +@@ -645,7 +698,7 @@ static int msix_capability_init(struct p + pba_offset = pci_conf_read32(bus, slot, func, + msix_pba_offset_reg(pos)); + bir = (u8)(pba_offset & PCI_MSIX_BIRMASK); +- pba_paddr = read_pci_mem_bar(bus, slot, func, bir); ++ pba_paddr = read_pci_mem_bar(pbus, pslot, pfunc, bir, vf); + WARN_ON(!pba_paddr); + pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK; + +--- a/xen/drivers/passthrough/pci.c ++++ b/xen/drivers/passthrough/pci.c +@@ -145,6 +145,7 @@ void pci_enable_acs(struct pci_dev *pdev + int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info) + { + struct pci_dev *pdev; ++ unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn); + const char *pdev_type; + int ret = -ENOMEM; + +@@ -153,7 +154,14 @@ int 
pci_add_device(u8 bus, u8 devfn, con + else if (info->is_extfn) + pdev_type = "extended function"; + else if (info->is_virtfn) ++ { ++ spin_lock(&pcidevs_lock); ++ pdev = pci_get_pdev(info->physfn.bus, info->physfn.devfn); ++ spin_unlock(&pcidevs_lock); ++ if ( !pdev ) ++ pci_add_device(info->physfn.bus, info->physfn.devfn, NULL); + pdev_type = "virtual function"; ++ } + else + return -EINVAL; + +@@ -164,6 +172,70 @@ int pci_add_device(u8 bus, u8 devfn, con + + if ( info ) + pdev->info = *info; ++ else if ( !pdev->vf_rlen[0] ) ++ { ++ unsigned int pos = pci_find_ext_capability(0, bus, devfn, ++ PCI_EXT_CAP_ID_SRIOV); ++ u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL); ++ ++ if ( !pos ) ++ /* Nothing */; ++ else if ( !(ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) ) ++ { ++ unsigned int i; ++ ++ BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS); ++ for ( i = 0; i < PCI_SRIOV_NUM_BARS; ++i ) ++ { ++ unsigned int idx = pos + PCI_SRIOV_BAR + i * 4; ++ u32 bar = pci_conf_read32(bus, slot, func, idx); ++ u32 hi = 0; ++ ++ if ( (bar & PCI_BASE_ADDRESS_SPACE) == ++ PCI_BASE_ADDRESS_SPACE_IO ) ++ { ++ printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with vf" ++ " BAR%u in IO space\n", ++ bus, slot, func, i); ++ continue; ++ } ++ pci_conf_write32(bus, slot, func, idx, ~0); ++ if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == ++ PCI_BASE_ADDRESS_MEM_TYPE_64 ) ++ { ++ if ( i >= PCI_SRIOV_NUM_BARS ) ++ { ++ printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with" ++ " 64-bit vf BAR in last slot\n", ++ bus, slot, func); ++ break; ++ } ++ hi = pci_conf_read32(bus, slot, func, idx + 4); ++ pci_conf_write32(bus, slot, func, idx + 4, ~0); ++ } ++ pdev->vf_rlen[i] = pci_conf_read32(bus, slot, func, idx) & ++ PCI_BASE_ADDRESS_MEM_MASK; ++ if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == ++ PCI_BASE_ADDRESS_MEM_TYPE_64 ) ++ { ++ pdev->vf_rlen[i] |= (u64)pci_conf_read32(bus, slot, func, ++ idx + 4) << 32; ++ pci_conf_write32(bus, slot, func, idx + 4, hi); ++ 
} ++ else if ( pdev->vf_rlen[i] ) ++ pdev->vf_rlen[i] |= (u64)~0 << 32; ++ pci_conf_write32(bus, slot, func, idx, bar); ++ pdev->vf_rlen[i] = -pdev->vf_rlen[i]; ++ if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == ++ PCI_BASE_ADDRESS_MEM_TYPE_64 ) ++ ++i; ++ } ++ } ++ else ++ printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x has its virtual" ++ " functions already enabled (%04x)\n", ++ bus, slot, func, ctrl); ++ } + + ret = 0; + if ( !pdev->domain ) +@@ -183,7 +255,7 @@ int pci_add_device(u8 bus, u8 devfn, con + out: + spin_unlock(&pcidevs_lock); + printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type, +- bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); ++ bus, slot, func); + return ret; + } + +--- a/xen/include/xen/pci.h ++++ b/xen/include/xen/pci.h +@@ -57,6 +57,7 @@ struct pci_dev { + const u8 bus; + const u8 devfn; + struct pci_dev_info info; ++ u64 vf_rlen[6]; + }; + + #define for_each_pdev(domain, pdev) \ +--- a/xen/include/xen/pci_regs.h ++++ b/xen/include/xen/pci_regs.h +@@ -425,7 +425,7 @@ + #define PCI_EXT_CAP_ID_ACS 13 + #define PCI_EXT_CAP_ID_ARI 14 + #define PCI_EXT_CAP_ID_ATS 15 +-#define PCI_EXT_CAP_ID_IOV 16 ++#define PCI_EXT_CAP_ID_SRIOV 16 + + /* Advanced Error Reporting */ + #define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ +@@ -545,4 +545,35 @@ + #define PCI_ACS_CTRL 0x06 /* ACS Control Register */ + #define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */ + ++/* Single Root I/O Virtualization */ ++#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */ ++#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */ ++#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */ ++#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */ ++#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */ ++#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */ ++#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */ ++#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */ ++#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy 
*/ ++#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */ ++#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */ ++#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */ ++#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */ ++#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */ ++#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */ ++#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */ ++#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */ ++#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */ ++#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */ ++#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */ ++#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */ ++#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */ ++#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/ ++#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */ ++#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */ ++#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */ ++#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ ++#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ ++#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ ++ + #endif /* LINUX_PCI_REGS_H */ diff --git a/23771-x86-ioapic-clear-pin.patch b/23771-x86-ioapic-clear-pin.patch new file mode 100644 index 0000000..4fa34da --- /dev/null +++ b/23771-x86-ioapic-clear-pin.patch @@ -0,0 +1,61 @@ +References: bnc#701686 + +# HG changeset patch +# User Jan Beulich +# Date 1313503555 -3600 +# Node ID fc2be6cb89ad49efd90fe1b650f7efaab72f61b2 +# Parent 5c1ebc117f9901bc155d2b92ae902a4144767dfb +x86: simplify (and fix) clear_IO_APIC{,_pin}() + +These are used during bootup and (emergency) shutdown only, and their +only purpose is to get the actual IO-APIC's RTE(s) cleared. 
+Consequently, only the "raw" accessors should be used (and the ones +going through interrupt remapping code can be skipped), with the +exception of determining the delivery mode: This one must always go +through the interrupt remapping path, as in the VT-d case the actual +IO-APIC's RTE will have the delivery mode always set to zero (which +before possibly could have resulted in such an entry getting cleared +in the "raw" pass, though I haven't observed this case in practice). + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -365,14 +365,12 @@ static void eoi_IO_APIC_irq(unsigned int + spin_unlock_irqrestore(&ioapic_lock, flags); + } + +-#define clear_IO_APIC_pin(a,p) __clear_IO_APIC_pin(a,p,0) +-#define clear_IO_APIC_pin_raw(a,p) __clear_IO_APIC_pin(a,p,1) +-static void __clear_IO_APIC_pin(unsigned int apic, unsigned int pin, int raw) ++static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) + { + struct IO_APIC_route_entry entry; + + /* Check delivery_mode to be sure we're not clearing an SMI pin */ +- entry = ioapic_read_entry(apic, pin, raw); ++ entry = __ioapic_read_entry(apic, pin, FALSE); + if (entry.delivery_mode == dest_SMI) + return; + +@@ -381,7 +379,7 @@ static void __clear_IO_APIC_pin(unsigned + */ + memset(&entry, 0, sizeof(entry)); + entry.mask = 1; +- ioapic_write_entry(apic, pin, raw, entry); ++ __ioapic_write_entry(apic, pin, TRUE, entry); + } + + static void clear_IO_APIC (void) +@@ -389,10 +387,8 @@ static void clear_IO_APIC (void) + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) { +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + clear_IO_APIC_pin(apic, pin); +- clear_IO_APIC_pin_raw(apic, pin); +- } + } + } + diff --git a/23772-x86-trampoline.patch b/23772-x86-trampoline.patch new file mode 100644 index 0000000..0f1f4db --- /dev/null +++ b/23772-x86-trampoline.patch @@ -0,0 +1,363 @@ +# HG changeset patch 
+# User Jan Beulich +# Date 1313744066 -3600 +# Node ID 29aeed4979a78f26519f5fde8a405f8438297ab9 +# Parent fc2be6cb89ad49efd90fe1b650f7efaab72f61b2 +x86: make run-time part of trampoline relocatable + +In order to eliminate an initial hack in the EFI boot code (where +memory for the trampoline was just "claimed" instead of properly +allocated), the trampoline code must no longer make assumptions about the +address at which it would be located. For the time being, the fixed +address is being retained for the traditional multiboot path. + +As an additional benefit (at least from my pov) it allows confining +the visibility of the BOOT_TRAMPOLINE definition to just the boot +code. + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/boot/Makefile ++++ b/xen/arch/x86/boot/Makefile +@@ -2,8 +2,8 @@ obj-y += head.o + + head.o: reloc.S + +-BOOT_TRAMPOLINE := $(shell sed -n 's,^\#define[[:space:]]\{1\,\}BOOT_TRAMPOLINE[[:space:]]\{1\,\},,p' $(BASEDIR)/include/asm-x86/config.h) ++BOOT_TRAMPOLINE := $(shell sed -n 's,^\#define[[:space:]]\{1\,\}BOOT_TRAMPOLINE[[:space:]]\{1\,\},,p' head.S) + %.S: %.c + RELOC=$(BOOT_TRAMPOLINE) $(MAKE) -f build32.mk $@ + +-reloc.S: $(BASEDIR)/include/asm-x86/config.h ++reloc.S: head.S +--- a/xen/arch/x86/boot/head.S ++++ b/xen/arch/x86/boot/head.S +@@ -9,7 +9,7 @@ + .text + .code32 + +-#undef bootsym_phys ++#define BOOT_TRAMPOLINE 0x7c000 + #define sym_phys(sym) ((sym) - __XEN_VIRT_START) + #define bootsym_phys(sym) ((sym) - trampoline_start + BOOT_TRAMPOLINE) + +@@ -189,6 +189,17 @@ __start: + mov %edi,sym_phys(idle_pg_table_l2) + (__PAGE_OFFSET>>18) + #endif + ++ /* Apply relocations to bootstrap trampoline. */ ++ mov $BOOT_TRAMPOLINE,%edx ++ mov $sym_phys(__trampoline_rel_start),%edi ++ mov %edx,sym_phys(trampoline_phys) ++1: ++ mov (%edi),%eax ++ add %edx,(%edi,%eax) ++ add $4,%edi ++ cmp $sym_phys(__trampoline_rel_stop),%edi ++ jb 1b ++ + /* Copy bootstrap trampoline to low memory, below 1MB. 
*/ + mov $sym_phys(trampoline_start),%esi + mov $bootsym_phys(trampoline_start),%edi +--- a/xen/arch/x86/boot/trampoline.S ++++ b/xen/arch/x86/boot/trampoline.S +@@ -4,6 +4,13 @@ + #undef bootsym + #define bootsym(s) ((s)-trampoline_start) + ++#define bootsym_rel(sym, off, opnd...) \ ++ bootsym(sym),##opnd; \ ++111:; \ ++ .pushsection .trampoline_rel, "a"; \ ++ .long 111b - (off) - .; \ ++ .popsection ++ + .globl trampoline_realmode_entry + trampoline_realmode_entry: + mov %cs,%ax +@@ -17,11 +24,11 @@ trampoline_realmode_entry: + xor %ax, %ax + inc %ax + lmsw %ax # CR0.PE = 1 (enter protected mode) +- ljmpl $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry) ++ ljmpl $BOOT_CS32,$bootsym_rel(trampoline_protmode_entry,6) + + idt_48: .word 0, 0, 0 # base = limit = 0 + gdt_48: .word 6*8-1 +- .long bootsym_phys(trampoline_gdt) ++ .long bootsym_rel(trampoline_gdt,4) + trampoline_gdt: + /* 0x0000: unused */ + .quad 0x0000000000000000 +@@ -32,11 +39,16 @@ trampoline_gdt: + /* 0x0018: ring 0 data */ + .quad 0x00cf92000000ffff + /* 0x0020: real-mode code @ BOOT_TRAMPOLINE */ +- .long 0x0000ffff | ((BOOT_TRAMPOLINE & 0x00ffff) << 16) +- .long 0x00009a00 | ((BOOT_TRAMPOLINE & 0xff0000) >> 16) ++ .long 0x0000ffff ++ .long 0x00009a00 + /* 0x0028: real-mode data @ BOOT_TRAMPOLINE */ +- .long 0x0000ffff | ((BOOT_TRAMPOLINE & 0x00ffff) << 16) +- .long 0x00009200 | ((BOOT_TRAMPOLINE & 0xff0000) >> 16) ++ .long 0x0000ffff ++ .long 0x00009200 ++ ++ .pushsection .trampoline_rel, "a" ++ .long trampoline_gdt + BOOT_PSEUDORM_CS + 2 - . ++ .long trampoline_gdt + BOOT_PSEUDORM_DS + 2 - . ++ .popsection + + .globl cpuid_ext_features + cpuid_ext_features: +@@ -66,11 +78,11 @@ trampoline_protmode_entry: + + /* Load pagetable base register. */ + mov $sym_phys(idle_pg_table),%eax +- add bootsym_phys(trampoline_xen_phys_start),%eax ++ add bootsym_rel(trampoline_xen_phys_start,4,%eax) + mov %eax,%cr3 + + /* Set up EFER (Extended Feature Enable Register). 
*/ +- mov bootsym_phys(cpuid_ext_features),%edi ++ mov bootsym_rel(cpuid_ext_features,4,%edi) + test $0x20100800,%edi /* SYSCALL/SYSRET, No Execute, Long Mode? */ + jz .Lskip_efer + movl $MSR_EFER,%ecx +@@ -93,7 +105,7 @@ trampoline_protmode_entry: + #if defined(__x86_64__) + + /* Now in compatibility mode. Long-jump into 64-bit mode. */ +- ljmp $BOOT_CS64,$bootsym_phys(start64) ++ ljmp $BOOT_CS64,$bootsym_rel(start64,6) + + .code64 + start64: +--- a/xen/arch/x86/boot/wakeup.S ++++ b/xen/arch/x86/boot/wakeup.S +@@ -42,15 +42,13 @@ ENTRY(wakeup_start) + + # boot trampoline is under 1M, and shift its start into + # %fs to reference symbols in that area +- movl $BOOT_TRAMPOLINE, %eax +- shrl $4, %eax +- movl %eax, %fs ++ mov wakesym(trampoline_seg), %fs + lidt %fs:bootsym(idt_48) + lgdt %fs:bootsym(gdt_48) + + movw $1, %ax + lmsw %ax # Turn on CR0.PE +- ljmpl $BOOT_CS32, $bootsym_phys(wakeup_32) ++ ljmpl $BOOT_CS32, $bootsym_rel(wakeup_32, 6) + + /* This code uses an extended set of video mode numbers. These include: + * Aliases for standard modes +@@ -103,6 +101,10 @@ real_magic: .long 0x12345678 + .globl video_mode, video_flags + video_mode: .long 0 + video_flags: .long 0 ++trampoline_seg: .word BOOT_TRAMPOLINE >> 4 ++ .pushsection .trampoline_seg, "a" ++ .long trampoline_seg - . 
++ .popsection + + .code32 + +@@ -114,11 +116,11 @@ wakeup_32: + mov $BOOT_DS, %eax + mov %eax, %ds + mov %eax, %ss +- mov $bootsym_phys(early_stack), %esp ++ mov $bootsym_rel(early_stack, 4, %esp) + + # check saved magic again + mov $sym_phys(saved_magic), %eax +- add bootsym_phys(trampoline_xen_phys_start), %eax ++ add bootsym_rel(trampoline_xen_phys_start, 4, %eax) + mov (%eax), %eax + cmp $0x9abcdef0, %eax + jne bogus_saved_magic +@@ -131,12 +133,12 @@ wakeup_32: + + /* Load pagetable base register */ + mov $sym_phys(idle_pg_table),%eax +- add bootsym_phys(trampoline_xen_phys_start),%eax ++ add bootsym_rel(trampoline_xen_phys_start,4,%eax) + mov %eax,%cr3 + + /* Will cpuid feature change after resume? */ + /* Set up EFER (Extended Feature Enable Register). */ +- mov bootsym_phys(cpuid_ext_features),%edi ++ mov bootsym_rel(cpuid_ext_features,4,%edi) + test $0x20100800,%edi /* SYSCALL/SYSRET, No Execute, Long Mode? */ + jz .Lskip_eferw + movl $MSR_EFER,%ecx +@@ -162,7 +164,7 @@ wakeup_32: + #if defined(__x86_64__) + + /* Now in compatibility mode. 
Long-jump to 64-bit mode */ +- ljmp $BOOT_CS64, $bootsym_phys(wakeup_64) ++ ljmp $BOOT_CS64, $bootsym_rel(wakeup_64,6) + + .code64 + wakeup_64: +--- a/xen/arch/x86/efi/boot.c ++++ b/xen/arch/x86/efi/boot.c +@@ -599,6 +599,9 @@ static void __init relocate_image(unsign + } + } + ++extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[]; ++extern const s32 __trampoline_seg_start[], __trampoline_seg_stop[]; ++ + void EFIAPI __init __attribute__((__noreturn__)) + efi_start(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) + { +@@ -614,9 +617,10 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY + EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *mode_info; + EFI_FILE_HANDLE dir_handle; + union string section = { NULL }, name; ++ const s32 *trampoline_ptr; + struct e820entry *e; + u64 efer; +- bool_t base_video = 0, trampoline_okay = 0; ++ bool_t base_video = 0; + + efi_ih = ImageHandle; + efi_bs = SystemTable->BootServices; +@@ -914,15 +918,27 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY + dmi_efi_get_table((void *)(long)efi.smbios); + + /* Allocate space for trampoline (in first Mb). */ +- cfg.addr = BOOT_TRAMPOLINE; ++ cfg.addr = 0x100000; + cfg.size = trampoline_end - trampoline_start; +- status = efi_bs->AllocatePages(AllocateAddress, EfiLoaderData, ++ status = efi_bs->AllocatePages(AllocateMaxAddress, EfiLoaderData, + PFN_UP(cfg.size), &cfg.addr); + if ( EFI_ERROR(status) ) + { + cfg.addr = 0; +- PrintErr(L"Note: Trampoline area is in use\r\n"); ++ blexit(L"No memory for trampoline\r\n"); + } ++ trampoline_phys = cfg.addr; ++ /* Apply relocations to trampoline. 
*/ ++ for ( trampoline_ptr = __trampoline_rel_start; ++ trampoline_ptr < __trampoline_rel_stop; ++ ++trampoline_ptr ) ++ *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += ++ trampoline_phys; ++ for ( trampoline_ptr = __trampoline_seg_start; ++ trampoline_ptr < __trampoline_seg_stop; ++ ++trampoline_ptr ) ++ *(u16 *)(*trampoline_ptr + (long)trampoline_ptr) = ++ trampoline_phys >> 4; + + /* Initialise L2 identity-map and xen page table entries (16MB). */ + for ( i = 0; i < 8; ++i ) +@@ -1096,14 +1112,8 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY + e->type = type; + ++e820nr; + } +- if ( type == E820_RAM && e->addr <= BOOT_TRAMPOLINE && +- e->addr + e->size >= BOOT_TRAMPOLINE + cfg.size ) +- trampoline_okay = 1; + } + +- if ( !trampoline_okay ) +- blexit(L"Trampoline area unavailable\r\n"); +- + status = efi_bs->ExitBootServices(ImageHandle, map_key); + if ( EFI_ERROR(status) ) + PrintErrMesg(L"Cannot exit boot services", status); +@@ -1117,7 +1127,7 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY + efi_fw_vendor = (void *)efi_fw_vendor + DIRECTMAP_VIRT_START; + + relocate_image(__XEN_VIRT_START - xen_phys_start); +- memcpy((void *)(long)BOOT_TRAMPOLINE, trampoline_start, cfg.size); ++ memcpy((void *)trampoline_phys, trampoline_start, cfg.size); + + /* Set system registers and transfer control. */ + asm volatile("pushq $0\n\tpopfq"); +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c +@@ -49,6 +49,8 @@ + + #define setup_trampoline() (bootsym_phys(trampoline_realmode_entry)) + ++unsigned long __read_mostly trampoline_phys; ++ + /* Set if we find a B stepping CPU */ + static int smp_b_stepping; + +--- a/xen/arch/x86/x86_32/mm.c ++++ b/xen/arch/x86/x86_32/mm.c +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -164,8 +165,9 @@ void __init zap_low_mappings(l2_pgentry_ + flush_all(FLUSH_TLB_GLOBAL); + + /* Replace with mapping of the boot trampoline only. 
*/ +- map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT, +- 0x10, __PAGE_HYPERVISOR); ++ map_pages_to_xen(trampoline_phys, trampoline_phys >> PAGE_SHIFT, ++ PFN_UP(trampoline_end - trampoline_start), ++ __PAGE_HYPERVISOR); + } + + void __init subarch_init_memory(void) +--- a/xen/arch/x86/x86_64/mm.c ++++ b/xen/arch/x86/x86_64/mm.c +@@ -828,7 +828,7 @@ void __init zap_low_mappings(void) + flush_local(FLUSH_TLB_GLOBAL); + + /* Replace with mapping of the boot trampoline only. */ +- map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT, ++ map_pages_to_xen(trampoline_phys, trampoline_phys >> PAGE_SHIFT, + PFN_UP(trampoline_end - trampoline_start), + __PAGE_HYPERVISOR); + } +--- a/xen/arch/x86/xen.lds.S ++++ b/xen/arch/x86/xen.lds.S +@@ -103,6 +103,13 @@ SECTIONS + *(.init.data) + *(.init.data.rel) + *(.init.data.rel.*) ++ . = ALIGN(4); ++ __trampoline_rel_start = .; ++ *(.trampoline_rel) ++ __trampoline_rel_stop = .; ++ __trampoline_seg_start = .; ++ *(.trampoline_seg) ++ __trampoline_seg_stop = .; + } :text + . = ALIGN(32); + .init.setup : { +--- a/xen/include/asm-x86/config.h ++++ b/xen/include/asm-x86/config.h +@@ -95,13 +95,13 @@ + /* Primary stack is restricted to 8kB by guard pages. 
*/ + #define PRIMARY_STACK_SIZE 8192 + +-#define BOOT_TRAMPOLINE 0x7c000 ++#ifndef __ASSEMBLY__ ++extern unsigned long trampoline_phys; + #define bootsym_phys(sym) \ +- (((unsigned long)&(sym)-(unsigned long)&trampoline_start)+BOOT_TRAMPOLINE) ++ (((unsigned long)&(sym)-(unsigned long)&trampoline_start)+trampoline_phys) + #define bootsym(sym) \ + (*RELOC_HIDE((typeof(&(sym)))__va(__pa(&(sym))), \ +- BOOT_TRAMPOLINE-__pa(trampoline_start))) +-#ifndef __ASSEMBLY__ ++ trampoline_phys-__pa(trampoline_start))) + extern char trampoline_start[], trampoline_end[]; + extern char trampoline_realmode_entry[]; + extern unsigned int trampoline_xen_phys_start; diff --git a/23774-x86_64-EFI-EDD.patch b/23774-x86_64-EFI-EDD.patch new file mode 100644 index 0000000..8c76c93 --- /dev/null +++ b/23774-x86_64-EFI-EDD.patch @@ -0,0 +1,364 @@ +# HG changeset patch +# User Jan Beulich +# Date 1313744120 -3600 +# Node ID e35c5202625ef5534561f84352833ad9467d986c +# Parent dd90b59cb11c60c48e174c899190e2967341fe32 +x86-64/EFI: construct EDD data from device path protocol information + +In the absence of a BIOS to handle INT13 requests, this information +must be constructed artificially instead when booted from EFI. + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/boot/edd.S ++++ b/xen/arch/x86/boot/edd.S +@@ -16,21 +16,13 @@ + * Updated and ported for Xen by Keir Fraser June 2007 + */ + ++#include ++ + .code16 + + /* Offset of disc signature in the MBR. */ + #define EDD_MBR_SIG_OFFSET 0x1B8 + +-/* Maximum number of EDD information structures at boot_edd_info. */ +-#define EDD_INFO_MAX 6 +- +-/* Maximum number of MBR signatures at boot_mbr_signature. */ +-#define EDD_MBR_SIG_MAX 16 +- +-/* Size of components of EDD information structure. */ +-#define EDDEXTSIZE 8 +-#define EDDPARMSIZE 74 +- + get_edd: + cmpb $2, bootsym(opt_edd) # edd=off ? 
+ je edd_done +--- a/xen/arch/x86/efi/boot.c ++++ b/xen/arch/x86/efi/boot.c +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -539,6 +540,18 @@ static void __init split_value(char *s) + *s = 0; + } + ++static void __init edd_put_string(u8 *dst, size_t n, const char *src) ++{ ++ while ( n-- && *src ) ++ *dst++ = *src++; ++ if ( *src ) ++ PrintErrMesg(L"Internal error populating EDD info", ++ EFI_BUFFER_TOO_SMALL); ++ while ( n-- ) ++ *dst++ = ' '; ++} ++#define edd_put_string(d, s) edd_put_string(d, ARRAY_SIZE(d), s) ++ + static int __init set_color(u32 mask, int bpp, u8 *pos, u8 *sz) + { + if ( bpp < 0 ) +@@ -607,6 +620,8 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY + { + static EFI_GUID __initdata loaded_image_guid = LOADED_IMAGE_PROTOCOL; + static EFI_GUID __initdata gop_guid = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID; ++ static EFI_GUID __initdata bio_guid = BLOCK_IO_PROTOCOL; ++ static EFI_GUID __initdata devp_guid = DEVICE_PATH_PROTOCOL; + EFI_LOADED_IMAGE *loaded_image; + EFI_STATUS status; + unsigned int i, argc; +@@ -887,7 +902,148 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY + + place_string(&mbi.mem_upper, NULL); + +- /* XXX Collect EDD info. */ ++ /* Collect EDD info. 
*/ ++ BUILD_BUG_ON(offsetof(struct edd_info, edd_device_params) != EDDEXTSIZE); ++ BUILD_BUG_ON(sizeof(struct edd_device_params) != EDDPARMSIZE); ++ size = 0; ++ status = efi_bs->LocateHandle(ByProtocol, &bio_guid, NULL, &size, NULL); ++ if ( status == EFI_BUFFER_TOO_SMALL ) ++ status = efi_bs->AllocatePool(EfiLoaderData, size, (void **)&handles); ++ if ( !EFI_ERROR(status) ) ++ status = efi_bs->LocateHandle(ByProtocol, &bio_guid, NULL, &size, ++ handles); ++ if ( EFI_ERROR(status) ) ++ size = 0; ++ for ( i = 0; i < size / sizeof(*handles); ++i ) ++ { ++ EFI_BLOCK_IO *bio; ++ EFI_DEV_PATH_PTR devp; ++ struct edd_info *info = boot_edd_info + boot_edd_info_nr; ++ struct edd_device_params *params = &info->edd_device_params; ++ enum { root, acpi, pci, ctrlr } state = root; ++ ++ status = efi_bs->HandleProtocol(handles[i], &bio_guid, (void **)&bio); ++ if ( EFI_ERROR(status) || ++ bio->Media->RemovableMedia || ++ bio->Media->LogicalPartition ) ++ continue; ++ if ( boot_edd_info_nr < EDD_INFO_MAX ) ++ { ++ info->device = 0x80 + boot_edd_info_nr; /* fake */ ++ info->version = 0x11; ++ params->length = offsetof(struct edd_device_params, dpte_ptr); ++ params->number_of_sectors = bio->Media->LastBlock + 1; ++ params->bytes_per_sector = bio->Media->BlockSize; ++ params->dpte_ptr = ~0; ++ } ++ ++boot_edd_info_nr; ++ status = efi_bs->HandleProtocol(handles[i], &devp_guid, ++ (void **)&devp); ++ if ( EFI_ERROR(status) ) ++ continue; ++ for ( ; !IsDevicePathEnd(devp.DevPath); ++ devp.DevPath = NextDevicePathNode(devp.DevPath) ) ++ { ++ switch ( DevicePathType(devp.DevPath) ) ++ { ++ const u8 *p; ++ ++ case ACPI_DEVICE_PATH: ++ if ( state != root || boot_edd_info_nr > EDD_INFO_MAX ) ++ break; ++ switch ( DevicePathSubType(devp.DevPath) ) ++ { ++ case ACPI_DP: ++ if ( devp.Acpi->HID != EISA_PNP_ID(0xA03) && ++ devp.Acpi->HID != EISA_PNP_ID(0xA08) ) ++ break; ++ params->interface_path.pci.bus = devp.Acpi->UID; ++ state = acpi; ++ break; ++ case EXPANDED_ACPI_DP: ++ /* XXX */ ++ 
break; ++ } ++ break; ++ case HARDWARE_DEVICE_PATH: ++ if ( state != acpi || ++ DevicePathSubType(devp.DevPath) != HW_PCI_DP || ++ boot_edd_info_nr > EDD_INFO_MAX ) ++ break; ++ state = pci; ++ edd_put_string(params->host_bus_type, "PCI"); ++ params->interface_path.pci.slot = devp.Pci->Device; ++ params->interface_path.pci.function = devp.Pci->Function; ++ break; ++ case MESSAGING_DEVICE_PATH: ++ if ( state != pci || boot_edd_info_nr > EDD_INFO_MAX ) ++ break; ++ state = ctrlr; ++ switch ( DevicePathSubType(devp.DevPath) ) ++ { ++ case MSG_ATAPI_DP: ++ edd_put_string(params->interface_type, "ATAPI"); ++ params->interface_path.pci.channel = ++ devp.Atapi->PrimarySecondary; ++ params->device_path.atapi.device = devp.Atapi->SlaveMaster; ++ params->device_path.atapi.lun = devp.Atapi->Lun; ++ break; ++ case MSG_SCSI_DP: ++ edd_put_string(params->interface_type, "SCSI"); ++ params->device_path.scsi.id = devp.Scsi->Pun; ++ params->device_path.scsi.lun = devp.Scsi->Lun; ++ break; ++ case MSG_FIBRECHANNEL_DP: ++ edd_put_string(params->interface_type, "FIBRE"); ++ params->device_path.fibre.wwid = devp.FibreChannel->WWN; ++ params->device_path.fibre.lun = devp.FibreChannel->Lun; ++ break; ++ case MSG_1394_DP: ++ edd_put_string(params->interface_type, "1394"); ++ params->device_path.i1394.eui = devp.F1394->Guid; ++ break; ++ case MSG_USB_DP: ++ case MSG_USB_CLASS_DP: ++ edd_put_string(params->interface_type, "USB"); ++ break; ++ case MSG_I2O_DP: ++ edd_put_string(params->interface_type, "I2O"); ++ params->device_path.i2o.identity_tag = devp.I2O->Tid; ++ break; ++ default: ++ continue; ++ } ++ info->version = 0x30; ++ params->length = sizeof(struct edd_device_params); ++ params->key = 0xbedd; ++ params->device_path_info_length = ++ sizeof(struct edd_device_params) - ++ offsetof(struct edd_device_params, key); ++ for ( p = (const u8 *)&params->key; p < &params->checksum; ++p ) ++ params->checksum -= *p; ++ break; ++ case MEDIA_DEVICE_PATH: ++ if ( DevicePathSubType(devp.DevPath) == 
MEDIA_HARDDRIVE_DP && ++ devp.HardDrive->MBRType == MBR_TYPE_PCAT && ++ boot_mbr_signature_nr < EDD_MBR_SIG_MAX ) ++ { ++ struct mbr_signature *sig = boot_mbr_signature + ++ boot_mbr_signature_nr; ++ ++ sig->device = 0x80 + boot_edd_info_nr; /* fake */ ++ memcpy(&sig->signature, devp.HardDrive->Signature, ++ sizeof(sig->signature)); ++ ++boot_mbr_signature_nr; ++ } ++ break; ++ } ++ } ++ } ++ if ( handles ) ++ efi_bs->FreePool(handles); ++ if ( boot_edd_info_nr > EDD_INFO_MAX ) ++ boot_edd_info_nr = EDD_INFO_MAX; ++ + /* XXX Collect EDID info. */ + + if ( cpuid_eax(0x80000000) > 0x80000000 ) +--- a/xen/include/asm-x86/edd.h ++++ b/xen/include/asm-x86/edd.h +@@ -23,6 +23,8 @@ + #ifndef __XEN_EDD_H__ + #define __XEN_EDD_H__ + ++#ifndef __ASSEMBLY__ ++ + struct edd_info { + /* Int13, Fn48: Check Extensions Present. */ + u8 device; /* %dl: device */ +@@ -33,10 +35,106 @@ struct edd_info { + u8 legacy_max_head; /* %dh: maximum head number */ + u8 legacy_sectors_per_track; /* %cl[5:0]: maximum sector number */ + /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). 
*/ +- struct { ++ struct edd_device_params { + u16 length; +- u8 data[72]; +- } edd_device_params; ++ u16 info_flags; ++ u32 num_default_cylinders; ++ u32 num_default_heads; ++ u32 sectors_per_track; ++ u64 number_of_sectors; ++ u16 bytes_per_sector; ++ u32 dpte_ptr; /* 0xFFFFFFFF for our purposes */ ++ u16 key; /* = 0xBEDD */ ++ u8 device_path_info_length; ++ u8 reserved2; ++ u16 reserved3; ++ u8 host_bus_type[4]; ++ u8 interface_type[8]; ++ union { ++ struct { ++ u16 base_address; ++ u16 reserved1; ++ u32 reserved2; ++ } __attribute__ ((packed)) isa; ++ struct { ++ u8 bus; ++ u8 slot; ++ u8 function; ++ u8 channel; ++ u32 reserved; ++ } __attribute__ ((packed)) pci; ++ /* pcix is same as pci */ ++ struct { ++ u64 reserved; ++ } __attribute__ ((packed)) ibnd; ++ struct { ++ u64 reserved; ++ } __attribute__ ((packed)) xprs; ++ struct { ++ u64 reserved; ++ } __attribute__ ((packed)) htpt; ++ struct { ++ u64 reserved; ++ } __attribute__ ((packed)) unknown; ++ } interface_path; ++ union { ++ struct { ++ u8 device; ++ u8 reserved1; ++ u16 reserved2; ++ u32 reserved3; ++ u64 reserved4; ++ } __attribute__ ((packed)) ata; ++ struct { ++ u8 device; ++ u8 lun; ++ u8 reserved1; ++ u8 reserved2; ++ u32 reserved3; ++ u64 reserved4; ++ } __attribute__ ((packed)) atapi; ++ struct { ++ u16 id; ++ u64 lun; ++ u16 reserved1; ++ u32 reserved2; ++ } __attribute__ ((packed)) scsi; ++ struct { ++ u64 serial_number; ++ u64 reserved; ++ } __attribute__ ((packed)) usb; ++ struct { ++ u64 eui; ++ u64 reserved; ++ } __attribute__ ((packed)) i1394; ++ struct { ++ u64 wwid; ++ u64 lun; ++ } __attribute__ ((packed)) fibre; ++ struct { ++ u64 identity_tag; ++ u64 reserved; ++ } __attribute__ ((packed)) i2o; ++ struct { ++ u32 array_number; ++ u32 reserved1; ++ u64 reserved2; ++ } __attribute__ ((packed)) raid; ++ struct { ++ u8 device; ++ u8 reserved1; ++ u16 reserved2; ++ u32 reserved3; ++ u64 reserved4; ++ } __attribute__ ((packed)) sata; ++ struct { ++ u64 reserved1; ++ u64 reserved2; ++ } 
__attribute__ ((packed)) unknown; ++ } device_path; ++ u8 reserved4; ++ u8 checksum; ++ } __attribute__ ((packed)) edd_device_params; + } __attribute__ ((packed)); + + struct mbr_signature { +@@ -51,4 +149,16 @@ extern u8 boot_mbr_signature_nr; + extern struct edd_info boot_edd_info[]; + extern u8 boot_edd_info_nr; + ++#endif /* __ASSEMBLY__ */ ++ ++/* Maximum number of EDD information structures at boot_edd_info. */ ++#define EDD_INFO_MAX 6 ++ ++/* Maximum number of MBR signatures at boot_mbr_signature. */ ++#define EDD_MBR_SIG_MAX 16 ++ ++/* Size of components of EDD information structure. */ ++#define EDDEXTSIZE 8 ++#define EDDPARMSIZE 74 ++ + #endif /* __XEN_EDD_H__ */ diff --git a/23776-x86-kexec-hpet-legacy-bcast-disable.patch b/23776-x86-kexec-hpet-legacy-bcast-disable.patch new file mode 100644 index 0000000..02bffc0 --- /dev/null +++ b/23776-x86-kexec-hpet-legacy-bcast-disable.patch @@ -0,0 +1,56 @@ +# HG changeset patch +# User Andrew Cooper +# Date 1313744302 -3600 +# Node ID 0ddb4481f883ddf55c12a0b8d1445cf137ef0b63 +# Parent 9957bef3e7b4511f83ed8883cd5ecd49ea3ee95d +x86/KEXEC: disable hpet legacy broadcasts earlier + +On x2apic machines which booted in xapic mode, +hpet_disable_legacy_broadcast() sends an event check IPI to all online +processors. This leads to a protection fault as the genapic blindly +pokes x2apic MSRs while the local apic is in xapic mode. + +One option is to change genapic when we shut down the local apic, but +there are still problems with trying to IPI processors in the online +processor map which are actually sitting in NMI loops + +Another option is to have each CPU take itself out of the online CPU +map during the NMI shootdown. + +Realistically however, disabling hpet legacy broadcasts earlier in the +kexec path is the easiest fix to the problem. 
+ +Signed-off-by: Andrew Cooper + +--- a/xen/arch/x86/crash.c ++++ b/xen/arch/x86/crash.c +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + + static atomic_t waiting_for_crash_ipi; + static unsigned int crashing_cpu; +@@ -59,6 +60,9 @@ static void nmi_shootdown_cpus(void) + + local_irq_disable(); + ++ if ( hpet_broadcast_is_available() ) ++ hpet_disable_legacy_broadcast(); ++ + crashing_cpu = smp_processor_id(); + local_irq_count(crashing_cpu) = 0; + +--- a/xen/arch/x86/machine_kexec.c ++++ b/xen/arch/x86/machine_kexec.c +@@ -96,9 +96,6 @@ void machine_kexec(xen_kexec_image_t *im + .limit = LAST_RESERVED_GDT_BYTE + }; + +- if ( hpet_broadcast_is_available() ) +- hpet_disable_legacy_broadcast(); +- + /* + * compat_machine_kexec() returns to idle pagetables, which requires us + * to be running on a static GDT mapping (idle pagetables have no GDT diff --git a/23781-pm-wide-ACPI-ids.patch b/23781-pm-wide-ACPI-ids.patch new file mode 100644 index 0000000..2c7ce33 --- /dev/null +++ b/23781-pm-wide-ACPI-ids.patch @@ -0,0 +1,68 @@ +# HG changeset patch +# User Jan Beulich +# Date 1314004239 -3600 +# Node ID 0849b0e59e2418e8215616df147f955b01b07577 +# Parent 07f78b5bd03c02e32324eaa00487643d27b7ffa8 +pm: don't truncate processors' ACPI IDs to 8 bits + +This is just another adjustment to allow systems with very many CPUs +(or unusual ACPI IDs) to be properly power-managed. + +Signed-off-by: Jan Beulich + +--- a/xen/arch/ia64/linux-xen/acpi.c ++++ b/xen/arch/ia64/linux-xen/acpi.c +@@ -223,11 +223,14 @@ static u16 ia64_acpiid_to_sapicid[ MAX_L + {[0 ... 
MAX_LOCAL_SAPIC - 1] = 0xffff }; + + /* acpi id to cpu id */ +-int get_cpu_id(u8 acpi_id) ++int get_cpu_id(u32 acpi_id) + { + int i; + u16 apic_id; + ++ if ( acpi_id >= MAX_LOCAL_SAPIC ) ++ return -EINVAL; ++ + apic_id = ia64_acpiid_to_sapicid[acpi_id]; + if ( apic_id == 0xffff ) + return -EINVAL; +--- a/xen/arch/x86/acpi/cpu_idle.c ++++ b/xen/arch/x86/acpi/cpu_idle.c +@@ -871,11 +871,14 @@ static void set_cx( + acpi_power->safe_state = cx; + } + +-int get_cpu_id(u8 acpi_id) ++int get_cpu_id(u32 acpi_id) + { + int i; + u32 apic_id; + ++ if ( acpi_id >= MAX_MADT_ENTRIES ) ++ return -1; ++ + apic_id = x86_acpiid_to_apicid[acpi_id]; + if ( apic_id == BAD_APICID ) + return -1; +@@ -952,7 +955,7 @@ long set_cx_pminfo(uint32_t cpu, struct + print_cx_pminfo(cpu, power); + + /* map from acpi_id to cpu_id */ +- cpu_id = get_cpu_id((u8)cpu); ++ cpu_id = get_cpu_id(cpu); + if ( cpu_id == -1 ) + { + printk(XENLOG_ERR "no cpu_id for acpi_id %d\n", cpu); +--- a/xen/include/acpi/cpufreq/processor_perf.h ++++ b/xen/include/acpi/cpufreq/processor_perf.h +@@ -6,7 +6,7 @@ + + #define XEN_PX_INIT 0x80000000 + +-int get_cpu_id(u8); ++int get_cpu_id(u32); + int powernow_cpufreq_init(void); + unsigned int powernow_register_driver(void); + unsigned int get_measured_perf(unsigned int cpu, unsigned int flag); diff --git a/23782-x86-ioapic-clear-irr.patch b/23782-x86-ioapic-clear-irr.patch new file mode 100644 index 0000000..1d9a3f2 --- /dev/null +++ b/23782-x86-ioapic-clear-irr.patch @@ -0,0 +1,71 @@ +References: bnc#701686 + +# HG changeset patch +# User Jan Beulich +# Date 1314004270 -3600 +# Node ID 25dfe53bb1898b3967ceb71a7eb60a8b760c25fb +# Parent 0849b0e59e2418e8215616df147f955b01b07577 +x86/IO-APIC: clear remoteIRR in clear_IO_APIC_pin() + +It was found that in a crash scenario, the remoteIRR bit in an IO-APIC +RTE could be left set, causing problems when bringing up a kdump +kernel. 
While this generally is most important to be taken care of in +the new kernel (which usually would be a native one), it still seems +desirable to also address this problem in Xen so that (a) the problem +doesn't bite Xen when used as a secondary emergency kernel and (b) an +attempt is being made to save un-fixed secondary kernels from running +into said problem. + +Based on a Linux patch from suresh.b.siddha@intel.com. + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -375,11 +375,46 @@ static void clear_IO_APIC_pin(unsigned i + return; + + /* ++ * Make sure the entry is masked and re-read the contents to check ++ * if it is a level triggered pin and if the remoteIRR is set. ++ */ ++ if (!entry.mask) { ++ entry.mask = 1; ++ __ioapic_write_entry(apic, pin, FALSE, entry); ++ } ++ entry = __ioapic_read_entry(apic, pin, TRUE); ++ ++ if (entry.irr) { ++ /* Make sure the trigger mode is set to level. */ ++ if (!entry.trigger) { ++ entry.trigger = 1; ++ __ioapic_write_entry(apic, pin, TRUE, entry); ++ } ++ if (mp_ioapics[apic].mpc_apicver >= 0x20) ++ io_apic_eoi(apic, entry.vector); ++ else { ++ /* ++ * Mechanism by which we clear remoteIRR in this case is by ++ * changing the trigger mode to edge and back to level. 
++ */ ++ entry.trigger = 0; ++ __ioapic_write_entry(apic, pin, TRUE, entry); ++ entry.trigger = 1; ++ __ioapic_write_entry(apic, pin, TRUE, entry); ++ } ++ } ++ ++ /* + * Disable it in the IO-APIC irq-routing table: + */ + memset(&entry, 0, sizeof(entry)); + entry.mask = 1; + __ioapic_write_entry(apic, pin, TRUE, entry); ++ ++ entry = __ioapic_read_entry(apic, pin, TRUE); ++ if (entry.irr) ++ printk(KERN_ERR "IO-APIC%02x-%u: Unable to reset IRR\n", ++ IO_APIC_ID(apic), pin); + } + + static void clear_IO_APIC (void) diff --git a/23783-ACPI-set-_PDC-bits.patch b/23783-ACPI-set-_PDC-bits.patch new file mode 100644 index 0000000..895ff93 --- /dev/null +++ b/23783-ACPI-set-_PDC-bits.patch @@ -0,0 +1,266 @@ +# HG changeset patch +# User Jan Beulich +# Date 1314004356 -3600 +# Node ID 2029263c501c315fa4d94845e5cfa6a9b0b395d5 +# Parent 25dfe53bb1898b3967ceb71a7eb60a8b760c25fb +ACPI: add _PDC input override mechanism + +In order to have Dom0 call _PDC with input fully representing Xen's +capabilities, and in order to avoid building knowledge of Xen +implementation details into Dom0, this provides a mechanism by which +the Dom0 kernel can, once it filled the _PDC input buffer according to +its own knowledge, present the buffer to Xen to apply overrides for +the parts of the C-, P-, and T-state management that it controls. This +is particularly to address the dependency of Xen using MWAIT to enter +certain C-states on the availability of the break-on-interrupt +extension (which the Dom0 kernel should have no need to know about). 
+ +Signed-off-by: Jan Beulich + +--- a/xen/arch/ia64/linux-xen/acpi.c ++++ b/xen/arch/ia64/linux-xen/acpi.c +@@ -243,6 +243,13 @@ int get_cpu_id(u32 acpi_id) + + return -1; + } ++ ++int arch_acpi_set_pdc_bits(u32 acpi_id, u32 *pdc, u32 mask) ++{ ++ pdc[2] |= ACPI_PDC_EST_CAPABILITY_SMP & mask; ++ return 0; ++} ++ + #endif + + static int __init +--- a/xen/arch/x86/acpi/cpu_idle.c ++++ b/xen/arch/x86/acpi/cpu_idle.c +@@ -619,12 +619,6 @@ static int init_cx_pminfo(struct acpi_pr + return 0; + } + +-#define CPUID_MWAIT_LEAF (5) +-#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1) +-#define CPUID5_ECX_INTERRUPT_BREAK (0x2) +- +-#define MWAIT_ECX_INTERRUPT_BREAK (0x1) +- + #define MWAIT_SUBSTATE_MASK (0xf) + #define MWAIT_SUBSTATE_SIZE (4) + +--- a/xen/arch/x86/acpi/boot.c ++++ b/xen/arch/x86/acpi/boot.c +@@ -1006,3 +1006,47 @@ unsigned int acpi_get_processor_id(unsig + + return INVALID_ACPIID; + } ++ ++static void get_mwait_ecx(void *info) ++{ ++ *(u32 *)info = cpuid_ecx(CPUID_MWAIT_LEAF); ++} ++ ++int arch_acpi_set_pdc_bits(u32 acpi_id, u32 *pdc, u32 mask) ++{ ++ unsigned int cpu = get_cpu_id(acpi_id); ++ struct cpuinfo_x86 *c; ++ u32 ecx; ++ ++ if (!(acpi_id + 1)) ++ c = &boot_cpu_data; ++ else if (cpu >= NR_CPUS || !cpu_online(cpu)) ++ return -EINVAL; ++ else ++ c = cpu_data + cpu; ++ ++ pdc[2] |= ACPI_PDC_C_CAPABILITY_SMP & mask; ++ ++ if (cpu_has(c, X86_FEATURE_EST)) ++ pdc[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP & mask; ++ ++ if (cpu_has(c, X86_FEATURE_ACPI)) ++ pdc[2] |= ACPI_PDC_T_FFH & mask; ++ ++ /* ++ * If mwait/monitor or its break-on-interrupt extension are ++ * unsupported, Cx_FFH will be disabled. 
++ */ ++ if (!cpu_has(c, X86_FEATURE_MWAIT) || ++ c->cpuid_level < CPUID_MWAIT_LEAF) ++ ecx = 0; ++ else if (c == &boot_cpu_data || cpu == smp_processor_id()) ++ ecx = cpuid_ecx(CPUID_MWAIT_LEAF); ++ else ++ on_selected_cpus(cpumask_of(cpu), get_mwait_ecx, &ecx, 1); ++ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || ++ !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) ++ pdc[2] &= ~(ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH); ++ ++ return 0; ++} +--- a/xen/arch/x86/platform_hypercall.c ++++ b/xen/arch/x86/platform_hypercall.c +@@ -419,6 +419,15 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe + ret = -EINVAL; + break; + ++ case XEN_PM_PDC: ++ { ++ XEN_GUEST_HANDLE(uint32) pdc; ++ ++ guest_from_compat_handle(pdc, op->u.set_pminfo.u.pdc); ++ ret = acpi_set_pdc_bits(op->u.set_pminfo.id, pdc); ++ } ++ break; ++ + default: + ret = -EINVAL; + break; +--- a/xen/drivers/acpi/pmstat.c ++++ b/xen/drivers/acpi/pmstat.c +@@ -519,3 +519,34 @@ int do_pm_op(struct xen_sysctl_pm_op *op + + return ret; + } ++ ++int acpi_set_pdc_bits(u32 acpi_id, XEN_GUEST_HANDLE(uint32) pdc) ++{ ++ u32 bits[3]; ++ int ret; ++ ++ if ( copy_from_guest(bits, pdc, 2) ) ++ ret = -EFAULT; ++ else if ( bits[0] != ACPI_PDC_REVISION_ID || !bits[1] ) ++ ret = -EINVAL; ++ else if ( copy_from_guest_offset(bits + 2, pdc, 2, 1) ) ++ ret = -EFAULT; ++ else ++ { ++ u32 mask = 0; ++ ++ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_CX ) ++ mask |= ACPI_PDC_C_MASK | ACPI_PDC_SMP_C1PT; ++ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_PX ) ++ mask |= ACPI_PDC_P_MASK | ACPI_PDC_SMP_C1PT; ++ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_TX ) ++ mask |= ACPI_PDC_T_MASK | ACPI_PDC_SMP_C1PT; ++ bits[2] &= (ACPI_PDC_C_MASK | ACPI_PDC_P_MASK | ACPI_PDC_T_MASK | ++ ACPI_PDC_SMP_C1PT) & ~mask; ++ ret = arch_acpi_set_pdc_bits(acpi_id, bits, mask); ++ } ++ if ( !ret ) ++ ret = copy_to_guest_offset(pdc, 2, bits + 2, 1); ++ ++ return ret; ++} +--- a/xen/include/acpi/cpufreq/processor_perf.h ++++ b/xen/include/acpi/cpufreq/processor_perf.h +@@ -3,10 +3,10 
@@ + + #include + #include ++#include + + #define XEN_PX_INIT 0x80000000 + +-int get_cpu_id(u32); + int powernow_cpufreq_init(void); + unsigned int powernow_register_driver(void); + unsigned int get_measured_perf(unsigned int cpu, unsigned int flag); +--- a/xen/include/acpi/pdc_intel.h ++++ b/xen/include/acpi/pdc_intel.h +@@ -4,6 +4,8 @@ + #ifndef __PDC_INTEL_H__ + #define __PDC_INTEL_H__ + ++#define ACPI_PDC_REVISION_ID 1 ++ + #define ACPI_PDC_P_FFH (0x0001) + #define ACPI_PDC_C_C1_HALT (0x0002) + #define ACPI_PDC_T_FFH (0x0004) +@@ -14,6 +16,7 @@ + #define ACPI_PDC_SMP_T_SWCOORD (0x0080) + #define ACPI_PDC_C_C1_FFH (0x0100) + #define ACPI_PDC_C_C2C3_FFH (0x0200) ++#define ACPI_PDC_SMP_P_HWCOORD (0x0800) + + #define ACPI_PDC_EST_CAPABILITY_SMP (ACPI_PDC_SMP_C1PT | \ + ACPI_PDC_C_C1_HALT | \ +@@ -22,6 +25,7 @@ + #define ACPI_PDC_EST_CAPABILITY_SWSMP (ACPI_PDC_SMP_C1PT | \ + ACPI_PDC_C_C1_HALT | \ + ACPI_PDC_SMP_P_SWCOORD | \ ++ ACPI_PDC_SMP_P_HWCOORD | \ + ACPI_PDC_P_FFH) + + #define ACPI_PDC_C_CAPABILITY_SMP (ACPI_PDC_SMP_C2C3 | \ +@@ -30,4 +34,17 @@ + ACPI_PDC_C_C1_FFH | \ + ACPI_PDC_C_C2C3_FFH) + ++#define ACPI_PDC_C_MASK (ACPI_PDC_C_C1_HALT | \ ++ ACPI_PDC_C_C1_FFH | \ ++ ACPI_PDC_SMP_C2C3 | \ ++ ACPI_PDC_SMP_C_SWCOORD | \ ++ ACPI_PDC_C_C2C3_FFH) ++ ++#define ACPI_PDC_P_MASK (ACPI_PDC_P_FFH | \ ++ ACPI_PDC_SMP_P_SWCOORD | \ ++ ACPI_PDC_SMP_P_HWCOORD) ++ ++#define ACPI_PDC_T_MASK (ACPI_PDC_T_FFH | \ ++ ACPI_PDC_SMP_T_SWCOORD) ++ + #endif /* __PDC_INTEL_H__ */ +--- a/xen/include/asm-x86/cpufeature.h ++++ b/xen/include/asm-x86/cpufeature.h +@@ -150,6 +150,10 @@ + #define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability) + #define cpufeat_mask(idx) (1u << ((idx) & 31)) + ++#define CPUID_MWAIT_LEAF 5 ++#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 ++#define CPUID5_ECX_INTERRUPT_BREAK 0x2 ++ + #ifdef __i386__ + #define cpu_has_vme boot_cpu_has(X86_FEATURE_VME) + #define cpu_has_de boot_cpu_has(X86_FEATURE_DE) +--- a/xen/include/public/platform.h ++++ 
b/xen/include/public/platform.h +@@ -304,6 +304,7 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_getidletim + #define XEN_PM_CX 0 + #define XEN_PM_PX 1 + #define XEN_PM_TX 2 ++#define XEN_PM_PDC 3 + + /* Px sub info type */ + #define XEN_PX_PCT 1 +@@ -401,6 +402,7 @@ struct xenpf_set_processor_pminfo { + union { + struct xen_processor_power power;/* Cx: _CST/_CSD */ + struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ ++ XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */ + } u; + }; + typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t; +--- a/xen/include/xen/acpi.h ++++ b/xen/include/xen/acpi.h +@@ -334,6 +334,8 @@ static inline int acpi_boot_table_init(v + + #endif /*!CONFIG_ACPI_BOOT*/ + ++int get_cpu_id(u32 acpi_id); ++ + unsigned int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low); + int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); + +@@ -431,6 +433,9 @@ static inline unsigned int acpi_get_csta + static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; } + #endif + ++int acpi_set_pdc_bits(u32 acpi_id, XEN_GUEST_HANDLE(uint32)); ++int arch_acpi_set_pdc_bits(u32 acpi_id, u32 *, u32 mask); ++ + #ifdef CONFIG_ACPI_NUMA + int acpi_get_pxm(acpi_handle handle); + #else diff --git a/x86-cpufreq-report.patch b/x86-cpufreq-report.patch index f88aea8..19da915 100644 --- a/x86-cpufreq-report.patch +++ b/x86-cpufreq-report.patch @@ -17,7 +17,7 @@ struct xen_platform_op curop, *op = &curop; if ( !IS_PRIV(current->domain) ) -@@ -513,6 +514,24 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe +@@ -522,6 +523,24 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe op->u.mem_add.epfn, op->u.mem_add.pxm); break; @@ -44,7 +44,7 @@ break; --- a/xen/include/public/platform.h +++ b/xen/include/public/platform.h -@@ -449,6 +449,14 @@ struct xenpf_mem_hotadd +@@ -451,6 +451,14 @@ struct xenpf_mem_hotadd uint32_t flags; }; @@ -59,7 +59,7 @@ struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ -@@ -469,6 
+477,7 @@ struct xen_platform_op { +@@ -471,6 +479,7 @@ struct xen_platform_op { struct xenpf_cpu_ol cpu_ol; struct xenpf_cpu_hotadd cpu_add; struct xenpf_mem_hotadd mem_add; diff --git a/x86-ioapic-ack-default.patch b/x86-ioapic-ack-default.patch index b5be017..02b922d 100644 --- a/x86-ioapic-ack-default.patch +++ b/x86-ioapic-ack-default.patch @@ -1,10 +1,8 @@ Change default IO-APIC ack mode for single IO-APIC systems to old-style. -Index: xen-4.1.1-testing/xen/arch/x86/io_apic.c -=================================================================== ---- xen-4.1.1-testing.orig/xen/arch/x86/io_apic.c -+++ xen-4.1.1-testing/xen/arch/x86/io_apic.c -@@ -1547,7 +1547,7 @@ static unsigned int startup_level_ioapic +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -1578,7 +1578,7 @@ static unsigned int startup_level_ioapic return 0; /* don't check for pending */ } @@ -13,7 +11,7 @@ Index: xen-4.1.1-testing/xen/arch/x86/io_apic.c static void setup_ioapic_ack(char *s) { if ( !strcmp(s, "old") ) -@@ -2044,6 +2044,8 @@ void __init setup_IO_APIC(void) +@@ -2075,6 +2075,8 @@ void __init setup_IO_APIC(void) else io_apic_irqs = ~PIC_IRQS; diff --git a/x86-show-page-walk-early.patch b/x86-show-page-walk-early.patch index af77d83..8356f5c 100644 --- a/x86-show-page-walk-early.patch +++ b/x86-show-page-walk-early.patch @@ -21,7 +21,7 @@ printk("%p ", _p(*stk++)); --- a/xen/arch/x86/x86_32/mm.c +++ b/xen/arch/x86/x86_32/mm.c -@@ -121,6 +121,8 @@ void __init paging_init(void) +@@ -122,6 +122,8 @@ void __init paging_init(void) #undef CNT #undef MFN diff --git a/xen-warnings-unused.diff b/xen-warnings-unused.diff index a963cd3..49964f1 100644 --- a/xen-warnings-unused.diff +++ b/xen-warnings-unused.diff @@ -241,7 +241,7 @@ status = fread(&buf, 1, sizeof(*h), rtnl); --- a/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c -@@ -746,7 +746,7 @@ static void __pci_disable_msi(struct msi +@@ -799,7 +799,7 @@ static void __pci_disable_msi(struct msi { struct pci_dev *dev; int pos; diff 
--git a/xen.changes b/xen.changes index 1d0de92..cf877d4 100644 --- a/xen.changes +++ b/xen.changes @@ -1,3 +1,20 @@ +------------------------------------------------------------------- +Tue Aug 23 08:53:20 MDT 2011 - carnold@novell.com + +- Upstream patches from Jan + 23725-pci-add-device.patch + 23762-iommu-fault-bm-off.patch + 23763-pci-multi-seg-x2apic-vtd-no-crash.patch + 23765-x86-irq-vector-leak.patch + 23766-x86-msi-vf-bars.patch + 23771-x86-ioapic-clear-pin.patch + 23772-x86-trampoline.patch + 23774-x86_64-EFI-EDD.patch + 23776-x86-kexec-hpet-legacy-bcast-disable.patch + 23781-pm-wide-ACPI-ids.patch + 23782-x86-ioapic-clear-irr.patch + 23783-ACPI-set-_PDC-bits.patch + ------------------------------------------------------------------- Mon Aug 15 11:54:08 CEST 2011 - ohering@suse.de diff --git a/xen.spec b/xen.spec index 784f35f..c16ef6f 100644 --- a/xen.spec +++ b/xen.spec @@ -96,7 +96,7 @@ BuildRequires: glibc-devel %if %{?with_kmp}0 BuildRequires: kernel-source kernel-syms module-init-tools xorg-x11 %endif -Version: 4.1.1_05 +Version: 4.1.1_07 Release: 1 License: GPLv2+ Group: System/Kernel @@ -186,12 +186,24 @@ Patch44: 23685-libxl-segfault-fix.patch Patch45: 23706-fix-20892.patch Patch46: 23723-x86-CMOS-lock.patch Patch47: 23724-x86-smpboot-x2apic.patch -Patch48: 23726-x86-intel-flexmigration.patch -Patch49: 23732-sedf.patch -Patch50: 23735-guest-dom0-cap.patch -Patch51: 23746-vtd-cleanup-timers.patch -Patch52: 23747-mmcfg-base-address.patch -Patch53: 23749-mmcfg-reservation.patch +Patch48: 23725-pci-add-device.patch +Patch49: 23726-x86-intel-flexmigration.patch +Patch50: 23732-sedf.patch +Patch51: 23735-guest-dom0-cap.patch +Patch52: 23746-vtd-cleanup-timers.patch +Patch53: 23747-mmcfg-base-address.patch +Patch54: 23749-mmcfg-reservation.patch +Patch55: 23762-iommu-fault-bm-off.patch +Patch56: 23763-pci-multi-seg-x2apic-vtd-no-crash.patch +Patch57: 23765-x86-irq-vector-leak.patch +Patch58: 23766-x86-msi-vf-bars.patch +Patch59: 
23771-x86-ioapic-clear-pin.patch +Patch60: 23772-x86-trampoline.patch +Patch61: 23774-x86_64-EFI-EDD.patch +Patch62: 23776-x86-kexec-hpet-legacy-bcast-disable.patch +Patch63: 23781-pm-wide-ACPI-ids.patch +Patch64: 23782-x86-ioapic-clear-irr.patch +Patch65: 23783-ACPI-set-_PDC-bits.patch # Upstream qemu patches # Our patches Patch300: xen-config.diff @@ -733,6 +745,18 @@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools %patch51 -p1 %patch52 -p1 %patch53 -p1 +%patch54 -p1 +%patch55 -p1 +%patch56 -p1 +%patch57 -p1 +%patch58 -p1 +%patch59 -p1 +%patch60 -p1 +%patch61 -p1 +%patch62 -p1 +%patch63 -p1 +%patch64 -p1 +%patch65 -p1 %patch300 -p1 %patch301 -p1 %patch302 -p1