- Upstream patches from Jan

23725-pci-add-device.patch
  23762-iommu-fault-bm-off.patch
  23763-pci-multi-seg-x2apic-vtd-no-crash.patch
  23765-x86-irq-vector-leak.patch 
  23766-x86-msi-vf-bars.patch
  23771-x86-ioapic-clear-pin.patch
  23772-x86-trampoline.patch
  23774-x86_64-EFI-EDD.patch
  23776-x86-kexec-hpet-legacy-bcast-disable.patch
  23781-pm-wide-ACPI-ids.patch
  23782-x86-ioapic-clear-irr.patch
  23783-ACPI-set-_PDC-bits.patch

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=140
This commit is contained in:
Charles Arnold 2011-08-26 16:49:33 +00:00 committed by Git OBS Bridge
parent d793408b8a
commit 287ca6da2e
19 changed files with 1932 additions and 19 deletions

174
23725-pci-add-device.patch Normal file
View File

@ -0,0 +1,174 @@
# HG changeset patch
# User Jan Beulich <jbeulich@novell.com>
# Date 1311081248 -3600
# Node ID 4dc6a9ba90d60fdf0cc0898fc9a8fe84ae9030fc
# Parent b3434f24b0827c5ef34e4b4a72893288e2ffbe40
PCI: consolidate interface for adding devices
The functionality of pci_add_device_ext() can be easily folded into
pci_add_device(), which eliminates the need to change two functions for
future adjustments.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/ia64/xen/hypercall.c
+++ b/xen/arch/ia64/xen/hypercall.c
@@ -662,8 +662,8 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
break;
- ret = pci_add_device(manage_pci.bus, manage_pci.devfn);
- break;
+ ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL);
+ break;
}
case PHYSDEVOP_manage_pci_remove: {
@@ -695,10 +695,10 @@ long do_physdev_op(int cmd, XEN_GUEST_HA
pdev_info.is_virtfn = manage_pci_ext.is_virtfn;
pdev_info.physfn.bus = manage_pci_ext.physfn.bus;
pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn;
- ret = pci_add_device_ext(manage_pci_ext.bus,
- manage_pci_ext.devfn,
- &pdev_info);
- break;
+ ret = pci_add_device(manage_pci_ext.bus,
+ manage_pci_ext.devfn,
+ &pdev_info);
+ break;
}
default:
--- a/xen/arch/x86/physdev.c
+++ b/xen/arch/x86/physdev.c
@@ -472,7 +472,7 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
break;
- ret = pci_add_device(manage_pci.bus, manage_pci.devfn);
+ ret = pci_add_device(manage_pci.bus, manage_pci.devfn, NULL);
break;
}
@@ -509,9 +509,9 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
pdev_info.is_virtfn = manage_pci_ext.is_virtfn;
pdev_info.physfn.bus = manage_pci_ext.physfn.bus;
pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn;
- ret = pci_add_device_ext(manage_pci_ext.bus,
- manage_pci_ext.devfn,
- &pdev_info);
+ ret = pci_add_device(manage_pci_ext.bus,
+ manage_pci_ext.devfn,
+ &pdev_info);
break;
}
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -142,16 +142,29 @@ void pci_enable_acs(struct pci_dev *pdev
pci_conf_write16(bus, dev, func, pos + PCI_ACS_CTRL, ctrl);
}
-int pci_add_device(u8 bus, u8 devfn)
+int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info)
{
struct pci_dev *pdev;
+ const char *pdev_type;
int ret = -ENOMEM;
+ if (!info)
+ pdev_type = "device";
+ else if (info->is_extfn)
+ pdev_type = "extended function";
+ else if (info->is_virtfn)
+ pdev_type = "virtual function";
+ else
+ return -EINVAL;
+
spin_lock(&pcidevs_lock);
pdev = alloc_pdev(bus, devfn);
if ( !pdev )
goto out;
+ if ( info )
+ pdev->info = *info;
+
ret = 0;
if ( !pdev->domain )
{
@@ -169,8 +182,8 @@ int pci_add_device(u8 bus, u8 devfn)
out:
spin_unlock(&pcidevs_lock);
- printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus,
- PCI_SLOT(devfn), PCI_FUNC(devfn));
+ printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type,
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
return ret;
}
@@ -197,51 +210,6 @@ int pci_remove_device(u8 bus, u8 devfn)
return ret;
}
-int pci_add_device_ext(u8 bus, u8 devfn, struct pci_dev_info *info)
-{
- int ret;
- char *pdev_type;
- struct pci_dev *pdev;
-
- if (info->is_extfn)
- pdev_type = "Extended Function";
- else if (info->is_virtfn)
- pdev_type = "Virtual Function";
- else
- return -EINVAL;
-
-
- ret = -ENOMEM;
- spin_lock(&pcidevs_lock);
- pdev = alloc_pdev(bus, devfn);
- if ( !pdev )
- goto out;
-
- pdev->info = *info;
-
- ret = 0;
- if ( !pdev->domain )
- {
- pdev->domain = dom0;
- ret = iommu_add_device(pdev);
- if ( ret )
- {
- pdev->domain = NULL;
- goto out;
- }
-
- list_add(&pdev->domain_list, &dom0->arch.pdev_list);
- pci_enable_acs(pdev);
- }
-
-out:
- spin_unlock(&pcidevs_lock);
- printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type,
- bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-
- return ret;
-}
-
static void pci_clean_dpci_irqs(struct domain *d)
{
struct hvm_irq_dpci *hvm_irq_dpci = NULL;
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -86,9 +86,8 @@ struct pci_dev *pci_lock_pdev(int bus, i
struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn);
void pci_release_devices(struct domain *d);
-int pci_add_device(u8 bus, u8 devfn);
+int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *);
int pci_remove_device(u8 bus, u8 devfn);
-int pci_add_device_ext(u8 bus, u8 devfn, struct pci_dev_info *info);
struct pci_dev *pci_get_pdev(int bus, int devfn);
struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn);

View File

@ -31,7 +31,7 @@ Signed-off-by: Tim Deegan <Tim.Deegan@citrix.com>
--- a/xen/drivers/passthrough/pci.c --- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c
@@ -252,9 +252,6 @@ static void pci_clean_dpci_irqs(struct d @@ -220,9 +220,6 @@ static void pci_clean_dpci_irqs(struct d
if ( !iommu_enabled ) if ( !iommu_enabled )
return; return;

View File

@ -0,0 +1,72 @@
References: bnc#712051, CVE-2011-3131
# HG changeset patch
# User Tim Deegan <Tim.Deegan@citrix.com>
# Date 1313144964 -3600
# Node ID 537ed3b74b3f13267cfb3eb0e1483f432f3685cd
# Parent 1f08b380d4386cdd6714786a9163e5f51aecab5d
Passthrough: disable bus-mastering on any card that causes an IOMMU fault.
This stops the card from raising back-to-back faults and live-locking
the CPU that handles them.
Signed-off-by: Tim Deegan <tim@xen.org>
Acked-by: Wei Wang2 <wei.wang2@amd.com>
Acked-by: Allen M Kay <allen.m.kay@intel.com>
--- a/xen/drivers/passthrough/amd/iommu_init.c
+++ b/xen/drivers/passthrough/amd/iommu_init.c
@@ -462,7 +462,7 @@ static hw_irq_controller iommu_msi_type
static void parse_event_log_entry(u32 entry[])
{
- u16 domain_id, device_id;
+ u16 domain_id, device_id, bdf, cword;
u32 code;
u64 *addr;
char * event_str[] = {"ILLEGAL_DEV_TABLE_ENTRY",
@@ -497,6 +497,18 @@ static void parse_event_log_entry(u32 en
"%s: domain = %d, device id = 0x%04x, "
"fault address = 0x%"PRIx64"\n",
event_str[code-1], domain_id, device_id, *addr);
+
+ /* Tell the device to stop DMAing; we can't rely on the guest to
+ * control it for us. */
+ for ( bdf = 0; bdf < ivrs_bdf_entries; bdf++ )
+ if ( get_dma_requestor_id(bdf) == device_id )
+ {
+ cword = pci_conf_read16(PCI_BUS(bdf), PCI_SLOT(bdf),
+ PCI_FUNC(bdf), PCI_COMMAND);
+ pci_conf_write16(PCI_BUS(bdf), PCI_SLOT(bdf),
+ PCI_FUNC(bdf), PCI_COMMAND,
+ cword & ~PCI_COMMAND_MASTER);
+ }
}
else
{
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -887,7 +887,7 @@ static void iommu_page_fault(int irq, vo
while (1)
{
u8 fault_reason;
- u16 source_id;
+ u16 source_id, cword;
u32 data;
u64 guest_addr;
int type;
@@ -920,6 +920,14 @@ static void iommu_page_fault(int irq, vo
iommu_page_fault_do_one(iommu, type, fault_reason,
source_id, guest_addr);
+ /* Tell the device to stop DMAing; we can't rely on the guest to
+ * control it for us. */
+ cword = pci_conf_read16(PCI_BUS(source_id), PCI_SLOT(source_id),
+ PCI_FUNC(source_id), PCI_COMMAND);
+ pci_conf_write16(PCI_BUS(source_id), PCI_SLOT(source_id),
+ PCI_FUNC(source_id), PCI_COMMAND,
+ cword & ~PCI_COMMAND_MASTER);
+
fault_index++;
if ( fault_index > cap_num_fault_regs(iommu->cap) )
fault_index = 0;

View File

@ -0,0 +1,55 @@
# HG changeset patch
# User Jan Beulich <jbeulich@novell.com>
# Date 1313226769 -3600
# Node ID 8f647d409196f1d018f6284af03d1625cf8f93af
# Parent 537ed3b74b3f13267cfb3eb0e1483f432f3685cd
VT-d: don't reject valid DMAR/ATSR tables on systems with multiple PCI segments
On multi-PCI-segment systems, each segment has to be expected to have
an include-all DRHD and an all-ports ATSR, so the firmware consistency
check incorrectly rejects valid configurations there (which is
particularly problematic when the firmware also pre-enabled x2apic
mode, as the system will panic in that case due to being unable to
enable interrupt remapping). Thus constrain the check to just segment
0 for now; once full multi-segment support is there (which I'm working
on), it can be revisited whether we'd want to track this per segment,
or whether we trust the firmware of such large systems.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -427,13 +427,14 @@ acpi_parse_one_drhd(struct acpi_dmar_ent
if ( iommu_verbose )
dprintk(VTDPREFIX, " flags: INCLUDE_ALL\n");
/* Only allow one INCLUDE_ALL */
- if ( include_all )
+ if ( drhd->segment == 0 && include_all )
{
dprintk(XENLOG_WARNING VTDPREFIX,
"Only one INCLUDE_ALL device scope is allowed\n");
ret = -EINVAL;
}
- include_all = 1;
+ if ( drhd->segment == 0 )
+ include_all = 1;
}
if ( ret )
@@ -633,13 +634,14 @@ acpi_parse_one_atsr(struct acpi_dmar_ent
if ( iommu_verbose )
dprintk(VTDPREFIX, " flags: ALL_PORTS\n");
/* Only allow one ALL_PORTS */
- if ( all_ports )
+ if ( atsr->segment == 0 && all_ports )
{
dprintk(XENLOG_WARNING VTDPREFIX,
"Only one ALL_PORTS device scope is allowed\n");
ret = -EINVAL;
}
- all_ports = 1;
+ if ( atsr->segment == 0 )
+ all_ports = 1;
}
if ( ret )

View File

@ -0,0 +1,29 @@
# HG changeset patch
# User Andrew Cooper <andrew.cooper3@citrix.com>
# Date 1313226868 -3600
# Node ID 68b903bb1b01b2a6ef9c6e8ead3be3c1c2208341
# Parent 67b883402736ef1746cd6654da4c898f70f40723
x86: IRQ fix incorrect logic in __clear_irq_vector
In the old code, tmp_mask is the cpu_and of cfg->cpu_mask and
cpu_online_map. However, in the usual case of moving an IRQ from one
PCPU to another because the scheduler decides it's a good idea,
cfg->cpu_mask and cfg->old_cpu_mask do not intersect. This causes the
old cpu vector_irq table to keep the irq reference when it shouldn't.
This leads to a resource leak if a domain is shut down while an irq has
a move pending, which results in Xen's create_irq() eventually failing
with -ENOSPC when all vector_irq tables are full of stale references.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -190,6 +190,7 @@ static void __clear_irq_vector(int irq)
if (likely(!cfg->move_in_progress))
return;
+ cpus_and(tmp_mask, cfg->old_cpu_mask, cpu_online_map);
for_each_cpu_mask(cpu, tmp_mask) {
for (vector = FIRST_DYNAMIC_VECTOR; vector <= LAST_DYNAMIC_VECTOR;
vector++) {

295
23766-x86-msi-vf-bars.patch Normal file
View File

@ -0,0 +1,295 @@
# HG changeset patch
# User Jan Beulich <jbeulich@novell.com>
# Date 1313226898 -3600
# Node ID 8d6edc3d26d26931f3732a2008fb4818bc7bab2d
# Parent 68b903bb1b01b2a6ef9c6e8ead3be3c1c2208341
x86/PCI-MSI: properly determine VF BAR values
As was discussed a couple of times on this list, SR-IOV virtual
functions have their BARs read as zero - the physical function's
SR-IOV capability structure must be consulted instead. The bogus
warnings people complained about are being eliminated with this
change.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -522,12 +522,48 @@ static int msi_capability_init(struct pc
return 0;
}
-static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir)
+static u64 read_pci_mem_bar(u8 bus, u8 slot, u8 func, u8 bir, int vf)
{
u8 limit;
- u32 addr;
+ u32 addr, base = PCI_BASE_ADDRESS_0, disp = 0;
- switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f )
+ if ( vf >= 0 )
+ {
+ struct pci_dev *pdev = pci_get_pdev(bus, PCI_DEVFN(slot, func));
+ unsigned int pos = pci_find_ext_capability(0, bus,
+ PCI_DEVFN(slot, func),
+ PCI_EXT_CAP_ID_SRIOV);
+ u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL);
+ u16 num_vf = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_NUM_VF);
+ u16 offset = pci_conf_read16(bus, slot, func,
+ pos + PCI_SRIOV_VF_OFFSET);
+ u16 stride = pci_conf_read16(bus, slot, func,
+ pos + PCI_SRIOV_VF_STRIDE);
+
+ if ( !pdev || !pos ||
+ !(ctrl & PCI_SRIOV_CTRL_VFE) ||
+ !(ctrl & PCI_SRIOV_CTRL_MSE) ||
+ !num_vf || !offset || (num_vf > 1 && !stride) ||
+ bir >= PCI_SRIOV_NUM_BARS ||
+ !pdev->vf_rlen[bir] )
+ return 0;
+ base = pos + PCI_SRIOV_BAR;
+ vf -= PCI_BDF(bus, slot, func) + offset;
+ if ( vf < 0 || (vf && vf % stride) )
+ return 0;
+ if ( stride )
+ {
+ if ( vf % stride )
+ return 0;
+ vf /= stride;
+ }
+ if ( vf >= num_vf )
+ return 0;
+ BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
+ disp = vf * pdev->vf_rlen[bir];
+ limit = PCI_SRIOV_NUM_BARS;
+ }
+ else switch ( pci_conf_read8(bus, slot, func, PCI_HEADER_TYPE) & 0x7f )
{
case PCI_HEADER_TYPE_NORMAL:
limit = 6;
@@ -544,7 +580,7 @@ static u64 read_pci_mem_bar(u8 bus, u8 s
if ( bir >= limit )
return 0;
- addr = pci_conf_read32(bus, slot, func, PCI_BASE_ADDRESS_0 + bir * 4);
+ addr = pci_conf_read32(bus, slot, func, base + bir * 4);
if ( (addr & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO )
return 0;
if ( (addr & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64 )
@@ -552,11 +588,10 @@ static u64 read_pci_mem_bar(u8 bus, u8 s
addr &= PCI_BASE_ADDRESS_MEM_MASK;
if ( ++bir >= limit )
return 0;
- return addr |
- ((u64)pci_conf_read32(bus, slot, func,
- PCI_BASE_ADDRESS_0 + bir * 4) << 32);
+ return addr + disp +
+ ((u64)pci_conf_read32(bus, slot, func, base + bir * 4) << 32);
}
- return addr & PCI_BASE_ADDRESS_MEM_MASK;
+ return (addr & PCI_BASE_ADDRESS_MEM_MASK) + disp;
}
/**
@@ -629,11 +664,29 @@ static int msix_capability_init(struct p
if ( !dev->msix_nr_entries )
{
+ u8 pbus, pslot, pfunc;
+ int vf;
u64 pba_paddr;
u32 pba_offset;
+ if ( !dev->info.is_virtfn )
+ {
+ pbus = bus;
+ pslot = slot;
+ pfunc = func;
+ vf = -1;
+ }
+ else
+ {
+ pbus = dev->info.physfn.bus;
+ pslot = PCI_SLOT(dev->info.physfn.devfn);
+ pfunc = PCI_FUNC(dev->info.physfn.devfn);
+ vf = PCI_BDF2(dev->bus, dev->devfn);
+ }
+
ASSERT(!dev->msix_used_entries);
- WARN_ON(msi->table_base != read_pci_mem_bar(bus, slot, func, bir));
+ WARN_ON(msi->table_base !=
+ read_pci_mem_bar(pbus, pslot, pfunc, bir, vf));
dev->msix_nr_entries = nr_entries;
dev->msix_table.first = PFN_DOWN(table_paddr);
@@ -645,7 +698,7 @@ static int msix_capability_init(struct p
pba_offset = pci_conf_read32(bus, slot, func,
msix_pba_offset_reg(pos));
bir = (u8)(pba_offset & PCI_MSIX_BIRMASK);
- pba_paddr = read_pci_mem_bar(bus, slot, func, bir);
+ pba_paddr = read_pci_mem_bar(pbus, pslot, pfunc, bir, vf);
WARN_ON(!pba_paddr);
pba_paddr += pba_offset & ~PCI_MSIX_BIRMASK;
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -145,6 +145,7 @@ void pci_enable_acs(struct pci_dev *pdev
int pci_add_device(u8 bus, u8 devfn, const struct pci_dev_info *info)
{
struct pci_dev *pdev;
+ unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
const char *pdev_type;
int ret = -ENOMEM;
@@ -153,7 +154,14 @@ int pci_add_device(u8 bus, u8 devfn, con
else if (info->is_extfn)
pdev_type = "extended function";
else if (info->is_virtfn)
+ {
+ spin_lock(&pcidevs_lock);
+ pdev = pci_get_pdev(info->physfn.bus, info->physfn.devfn);
+ spin_unlock(&pcidevs_lock);
+ if ( !pdev )
+ pci_add_device(info->physfn.bus, info->physfn.devfn, NULL);
pdev_type = "virtual function";
+ }
else
return -EINVAL;
@@ -164,6 +172,70 @@ int pci_add_device(u8 bus, u8 devfn, con
if ( info )
pdev->info = *info;
+ else if ( !pdev->vf_rlen[0] )
+ {
+ unsigned int pos = pci_find_ext_capability(0, bus, devfn,
+ PCI_EXT_CAP_ID_SRIOV);
+ u16 ctrl = pci_conf_read16(bus, slot, func, pos + PCI_SRIOV_CTRL);
+
+ if ( !pos )
+ /* Nothing */;
+ else if ( !(ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) )
+ {
+ unsigned int i;
+
+ BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
+ for ( i = 0; i < PCI_SRIOV_NUM_BARS; ++i )
+ {
+ unsigned int idx = pos + PCI_SRIOV_BAR + i * 4;
+ u32 bar = pci_conf_read32(bus, slot, func, idx);
+ u32 hi = 0;
+
+ if ( (bar & PCI_BASE_ADDRESS_SPACE) ==
+ PCI_BASE_ADDRESS_SPACE_IO )
+ {
+ printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with vf"
+ " BAR%u in IO space\n",
+ bus, slot, func, i);
+ continue;
+ }
+ pci_conf_write32(bus, slot, func, idx, ~0);
+ if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+ PCI_BASE_ADDRESS_MEM_TYPE_64 )
+ {
+ if ( i >= PCI_SRIOV_NUM_BARS )
+ {
+ printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x with"
+ " 64-bit vf BAR in last slot\n",
+ bus, slot, func);
+ break;
+ }
+ hi = pci_conf_read32(bus, slot, func, idx + 4);
+ pci_conf_write32(bus, slot, func, idx + 4, ~0);
+ }
+ pdev->vf_rlen[i] = pci_conf_read32(bus, slot, func, idx) &
+ PCI_BASE_ADDRESS_MEM_MASK;
+ if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+ PCI_BASE_ADDRESS_MEM_TYPE_64 )
+ {
+ pdev->vf_rlen[i] |= (u64)pci_conf_read32(bus, slot, func,
+ idx + 4) << 32;
+ pci_conf_write32(bus, slot, func, idx + 4, hi);
+ }
+ else if ( pdev->vf_rlen[i] )
+ pdev->vf_rlen[i] |= (u64)~0 << 32;
+ pci_conf_write32(bus, slot, func, idx, bar);
+ pdev->vf_rlen[i] = -pdev->vf_rlen[i];
+ if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+ PCI_BASE_ADDRESS_MEM_TYPE_64 )
+ ++i;
+ }
+ }
+ else
+ printk(XENLOG_WARNING "SR-IOV device %02x:%02x.%x has its virtual"
+ " functions already enabled (%04x)\n",
+ bus, slot, func, ctrl);
+ }
ret = 0;
if ( !pdev->domain )
@@ -183,7 +255,7 @@ int pci_add_device(u8 bus, u8 devfn, con
out:
spin_unlock(&pcidevs_lock);
printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type,
- bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ bus, slot, func);
return ret;
}
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -57,6 +57,7 @@ struct pci_dev {
const u8 bus;
const u8 devfn;
struct pci_dev_info info;
+ u64 vf_rlen[6];
};
#define for_each_pdev(domain, pdev) \
--- a/xen/include/xen/pci_regs.h
+++ b/xen/include/xen/pci_regs.h
@@ -425,7 +425,7 @@
#define PCI_EXT_CAP_ID_ACS 13
#define PCI_EXT_CAP_ID_ARI 14
#define PCI_EXT_CAP_ID_ATS 15
-#define PCI_EXT_CAP_ID_IOV 16
+#define PCI_EXT_CAP_ID_SRIOV 16
/* Advanced Error Reporting */
#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */
@@ -545,4 +545,35 @@
#define PCI_ACS_CTRL 0x06 /* ACS Control Register */
#define PCI_ACS_EGRESS_CTL_V 0x08 /* ACS Egress Control Vector */
+/* Single Root I/O Virtualization */
+#define PCI_SRIOV_CAP 0x04 /* SR-IOV Capabilities */
+#define PCI_SRIOV_CAP_VFM 0x01 /* VF Migration Capable */
+#define PCI_SRIOV_CAP_INTR(x) ((x) >> 21) /* Interrupt Message Number */
+#define PCI_SRIOV_CTRL 0x08 /* SR-IOV Control */
+#define PCI_SRIOV_CTRL_VFE 0x01 /* VF Enable */
+#define PCI_SRIOV_CTRL_VFM 0x02 /* VF Migration Enable */
+#define PCI_SRIOV_CTRL_INTR 0x04 /* VF Migration Interrupt Enable */
+#define PCI_SRIOV_CTRL_MSE 0x08 /* VF Memory Space Enable */
+#define PCI_SRIOV_CTRL_ARI 0x10 /* ARI Capable Hierarchy */
+#define PCI_SRIOV_STATUS 0x0a /* SR-IOV Status */
+#define PCI_SRIOV_STATUS_VFM 0x01 /* VF Migration Status */
+#define PCI_SRIOV_INITIAL_VF 0x0c /* Initial VFs */
+#define PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */
+#define PCI_SRIOV_NUM_VF 0x10 /* Number of VFs */
+#define PCI_SRIOV_FUNC_LINK 0x12 /* Function Dependency Link */
+#define PCI_SRIOV_VF_OFFSET 0x14 /* First VF Offset */
+#define PCI_SRIOV_VF_STRIDE 0x16 /* Following VF Stride */
+#define PCI_SRIOV_VF_DID 0x1a /* VF Device ID */
+#define PCI_SRIOV_SUP_PGSIZE 0x1c /* Supported Page Sizes */
+#define PCI_SRIOV_SYS_PGSIZE 0x20 /* System Page Size */
+#define PCI_SRIOV_BAR 0x24 /* VF BAR0 */
+#define PCI_SRIOV_NUM_BARS 6 /* Number of VF BARs */
+#define PCI_SRIOV_VFM 0x3c /* VF Migration State Array Offset*/
+#define PCI_SRIOV_VFM_BIR(x) ((x) & 7) /* State BIR */
+#define PCI_SRIOV_VFM_OFFSET(x) ((x) & ~7) /* State Offset */
+#define PCI_SRIOV_VFM_UA 0x0 /* Inactive.Unavailable */
+#define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */
+#define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */
+#define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */
+
#endif /* LINUX_PCI_REGS_H */

View File

@ -0,0 +1,61 @@
References: bnc#701686
# HG changeset patch
# User Jan Beulich <jbeulich@novell.com>
# Date 1313503555 -3600
# Node ID fc2be6cb89ad49efd90fe1b650f7efaab72f61b2
# Parent 5c1ebc117f9901bc155d2b92ae902a4144767dfb
x86: simplify (and fix) clear_IO_APIC{,_pin}()
These are used during bootup and (emergency) shutdown only, and their
only purpose is to get the actual IO-APIC's RTE(s) cleared.
Consequently, only the "raw" accessors should be used (and the ones
going through interrupt remapping code can be skipped), with the
exception of determining the delivery mode: This one must always go
through the interrupt remapping path, as in the VT-d case the actual
IO-APIC's RTE will have the delivery mode always set to zero (which
before possibly could have resulted in such an entry getting cleared
in the "raw" pass, though I haven't observed this case in practice).
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -365,14 +365,12 @@ static void eoi_IO_APIC_irq(unsigned int
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-#define clear_IO_APIC_pin(a,p) __clear_IO_APIC_pin(a,p,0)
-#define clear_IO_APIC_pin_raw(a,p) __clear_IO_APIC_pin(a,p,1)
-static void __clear_IO_APIC_pin(unsigned int apic, unsigned int pin, int raw)
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
struct IO_APIC_route_entry entry;
/* Check delivery_mode to be sure we're not clearing an SMI pin */
- entry = ioapic_read_entry(apic, pin, raw);
+ entry = __ioapic_read_entry(apic, pin, FALSE);
if (entry.delivery_mode == dest_SMI)
return;
@@ -381,7 +379,7 @@ static void __clear_IO_APIC_pin(unsigned
*/
memset(&entry, 0, sizeof(entry));
entry.mask = 1;
- ioapic_write_entry(apic, pin, raw, entry);
+ __ioapic_write_entry(apic, pin, TRUE, entry);
}
static void clear_IO_APIC (void)
@@ -389,10 +387,8 @@ static void clear_IO_APIC (void)
int apic, pin;
for (apic = 0; apic < nr_ioapics; apic++) {
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
clear_IO_APIC_pin(apic, pin);
- clear_IO_APIC_pin_raw(apic, pin);
- }
}
}

363
23772-x86-trampoline.patch Normal file
View File

@ -0,0 +1,363 @@
# HG changeset patch
# User Jan Beulich <jbeulich@novell.com>
# Date 1313744066 -3600
# Node ID 29aeed4979a78f26519f5fde8a405f8438297ab9
# Parent fc2be6cb89ad49efd90fe1b650f7efaab72f61b2
x86: make run-time part of trampoline relocatable
In order to eliminate an initial hack in the EFI boot code (where
memory for the trampoline was just "claimed" instead of properly
allocated), the trampoline code must no longer make assumptions about the
address at which it would be located. For the time being, the fixed
address is being retained for the traditional multiboot path.
As an additional benefit (at least from my pov) it allows confining
the visibility of the BOOT_TRAMPOLINE definition to just the boot
code.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/boot/Makefile
+++ b/xen/arch/x86/boot/Makefile
@@ -2,8 +2,8 @@ obj-y += head.o
head.o: reloc.S
-BOOT_TRAMPOLINE := $(shell sed -n 's,^\#define[[:space:]]\{1\,\}BOOT_TRAMPOLINE[[:space:]]\{1\,\},,p' $(BASEDIR)/include/asm-x86/config.h)
+BOOT_TRAMPOLINE := $(shell sed -n 's,^\#define[[:space:]]\{1\,\}BOOT_TRAMPOLINE[[:space:]]\{1\,\},,p' head.S)
%.S: %.c
RELOC=$(BOOT_TRAMPOLINE) $(MAKE) -f build32.mk $@
-reloc.S: $(BASEDIR)/include/asm-x86/config.h
+reloc.S: head.S
--- a/xen/arch/x86/boot/head.S
+++ b/xen/arch/x86/boot/head.S
@@ -9,7 +9,7 @@
.text
.code32
-#undef bootsym_phys
+#define BOOT_TRAMPOLINE 0x7c000
#define sym_phys(sym) ((sym) - __XEN_VIRT_START)
#define bootsym_phys(sym) ((sym) - trampoline_start + BOOT_TRAMPOLINE)
@@ -189,6 +189,17 @@ __start:
mov %edi,sym_phys(idle_pg_table_l2) + (__PAGE_OFFSET>>18)
#endif
+ /* Apply relocations to bootstrap trampoline. */
+ mov $BOOT_TRAMPOLINE,%edx
+ mov $sym_phys(__trampoline_rel_start),%edi
+ mov %edx,sym_phys(trampoline_phys)
+1:
+ mov (%edi),%eax
+ add %edx,(%edi,%eax)
+ add $4,%edi
+ cmp $sym_phys(__trampoline_rel_stop),%edi
+ jb 1b
+
/* Copy bootstrap trampoline to low memory, below 1MB. */
mov $sym_phys(trampoline_start),%esi
mov $bootsym_phys(trampoline_start),%edi
--- a/xen/arch/x86/boot/trampoline.S
+++ b/xen/arch/x86/boot/trampoline.S
@@ -4,6 +4,13 @@
#undef bootsym
#define bootsym(s) ((s)-trampoline_start)
+#define bootsym_rel(sym, off, opnd...) \
+ bootsym(sym),##opnd; \
+111:; \
+ .pushsection .trampoline_rel, "a"; \
+ .long 111b - (off) - .; \
+ .popsection
+
.globl trampoline_realmode_entry
trampoline_realmode_entry:
mov %cs,%ax
@@ -17,11 +24,11 @@ trampoline_realmode_entry:
xor %ax, %ax
inc %ax
lmsw %ax # CR0.PE = 1 (enter protected mode)
- ljmpl $BOOT_CS32,$bootsym_phys(trampoline_protmode_entry)
+ ljmpl $BOOT_CS32,$bootsym_rel(trampoline_protmode_entry,6)
idt_48: .word 0, 0, 0 # base = limit = 0
gdt_48: .word 6*8-1
- .long bootsym_phys(trampoline_gdt)
+ .long bootsym_rel(trampoline_gdt,4)
trampoline_gdt:
/* 0x0000: unused */
.quad 0x0000000000000000
@@ -32,11 +39,16 @@ trampoline_gdt:
/* 0x0018: ring 0 data */
.quad 0x00cf92000000ffff
/* 0x0020: real-mode code @ BOOT_TRAMPOLINE */
- .long 0x0000ffff | ((BOOT_TRAMPOLINE & 0x00ffff) << 16)
- .long 0x00009a00 | ((BOOT_TRAMPOLINE & 0xff0000) >> 16)
+ .long 0x0000ffff
+ .long 0x00009a00
/* 0x0028: real-mode data @ BOOT_TRAMPOLINE */
- .long 0x0000ffff | ((BOOT_TRAMPOLINE & 0x00ffff) << 16)
- .long 0x00009200 | ((BOOT_TRAMPOLINE & 0xff0000) >> 16)
+ .long 0x0000ffff
+ .long 0x00009200
+
+ .pushsection .trampoline_rel, "a"
+ .long trampoline_gdt + BOOT_PSEUDORM_CS + 2 - .
+ .long trampoline_gdt + BOOT_PSEUDORM_DS + 2 - .
+ .popsection
.globl cpuid_ext_features
cpuid_ext_features:
@@ -66,11 +78,11 @@ trampoline_protmode_entry:
/* Load pagetable base register. */
mov $sym_phys(idle_pg_table),%eax
- add bootsym_phys(trampoline_xen_phys_start),%eax
+ add bootsym_rel(trampoline_xen_phys_start,4,%eax)
mov %eax,%cr3
/* Set up EFER (Extended Feature Enable Register). */
- mov bootsym_phys(cpuid_ext_features),%edi
+ mov bootsym_rel(cpuid_ext_features,4,%edi)
test $0x20100800,%edi /* SYSCALL/SYSRET, No Execute, Long Mode? */
jz .Lskip_efer
movl $MSR_EFER,%ecx
@@ -93,7 +105,7 @@ trampoline_protmode_entry:
#if defined(__x86_64__)
/* Now in compatibility mode. Long-jump into 64-bit mode. */
- ljmp $BOOT_CS64,$bootsym_phys(start64)
+ ljmp $BOOT_CS64,$bootsym_rel(start64,6)
.code64
start64:
--- a/xen/arch/x86/boot/wakeup.S
+++ b/xen/arch/x86/boot/wakeup.S
@@ -42,15 +42,13 @@ ENTRY(wakeup_start)
# boot trampoline is under 1M, and shift its start into
# %fs to reference symbols in that area
- movl $BOOT_TRAMPOLINE, %eax
- shrl $4, %eax
- movl %eax, %fs
+ mov wakesym(trampoline_seg), %fs
lidt %fs:bootsym(idt_48)
lgdt %fs:bootsym(gdt_48)
movw $1, %ax
lmsw %ax # Turn on CR0.PE
- ljmpl $BOOT_CS32, $bootsym_phys(wakeup_32)
+ ljmpl $BOOT_CS32, $bootsym_rel(wakeup_32, 6)
/* This code uses an extended set of video mode numbers. These include:
* Aliases for standard modes
@@ -103,6 +101,10 @@ real_magic: .long 0x12345678
.globl video_mode, video_flags
video_mode: .long 0
video_flags: .long 0
+trampoline_seg: .word BOOT_TRAMPOLINE >> 4
+ .pushsection .trampoline_seg, "a"
+ .long trampoline_seg - .
+ .popsection
.code32
@@ -114,11 +116,11 @@ wakeup_32:
mov $BOOT_DS, %eax
mov %eax, %ds
mov %eax, %ss
- mov $bootsym_phys(early_stack), %esp
+ mov $bootsym_rel(early_stack, 4, %esp)
# check saved magic again
mov $sym_phys(saved_magic), %eax
- add bootsym_phys(trampoline_xen_phys_start), %eax
+ add bootsym_rel(trampoline_xen_phys_start, 4, %eax)
mov (%eax), %eax
cmp $0x9abcdef0, %eax
jne bogus_saved_magic
@@ -131,12 +133,12 @@ wakeup_32:
/* Load pagetable base register */
mov $sym_phys(idle_pg_table),%eax
- add bootsym_phys(trampoline_xen_phys_start),%eax
+ add bootsym_rel(trampoline_xen_phys_start,4,%eax)
mov %eax,%cr3
/* Will cpuid feature change after resume? */
/* Set up EFER (Extended Feature Enable Register). */
- mov bootsym_phys(cpuid_ext_features),%edi
+ mov bootsym_rel(cpuid_ext_features,4,%edi)
test $0x20100800,%edi /* SYSCALL/SYSRET, No Execute, Long Mode? */
jz .Lskip_eferw
movl $MSR_EFER,%ecx
@@ -162,7 +164,7 @@ wakeup_32:
#if defined(__x86_64__)
/* Now in compatibility mode. Long-jump to 64-bit mode */
- ljmp $BOOT_CS64, $bootsym_phys(wakeup_64)
+ ljmp $BOOT_CS64, $bootsym_rel(wakeup_64,6)
.code64
wakeup_64:
--- a/xen/arch/x86/efi/boot.c
+++ b/xen/arch/x86/efi/boot.c
@@ -599,6 +599,9 @@ static void __init relocate_image(unsign
}
}
+extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[];
+extern const s32 __trampoline_seg_start[], __trampoline_seg_stop[];
+
void EFIAPI __init __attribute__((__noreturn__))
efi_start(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
{
@@ -614,9 +617,10 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *mode_info;
EFI_FILE_HANDLE dir_handle;
union string section = { NULL }, name;
+ const s32 *trampoline_ptr;
struct e820entry *e;
u64 efer;
- bool_t base_video = 0, trampoline_okay = 0;
+ bool_t base_video = 0;
efi_ih = ImageHandle;
efi_bs = SystemTable->BootServices;
@@ -914,15 +918,27 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
dmi_efi_get_table((void *)(long)efi.smbios);
/* Allocate space for trampoline (in first Mb). */
- cfg.addr = BOOT_TRAMPOLINE;
+ cfg.addr = 0x100000;
cfg.size = trampoline_end - trampoline_start;
- status = efi_bs->AllocatePages(AllocateAddress, EfiLoaderData,
+ status = efi_bs->AllocatePages(AllocateMaxAddress, EfiLoaderData,
PFN_UP(cfg.size), &cfg.addr);
if ( EFI_ERROR(status) )
{
cfg.addr = 0;
- PrintErr(L"Note: Trampoline area is in use\r\n");
+ blexit(L"No memory for trampoline\r\n");
}
+ trampoline_phys = cfg.addr;
+ /* Apply relocations to trampoline. */
+ for ( trampoline_ptr = __trampoline_rel_start;
+ trampoline_ptr < __trampoline_rel_stop;
+ ++trampoline_ptr )
+ *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) +=
+ trampoline_phys;
+ for ( trampoline_ptr = __trampoline_seg_start;
+ trampoline_ptr < __trampoline_seg_stop;
+ ++trampoline_ptr )
+ *(u16 *)(*trampoline_ptr + (long)trampoline_ptr) =
+ trampoline_phys >> 4;
/* Initialise L2 identity-map and xen page table entries (16MB). */
for ( i = 0; i < 8; ++i )
@@ -1096,14 +1112,8 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
e->type = type;
++e820nr;
}
- if ( type == E820_RAM && e->addr <= BOOT_TRAMPOLINE &&
- e->addr + e->size >= BOOT_TRAMPOLINE + cfg.size )
- trampoline_okay = 1;
}
- if ( !trampoline_okay )
- blexit(L"Trampoline area unavailable\r\n");
-
status = efi_bs->ExitBootServices(ImageHandle, map_key);
if ( EFI_ERROR(status) )
PrintErrMesg(L"Cannot exit boot services", status);
@@ -1117,7 +1127,7 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
efi_fw_vendor = (void *)efi_fw_vendor + DIRECTMAP_VIRT_START;
relocate_image(__XEN_VIRT_START - xen_phys_start);
- memcpy((void *)(long)BOOT_TRAMPOLINE, trampoline_start, cfg.size);
+ memcpy((void *)trampoline_phys, trampoline_start, cfg.size);
/* Set system registers and transfer control. */
asm volatile("pushq $0\n\tpopfq");
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -49,6 +49,8 @@
#define setup_trampoline() (bootsym_phys(trampoline_realmode_entry))
+unsigned long __read_mostly trampoline_phys;
+
/* Set if we find a B stepping CPU */
static int smp_b_stepping;
--- a/xen/arch/x86/x86_32/mm.c
+++ b/xen/arch/x86/x86_32/mm.c
@@ -22,6 +22,7 @@
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
+#include <xen/pfn.h>
#include <xen/sched.h>
#include <xen/guest_access.h>
#include <asm/current.h>
@@ -164,8 +165,9 @@ void __init zap_low_mappings(l2_pgentry_
flush_all(FLUSH_TLB_GLOBAL);
/* Replace with mapping of the boot trampoline only. */
- map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT,
- 0x10, __PAGE_HYPERVISOR);
+ map_pages_to_xen(trampoline_phys, trampoline_phys >> PAGE_SHIFT,
+ PFN_UP(trampoline_end - trampoline_start),
+ __PAGE_HYPERVISOR);
}
void __init subarch_init_memory(void)
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -828,7 +828,7 @@ void __init zap_low_mappings(void)
flush_local(FLUSH_TLB_GLOBAL);
/* Replace with mapping of the boot trampoline only. */
- map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT,
+ map_pages_to_xen(trampoline_phys, trampoline_phys >> PAGE_SHIFT,
PFN_UP(trampoline_end - trampoline_start),
__PAGE_HYPERVISOR);
}
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -103,6 +103,13 @@ SECTIONS
*(.init.data)
*(.init.data.rel)
*(.init.data.rel.*)
+ . = ALIGN(4);
+ __trampoline_rel_start = .;
+ *(.trampoline_rel)
+ __trampoline_rel_stop = .;
+ __trampoline_seg_start = .;
+ *(.trampoline_seg)
+ __trampoline_seg_stop = .;
} :text
. = ALIGN(32);
.init.setup : {
--- a/xen/include/asm-x86/config.h
+++ b/xen/include/asm-x86/config.h
@@ -95,13 +95,13 @@
/* Primary stack is restricted to 8kB by guard pages. */
#define PRIMARY_STACK_SIZE 8192
-#define BOOT_TRAMPOLINE 0x7c000
+#ifndef __ASSEMBLY__
+extern unsigned long trampoline_phys;
#define bootsym_phys(sym) \
- (((unsigned long)&(sym)-(unsigned long)&trampoline_start)+BOOT_TRAMPOLINE)
+ (((unsigned long)&(sym)-(unsigned long)&trampoline_start)+trampoline_phys)
#define bootsym(sym) \
(*RELOC_HIDE((typeof(&(sym)))__va(__pa(&(sym))), \
- BOOT_TRAMPOLINE-__pa(trampoline_start)))
-#ifndef __ASSEMBLY__
+ trampoline_phys-__pa(trampoline_start)))
extern char trampoline_start[], trampoline_end[];
extern char trampoline_realmode_entry[];
extern unsigned int trampoline_xen_phys_start;

364
23774-x86_64-EFI-EDD.patch Normal file
View File

@ -0,0 +1,364 @@
# HG changeset patch
# User Jan Beulich <jbeulich@novell.com>
# Date 1313744120 -3600
# Node ID e35c5202625ef5534561f84352833ad9467d986c
# Parent dd90b59cb11c60c48e174c899190e2967341fe32
x86-64/EFI: construct EDD data from device path protocol information
In the absence of a BIOS to handle INT13 requests, this information
must be constructed artificially instead when booted from EFI.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/boot/edd.S
+++ b/xen/arch/x86/boot/edd.S
@@ -16,21 +16,13 @@
* Updated and ported for Xen by Keir Fraser <keir@xensource.com> June 2007
*/
+#include <asm/edd.h>
+
.code16
/* Offset of disc signature in the MBR. */
#define EDD_MBR_SIG_OFFSET 0x1B8
-/* Maximum number of EDD information structures at boot_edd_info. */
-#define EDD_INFO_MAX 6
-
-/* Maximum number of MBR signatures at boot_mbr_signature. */
-#define EDD_MBR_SIG_MAX 16
-
-/* Size of components of EDD information structure. */
-#define EDDEXTSIZE 8
-#define EDDPARMSIZE 74
-
get_edd:
cmpb $2, bootsym(opt_edd) # edd=off ?
je edd_done
--- a/xen/arch/x86/efi/boot.c
+++ b/xen/arch/x86/efi/boot.c
@@ -16,6 +16,7 @@
#include <xen/stringify.h>
#include <xen/vga.h>
#include <asm/e820.h>
+#include <asm/edd.h>
#include <asm/mm.h>
#include <asm/msr.h>
#include <asm/processor.h>
@@ -539,6 +540,18 @@ static void __init split_value(char *s)
*s = 0;
}
+static void __init edd_put_string(u8 *dst, size_t n, const char *src)
+{
+ while ( n-- && *src )
+ *dst++ = *src++;
+ if ( *src )
+ PrintErrMesg(L"Internal error populating EDD info",
+ EFI_BUFFER_TOO_SMALL);
+ while ( n-- )
+ *dst++ = ' ';
+}
+#define edd_put_string(d, s) edd_put_string(d, ARRAY_SIZE(d), s)
+
static int __init set_color(u32 mask, int bpp, u8 *pos, u8 *sz)
{
if ( bpp < 0 )
@@ -607,6 +620,8 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
{
static EFI_GUID __initdata loaded_image_guid = LOADED_IMAGE_PROTOCOL;
static EFI_GUID __initdata gop_guid = EFI_GRAPHICS_OUTPUT_PROTOCOL_GUID;
+ static EFI_GUID __initdata bio_guid = BLOCK_IO_PROTOCOL;
+ static EFI_GUID __initdata devp_guid = DEVICE_PATH_PROTOCOL;
EFI_LOADED_IMAGE *loaded_image;
EFI_STATUS status;
unsigned int i, argc;
@@ -887,7 +902,148 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY
place_string(&mbi.mem_upper, NULL);
- /* XXX Collect EDD info. */
+ /* Collect EDD info. */
+ BUILD_BUG_ON(offsetof(struct edd_info, edd_device_params) != EDDEXTSIZE);
+ BUILD_BUG_ON(sizeof(struct edd_device_params) != EDDPARMSIZE);
+ size = 0;
+ status = efi_bs->LocateHandle(ByProtocol, &bio_guid, NULL, &size, NULL);
+ if ( status == EFI_BUFFER_TOO_SMALL )
+ status = efi_bs->AllocatePool(EfiLoaderData, size, (void **)&handles);
+ if ( !EFI_ERROR(status) )
+ status = efi_bs->LocateHandle(ByProtocol, &bio_guid, NULL, &size,
+ handles);
+ if ( EFI_ERROR(status) )
+ size = 0;
+ for ( i = 0; i < size / sizeof(*handles); ++i )
+ {
+ EFI_BLOCK_IO *bio;
+ EFI_DEV_PATH_PTR devp;
+ struct edd_info *info = boot_edd_info + boot_edd_info_nr;
+ struct edd_device_params *params = &info->edd_device_params;
+ enum { root, acpi, pci, ctrlr } state = root;
+
+ status = efi_bs->HandleProtocol(handles[i], &bio_guid, (void **)&bio);
+ if ( EFI_ERROR(status) ||
+ bio->Media->RemovableMedia ||
+ bio->Media->LogicalPartition )
+ continue;
+ if ( boot_edd_info_nr < EDD_INFO_MAX )
+ {
+ info->device = 0x80 + boot_edd_info_nr; /* fake */
+ info->version = 0x11;
+ params->length = offsetof(struct edd_device_params, dpte_ptr);
+ params->number_of_sectors = bio->Media->LastBlock + 1;
+ params->bytes_per_sector = bio->Media->BlockSize;
+ params->dpte_ptr = ~0;
+ }
+ ++boot_edd_info_nr;
+ status = efi_bs->HandleProtocol(handles[i], &devp_guid,
+ (void **)&devp);
+ if ( EFI_ERROR(status) )
+ continue;
+ for ( ; !IsDevicePathEnd(devp.DevPath);
+ devp.DevPath = NextDevicePathNode(devp.DevPath) )
+ {
+ switch ( DevicePathType(devp.DevPath) )
+ {
+ const u8 *p;
+
+ case ACPI_DEVICE_PATH:
+ if ( state != root || boot_edd_info_nr > EDD_INFO_MAX )
+ break;
+ switch ( DevicePathSubType(devp.DevPath) )
+ {
+ case ACPI_DP:
+ if ( devp.Acpi->HID != EISA_PNP_ID(0xA03) &&
+ devp.Acpi->HID != EISA_PNP_ID(0xA08) )
+ break;
+ params->interface_path.pci.bus = devp.Acpi->UID;
+ state = acpi;
+ break;
+ case EXPANDED_ACPI_DP:
+ /* XXX */
+ break;
+ }
+ break;
+ case HARDWARE_DEVICE_PATH:
+ if ( state != acpi ||
+ DevicePathSubType(devp.DevPath) != HW_PCI_DP ||
+ boot_edd_info_nr > EDD_INFO_MAX )
+ break;
+ state = pci;
+ edd_put_string(params->host_bus_type, "PCI");
+ params->interface_path.pci.slot = devp.Pci->Device;
+ params->interface_path.pci.function = devp.Pci->Function;
+ break;
+ case MESSAGING_DEVICE_PATH:
+ if ( state != pci || boot_edd_info_nr > EDD_INFO_MAX )
+ break;
+ state = ctrlr;
+ switch ( DevicePathSubType(devp.DevPath) )
+ {
+ case MSG_ATAPI_DP:
+ edd_put_string(params->interface_type, "ATAPI");
+ params->interface_path.pci.channel =
+ devp.Atapi->PrimarySecondary;
+ params->device_path.atapi.device = devp.Atapi->SlaveMaster;
+ params->device_path.atapi.lun = devp.Atapi->Lun;
+ break;
+ case MSG_SCSI_DP:
+ edd_put_string(params->interface_type, "SCSI");
+ params->device_path.scsi.id = devp.Scsi->Pun;
+ params->device_path.scsi.lun = devp.Scsi->Lun;
+ break;
+ case MSG_FIBRECHANNEL_DP:
+ edd_put_string(params->interface_type, "FIBRE");
+ params->device_path.fibre.wwid = devp.FibreChannel->WWN;
+ params->device_path.fibre.lun = devp.FibreChannel->Lun;
+ break;
+ case MSG_1394_DP:
+ edd_put_string(params->interface_type, "1394");
+ params->device_path.i1394.eui = devp.F1394->Guid;
+ break;
+ case MSG_USB_DP:
+ case MSG_USB_CLASS_DP:
+ edd_put_string(params->interface_type, "USB");
+ break;
+ case MSG_I2O_DP:
+ edd_put_string(params->interface_type, "I2O");
+ params->device_path.i2o.identity_tag = devp.I2O->Tid;
+ break;
+ default:
+ continue;
+ }
+ info->version = 0x30;
+ params->length = sizeof(struct edd_device_params);
+ params->key = 0xbedd;
+ params->device_path_info_length =
+ sizeof(struct edd_device_params) -
+ offsetof(struct edd_device_params, key);
+ for ( p = (const u8 *)&params->key; p < &params->checksum; ++p )
+ params->checksum -= *p;
+ break;
+ case MEDIA_DEVICE_PATH:
+ if ( DevicePathSubType(devp.DevPath) == MEDIA_HARDDRIVE_DP &&
+ devp.HardDrive->MBRType == MBR_TYPE_PCAT &&
+ boot_mbr_signature_nr < EDD_MBR_SIG_MAX )
+ {
+ struct mbr_signature *sig = boot_mbr_signature +
+ boot_mbr_signature_nr;
+
+ sig->device = 0x80 + boot_edd_info_nr; /* fake */
+ memcpy(&sig->signature, devp.HardDrive->Signature,
+ sizeof(sig->signature));
+ ++boot_mbr_signature_nr;
+ }
+ break;
+ }
+ }
+ }
+ if ( handles )
+ efi_bs->FreePool(handles);
+ if ( boot_edd_info_nr > EDD_INFO_MAX )
+ boot_edd_info_nr = EDD_INFO_MAX;
+
/* XXX Collect EDID info. */
if ( cpuid_eax(0x80000000) > 0x80000000 )
--- a/xen/include/asm-x86/edd.h
+++ b/xen/include/asm-x86/edd.h
@@ -23,6 +23,8 @@
#ifndef __XEN_EDD_H__
#define __XEN_EDD_H__
+#ifndef __ASSEMBLY__
+
struct edd_info {
/* Int13, Fn48: Check Extensions Present. */
u8 device; /* %dl: device */
@@ -33,10 +35,106 @@ struct edd_info {
u8 legacy_max_head; /* %dh: maximum head number */
u8 legacy_sectors_per_track; /* %cl[5:0]: maximum sector number */
/* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
- struct {
+ struct edd_device_params {
u16 length;
- u8 data[72];
- } edd_device_params;
+ u16 info_flags;
+ u32 num_default_cylinders;
+ u32 num_default_heads;
+ u32 sectors_per_track;
+ u64 number_of_sectors;
+ u16 bytes_per_sector;
+ u32 dpte_ptr; /* 0xFFFFFFFF for our purposes */
+ u16 key; /* = 0xBEDD */
+ u8 device_path_info_length;
+ u8 reserved2;
+ u16 reserved3;
+ u8 host_bus_type[4];
+ u8 interface_type[8];
+ union {
+ struct {
+ u16 base_address;
+ u16 reserved1;
+ u32 reserved2;
+ } __attribute__ ((packed)) isa;
+ struct {
+ u8 bus;
+ u8 slot;
+ u8 function;
+ u8 channel;
+ u32 reserved;
+ } __attribute__ ((packed)) pci;
+ /* pcix is same as pci */
+ struct {
+ u64 reserved;
+ } __attribute__ ((packed)) ibnd;
+ struct {
+ u64 reserved;
+ } __attribute__ ((packed)) xprs;
+ struct {
+ u64 reserved;
+ } __attribute__ ((packed)) htpt;
+ struct {
+ u64 reserved;
+ } __attribute__ ((packed)) unknown;
+ } interface_path;
+ union {
+ struct {
+ u8 device;
+ u8 reserved1;
+ u16 reserved2;
+ u32 reserved3;
+ u64 reserved4;
+ } __attribute__ ((packed)) ata;
+ struct {
+ u8 device;
+ u8 lun;
+ u8 reserved1;
+ u8 reserved2;
+ u32 reserved3;
+ u64 reserved4;
+ } __attribute__ ((packed)) atapi;
+ struct {
+ u16 id;
+ u64 lun;
+ u16 reserved1;
+ u32 reserved2;
+ } __attribute__ ((packed)) scsi;
+ struct {
+ u64 serial_number;
+ u64 reserved;
+ } __attribute__ ((packed)) usb;
+ struct {
+ u64 eui;
+ u64 reserved;
+ } __attribute__ ((packed)) i1394;
+ struct {
+ u64 wwid;
+ u64 lun;
+ } __attribute__ ((packed)) fibre;
+ struct {
+ u64 identity_tag;
+ u64 reserved;
+ } __attribute__ ((packed)) i2o;
+ struct {
+ u32 array_number;
+ u32 reserved1;
+ u64 reserved2;
+ } __attribute__ ((packed)) raid;
+ struct {
+ u8 device;
+ u8 reserved1;
+ u16 reserved2;
+ u32 reserved3;
+ u64 reserved4;
+ } __attribute__ ((packed)) sata;
+ struct {
+ u64 reserved1;
+ u64 reserved2;
+ } __attribute__ ((packed)) unknown;
+ } device_path;
+ u8 reserved4;
+ u8 checksum;
+ } __attribute__ ((packed)) edd_device_params;
} __attribute__ ((packed));
struct mbr_signature {
@@ -51,4 +149,16 @@ extern u8 boot_mbr_signature_nr;
extern struct edd_info boot_edd_info[];
extern u8 boot_edd_info_nr;
+#endif /* __ASSEMBLY__ */
+
+/* Maximum number of EDD information structures at boot_edd_info. */
+#define EDD_INFO_MAX 6
+
+/* Maximum number of MBR signatures at boot_mbr_signature. */
+#define EDD_MBR_SIG_MAX 16
+
+/* Size of components of EDD information structure. */
+#define EDDEXTSIZE 8
+#define EDDPARMSIZE 74
+
#endif /* __XEN_EDD_H__ */

View File

@ -0,0 +1,56 @@
# HG changeset patch
# User Andrew Cooper <andrew.cooper3@citrix.com>
# Date 1313744302 -3600
# Node ID 0ddb4481f883ddf55c12a0b8d1445cf137ef0b63
# Parent 9957bef3e7b4511f83ed8883cd5ecd49ea3ee95d
x86/KEXEC: disable hpet legacy broadcasts earlier
On x2apic machines which booted in xapic mode,
hpet_disable_legacy_broadcast() sends an event check IPI to all online
processors. This leads to a protection fault as the genapic blindly
pokes x2apic MSRs while the local apic is in xapic mode.
One option is to change genapic when we shut down the local apic, but
there are still problems with trying to IPI processors in the online
processor map which are actually sitting in NMI loops.
Another option is to have each CPU take itself out of the online CPU
map during the NMI shootdown.
Realistically however, disabling hpet legacy broadcasts earlier in the
kexec path is the easiest fix to the problem.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
--- a/xen/arch/x86/crash.c
+++ b/xen/arch/x86/crash.c
@@ -27,6 +27,7 @@
#include <asm/hvm/support.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
+#include <asm/hpet.h>
static atomic_t waiting_for_crash_ipi;
static unsigned int crashing_cpu;
@@ -59,6 +60,9 @@ static void nmi_shootdown_cpus(void)
local_irq_disable();
+ if ( hpet_broadcast_is_available() )
+ hpet_disable_legacy_broadcast();
+
crashing_cpu = smp_processor_id();
local_irq_count(crashing_cpu) = 0;
--- a/xen/arch/x86/machine_kexec.c
+++ b/xen/arch/x86/machine_kexec.c
@@ -96,9 +96,6 @@ void machine_kexec(xen_kexec_image_t *im
.limit = LAST_RESERVED_GDT_BYTE
};
- if ( hpet_broadcast_is_available() )
- hpet_disable_legacy_broadcast();
-
/*
* compat_machine_kexec() returns to idle pagetables, which requires us
* to be running on a static GDT mapping (idle pagetables have no GDT

View File

@ -0,0 +1,68 @@
# HG changeset patch
# User Jan Beulich <jbeulich@novell.com>
# Date 1314004239 -3600
# Node ID 0849b0e59e2418e8215616df147f955b01b07577
# Parent 07f78b5bd03c02e32324eaa00487643d27b7ffa8
pm: don't truncate processors' ACPI IDs to 8 bits
This is just another adjustment to allow systems with very many CPUs
(or unusual ACPI IDs) to be properly power-managed.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/ia64/linux-xen/acpi.c
+++ b/xen/arch/ia64/linux-xen/acpi.c
@@ -223,11 +223,14 @@ static u16 ia64_acpiid_to_sapicid[ MAX_L
{[0 ... MAX_LOCAL_SAPIC - 1] = 0xffff };
/* acpi id to cpu id */
-int get_cpu_id(u8 acpi_id)
+int get_cpu_id(u32 acpi_id)
{
int i;
u16 apic_id;
+ if ( acpi_id >= MAX_LOCAL_SAPIC )
+ return -EINVAL;
+
apic_id = ia64_acpiid_to_sapicid[acpi_id];
if ( apic_id == 0xffff )
return -EINVAL;
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -871,11 +871,14 @@ static void set_cx(
acpi_power->safe_state = cx;
}
-int get_cpu_id(u8 acpi_id)
+int get_cpu_id(u32 acpi_id)
{
int i;
u32 apic_id;
+ if ( acpi_id >= MAX_MADT_ENTRIES )
+ return -1;
+
apic_id = x86_acpiid_to_apicid[acpi_id];
if ( apic_id == BAD_APICID )
return -1;
@@ -952,7 +955,7 @@ long set_cx_pminfo(uint32_t cpu, struct
print_cx_pminfo(cpu, power);
/* map from acpi_id to cpu_id */
- cpu_id = get_cpu_id((u8)cpu);
+ cpu_id = get_cpu_id(cpu);
if ( cpu_id == -1 )
{
printk(XENLOG_ERR "no cpu_id for acpi_id %d\n", cpu);
--- a/xen/include/acpi/cpufreq/processor_perf.h
+++ b/xen/include/acpi/cpufreq/processor_perf.h
@@ -6,7 +6,7 @@
#define XEN_PX_INIT 0x80000000
-int get_cpu_id(u8);
+int get_cpu_id(u32);
int powernow_cpufreq_init(void);
unsigned int powernow_register_driver(void);
unsigned int get_measured_perf(unsigned int cpu, unsigned int flag);

View File

@ -0,0 +1,71 @@
References: bnc#701686
# HG changeset patch
# User Jan Beulich <jbeulich@novell.com>
# Date 1314004270 -3600
# Node ID 25dfe53bb1898b3967ceb71a7eb60a8b760c25fb
# Parent 0849b0e59e2418e8215616df147f955b01b07577
x86/IO-APIC: clear remoteIRR in clear_IO_APIC_pin()
It was found that in a crash scenario, the remoteIRR bit in an IO-APIC
RTE could be left set, causing problems when bringing up a kdump
kernel. While this generally is most important to be taken care of in
the new kernel (which usually would be a native one), it still seems
desirable to also address this problem in Xen so that (a) the problem
doesn't bite Xen when used as a secondary emergency kernel and (b) an
attempt is being made to save un-fixed secondary kernels from running
into said problem.
Based on a Linux patch from suresh.b.siddha@intel.com.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -375,11 +375,46 @@ static void clear_IO_APIC_pin(unsigned i
return;
/*
+ * Make sure the entry is masked and re-read the contents to check
+ * if it is a level triggered pin and if the remoteIRR is set.
+ */
+ if (!entry.mask) {
+ entry.mask = 1;
+ __ioapic_write_entry(apic, pin, FALSE, entry);
+ }
+ entry = __ioapic_read_entry(apic, pin, TRUE);
+
+ if (entry.irr) {
+ /* Make sure the trigger mode is set to level. */
+ if (!entry.trigger) {
+ entry.trigger = 1;
+ __ioapic_write_entry(apic, pin, TRUE, entry);
+ }
+ if (mp_ioapics[apic].mpc_apicver >= 0x20)
+ io_apic_eoi(apic, entry.vector);
+ else {
+ /*
+ * Mechanism by which we clear remoteIRR in this case is by
+ * changing the trigger mode to edge and back to level.
+ */
+ entry.trigger = 0;
+ __ioapic_write_entry(apic, pin, TRUE, entry);
+ entry.trigger = 1;
+ __ioapic_write_entry(apic, pin, TRUE, entry);
+ }
+ }
+
+ /*
* Disable it in the IO-APIC irq-routing table:
*/
memset(&entry, 0, sizeof(entry));
entry.mask = 1;
__ioapic_write_entry(apic, pin, TRUE, entry);
+
+ entry = __ioapic_read_entry(apic, pin, TRUE);
+ if (entry.irr)
+ printk(KERN_ERR "IO-APIC%02x-%u: Unable to reset IRR\n",
+ IO_APIC_ID(apic), pin);
}
static void clear_IO_APIC (void)

View File

@ -0,0 +1,266 @@
# HG changeset patch
# User Jan Beulich <jbeulich@novell.com>
# Date 1314004356 -3600
# Node ID 2029263c501c315fa4d94845e5cfa6a9b0b395d5
# Parent 25dfe53bb1898b3967ceb71a7eb60a8b760c25fb
ACPI: add _PDC input override mechanism
In order to have Dom0 call _PDC with input fully representing Xen's
capabilities, and in order to avoid building knowledge of Xen
implementation details into Dom0, this provides a mechanism by which
the Dom0 kernel can, once it has filled the _PDC input buffer according to
its own knowledge, present the buffer to Xen to apply overrides for
the parts of the C-, P-, and T-state management that it controls. This
is particularly to address the dependency of Xen using MWAIT to enter
certain C-states on the availability of the break-on-interrupt
extension (which the Dom0 kernel should have no need to know about).
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/ia64/linux-xen/acpi.c
+++ b/xen/arch/ia64/linux-xen/acpi.c
@@ -243,6 +243,13 @@ int get_cpu_id(u32 acpi_id)
return -1;
}
+
+int arch_acpi_set_pdc_bits(u32 acpi_id, u32 *pdc, u32 mask)
+{
+ pdc[2] |= ACPI_PDC_EST_CAPABILITY_SMP & mask;
+ return 0;
+}
+
#endif
static int __init
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -619,12 +619,6 @@ static int init_cx_pminfo(struct acpi_pr
return 0;
}
-#define CPUID_MWAIT_LEAF (5)
-#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
-#define CPUID5_ECX_INTERRUPT_BREAK (0x2)
-
-#define MWAIT_ECX_INTERRUPT_BREAK (0x1)
-
#define MWAIT_SUBSTATE_MASK (0xf)
#define MWAIT_SUBSTATE_SIZE (4)
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -1006,3 +1006,47 @@ unsigned int acpi_get_processor_id(unsig
return INVALID_ACPIID;
}
+
+static void get_mwait_ecx(void *info)
+{
+ *(u32 *)info = cpuid_ecx(CPUID_MWAIT_LEAF);
+}
+
+int arch_acpi_set_pdc_bits(u32 acpi_id, u32 *pdc, u32 mask)
+{
+ unsigned int cpu = get_cpu_id(acpi_id);
+ struct cpuinfo_x86 *c;
+ u32 ecx;
+
+ if (!(acpi_id + 1))
+ c = &boot_cpu_data;
+ else if (cpu >= NR_CPUS || !cpu_online(cpu))
+ return -EINVAL;
+ else
+ c = cpu_data + cpu;
+
+ pdc[2] |= ACPI_PDC_C_CAPABILITY_SMP & mask;
+
+ if (cpu_has(c, X86_FEATURE_EST))
+ pdc[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP & mask;
+
+ if (cpu_has(c, X86_FEATURE_ACPI))
+ pdc[2] |= ACPI_PDC_T_FFH & mask;
+
+ /*
+ * If mwait/monitor or its break-on-interrupt extension are
+ * unsupported, Cx_FFH will be disabled.
+ */
+ if (!cpu_has(c, X86_FEATURE_MWAIT) ||
+ c->cpuid_level < CPUID_MWAIT_LEAF)
+ ecx = 0;
+ else if (c == &boot_cpu_data || cpu == smp_processor_id())
+ ecx = cpuid_ecx(CPUID_MWAIT_LEAF);
+ else
+ on_selected_cpus(cpumask_of(cpu), get_mwait_ecx, &ecx, 1);
+ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
+ !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
+ pdc[2] &= ~(ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH);
+
+ return 0;
+}
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -419,6 +419,15 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
ret = -EINVAL;
break;
+ case XEN_PM_PDC:
+ {
+ XEN_GUEST_HANDLE(uint32) pdc;
+
+ guest_from_compat_handle(pdc, op->u.set_pminfo.u.pdc);
+ ret = acpi_set_pdc_bits(op->u.set_pminfo.id, pdc);
+ }
+ break;
+
default:
ret = -EINVAL;
break;
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -519,3 +519,34 @@ int do_pm_op(struct xen_sysctl_pm_op *op
return ret;
}
+
+int acpi_set_pdc_bits(u32 acpi_id, XEN_GUEST_HANDLE(uint32) pdc)
+{
+ u32 bits[3];
+ int ret;
+
+ if ( copy_from_guest(bits, pdc, 2) )
+ ret = -EFAULT;
+ else if ( bits[0] != ACPI_PDC_REVISION_ID || !bits[1] )
+ ret = -EINVAL;
+ else if ( copy_from_guest_offset(bits + 2, pdc, 2, 1) )
+ ret = -EFAULT;
+ else
+ {
+ u32 mask = 0;
+
+ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_CX )
+ mask |= ACPI_PDC_C_MASK | ACPI_PDC_SMP_C1PT;
+ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_PX )
+ mask |= ACPI_PDC_P_MASK | ACPI_PDC_SMP_C1PT;
+ if ( xen_processor_pmbits & XEN_PROCESSOR_PM_TX )
+ mask |= ACPI_PDC_T_MASK | ACPI_PDC_SMP_C1PT;
+ bits[2] &= (ACPI_PDC_C_MASK | ACPI_PDC_P_MASK | ACPI_PDC_T_MASK |
+ ACPI_PDC_SMP_C1PT) & ~mask;
+ ret = arch_acpi_set_pdc_bits(acpi_id, bits, mask);
+ }
+ if ( !ret )
+ ret = copy_to_guest_offset(pdc, 2, bits + 2, 1);
+
+ return ret;
+}
--- a/xen/include/acpi/cpufreq/processor_perf.h
+++ b/xen/include/acpi/cpufreq/processor_perf.h
@@ -3,10 +3,10 @@
#include <public/platform.h>
#include <public/sysctl.h>
+#include <xen/acpi.h>
#define XEN_PX_INIT 0x80000000
-int get_cpu_id(u32);
int powernow_cpufreq_init(void);
unsigned int powernow_register_driver(void);
unsigned int get_measured_perf(unsigned int cpu, unsigned int flag);
--- a/xen/include/acpi/pdc_intel.h
+++ b/xen/include/acpi/pdc_intel.h
@@ -4,6 +4,8 @@
#ifndef __PDC_INTEL_H__
#define __PDC_INTEL_H__
+#define ACPI_PDC_REVISION_ID 1
+
#define ACPI_PDC_P_FFH (0x0001)
#define ACPI_PDC_C_C1_HALT (0x0002)
#define ACPI_PDC_T_FFH (0x0004)
@@ -14,6 +16,7 @@
#define ACPI_PDC_SMP_T_SWCOORD (0x0080)
#define ACPI_PDC_C_C1_FFH (0x0100)
#define ACPI_PDC_C_C2C3_FFH (0x0200)
+#define ACPI_PDC_SMP_P_HWCOORD (0x0800)
#define ACPI_PDC_EST_CAPABILITY_SMP (ACPI_PDC_SMP_C1PT | \
ACPI_PDC_C_C1_HALT | \
@@ -22,6 +25,7 @@
#define ACPI_PDC_EST_CAPABILITY_SWSMP (ACPI_PDC_SMP_C1PT | \
ACPI_PDC_C_C1_HALT | \
ACPI_PDC_SMP_P_SWCOORD | \
+ ACPI_PDC_SMP_P_HWCOORD | \
ACPI_PDC_P_FFH)
#define ACPI_PDC_C_CAPABILITY_SMP (ACPI_PDC_SMP_C2C3 | \
@@ -30,4 +34,17 @@
ACPI_PDC_C_C1_FFH | \
ACPI_PDC_C_C2C3_FFH)
+#define ACPI_PDC_C_MASK (ACPI_PDC_C_C1_HALT | \
+ ACPI_PDC_C_C1_FFH | \
+ ACPI_PDC_SMP_C2C3 | \
+ ACPI_PDC_SMP_C_SWCOORD | \
+ ACPI_PDC_C_C2C3_FFH)
+
+#define ACPI_PDC_P_MASK (ACPI_PDC_P_FFH | \
+ ACPI_PDC_SMP_P_SWCOORD | \
+ ACPI_PDC_SMP_P_HWCOORD)
+
+#define ACPI_PDC_T_MASK (ACPI_PDC_T_FFH | \
+ ACPI_PDC_SMP_T_SWCOORD)
+
#endif /* __PDC_INTEL_H__ */
--- a/xen/include/asm-x86/cpufeature.h
+++ b/xen/include/asm-x86/cpufeature.h
@@ -150,6 +150,10 @@
#define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability)
#define cpufeat_mask(idx) (1u << ((idx) & 31))
+#define CPUID_MWAIT_LEAF 5
+#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
+#define CPUID5_ECX_INTERRUPT_BREAK 0x2
+
#ifdef __i386__
#define cpu_has_vme boot_cpu_has(X86_FEATURE_VME)
#define cpu_has_de boot_cpu_has(X86_FEATURE_DE)
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -304,6 +304,7 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_getidletim
#define XEN_PM_CX 0
#define XEN_PM_PX 1
#define XEN_PM_TX 2
+#define XEN_PM_PDC 3
/* Px sub info type */
#define XEN_PX_PCT 1
@@ -401,6 +402,7 @@ struct xenpf_set_processor_pminfo {
union {
struct xen_processor_power power;/* Cx: _CST/_CSD */
struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */
+ XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */
} u;
};
typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
--- a/xen/include/xen/acpi.h
+++ b/xen/include/xen/acpi.h
@@ -334,6 +334,8 @@ static inline int acpi_boot_table_init(v
#endif /*!CONFIG_ACPI_BOOT*/
+int get_cpu_id(u32 acpi_id);
+
unsigned int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low);
int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
@@ -431,6 +433,9 @@ static inline unsigned int acpi_get_csta
static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; }
#endif
+int acpi_set_pdc_bits(u32 acpi_id, XEN_GUEST_HANDLE(uint32));
+int arch_acpi_set_pdc_bits(u32 acpi_id, u32 *, u32 mask);
+
#ifdef CONFIG_ACPI_NUMA
int acpi_get_pxm(acpi_handle handle);
#else

View File

@ -17,7 +17,7 @@
struct xen_platform_op curop, *op = &curop; struct xen_platform_op curop, *op = &curop;
if ( !IS_PRIV(current->domain) ) if ( !IS_PRIV(current->domain) )
@@ -513,6 +514,24 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe @@ -522,6 +523,24 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
op->u.mem_add.epfn, op->u.mem_add.epfn,
op->u.mem_add.pxm); op->u.mem_add.pxm);
break; break;
@ -44,7 +44,7 @@
break; break;
--- a/xen/include/public/platform.h --- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h +++ b/xen/include/public/platform.h
@@ -449,6 +449,14 @@ struct xenpf_mem_hotadd @@ -451,6 +451,14 @@ struct xenpf_mem_hotadd
uint32_t flags; uint32_t flags;
}; };
@ -59,7 +59,7 @@
struct xen_platform_op { struct xen_platform_op {
uint32_t cmd; uint32_t cmd;
uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -469,6 +477,7 @@ struct xen_platform_op { @@ -471,6 +479,7 @@ struct xen_platform_op {
struct xenpf_cpu_ol cpu_ol; struct xenpf_cpu_ol cpu_ol;
struct xenpf_cpu_hotadd cpu_add; struct xenpf_cpu_hotadd cpu_add;
struct xenpf_mem_hotadd mem_add; struct xenpf_mem_hotadd mem_add;

View File

@ -1,10 +1,8 @@
Change default IO-APIC ack mode for single IO-APIC systems to old-style. Change default IO-APIC ack mode for single IO-APIC systems to old-style.
Index: xen-4.1.1-testing/xen/arch/x86/io_apic.c --- a/xen/arch/x86/io_apic.c
=================================================================== +++ b/xen/arch/x86/io_apic.c
--- xen-4.1.1-testing.orig/xen/arch/x86/io_apic.c @@ -1578,7 +1578,7 @@ static unsigned int startup_level_ioapic
+++ xen-4.1.1-testing/xen/arch/x86/io_apic.c
@@ -1547,7 +1547,7 @@ static unsigned int startup_level_ioapic
return 0; /* don't check for pending */ return 0; /* don't check for pending */
} }
@ -13,7 +11,7 @@ Index: xen-4.1.1-testing/xen/arch/x86/io_apic.c
static void setup_ioapic_ack(char *s) static void setup_ioapic_ack(char *s)
{ {
if ( !strcmp(s, "old") ) if ( !strcmp(s, "old") )
@@ -2044,6 +2044,8 @@ void __init setup_IO_APIC(void) @@ -2075,6 +2075,8 @@ void __init setup_IO_APIC(void)
else else
io_apic_irqs = ~PIC_IRQS; io_apic_irqs = ~PIC_IRQS;

View File

@ -21,7 +21,7 @@
printk("%p ", _p(*stk++)); printk("%p ", _p(*stk++));
--- a/xen/arch/x86/x86_32/mm.c --- a/xen/arch/x86/x86_32/mm.c
+++ b/xen/arch/x86/x86_32/mm.c +++ b/xen/arch/x86/x86_32/mm.c
@@ -121,6 +121,8 @@ void __init paging_init(void) @@ -122,6 +122,8 @@ void __init paging_init(void)
#undef CNT #undef CNT
#undef MFN #undef MFN

View File

@ -241,7 +241,7 @@
status = fread(&buf, 1, sizeof(*h), rtnl); status = fread(&buf, 1, sizeof(*h), rtnl);
--- a/xen/arch/x86/msi.c --- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c +++ b/xen/arch/x86/msi.c
@@ -746,7 +746,7 @@ static void __pci_disable_msi(struct msi @@ -799,7 +799,7 @@ static void __pci_disable_msi(struct msi
{ {
struct pci_dev *dev; struct pci_dev *dev;
int pos; int pos;

View File

@ -1,3 +1,20 @@
-------------------------------------------------------------------
Tue Aug 23 08:53:20 MDT 2011 - carnold@novell.com
- Upstream patches from Jan
23725-pci-add-device.patch
23762-iommu-fault-bm-off.patch
23763-pci-multi-seg-x2apic-vtd-no-crash.patch
23765-x86-irq-vector-leak.patch
23766-x86-msi-vf-bars.patch
23771-x86-ioapic-clear-pin.patch
23772-x86-trampoline.patch
23774-x86_64-EFI-EDD.patch
23776-x86-kexec-hpet-legacy-bcast-disable.patch
23781-pm-wide-ACPI-ids.patch
23782-x86-ioapic-clear-irr.patch
23783-ACPI-set-_PDC-bits.patch
------------------------------------------------------------------- -------------------------------------------------------------------
Mon Aug 15 11:54:08 CEST 2011 - ohering@suse.de Mon Aug 15 11:54:08 CEST 2011 - ohering@suse.de

View File

@ -96,7 +96,7 @@ BuildRequires: glibc-devel
%if %{?with_kmp}0 %if %{?with_kmp}0
BuildRequires: kernel-source kernel-syms module-init-tools xorg-x11 BuildRequires: kernel-source kernel-syms module-init-tools xorg-x11
%endif %endif
Version: 4.1.1_05 Version: 4.1.1_07
Release: 1 Release: 1
License: GPLv2+ License: GPLv2+
Group: System/Kernel Group: System/Kernel
@ -186,12 +186,24 @@ Patch44: 23685-libxl-segfault-fix.patch
Patch45: 23706-fix-20892.patch Patch45: 23706-fix-20892.patch
Patch46: 23723-x86-CMOS-lock.patch Patch46: 23723-x86-CMOS-lock.patch
Patch47: 23724-x86-smpboot-x2apic.patch Patch47: 23724-x86-smpboot-x2apic.patch
Patch48: 23726-x86-intel-flexmigration.patch Patch48: 23725-pci-add-device.patch
Patch49: 23732-sedf.patch Patch49: 23726-x86-intel-flexmigration.patch
Patch50: 23735-guest-dom0-cap.patch Patch50: 23732-sedf.patch
Patch51: 23746-vtd-cleanup-timers.patch Patch51: 23735-guest-dom0-cap.patch
Patch52: 23747-mmcfg-base-address.patch Patch52: 23746-vtd-cleanup-timers.patch
Patch53: 23749-mmcfg-reservation.patch Patch53: 23747-mmcfg-base-address.patch
Patch54: 23749-mmcfg-reservation.patch
Patch55: 23762-iommu-fault-bm-off.patch
Patch56: 23763-pci-multi-seg-x2apic-vtd-no-crash.patch
Patch57: 23765-x86-irq-vector-leak.patch
Patch58: 23766-x86-msi-vf-bars.patch
Patch59: 23771-x86-ioapic-clear-pin.patch
Patch60: 23772-x86-trampoline.patch
Patch61: 23774-x86_64-EFI-EDD.patch
Patch62: 23776-x86-kexec-hpet-legacy-bcast-disable.patch
Patch63: 23781-pm-wide-ACPI-ids.patch
Patch64: 23782-x86-ioapic-clear-irr.patch
Patch65: 23783-ACPI-set-_PDC-bits.patch
# Upstream qemu patches # Upstream qemu patches
# Our patches # Our patches
Patch300: xen-config.diff Patch300: xen-config.diff
@ -733,6 +745,18 @@ tar xfj %{SOURCE2} -C $RPM_BUILD_DIR/%{xen_build_dir}/tools
%patch51 -p1 %patch51 -p1
%patch52 -p1 %patch52 -p1
%patch53 -p1 %patch53 -p1
%patch54 -p1
%patch55 -p1
%patch56 -p1
%patch57 -p1
%patch58 -p1
%patch59 -p1
%patch60 -p1
%patch61 -p1
%patch62 -p1
%patch63 -p1
%patch64 -p1
%patch65 -p1
%patch300 -p1 %patch300 -p1
%patch301 -p1 %patch301 -p1
%patch302 -p1 %patch302 -p1