- Upstream patches from Jan

  51e517e6-AMD-IOMMU-allocate-IRTEs.patch
  51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch
  51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch
  52146070-ACPI-fix-acpi_os_map_memory.patch
  5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch
  5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch
  5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch
  5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch
  5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch
  5215d8b0-Correct-X2-APIC-HVM-emulation.patch
- Dropped 520d417d-xen-Add-stdbool.h-workaround-for-BSD.patch

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=267
Charles Arnold 2013-08-30 14:59:38 +00:00 committed by Git OBS Bridge
parent 73291eb01a
commit 0891920741
13 changed files with 1402 additions and 75 deletions

@@ -0,0 +1,652 @@
# Commit 2ca9fbd739b8a72b16dd790d0fff7b75f5488fb8
# Date 2013-07-16 11:52:38 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
AMD IOMMU: allocate IRTE entries instead of using a static mapping
For multi-vector MSI, where we surely don't want to be forced to
allocate contiguous vectors yet want to be able to set the affinities
of the individual vectors separately, we need to drop the use of the
(vector, delivery mode) tuple to determine the IRTE to use, and instead
allocate IRTEs dynamically (which imo should have been done from the
beginning).
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
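
To make the change concrete, here is a condensed before/after sketch of
the indexing scheme, distilled from the diff below (the real
alloc_intremap_entry() takes a (seg, bdf) pair and looks the bitmap up
via the IVRS mappings; this is an illustration, not a drop-in):

    /* Before: the IRTE index was derived statically from the vector and
     * delivery mode, so per-vector IRTE assignment was impossible. */
    static int get_intremap_offset(u8 vector, u8 dm)
    {
        int offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK;

        offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT) &
                  INT_REMAP_INDEX_VECTOR_MASK;
        return offset;
    }

    /* After: any free slot in a per-table "inuse" bitmap can be handed
     * out, independent of vector and delivery mode. */
    static unsigned int alloc_intremap_entry(unsigned long *inuse)
    {
        unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES);

        if ( slot < INTREMAP_ENTRIES )
            __set_bit(slot, inuse);
        return slot; /* == INTREMAP_ENTRIES when the table is full */
    }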
# Commit dcbff3aeac6020cdf1f5bd0f0eb0d329fc55d939
# Date 2013-08-28 10:11:19 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
AMD IOMMU: also allocate IRTEs for HPET MSI
Omitting this was a blatant oversight of mine in commit 2ca9fbd7 ("AMD
IOMMU: allocate IRTE entries instead of using a static mapping").
This also changes a bogus inequality check into a sensible one, even
though it is already known that this will make HPET MSI unusable on
certain systems (those with correspondingly broken firmware). That,
however, seems better than failing on systems with consistent ACPI
tables.
Reported-by: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
--- a/xen/drivers/passthrough/amd/iommu_acpi.c
+++ b/xen/drivers/passthrough/amd/iommu_acpi.c
@@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr
/* allocate per-device interrupt remapping table */
if ( amd_iommu_perdev_intremap )
ivrs_mappings[alias_id].intremap_table =
- amd_iommu_alloc_intremap_table();
+ amd_iommu_alloc_intremap_table(
+ &ivrs_mappings[alias_id].intremap_inuse);
else
{
if ( shared_intremap_table == NULL )
- shared_intremap_table = amd_iommu_alloc_intremap_table();
+ shared_intremap_table = amd_iommu_alloc_intremap_table(
+ &shared_intremap_inuse);
ivrs_mappings[alias_id].intremap_table = shared_intremap_table;
+ ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse;
}
}
/* assgin iommu hardware */
@@ -671,7 +674,7 @@ static u16 __init parse_ivhd_device_spec
if ( IO_APIC_ID(apic) != special->handle )
continue;
- if ( ioapic_sbdf[special->handle].pin_setup )
+ if ( ioapic_sbdf[special->handle].pin_2_idx )
{
if ( ioapic_sbdf[special->handle].bdf == bdf &&
ioapic_sbdf[special->handle].seg == seg )
@@ -691,14 +694,17 @@ static u16 __init parse_ivhd_device_spec
ioapic_sbdf[special->handle].bdf = bdf;
ioapic_sbdf[special->handle].seg = seg;
- ioapic_sbdf[special->handle].pin_setup = xzalloc_array(
- unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
+ ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array(
+ u16, nr_ioapic_entries[apic]);
if ( nr_ioapic_entries[apic] &&
- !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
{
printk(XENLOG_ERR "IVHD Error: Out of memory\n");
return 0;
}
+ memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+ nr_ioapic_entries[apic] *
+ sizeof(*ioapic_sbdf->pin_2_idx));
}
break;
}
@@ -926,7 +932,7 @@ static int __init parse_ivrs_table(struc
for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic )
{
if ( !nr_ioapic_entries[apic] ||
- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
continue;
printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n",
@@ -935,9 +941,12 @@ static int __init parse_ivrs_table(struc
error = -ENXIO;
else
{
- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array(
- unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic]));
- if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup )
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx = xmalloc_array(
+ u16, nr_ioapic_entries[apic]);
+ if ( ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx )
+ memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1,
+ nr_ioapic_entries[apic] * sizeof(*ioapic_sbdf->pin_2_idx));
+ else
{
printk(XENLOG_ERR "IVHD Error: Out of memory\n");
error = -ENOMEM;
--- a/xen/drivers/passthrough/amd/iommu_intr.c
+++ b/xen/drivers/passthrough/amd/iommu_intr.c
@@ -31,6 +31,7 @@
struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS];
struct hpet_sbdf hpet_sbdf;
void *shared_intremap_table;
+unsigned long *shared_intremap_inuse;
static DEFINE_SPINLOCK(shared_intremap_lock);
static spinlock_t* get_intremap_lock(int seg, int req_id)
@@ -46,30 +47,31 @@ static int get_intremap_requestor_id(int
return get_ivrs_mappings(seg)[bdf].dte_requestor_id;
}
-static int get_intremap_offset(u8 vector, u8 dm)
+static unsigned int alloc_intremap_entry(int seg, int bdf)
{
- int offset = 0;
- offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK;
- offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) &
- INT_REMAP_INDEX_VECTOR_MASK;
- return offset;
+ unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse;
+ unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES);
+
+ if ( slot < INTREMAP_ENTRIES )
+ __set_bit(slot, inuse);
+ return slot;
}
-static u8 *get_intremap_entry(int seg, int bdf, int offset)
+static u32 *get_intremap_entry(int seg, int bdf, int offset)
{
- u8 *table;
+ u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table;
- table = (u8*)get_ivrs_mappings(seg)[bdf].intremap_table;
ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) );
- return (u8*) (table + offset);
+ return table + offset;
}
static void free_intremap_entry(int seg, int bdf, int offset)
{
- u32* entry;
- entry = (u32*)get_intremap_entry(seg, bdf, offset);
+ u32 *entry = get_intremap_entry(seg, bdf, offset);
+
memset(entry, 0, sizeof(u32));
+ __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse);
}
static void update_intremap_entry(u32* entry, u8 vector, u8 int_type,
@@ -98,18 +100,30 @@ static void update_intremap_entry(u32* e
INT_REMAP_ENTRY_VECTOR_SHIFT, entry);
}
-static void update_intremap_entry_from_ioapic(
+static inline int get_rte_index(const struct IO_APIC_route_entry *rte)
+{
+ return rte->vector | (rte->delivery_mode << 8);
+}
+
+static inline void set_rte_index(struct IO_APIC_route_entry *rte, int offset)
+{
+ rte->vector = (u8)offset;
+ rte->delivery_mode = offset >> 8;
+}
+
+static int update_intremap_entry_from_ioapic(
int bdf,
struct amd_iommu *iommu,
- const struct IO_APIC_route_entry *rte,
- const struct IO_APIC_route_entry *old_rte)
+ struct IO_APIC_route_entry *rte,
+ bool_t lo_update,
+ u16 *index)
{
unsigned long flags;
u32* entry;
u8 delivery_mode, dest, vector, dest_mode;
int req_id;
spinlock_t *lock;
- int offset;
+ unsigned int offset;
req_id = get_intremap_requestor_id(iommu->seg, bdf);
lock = get_intremap_lock(iommu->seg, req_id);
@@ -121,16 +135,35 @@ static void update_intremap_entry_from_i
spin_lock_irqsave(lock, flags);
- offset = get_intremap_offset(vector, delivery_mode);
- if ( old_rte )
+ offset = *index;
+ if ( offset >= INTREMAP_ENTRIES )
{
- int old_offset = get_intremap_offset(old_rte->vector,
- old_rte->delivery_mode);
+ offset = alloc_intremap_entry(iommu->seg, req_id);
+ if ( offset >= INTREMAP_ENTRIES )
+ {
+ spin_unlock_irqrestore(lock, flags);
+ rte->mask = 1;
+ return -ENOSPC;
+ }
+ *index = offset;
+ lo_update = 1;
+ }
- if ( offset != old_offset )
- free_intremap_entry(iommu->seg, bdf, old_offset);
+ entry = get_intremap_entry(iommu->seg, req_id, offset);
+ if ( !lo_update )
+ {
+ /*
+ * Low half of incoming RTE is already in remapped format,
+ * so need to recover vector and delivery mode from IRTE.
+ */
+ ASSERT(get_rte_index(rte) == offset);
+ vector = get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_VECTOR_MASK,
+ INT_REMAP_ENTRY_VECTOR_SHIFT);
+ delivery_mode = get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_INTTYPE_MASK,
+ INT_REMAP_ENTRY_INTTYPE_SHIFT);
}
- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
@@ -141,6 +174,10 @@ static void update_intremap_entry_from_i
amd_iommu_flush_intremap(iommu, req_id);
spin_unlock_irqrestore(&iommu->lock, flags);
}
+
+ set_rte_index(rte, offset);
+
+ return 0;
}
int __init amd_iommu_setup_ioapic_remapping(void)
@@ -153,7 +190,7 @@ int __init amd_iommu_setup_ioapic_remapp
u16 seg, bdf, req_id;
struct amd_iommu *iommu;
spinlock_t *lock;
- int offset;
+ unsigned int offset;
/* Read ioapic entries and update interrupt remapping table accordingly */
for ( apic = 0; apic < nr_ioapics; apic++ )
@@ -184,19 +221,23 @@ int __init amd_iommu_setup_ioapic_remapp
dest = rte.dest.logical.logical_dest;
spin_lock_irqsave(lock, flags);
- offset = get_intremap_offset(vector, delivery_mode);
- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+ offset = alloc_intremap_entry(seg, req_id);
+ BUG_ON(offset >= INTREMAP_ENTRIES);
+ entry = get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector,
delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
+ set_rte_index(&rte, offset);
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset;
+ __ioapic_write_entry(apic, pin, 1, rte);
+
if ( iommu->enabled )
{
spin_lock_irqsave(&iommu->lock, flags);
amd_iommu_flush_intremap(iommu, req_id);
spin_unlock_irqrestore(&iommu->lock, flags);
}
- set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup);
}
}
return 0;
@@ -209,7 +250,7 @@ void amd_iommu_ioapic_update_ire(
struct IO_APIC_route_entry new_rte = { 0 };
unsigned int rte_lo = (reg & 1) ? reg - 1 : reg;
unsigned int pin = (reg - 0x10) / 2;
- int saved_mask, seg, bdf;
+ int saved_mask, seg, bdf, rc;
struct amd_iommu *iommu;
if ( !iommu_intremap )
@@ -247,7 +288,7 @@ void amd_iommu_ioapic_update_ire(
}
if ( new_rte.mask &&
- !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) )
+ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] >= INTREMAP_ENTRIES )
{
ASSERT(saved_mask);
__io_apic_write(apic, reg, value);
@@ -262,14 +303,19 @@ void amd_iommu_ioapic_update_ire(
}
/* Update interrupt remapping entry */
- update_intremap_entry_from_ioapic(
- bdf, iommu, &new_rte,
- test_and_set_bit(pin,
- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte
- : NULL);
+ rc = update_intremap_entry_from_ioapic(
+ bdf, iommu, &new_rte, reg == rte_lo,
+ &ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin]);
- /* Forward write access to IO-APIC RTE */
- __io_apic_write(apic, reg, value);
+ __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]);
+
+ if ( rc )
+ {
+ /* Keep the entry masked. */
+ printk(XENLOG_ERR "Remapping IO-APIC %#x pin %u failed (%d)\n",
+ IO_APIC_ID(apic), pin, rc);
+ return;
+ }
/* For lower bits access, return directly to avoid double writes */
if ( reg == rte_lo )
@@ -283,16 +329,41 @@ void amd_iommu_ioapic_update_ire(
}
}
-static void update_intremap_entry_from_msi_msg(
+unsigned int amd_iommu_read_ioapic_from_ire(
+ unsigned int apic, unsigned int reg)
+{
+ unsigned int val = __io_apic_read(apic, reg);
+
+ if ( !(reg & 1) )
+ {
+ unsigned int offset = val & (INTREMAP_ENTRIES - 1);
+ u16 bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf;
+ u16 seg = ioapic_sbdf[IO_APIC_ID(apic)].seg;
+ u16 req_id = get_intremap_requestor_id(seg, bdf);
+ const u32 *entry = get_intremap_entry(seg, req_id, offset);
+
+ val &= ~(INTREMAP_ENTRIES - 1);
+ val |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_INTTYPE_MASK,
+ INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+ val |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_VECTOR_MASK,
+ INT_REMAP_ENTRY_VECTOR_SHIFT);
+ }
+
+ return val;
+}
+
+static int update_intremap_entry_from_msi_msg(
struct amd_iommu *iommu, u16 bdf,
- int *remap_index, const struct msi_msg *msg)
+ int *remap_index, const struct msi_msg *msg, u32 *data)
{
unsigned long flags;
u32* entry;
u16 req_id, alias_id;
u8 delivery_mode, dest, vector, dest_mode;
spinlock_t *lock;
- int offset;
+ unsigned int offset;
req_id = get_dma_requestor_id(iommu->seg, bdf);
alias_id = get_intremap_requestor_id(iommu->seg, bdf);
@@ -303,15 +374,6 @@ static void update_intremap_entry_from_m
spin_lock_irqsave(lock, flags);
free_intremap_entry(iommu->seg, req_id, *remap_index);
spin_unlock_irqrestore(lock, flags);
-
- if ( ( req_id != alias_id ) &&
- get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
- {
- lock = get_intremap_lock(iommu->seg, alias_id);
- spin_lock_irqsave(lock, flags);
- free_intremap_entry(iommu->seg, alias_id, *remap_index);
- spin_unlock_irqrestore(lock, flags);
- }
goto done;
}
@@ -322,16 +384,24 @@ static void update_intremap_entry_from_m
delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1;
vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK;
dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff;
- offset = get_intremap_offset(vector, delivery_mode);
- if ( *remap_index < 0)
+ offset = *remap_index;
+ if ( offset >= INTREMAP_ENTRIES )
+ {
+ offset = alloc_intremap_entry(iommu->seg, bdf);
+ if ( offset >= INTREMAP_ENTRIES )
+ {
+ spin_unlock_irqrestore(lock, flags);
+ return -ENOSPC;
+ }
*remap_index = offset;
- else
- BUG_ON(*remap_index != offset);
+ }
- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset);
+ entry = get_intremap_entry(iommu->seg, req_id, offset);
update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
spin_unlock_irqrestore(lock, flags);
+ *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset;
+
/*
* In some special cases, a pci-e device(e.g SATA controller in IDE mode)
* will use alias id to index interrupt remapping table.
@@ -343,10 +413,8 @@ static void update_intremap_entry_from_m
if ( ( req_id != alias_id ) &&
get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL )
{
- spin_lock_irqsave(lock, flags);
- entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset);
- update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest);
- spin_unlock_irqrestore(lock, flags);
+ BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table !=
+ get_ivrs_mappings(iommu->seg)[alias_id].intremap_table);
}
done:
@@ -358,19 +426,22 @@ done:
amd_iommu_flush_intremap(iommu, alias_id);
spin_unlock_irqrestore(&iommu->lock, flags);
}
+
+ return 0;
}
static struct amd_iommu *_find_iommu_for_device(int seg, int bdf)
{
- struct amd_iommu *iommu = find_iommu_for_device(seg, bdf);
-
- if ( iommu )
- return iommu;
+ struct amd_iommu *iommu;
list_for_each_entry ( iommu, &amd_iommu_head, list )
if ( iommu->seg == seg && iommu->bdf == bdf )
return NULL;
+ iommu = find_iommu_for_device(seg, bdf);
+ if ( iommu )
+ return iommu;
+
AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n",
seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf));
return ERR_PTR(-EINVAL);
@@ -380,8 +451,9 @@ int amd_iommu_msi_msg_update_ire(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
struct pci_dev *pdev = msi_desc->dev;
- int bdf, seg;
+ int bdf, seg, rc;
struct amd_iommu *iommu;
+ u32 data;
bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
seg = pdev ? pdev->seg : hpet_sbdf.seg;
@@ -390,11 +462,12 @@ int amd_iommu_msi_msg_update_ire(
if ( IS_ERR_OR_NULL(iommu) )
return PTR_ERR(iommu);
- if ( msi_desc->remap_index >= 0 )
+ if ( msi_desc->remap_index >= 0 && !msg )
{
do {
update_intremap_entry_from_msi_msg(iommu, bdf,
- &msi_desc->remap_index, NULL);
+ &msi_desc->remap_index,
+ NULL, NULL);
if ( !pdev || !pdev->phantom_stride )
break;
bdf += pdev->phantom_stride;
@@ -409,19 +482,39 @@ int amd_iommu_msi_msg_update_ire(
return 0;
do {
- update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index,
- msg);
- if ( !pdev || !pdev->phantom_stride )
+ rc = update_intremap_entry_from_msi_msg(iommu, bdf,
+ &msi_desc->remap_index,
+ msg, &data);
+ if ( rc || !pdev || !pdev->phantom_stride )
break;
bdf += pdev->phantom_stride;
} while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) );
- return 0;
+ msg->data = data;
+ return rc;
}
void amd_iommu_read_msi_from_ire(
struct msi_desc *msi_desc, struct msi_msg *msg)
{
+ unsigned int offset = msg->data & (INTREMAP_ENTRIES - 1);
+ const struct pci_dev *pdev = msi_desc->dev;
+ u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf;
+ u16 seg = pdev ? pdev->seg : hpet_sbdf.seg;
+ const u32 *entry;
+
+ if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) )
+ return;
+
+ entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset);
+
+ msg->data &= ~(INTREMAP_ENTRIES - 1);
+ msg->data |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_INTTYPE_MASK,
+ INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8;
+ msg->data |= get_field_from_reg_u32(*entry,
+ INT_REMAP_ENTRY_VECTOR_MASK,
+ INT_REMAP_ENTRY_VECTOR_SHIFT);
}
int __init amd_iommu_free_intremap_table(
@@ -438,23 +531,42 @@ int __init amd_iommu_free_intremap_table
return 0;
}
-void* __init amd_iommu_alloc_intremap_table(void)
+void* __init amd_iommu_alloc_intremap_table(unsigned long **inuse_map)
{
void *tb;
tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
BUG_ON(tb == NULL);
memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
+ *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES));
+ BUG_ON(*inuse_map == NULL);
return tb;
}
int __init amd_setup_hpet_msi(struct msi_desc *msi_desc)
{
- if ( (!msi_desc->hpet_id != hpet_sbdf.id) ||
- (hpet_sbdf.iommu == NULL) )
+ spinlock_t *lock;
+ unsigned long flags;
+ int rc = 0;
+
+ if ( msi_desc->hpet_id != hpet_sbdf.id || !hpet_sbdf.iommu )
{
- AMD_IOMMU_DEBUG("Fail to setup HPET MSI remapping\n");
- return 1;
+ AMD_IOMMU_DEBUG("Failed to setup HPET MSI remapping: %s\n",
+ hpet_sbdf.iommu ? "Wrong HPET" : "No IOMMU");
+ return -ENODEV;
}
- return 0;
+ lock = get_intremap_lock(hpet_sbdf.seg, hpet_sbdf.bdf);
+ spin_lock_irqsave(lock, flags);
+
+ msi_desc->remap_index = alloc_intremap_entry(hpet_sbdf.seg,
+ hpet_sbdf.bdf);
+ if ( msi_desc->remap_index >= INTREMAP_ENTRIES )
+ {
+ msi_desc->remap_index = -1;
+ rc = -ENXIO;
+ }
+
+ spin_unlock_irqrestore(lock, flags);
+
+ return rc;
}
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -637,7 +637,7 @@ const struct iommu_ops amd_iommu_ops = {
.get_device_group_id = amd_iommu_group_id,
.update_ire_from_apic = amd_iommu_ioapic_update_ire,
.update_ire_from_msi = amd_iommu_msi_msg_update_ire,
- .read_apic_from_ire = __io_apic_read,
+ .read_apic_from_ire = amd_iommu_read_ioapic_from_ire,
.read_msi_from_ire = amd_iommu_read_msi_from_ire,
.setup_hpet_msi = amd_setup_hpet_msi,
.suspend = amd_iommu_suspend,
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -119,6 +119,7 @@ struct ivrs_mappings {
/* per device interrupt remapping table */
void *intremap_table;
+ unsigned long *intremap_inuse;
spinlock_t intremap_lock;
/* ivhd device data settings */
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
@@ -470,10 +470,6 @@
#define MAX_AMD_IOMMUS 32
/* interrupt remapping table */
-#define INT_REMAP_INDEX_DM_MASK 0x1C00
-#define INT_REMAP_INDEX_DM_SHIFT 10
-#define INT_REMAP_INDEX_VECTOR_MASK 0x3FC
-#define INT_REMAP_INDEX_VECTOR_SHIFT 2
#define INT_REMAP_ENTRY_REMAPEN_MASK 0x00000001
#define INT_REMAP_ENTRY_REMAPEN_SHIFT 0
#define INT_REMAP_ENTRY_SUPIOPF_MASK 0x00000002
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
@@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device(
/* interrupt remapping */
int amd_iommu_setup_ioapic_remapping(void);
-void *amd_iommu_alloc_intremap_table(void);
+void *amd_iommu_alloc_intremap_table(unsigned long **);
int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *);
void amd_iommu_ioapic_update_ire(
unsigned int apic, unsigned int reg, unsigned int value);
+unsigned int amd_iommu_read_ioapic_from_ire(
+ unsigned int apic, unsigned int reg);
int amd_iommu_msi_msg_update_ire(
struct msi_desc *msi_desc, struct msi_msg *msg);
void amd_iommu_read_msi_from_ire(
@@ -101,15 +103,17 @@ int amd_setup_hpet_msi(struct msi_desc *
extern struct ioapic_sbdf {
u16 bdf, seg;
- unsigned long *pin_setup;
+ u16 *pin_2_idx;
} ioapic_sbdf[MAX_IO_APICS];
-extern void *shared_intremap_table;
extern struct hpet_sbdf {
u16 bdf, seg, id;
struct amd_iommu *iommu;
} hpet_sbdf;
+extern void *shared_intremap_table;
+extern unsigned long *shared_intremap_inuse;
+
/* power management support */
void amd_iommu_resume(void);
void amd_iommu_suspend(void);

@@ -0,0 +1,68 @@
# Commit 561e0f86660f10db492c1ead1cd772013a6cc32d
# Date 2013-07-16 11:54:07 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
AMD IOMMU: untie remap and vector maps
With the specific IRTEs used for an interrupt no longer depending on
the vector, there's no need to tie the remap sharing model to the
vector sharing one.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>
Acked-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -207,50 +207,6 @@ int __init amd_iov_detect(void)
init_done = 1;
- /*
- * AMD IOMMUs don't distinguish between vectors destined for
- * different cpus when doing interrupt remapping. This means
- * that interrupts going through the same intremap table
- * can't share the same vector.
- *
- * If irq_vector_map isn't specified, choose a sensible default:
- * - If we're using per-device interemap tables, per-device
- * vector non-sharing maps
- * - If we're using a global interemap table, global vector
- * non-sharing map
- */
- if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT )
- {
- if ( amd_iommu_perdev_intremap )
- {
- /* Per-device vector map logic is broken for devices with multiple
- * MSI-X interrupts (and would also be for multiple MSI, if Xen
- * supported it).
- *
- * Until this is fixed, use global vector tables as far as the irq
- * logic is concerned to avoid the buggy behaviour of per-device
- * maps in map_domain_pirq(), and use per-device tables as far as
- * intremap code is concerned to avoid the security issue.
- */
- printk(XENLOG_WARNING "AMD-Vi: per-device vector map logic is broken. "
- "Using per-device-global maps instead until a fix is found.\n");
-
- opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
- }
- else
- {
- printk("AMD-Vi: Enabling global vector map\n");
- opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
- }
- }
- else
- {
- printk("AMD-Vi: Not overriding irq_vector_map setting\n");
-
- if ( opt_irq_vector_map != OPT_IRQ_VECTOR_MAP_GLOBAL )
- printk(XENLOG_WARNING "AMD-Vi: per-device vector map logic is broken. "
- "Use irq_vector_map=global to work around.\n");
- }
if ( !amd_iommu_perdev_intremap )
printk(XENLOG_WARNING "AMD-Vi: Using global interrupt remap table is not recommended (see XSA-36)!\n");
return scan_pci_devices();

@@ -0,0 +1,77 @@
# Commit 303066fdb1e4fe816e48acd665453f58b8399e81
# Date 2013-07-17 08:47:18 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VMX: fix interaction of APIC-V and Viridian emulation
Viridian's use of a synthetic MSR for issuing EOI notifications
bypasses the normal in-processor handling, which would clear
GUEST_INTR_STATUS.SVI. Hence we need to do this in software in order
for subsequent interrupts to get delivered.
Based on analysis by Yang Z Zhang <yang.z.zhang@intel.com>.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Yang Zhang <yang.z.zhang@intel.com>
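
For background, a sketch (based on the Intel SDM, not part of the
patch) of the 16-bit guest interrupt status field that the new
vmx_handle_eoi() below manipulates: RVI lives in bits 7:0 and SVI in
bits 15:8, so masking the status down to its low-order subfield clears
SVI while preserving RVI.

    /* Guest interrupt status (16-bit VMCS field), per the SDM:
     *   bits 7:0  - RVI: highest-priority requesting virtual interrupt
     *   bits 15:8 - SVI: highest-priority in-service virtual interrupt
     * An EOI performed in software (e.g. via the Viridian MSR) bypasses
     * the processor's virtual-EOI logic, so SVI must be cleared by hand. */
    unsigned long status = __vmread(GUEST_INTR_STATUS);

    status &= 0xff;                       /* keep RVI, zero SVI */
    __vmwrite(GUEST_INTR_STATUS, status);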
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -386,6 +386,9 @@ void vlapic_EOI_set(struct vlapic *vlapi
vlapic_clear_vector(vector, &vlapic->regs->data[APIC_ISR]);
+ if ( hvm_funcs.handle_eoi )
+ hvm_funcs.handle_eoi(vector);
+
if ( vlapic_test_and_clear_vector(vector, &vlapic->regs->data[APIC_TMR]) )
vioapic_update_EOI(vlapic_domain(vlapic), vector);
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1502,6 +1502,15 @@ static void vmx_sync_pir_to_irr(struct v
vlapic_set_vector(i, &vlapic->regs->data[APIC_IRR]);
}
+static void vmx_handle_eoi(u8 vector)
+{
+ unsigned long status = __vmread(GUEST_INTR_STATUS);
+
+ /* We need to clear the SVI field. */
+ status &= VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK;
+ __vmwrite(GUEST_INTR_STATUS, status);
+}
+
static struct hvm_function_table __initdata vmx_function_table = {
.name = "VMX",
.cpu_up_prepare = vmx_cpu_up_prepare,
@@ -1554,6 +1563,7 @@ static struct hvm_function_table __initd
.process_isr = vmx_process_isr,
.deliver_posted_intr = vmx_deliver_posted_intr,
.sync_pir_to_irr = vmx_sync_pir_to_irr,
+ .handle_eoi = vmx_handle_eoi,
.nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
};
@@ -1580,7 +1590,10 @@ const struct hvm_function_table * __init
setup_ept_dump();
}
-
+
+ if ( !cpu_has_vmx_virtual_intr_delivery )
+ vmx_function_table.handle_eoi = NULL;
+
if ( cpu_has_vmx_posted_intr_processing )
alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt);
else
--- a/xen/include/asm-x86/hvm/hvm.h
+++ b/xen/include/asm-x86/hvm/hvm.h
@@ -186,6 +186,7 @@ struct hvm_function_table {
void (*process_isr)(int isr, struct vcpu *v);
void (*deliver_posted_intr)(struct vcpu *v, u8 vector);
void (*sync_pir_to_irr)(struct vcpu *v);
+ void (*handle_eoi)(u8 vector);
/*Walk nested p2m */
int (*nhvm_hap_walk_L1_p2m)(struct vcpu *v, paddr_t L2_gpa,

@@ -1,61 +0,0 @@
# Commit 7b9685ca4ed2fd723600ce66eb20a6d0c115b6cb
# Date 2013-08-15 22:00:45 +0100
# Author Tim Deegan <tim@xen.org>
# Committer Tim Deegan <tim@xen.org>
xen: Add stdbool.h workaround for BSD.
On *BSD, stdbool.h lives in /usr/include, but we don't want to have
that on the search path in case we pick up any headers from the build
host's C libraries.
Copy the equivalent hack already in place for stdarg.h: on all
supported compilers the contents of stdbool.h are trivial, so just
supply the things we need in a xen/stdbool.h header.
Signed-off-by: Tim Deegan <tim@xen.org>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Keir Fraser <keir@xen.org>
Tested-by: Patrick Welche <prlw1@cam.ac.uk>
--- a/xen/include/xen/libelf.h
+++ b/xen/include/xen/libelf.h
@@ -29,8 +29,6 @@
#error define architectural endianness
#endif
-#include <stdbool.h>
-
typedef int elf_errorstatus; /* 0: ok; -ve (normally -1): error */
typedef int elf_negerrnoval; /* 0: ok; -EFOO: error */
@@ -39,11 +37,13 @@ typedef int elf_negerrnoval; /* 0: ok; -
#ifdef __XEN__
#include <public/elfnote.h>
#include <public/features.h>
+#include <xen/stdbool.h>
#else
#include <xen/elfnote.h>
#include <xen/features.h>
#include <stdarg.h>
+#include <stdbool.h>
struct elf_binary;
typedef void elf_log_callback(struct elf_binary*, void *caller_data,
--- /dev/null
+++ b/xen/include/xen/stdbool.h
@@ -0,0 +1,13 @@
+#ifndef __XEN_STDBOOL_H__
+#define __XEN_STDBOOL_H__
+
+#if defined(__OpenBSD__) || defined(__NetBSD__)
+# define bool _Bool
+# define true 1
+# define false 0
+# define __bool_true_false_are_defined 1
+#else
+# include <stdbool.h>
+#endif
+
+#endif /* __XEN_STDBOOL_H__ */

@@ -0,0 +1,132 @@
References: bnc#833251, bnc#834751
# Commit 2ee9cbf9d8eaeff6e21222905d22dbd58dc5fe29
# Date 2013-08-21 08:38:40 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
ACPI: fix acpi_os_map_memory()
Its use of map_domain_page() was entirely wrong. Use __acpi_map_table()
instead for the time being, with locking added since the mappings it
produces get replaced by subsequent invocations. Using locking in this
way is acceptable here because the only two runtime callers are
acpi_os_{read,write}_memory(), which don't leave mappings pending upon
returning to their callers.
Also fix the type of __acpi_map_table()'s first parameter - while
benign for unstable, backports to pre-4.3 trees will need this.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
# Commit c5ba8ed4c6f005d332a49d93a3ef8ff2b690b256
# Date 2013-08-21 08:40:22 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
ACPI: use ioremap() in acpi_os_map_memory()
This drops the post-boot use of __acpi_map_table() here again (together
with the somewhat awkward locking), in favor of using ioremap().
Signed-off-by: Jan Beulich <jbeulich@suse.com>
--- a/xen/arch/x86/acpi/lib.c
+++ b/xen/arch/x86/acpi/lib.c
@@ -39,7 +39,7 @@ u32 __read_mostly x86_acpiid_to_apicid[M
* from the fixed base. That's why we start at FIX_ACPI_END and
* count idx down while incrementing the phys address.
*/
-char *__acpi_map_table(unsigned long phys, unsigned long size)
+char *__acpi_map_table(paddr_t phys, unsigned long size)
{
unsigned long base, offset, mapped_size;
int idx;
--- a/xen/drivers/acpi/osl.c
+++ b/xen/drivers/acpi/osl.c
@@ -38,6 +38,7 @@
#include <xen/spinlock.h>
#include <xen/domain_page.h>
#include <xen/efi.h>
+#include <xen/vmap.h>
#define _COMPONENT ACPI_OS_SERVICES
ACPI_MODULE_NAME("osl")
@@ -83,14 +84,25 @@ acpi_physical_address __init acpi_os_get
}
}
-void __iomem *__init
+void __iomem *
acpi_os_map_memory(acpi_physical_address phys, acpi_size size)
{
- return __acpi_map_table((unsigned long)phys, size);
+ if (system_state >= SYS_STATE_active) {
+ unsigned long pfn = PFN_DOWN(phys);
+ unsigned int offs = phys & (PAGE_SIZE - 1);
+
+ /* The low first Mb is always mapped. */
+ if ( !((phys + size - 1) >> 20) )
+ return __va(phys);
+ return __vmap(&pfn, PFN_UP(offs + size), 1, 1, PAGE_HYPERVISOR_NOCACHE) + offs;
+ }
+ return __acpi_map_table(phys, size);
}
-void __init acpi_os_unmap_memory(void __iomem * virt, acpi_size size)
+void acpi_os_unmap_memory(void __iomem * virt, acpi_size size)
{
+ if (system_state >= SYS_STATE_active)
+ vunmap((void *)((unsigned long)virt & PAGE_MASK));
}
acpi_status acpi_os_read_port(acpi_io_address port, u32 * value, u32 width)
@@ -133,9 +145,8 @@ acpi_status
acpi_os_read_memory(acpi_physical_address phys_addr, u32 * value, u32 width)
{
u32 dummy;
- void __iomem *virt_addr;
+ void __iomem *virt_addr = acpi_os_map_memory(phys_addr, width >> 3);
- virt_addr = map_domain_page(phys_addr>>PAGE_SHIFT);
if (!value)
value = &dummy;
@@ -153,7 +164,7 @@ acpi_os_read_memory(acpi_physical_addres
BUG();
}
- unmap_domain_page(virt_addr);
+ acpi_os_unmap_memory(virt_addr, width >> 3);
return AE_OK;
}
@@ -161,9 +172,7 @@ acpi_os_read_memory(acpi_physical_addres
acpi_status
acpi_os_write_memory(acpi_physical_address phys_addr, u32 value, u32 width)
{
- void __iomem *virt_addr;
-
- virt_addr = map_domain_page(phys_addr>>PAGE_SHIFT);
+ void __iomem *virt_addr = acpi_os_map_memory(phys_addr, width >> 3);
switch (width) {
case 8:
@@ -179,7 +188,7 @@ acpi_os_write_memory(acpi_physical_addre
BUG();
}
- unmap_domain_page(virt_addr);
+ acpi_os_unmap_memory(virt_addr, width >> 3);
return AE_OK;
}
--- a/xen/include/xen/acpi.h
+++ b/xen/include/xen/acpi.h
@@ -56,7 +56,7 @@ typedef int (*acpi_table_handler) (struc
typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end);
unsigned int acpi_get_processor_id (unsigned int cpu);
-char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
+char * __acpi_map_table (paddr_t phys_addr, unsigned long size);
int acpi_boot_init (void);
int acpi_boot_table_init (void);
int acpi_numa_init (void);

@@ -0,0 +1,50 @@
# Commit c9c6abab583d27fdca1d979a7f1d18ae30f54e9b
# Date 2013-08-21 16:44:58 +0200
# Author Jan Beulich <jbeulich@suse.com>
# Committer Jan Beulich <jbeulich@suse.com>
VT-d: warn about Compatibility Format Interrupts being enabled by firmware
... as being insecure.
Also drop the second (redundant) read of DMAR_GSTS_REG from enable_intremap().
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Xiantao Zhang <xiantao.zhang@intel.com>
--- a/xen/drivers/passthrough/vtd/intremap.c
+++ b/xen/drivers/passthrough/vtd/intremap.c
@@ -706,8 +706,8 @@ int enable_intremap(struct iommu *iommu,
if ( !platform_supports_intremap() )
{
- dprintk(XENLOG_ERR VTDPREFIX,
- "Platform firmware does not support interrupt remapping\n");
+ printk(XENLOG_ERR VTDPREFIX
+ " Platform firmware does not support interrupt remapping\n");
return -EINVAL;
}
@@ -718,15 +718,19 @@ int enable_intremap(struct iommu *iommu,
if ( (sts & DMA_GSTS_IRES) && ir_ctrl->iremap_maddr )
return 0;
- sts = dmar_readl(iommu->reg, DMAR_GSTS_REG);
if ( !(sts & DMA_GSTS_QIES) )
{
- dprintk(XENLOG_ERR VTDPREFIX,
- "Queued invalidation is not enabled, should not enable "
- "interrupt remapping\n");
+ printk(XENLOG_ERR VTDPREFIX
+ " Queued invalidation is not enabled on IOMMU #%u:"
+ " Should not enable interrupt remapping\n", iommu->index);
return -EINVAL;
}
+ if ( !eim && (sts & DMA_GSTS_CFIS) )
+ printk(XENLOG_WARNING VTDPREFIX
+ " Compatibility Format Interrupts permitted on IOMMU #%u:"
+ " Device pass-through will be insecure\n", iommu->index);
+
if ( ir_ctrl->iremap_maddr == 0 )
{
drhd = iommu_to_drhd(iommu);

@@ -0,0 +1,26 @@
# Commit 7fb5c6b9ef22915e3fcac95cd44857f4457ba783
# Date 2013-08-22 10:49:24 +0200
# Author Yang Zhang <yang.z.zhang@Intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
Nested VMX: Check whether interrupt is blocked by TPR
If an interrupt is blocked by L1's TPR, L2 should not see it and should
keep running. Add the check before L2 retrieves the interrupt.
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Acked-by: "Dong, Eddie" <eddie.dong@intel.com>
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -165,6 +165,11 @@ static int nvmx_intr_intercept(struct vc
{
u32 ctrl;
+ /* If blocked by L1's tpr, then nothing to do. */
+ if ( nestedhvm_vcpu_in_guestmode(v) &&
+ hvm_interrupt_blocked(v, intack) == hvm_intblk_tpr )
+ return 1;
+
if ( nvmx_intr_blocked(v) != hvm_intblk_none )
{
enable_intr_window(v, intack);

@@ -0,0 +1,36 @@
# Commit b35d0a26983843c092bfa353fd6b9aa8c3bf4886
# Date 2013-08-22 10:50:13 +0200
# Author Yang Zhang <yang.z.zhang@Intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
Nested VMX: Force check ISR when L2 is running
An external interrupt is allowed to notify the CPU only when it has a
higher priority than the interrupt currently being serviced. With
APIC-v, this priority comparison is done by hardware, and hardware
injects the interrupt into the VCPU once it recognizes one. Currently
there is no virtual APIC-v feature available for L1 to use, so while L2
is running we still need to compare the interrupt priority against the
ISR in the hypervisor instead of via hardware.
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Acked-by: "Dong, Eddie" <eddie.dong@intel.com>
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -37,6 +37,7 @@
#include <asm/hvm/io.h>
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
+#include <asm/hvm/nestedhvm.h>
#include <public/hvm/ioreq.h>
#include <public/hvm/params.h>
@@ -1037,7 +1038,8 @@ int vlapic_has_pending_irq(struct vcpu *
if ( irr == -1 )
return -1;
- if ( vlapic_virtual_intr_delivery_enabled() )
+ if ( vlapic_virtual_intr_delivery_enabled() &&
+ !nestedhvm_vcpu_in_guestmode(v) )
return irr;
isr = vlapic_find_highest_isr(vlapic);

@@ -0,0 +1,43 @@
# Commit 375a1035002fb257087756a86e6caeda649fc0f1
# Date 2013-08-22 10:52:05 +0200
# Author Yang Zhang <yang.z.zhang@Intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
Nested VMX: Clear APIC-v control bit in vmcs02
There is no vAPIC-v support yet, so mask the APIC-v control bits when
constructing vmcs02.
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Acked-by: "Dong, Eddie" <eddie.dong@intel.com>
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -613,8 +613,15 @@ void nvmx_update_secondary_exec_control(
u32 shadow_cntrl;
struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+ u32 apicv_bit = SECONDARY_EXEC_APIC_REGISTER_VIRT |
+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
+ host_cntrl &= ~apicv_bit;
shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, SECONDARY_VM_EXEC_CONTROL);
+
+ /* No vAPIC-v support, so it shouldn't be set in vmcs12. */
+ ASSERT(!(shadow_cntrl & apicv_bit));
+
nvmx->ept.enabled = !!(shadow_cntrl & SECONDARY_EXEC_ENABLE_EPT);
shadow_cntrl |= host_cntrl;
__vmwrite(SECONDARY_VM_EXEC_CONTROL, shadow_cntrl);
@@ -625,7 +632,12 @@ static void nvmx_update_pin_control(stru
u32 shadow_cntrl;
struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+ host_cntrl &= ~PIN_BASED_POSTED_INTERRUPT;
shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL);
+
+ /* No vAPIC-v support, so it shouldn't be set in vmcs12. */
+ ASSERT(!(shadow_cntrl & PIN_BASED_POSTED_INTERRUPT));
+
shadow_cntrl |= host_cntrl;
__vmwrite(PIN_BASED_VM_EXEC_CONTROL, shadow_cntrl);
}

@@ -0,0 +1,247 @@
# Commit 84e6af58707520baf59c1c86c29237419e439afb
# Date 2013-08-22 10:59:01 +0200
# Author Yang Zhang <yang.z.zhang@Intel.com>
# Committer Jan Beulich <jbeulich@suse.com>
Nested VMX: Update APIC-v(RVI/SVI) when vmexit to L1
With APIC-v enabled, all interrupts to L1 are delivered through APIC-v.
But when L2 is running, an external interrupt causes an L1 vmexit with
reason "external interrupt", and L1 then picks up the interrupt through
vmcs12. When L1 acks the interrupt, APIC-v hardware will still perform
the vEOI update, since APIC-v is enabled while L1 is running. The
problem is that the interrupt was not delivered through APIC-v
hardware, so SVI/RVI/vPPR are not set, yet hardware requires them when
doing the vEOI update. The solution is that when L1 picks up the
interrupt from vmcs12, the hypervisor helps to update SVI/RVI/vPPR so
that the subsequent vEOI and vPPR updates happen correctly. Also, since
the interrupt is delivered through vmcs12, APIC-v hardware will not
clear vIRR, and the hypervisor needs to clear it before L1 resumes.
Signed-off-by: Yang Zhang <yang.z.zhang@Intel.com>
Acked-by: "Dong, Eddie" <eddie.dong@intel.com>
--- a/xen/arch/x86/hvm/irq.c
+++ b/xen/arch/x86/hvm/irq.c
@@ -437,7 +437,7 @@ struct hvm_intack hvm_vcpu_ack_pending_i
intack.vector = (uint8_t)vector;
break;
case hvm_intsrc_lapic:
- if ( !vlapic_ack_pending_irq(v, intack.vector) )
+ if ( !vlapic_ack_pending_irq(v, intack.vector, 0) )
intack = hvm_intack_none;
break;
case hvm_intsrc_vector:
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -168,6 +168,14 @@ static uint32_t vlapic_get_ppr(struct vl
return ppr;
}
+uint32_t vlapic_set_ppr(struct vlapic *vlapic)
+{
+ uint32_t ppr = vlapic_get_ppr(vlapic);
+
+ vlapic_set_reg(vlapic, APIC_PROCPRI, ppr);
+ return ppr;
+}
+
static int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda)
{
int result = 0;
@@ -1050,15 +1058,15 @@ int vlapic_has_pending_irq(struct vcpu *
return irr;
}
-int vlapic_ack_pending_irq(struct vcpu *v, int vector)
+int vlapic_ack_pending_irq(struct vcpu *v, int vector, bool_t force_ack)
{
struct vlapic *vlapic = vcpu_vlapic(v);
- if ( vlapic_virtual_intr_delivery_enabled() )
- return 1;
-
- vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]);
- vlapic_clear_irr(vector, vlapic);
+ if ( force_ack || !vlapic_virtual_intr_delivery_enabled() )
+ {
+ vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]);
+ vlapic_clear_irr(vector, vlapic);
+ }
return 1;
}
--- a/xen/arch/x86/hvm/vmx/intr.c
+++ b/xen/arch/x86/hvm/vmx/intr.c
@@ -185,7 +185,7 @@ static int nvmx_intr_intercept(struct vc
if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) )
return 0;
- vmx_inject_extint(intack.vector);
+ vmx_inject_extint(intack.vector, intack.source);
ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, VM_EXIT_CONTROLS);
if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
@@ -314,7 +314,7 @@ void vmx_intr_assist(void)
else
{
HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
- vmx_inject_extint(intack.vector);
+ vmx_inject_extint(intack.vector, intack.source);
pt_intr_post(v, intack);
}
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1205,7 +1205,7 @@ static void vmx_update_guest_efer(struct
}
void nvmx_enqueue_n2_exceptions(struct vcpu *v,
- unsigned long intr_fields, int error_code)
+ unsigned long intr_fields, int error_code, uint8_t source)
{
struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
@@ -1213,6 +1213,7 @@ void nvmx_enqueue_n2_exceptions(struct v
/* enqueue the exception till the VMCS switch back to L1 */
nvmx->intr.intr_info = intr_fields;
nvmx->intr.error_code = error_code;
+ nvmx->intr.source = source;
vcpu_nestedhvm(v).nv_vmexit_pending = 1;
return;
}
@@ -1224,7 +1225,8 @@ void nvmx_enqueue_n2_exceptions(struct v
static int nvmx_vmexit_trap(struct vcpu *v, struct hvm_trap *trap)
{
- nvmx_enqueue_n2_exceptions(v, trap->vector, trap->error_code);
+ nvmx_enqueue_n2_exceptions(v, trap->vector, trap->error_code,
+ hvm_intsrc_none);
return NESTEDHVM_VMEXIT_DONE;
}
@@ -1255,7 +1257,7 @@ static void __vmx_inject_exception(int t
curr->arch.hvm_vmx.vmx_emulate = 1;
}
-void vmx_inject_extint(int trap)
+void vmx_inject_extint(int trap, uint8_t source)
{
struct vcpu *v = current;
u32 pin_based_cntrl;
@@ -1266,7 +1268,7 @@ void vmx_inject_extint(int trap)
if ( pin_based_cntrl & PIN_BASED_EXT_INTR_MASK ) {
nvmx_enqueue_n2_exceptions (v,
INTR_INFO_VALID_MASK | (X86_EVENTTYPE_EXT_INTR<<8) | trap,
- HVM_DELIVER_NO_ERROR_CODE);
+ HVM_DELIVER_NO_ERROR_CODE, source);
return;
}
}
@@ -1285,7 +1287,7 @@ void vmx_inject_nmi(void)
if ( pin_based_cntrl & PIN_BASED_NMI_EXITING ) {
nvmx_enqueue_n2_exceptions (v,
INTR_INFO_VALID_MASK | (X86_EVENTTYPE_NMI<<8) | TRAP_nmi,
- HVM_DELIVER_NO_ERROR_CODE);
+ HVM_DELIVER_NO_ERROR_CODE, hvm_intsrc_nmi);
return;
}
}
@@ -1353,7 +1355,7 @@ static void vmx_inject_trap(struct hvm_t
{
nvmx_enqueue_n2_exceptions (curr,
INTR_INFO_VALID_MASK | (_trap.type<<8) | _trap.vector,
- _trap.error_code);
+ _trap.error_code, hvm_intsrc_none);
return;
}
else
--- a/xen/arch/x86/hvm/vmx/vvmx.c
+++ b/xen/arch/x86/hvm/vmx/vvmx.c
@@ -1295,6 +1295,36 @@ static void sync_exception_state(struct
}
}
+static void nvmx_update_apicv(struct vcpu *v)
+{
+ struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
+ struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v);
+ unsigned long reason = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON);
+ uint32_t intr_info = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO);
+
+ if ( reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
+ nvmx->intr.source == hvm_intsrc_lapic &&
+ (intr_info & INTR_INFO_VALID_MASK) )
+ {
+ uint16_t status;
+ uint32_t rvi, ppr;
+ uint32_t vector = intr_info & 0xff;
+ struct vlapic *vlapic = vcpu_vlapic(v);
+
+ vlapic_ack_pending_irq(v, vector, 1);
+
+ ppr = vlapic_set_ppr(vlapic);
+ WARN_ON((ppr & 0xf0) != (vector & 0xf0));
+
+ status = vector << 8;
+ rvi = vlapic_has_pending_irq(v);
+ if ( rvi != -1 )
+ status |= rvi & 0xff;
+
+ __vmwrite(GUEST_INTR_STATUS, status);
+ }
+}
+
static void virtual_vmexit(struct cpu_user_regs *regs)
{
struct vcpu *v = current;
@@ -1340,6 +1370,9 @@ static void virtual_vmexit(struct cpu_us
/* updating host cr0 to sync TS bit */
__vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0);
+ if ( cpu_has_vmx_virtual_intr_delivery )
+ nvmx_update_apicv(v);
+
vmreturn(regs, VMSUCCEED);
}
--- a/xen/include/asm-x86/hvm/vlapic.h
+++ b/xen/include/asm-x86/hvm/vlapic.h
@@ -98,7 +98,7 @@ bool_t is_vlapic_lvtpc_enabled(struct vl
void vlapic_set_irq(struct vlapic *vlapic, uint8_t vec, uint8_t trig);
int vlapic_has_pending_irq(struct vcpu *v);
-int vlapic_ack_pending_irq(struct vcpu *v, int vector);
+int vlapic_ack_pending_irq(struct vcpu *v, int vector, bool_t force_ack);
int vlapic_init(struct vcpu *v);
void vlapic_destroy(struct vcpu *v);
@@ -110,6 +110,7 @@ void vlapic_tdt_msr_set(struct vlapic *v
uint64_t vlapic_tdt_msr_get(struct vlapic *vlapic);
int vlapic_accept_pic_intr(struct vcpu *v);
+uint32_t vlapic_set_ppr(struct vlapic *vlapic);
void vlapic_adjust_i8259_target(struct domain *d);
--- a/xen/include/asm-x86/hvm/vmx/vmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vmx.h
@@ -448,7 +448,7 @@ static inline int __vmxon(u64 addr)
void vmx_get_segment_register(struct vcpu *, enum x86_segment,
struct segment_register *);
-void vmx_inject_extint(int trap);
+void vmx_inject_extint(int trap, uint8_t source);
void vmx_inject_nmi(void);
int ept_p2m_init(struct p2m_domain *p2m);
--- a/xen/include/asm-x86/hvm/vmx/vvmx.h
+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h
@@ -36,6 +36,7 @@ struct nestedvmx {
struct {
unsigned long intr_info;
u32 error_code;
+ u8 source;
} intr;
struct {
bool_t enabled;

@@ -0,0 +1,24 @@
References: bnc#835896
# Commit 69962e19ed432570f6cdcfdb5f6f22d6e3c54e6c
# Date 2013-08-22 11:24:00 +0200
# Author Juergen Gross <juergen.gross@ts.fujitsu.com>
# Committer Jan Beulich <jbeulich@suse.com>
Correct X2-APIC HVM emulation
commit 6859874b61d5ddaf5289e72ed2b2157739b72ca5 ("x86/HVM: fix x2APIC
APIC_ID read emulation") introduced an error in the HVM emulation of
the x2APIC: any attempt to write the APIC_ICR MSR results in a GP fault.
Signed-off-by: Juergen Gross <juergen.gross@ts.fujitsu.com>
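
The breakage is a missing break statement: in hvm_x2apic_msr_write()'s
register switch, a successful write of the ICR's destination half fell
straight through into the APIC_ICR2 case. A condensed sketch of the
broken control flow (simplified from the function the diff below fixes):

    switch ( offset )   /* APIC register offset, simplified */
    {
    case APIC_ICR:
        rc = vlapic_reg_write(v, APIC_ICR2, (uint32_t)(msr_content >> 32));
        if ( rc )
            return rc;
        /* missing "break" here: on success, control falls through ... */
    case APIC_ICR2:
        return X86EMUL_UNHANDLEABLE;  /* ... and the caller raises #GP */
    }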
--- a/xen/arch/x86/hvm/vlapic.c
+++ b/xen/arch/x86/hvm/vlapic.c
@@ -868,6 +868,7 @@ int hvm_x2apic_msr_write(struct vcpu *v,
rc = vlapic_reg_write(v, APIC_ICR2, (uint32_t)(msr_content >> 32));
if ( rc )
return rc;
+ break;
case APIC_ICR2:
return X86EMUL_UNHANDLEABLE;

@@ -1,3 +1,19 @@
+-------------------------------------------------------------------
+Fri Aug 30 08:11:55 MDT 2013 - carnold@suse.com
+
+- Upstream patches from Jan
+  51e517e6-AMD-IOMMU-allocate-IRTEs.patch
+  51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch
+  51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch
+  52146070-ACPI-fix-acpi_os_map_memory.patch
+  5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch
+  5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch
+  5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch
+  5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch
+  5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch
+  5215d8b0-Correct-X2-APIC-HVM-emulation.patch
+- Dropped 520d417d-xen-Add-stdbool.h-workaround-for-BSD.patch
+
 -------------------------------------------------------------------
 Mon Aug 26 15:48:57 MDT 2013 - carnold@suse.com

@@ -15,7 +15,6 @@
 # Please submit bugfixes or comments via http://bugs.opensuse.org/
 #
 Name: xen
 ExclusiveArch: %ix86 x86_64 %arm aarch64
 %define xvers 4.3
@@ -200,19 +199,28 @@ Patch3: 51d27841-iommu-amd-Workaround-for-erratum-787.patch
 Patch4: 51daa074-Revert-hvmloader-always-include-HPET-table.patch
 Patch5: 51d5334e-x86-mm-Ensure-useful-progress-in-alloc_l2_table.patch
 Patch6: 51dd155c-adjust-x86-EFI-build.patch
-Patch7: 51e63d80-x86-cpuidle-Change-logging-for-unknown-APIC-IDs.patch
-Patch8: 51e6540d-x86-don-t-use-destroy_xen_mappings-for-vunmap.patch
-Patch9: 51e7963f-x86-time-Update-wallclock-in-shared-info-when-altering-domain-time-offset.patch
-Patch10: 51ffd577-fix-off-by-one-mistakes-in-vm_alloc.patch
-Patch11: 51ffd5fd-x86-refine-FPU-selector-handling-code-for-XSAVEOPT.patch
-Patch12: 520114bb-Nested-VMX-Flush-TLBs-and-Caches-if-paging-mode-changed.patch
-Patch13: 520a5504-VMX-add-boot-parameter-to-enable-disable-APIC-v-dynamically.patch
-Patch14: 520a24f6-x86-AMD-Fix-nested-svm-crash-due-to-assertion-in-__virt_to_maddr.patch
-Patch15: 520a2570-x86-AMD-Inject-GP-instead-of-UD-when-unable-to-map-vmcb.patch
-Patch16: 520b4b60-VT-d-protect-against-bogus-information-coming-from-BIOS.patch
-Patch17: 520b4bda-x86-MTRR-fix-range-check-in-mtrr_add_page.patch
-Patch18: 520cb8b6-x86-time-fix-check-for-negative-time-in-__update_vcpu_system_time.patch
-Patch19: 520d417d-xen-Add-stdbool.h-workaround-for-BSD.patch
+Patch7: 51e517e6-AMD-IOMMU-allocate-IRTEs.patch
+Patch8: 51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch
+Patch9: 51e63d80-x86-cpuidle-Change-logging-for-unknown-APIC-IDs.patch
+Patch10: 51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch
+Patch11: 51e6540d-x86-don-t-use-destroy_xen_mappings-for-vunmap.patch
+Patch12: 51e7963f-x86-time-Update-wallclock-in-shared-info-when-altering-domain-time-offset.patch
+Patch13: 51ffd577-fix-off-by-one-mistakes-in-vm_alloc.patch
+Patch14: 51ffd5fd-x86-refine-FPU-selector-handling-code-for-XSAVEOPT.patch
+Patch15: 520114bb-Nested-VMX-Flush-TLBs-and-Caches-if-paging-mode-changed.patch
+Patch16: 520a5504-VMX-add-boot-parameter-to-enable-disable-APIC-v-dynamically.patch
+Patch17: 520a24f6-x86-AMD-Fix-nested-svm-crash-due-to-assertion-in-__virt_to_maddr.patch
+Patch18: 520a2570-x86-AMD-Inject-GP-instead-of-UD-when-unable-to-map-vmcb.patch
+Patch19: 520b4b60-VT-d-protect-against-bogus-information-coming-from-BIOS.patch
+Patch20: 520b4bda-x86-MTRR-fix-range-check-in-mtrr_add_page.patch
+Patch21: 520cb8b6-x86-time-fix-check-for-negative-time-in-__update_vcpu_system_time.patch
+Patch22: 52146070-ACPI-fix-acpi_os_map_memory.patch
+Patch23: 5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch
+Patch24: 5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch
+Patch25: 5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch
+Patch26: 5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch
+Patch27: 5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch
+Patch28: 5215d8b0-Correct-X2-APIC-HVM-emulation.patch
 # Upstream qemu patches
 # Our patches
 Patch301: xen-destdir.patch
@@ -521,6 +529,15 @@ Authors
 %patch17 -p1
 %patch18 -p1
 %patch19 -p1
+%patch20 -p1
+%patch21 -p1
+%patch22 -p1
+%patch23 -p1
+%patch24 -p1
+%patch25 -p1
+%patch26 -p1
+%patch27 -p1
+%patch28 -p1
 %patch301 -p1
 %patch302 -p1
 %patch303 -p1