diff --git a/51e517e6-AMD-IOMMU-allocate-IRTEs.patch b/51e517e6-AMD-IOMMU-allocate-IRTEs.patch new file mode 100644 index 0000000..b928ab6 --- /dev/null +++ b/51e517e6-AMD-IOMMU-allocate-IRTEs.patch @@ -0,0 +1,652 @@ +# Commit 2ca9fbd739b8a72b16dd790d0fff7b75f5488fb8 +# Date 2013-07-16 11:52:38 +0200 +# Author Jan Beulich +# Committer Jan Beulich +AMD IOMMU: allocate IRTE entries instead of using a static mapping + +For multi-vector MSI, where we surely don't want to allocate +contiguous vectors and be able to set affinities of the individual +vectors separately, we need to drop the use of the tuple of vector and +delivery mode to determine the IRTE to use, and instead allocate IRTEs +(which imo should have been done from the beginning). + +Signed-off-by: Jan Beulich +Acked-by: Suravee Suthikulpanit + +# Commit dcbff3aeac6020cdf1f5bd0f0eb0d329fc55d939 +# Date 2013-08-28 10:11:19 +0200 +# Author Jan Beulich +# Committer Jan Beulich +AMD IOMMU: also allocate IRTEs for HPET MSI + +Omitting this was a blatant oversight of mine in commit 2ca9fbd7 ("AMD +IOMMU: allocate IRTE entries instead of using a static mapping"). + +This also changes a bogus inequality check into a sensible one, even +though it is already known that this will make HPET MSI unusable on +certain systems (having respective broken firmware). This, however, +seems better than failing on systems with consistent ACPI tables. 
+ +Reported-by: Sander Eikelenboom +Signed-off-by: Jan Beulich +Acked-by: Suravee Suthikulpanit + +--- a/xen/drivers/passthrough/amd/iommu_acpi.c ++++ b/xen/drivers/passthrough/amd/iommu_acpi.c +@@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr + /* allocate per-device interrupt remapping table */ + if ( amd_iommu_perdev_intremap ) + ivrs_mappings[alias_id].intremap_table = +- amd_iommu_alloc_intremap_table(); ++ amd_iommu_alloc_intremap_table( ++ &ivrs_mappings[alias_id].intremap_inuse); + else + { + if ( shared_intremap_table == NULL ) +- shared_intremap_table = amd_iommu_alloc_intremap_table(); ++ shared_intremap_table = amd_iommu_alloc_intremap_table( ++ &shared_intremap_inuse); + ivrs_mappings[alias_id].intremap_table = shared_intremap_table; ++ ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse; + } + } + /* assgin iommu hardware */ +@@ -671,7 +674,7 @@ static u16 __init parse_ivhd_device_spec + if ( IO_APIC_ID(apic) != special->handle ) + continue; + +- if ( ioapic_sbdf[special->handle].pin_setup ) ++ if ( ioapic_sbdf[special->handle].pin_2_idx ) + { + if ( ioapic_sbdf[special->handle].bdf == bdf && + ioapic_sbdf[special->handle].seg == seg ) +@@ -691,14 +694,17 @@ static u16 __init parse_ivhd_device_spec + ioapic_sbdf[special->handle].bdf = bdf; + ioapic_sbdf[special->handle].seg = seg; + +- ioapic_sbdf[special->handle].pin_setup = xzalloc_array( +- unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic])); ++ ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array( ++ u16, nr_ioapic_entries[apic]); + if ( nr_ioapic_entries[apic] && +- !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) ++ !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx ) + { + printk(XENLOG_ERR "IVHD Error: Out of memory\n"); + return 0; + } ++ memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1, ++ nr_ioapic_entries[apic] * ++ sizeof(*ioapic_sbdf->pin_2_idx)); + } + break; + } +@@ -926,7 +932,7 @@ static int __init parse_ivrs_table(struc + for ( apic = 0; !error && iommu_intremap && 
apic < nr_ioapics; ++apic ) + { + if ( !nr_ioapic_entries[apic] || +- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) ++ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx ) + continue; + + printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n", +@@ -935,9 +941,12 @@ static int __init parse_ivrs_table(struc + error = -ENXIO; + else + { +- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array( +- unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic])); +- if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) ++ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx = xmalloc_array( ++ u16, nr_ioapic_entries[apic]); ++ if ( ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx ) ++ memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1, ++ nr_ioapic_entries[apic] * sizeof(*ioapic_sbdf->pin_2_idx)); ++ else + { + printk(XENLOG_ERR "IVHD Error: Out of memory\n"); + error = -ENOMEM; +--- a/xen/drivers/passthrough/amd/iommu_intr.c ++++ b/xen/drivers/passthrough/amd/iommu_intr.c +@@ -31,6 +31,7 @@ + struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS]; + struct hpet_sbdf hpet_sbdf; + void *shared_intremap_table; ++unsigned long *shared_intremap_inuse; + static DEFINE_SPINLOCK(shared_intremap_lock); + + static spinlock_t* get_intremap_lock(int seg, int req_id) +@@ -46,30 +47,31 @@ static int get_intremap_requestor_id(int + return get_ivrs_mappings(seg)[bdf].dte_requestor_id; + } + +-static int get_intremap_offset(u8 vector, u8 dm) ++static unsigned int alloc_intremap_entry(int seg, int bdf) + { +- int offset = 0; +- offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK; +- offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) & +- INT_REMAP_INDEX_VECTOR_MASK; +- return offset; ++ unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse; ++ unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES); ++ ++ if ( slot < INTREMAP_ENTRIES ) ++ __set_bit(slot, inuse); ++ return slot; + } + +-static u8 *get_intremap_entry(int seg, int bdf, int offset) ++static u32 *get_intremap_entry(int seg, int bdf, int 
offset) + { +- u8 *table; ++ u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table; + +- table = (u8*)get_ivrs_mappings(seg)[bdf].intremap_table; + ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) ); + +- return (u8*) (table + offset); ++ return table + offset; + } + + static void free_intremap_entry(int seg, int bdf, int offset) + { +- u32* entry; +- entry = (u32*)get_intremap_entry(seg, bdf, offset); ++ u32 *entry = get_intremap_entry(seg, bdf, offset); ++ + memset(entry, 0, sizeof(u32)); ++ __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse); + } + + static void update_intremap_entry(u32* entry, u8 vector, u8 int_type, +@@ -98,18 +100,30 @@ static void update_intremap_entry(u32* e + INT_REMAP_ENTRY_VECTOR_SHIFT, entry); + } + +-static void update_intremap_entry_from_ioapic( ++static inline int get_rte_index(const struct IO_APIC_route_entry *rte) ++{ ++ return rte->vector | (rte->delivery_mode << 8); ++} ++ ++static inline void set_rte_index(struct IO_APIC_route_entry *rte, int offset) ++{ ++ rte->vector = (u8)offset; ++ rte->delivery_mode = offset >> 8; ++} ++ ++static int update_intremap_entry_from_ioapic( + int bdf, + struct amd_iommu *iommu, +- const struct IO_APIC_route_entry *rte, +- const struct IO_APIC_route_entry *old_rte) ++ struct IO_APIC_route_entry *rte, ++ bool_t lo_update, ++ u16 *index) + { + unsigned long flags; + u32* entry; + u8 delivery_mode, dest, vector, dest_mode; + int req_id; + spinlock_t *lock; +- int offset; ++ unsigned int offset; + + req_id = get_intremap_requestor_id(iommu->seg, bdf); + lock = get_intremap_lock(iommu->seg, req_id); +@@ -121,16 +135,35 @@ static void update_intremap_entry_from_i + + spin_lock_irqsave(lock, flags); + +- offset = get_intremap_offset(vector, delivery_mode); +- if ( old_rte ) ++ offset = *index; ++ if ( offset >= INTREMAP_ENTRIES ) + { +- int old_offset = get_intremap_offset(old_rte->vector, +- old_rte->delivery_mode); ++ offset = alloc_intremap_entry(iommu->seg, req_id); ++ if ( 
offset >= INTREMAP_ENTRIES ) ++ { ++ spin_unlock_irqrestore(lock, flags); ++ rte->mask = 1; ++ return -ENOSPC; ++ } ++ *index = offset; ++ lo_update = 1; ++ } + +- if ( offset != old_offset ) +- free_intremap_entry(iommu->seg, bdf, old_offset); ++ entry = get_intremap_entry(iommu->seg, req_id, offset); ++ if ( !lo_update ) ++ { ++ /* ++ * Low half of incoming RTE is already in remapped format, ++ * so need to recover vector and delivery mode from IRTE. ++ */ ++ ASSERT(get_rte_index(rte) == offset); ++ vector = get_field_from_reg_u32(*entry, ++ INT_REMAP_ENTRY_VECTOR_MASK, ++ INT_REMAP_ENTRY_VECTOR_SHIFT); ++ delivery_mode = get_field_from_reg_u32(*entry, ++ INT_REMAP_ENTRY_INTTYPE_MASK, ++ INT_REMAP_ENTRY_INTTYPE_SHIFT); + } +- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset); + update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); + + spin_unlock_irqrestore(lock, flags); +@@ -141,6 +174,10 @@ static void update_intremap_entry_from_i + amd_iommu_flush_intremap(iommu, req_id); + spin_unlock_irqrestore(&iommu->lock, flags); + } ++ ++ set_rte_index(rte, offset); ++ ++ return 0; + } + + int __init amd_iommu_setup_ioapic_remapping(void) +@@ -153,7 +190,7 @@ int __init amd_iommu_setup_ioapic_remapp + u16 seg, bdf, req_id; + struct amd_iommu *iommu; + spinlock_t *lock; +- int offset; ++ unsigned int offset; + + /* Read ioapic entries and update interrupt remapping table accordingly */ + for ( apic = 0; apic < nr_ioapics; apic++ ) +@@ -184,19 +221,23 @@ int __init amd_iommu_setup_ioapic_remapp + dest = rte.dest.logical.logical_dest; + + spin_lock_irqsave(lock, flags); +- offset = get_intremap_offset(vector, delivery_mode); +- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset); ++ offset = alloc_intremap_entry(seg, req_id); ++ BUG_ON(offset >= INTREMAP_ENTRIES); ++ entry = get_intremap_entry(iommu->seg, req_id, offset); + update_intremap_entry(entry, vector, + delivery_mode, dest_mode, dest); + spin_unlock_irqrestore(lock, flags); + ++ 
set_rte_index(&rte, offset); ++ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset; ++ __ioapic_write_entry(apic, pin, 1, rte); ++ + if ( iommu->enabled ) + { + spin_lock_irqsave(&iommu->lock, flags); + amd_iommu_flush_intremap(iommu, req_id); + spin_unlock_irqrestore(&iommu->lock, flags); + } +- set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup); + } + } + return 0; +@@ -209,7 +250,7 @@ void amd_iommu_ioapic_update_ire( + struct IO_APIC_route_entry new_rte = { 0 }; + unsigned int rte_lo = (reg & 1) ? reg - 1 : reg; + unsigned int pin = (reg - 0x10) / 2; +- int saved_mask, seg, bdf; ++ int saved_mask, seg, bdf, rc; + struct amd_iommu *iommu; + + if ( !iommu_intremap ) +@@ -247,7 +288,7 @@ void amd_iommu_ioapic_update_ire( + } + + if ( new_rte.mask && +- !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ) ++ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] >= INTREMAP_ENTRIES ) + { + ASSERT(saved_mask); + __io_apic_write(apic, reg, value); +@@ -262,14 +303,19 @@ void amd_iommu_ioapic_update_ire( + } + + /* Update interrupt remapping entry */ +- update_intremap_entry_from_ioapic( +- bdf, iommu, &new_rte, +- test_and_set_bit(pin, +- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte +- : NULL); ++ rc = update_intremap_entry_from_ioapic( ++ bdf, iommu, &new_rte, reg == rte_lo, ++ &ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin]); + +- /* Forward write access to IO-APIC RTE */ +- __io_apic_write(apic, reg, value); ++ __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]); ++ ++ if ( rc ) ++ { ++ /* Keep the entry masked. 
*/ ++ printk(XENLOG_ERR "Remapping IO-APIC %#x pin %u failed (%d)\n", ++ IO_APIC_ID(apic), pin, rc); ++ return; ++ } + + /* For lower bits access, return directly to avoid double writes */ + if ( reg == rte_lo ) +@@ -283,16 +329,41 @@ void amd_iommu_ioapic_update_ire( + } + } + +-static void update_intremap_entry_from_msi_msg( ++unsigned int amd_iommu_read_ioapic_from_ire( ++ unsigned int apic, unsigned int reg) ++{ ++ unsigned int val = __io_apic_read(apic, reg); ++ ++ if ( !(reg & 1) ) ++ { ++ unsigned int offset = val & (INTREMAP_ENTRIES - 1); ++ u16 bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf; ++ u16 seg = ioapic_sbdf[IO_APIC_ID(apic)].seg; ++ u16 req_id = get_intremap_requestor_id(seg, bdf); ++ const u32 *entry = get_intremap_entry(seg, req_id, offset); ++ ++ val &= ~(INTREMAP_ENTRIES - 1); ++ val |= get_field_from_reg_u32(*entry, ++ INT_REMAP_ENTRY_INTTYPE_MASK, ++ INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8; ++ val |= get_field_from_reg_u32(*entry, ++ INT_REMAP_ENTRY_VECTOR_MASK, ++ INT_REMAP_ENTRY_VECTOR_SHIFT); ++ } ++ ++ return val; ++} ++ ++static int update_intremap_entry_from_msi_msg( + struct amd_iommu *iommu, u16 bdf, +- int *remap_index, const struct msi_msg *msg) ++ int *remap_index, const struct msi_msg *msg, u32 *data) + { + unsigned long flags; + u32* entry; + u16 req_id, alias_id; + u8 delivery_mode, dest, vector, dest_mode; + spinlock_t *lock; +- int offset; ++ unsigned int offset; + + req_id = get_dma_requestor_id(iommu->seg, bdf); + alias_id = get_intremap_requestor_id(iommu->seg, bdf); +@@ -303,15 +374,6 @@ static void update_intremap_entry_from_m + spin_lock_irqsave(lock, flags); + free_intremap_entry(iommu->seg, req_id, *remap_index); + spin_unlock_irqrestore(lock, flags); +- +- if ( ( req_id != alias_id ) && +- get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL ) +- { +- lock = get_intremap_lock(iommu->seg, alias_id); +- spin_lock_irqsave(lock, flags); +- free_intremap_entry(iommu->seg, alias_id, *remap_index); +- 
spin_unlock_irqrestore(lock, flags); +- } + goto done; + } + +@@ -322,16 +384,24 @@ static void update_intremap_entry_from_m + delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1; + vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK; + dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff; +- offset = get_intremap_offset(vector, delivery_mode); +- if ( *remap_index < 0) ++ offset = *remap_index; ++ if ( offset >= INTREMAP_ENTRIES ) ++ { ++ offset = alloc_intremap_entry(iommu->seg, bdf); ++ if ( offset >= INTREMAP_ENTRIES ) ++ { ++ spin_unlock_irqrestore(lock, flags); ++ return -ENOSPC; ++ } + *remap_index = offset; +- else +- BUG_ON(*remap_index != offset); ++ } + +- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset); ++ entry = get_intremap_entry(iommu->seg, req_id, offset); + update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); + spin_unlock_irqrestore(lock, flags); + ++ *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset; ++ + /* + * In some special cases, a pci-e device(e.g SATA controller in IDE mode) + * will use alias id to index interrupt remapping table. 
+@@ -343,10 +413,8 @@ static void update_intremap_entry_from_m + if ( ( req_id != alias_id ) && + get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL ) + { +- spin_lock_irqsave(lock, flags); +- entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset); +- update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); +- spin_unlock_irqrestore(lock, flags); ++ BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table != ++ get_ivrs_mappings(iommu->seg)[alias_id].intremap_table); + } + + done: +@@ -358,19 +426,22 @@ done: + amd_iommu_flush_intremap(iommu, alias_id); + spin_unlock_irqrestore(&iommu->lock, flags); + } ++ ++ return 0; + } + + static struct amd_iommu *_find_iommu_for_device(int seg, int bdf) + { +- struct amd_iommu *iommu = find_iommu_for_device(seg, bdf); +- +- if ( iommu ) +- return iommu; ++ struct amd_iommu *iommu; + + list_for_each_entry ( iommu, &amd_iommu_head, list ) + if ( iommu->seg == seg && iommu->bdf == bdf ) + return NULL; + ++ iommu = find_iommu_for_device(seg, bdf); ++ if ( iommu ) ++ return iommu; ++ + AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n", + seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf)); + return ERR_PTR(-EINVAL); +@@ -380,8 +451,9 @@ int amd_iommu_msi_msg_update_ire( + struct msi_desc *msi_desc, struct msi_msg *msg) + { + struct pci_dev *pdev = msi_desc->dev; +- int bdf, seg; ++ int bdf, seg, rc; + struct amd_iommu *iommu; ++ u32 data; + + bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf; + seg = pdev ? 
pdev->seg : hpet_sbdf.seg; +@@ -390,11 +462,12 @@ int amd_iommu_msi_msg_update_ire( + if ( IS_ERR_OR_NULL(iommu) ) + return PTR_ERR(iommu); + +- if ( msi_desc->remap_index >= 0 ) ++ if ( msi_desc->remap_index >= 0 && !msg ) + { + do { + update_intremap_entry_from_msi_msg(iommu, bdf, +- &msi_desc->remap_index, NULL); ++ &msi_desc->remap_index, ++ NULL, NULL); + if ( !pdev || !pdev->phantom_stride ) + break; + bdf += pdev->phantom_stride; +@@ -409,19 +482,39 @@ int amd_iommu_msi_msg_update_ire( + return 0; + + do { +- update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index, +- msg); +- if ( !pdev || !pdev->phantom_stride ) ++ rc = update_intremap_entry_from_msi_msg(iommu, bdf, ++ &msi_desc->remap_index, ++ msg, &data); ++ if ( rc || !pdev || !pdev->phantom_stride ) + break; + bdf += pdev->phantom_stride; + } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) ); + +- return 0; ++ msg->data = data; ++ return rc; + } + + void amd_iommu_read_msi_from_ire( + struct msi_desc *msi_desc, struct msi_msg *msg) + { ++ unsigned int offset = msg->data & (INTREMAP_ENTRIES - 1); ++ const struct pci_dev *pdev = msi_desc->dev; ++ u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf; ++ u16 seg = pdev ? 
pdev->seg : hpet_sbdf.seg; ++ const u32 *entry; ++ ++ if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) ) ++ return; ++ ++ entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset); ++ ++ msg->data &= ~(INTREMAP_ENTRIES - 1); ++ msg->data |= get_field_from_reg_u32(*entry, ++ INT_REMAP_ENTRY_INTTYPE_MASK, ++ INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8; ++ msg->data |= get_field_from_reg_u32(*entry, ++ INT_REMAP_ENTRY_VECTOR_MASK, ++ INT_REMAP_ENTRY_VECTOR_SHIFT); + } + + int __init amd_iommu_free_intremap_table( +@@ -438,23 +531,42 @@ int __init amd_iommu_free_intremap_table + return 0; + } + +-void* __init amd_iommu_alloc_intremap_table(void) ++void* __init amd_iommu_alloc_intremap_table(unsigned long **inuse_map) + { + void *tb; + tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER); + BUG_ON(tb == NULL); + memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER)); ++ *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES)); ++ BUG_ON(*inuse_map == NULL); + return tb; + } + + int __init amd_setup_hpet_msi(struct msi_desc *msi_desc) + { +- if ( (!msi_desc->hpet_id != hpet_sbdf.id) || +- (hpet_sbdf.iommu == NULL) ) ++ spinlock_t *lock; ++ unsigned long flags; ++ int rc = 0; ++ ++ if ( msi_desc->hpet_id != hpet_sbdf.id || !hpet_sbdf.iommu ) + { +- AMD_IOMMU_DEBUG("Fail to setup HPET MSI remapping\n"); +- return 1; ++ AMD_IOMMU_DEBUG("Failed to setup HPET MSI remapping: %s\n", ++ hpet_sbdf.iommu ? 
"Wrong HPET" : "No IOMMU"); ++ return -ENODEV; + } + +- return 0; ++ lock = get_intremap_lock(hpet_sbdf.seg, hpet_sbdf.bdf); ++ spin_lock_irqsave(lock, flags); ++ ++ msi_desc->remap_index = alloc_intremap_entry(hpet_sbdf.seg, ++ hpet_sbdf.bdf); ++ if ( msi_desc->remap_index >= INTREMAP_ENTRIES ) ++ { ++ msi_desc->remap_index = -1; ++ rc = -ENXIO; ++ } ++ ++ spin_unlock_irqrestore(lock, flags); ++ ++ return rc; + } +--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c ++++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c +@@ -637,7 +637,7 @@ const struct iommu_ops amd_iommu_ops = { + .get_device_group_id = amd_iommu_group_id, + .update_ire_from_apic = amd_iommu_ioapic_update_ire, + .update_ire_from_msi = amd_iommu_msi_msg_update_ire, +- .read_apic_from_ire = __io_apic_read, ++ .read_apic_from_ire = amd_iommu_read_ioapic_from_ire, + .read_msi_from_ire = amd_iommu_read_msi_from_ire, + .setup_hpet_msi = amd_setup_hpet_msi, + .suspend = amd_iommu_suspend, +--- a/xen/include/asm-x86/amd-iommu.h ++++ b/xen/include/asm-x86/amd-iommu.h +@@ -119,6 +119,7 @@ struct ivrs_mappings { + + /* per device interrupt remapping table */ + void *intremap_table; ++ unsigned long *intremap_inuse; + spinlock_t intremap_lock; + + /* ivhd device data settings */ +--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h ++++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h +@@ -470,10 +470,6 @@ + #define MAX_AMD_IOMMUS 32 + + /* interrupt remapping table */ +-#define INT_REMAP_INDEX_DM_MASK 0x1C00 +-#define INT_REMAP_INDEX_DM_SHIFT 10 +-#define INT_REMAP_INDEX_VECTOR_MASK 0x3FC +-#define INT_REMAP_INDEX_VECTOR_SHIFT 2 + #define INT_REMAP_ENTRY_REMAPEN_MASK 0x00000001 + #define INT_REMAP_ENTRY_REMAPEN_SHIFT 0 + #define INT_REMAP_ENTRY_SUPIOPF_MASK 0x00000002 +--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h ++++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h +@@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device( + + /* interrupt remapping */ + int amd_iommu_setup_ioapic_remapping(void); +-void 
*amd_iommu_alloc_intremap_table(void); ++void *amd_iommu_alloc_intremap_table(unsigned long **); + int amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *); + void amd_iommu_ioapic_update_ire( + unsigned int apic, unsigned int reg, unsigned int value); ++unsigned int amd_iommu_read_ioapic_from_ire( ++ unsigned int apic, unsigned int reg); + int amd_iommu_msi_msg_update_ire( + struct msi_desc *msi_desc, struct msi_msg *msg); + void amd_iommu_read_msi_from_ire( +@@ -101,15 +103,17 @@ int amd_setup_hpet_msi(struct msi_desc * + + extern struct ioapic_sbdf { + u16 bdf, seg; +- unsigned long *pin_setup; ++ u16 *pin_2_idx; + } ioapic_sbdf[MAX_IO_APICS]; +-extern void *shared_intremap_table; + + extern struct hpet_sbdf { + u16 bdf, seg, id; + struct amd_iommu *iommu; + } hpet_sbdf; + ++extern void *shared_intremap_table; ++extern unsigned long *shared_intremap_inuse; ++ + /* power management support */ + void amd_iommu_resume(void); + void amd_iommu_suspend(void); diff --git a/51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch b/51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch new file mode 100644 index 0000000..34f25d9 --- /dev/null +++ b/51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch @@ -0,0 +1,68 @@ +# Commit 561e0f86660f10db492c1ead1cd772013a6cc32d +# Date 2013-07-16 11:54:07 +0200 +# Author Jan Beulich +# Committer Jan Beulich +AMD IOMMU: untie remap and vector maps + +With the specific IRTEs used for an interrupt no longer depending on +the vector, there's no need to tie the remap sharing model to the +vector sharing one. + +Signed-off-by: Jan Beulich +Acked-by: George Dunlap +Acked-by: Suravee Suthikulpanit + + +--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c ++++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c +@@ -207,50 +207,6 @@ int __init amd_iov_detect(void) + + init_done = 1; + +- /* +- * AMD IOMMUs don't distinguish between vectors destined for +- * different cpus when doing interrupt remapping. 
This means +- * that interrupts going through the same intremap table +- * can't share the same vector. +- * +- * If irq_vector_map isn't specified, choose a sensible default: +- * - If we're using per-device interemap tables, per-device +- * vector non-sharing maps +- * - If we're using a global interemap table, global vector +- * non-sharing map +- */ +- if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT ) +- { +- if ( amd_iommu_perdev_intremap ) +- { +- /* Per-device vector map logic is broken for devices with multiple +- * MSI-X interrupts (and would also be for multiple MSI, if Xen +- * supported it). +- * +- * Until this is fixed, use global vector tables as far as the irq +- * logic is concerned to avoid the buggy behaviour of per-device +- * maps in map_domain_pirq(), and use per-device tables as far as +- * intremap code is concerned to avoid the security issue. +- */ +- printk(XENLOG_WARNING "AMD-Vi: per-device vector map logic is broken. " +- "Using per-device-global maps instead until a fix is found.\n"); +- +- opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL; +- } +- else +- { +- printk("AMD-Vi: Enabling global vector map\n"); +- opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL; +- } +- } +- else +- { +- printk("AMD-Vi: Not overriding irq_vector_map setting\n"); +- +- if ( opt_irq_vector_map != OPT_IRQ_VECTOR_MAP_GLOBAL ) +- printk(XENLOG_WARNING "AMD-Vi: per-device vector map logic is broken. 
" +- "Use irq_vector_map=global to work around.\n"); +- } + if ( !amd_iommu_perdev_intremap ) + printk(XENLOG_WARNING "AMD-Vi: Using global interrupt remap table is not recommended (see XSA-36)!\n"); + return scan_pci_devices(); diff --git a/51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch b/51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch new file mode 100644 index 0000000..1ff5d47 --- /dev/null +++ b/51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch @@ -0,0 +1,77 @@ +# Commit 303066fdb1e4fe816e48acd665453f58b8399e81 +# Date 2013-07-17 08:47:18 +0200 +# Author Jan Beulich +# Committer Jan Beulich +VMX: fix interaction of APIC-V and Viridian emulation + +Viridian using a synthetic MSR for issuing EOI notifications bypasses +the normal in-processor handling, which would clear +GUEST_INTR_STATUS.SVI. Hence we need to do this in software in order +for future interrupts to get delivered. + +Based on analysis by Yang Z Zhang . + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Reviewed-by: Yang Zhang + +--- a/xen/arch/x86/hvm/vlapic.c ++++ b/xen/arch/x86/hvm/vlapic.c +@@ -386,6 +386,9 @@ void vlapic_EOI_set(struct vlapic *vlapi + + vlapic_clear_vector(vector, &vlapic->regs->data[APIC_ISR]); + ++ if ( hvm_funcs.handle_eoi ) ++ hvm_funcs.handle_eoi(vector); ++ + if ( vlapic_test_and_clear_vector(vector, &vlapic->regs->data[APIC_TMR]) ) + vioapic_update_EOI(vlapic_domain(vlapic), vector); + +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -1502,6 +1502,15 @@ static void vmx_sync_pir_to_irr(struct v + vlapic_set_vector(i, &vlapic->regs->data[APIC_IRR]); + } + ++static void vmx_handle_eoi(u8 vector) ++{ ++ unsigned long status = __vmread(GUEST_INTR_STATUS); ++ ++ /* We need to clear the SVI field. 
*/ ++ status &= VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK; ++ __vmwrite(GUEST_INTR_STATUS, status); ++} ++ + static struct hvm_function_table __initdata vmx_function_table = { + .name = "VMX", + .cpu_up_prepare = vmx_cpu_up_prepare, +@@ -1554,6 +1563,7 @@ static struct hvm_function_table __initd + .process_isr = vmx_process_isr, + .deliver_posted_intr = vmx_deliver_posted_intr, + .sync_pir_to_irr = vmx_sync_pir_to_irr, ++ .handle_eoi = vmx_handle_eoi, + .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m, + }; + +@@ -1580,7 +1590,10 @@ const struct hvm_function_table * __init + + setup_ept_dump(); + } +- ++ ++ if ( !cpu_has_vmx_virtual_intr_delivery ) ++ vmx_function_table.handle_eoi = NULL; ++ + if ( cpu_has_vmx_posted_intr_processing ) + alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt); + else +--- a/xen/include/asm-x86/hvm/hvm.h ++++ b/xen/include/asm-x86/hvm/hvm.h +@@ -186,6 +186,7 @@ struct hvm_function_table { + void (*process_isr)(int isr, struct vcpu *v); + void (*deliver_posted_intr)(struct vcpu *v, u8 vector); + void (*sync_pir_to_irr)(struct vcpu *v); ++ void (*handle_eoi)(u8 vector); + + /*Walk nested p2m */ + int (*nhvm_hap_walk_L1_p2m)(struct vcpu *v, paddr_t L2_gpa, diff --git a/520d417d-xen-Add-stdbool.h-workaround-for-BSD.patch b/520d417d-xen-Add-stdbool.h-workaround-for-BSD.patch deleted file mode 100644 index 6b7ec90..0000000 --- a/520d417d-xen-Add-stdbool.h-workaround-for-BSD.patch +++ /dev/null @@ -1,61 +0,0 @@ -# Commit 7b9685ca4ed2fd723600ce66eb20a6d0c115b6cb -# Date 2013-08-15 22:00:45 +0100 -# Author Tim Deegan -# Committer Tim Deegan -xen: Add stdbool.h workaround for BSD. - -On *BSD, stdbool.h lives in /usr/include, but we don't want to have -that on the search path in case we pick up any headers from the build -host's C libraries. - -Copy the equivalent hack already in place for stdarg.h: on all -supported compilers the contents of stdbool.h are trivial, so just -supply the things we need in a xen/stdbool.h header. 
- -Signed-off-by: Tim Deegan -Reviewed-by: Jan Beulich -Reviewed-by: Ian Campbell -Acked-by: Keir Fraser -Tested-by: Patrick Welche - ---- a/xen/include/xen/libelf.h -+++ b/xen/include/xen/libelf.h -@@ -29,8 +29,6 @@ - #error define architectural endianness - #endif - --#include -- - typedef int elf_errorstatus; /* 0: ok; -ve (normally -1): error */ - typedef int elf_negerrnoval; /* 0: ok; -EFOO: error */ - -@@ -39,11 +37,13 @@ typedef int elf_negerrnoval; /* 0: ok; - - #ifdef __XEN__ - #include - #include -+#include - #else - #include - #include - - #include -+#include - - struct elf_binary; - typedef void elf_log_callback(struct elf_binary*, void *caller_data, ---- /dev/null -+++ b/xen/include/xen/stdbool.h -@@ -0,0 +1,13 @@ -+#ifndef __XEN_STDBOOL_H__ -+#define __XEN_STDBOOL_H__ -+ -+#if defined(__OpenBSD__) || defined(__NetBSD__) -+# define bool _Bool -+# define true 1 -+# define false 0 -+# define __bool_true_false_are_defined 1 -+#else -+# include -+#endif -+ -+#endif /* __XEN_STDBOOL_H__ */ diff --git a/52146070-ACPI-fix-acpi_os_map_memory.patch b/52146070-ACPI-fix-acpi_os_map_memory.patch new file mode 100644 index 0000000..bf4e38c --- /dev/null +++ b/52146070-ACPI-fix-acpi_os_map_memory.patch @@ -0,0 +1,132 @@ +References: bnc#833251, bnc#834751 + +# Commit 2ee9cbf9d8eaeff6e21222905d22dbd58dc5fe29 +# Date 2013-08-21 08:38:40 +0200 +# Author Jan Beulich +# Committer Jan Beulich +ACPI: fix acpi_os_map_memory() + +It using map_domain_page() was entirely wrong. Use __acpi_map_table() +instead for the time being, with locking added as the mappings it +produces get replaced with subsequent invocations. Using locking in +this way is acceptable here since the only two runtime callers are +acpi_os_{read,write}_memory(), which don't leave mappings pending upon +returning to their callers. + +Also fix __acpi_map_table()'s first parameter's type - while benign for +unstable, backports to pre-4.3 trees will need this. 
+ +Signed-off-by: Jan Beulich + +# Commit c5ba8ed4c6f005d332a49d93a3ef8ff2b690b256 +# Date 2013-08-21 08:40:22 +0200 +# Author Jan Beulich +# Committer Jan Beulich +ACPI: use ioremap() in acpi_os_map_memory() + +This drops the post-boot use of __acpi_map_table() here again (together +with the somewhat awkward locking), in favor of using ioremap(). + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/acpi/lib.c ++++ b/xen/arch/x86/acpi/lib.c +@@ -39,7 +39,7 @@ u32 __read_mostly x86_acpiid_to_apicid[M + * from the fixed base. That's why we start at FIX_ACPI_END and + * count idx down while incrementing the phys address. + */ +-char *__acpi_map_table(unsigned long phys, unsigned long size) ++char *__acpi_map_table(paddr_t phys, unsigned long size) + { + unsigned long base, offset, mapped_size; + int idx; +--- a/xen/drivers/acpi/osl.c ++++ b/xen/drivers/acpi/osl.c +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + + #define _COMPONENT ACPI_OS_SERVICES + ACPI_MODULE_NAME("osl") +@@ -83,14 +84,25 @@ acpi_physical_address __init acpi_os_get + } + } + +-void __iomem *__init ++void __iomem * + acpi_os_map_memory(acpi_physical_address phys, acpi_size size) + { +- return __acpi_map_table((unsigned long)phys, size); ++ if (system_state >= SYS_STATE_active) { ++ unsigned long pfn = PFN_DOWN(phys); ++ unsigned int offs = phys & (PAGE_SIZE - 1); ++ ++ /* The low first Mb is always mapped. 
*/ ++ if ( !((phys + size - 1) >> 20) ) ++ return __va(phys); ++ return __vmap(&pfn, PFN_UP(offs + size), 1, 1, PAGE_HYPERVISOR_NOCACHE) + offs; ++ } ++ return __acpi_map_table(phys, size); + } + +-void __init acpi_os_unmap_memory(void __iomem * virt, acpi_size size) ++void acpi_os_unmap_memory(void __iomem * virt, acpi_size size) + { ++ if (system_state >= SYS_STATE_active) ++ vunmap((void *)((unsigned long)virt & PAGE_MASK)); + } + + acpi_status acpi_os_read_port(acpi_io_address port, u32 * value, u32 width) +@@ -133,9 +145,8 @@ acpi_status + acpi_os_read_memory(acpi_physical_address phys_addr, u32 * value, u32 width) + { + u32 dummy; +- void __iomem *virt_addr; ++ void __iomem *virt_addr = acpi_os_map_memory(phys_addr, width >> 3); + +- virt_addr = map_domain_page(phys_addr>>PAGE_SHIFT); + if (!value) + value = &dummy; + +@@ -153,7 +164,7 @@ acpi_os_read_memory(acpi_physical_addres + BUG(); + } + +- unmap_domain_page(virt_addr); ++ acpi_os_unmap_memory(virt_addr, width >> 3); + + return AE_OK; + } +@@ -161,9 +172,7 @@ acpi_os_read_memory(acpi_physical_addres + acpi_status + acpi_os_write_memory(acpi_physical_address phys_addr, u32 value, u32 width) + { +- void __iomem *virt_addr; +- +- virt_addr = map_domain_page(phys_addr>>PAGE_SHIFT); ++ void __iomem *virt_addr = acpi_os_map_memory(phys_addr, width >> 3); + + switch (width) { + case 8: +@@ -179,7 +188,7 @@ acpi_os_write_memory(acpi_physical_addre + BUG(); + } + +- unmap_domain_page(virt_addr); ++ acpi_os_unmap_memory(virt_addr, width >> 3); + + return AE_OK; + } +--- a/xen/include/xen/acpi.h ++++ b/xen/include/xen/acpi.h +@@ -56,7 +56,7 @@ typedef int (*acpi_table_handler) (struc + typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); + + unsigned int acpi_get_processor_id (unsigned int cpu); +-char * __acpi_map_table (unsigned long phys_addr, unsigned long size); ++char * __acpi_map_table (paddr_t phys_addr, unsigned long size); + int acpi_boot_init (void); + 
int acpi_boot_table_init (void); + int acpi_numa_init (void); diff --git a/5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch b/5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch new file mode 100644 index 0000000..834f182 --- /dev/null +++ b/5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch @@ -0,0 +1,50 @@ +# Commit c9c6abab583d27fdca1d979a7f1d18ae30f54e9b +# Date 2013-08-21 16:44:58 +0200 +# Author Jan Beulich +# Committer Jan Beulich +VT-d: warn about Compatibility Format Interrupts being enabled by firmware + +... as being insecure. + +Also drop the second (redundant) read DMAR_GSTS_REG from enable_intremap(). + +Signed-off-by: Jan Beulich +Acked-by Xiantao Zhang + +--- a/xen/drivers/passthrough/vtd/intremap.c ++++ b/xen/drivers/passthrough/vtd/intremap.c +@@ -706,8 +706,8 @@ int enable_intremap(struct iommu *iommu, + + if ( !platform_supports_intremap() ) + { +- dprintk(XENLOG_ERR VTDPREFIX, +- "Platform firmware does not support interrupt remapping\n"); ++ printk(XENLOG_ERR VTDPREFIX ++ " Platform firmware does not support interrupt remapping\n"); + return -EINVAL; + } + +@@ -718,15 +718,19 @@ int enable_intremap(struct iommu *iommu, + if ( (sts & DMA_GSTS_IRES) && ir_ctrl->iremap_maddr ) + return 0; + +- sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); + if ( !(sts & DMA_GSTS_QIES) ) + { +- dprintk(XENLOG_ERR VTDPREFIX, +- "Queued invalidation is not enabled, should not enable " +- "interrupt remapping\n"); ++ printk(XENLOG_ERR VTDPREFIX ++ " Queued invalidation is not enabled on IOMMU #%u:" ++ " Should not enable interrupt remapping\n", iommu->index); + return -EINVAL; + } + ++ if ( !eim && (sts & DMA_GSTS_CFIS) ) ++ printk(XENLOG_WARNING VTDPREFIX ++ " Compatibility Format Interrupts permitted on IOMMU #%u:" ++ " Device pass-through will be insecure\n", iommu->index); ++ + if ( ir_ctrl->iremap_maddr == 0 ) + { + drhd = iommu_to_drhd(iommu); diff --git a/5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch 
b/5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch new file mode 100644 index 0000000..5f93087 --- /dev/null +++ b/5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch @@ -0,0 +1,26 @@ +# Commit 7fb5c6b9ef22915e3fcac95cd44857f4457ba783 +# Date 2013-08-22 10:49:24 +0200 +# Author Yang Zhang +# Committer Jan Beulich +Nested VMX: Check whether interrupt is blocked by TPR + +If interrupt is blocked by L1's TPR, L2 should not see it and keep +running. Adding the check before L2 to retrieve interrupt. + +Signed-off-by: Yang Zhang +Acked-by: "Dong, Eddie" + +--- a/xen/arch/x86/hvm/vmx/intr.c ++++ b/xen/arch/x86/hvm/vmx/intr.c +@@ -165,6 +165,11 @@ static int nvmx_intr_intercept(struct vc + { + u32 ctrl; + ++ /* If blocked by L1's tpr, then nothing to do. */ ++ if ( nestedhvm_vcpu_in_guestmode(v) && ++ hvm_interrupt_blocked(v, intack) == hvm_intblk_tpr ) ++ return 1; ++ + if ( nvmx_intr_blocked(v) != hvm_intblk_none ) + { + enable_intr_window(v, intack); diff --git a/5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch b/5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch new file mode 100644 index 0000000..eda8b87 --- /dev/null +++ b/5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch @@ -0,0 +1,36 @@ +# Commit b35d0a26983843c092bfa353fd6b9aa8c3bf4886 +# Date 2013-08-22 10:50:13 +0200 +# Author Yang Zhang +# Committer Jan Beulich +Nested VMX: Force check ISR when L2 is running + +External interrupt is allowed to notify CPU only when it has higher +priority than current in servicing interrupt. With APIC-v, the priority +comparing is done by hardware and hardware will inject the interrupt to +VCPU when it recognizes an interrupt. Currently, there is no virtual +APIC-v feature available for L1 to use, so when L2 is running, we still need +to compare interrupt priority with ISR in hypervisor instead of via hardware. 
+ +Signed-off-by: Yang Zhang +Acked-by: "Dong, Eddie" + +--- a/xen/arch/x86/hvm/vlapic.c ++++ b/xen/arch/x86/hvm/vlapic.c +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -1037,7 +1038,8 @@ int vlapic_has_pending_irq(struct vcpu * + if ( irr == -1 ) + return -1; + +- if ( vlapic_virtual_intr_delivery_enabled() ) ++ if ( vlapic_virtual_intr_delivery_enabled() && ++ !nestedhvm_vcpu_in_guestmode(v) ) + return irr; + + isr = vlapic_find_highest_isr(vlapic); diff --git a/5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch b/5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch new file mode 100644 index 0000000..ed714cb --- /dev/null +++ b/5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch @@ -0,0 +1,43 @@ +# Commit 375a1035002fb257087756a86e6caeda649fc0f1 +# Date 2013-08-22 10:52:05 +0200 +# Author Yang Zhang +# Committer Jan Beulich +Nested VMX: Clear APIC-v control bit in vmcs02 + +There is no vAPIC-v support, so mask APIC-v control bit when +constructing vmcs02. + +Signed-off-by: Yang Zhang +Acked-by: "Dong, Eddie" + +--- a/xen/arch/x86/hvm/vmx/vvmx.c ++++ b/xen/arch/x86/hvm/vmx/vvmx.c +@@ -613,8 +613,15 @@ void nvmx_update_secondary_exec_control( + u32 shadow_cntrl; + struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); + struct nestedvmx *nvmx = &vcpu_2_nvmx(v); ++ u32 apicv_bit = SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; + ++ host_cntrl &= ~apicv_bit; + shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, SECONDARY_VM_EXEC_CONTROL); ++ ++ /* No vAPIC-v support, so it shouldn't be set in vmcs12. 
*/ ++ ASSERT(!(shadow_cntrl & apicv_bit)); ++ + nvmx->ept.enabled = !!(shadow_cntrl & SECONDARY_EXEC_ENABLE_EPT); + shadow_cntrl |= host_cntrl; + __vmwrite(SECONDARY_VM_EXEC_CONTROL, shadow_cntrl); +@@ -625,7 +632,12 @@ static void nvmx_update_pin_control(stru + u32 shadow_cntrl; + struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); + ++ host_cntrl &= ~PIN_BASED_POSTED_INTERRUPT; + shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL); ++ ++ /* No vAPIC-v support, so it shouldn't be set in vmcs12. */ ++ ASSERT(!(shadow_cntrl & PIN_BASED_POSTED_INTERRUPT)); ++ + shadow_cntrl |= host_cntrl; + __vmwrite(PIN_BASED_VM_EXEC_CONTROL, shadow_cntrl); + } diff --git a/5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch b/5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch new file mode 100644 index 0000000..529ea7a --- /dev/null +++ b/5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch @@ -0,0 +1,247 @@ +# Commit 84e6af58707520baf59c1c86c29237419e439afb +# Date 2013-08-22 10:59:01 +0200 +# Author Yang Zhang +# Committer Jan Beulich +Nested VMX: Update APIC-v(RVI/SVI) when vmexit to L1 + +If enabling APIC-v, all interrupts to L1 are delivered through APIC-v. +But when L2 is running, external interrupt will cause L1 vmexit with +reason external interrupt. Then L1 will pick up the interrupt through +vmcs12. When L1 ack the interrupt, since the APIC-v is enabled when +L1 is running, so APIC-v hardware still will do vEOI updating. The problem +is that the interrupt is delivered not through APIC-v hardware, this means +SVI/RVI/vPPR are not set, but hardware required them when doing vEOI +updating. The solution is that, when L1 tried to pick up the interrupt +from vmcs12, then hypervisor will help to update the SVI/RVI/vPPR to make +sure the following vEOI updating and vPPR updating correctly. 
+ +Also, since interrupt is delivered through vmcs12, so APIC-v hardware will +not clear vIRR and hypervisor needs to clear it before L1 running. + +Signed-off-by: Yang Zhang +Acked-by: "Dong, Eddie" + +--- a/xen/arch/x86/hvm/irq.c ++++ b/xen/arch/x86/hvm/irq.c +@@ -437,7 +437,7 @@ struct hvm_intack hvm_vcpu_ack_pending_i + intack.vector = (uint8_t)vector; + break; + case hvm_intsrc_lapic: +- if ( !vlapic_ack_pending_irq(v, intack.vector) ) ++ if ( !vlapic_ack_pending_irq(v, intack.vector, 0) ) + intack = hvm_intack_none; + break; + case hvm_intsrc_vector: +--- a/xen/arch/x86/hvm/vlapic.c ++++ b/xen/arch/x86/hvm/vlapic.c +@@ -168,6 +168,14 @@ static uint32_t vlapic_get_ppr(struct vl + return ppr; + } + ++uint32_t vlapic_set_ppr(struct vlapic *vlapic) ++{ ++ uint32_t ppr = vlapic_get_ppr(vlapic); ++ ++ vlapic_set_reg(vlapic, APIC_PROCPRI, ppr); ++ return ppr; ++} ++ + static int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda) + { + int result = 0; +@@ -1050,15 +1058,15 @@ int vlapic_has_pending_irq(struct vcpu * + return irr; + } + +-int vlapic_ack_pending_irq(struct vcpu *v, int vector) ++int vlapic_ack_pending_irq(struct vcpu *v, int vector, bool_t force_ack) + { + struct vlapic *vlapic = vcpu_vlapic(v); + +- if ( vlapic_virtual_intr_delivery_enabled() ) +- return 1; +- +- vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]); +- vlapic_clear_irr(vector, vlapic); ++ if ( force_ack || !vlapic_virtual_intr_delivery_enabled() ) ++ { ++ vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]); ++ vlapic_clear_irr(vector, vlapic); ++ } + + return 1; + } +--- a/xen/arch/x86/hvm/vmx/intr.c ++++ b/xen/arch/x86/hvm/vmx/intr.c +@@ -185,7 +185,7 @@ static int nvmx_intr_intercept(struct vc + if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) ) + return 0; + +- vmx_inject_extint(intack.vector); ++ vmx_inject_extint(intack.vector, intack.source); + + ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, VM_EXIT_CONTROLS); + if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT ) +@@ -314,7 +314,7 
@@ void vmx_intr_assist(void) + else + { + HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0); +- vmx_inject_extint(intack.vector); ++ vmx_inject_extint(intack.vector, intack.source); + pt_intr_post(v, intack); + } + +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -1205,7 +1205,7 @@ static void vmx_update_guest_efer(struct + } + + void nvmx_enqueue_n2_exceptions(struct vcpu *v, +- unsigned long intr_fields, int error_code) ++ unsigned long intr_fields, int error_code, uint8_t source) + { + struct nestedvmx *nvmx = &vcpu_2_nvmx(v); + +@@ -1213,6 +1213,7 @@ void nvmx_enqueue_n2_exceptions(struct v + /* enqueue the exception till the VMCS switch back to L1 */ + nvmx->intr.intr_info = intr_fields; + nvmx->intr.error_code = error_code; ++ nvmx->intr.source = source; + vcpu_nestedhvm(v).nv_vmexit_pending = 1; + return; + } +@@ -1224,7 +1225,8 @@ void nvmx_enqueue_n2_exceptions(struct v + + static int nvmx_vmexit_trap(struct vcpu *v, struct hvm_trap *trap) + { +- nvmx_enqueue_n2_exceptions(v, trap->vector, trap->error_code); ++ nvmx_enqueue_n2_exceptions(v, trap->vector, trap->error_code, ++ hvm_intsrc_none); + return NESTEDHVM_VMEXIT_DONE; + } + +@@ -1255,7 +1257,7 @@ static void __vmx_inject_exception(int t + curr->arch.hvm_vmx.vmx_emulate = 1; + } + +-void vmx_inject_extint(int trap) ++void vmx_inject_extint(int trap, uint8_t source) + { + struct vcpu *v = current; + u32 pin_based_cntrl; +@@ -1266,7 +1268,7 @@ void vmx_inject_extint(int trap) + if ( pin_based_cntrl & PIN_BASED_EXT_INTR_MASK ) { + nvmx_enqueue_n2_exceptions (v, + INTR_INFO_VALID_MASK | (X86_EVENTTYPE_EXT_INTR<<8) | trap, +- HVM_DELIVER_NO_ERROR_CODE); ++ HVM_DELIVER_NO_ERROR_CODE, source); + return; + } + } +@@ -1285,7 +1287,7 @@ void vmx_inject_nmi(void) + if ( pin_based_cntrl & PIN_BASED_NMI_EXITING ) { + nvmx_enqueue_n2_exceptions (v, + INTR_INFO_VALID_MASK | (X86_EVENTTYPE_NMI<<8) | TRAP_nmi, +- HVM_DELIVER_NO_ERROR_CODE); ++ HVM_DELIVER_NO_ERROR_CODE, hvm_intsrc_nmi); + 
return; + } + } +@@ -1353,7 +1355,7 @@ static void vmx_inject_trap(struct hvm_t + { + nvmx_enqueue_n2_exceptions (curr, + INTR_INFO_VALID_MASK | (_trap.type<<8) | _trap.vector, +- _trap.error_code); ++ _trap.error_code, hvm_intsrc_none); + return; + } + else +--- a/xen/arch/x86/hvm/vmx/vvmx.c ++++ b/xen/arch/x86/hvm/vmx/vvmx.c +@@ -1295,6 +1295,36 @@ static void sync_exception_state(struct + } + } + ++static void nvmx_update_apicv(struct vcpu *v) ++{ ++ struct nestedvmx *nvmx = &vcpu_2_nvmx(v); ++ struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); ++ unsigned long reason = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON); ++ uint32_t intr_info = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO); ++ ++ if ( reason == EXIT_REASON_EXTERNAL_INTERRUPT && ++ nvmx->intr.source == hvm_intsrc_lapic && ++ (intr_info & INTR_INFO_VALID_MASK) ) ++ { ++ uint16_t status; ++ uint32_t rvi, ppr; ++ uint32_t vector = intr_info & 0xff; ++ struct vlapic *vlapic = vcpu_vlapic(v); ++ ++ vlapic_ack_pending_irq(v, vector, 1); ++ ++ ppr = vlapic_set_ppr(vlapic); ++ WARN_ON((ppr & 0xf0) != (vector & 0xf0)); ++ ++ status = vector << 8; ++ rvi = vlapic_has_pending_irq(v); ++ if ( rvi != -1 ) ++ status |= rvi & 0xff; ++ ++ __vmwrite(GUEST_INTR_STATUS, status); ++ } ++} ++ + static void virtual_vmexit(struct cpu_user_regs *regs) + { + struct vcpu *v = current; +@@ -1340,6 +1370,9 @@ static void virtual_vmexit(struct cpu_us + /* updating host cr0 to sync TS bit */ + __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0); + ++ if ( cpu_has_vmx_virtual_intr_delivery ) ++ nvmx_update_apicv(v); ++ + vmreturn(regs, VMSUCCEED); + } + +--- a/xen/include/asm-x86/hvm/vlapic.h ++++ b/xen/include/asm-x86/hvm/vlapic.h +@@ -98,7 +98,7 @@ bool_t is_vlapic_lvtpc_enabled(struct vl + void vlapic_set_irq(struct vlapic *vlapic, uint8_t vec, uint8_t trig); + + int vlapic_has_pending_irq(struct vcpu *v); +-int vlapic_ack_pending_irq(struct vcpu *v, int vector); ++int vlapic_ack_pending_irq(struct vcpu *v, int vector, bool_t force_ack); 
+ + int vlapic_init(struct vcpu *v); + void vlapic_destroy(struct vcpu *v); +@@ -110,6 +110,7 @@ void vlapic_tdt_msr_set(struct vlapic *v + uint64_t vlapic_tdt_msr_get(struct vlapic *vlapic); + + int vlapic_accept_pic_intr(struct vcpu *v); ++uint32_t vlapic_set_ppr(struct vlapic *vlapic); + + void vlapic_adjust_i8259_target(struct domain *d); + +--- a/xen/include/asm-x86/hvm/vmx/vmx.h ++++ b/xen/include/asm-x86/hvm/vmx/vmx.h +@@ -448,7 +448,7 @@ static inline int __vmxon(u64 addr) + + void vmx_get_segment_register(struct vcpu *, enum x86_segment, + struct segment_register *); +-void vmx_inject_extint(int trap); ++void vmx_inject_extint(int trap, uint8_t source); + void vmx_inject_nmi(void); + + int ept_p2m_init(struct p2m_domain *p2m); +--- a/xen/include/asm-x86/hvm/vmx/vvmx.h ++++ b/xen/include/asm-x86/hvm/vmx/vvmx.h +@@ -36,6 +36,7 @@ struct nestedvmx { + struct { + unsigned long intr_info; + u32 error_code; ++ u8 source; + } intr; + struct { + bool_t enabled; diff --git a/5215d8b0-Correct-X2-APIC-HVM-emulation.patch b/5215d8b0-Correct-X2-APIC-HVM-emulation.patch new file mode 100644 index 0000000..014de12 --- /dev/null +++ b/5215d8b0-Correct-X2-APIC-HVM-emulation.patch @@ -0,0 +1,24 @@ +References: bnc#835896 + +# Commit 69962e19ed432570f6cdcfdb5f6f22d6e3c54e6c +# Date 2013-08-22 11:24:00 +0200 +# Author Juergen Gross +# Committer Jan Beulich +Correct X2-APIC HVM emulation + +commit 6859874b61d5ddaf5289e72ed2b2157739b72ca5 ("x86/HVM: fix x2APIC +APIC_ID read emulation") introduced an error for the hvm emulation of +x2apic. Any try to write to APIC_ICR MSR will result in a GP fault. 
+ +Signed-off-by: Juergen Gross + +--- a/xen/arch/x86/hvm/vlapic.c ++++ b/xen/arch/x86/hvm/vlapic.c +@@ -868,6 +868,7 @@ int hvm_x2apic_msr_write(struct vcpu *v, + rc = vlapic_reg_write(v, APIC_ICR2, (uint32_t)(msr_content >> 32)); + if ( rc ) + return rc; ++ break; + + case APIC_ICR2: + return X86EMUL_UNHANDLEABLE; diff --git a/xen.changes b/xen.changes index 6c1eb8f..460d292 100644 --- a/xen.changes +++ b/xen.changes @@ -1,3 +1,19 @@ +------------------------------------------------------------------- +Fri Aug 30 08:11:55 MDT 2013 - carnold@suse.com + +- Upstream patches from Jan + 51e517e6-AMD-IOMMU-allocate-IRTEs.patch + 51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch + 51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch + 52146070-ACPI-fix-acpi_os_map_memory.patch + 5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch + 5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch + 5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch + 5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch + 5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch + 5215d8b0-Correct-X2-APIC-HVM-emulation.patch +- Dropped 520d417d-xen-Add-stdbool.h-workaround-for-BSD.patch + ------------------------------------------------------------------- Mon Aug 26 15:48:57 MDT 2013 - carnold@suse.com diff --git a/xen.spec b/xen.spec index 9baa817..0ab0648 100644 --- a/xen.spec +++ b/xen.spec @@ -15,7 +15,6 @@ # Please submit bugfixes or comments via http://bugs.opensuse.org/ # - Name: xen ExclusiveArch: %ix86 x86_64 %arm aarch64 %define xvers 4.3 @@ -200,19 +199,28 @@ Patch3: 51d27841-iommu-amd-Workaround-for-erratum-787.patch Patch4: 51daa074-Revert-hvmloader-always-include-HPET-table.patch Patch5: 51d5334e-x86-mm-Ensure-useful-progress-in-alloc_l2_table.patch Patch6: 51dd155c-adjust-x86-EFI-build.patch -Patch7: 51e63d80-x86-cpuidle-Change-logging-for-unknown-APIC-IDs.patch -Patch8: 
51e6540d-x86-don-t-use-destroy_xen_mappings-for-vunmap.patch -Patch9: 51e7963f-x86-time-Update-wallclock-in-shared-info-when-altering-domain-time-offset.patch -Patch10: 51ffd577-fix-off-by-one-mistakes-in-vm_alloc.patch -Patch11: 51ffd5fd-x86-refine-FPU-selector-handling-code-for-XSAVEOPT.patch -Patch12: 520114bb-Nested-VMX-Flush-TLBs-and-Caches-if-paging-mode-changed.patch -Patch13: 520a5504-VMX-add-boot-parameter-to-enable-disable-APIC-v-dynamically.patch -Patch14: 520a24f6-x86-AMD-Fix-nested-svm-crash-due-to-assertion-in-__virt_to_maddr.patch -Patch15: 520a2570-x86-AMD-Inject-GP-instead-of-UD-when-unable-to-map-vmcb.patch -Patch16: 520b4b60-VT-d-protect-against-bogus-information-coming-from-BIOS.patch -Patch17: 520b4bda-x86-MTRR-fix-range-check-in-mtrr_add_page.patch -Patch18: 520cb8b6-x86-time-fix-check-for-negative-time-in-__update_vcpu_system_time.patch -Patch19: 520d417d-xen-Add-stdbool.h-workaround-for-BSD.patch +Patch7: 51e517e6-AMD-IOMMU-allocate-IRTEs.patch +Patch8: 51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch +Patch9: 51e63d80-x86-cpuidle-Change-logging-for-unknown-APIC-IDs.patch +Patch10: 51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch +Patch11: 51e6540d-x86-don-t-use-destroy_xen_mappings-for-vunmap.patch +Patch12: 51e7963f-x86-time-Update-wallclock-in-shared-info-when-altering-domain-time-offset.patch +Patch13: 51ffd577-fix-off-by-one-mistakes-in-vm_alloc.patch +Patch14: 51ffd5fd-x86-refine-FPU-selector-handling-code-for-XSAVEOPT.patch +Patch15: 520114bb-Nested-VMX-Flush-TLBs-and-Caches-if-paging-mode-changed.patch +Patch16: 520a5504-VMX-add-boot-parameter-to-enable-disable-APIC-v-dynamically.patch +Patch17: 520a24f6-x86-AMD-Fix-nested-svm-crash-due-to-assertion-in-__virt_to_maddr.patch +Patch18: 520a2570-x86-AMD-Inject-GP-instead-of-UD-when-unable-to-map-vmcb.patch +Patch19: 520b4b60-VT-d-protect-against-bogus-information-coming-from-BIOS.patch +Patch20: 520b4bda-x86-MTRR-fix-range-check-in-mtrr_add_page.patch +Patch21: 
520cb8b6-x86-time-fix-check-for-negative-time-in-__update_vcpu_system_time.patch +Patch22: 52146070-ACPI-fix-acpi_os_map_memory.patch +Patch23: 5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch +Patch24: 5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch +Patch25: 5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch +Patch26: 5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch +Patch27: 5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch +Patch28: 5215d8b0-Correct-X2-APIC-HVM-emulation.patch # Upstream qemu patches # Our patches Patch301: xen-destdir.patch @@ -521,6 +529,15 @@ Authors %patch17 -p1 %patch18 -p1 %patch19 -p1 +%patch20 -p1 +%patch21 -p1 +%patch22 -p1 +%patch23 -p1 +%patch24 -p1 +%patch25 -p1 +%patch26 -p1 +%patch27 -p1 +%patch28 -p1 %patch301 -p1 %patch302 -p1 %patch303 -p1