diff --git a/51d5334e-x86-mm-Ensure-useful-progress-in-alloc_l2_table.patch b/51d5334e-x86-mm-Ensure-useful-progress-in-alloc_l2_table.patch new file mode 100644 index 0000000..a433513 --- /dev/null +++ b/51d5334e-x86-mm-Ensure-useful-progress-in-alloc_l2_table.patch @@ -0,0 +1,30 @@ +# Commit d3a55d7d9bb518efe08143d050deff9f4ee80ec1 +# Date 2013-07-04 10:33:18 +0200 +# Author Andrew Cooper +# Committer Jan Beulich +x86/mm: Ensure useful progress in alloc_l2_table() + +While debugging the issue which turned out to be XSA-58, a printk in this loop +showed that it was quite easy to never make useful progress, because of +consistently failing the preemption check. + +One single l2 entry is a reasonable amount of work to do, even if an action is +pending, and also assures forwards progress across repeat continuations. + +Tweak the continuation criteria to fail on the first iteration of the loop. + +Signed-off-by: Andrew Cooper +Acked-by: Keir Fraser + +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -1278,7 +1278,8 @@ static int alloc_l2_table(struct page_in + + for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ ) + { +- if ( preemptible && i && hypercall_preempt_check() ) ++ if ( preemptible && i > page->nr_validated_ptes ++ && hypercall_preempt_check() ) + { + page->nr_validated_ptes = i; + rc = -EAGAIN; diff --git a/51dd155c-adjust-x86-EFI-build.patch b/51dd155c-adjust-x86-EFI-build.patch new file mode 100644 index 0000000..72d99e2 --- /dev/null +++ b/51dd155c-adjust-x86-EFI-build.patch @@ -0,0 +1,27 @@ +# Commit 5656b93d215d7c5160790ea87758625ba1de16b1 +# Date 2013-07-10 10:03:40 +0200 +# Author Jan Beulich +# Committer Jan Beulich +adjust x86 EFI build + +While the rule to generate .init.o files from .o ones already correctly +included $(extra-y), the setting of the necessary compiler flag didn't +have the same. With some yet to be posted patch this resulted in build +breakage because of the compiler deciding not to inline a few functions +(which then results in .text not being empty as required for these +object files). + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/xen/Rules.mk ++++ b/xen/Rules.mk +@@ -101,7 +101,7 @@ obj-y := $(patsubst %/,%/built-in.o,$ + + subdir-all := $(subdir-y) $(subdir-n) + +-$(filter %.init.o,$(obj-y) $(obj-bin-y)): CFLAGS += -DINIT_SECTIONS_ONLY ++$(filter %.init.o,$(obj-y) $(obj-bin-y) $(extra-y)): CFLAGS += -DINIT_SECTIONS_ONLY + + $(obj-$(coverage)): CFLAGS += -fprofile-arcs -ftest-coverage -DTEST_COVERAGE + diff --git a/51e517e6-AMD-IOMMU-allocate-IRTEs.patch b/51e517e6-AMD-IOMMU-allocate-IRTEs.patch new file mode 100644 index 0000000..b928ab6 --- /dev/null +++ b/51e517e6-AMD-IOMMU-allocate-IRTEs.patch @@ -0,0 +1,652 @@ +# Commit 2ca9fbd739b8a72b16dd790d0fff7b75f5488fb8 +# Date 2013-07-16 11:52:38 +0200 +# Author Jan Beulich +# Committer Jan Beulich +AMD IOMMU: allocate IRTE entries instead of using a static mapping + +For multi-vector MSI, where we surely don't want to allocate +contiguous vectors and be able to set affinities of the individual +vectors separately, we need to drop the use of the tuple of vector and +delivery mode to determine the IRTE to use, and instead allocate IRTEs +(which imo should have been done from the beginning). 
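To make the new scheme concrete, here is a minimal standalone sketch of bitmap-based slot allocation as this patch uses it (names and the open-coded bit scan are simplified; the actual Xen code uses find_first_zero_bit() on a per-device "inuse" map, under the per-table lock):

    #include <limits.h>

    #define INTREMAP_ENTRIES 512
    #define BITS_PER_LONG    (sizeof(unsigned long) * CHAR_BIT)

    static unsigned long inuse[INTREMAP_ENTRIES / BITS_PER_LONG];

    /* Return the first free IRTE index, or INTREMAP_ENTRIES if the table is full. */
    static unsigned int alloc_intremap_entry(void)
    {
        unsigned int slot;

        for ( slot = 0; slot < INTREMAP_ENTRIES; ++slot )
            if ( !(inuse[slot / BITS_PER_LONG] & (1UL << (slot % BITS_PER_LONG))) )
            {
                inuse[slot / BITS_PER_LONG] |= 1UL << (slot % BITS_PER_LONG);
                return slot;
            }
        return INTREMAP_ENTRIES;
    }

    /* Freeing simply clears the slot's bit so it can be reallocated. */
    static void free_intremap_entry(unsigned int slot)
    {
        inuse[slot / BITS_PER_LONG] &= ~(1UL << (slot % BITS_PER_LONG));
    }

A full table is reported by returning INTREMAP_ENTRIES; the hunks below turn that into -ENOSPC and leave the interrupt masked.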
+ +Signed-off-by: Jan Beulich +Acked-by: Suravee Suthikulpanit + +# Commit dcbff3aeac6020cdf1f5bd0f0eb0d329fc55d939 +# Date 2013-08-28 10:11:19 +0200 +# Author Jan Beulich +# Committer Jan Beulich +AMD IOMMU: also allocate IRTEs for HPET MSI + +Omitting this was a blatant oversight of mine in commit 2ca9fbd7 ("AMD +IOMMU: allocate IRTE entries instead of using a static mapping"). + +This also changes a bogus inequality check into a sensible one, even +though it is already known that this will make HPET MSI unusable on +certain systems (having respective broken firmware). This, however, +seems better than failing on systems with consistent ACPI tables. + +Reported-by: Sander Eikelenboom +Signed-off-by: Jan Beulich +Acked-by: Suravee Suthikulpanit + +--- a/xen/drivers/passthrough/amd/iommu_acpi.c ++++ b/xen/drivers/passthrough/amd/iommu_acpi.c +@@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr + /* allocate per-device interrupt remapping table */ + if ( amd_iommu_perdev_intremap ) + ivrs_mappings[alias_id].intremap_table = +- amd_iommu_alloc_intremap_table(); ++ amd_iommu_alloc_intremap_table( ++ &ivrs_mappings[alias_id].intremap_inuse); + else + { + if ( shared_intremap_table == NULL ) +- shared_intremap_table = amd_iommu_alloc_intremap_table(); ++ shared_intremap_table = amd_iommu_alloc_intremap_table( ++ &shared_intremap_inuse); + ivrs_mappings[alias_id].intremap_table = shared_intremap_table; ++ ivrs_mappings[alias_id].intremap_inuse = shared_intremap_inuse; + } + } + /* assgin iommu hardware */ +@@ -671,7 +674,7 @@ static u16 __init parse_ivhd_device_spec + if ( IO_APIC_ID(apic) != special->handle ) + continue; + +- if ( ioapic_sbdf[special->handle].pin_setup ) ++ if ( ioapic_sbdf[special->handle].pin_2_idx ) + { + if ( ioapic_sbdf[special->handle].bdf == bdf && + ioapic_sbdf[special->handle].seg == seg ) +@@ -691,14 +694,17 @@ static u16 __init parse_ivhd_device_spec + ioapic_sbdf[special->handle].bdf = bdf; + ioapic_sbdf[special->handle].seg = seg; + +- ioapic_sbdf[special->handle].pin_setup = xzalloc_array( +- unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic])); ++ ioapic_sbdf[special->handle].pin_2_idx = xmalloc_array( ++ u16, nr_ioapic_entries[apic]); + if ( nr_ioapic_entries[apic] && +- !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) ++ !ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx ) + { + printk(XENLOG_ERR "IVHD Error: Out of memory\n"); + return 0; + } ++ memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1, ++ nr_ioapic_entries[apic] * ++ sizeof(*ioapic_sbdf->pin_2_idx)); + } + break; + } +@@ -926,7 +932,7 @@ static int __init parse_ivrs_table(struc + for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic ) + { + if ( !nr_ioapic_entries[apic] || +- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) ++ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx ) + continue; + + printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n", +@@ -935,9 +941,12 @@ static int __init parse_ivrs_table(struc + error = -ENXIO; + else + { +- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup = xzalloc_array( +- unsigned long, BITS_TO_LONGS(nr_ioapic_entries[apic])); +- if ( !ioapic_sbdf[IO_APIC_ID(apic)].pin_setup ) ++ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx = xmalloc_array( ++ u16, nr_ioapic_entries[apic]); ++ if ( ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx ) ++ memset(ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx, -1, ++ nr_ioapic_entries[apic] * sizeof(*ioapic_sbdf->pin_2_idx)); ++ else + { + printk(XENLOG_ERR "IVHD Error: Out of memory\n"); + error = -ENOMEM; +--- a/xen/drivers/passthrough/amd/iommu_intr.c ++++ 
b/xen/drivers/passthrough/amd/iommu_intr.c +@@ -31,6 +31,7 @@ + struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS]; + struct hpet_sbdf hpet_sbdf; + void *shared_intremap_table; ++unsigned long *shared_intremap_inuse; + static DEFINE_SPINLOCK(shared_intremap_lock); + + static spinlock_t* get_intremap_lock(int seg, int req_id) +@@ -46,30 +47,31 @@ static int get_intremap_requestor_id(int + return get_ivrs_mappings(seg)[bdf].dte_requestor_id; + } + +-static int get_intremap_offset(u8 vector, u8 dm) ++static unsigned int alloc_intremap_entry(int seg, int bdf) + { +- int offset = 0; +- offset = (dm << INT_REMAP_INDEX_DM_SHIFT) & INT_REMAP_INDEX_DM_MASK; +- offset |= (vector << INT_REMAP_INDEX_VECTOR_SHIFT ) & +- INT_REMAP_INDEX_VECTOR_MASK; +- return offset; ++ unsigned long *inuse = get_ivrs_mappings(seg)[bdf].intremap_inuse; ++ unsigned int slot = find_first_zero_bit(inuse, INTREMAP_ENTRIES); ++ ++ if ( slot < INTREMAP_ENTRIES ) ++ __set_bit(slot, inuse); ++ return slot; + } + +-static u8 *get_intremap_entry(int seg, int bdf, int offset) ++static u32 *get_intremap_entry(int seg, int bdf, int offset) + { +- u8 *table; ++ u32 *table = get_ivrs_mappings(seg)[bdf].intremap_table; + +- table = (u8*)get_ivrs_mappings(seg)[bdf].intremap_table; + ASSERT( (table != NULL) && (offset < INTREMAP_ENTRIES) ); + +- return (u8*) (table + offset); ++ return table + offset; + } + + static void free_intremap_entry(int seg, int bdf, int offset) + { +- u32* entry; +- entry = (u32*)get_intremap_entry(seg, bdf, offset); ++ u32 *entry = get_intremap_entry(seg, bdf, offset); ++ + memset(entry, 0, sizeof(u32)); ++ __clear_bit(offset, get_ivrs_mappings(seg)[bdf].intremap_inuse); + } + + static void update_intremap_entry(u32* entry, u8 vector, u8 int_type, +@@ -98,18 +100,30 @@ static void update_intremap_entry(u32* e + INT_REMAP_ENTRY_VECTOR_SHIFT, entry); + } + +-static void update_intremap_entry_from_ioapic( ++static inline int get_rte_index(const struct IO_APIC_route_entry *rte) ++{ ++ return rte->vector | (rte->delivery_mode << 8); ++} ++ ++static inline void set_rte_index(struct IO_APIC_route_entry *rte, int offset) ++{ ++ rte->vector = (u8)offset; ++ rte->delivery_mode = offset >> 8; ++} ++ ++static int update_intremap_entry_from_ioapic( + int bdf, + struct amd_iommu *iommu, +- const struct IO_APIC_route_entry *rte, +- const struct IO_APIC_route_entry *old_rte) ++ struct IO_APIC_route_entry *rte, ++ bool_t lo_update, ++ u16 *index) + { + unsigned long flags; + u32* entry; + u8 delivery_mode, dest, vector, dest_mode; + int req_id; + spinlock_t *lock; +- int offset; ++ unsigned int offset; + + req_id = get_intremap_requestor_id(iommu->seg, bdf); + lock = get_intremap_lock(iommu->seg, req_id); +@@ -121,16 +135,35 @@ static void update_intremap_entry_from_i + + spin_lock_irqsave(lock, flags); + +- offset = get_intremap_offset(vector, delivery_mode); +- if ( old_rte ) ++ offset = *index; ++ if ( offset >= INTREMAP_ENTRIES ) + { +- int old_offset = get_intremap_offset(old_rte->vector, +- old_rte->delivery_mode); ++ offset = alloc_intremap_entry(iommu->seg, req_id); ++ if ( offset >= INTREMAP_ENTRIES ) ++ { ++ spin_unlock_irqrestore(lock, flags); ++ rte->mask = 1; ++ return -ENOSPC; ++ } ++ *index = offset; ++ lo_update = 1; ++ } + +- if ( offset != old_offset ) +- free_intremap_entry(iommu->seg, bdf, old_offset); ++ entry = get_intremap_entry(iommu->seg, req_id, offset); ++ if ( !lo_update ) ++ { ++ /* ++ * Low half of incoming RTE is already in remapped format, ++ * so need to recover vector and delivery mode from IRTE. 
++ */ ++ ASSERT(get_rte_index(rte) == offset); ++ vector = get_field_from_reg_u32(*entry, ++ INT_REMAP_ENTRY_VECTOR_MASK, ++ INT_REMAP_ENTRY_VECTOR_SHIFT); ++ delivery_mode = get_field_from_reg_u32(*entry, ++ INT_REMAP_ENTRY_INTTYPE_MASK, ++ INT_REMAP_ENTRY_INTTYPE_SHIFT); + } +- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset); + update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); + + spin_unlock_irqrestore(lock, flags); +@@ -141,6 +174,10 @@ static void update_intremap_entry_from_i + amd_iommu_flush_intremap(iommu, req_id); + spin_unlock_irqrestore(&iommu->lock, flags); + } ++ ++ set_rte_index(rte, offset); ++ ++ return 0; + } + + int __init amd_iommu_setup_ioapic_remapping(void) +@@ -153,7 +190,7 @@ int __init amd_iommu_setup_ioapic_remapp + u16 seg, bdf, req_id; + struct amd_iommu *iommu; + spinlock_t *lock; +- int offset; ++ unsigned int offset; + + /* Read ioapic entries and update interrupt remapping table accordingly */ + for ( apic = 0; apic < nr_ioapics; apic++ ) +@@ -184,19 +221,23 @@ int __init amd_iommu_setup_ioapic_remapp + dest = rte.dest.logical.logical_dest; + + spin_lock_irqsave(lock, flags); +- offset = get_intremap_offset(vector, delivery_mode); +- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset); ++ offset = alloc_intremap_entry(seg, req_id); ++ BUG_ON(offset >= INTREMAP_ENTRIES); ++ entry = get_intremap_entry(iommu->seg, req_id, offset); + update_intremap_entry(entry, vector, + delivery_mode, dest_mode, dest); + spin_unlock_irqrestore(lock, flags); + ++ set_rte_index(&rte, offset); ++ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] = offset; ++ __ioapic_write_entry(apic, pin, 1, rte); ++ + if ( iommu->enabled ) + { + spin_lock_irqsave(&iommu->lock, flags); + amd_iommu_flush_intremap(iommu, req_id); + spin_unlock_irqrestore(&iommu->lock, flags); + } +- set_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup); + } + } + return 0; +@@ -209,7 +250,7 @@ void amd_iommu_ioapic_update_ire( + struct IO_APIC_route_entry new_rte = { 0 }; + unsigned int rte_lo = (reg & 1) ? reg - 1 : reg; + unsigned int pin = (reg - 0x10) / 2; +- int saved_mask, seg, bdf; ++ int saved_mask, seg, bdf, rc; + struct amd_iommu *iommu; + + if ( !iommu_intremap ) +@@ -247,7 +288,7 @@ void amd_iommu_ioapic_update_ire( + } + + if ( new_rte.mask && +- !test_bit(pin, ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ) ++ ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin] >= INTREMAP_ENTRIES ) + { + ASSERT(saved_mask); + __io_apic_write(apic, reg, value); +@@ -262,14 +303,19 @@ void amd_iommu_ioapic_update_ire( + } + + /* Update interrupt remapping entry */ +- update_intremap_entry_from_ioapic( +- bdf, iommu, &new_rte, +- test_and_set_bit(pin, +- ioapic_sbdf[IO_APIC_ID(apic)].pin_setup) ? &old_rte +- : NULL); ++ rc = update_intremap_entry_from_ioapic( ++ bdf, iommu, &new_rte, reg == rte_lo, ++ &ioapic_sbdf[IO_APIC_ID(apic)].pin_2_idx[pin]); + +- /* Forward write access to IO-APIC RTE */ +- __io_apic_write(apic, reg, value); ++ __io_apic_write(apic, reg, ((u32 *)&new_rte)[reg != rte_lo]); ++ ++ if ( rc ) ++ { ++ /* Keep the entry masked. 
*/ ++ printk(XENLOG_ERR "Remapping IO-APIC %#x pin %u failed (%d)\n", ++ IO_APIC_ID(apic), pin, rc); ++ return; ++ } + + /* For lower bits access, return directly to avoid double writes */ + if ( reg == rte_lo ) +@@ -283,16 +329,41 @@ void amd_iommu_ioapic_update_ire( + } + } + +-static void update_intremap_entry_from_msi_msg( ++unsigned int amd_iommu_read_ioapic_from_ire( ++ unsigned int apic, unsigned int reg) ++{ ++ unsigned int val = __io_apic_read(apic, reg); ++ ++ if ( !(reg & 1) ) ++ { ++ unsigned int offset = val & (INTREMAP_ENTRIES - 1); ++ u16 bdf = ioapic_sbdf[IO_APIC_ID(apic)].bdf; ++ u16 seg = ioapic_sbdf[IO_APIC_ID(apic)].seg; ++ u16 req_id = get_intremap_requestor_id(seg, bdf); ++ const u32 *entry = get_intremap_entry(seg, req_id, offset); ++ ++ val &= ~(INTREMAP_ENTRIES - 1); ++ val |= get_field_from_reg_u32(*entry, ++ INT_REMAP_ENTRY_INTTYPE_MASK, ++ INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8; ++ val |= get_field_from_reg_u32(*entry, ++ INT_REMAP_ENTRY_VECTOR_MASK, ++ INT_REMAP_ENTRY_VECTOR_SHIFT); ++ } ++ ++ return val; ++} ++ ++static int update_intremap_entry_from_msi_msg( + struct amd_iommu *iommu, u16 bdf, +- int *remap_index, const struct msi_msg *msg) ++ int *remap_index, const struct msi_msg *msg, u32 *data) + { + unsigned long flags; + u32* entry; + u16 req_id, alias_id; + u8 delivery_mode, dest, vector, dest_mode; + spinlock_t *lock; +- int offset; ++ unsigned int offset; + + req_id = get_dma_requestor_id(iommu->seg, bdf); + alias_id = get_intremap_requestor_id(iommu->seg, bdf); +@@ -303,15 +374,6 @@ static void update_intremap_entry_from_m + spin_lock_irqsave(lock, flags); + free_intremap_entry(iommu->seg, req_id, *remap_index); + spin_unlock_irqrestore(lock, flags); +- +- if ( ( req_id != alias_id ) && +- get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL ) +- { +- lock = get_intremap_lock(iommu->seg, alias_id); +- spin_lock_irqsave(lock, flags); +- free_intremap_entry(iommu->seg, alias_id, *remap_index); +- spin_unlock_irqrestore(lock, flags); +- } + goto done; + } + +@@ -322,16 +384,24 @@ static void update_intremap_entry_from_m + delivery_mode = (msg->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x1; + vector = (msg->data >> MSI_DATA_VECTOR_SHIFT) & MSI_DATA_VECTOR_MASK; + dest = (msg->address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff; +- offset = get_intremap_offset(vector, delivery_mode); +- if ( *remap_index < 0) ++ offset = *remap_index; ++ if ( offset >= INTREMAP_ENTRIES ) ++ { ++ offset = alloc_intremap_entry(iommu->seg, bdf); ++ if ( offset >= INTREMAP_ENTRIES ) ++ { ++ spin_unlock_irqrestore(lock, flags); ++ return -ENOSPC; ++ } + *remap_index = offset; +- else +- BUG_ON(*remap_index != offset); ++ } + +- entry = (u32*)get_intremap_entry(iommu->seg, req_id, offset); ++ entry = get_intremap_entry(iommu->seg, req_id, offset); + update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); + spin_unlock_irqrestore(lock, flags); + ++ *data = (msg->data & ~(INTREMAP_ENTRIES - 1)) | offset; ++ + /* + * In some special cases, a pci-e device(e.g SATA controller in IDE mode) + * will use alias id to index interrupt remapping table. 
+@@ -343,10 +413,8 @@ static void update_intremap_entry_from_m + if ( ( req_id != alias_id ) && + get_ivrs_mappings(iommu->seg)[alias_id].intremap_table != NULL ) + { +- spin_lock_irqsave(lock, flags); +- entry = (u32*)get_intremap_entry(iommu->seg, alias_id, offset); +- update_intremap_entry(entry, vector, delivery_mode, dest_mode, dest); +- spin_unlock_irqrestore(lock, flags); ++ BUG_ON(get_ivrs_mappings(iommu->seg)[req_id].intremap_table != ++ get_ivrs_mappings(iommu->seg)[alias_id].intremap_table); + } + + done: +@@ -358,19 +426,22 @@ done: + amd_iommu_flush_intremap(iommu, alias_id); + spin_unlock_irqrestore(&iommu->lock, flags); + } ++ ++ return 0; + } + + static struct amd_iommu *_find_iommu_for_device(int seg, int bdf) + { +- struct amd_iommu *iommu = find_iommu_for_device(seg, bdf); +- +- if ( iommu ) +- return iommu; ++ struct amd_iommu *iommu; + + list_for_each_entry ( iommu, &amd_iommu_head, list ) + if ( iommu->seg == seg && iommu->bdf == bdf ) + return NULL; + ++ iommu = find_iommu_for_device(seg, bdf); ++ if ( iommu ) ++ return iommu; ++ + AMD_IOMMU_DEBUG("No IOMMU for MSI dev = %04x:%02x:%02x.%u\n", + seg, PCI_BUS(bdf), PCI_SLOT(bdf), PCI_FUNC(bdf)); + return ERR_PTR(-EINVAL); +@@ -380,8 +451,9 @@ int amd_iommu_msi_msg_update_ire( + struct msi_desc *msi_desc, struct msi_msg *msg) + { + struct pci_dev *pdev = msi_desc->dev; +- int bdf, seg; ++ int bdf, seg, rc; + struct amd_iommu *iommu; ++ u32 data; + + bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf; + seg = pdev ? pdev->seg : hpet_sbdf.seg; +@@ -390,11 +462,12 @@ int amd_iommu_msi_msg_update_ire( + if ( IS_ERR_OR_NULL(iommu) ) + return PTR_ERR(iommu); + +- if ( msi_desc->remap_index >= 0 ) ++ if ( msi_desc->remap_index >= 0 && !msg ) + { + do { + update_intremap_entry_from_msi_msg(iommu, bdf, +- &msi_desc->remap_index, NULL); ++ &msi_desc->remap_index, ++ NULL, NULL); + if ( !pdev || !pdev->phantom_stride ) + break; + bdf += pdev->phantom_stride; +@@ -409,19 +482,39 @@ int amd_iommu_msi_msg_update_ire( + return 0; + + do { +- update_intremap_entry_from_msi_msg(iommu, bdf, &msi_desc->remap_index, +- msg); +- if ( !pdev || !pdev->phantom_stride ) ++ rc = update_intremap_entry_from_msi_msg(iommu, bdf, ++ &msi_desc->remap_index, ++ msg, &data); ++ if ( rc || !pdev || !pdev->phantom_stride ) + break; + bdf += pdev->phantom_stride; + } while ( PCI_SLOT(bdf) == PCI_SLOT(pdev->devfn) ); + +- return 0; ++ msg->data = data; ++ return rc; + } + + void amd_iommu_read_msi_from_ire( + struct msi_desc *msi_desc, struct msi_msg *msg) + { ++ unsigned int offset = msg->data & (INTREMAP_ENTRIES - 1); ++ const struct pci_dev *pdev = msi_desc->dev; ++ u16 bdf = pdev ? PCI_BDF2(pdev->bus, pdev->devfn) : hpet_sbdf.bdf; ++ u16 seg = pdev ? 
pdev->seg : hpet_sbdf.seg; ++ const u32 *entry; ++ ++ if ( IS_ERR_OR_NULL(_find_iommu_for_device(seg, bdf)) ) ++ return; ++ ++ entry = get_intremap_entry(seg, get_dma_requestor_id(seg, bdf), offset); ++ ++ msg->data &= ~(INTREMAP_ENTRIES - 1); ++ msg->data |= get_field_from_reg_u32(*entry, ++ INT_REMAP_ENTRY_INTTYPE_MASK, ++ INT_REMAP_ENTRY_INTTYPE_SHIFT) << 8; ++ msg->data |= get_field_from_reg_u32(*entry, ++ INT_REMAP_ENTRY_VECTOR_MASK, ++ INT_REMAP_ENTRY_VECTOR_SHIFT); + } + + int __init amd_iommu_free_intremap_table( +@@ -438,23 +531,42 @@ int __init amd_iommu_free_intremap_table + return 0; + } + +-void* __init amd_iommu_alloc_intremap_table(void) ++void* __init amd_iommu_alloc_intremap_table(unsigned long **inuse_map) + { + void *tb; + tb = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER); + BUG_ON(tb == NULL); + memset(tb, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER)); ++ *inuse_map = xzalloc_array(unsigned long, BITS_TO_LONGS(INTREMAP_ENTRIES)); ++ BUG_ON(*inuse_map == NULL); + return tb; + } + + int __init amd_setup_hpet_msi(struct msi_desc *msi_desc) + { +- if ( (!msi_desc->hpet_id != hpet_sbdf.id) || +- (hpet_sbdf.iommu == NULL) ) ++ spinlock_t *lock; ++ unsigned long flags; ++ int rc = 0; ++ ++ if ( msi_desc->hpet_id != hpet_sbdf.id || !hpet_sbdf.iommu ) + { +- AMD_IOMMU_DEBUG("Fail to setup HPET MSI remapping\n"); +- return 1; ++ AMD_IOMMU_DEBUG("Failed to setup HPET MSI remapping: %s\n", ++ hpet_sbdf.iommu ? "Wrong HPET" : "No IOMMU"); ++ return -ENODEV; + } + +- return 0; ++ lock = get_intremap_lock(hpet_sbdf.seg, hpet_sbdf.bdf); ++ spin_lock_irqsave(lock, flags); ++ ++ msi_desc->remap_index = alloc_intremap_entry(hpet_sbdf.seg, ++ hpet_sbdf.bdf); ++ if ( msi_desc->remap_index >= INTREMAP_ENTRIES ) ++ { ++ msi_desc->remap_index = -1; ++ rc = -ENXIO; ++ } ++ ++ spin_unlock_irqrestore(lock, flags); ++ ++ return rc; + } +--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c ++++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c +@@ -637,7 +637,7 @@ const struct iommu_ops amd_iommu_ops = { + .get_device_group_id = amd_iommu_group_id, + .update_ire_from_apic = amd_iommu_ioapic_update_ire, + .update_ire_from_msi = amd_iommu_msi_msg_update_ire, +- .read_apic_from_ire = __io_apic_read, ++ .read_apic_from_ire = amd_iommu_read_ioapic_from_ire, + .read_msi_from_ire = amd_iommu_read_msi_from_ire, + .setup_hpet_msi = amd_setup_hpet_msi, + .suspend = amd_iommu_suspend, +--- a/xen/include/asm-x86/amd-iommu.h ++++ b/xen/include/asm-x86/amd-iommu.h +@@ -119,6 +119,7 @@ struct ivrs_mappings { + + /* per device interrupt remapping table */ + void *intremap_table; ++ unsigned long *intremap_inuse; + spinlock_t intremap_lock; + + /* ivhd device data settings */ +--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h ++++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h +@@ -470,10 +470,6 @@ + #define MAX_AMD_IOMMUS 32 + + /* interrupt remapping table */ +-#define INT_REMAP_INDEX_DM_MASK 0x1C00 +-#define INT_REMAP_INDEX_DM_SHIFT 10 +-#define INT_REMAP_INDEX_VECTOR_MASK 0x3FC +-#define INT_REMAP_INDEX_VECTOR_SHIFT 2 + #define INT_REMAP_ENTRY_REMAPEN_MASK 0x00000001 + #define INT_REMAP_ENTRY_REMAPEN_SHIFT 0 + #define INT_REMAP_ENTRY_SUPIOPF_MASK 0x00000002 +--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h ++++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h +@@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device( + + /* interrupt remapping */ + int amd_iommu_setup_ioapic_remapping(void); +-void *amd_iommu_alloc_intremap_table(void); ++void *amd_iommu_alloc_intremap_table(unsigned long **); + int 
amd_iommu_free_intremap_table(u16 seg, struct ivrs_mappings *); + void amd_iommu_ioapic_update_ire( + unsigned int apic, unsigned int reg, unsigned int value); ++unsigned int amd_iommu_read_ioapic_from_ire( ++ unsigned int apic, unsigned int reg); + int amd_iommu_msi_msg_update_ire( + struct msi_desc *msi_desc, struct msi_msg *msg); + void amd_iommu_read_msi_from_ire( +@@ -101,15 +103,17 @@ int amd_setup_hpet_msi(struct msi_desc * + + extern struct ioapic_sbdf { + u16 bdf, seg; +- unsigned long *pin_setup; ++ u16 *pin_2_idx; + } ioapic_sbdf[MAX_IO_APICS]; +-extern void *shared_intremap_table; + + extern struct hpet_sbdf { + u16 bdf, seg, id; + struct amd_iommu *iommu; + } hpet_sbdf; + ++extern void *shared_intremap_table; ++extern unsigned long *shared_intremap_inuse; ++ + /* power management support */ + void amd_iommu_resume(void); + void amd_iommu_suspend(void); diff --git a/51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch b/51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch new file mode 100644 index 0000000..34f25d9 --- /dev/null +++ b/51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch @@ -0,0 +1,68 @@ +# Commit 561e0f86660f10db492c1ead1cd772013a6cc32d +# Date 2013-07-16 11:54:07 +0200 +# Author Jan Beulich +# Committer Jan Beulich +AMD IOMMU: untie remap and vector maps + +With the specific IRTEs used for an interrupt no longer depending on +the vector, there's no need to tie the remap sharing model to the +vector sharing one. + +Signed-off-by: Jan Beulich +Acked-by: George Dunlap +Acked-by: Suravee Suthikulpanit + + +--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c ++++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c +@@ -207,50 +207,6 @@ int __init amd_iov_detect(void) + + init_done = 1; + +- /* +- * AMD IOMMUs don't distinguish between vectors destined for +- * different cpus when doing interrupt remapping. This means +- * that interrupts going through the same intremap table +- * can't share the same vector. +- * +- * If irq_vector_map isn't specified, choose a sensible default: +- * - If we're using per-device interemap tables, per-device +- * vector non-sharing maps +- * - If we're using a global interemap table, global vector +- * non-sharing map +- */ +- if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT ) +- { +- if ( amd_iommu_perdev_intremap ) +- { +- /* Per-device vector map logic is broken for devices with multiple +- * MSI-X interrupts (and would also be for multiple MSI, if Xen +- * supported it). +- * +- * Until this is fixed, use global vector tables as far as the irq +- * logic is concerned to avoid the buggy behaviour of per-device +- * maps in map_domain_pirq(), and use per-device tables as far as +- * intremap code is concerned to avoid the security issue. +- */ +- printk(XENLOG_WARNING "AMD-Vi: per-device vector map logic is broken. " +- "Using per-device-global maps instead until a fix is found.\n"); +- +- opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL; +- } +- else +- { +- printk("AMD-Vi: Enabling global vector map\n"); +- opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL; +- } +- } +- else +- { +- printk("AMD-Vi: Not overriding irq_vector_map setting\n"); +- +- if ( opt_irq_vector_map != OPT_IRQ_VECTOR_MAP_GLOBAL ) +- printk(XENLOG_WARNING "AMD-Vi: per-device vector map logic is broken. 
" +- "Use irq_vector_map=global to work around.\n"); +- } + if ( !amd_iommu_perdev_intremap ) + printk(XENLOG_WARNING "AMD-Vi: Using global interrupt remap table is not recommended (see XSA-36)!\n"); + return scan_pci_devices(); diff --git a/51e63d80-x86-cpuidle-Change-logging-for-unknown-APIC-IDs.patch b/51e63d80-x86-cpuidle-Change-logging-for-unknown-APIC-IDs.patch new file mode 100644 index 0000000..5c1c604 --- /dev/null +++ b/51e63d80-x86-cpuidle-Change-logging-for-unknown-APIC-IDs.patch @@ -0,0 +1,44 @@ +# Commit 85047d9e4f4afeb73bca1e98f705a2f4f1d51c03 +# Date 2013-07-17 08:45:20 +0200 +# Author Andrew Cooper +# Committer Jan Beulich +x86/cpuidle: Change logging for unknown APIC IDs + +Dom0 uses this hypercall to pass ACPI information to Xen. It is not very +uncommon for more cpus to be listed in the ACPI tables than are present on the +system, particularly on systems with a common BIOS for a 2 and 4 socket server +varients. + +As Dom0 does not control the number of entries in the ACPI tables, and is +required to pass everything it finds to Xen, change the logging. + +There is now an single unconditional warning for the first unknown ID, and +further warnings if "cpuinfo" is requested by the user on the command line. + +Signed-off-by: Andrew Cooper + +--- a/xen/arch/x86/acpi/cpu_idle.c ++++ b/xen/arch/x86/acpi/cpu_idle.c +@@ -1031,7 +1031,10 @@ long set_cx_pminfo(uint32_t cpu, struct + cpu_id = get_cpu_id(cpu); + if ( cpu_id == -1 ) + { +- printk(XENLOG_ERR "no cpu_id for acpi_id %d\n", cpu); ++ static bool_t warn_once = 1; ++ if ( warn_once || opt_cpu_info ) ++ printk(XENLOG_WARNING "No CPU ID for APIC ID %#x\n", cpu); ++ warn_once = 0; + return -EINVAL; + } + +--- a/xen/arch/x86/cpu/common.c ++++ b/xen/arch/x86/cpu/common.c +@@ -63,7 +63,7 @@ static struct cpu_dev default_cpu = { + }; + static struct cpu_dev * this_cpu = &default_cpu; + +-bool_t __cpuinitdata opt_cpu_info; ++bool_t opt_cpu_info; + boolean_param("cpuinfo", opt_cpu_info); + + int __cpuinit get_model_name(struct cpuinfo_x86 *c) diff --git a/51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch b/51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch new file mode 100644 index 0000000..1ff5d47 --- /dev/null +++ b/51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch @@ -0,0 +1,77 @@ +# Commit 303066fdb1e4fe816e48acd665453f58b8399e81 +# Date 2013-07-17 08:47:18 +0200 +# Author Jan Beulich +# Committer Jan Beulich +VMX: fix interaction of APIC-V and Viridian emulation + +Viridian using a synthetic MSR for issuing EOI notifications bypasses +the normal in-processor handling, which would clear +GUEST_INTR_STATUS.SVI. Hence we need to do this in software in order +for future interrupts to get delivered. + +Based on analysis by Yang Z Zhang . 
+ +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Reviewed-by: Yang Zhang + +--- a/xen/arch/x86/hvm/vlapic.c ++++ b/xen/arch/x86/hvm/vlapic.c +@@ -386,6 +386,9 @@ void vlapic_EOI_set(struct vlapic *vlapi + + vlapic_clear_vector(vector, &vlapic->regs->data[APIC_ISR]); + ++ if ( hvm_funcs.handle_eoi ) ++ hvm_funcs.handle_eoi(vector); ++ + if ( vlapic_test_and_clear_vector(vector, &vlapic->regs->data[APIC_TMR]) ) + vioapic_update_EOI(vlapic_domain(vlapic), vector); + +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -1502,6 +1502,15 @@ static void vmx_sync_pir_to_irr(struct v + vlapic_set_vector(i, &vlapic->regs->data[APIC_IRR]); + } + ++static void vmx_handle_eoi(u8 vector) ++{ ++ unsigned long status = __vmread(GUEST_INTR_STATUS); ++ ++ /* We need to clear the SVI field. */ ++ status &= VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK; ++ __vmwrite(GUEST_INTR_STATUS, status); ++} ++ + static struct hvm_function_table __initdata vmx_function_table = { + .name = "VMX", + .cpu_up_prepare = vmx_cpu_up_prepare, +@@ -1554,6 +1563,7 @@ static struct hvm_function_table __initd + .process_isr = vmx_process_isr, + .deliver_posted_intr = vmx_deliver_posted_intr, + .sync_pir_to_irr = vmx_sync_pir_to_irr, ++ .handle_eoi = vmx_handle_eoi, + .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m, + }; + +@@ -1580,7 +1590,10 @@ const struct hvm_function_table * __init + + setup_ept_dump(); + } +- ++ ++ if ( !cpu_has_vmx_virtual_intr_delivery ) ++ vmx_function_table.handle_eoi = NULL; ++ + if ( cpu_has_vmx_posted_intr_processing ) + alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt); + else +--- a/xen/include/asm-x86/hvm/hvm.h ++++ b/xen/include/asm-x86/hvm/hvm.h +@@ -186,6 +186,7 @@ struct hvm_function_table { + void (*process_isr)(int isr, struct vcpu *v); + void (*deliver_posted_intr)(struct vcpu *v, u8 vector); + void (*sync_pir_to_irr)(struct vcpu *v); ++ void (*handle_eoi)(u8 vector); + + /*Walk nested p2m */ + int (*nhvm_hap_walk_L1_p2m)(struct vcpu *v, paddr_t L2_gpa, diff --git a/51e6540d-x86-don-t-use-destroy_xen_mappings-for-vunmap.patch b/51e6540d-x86-don-t-use-destroy_xen_mappings-for-vunmap.patch new file mode 100644 index 0000000..39ee3f7 --- /dev/null +++ b/51e6540d-x86-don-t-use-destroy_xen_mappings-for-vunmap.patch @@ -0,0 +1,41 @@ +# Commit 68caac7f6f4687241a24e804a9fca19aa26fe183 +# Date 2013-07-17 10:21:33 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86: don't use destroy_xen_mappings() for vunmap() + +Its attempt to tear down intermediate page table levels may race with +map_pages_to_xen() establishing them, and now that +map_domain_page_global() is backed by vmap() this teardown is also +wasteful (as it's very likely to need the same address space populated +again within foreseeable time). + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/xen/common/vmap.c ++++ b/xen/common/vmap.c +@@ -196,9 +196,13 @@ void *vmap(const unsigned long *mfn, uns + + void vunmap(const void *va) + { ++#ifndef _PAGE_NONE + unsigned long addr = (unsigned long)va; + + destroy_xen_mappings(addr, addr + PAGE_SIZE * vm_size(va)); ++#else /* Avoid tearing down intermediate page tables. 
*/ ++ map_pages_to_xen((unsigned long)va, 0, vm_size(va), _PAGE_NONE); ++#endif + vm_free(va); + } + #endif +--- a/xen/include/asm-x86/page.h ++++ b/xen/include/asm-x86/page.h +@@ -288,6 +288,7 @@ extern l1_pgentry_t l1_identmap[L1_PAGET + void paging_init(void); + #endif /* !defined(__ASSEMBLY__) */ + ++#define _PAGE_NONE _AC(0x000,U) + #define _PAGE_PRESENT _AC(0x001,U) + #define _PAGE_RW _AC(0x002,U) + #define _PAGE_USER _AC(0x004,U) diff --git a/51e7963f-x86-time-Update-wallclock-in-shared-info-when-altering-domain-time-offset.patch b/51e7963f-x86-time-Update-wallclock-in-shared-info-when-altering-domain-time-offset.patch new file mode 100644 index 0000000..165f9ab --- /dev/null +++ b/51e7963f-x86-time-Update-wallclock-in-shared-info-when-altering-domain-time-offset.patch @@ -0,0 +1,24 @@ +# Commit 915a59f25c5eddd86bc2cae6389d0ed2ab87e69e +# Date 2013-07-18 09:16:15 +0200 +# Author Andrew Cooper +# Committer Jan Beulich +x86/time: Update wallclock in shared info when altering domain time offset + +domain_set_time_offset() udpates d->time_offset_seconds, but does not correct +the wallclock in the shared info, meaning that it is incorrect until the next +XENPF_settime hypercall from dom0 which resynchronises the wallclock for all +domains. + +Signed-off-by: Andrew Cooper +Acked-by: Keir Fraser + +--- a/xen/arch/x86/time.c ++++ b/xen/arch/x86/time.c +@@ -931,6 +931,7 @@ void domain_set_time_offset(struct domai + d->time_offset_seconds = time_offset_seconds; + if ( is_hvm_domain(d) ) + rtc_update_clock(d); ++ update_domain_wallclock_time(d); + } + + int cpu_frequency_change(u64 freq) diff --git a/51ffd577-fix-off-by-one-mistakes-in-vm_alloc.patch b/51ffd577-fix-off-by-one-mistakes-in-vm_alloc.patch new file mode 100644 index 0000000..b97e081 --- /dev/null +++ b/51ffd577-fix-off-by-one-mistakes-in-vm_alloc.patch @@ -0,0 +1,62 @@ +# Commit b0e55bd49725c7c0183eb18670997b9e5930adac +# Date 2013-08-05 18:40:23 +0200 +# Author Jan Beulich +# Committer Jan Beulich +fix off-by-one mistakes in vm_alloc() + +Also add another pair of assertions to catch eventual further cases of +incorrect accounting. + +Signed-off-by: Jan Beulich +Reviewed-by Andrew Cooper +Acked-by: Keir Fraser + +--- a/xen/common/vmap.c ++++ b/xen/common/vmap.c +@@ -57,8 +57,8 @@ void *vm_alloc(unsigned int nr, unsigned + { + struct page_info *pg; + +- ASSERT(!test_bit(vm_low, vm_bitmap)); +- for ( start = vm_low; ; ) ++ ASSERT(vm_low == vm_top || !test_bit(vm_low, vm_bitmap)); ++ for ( start = vm_low; start < vm_top; ) + { + bit = find_next_bit(vm_bitmap, vm_top, start + 1); + if ( bit > vm_top ) +@@ -68,12 +68,18 @@ void *vm_alloc(unsigned int nr, unsigned + * corresponding page a guard one. + */ + start = (start + align) & ~(align - 1); +- if ( start + nr <= bit ) +- break; +- start = bit < vm_top ? 
+- find_next_zero_bit(vm_bitmap, vm_top, bit + 1) : bit; +- if ( start >= vm_top ) +- break; ++ if ( bit < vm_top ) ++ { ++ if ( start + nr < bit ) ++ break; ++ start = find_next_zero_bit(vm_bitmap, vm_top, bit + 1); ++ } ++ else ++ { ++ if ( start + nr <= bit ) ++ break; ++ start = bit; ++ } + } + + if ( start < vm_top ) +@@ -115,6 +121,10 @@ void *vm_alloc(unsigned int nr, unsigned + + for ( bit = start; bit < start + nr; ++bit ) + __set_bit(bit, vm_bitmap); ++ if ( bit < vm_top ) ++ ASSERT(!test_bit(bit, vm_bitmap)); ++ else ++ ASSERT(bit == vm_top); + if ( start <= vm_low + 2 ) + vm_low = bit; + spin_unlock(&vm_lock); diff --git a/51ffd5fd-x86-refine-FPU-selector-handling-code-for-XSAVEOPT.patch b/51ffd5fd-x86-refine-FPU-selector-handling-code-for-XSAVEOPT.patch new file mode 100644 index 0000000..1dabcc0 --- /dev/null +++ b/51ffd5fd-x86-refine-FPU-selector-handling-code-for-XSAVEOPT.patch @@ -0,0 +1,60 @@ +# Commit c58d9f2f4844c2ce8859a8d0f26a54cd058eb51f +# Date 2013-08-05 18:42:37 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86: refine FPU selector handling code for XSAVEOPT + +Some extra tweaks are necessary to deal with the situation of XSAVEOPT +not writing the FPU portion of the save image (due to it detecting that +the register state did not get modified since the last XRSTOR). + +Signed-off-by: Jan Beulich +Tested-by: Ben Guthro +Acked-by: Keir Fraser + +--- a/xen/arch/x86/xstate.c ++++ b/xen/arch/x86/xstate.c +@@ -71,10 +71,28 @@ void xsave(struct vcpu *v, uint64_t mask + + if ( word_size <= 0 || !is_pv_32bit_vcpu(v) ) + { ++ typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel; ++ typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel; ++ + if ( cpu_has_xsaveopt ) ++ { ++ /* ++ * xsaveopt may not write the FPU portion even when the respective ++ * mask bit is set. For the check further down to work we hence ++ * need to put the save image back into the state that it was in ++ * right after the previous xsaveopt. ++ */ ++ if ( word_size > 0 && ++ (ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 4 || ++ ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 2) ) ++ { ++ ptr->fpu_sse.fip.sel = 0; ++ ptr->fpu_sse.fdp.sel = 0; ++ } + asm volatile ( ".byte 0x48,0x0f,0xae,0x37" + : "=m" (*ptr) + : "a" (lmask), "d" (hmask), "D" (ptr) ); ++ } + else + asm volatile ( ".byte 0x48,0x0f,0xae,0x27" + : "=m" (*ptr) +@@ -87,7 +105,14 @@ void xsave(struct vcpu *v, uint64_t mask + */ + (!(ptr->fpu_sse.fsw & 0x0080) && + boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ) ++ { ++ if ( cpu_has_xsaveopt && word_size > 0 ) ++ { ++ ptr->fpu_sse.fip.sel = fcs; ++ ptr->fpu_sse.fdp.sel = fds; ++ } + return; ++ } + + if ( word_size > 0 && + !((ptr->fpu_sse.fip.addr | ptr->fpu_sse.fdp.addr) >> 32) ) diff --git a/520114bb-Nested-VMX-Flush-TLBs-and-Caches-if-paging-mode-changed.patch b/520114bb-Nested-VMX-Flush-TLBs-and-Caches-if-paging-mode-changed.patch new file mode 100644 index 0000000..75b6356 --- /dev/null +++ b/520114bb-Nested-VMX-Flush-TLBs-and-Caches-if-paging-mode-changed.patch @@ -0,0 +1,23 @@ +# Commit e1ab5c77b44b7bd835a2c032fa4963b36545fdb3 +# Date 2013-08-06 17:22:35 +0200 +# Author Yang Zhang +# Committer Jan Beulich +Nested VMX: Flush TLBs and Caches if paging mode changed + +According to SDM, if paging mode is changed, then whole TLBs and caches will +be flushed. This is missed in nested handle logic. Also this fixed the issue +that 64 bits windows cannot boot up on top of L1 kvm. 
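A minimal sketch of the flush mechanism the one-line fix relies on; the struct and helper names here are hypothetical stand-ins for Xen's hvm_asid_flush_vcpu() machinery:

    struct vcpu_asid {
        unsigned long generation;   /* 0 == invalid, forces reallocation */
    };

    /* Invalidate the vCPU's ASID; the next VM entry must allocate a fresh
       one, so stale translations tagged with the old ASID become unreachable. */
    static void asid_flush_vcpu(struct vcpu_asid *a)
    {
        a->generation = 0;
    }

Invalidating the generation forces a fresh ASID at the next VM entry, which gives the full TLB flush that the SDM requires on a paging mode change.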
+ +Signed-off-by: Yang Zhang +Acked-by: Keir Fraser + +--- a/xen/arch/x86/mm/paging.c ++++ b/xen/arch/x86/mm/paging.c +@@ -709,6 +709,7 @@ void paging_update_nestedmode(struct vcp + else + /* TODO: shadow-on-shadow */ + v->arch.paging.nestedmode = NULL; ++ hvm_asid_flush_vcpu(v); + } + + void paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, diff --git a/520a24f6-x86-AMD-Fix-nested-svm-crash-due-to-assertion-in-__virt_to_maddr.patch b/520a24f6-x86-AMD-Fix-nested-svm-crash-due-to-assertion-in-__virt_to_maddr.patch new file mode 100644 index 0000000..1d781af --- /dev/null +++ b/520a24f6-x86-AMD-Fix-nested-svm-crash-due-to-assertion-in-__virt_to_maddr.patch @@ -0,0 +1,138 @@ +# Commit 85fc517ec3055e8e8d9c9e36e15a81e630237252 +# Date 2013-08-13 14:22:14 +0200 +# Author Suravee Suthikulpanit +# Committer Jan Beulich +x86/AMD: Fix nested svm crash due to assertion in __virt_to_maddr + +Fix assertion in __virt_to_maddr when starting nested SVM guest +in debug mode. Investigation has shown that svm_vmsave/svm_vmload +make use of __pa() with invalid address. + +Signed-off-by: Suravee Suthikulpanit +Reviewed-by: Tim Deegan + +--- a/xen/arch/x86/hvm/svm/svm.c ++++ b/xen/arch/x86/hvm/svm/svm.c +@@ -1792,6 +1792,32 @@ svm_vmexit_do_vmrun(struct cpu_user_regs + return; + } + ++static struct page_info * ++nsvm_get_nvmcb_page(struct vcpu *v, uint64_t vmcbaddr) ++{ ++ p2m_type_t p2mt; ++ struct page_info *page; ++ struct nestedvcpu *nv = &vcpu_nestedhvm(v); ++ ++ if ( !nestedsvm_vmcb_map(v, vmcbaddr) ) ++ return NULL; ++ ++ /* Need to translate L1-GPA to MPA */ ++ page = get_page_from_gfn(v->domain, ++ nv->nv_vvmcxaddr >> PAGE_SHIFT, ++ &p2mt, P2M_ALLOC | P2M_UNSHARE); ++ if ( !page ) ++ return NULL; ++ ++ if ( !p2m_is_ram(p2mt) || p2m_is_readonly(p2mt) ) ++ { ++ put_page(page); ++ return NULL; ++ } ++ ++ return page; ++} ++ + static void + svm_vmexit_do_vmload(struct vmcb_struct *vmcb, + struct cpu_user_regs *regs, +@@ -1799,7 +1825,7 @@ svm_vmexit_do_vmload(struct vmcb_struct + { + int ret; + unsigned int inst_len; +- struct nestedvcpu *nv = &vcpu_nestedhvm(v); ++ struct page_info *page; + + if ( (inst_len = __get_instruction_length(v, INSTR_VMLOAD)) == 0 ) + return; +@@ -1810,13 +1836,18 @@ svm_vmexit_do_vmload(struct vmcb_struct + goto inject; + } + +- if (!nestedsvm_vmcb_map(v, vmcbaddr)) { +- gdprintk(XENLOG_ERR, "VMLOAD: mapping vmcb failed, injecting #UD\n"); ++ page = nsvm_get_nvmcb_page(v, vmcbaddr); ++ if ( !page ) ++ { ++ gdprintk(XENLOG_ERR, ++ "VMLOAD: mapping failed, injecting #UD\n"); + ret = TRAP_invalid_op; + goto inject; + } + +- svm_vmload(nv->nv_vvmcx); ++ svm_vmload_pa(page_to_maddr(page)); ++ put_page(page); ++ + /* State in L1 VMCB is stale now */ + v->arch.hvm_svm.vmcb_in_sync = 0; + +@@ -1835,7 +1866,7 @@ svm_vmexit_do_vmsave(struct vmcb_struct + { + int ret; + unsigned int inst_len; +- struct nestedvcpu *nv = &vcpu_nestedhvm(v); ++ struct page_info *page; + + if ( (inst_len = __get_instruction_length(v, INSTR_VMSAVE)) == 0 ) + return; +@@ -1846,14 +1877,17 @@ svm_vmexit_do_vmsave(struct vmcb_struct + goto inject; + } + +- if (!nestedsvm_vmcb_map(v, vmcbaddr)) { +- gdprintk(XENLOG_ERR, "VMSAVE: mapping vmcb failed, injecting #UD\n"); ++ page = nsvm_get_nvmcb_page(v, vmcbaddr); ++ if ( !page ) ++ { ++ gdprintk(XENLOG_ERR, ++ "VMSAVE: mapping vmcb failed, injecting #UD\n"); + ret = TRAP_invalid_op; + goto inject; + } + +- svm_vmsave(nv->nv_vvmcx); +- ++ svm_vmsave_pa(page_to_maddr(page)); ++ put_page(page); + __update_guest_eip(regs, inst_len); + return; + +--- 
a/xen/include/asm-x86/hvm/svm/svm.h ++++ b/xen/include/asm-x86/hvm/svm/svm.h +@@ -41,18 +41,21 @@ + #define SVM_REG_R14 (14) + #define SVM_REG_R15 (15) + +-static inline void svm_vmload(void *vmcb) ++#define svm_vmload(x) svm_vmload_pa(__pa(x)) ++#define svm_vmsave(x) svm_vmsave_pa(__pa(x)) ++ ++static inline void svm_vmload_pa(paddr_t vmcb) + { + asm volatile ( + ".byte 0x0f,0x01,0xda" /* vmload */ +- : : "a" (__pa(vmcb)) : "memory" ); ++ : : "a" (vmcb) : "memory" ); + } + +-static inline void svm_vmsave(void *vmcb) ++static inline void svm_vmsave_pa(paddr_t vmcb) + { + asm volatile ( + ".byte 0x0f,0x01,0xdb" /* vmsave */ +- : : "a" (__pa(vmcb)) : "memory" ); ++ : : "a" (vmcb) : "memory" ); + } + + static inline void svm_invlpga(unsigned long vaddr, uint32_t asid) diff --git a/520a2570-x86-AMD-Inject-GP-instead-of-UD-when-unable-to-map-vmcb.patch b/520a2570-x86-AMD-Inject-GP-instead-of-UD-when-unable-to-map-vmcb.patch new file mode 100644 index 0000000..8e2977b --- /dev/null +++ b/520a2570-x86-AMD-Inject-GP-instead-of-UD-when-unable-to-map-vmcb.patch @@ -0,0 +1,91 @@ +# Commit 910daaf5aaa837624099c0fc5c373bea7202ff43 +# Date 2013-08-13 14:24:16 +0200 +# Author Suravee Suthikulpanit +# Committer Jan Beulich +x86/AMD: Inject #GP instead of #UD when unable to map vmcb + +According to AMD Programmer's Manual vol2, vmrun, vmsave and vmload +should inject #GP instead of #UD when unable to access memory +location for vmcb. Also, the code should make sure that L1 guest +EFER.SVME is not zero. Otherwise, #UD should be injected. + +Signed-off-by: Suravee Suthikulpanit +Reviewed-by: Tim Deegan + +--- a/xen/arch/x86/hvm/svm/svm.c ++++ b/xen/arch/x86/hvm/svm/svm.c +@@ -1776,15 +1776,17 @@ static void + svm_vmexit_do_vmrun(struct cpu_user_regs *regs, + struct vcpu *v, uint64_t vmcbaddr) + { +- if (!nestedhvm_enabled(v->domain)) { ++ if ( !nsvm_efer_svm_enabled(v) ) ++ { + gdprintk(XENLOG_ERR, "VMRUN: nestedhvm disabled, injecting #UD\n"); + hvm_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE); + return; + } + +- if (!nestedsvm_vmcb_map(v, vmcbaddr)) { +- gdprintk(XENLOG_ERR, "VMRUN: mapping vmcb failed, injecting #UD\n"); +- hvm_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE); ++ if ( !nestedsvm_vmcb_map(v, vmcbaddr) ) ++ { ++ gdprintk(XENLOG_ERR, "VMRUN: mapping vmcb failed, injecting #GP\n"); ++ hvm_inject_hw_exception(TRAP_gp_fault, HVM_DELIVER_NO_ERROR_CODE); + return; + } + +@@ -1830,7 +1832,8 @@ svm_vmexit_do_vmload(struct vmcb_struct + if ( (inst_len = __get_instruction_length(v, INSTR_VMLOAD)) == 0 ) + return; + +- if (!nestedhvm_enabled(v->domain)) { ++ if ( !nsvm_efer_svm_enabled(v) ) ++ { + gdprintk(XENLOG_ERR, "VMLOAD: nestedhvm disabled, injecting #UD\n"); + ret = TRAP_invalid_op; + goto inject; +@@ -1840,8 +1843,8 @@ svm_vmexit_do_vmload(struct vmcb_struct + if ( !page ) + { + gdprintk(XENLOG_ERR, +- "VMLOAD: mapping failed, injecting #UD\n"); +- ret = TRAP_invalid_op; ++ "VMLOAD: mapping failed, injecting #GP\n"); ++ ret = TRAP_gp_fault; + goto inject; + } + +@@ -1871,7 +1874,8 @@ svm_vmexit_do_vmsave(struct vmcb_struct + if ( (inst_len = __get_instruction_length(v, INSTR_VMSAVE)) == 0 ) + return; + +- if (!nestedhvm_enabled(v->domain)) { ++ if ( !nsvm_efer_svm_enabled(v) ) ++ { + gdprintk(XENLOG_ERR, "VMSAVE: nestedhvm disabled, injecting #UD\n"); + ret = TRAP_invalid_op; + goto inject; +@@ -1881,8 +1885,8 @@ svm_vmexit_do_vmsave(struct vmcb_struct + if ( !page ) + { + gdprintk(XENLOG_ERR, +- "VMSAVE: mapping vmcb failed, injecting #UD\n"); +- ret = 
TRAP_invalid_op; ++ "VMSAVE: mapping vmcb failed, injecting #GP\n"); ++ ret = TRAP_gp_fault; + goto inject; + } + +--- a/xen/include/asm-x86/hvm/svm/nestedsvm.h ++++ b/xen/include/asm-x86/hvm/svm/nestedsvm.h +@@ -94,7 +94,7 @@ struct nestedsvm { + #define vcpu_nestedsvm(v) (vcpu_nestedhvm(v).u.nsvm) + + /* True when l1 guest enabled SVM in EFER */ +-#define hvm_svm_enabled(v) \ ++#define nsvm_efer_svm_enabled(v) \ + (!!((v)->arch.hvm_vcpu.guest_efer & EFER_SVME)) + + int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr); diff --git a/520a5504-VMX-add-boot-parameter-to-enable-disable-APIC-v-dynamically.patch b/520a5504-VMX-add-boot-parameter-to-enable-disable-APIC-v-dynamically.patch new file mode 100644 index 0000000..2f6e767 --- /dev/null +++ b/520a5504-VMX-add-boot-parameter-to-enable-disable-APIC-v-dynamically.patch @@ -0,0 +1,38 @@ +# Commit 0c006b41a283a0a569c863d44abde5aa5750ae01 +# Date 2013-08-13 17:47:16 +0200 +# Author Yang Zhang +# Committer Jan Beulich +VMX: add boot parameter to enable/disable APIC-v dynamically + +Add a boot parameter to enable/disable the APIC-v dynamically. APIC-v is +enabled by default. User can use apicv=0 to disable it. + +Signed-off-by: Yang Zhang + +--- a/xen/arch/x86/hvm/vmx/vmcs.c ++++ b/xen/arch/x86/hvm/vmx/vmcs.c +@@ -46,6 +46,9 @@ boolean_param("vpid", opt_vpid_enabled); + static bool_t __read_mostly opt_unrestricted_guest_enabled = 1; + boolean_param("unrestricted_guest", opt_unrestricted_guest_enabled); + ++static bool_t __read_mostly opt_apicv_enabled = 1; ++boolean_param("apicv", opt_apicv_enabled); ++ + /* + * These two parameters are used to config the controls for Pause-Loop Exiting: + * ple_gap: upper bound on the amount of time between two successive +@@ -196,12 +199,12 @@ static int vmx_init_vmcs_config(void) + * "APIC Register Virtualization" and "Virtual Interrupt Delivery" + * can be set only when "use TPR shadow" is set + */ +- if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW ) ++ if ( (_vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW) && ++ opt_apicv_enabled ) + opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; + +- + _vmx_secondary_exec_control = adjust_vmx_controls( + "Secondary Exec Control", min, opt, + MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch); diff --git a/520b4b60-VT-d-protect-against-bogus-information-coming-from-BIOS.patch b/520b4b60-VT-d-protect-against-bogus-information-coming-from-BIOS.patch new file mode 100644 index 0000000..979c64b --- /dev/null +++ b/520b4b60-VT-d-protect-against-bogus-information-coming-from-BIOS.patch @@ -0,0 +1,41 @@ +# Commit e8e8b030ecf916fea19639f0b6a446c1c9dbe174 +# Date 2013-08-14 11:18:24 +0200 +# Author Jan Beulich +# Committer Jan Beulich +VT-d: protect against bogus information coming from BIOS + +Add checks similar to those done by Linux: The DRHD address must not +be all zeros or all ones (Linux only checks for zero), and capabilities +as well as extended capabilities must not be all ones. 
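A minimal sketch of the added sanity checks; it relies on the idiom that an all-ones value wraps to zero when incremented, so !(x + 1) tests for ~0 without a width-specific constant:

    #include <stdbool.h>
    #include <stdint.h>

    static bool drhd_sane(uint64_t address, uint64_t cap, uint64_t ecap)
    {
        if ( !address || !(address + 1) )   /* all zeros or all ones */
            return false;
        if ( !(cap + 1) || !(ecap + 1) )    /* a register reading back as ~0 */
            return false;
        return true;
    }

The DMAR parsing hunk below applies the address check; the iommu_alloc() hunk applies the capability checks.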
+ +Signed-off-by: Jan Beulich +Reviewed-by: Ben Guthro +Reviewed-by: Andrew Cooper +Tested-by: Ben Guthro +Acked by: Yang Zhang +Acked-by: Xiantao Zhang + +--- a/xen/drivers/passthrough/vtd/dmar.c ++++ b/xen/drivers/passthrough/vtd/dmar.c +@@ -447,6 +447,9 @@ acpi_parse_one_drhd(struct acpi_dmar_hea + if ( (ret = acpi_dmar_check_length(header, sizeof(*drhd))) != 0 ) + return ret; + ++ if ( !drhd->address || !(drhd->address + 1) ) ++ return -ENODEV; ++ + dmaru = xzalloc(struct acpi_drhd_unit); + if ( !dmaru ) + return -ENOMEM; +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -1159,6 +1159,9 @@ int __init iommu_alloc(struct acpi_drhd_ + dprintk(VTDPREFIX, + "cap = %"PRIx64" ecap = %"PRIx64"\n", iommu->cap, iommu->ecap); + } ++ if ( !(iommu->cap + 1) || !(iommu->ecap + 1) ) ++ return -ENODEV; ++ + if ( cap_fault_reg_offset(iommu->cap) + + cap_num_fault_regs(iommu->cap) * PRIMARY_FAULT_REG_LEN >= PAGE_SIZE || + ecap_iotlb_offset(iommu->ecap) >= PAGE_SIZE ) diff --git a/520b4bda-x86-MTRR-fix-range-check-in-mtrr_add_page.patch b/520b4bda-x86-MTRR-fix-range-check-in-mtrr_add_page.patch new file mode 100644 index 0000000..8770e53 --- /dev/null +++ b/520b4bda-x86-MTRR-fix-range-check-in-mtrr_add_page.patch @@ -0,0 +1,24 @@ +# Commit f67af6d5803b6a015e30cb490a94f9547cb0437c +# Date 2013-08-14 11:20:26 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/MTRR: fix range check in mtrr_add_page() + +Extracted from Yinghai Lu's Linux commit d5c78673 ("x86: Fix /proc/mtrr +with base/size more than 44bits"). + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Acked-by: Keir Fraser + +--- a/xen/arch/x86/cpu/mtrr/main.c ++++ b/xen/arch/x86/cpu/mtrr/main.c +@@ -340,7 +340,7 @@ int mtrr_add_page(unsigned long base, un + return -EINVAL; + } + +- if (base & size_or_mask || size & size_or_mask) { ++ if ((base | (base + size - 1)) >> (paddr_bits - PAGE_SHIFT)) { + printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); + return -EINVAL; + } diff --git a/520cb8b6-x86-time-fix-check-for-negative-time-in-__update_vcpu_system_time.patch b/520cb8b6-x86-time-fix-check-for-negative-time-in-__update_vcpu_system_time.patch new file mode 100644 index 0000000..5bebae5 --- /dev/null +++ b/520cb8b6-x86-time-fix-check-for-negative-time-in-__update_vcpu_system_time.patch @@ -0,0 +1,22 @@ +# Commit ab7f9a793c78dfea81c037b34b0dd2db7070d8f8 +# Date 2013-08-15 13:17:10 +0200 +# Author Tim Deegan +# Committer Jan Beulich +x86/time: fix check for negative time in __update_vcpu_system_time() + +Clang points out that u64 stime variable is always >= 0. + +Signed-off-by: Tim Deegan + +--- a/xen/arch/x86/time.c ++++ b/xen/arch/x86/time.c +@@ -817,7 +817,8 @@ static void __update_vcpu_system_time(st + + if ( d->arch.vtsc ) + { +- u64 stime = t->stime_local_stamp; ++ s_time_t stime = t->stime_local_stamp; ++ + if ( is_hvm_domain(d) ) + { + struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time; diff --git a/52146070-ACPI-fix-acpi_os_map_memory.patch b/52146070-ACPI-fix-acpi_os_map_memory.patch new file mode 100644 index 0000000..bf4e38c --- /dev/null +++ b/52146070-ACPI-fix-acpi_os_map_memory.patch @@ -0,0 +1,132 @@ +References: bnc#833251, bnc#834751 + +# Commit 2ee9cbf9d8eaeff6e21222905d22dbd58dc5fe29 +# Date 2013-08-21 08:38:40 +0200 +# Author Jan Beulich +# Committer Jan Beulich +ACPI: fix acpi_os_map_memory() + +It using map_domain_page() was entirely wrong. 
Use __acpi_map_table() +instead for the time being, with locking added as the mappings it +produces get replaced with subsequent invocations. Using locking in +this way is acceptable here since the only two runtime callers are +acpi_os_{read,write}_memory(), which don't leave mappings pending upon +returning to their callers. + +Also fix __acpi_map_table()'s first parameter's type - while benign for +unstable, backports to pre-4.3 trees will need this. + +Signed-off-by: Jan Beulich + +# Commit c5ba8ed4c6f005d332a49d93a3ef8ff2b690b256 +# Date 2013-08-21 08:40:22 +0200 +# Author Jan Beulich +# Committer Jan Beulich +ACPI: use ioremap() in acpi_os_map_memory() + +This drops the post-boot use of __acpi_map_table() here again (together +with the somewhat awkward locking), in favor of using ioremap(). + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/acpi/lib.c ++++ b/xen/arch/x86/acpi/lib.c +@@ -39,7 +39,7 @@ u32 __read_mostly x86_acpiid_to_apicid[M + * from the fixed base. That's why we start at FIX_ACPI_END and + * count idx down while incrementing the phys address. + */ +-char *__acpi_map_table(unsigned long phys, unsigned long size) ++char *__acpi_map_table(paddr_t phys, unsigned long size) + { + unsigned long base, offset, mapped_size; + int idx; +--- a/xen/drivers/acpi/osl.c ++++ b/xen/drivers/acpi/osl.c +@@ -38,6 +38,7 @@ + #include + #include + #include ++#include + + #define _COMPONENT ACPI_OS_SERVICES + ACPI_MODULE_NAME("osl") +@@ -83,14 +84,25 @@ acpi_physical_address __init acpi_os_get + } + } + +-void __iomem *__init ++void __iomem * + acpi_os_map_memory(acpi_physical_address phys, acpi_size size) + { +- return __acpi_map_table((unsigned long)phys, size); ++ if (system_state >= SYS_STATE_active) { ++ unsigned long pfn = PFN_DOWN(phys); ++ unsigned int offs = phys & (PAGE_SIZE - 1); ++ ++ /* The low first Mb is always mapped. 
*/ ++ if ( !((phys + size - 1) >> 20) ) ++ return __va(phys); ++ return __vmap(&pfn, PFN_UP(offs + size), 1, 1, PAGE_HYPERVISOR_NOCACHE) + offs; ++ } ++ return __acpi_map_table(phys, size); + } + +-void __init acpi_os_unmap_memory(void __iomem * virt, acpi_size size) ++void acpi_os_unmap_memory(void __iomem * virt, acpi_size size) + { ++ if (system_state >= SYS_STATE_active) ++ vunmap((void *)((unsigned long)virt & PAGE_MASK)); + } + + acpi_status acpi_os_read_port(acpi_io_address port, u32 * value, u32 width) +@@ -133,9 +145,8 @@ acpi_status + acpi_os_read_memory(acpi_physical_address phys_addr, u32 * value, u32 width) + { + u32 dummy; +- void __iomem *virt_addr; ++ void __iomem *virt_addr = acpi_os_map_memory(phys_addr, width >> 3); + +- virt_addr = map_domain_page(phys_addr>>PAGE_SHIFT); + if (!value) + value = &dummy; + +@@ -153,7 +164,7 @@ acpi_os_read_memory(acpi_physical_addres + BUG(); + } + +- unmap_domain_page(virt_addr); ++ acpi_os_unmap_memory(virt_addr, width >> 3); + + return AE_OK; + } +@@ -161,9 +172,7 @@ acpi_os_read_memory(acpi_physical_addres + acpi_status + acpi_os_write_memory(acpi_physical_address phys_addr, u32 value, u32 width) + { +- void __iomem *virt_addr; +- +- virt_addr = map_domain_page(phys_addr>>PAGE_SHIFT); ++ void __iomem *virt_addr = acpi_os_map_memory(phys_addr, width >> 3); + + switch (width) { + case 8: +@@ -179,7 +188,7 @@ acpi_os_write_memory(acpi_physical_addre + BUG(); + } + +- unmap_domain_page(virt_addr); ++ acpi_os_unmap_memory(virt_addr, width >> 3); + + return AE_OK; + } +--- a/xen/include/xen/acpi.h ++++ b/xen/include/xen/acpi.h +@@ -56,7 +56,7 @@ typedef int (*acpi_table_handler) (struc + typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); + + unsigned int acpi_get_processor_id (unsigned int cpu); +-char * __acpi_map_table (unsigned long phys_addr, unsigned long size); ++char * __acpi_map_table (paddr_t phys_addr, unsigned long size); + int acpi_boot_init (void); + int acpi_boot_table_init (void); + int acpi_numa_init (void); diff --git a/5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch b/5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch new file mode 100644 index 0000000..834f182 --- /dev/null +++ b/5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch @@ -0,0 +1,50 @@ +# Commit c9c6abab583d27fdca1d979a7f1d18ae30f54e9b +# Date 2013-08-21 16:44:58 +0200 +# Author Jan Beulich +# Committer Jan Beulich +VT-d: warn about Compatibility Format Interrupts being enabled by firmware + +... as being insecure. + +Also drop the second (redundant) read DMAR_GSTS_REG from enable_intremap(). 
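A minimal sketch of the resulting status-register logic; the bit positions follow the VT-d spec's global status register (QIES bit 26, CFIS bit 23) and are assumptions here, not taken from the patch itself:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define DMA_GSTS_QIES (1u << 26)   /* queued invalidation enabled */
    #define DMA_GSTS_CFIS (1u << 23)   /* compatibility format interrupts */

    static int check_intremap_prereqs(uint32_t sts, bool eim)
    {
        if ( !(sts & DMA_GSTS_QIES) )
            return -1;   /* refuse to enable interrupt remapping */
        if ( !eim && (sts & DMA_GSTS_CFIS) )
            fprintf(stderr, "CFI permitted: pass-through will be insecure\n");
        return 0;
    }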
+ +Signed-off-by: Jan Beulich +Acked-by Xiantao Zhang + +--- a/xen/drivers/passthrough/vtd/intremap.c ++++ b/xen/drivers/passthrough/vtd/intremap.c +@@ -706,8 +706,8 @@ int enable_intremap(struct iommu *iommu, + + if ( !platform_supports_intremap() ) + { +- dprintk(XENLOG_ERR VTDPREFIX, +- "Platform firmware does not support interrupt remapping\n"); ++ printk(XENLOG_ERR VTDPREFIX ++ " Platform firmware does not support interrupt remapping\n"); + return -EINVAL; + } + +@@ -718,15 +718,19 @@ int enable_intremap(struct iommu *iommu, + if ( (sts & DMA_GSTS_IRES) && ir_ctrl->iremap_maddr ) + return 0; + +- sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); + if ( !(sts & DMA_GSTS_QIES) ) + { +- dprintk(XENLOG_ERR VTDPREFIX, +- "Queued invalidation is not enabled, should not enable " +- "interrupt remapping\n"); ++ printk(XENLOG_ERR VTDPREFIX ++ " Queued invalidation is not enabled on IOMMU #%u:" ++ " Should not enable interrupt remapping\n", iommu->index); + return -EINVAL; + } + ++ if ( !eim && (sts & DMA_GSTS_CFIS) ) ++ printk(XENLOG_WARNING VTDPREFIX ++ " Compatibility Format Interrupts permitted on IOMMU #%u:" ++ " Device pass-through will be insecure\n", iommu->index); ++ + if ( ir_ctrl->iremap_maddr == 0 ) + { + drhd = iommu_to_drhd(iommu); diff --git a/5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch b/5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch new file mode 100644 index 0000000..5f93087 --- /dev/null +++ b/5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch @@ -0,0 +1,26 @@ +# Commit 7fb5c6b9ef22915e3fcac95cd44857f4457ba783 +# Date 2013-08-22 10:49:24 +0200 +# Author Yang Zhang +# Committer Jan Beulich +Nested VMX: Check whether interrupt is blocked by TPR + +If interrupt is blocked by L1's TPR, L2 should not see it and keep +running. Adding the check before L2 to retrive interrupt. + +Signed-off-by: Yang Zhang +Acked-by: "Dong, Eddie" + +--- a/xen/arch/x86/hvm/vmx/intr.c ++++ b/xen/arch/x86/hvm/vmx/intr.c +@@ -165,6 +165,11 @@ static int nvmx_intr_intercept(struct vc + { + u32 ctrl; + ++ /* If blocked by L1's tpr, then nothing to do. */ ++ if ( nestedhvm_vcpu_in_guestmode(v) && ++ hvm_interrupt_blocked(v, intack) == hvm_intblk_tpr ) ++ return 1; ++ + if ( nvmx_intr_blocked(v) != hvm_intblk_none ) + { + enable_intr_window(v, intack); diff --git a/5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch b/5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch new file mode 100644 index 0000000..eda8b87 --- /dev/null +++ b/5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch @@ -0,0 +1,36 @@ +# Commit b35d0a26983843c092bfa353fd6b9aa8c3bf4886 +# Date 2013-08-22 10:50:13 +0200 +# Author Yang Zhang +# Committer Jan Beulich +Nested VMX: Force check ISR when L2 is running + +External interrupt is allowed to notify CPU only when it has higher +priority than current in servicing interrupt. With APIC-v, the priority +comparing is done by hardware and hardware will inject the interrupt to +VCPU when it recognizes an interrupt. Currently, there is no virtual +APIC-v feature available for L1 to use, so when L2 is running, we still need +to compare interrupt priority with ISR in hypervisor instead via hardware. 
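A minimal sketch of the priority test the hypervisor must keep doing in software while L2 runs, mirroring the vlapic logic (a pending vector is deliverable only if its priority class, bits 7:4 of the vector, exceeds that of the highest in-service vector; -1 denotes "none"):

    /* Vectors are 0..255; bits 7:4 form the priority class. */
    static int deliverable(int irr_vector, int isr_vector)
    {
        if ( irr_vector < 0 )
            return 0;                       /* nothing pending */
        if ( isr_vector >= 0 &&
             (irr_vector & 0xf0) <= (isr_vector & 0xf0) )
            return 0;                       /* blocked by in-service vector */
        return 1;
    }

With APIC-v this comparison normally happens in hardware; the hunk below makes vlapic_has_pending_irq() fall back to it whenever the vCPU is in nested guest mode.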
+ +Signed-off-by: Yang Zhang +Acked-by: "Dong, Eddie" + +--- a/xen/arch/x86/hvm/vlapic.c ++++ b/xen/arch/x86/hvm/vlapic.c +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -1037,7 +1038,8 @@ int vlapic_has_pending_irq(struct vcpu * + if ( irr == -1 ) + return -1; + +- if ( vlapic_virtual_intr_delivery_enabled() ) ++ if ( vlapic_virtual_intr_delivery_enabled() && ++ !nestedhvm_vcpu_in_guestmode(v) ) + return irr; + + isr = vlapic_find_highest_isr(vlapic); diff --git a/5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch b/5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch new file mode 100644 index 0000000..ed714cb --- /dev/null +++ b/5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch @@ -0,0 +1,43 @@ +# Commit 375a1035002fb257087756a86e6caeda649fc0f1 +# Date 2013-08-22 10:52:05 +0200 +# Author Yang Zhang +# Committer Jan Beulich +Nested VMX: Clear APIC-v control bit in vmcs02 + +There is no vAPIC-v support, so mask the APIC-v control bit when +constructing vmcs02. + +Signed-off-by: Yang Zhang +Acked-by: "Dong, Eddie" + +--- a/xen/arch/x86/hvm/vmx/vvmx.c ++++ b/xen/arch/x86/hvm/vmx/vvmx.c +@@ -613,8 +613,15 @@ void nvmx_update_secondary_exec_control( + u32 shadow_cntrl; + struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); + struct nestedvmx *nvmx = &vcpu_2_nvmx(v); ++ u32 apicv_bit = SECONDARY_EXEC_APIC_REGISTER_VIRT | ++ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; + ++ host_cntrl &= ~apicv_bit; + shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, SECONDARY_VM_EXEC_CONTROL); ++ ++ /* No vAPIC-v support, so it shouldn't be set in vmcs12. */ ++ ASSERT(!(shadow_cntrl & apicv_bit)); ++ + nvmx->ept.enabled = !!(shadow_cntrl & SECONDARY_EXEC_ENABLE_EPT); + shadow_cntrl |= host_cntrl; + __vmwrite(SECONDARY_VM_EXEC_CONTROL, shadow_cntrl); +@@ -625,7 +632,12 @@ static void nvmx_update_pin_control(stru + u32 shadow_cntrl; + struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); + ++ host_cntrl &= ~PIN_BASED_POSTED_INTERRUPT; + shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL); ++ ++ /* No vAPIC-v support, so it shouldn't be set in vmcs12. */ ++ ASSERT(!(shadow_cntrl & PIN_BASED_POSTED_INTERRUPT)); ++ + shadow_cntrl |= host_cntrl; + __vmwrite(PIN_BASED_VM_EXEC_CONTROL, shadow_cntrl); + } diff --git a/5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch b/5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch new file mode 100644 index 0000000..529ea7a --- /dev/null +++ b/5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch @@ -0,0 +1,247 @@ +# Commit 84e6af58707520baf59c1c86c29237419e439afb +# Date 2013-08-22 10:59:01 +0200 +# Author Yang Zhang +# Committer Jan Beulich +Nested VMX: Update APIC-v(RVI/SVI) when vmexit to L1 + +If APIC-v is enabled, all interrupts to L1 are delivered through APIC-v. +But when L2 is running, an external interrupt will cause an L1 vmexit with +reason "external interrupt". L1 will then pick up the interrupt through +vmcs12. When L1 acks the interrupt, since APIC-v is enabled while +L1 is running, the APIC-v hardware will still do the vEOI update. The problem +is that the interrupt was not delivered through the APIC-v hardware; this means +SVI/RVI/vPPR are not set, but hardware requires them when doing the vEOI +update. The solution is that, when L1 tries to pick up the interrupt +from vmcs12, the hypervisor will update SVI/RVI/vPPR to make +sure the subsequent vEOI and vPPR updates are done correctly.
+ +Also, since the interrupt is delivered through vmcs12, the APIC-v hardware will +not clear vIRR, and the hypervisor needs to clear it before L1 runs. + +Signed-off-by: Yang Zhang +Acked-by: "Dong, Eddie" + +--- a/xen/arch/x86/hvm/irq.c ++++ b/xen/arch/x86/hvm/irq.c +@@ -437,7 +437,7 @@ struct hvm_intack hvm_vcpu_ack_pending_i + intack.vector = (uint8_t)vector; + break; + case hvm_intsrc_lapic: +- if ( !vlapic_ack_pending_irq(v, intack.vector) ) ++ if ( !vlapic_ack_pending_irq(v, intack.vector, 0) ) + intack = hvm_intack_none; + break; + case hvm_intsrc_vector: +--- a/xen/arch/x86/hvm/vlapic.c ++++ b/xen/arch/x86/hvm/vlapic.c +@@ -168,6 +168,14 @@ static uint32_t vlapic_get_ppr(struct vl + return ppr; + } + ++uint32_t vlapic_set_ppr(struct vlapic *vlapic) ++{ ++ uint32_t ppr = vlapic_get_ppr(vlapic); ++ ++ vlapic_set_reg(vlapic, APIC_PROCPRI, ppr); ++ return ppr; ++} ++ + static int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda) + { + int result = 0; +@@ -1050,15 +1058,15 @@ int vlapic_has_pending_irq(struct vcpu * + return irr; + } + +-int vlapic_ack_pending_irq(struct vcpu *v, int vector) ++int vlapic_ack_pending_irq(struct vcpu *v, int vector, bool_t force_ack) + { + struct vlapic *vlapic = vcpu_vlapic(v); + +- if ( vlapic_virtual_intr_delivery_enabled() ) +- return 1; +- +- vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]); +- vlapic_clear_irr(vector, vlapic); ++ if ( force_ack || !vlapic_virtual_intr_delivery_enabled() ) ++ { ++ vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]); ++ vlapic_clear_irr(vector, vlapic); ++ } + + return 1; + } +--- a/xen/arch/x86/hvm/vmx/intr.c ++++ b/xen/arch/x86/hvm/vmx/intr.c +@@ -185,7 +185,7 @@ static int nvmx_intr_intercept(struct vc + if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) ) + return 0; + +- vmx_inject_extint(intack.vector); ++ vmx_inject_extint(intack.vector, intack.source); + + ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, VM_EXIT_CONTROLS); + if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT ) +@@ -314,7 +314,7 @@ void vmx_intr_assist(void) + else + { + HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0); +- vmx_inject_extint(intack.vector); ++ vmx_inject_extint(intack.vector, intack.source); + pt_intr_post(v, intack); + } + +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -1205,7 +1205,7 @@ static void vmx_update_guest_efer(struct + } + + void nvmx_enqueue_n2_exceptions(struct vcpu *v, +- unsigned long intr_fields, int error_code) ++ unsigned long intr_fields, int error_code, uint8_t source) + { + struct nestedvmx *nvmx = &vcpu_2_nvmx(v); + +@@ -1213,6 +1213,7 @@ void nvmx_enqueue_n2_exceptions(struct v + /* enqueue the exception till the VMCS switch back to L1 */ + nvmx->intr.intr_info = intr_fields; + nvmx->intr.error_code = error_code; ++ nvmx->intr.source = source; + vcpu_nestedhvm(v).nv_vmexit_pending = 1; + return; + } +@@ -1224,7 +1225,8 @@ void nvmx_enqueue_n2_exceptions(struct v + + static int nvmx_vmexit_trap(struct vcpu *v, struct hvm_trap *trap) + { +- nvmx_enqueue_n2_exceptions(v, trap->vector, trap->error_code); ++ nvmx_enqueue_n2_exceptions(v, trap->vector, trap->error_code, ++ hvm_intsrc_none); + return NESTEDHVM_VMEXIT_DONE; + } + +@@ -1255,7 +1257,7 @@ static void __vmx_inject_exception(int t + curr->arch.hvm_vmx.vmx_emulate = 1; + } + +-void vmx_inject_extint(int trap) ++void vmx_inject_extint(int trap, uint8_t source) + { + struct vcpu *v = current; + u32 pin_based_cntrl; +@@ -1266,7 +1268,7 @@ void vmx_inject_extint(int trap) + if ( pin_based_cntrl & PIN_BASED_EXT_INTR_MASK ) { +
nvmx_enqueue_n2_exceptions (v, + INTR_INFO_VALID_MASK | (X86_EVENTTYPE_EXT_INTR<<8) | trap, +- HVM_DELIVER_NO_ERROR_CODE); ++ HVM_DELIVER_NO_ERROR_CODE, source); + return; + } + } +@@ -1285,7 +1287,7 @@ void vmx_inject_nmi(void) + if ( pin_based_cntrl & PIN_BASED_NMI_EXITING ) { + nvmx_enqueue_n2_exceptions (v, + INTR_INFO_VALID_MASK | (X86_EVENTTYPE_NMI<<8) | TRAP_nmi, +- HVM_DELIVER_NO_ERROR_CODE); ++ HVM_DELIVER_NO_ERROR_CODE, hvm_intsrc_nmi); + return; + } + } +@@ -1353,7 +1355,7 @@ static void vmx_inject_trap(struct hvm_t + { + nvmx_enqueue_n2_exceptions (curr, + INTR_INFO_VALID_MASK | (_trap.type<<8) | _trap.vector, +- _trap.error_code); ++ _trap.error_code, hvm_intsrc_none); + return; + } + else +--- a/xen/arch/x86/hvm/vmx/vvmx.c ++++ b/xen/arch/x86/hvm/vmx/vvmx.c +@@ -1295,6 +1295,36 @@ static void sync_exception_state(struct + } + } + ++static void nvmx_update_apicv(struct vcpu *v) ++{ ++ struct nestedvmx *nvmx = &vcpu_2_nvmx(v); ++ struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); ++ unsigned long reason = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON); ++ uint32_t intr_info = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO); ++ ++ if ( reason == EXIT_REASON_EXTERNAL_INTERRUPT && ++ nvmx->intr.source == hvm_intsrc_lapic && ++ (intr_info & INTR_INFO_VALID_MASK) ) ++ { ++ uint16_t status; ++ uint32_t rvi, ppr; ++ uint32_t vector = intr_info & 0xff; ++ struct vlapic *vlapic = vcpu_vlapic(v); ++ ++ vlapic_ack_pending_irq(v, vector, 1); ++ ++ ppr = vlapic_set_ppr(vlapic); ++ WARN_ON((ppr & 0xf0) != (vector & 0xf0)); ++ ++ status = vector << 8; ++ rvi = vlapic_has_pending_irq(v); ++ if ( rvi != -1 ) ++ status |= rvi & 0xff; ++ ++ __vmwrite(GUEST_INTR_STATUS, status); ++ } ++} ++ + static void virtual_vmexit(struct cpu_user_regs *regs) + { + struct vcpu *v = current; +@@ -1340,6 +1370,9 @@ static void virtual_vmexit(struct cpu_us + /* updating host cr0 to sync TS bit */ + __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0); + ++ if ( cpu_has_vmx_virtual_intr_delivery ) ++ nvmx_update_apicv(v); ++ + vmreturn(regs, VMSUCCEED); + } + +--- a/xen/include/asm-x86/hvm/vlapic.h ++++ b/xen/include/asm-x86/hvm/vlapic.h +@@ -98,7 +98,7 @@ bool_t is_vlapic_lvtpc_enabled(struct vl + void vlapic_set_irq(struct vlapic *vlapic, uint8_t vec, uint8_t trig); + + int vlapic_has_pending_irq(struct vcpu *v); +-int vlapic_ack_pending_irq(struct vcpu *v, int vector); ++int vlapic_ack_pending_irq(struct vcpu *v, int vector, bool_t force_ack); + + int vlapic_init(struct vcpu *v); + void vlapic_destroy(struct vcpu *v); +@@ -110,6 +110,7 @@ void vlapic_tdt_msr_set(struct vlapic *v + uint64_t vlapic_tdt_msr_get(struct vlapic *vlapic); + + int vlapic_accept_pic_intr(struct vcpu *v); ++uint32_t vlapic_set_ppr(struct vlapic *vlapic); + + void vlapic_adjust_i8259_target(struct domain *d); + +--- a/xen/include/asm-x86/hvm/vmx/vmx.h ++++ b/xen/include/asm-x86/hvm/vmx/vmx.h +@@ -448,7 +448,7 @@ static inline int __vmxon(u64 addr) + + void vmx_get_segment_register(struct vcpu *, enum x86_segment, + struct segment_register *); +-void vmx_inject_extint(int trap); ++void vmx_inject_extint(int trap, uint8_t source); + void vmx_inject_nmi(void); + + int ept_p2m_init(struct p2m_domain *p2m); +--- a/xen/include/asm-x86/hvm/vmx/vvmx.h ++++ b/xen/include/asm-x86/hvm/vmx/vvmx.h +@@ -36,6 +36,7 @@ struct nestedvmx { + struct { + unsigned long intr_info; + u32 error_code; ++ u8 source; + } intr; + struct { + bool_t enabled; diff --git a/5215d8b0-Correct-X2-APIC-HVM-emulation.patch b/5215d8b0-Correct-X2-APIC-HVM-emulation.patch new file mode 
100644 index 0000000..014de12 --- /dev/null +++ b/5215d8b0-Correct-X2-APIC-HVM-emulation.patch @@ -0,0 +1,24 @@ +References: bnc#835896 + +# Commit 69962e19ed432570f6cdcfdb5f6f22d6e3c54e6c +# Date 2013-08-22 11:24:00 +0200 +# Author Juergen Gross +# Committer Jan Beulich +Correct X2-APIC HVM emulation + +commit 6859874b61d5ddaf5289e72ed2b2157739b72ca5 ("x86/HVM: fix x2APIC +APIC_ID read emulation") introduced an error for the hvm emulation of +x2apic. Any try to write to APIC_ICR MSR will result in a GP fault. + +Signed-off-by: Juergen Gross + +--- a/xen/arch/x86/hvm/vlapic.c ++++ b/xen/arch/x86/hvm/vlapic.c +@@ -868,6 +868,7 @@ int hvm_x2apic_msr_write(struct vcpu *v, + rc = vlapic_reg_write(v, APIC_ICR2, (uint32_t)(msr_content >> 32)); + if ( rc ) + return rc; ++ break; + + case APIC_ICR2: + return X86EMUL_UNHANDLEABLE; diff --git a/521c6d4a-x86-don-t-allow-Dom0-access-to-the-MSI-address-range.patch b/521c6d4a-x86-don-t-allow-Dom0-access-to-the-MSI-address-range.patch new file mode 100644 index 0000000..d0dd632 --- /dev/null +++ b/521c6d4a-x86-don-t-allow-Dom0-access-to-the-MSI-address-range.patch @@ -0,0 +1,24 @@ +# Commit 850188e1278cecd1dfb9b936024bee2d8dfdcc18 +# Date 2013-08-27 11:11:38 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86: don't allow Dom0 access to the MSI address range + +In particular, MMIO assignments should not be done using this area. + +Signed-off-by: Jan Beulich +Acked-by Xiantao Zhang + +--- 2013-08-30.orig/xen/arch/x86/domain_build.c 2013-07-09 20:57:12.000000000 +0200 ++++ 2013-08-30/xen/arch/x86/domain_build.c 2013-09-09 11:23:00.000000000 +0200 +@@ -1122,6 +1122,10 @@ int __init construct_dom0( + if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) ) + rc |= iomem_deny_access(dom0, mfn, mfn); + } ++ /* MSI range. */ ++ rc |= iomem_deny_access(dom0, paddr_to_pfn(MSI_ADDR_BASE_LO), ++ paddr_to_pfn(MSI_ADDR_BASE_LO + ++ MSI_ADDR_DEST_ID_MASK)); + + /* Remove access to E820_UNUSABLE I/O regions above 1MB. */ + for ( i = 0; i < e820.nr_map; i++ ) diff --git a/521c6d6c-x86-don-t-allow-Dom0-access-to-the-HT-address-range.patch b/521c6d6c-x86-don-t-allow-Dom0-access-to-the-HT-address-range.patch new file mode 100644 index 0000000..cb8fdb4 --- /dev/null +++ b/521c6d6c-x86-don-t-allow-Dom0-access-to-the-HT-address-range.patch @@ -0,0 +1,23 @@ +# Commit d838ac2539cf1987bea6e15662fd6a80a58fe26d +# Date 2013-08-27 11:12:12 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86: don't allow Dom0 access to the HT address range + +In particular, MMIO assignments should not be done using this area. + +Signed-off-by: Jan Beulich + +--- 2013-08-30.orig/xen/arch/x86/domain_build.c 2013-09-09 11:23:00.000000000 +0200 ++++ 2013-08-30/xen/arch/x86/domain_build.c 2013-09-09 11:23:06.000000000 +0200 +@@ -1126,6 +1126,10 @@ int __init construct_dom0( + rc |= iomem_deny_access(dom0, paddr_to_pfn(MSI_ADDR_BASE_LO), + paddr_to_pfn(MSI_ADDR_BASE_LO + + MSI_ADDR_DEST_ID_MASK)); ++ /* HyperTransport range. */ ++ if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) ++ rc |= iomem_deny_access(dom0, paddr_to_pfn(0xfdULL << 32), ++ paddr_to_pfn((1ULL << 40) - 1)); + + /* Remove access to E820_UNUSABLE I/O regions above 1MB. 
*/ + for ( i = 0; i < e820.nr_map; i++ ) diff --git a/521c6e23-x86-Intel-add-support-for-Haswell-CPU-models.patch b/521c6e23-x86-Intel-add-support-for-Haswell-CPU-models.patch new file mode 100644 index 0000000..b8cd049 --- /dev/null +++ b/521c6e23-x86-Intel-add-support-for-Haswell-CPU-models.patch @@ -0,0 +1,52 @@ +# Commit 3e787021fb2420851c7bdc3911ea53c728ba5ac0 +# Date 2013-08-27 11:15:15 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/Intel: add support for Haswell CPU models + +... according to their most recent public documentation. + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- 2013-08-30.orig/xen/arch/x86/acpi/cpu_idle.c 2013-08-30 00:00:00.000000000 +0200 ++++ 2013-08-30/xen/arch/x86/acpi/cpu_idle.c 2013-09-06 13:46:10.000000000 +0200 +@@ -135,8 +135,10 @@ static void do_get_hw_residencies(void * + case 0x3A: + case 0x3E: + /* Haswell */ +- case 0x3c: ++ case 0x3C: ++ case 0x3F: + case 0x45: ++ case 0x46: + GET_PC2_RES(hw_res->pc2); + GET_CC7_RES(hw_res->cc7); + /* fall through */ +--- 2013-08-30.orig/xen/arch/x86/hvm/vmx/vmx.c 2013-09-06 00:00:00.000000000 +0200 ++++ 2013-08-30/xen/arch/x86/hvm/vmx/vmx.c 2013-09-06 13:46:10.000000000 +0200 +@@ -1814,7 +1814,7 @@ static const struct lbr_info *last_branc + /* Ivy Bridge */ + case 58: case 62: + /* Haswell */ +- case 60: case 69: ++ case 60: case 63: case 69: case 70: + return nh_lbr; + break; + /* Atom */ +--- 2013-08-30.orig/xen/arch/x86/hvm/vmx/vpmu_core2.c 2013-07-09 20:57:12.000000000 +0200 ++++ 2013-08-30/xen/arch/x86/hvm/vmx/vpmu_core2.c 2013-09-06 13:46:10.000000000 +0200 +@@ -878,7 +878,12 @@ int vmx_vpmu_initialise(struct vcpu *v, + + case 0x3a: /* IvyBridge */ + case 0x3e: /* IvyBridge EP */ +- case 0x3c: /* Haswell */ ++ ++ /* Haswell: */ ++ case 0x3c: ++ case 0x3f: ++ case 0x45: ++ case 0x46: + ret = core2_vpmu_initialise(v, vpmu_flags); + if ( !ret ) + vpmu->arch_vpmu_ops = &core2_vpmu_ops; diff --git a/521db25f-Fix-inactive-timer-list-corruption-on-second-S3-resume.patch b/521db25f-Fix-inactive-timer-list-corruption-on-second-S3-resume.patch new file mode 100644 index 0000000..9b1dbe8 --- /dev/null +++ b/521db25f-Fix-inactive-timer-list-corruption-on-second-S3-resume.patch @@ -0,0 +1,42 @@ +# Commit 9e2c5938246546a5b3f698b7421640d85602b994 +# Date 2013-08-28 10:18:39 +0200 +# Author Tomasz Wroblewski +# Committer Jan Beulich +Fix inactive timer list corruption on second S3 resume + +init_timer cannot be safely called multiple times on same timer since it does memset(0) +on the structure, erasing the auxiliary member used by linked list code. This breaks +inactive timer list in common/timer.c. + +Moved resume_timer initialisation to ns16550_init_postirq, so it's only done once. + +Signed-off-by: Tomasz Wroblewski +Acked-by: Keir Fraser + +--- 2013-08-30.orig/xen/drivers/char/ns16550.c 2013-07-09 20:57:12.000000000 +0200 ++++ 2013-08-30/xen/drivers/char/ns16550.c 2013-09-06 13:46:19.000000000 +0200 +@@ -128,6 +128,8 @@ static struct ns16550 { + #define RESUME_DELAY MILLISECS(10) + #define RESUME_RETRIES 100 + ++static void ns16550_delayed_resume(void *data); ++ + static char ns_read_reg(struct ns16550 *uart, int reg) + { + if ( uart->remapped_io_base == NULL ) +@@ -323,6 +325,7 @@ static void __init ns16550_init_postirq( + serial_async_transmit(port); + + init_timer(&uart->timer, ns16550_poll, port, 0); ++ init_timer(&uart->resume_timer, ns16550_delayed_resume, port, 0); + + /* Calculate time to fill RX FIFO and/or empty TX FIFO for polling. 
*/ + bits = uart->data_bits + uart->stop_bits + !!uart->parity; +@@ -413,7 +416,6 @@ static void ns16550_resume(struct serial + if ( ns16550_ioport_invalid(uart) ) + { + delayed_resume_tries = RESUME_RETRIES; +- init_timer(&uart->resume_timer, ns16550_delayed_resume, port, 0); + set_timer(&uart->resume_timer, NOW() + RESUME_DELAY); + } + else diff --git a/521e1156-x86-AVX-instruction-emulation-fixes.patch b/521e1156-x86-AVX-instruction-emulation-fixes.patch new file mode 100644 index 0000000..d26866e --- /dev/null +++ b/521e1156-x86-AVX-instruction-emulation-fixes.patch @@ -0,0 +1,254 @@ +# Commit 062919448e2f4b127c9c3c085b1a8e1d56a33051 +# Date 2013-08-28 17:03:50 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86: AVX instruction emulation fixes + +- we used the C4/C5 (first prefix) byte instead of the apparent ModR/M + one as the second prefix byte +- early decoding normalized vex.reg, thus corrupting it for the main + consumer (copy_REX_VEX()), resulting in #UD on the two-operand + instructions we emulate + +Also add respective test cases to the testing utility plus +- fix get_fpu() (the fall-through order was inverted) +- add cpu_has_avx2, even if it's currently unused (as in the new test + cases I decided to refrain from using AVX2 instructions in order to + be able to actually run all the tests on the hardware I have) +- slightly tweak cpu_has_avx to more consistently express the outputs + we don't care about (sinking them all into the same variable) + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- 2013-08-30.orig/tools/tests/x86_emulator/test_x86_emulator.c 2012-09-18 23:42:06.000000000 +0200 ++++ 2013-08-30/tools/tests/x86_emulator/test_x86_emulator.c 2013-09-09 11:23:32.000000000 +0200 +@@ -94,13 +94,25 @@ static inline uint64_t xgetbv(uint32_t x + } + + #define cpu_has_avx ({ \ +- unsigned int eax = 1, ecx = 0, edx; \ +- cpuid(&eax, &edx, &ecx, &edx, NULL); \ ++ unsigned int eax = 1, ecx = 0; \ ++ cpuid(&eax, &eax, &ecx, &eax, NULL); \ + if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ + ecx = 0; \ + (ecx & (1U << 28)) != 0; \ + }) + ++#define cpu_has_avx2 ({ \ ++ unsigned int eax = 1, ebx, ecx = 0; \ ++ cpuid(&eax, &ebx, &ecx, &eax, NULL); \ ++ if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ ++ ebx = 0; \ ++ else { \ ++ eax = 7, ecx = 0; \ ++ cpuid(&eax, &ebx, &ecx, &eax, NULL); \ ++ } \ ++ (ebx & (1U << 5)) != 0; \ ++}) ++ + int get_fpu( + void (*exception_callback)(void *, struct cpu_user_regs *), + void *exception_callback_arg, +@@ -111,14 +123,14 @@ int get_fpu( + { + case X86EMUL_FPU_fpu: + break; +- case X86EMUL_FPU_ymm: +- if ( cpu_has_avx ) ++ case X86EMUL_FPU_mmx: ++ if ( cpu_has_mmx ) + break; + case X86EMUL_FPU_xmm: + if ( cpu_has_sse ) + break; +- case X86EMUL_FPU_mmx: +- if ( cpu_has_mmx ) ++ case X86EMUL_FPU_ymm: ++ if ( cpu_has_avx ) + break; + default: + return X86EMUL_UNHANDLEABLE; +@@ -629,6 +641,73 @@ int main(int argc, char **argv) + else + printf("skipped\n"); + ++ printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)..."); ++ if ( stack_exec && cpu_has_avx ) ++ { ++ extern const unsigned char vmovdqu_to_mem[]; ++ ++ asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n" ++ ".pushsection .test, \"a\", @progbits\n" ++ "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n" ++ ".popsection" :: "c" (NULL) ); ++ ++ memcpy(instr, vmovdqu_to_mem, 15); ++ memset(res, 0x55, 128); ++ memset(res + 16, 0xff, 16); ++ memset(res + 20, 0x00, 16); ++ regs.eip = (unsigned long)&instr[0]; ++ regs.ecx = (unsigned long)res; ++ rc = x86_emulate(&ctxt, &emulops); ++ if ( 
(rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) ) ++ goto fail; ++ printf("okay\n"); ++ } ++ else ++ printf("skipped\n"); ++ ++ printf("%-40s", "Testing vmovdqu (%edx),%ymm4..."); ++ if ( stack_exec && cpu_has_avx ) ++ { ++ extern const unsigned char vmovdqu_from_mem[]; ++ ++#if 0 /* Don't use AVX2 instructions for now */ ++ asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n" ++#else ++ asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t" ++ "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n" ++#endif ++ ".pushsection .test, \"a\", @progbits\n" ++ "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n" ++ ".popsection" :: "d" (NULL) ); ++ ++ memcpy(instr, vmovdqu_from_mem, 15); ++ memset(res + 4, 0xff, 16); ++ regs.eip = (unsigned long)&instr[0]; ++ regs.ecx = 0; ++ regs.edx = (unsigned long)res; ++ rc = x86_emulate(&ctxt, &emulops); ++ if ( rc != X86EMUL_OKAY ) ++ goto fail; ++#if 0 /* Don't use AVX2 instructions for now */ ++ asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t" ++ "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t" ++ "vpmovmskb %%ymm1, %0" : "=r" (rc) ); ++#else ++ asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t" ++ "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t" ++ "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t" ++ "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t" ++ "vpmovmskb %%xmm0, %0\n\t" ++ "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) ); ++ rc |= i << 16; ++#endif ++ if ( rc != 0xffffffff ) ++ goto fail; ++ printf("okay\n"); ++ } ++ else ++ printf("skipped\n"); ++ + printf("%-40s", "Testing movsd %xmm5,(%ecx)..."); + memset(res, 0x77, 64); + memset(res + 10, 0x66, 8); +@@ -683,6 +762,59 @@ int main(int argc, char **argv) + else + printf("skipped\n"); + ++ printf("%-40s", "Testing vmovsd %xmm5,(%ecx)..."); ++ memset(res, 0x88, 64); ++ memset(res + 10, 0x77, 8); ++ if ( stack_exec && cpu_has_avx ) ++ { ++ extern const unsigned char vmovsd_to_mem[]; ++ ++ asm volatile ( "vbroadcastsd %0, %%ymm5\n" ++ ".pushsection .test, \"a\", @progbits\n" ++ "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n" ++ ".popsection" :: "m" (res[10]), "c" (NULL) ); ++ ++ memcpy(instr, vmovsd_to_mem, 15); ++ regs.eip = (unsigned long)&instr[0]; ++ regs.ecx = (unsigned long)(res + 2); ++ regs.edx = 0; ++ rc = x86_emulate(&ctxt, &emulops); ++ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) ++ goto fail; ++ printf("okay\n"); ++ } ++ else ++ { ++ printf("skipped\n"); ++ memset(res + 2, 0x77, 8); ++ } ++ ++ printf("%-40s", "Testing vmovaps (%edx),%ymm7..."); ++ if ( stack_exec && cpu_has_avx ) ++ { ++ extern const unsigned char vmovaps_from_mem[]; ++ ++ asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n" ++ ".pushsection .test, \"a\", @progbits\n" ++ "vmovaps_from_mem: vmovaps (%0), %%ymm7\n" ++ ".popsection" :: "d" (NULL) ); ++ ++ memcpy(instr, vmovaps_from_mem, 15); ++ regs.eip = (unsigned long)&instr[0]; ++ regs.ecx = 0; ++ regs.edx = (unsigned long)res; ++ rc = x86_emulate(&ctxt, &emulops); ++ if ( rc != X86EMUL_OKAY ) ++ goto fail; ++ asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t" ++ "vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) ); ++ if ( rc != 0xff ) ++ goto fail; ++ printf("okay\n"); ++ } ++ else ++ printf("skipped\n"); ++ + for ( j = 1; j <= 2; j++ ) + { + #if defined(__i386__) +--- 2013-08-30.orig/xen/arch/x86/x86_emulate/x86_emulate.c 2013-07-09 20:57:12.000000000 +0200 ++++ 2013-08-30/xen/arch/x86/x86_emulate/x86_emulate.c 2013-09-09 11:23:33.000000000 +0200 +@@ -1454,10 +1454,10 @@ x86_emulate( + /* VEX */ + generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1); + +- vex.raw[0] = b; ++ vex.raw[0] = modrm; + if ( b & 1 ) + { +- vex.raw[1] = b; ++ vex.raw[1] = modrm; + 
vex.opcx = vex_0f; + vex.x = 1; + vex.b = 1; +@@ -1479,10 +1479,7 @@ x86_emulate( + } + } + } +- vex.reg ^= 0xf; +- if ( !mode_64bit() ) +- vex.reg &= 0x7; +- else if ( !vex.r ) ++ if ( mode_64bit() && !vex.r ) + rex_prefix |= REX_R; + + fail_if(vex.opcx != vex_0f); +@@ -3899,8 +3896,9 @@ x86_emulate( + else + { + fail_if((vex.opcx != vex_0f) || +- (vex.reg && ((ea.type == OP_MEM) || +- !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); ++ ((vex.reg != 0xf) && ++ ((ea.type == OP_MEM) || ++ !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); + vcpu_must_have_avx(); + get_fpu(X86EMUL_FPU_ymm, &fic); + ea.bytes = 16 << vex.l; +@@ -4168,7 +4166,7 @@ x86_emulate( + } + else + { +- fail_if((vex.opcx != vex_0f) || vex.reg || ++ fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) || + ((vex.pfx != vex_66) && (vex.pfx != vex_f3))); + vcpu_must_have_avx(); + get_fpu(X86EMUL_FPU_ymm, &fic); diff --git a/521ef8d9-AMD-IOMMU-add-missing-checks.patch b/521ef8d9-AMD-IOMMU-add-missing-checks.patch new file mode 100644 index 0000000..1b10c7e --- /dev/null +++ b/521ef8d9-AMD-IOMMU-add-missing-checks.patch @@ -0,0 +1,29 @@ +# Commit 3785d30efe8264b899499e0883b10cc434bd0959 +# Date 2013-08-29 09:31:37 +0200 +# Author Jan Beulich +# Committer Jan Beulich +AMD IOMMU: add missing check + +We shouldn't accept IVHD tables specifying IO-APIC IDs beyond the limit +we support (MAX_IO_APICS, currently 128). + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Acked-by: Suravee Suthikulpanit + +--- 2013-08-30.orig/xen/drivers/passthrough/amd/iommu_acpi.c 2013-08-30 13:48:36.000000000 +0200 ++++ 2013-08-30/xen/drivers/passthrough/amd/iommu_acpi.c 2013-09-06 13:49:07.000000000 +0200 +@@ -674,6 +674,13 @@ static u16 __init parse_ivhd_device_spec + if ( IO_APIC_ID(apic) != special->handle ) + continue; + ++ if ( special->handle >= ARRAY_SIZE(ioapic_sbdf) ) ++ { ++ printk(XENLOG_ERR "IVHD Error: IO-APIC %#x entry beyond bounds\n", ++ special->handle); ++ return 0; ++ } ++ + if ( ioapic_sbdf[special->handle].pin_2_idx ) + { + if ( ioapic_sbdf[special->handle].bdf == bdf && diff --git a/52205a7d-hvmloader-smbios-Correctly-count-the-number-of-tables-written.patch b/52205a7d-hvmloader-smbios-Correctly-count-the-number-of-tables-written.patch new file mode 100644 index 0000000..b970686 --- /dev/null +++ b/52205a7d-hvmloader-smbios-Correctly-count-the-number-of-tables-written.patch @@ -0,0 +1,28 @@ +# Commit 4aa19549e17650b9bfe2b31d7f52a95696d388f0 +# Date 2013-08-30 10:40:29 +0200 +# Author Andrew Cooper +# Committer Jan Beulich +hvmloader/smbios: Correctly count the number of tables written + +Fixes regression indirectly introduced by c/s 4d23036e709627 + +That changeset added some smbios tables which were option based on the +toolstack providing appropriate xenstore keys. The do_struct() macro would +unconditionally increment nr_structs, even if a table was not actually +written. 
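+
+(Illustration only, not part of the change: the fix below counts a
+table only when the writer actually advanced the output pointer. A
+minimal sketch of that pattern, with a hypothetical writer name:
+
+    char *q = write_optional_table(p);  /* returns p when it emits nothing */
+
+    if ( q != p )       /* were any bytes written? */
+        nr_structs++;
+    p = q;
+
+A writer that decides to emit nothing simply returns its input pointer
+unchanged, so it no longer inflates the count.)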
+ +Signed-off-by: Andrew Cooper +Acked-by: Keir Fraser + +--- 2013-08-30.orig/tools/firmware/hvmloader/smbios.c 2013-07-09 20:57:12.000000000 +0200 ++++ 2013-08-30/tools/firmware/hvmloader/smbios.c 2013-09-09 11:23:52.000000000 +0200 +@@ -192,7 +192,8 @@ write_smbios_tables(void *ep, void *star + + #define do_struct(fn) do { \ + q = (fn); \ +- (*nr_structs)++; \ ++ if ( q != p ) \ ++ (*nr_structs)++; \ + if ( (q - p) > *max_struct_size ) \ + *max_struct_size = q - p; \ + p = q; \ diff --git a/52205a90-public-hvm_xs_strings.h-Fix-ABI-regression-for-OEM-SMBios-strings.patch b/52205a90-public-hvm_xs_strings.h-Fix-ABI-regression-for-OEM-SMBios-strings.patch new file mode 100644 index 0000000..05f4a7d --- /dev/null +++ b/52205a90-public-hvm_xs_strings.h-Fix-ABI-regression-for-OEM-SMBios-strings.patch @@ -0,0 +1,42 @@ +# Commit 0f4cb23c3ea5b987c49c9a9368e7a0d505ec064f +# Date 2013-08-30 10:40:48 +0200 +# Author Andrew Cooper +# Committer Jan Beulich +public/hvm_xs_strings.h: Fix ABI regression for OEM SMBios strings + +The old code for OEM SMBios strings was: + + char path[20] = "bios-strings/oem-XX"; + path[(sizeof path) - 3] = '0' + ((i < 10) ? i : i / 10); + path[(sizeof path) - 2] = (i < 10) ? '\0' : '0' + (i % 10); + +Where oem-1 through oem-9 specifically had no leading 0. + +However, the definition of HVM_XS_OEM_STRINGS specifically requires leading +0s. + +This regression was introduced by the combination of c/s 4d23036e709627 and +e64c3f71ceb662. + +I realise that this patch causes a change to the public headers. However, I +feel it is justified as: + +* All toolstacks used to have to embed the magic string (and almost certainly + still do) +* If by some miracle a new toolstack has started using the new define, it will + continue to work. +* The only in-tree consumer of the define is hvmloader itself. + +Signed-off-by: Andrew Cooper +Acked-by: Keir Fraser + +--- 2013-08-30.orig/xen/include/public/hvm/hvm_xs_strings.h 2013-07-09 20:57:12.000000000 +0200 ++++ 2013-08-30/xen/include/public/hvm/hvm_xs_strings.h 2013-09-09 11:23:57.000000000 +0200 +@@ -75,6 +75,6 @@ + /* 1 to 99 OEM strings can be set in xenstore using values of the form + * below. These strings will be loaded into the SMBIOS type 11 structure. + */ +-#define HVM_XS_OEM_STRINGS "bios-strings/oem-%02d" ++#define HVM_XS_OEM_STRINGS "bios-strings/oem-%d" + + #endif /* __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ */ diff --git a/52205e27-x86-xsave-initialization-improvements.patch b/52205e27-x86-xsave-initialization-improvements.patch new file mode 100644 index 0000000..f65a16f --- /dev/null +++ b/52205e27-x86-xsave-initialization-improvements.patch @@ -0,0 +1,103 @@ +# Commit c6066e78f4a66005b0d5d86c6ade32e2ab78923a +# Date 2013-08-30 10:56:07 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/xsave: initialization improvements + +- properly validate available feature set on APs +- also validate xsaveopt availability on APs +- properly indicate whether the initialization is on the BSP (we + shouldn't be using "cpu == 0" checks for this) + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- 2013-08-30.orig/xen/arch/x86/cpu/common.c 2013-08-30 00:00:00.000000000 +0200 ++++ 2013-08-30/xen/arch/x86/cpu/common.c 2013-09-09 11:24:05.000000000 +0200 +@@ -304,7 +304,7 @@ void __cpuinit identify_cpu(struct cpuin + clear_bit(X86_FEATURE_XSAVE, boot_cpu_data.x86_capability); + + if ( cpu_has_xsave ) +- xstate_init(); ++ xstate_init(c == &boot_cpu_data); + + /* + * The vendor-specific functions might have changed features.
Now +--- 2013-08-30.orig/xen/arch/x86/xstate.c 2013-09-09 11:21:56.000000000 +0200 ++++ 2013-08-30/xen/arch/x86/xstate.c 2013-09-09 11:24:05.000000000 +0200 +@@ -247,11 +247,10 @@ void xstate_free_save_area(struct vcpu * + } + + /* Collect the information of processor's extended state */ +-void xstate_init(void) ++void xstate_init(bool_t bsp) + { +- u32 eax, ebx, ecx, edx; +- int cpu = smp_processor_id(); +- u32 min_size; ++ u32 eax, ebx, ecx, edx, min_size; ++ u64 feature_mask; + + if ( boot_cpu_data.cpuid_level < XSTATE_CPUID ) + return; +@@ -260,6 +259,7 @@ void xstate_init(void) + + BUG_ON((eax & XSTATE_FP_SSE) != XSTATE_FP_SSE); + BUG_ON((eax & XSTATE_YMM) && !(eax & XSTATE_SSE)); ++ feature_mask = (((u64)edx << 32) | eax) & XCNTXT_MASK; + + /* FP/SSE, XSAVE.HEADER, YMM */ + min_size = XSTATE_AREA_MIN_SIZE; +@@ -271,31 +271,33 @@ void xstate_init(void) + * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size. + */ + set_in_cr4(X86_CR4_OSXSAVE); +- if ( !set_xcr0((((u64)edx << 32) | eax) & XCNTXT_MASK) ) ++ if ( !set_xcr0(feature_mask) ) + BUG(); + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + +- if ( cpu == 0 ) ++ if ( bsp ) + { ++ xfeature_mask = feature_mask; + /* + * xsave_cntxt_size is the max size required by enabled features. + * We know FP/SSE and YMM about eax, and nothing about edx at present. + */ + xsave_cntxt_size = ebx; +- xfeature_mask = eax + ((u64)edx << 32); +- xfeature_mask &= XCNTXT_MASK; + printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n", + __func__, xsave_cntxt_size, xfeature_mask); +- +- /* Check XSAVEOPT feature. */ +- cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); +- cpu_has_xsaveopt = !!(eax & XSTATE_FEATURE_XSAVEOPT); + } + else + { ++ BUG_ON(xfeature_mask != feature_mask); + BUG_ON(xsave_cntxt_size != ebx); +- BUG_ON(xfeature_mask != (xfeature_mask & XCNTXT_MASK)); + } ++ ++ /* Check XSAVEOPT feature. */ ++ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); ++ if ( bsp ) ++ cpu_has_xsaveopt = !!(eax & XSTATE_FEATURE_XSAVEOPT); ++ else ++ BUG_ON(!cpu_has_xsaveopt != !(eax & XSTATE_FEATURE_XSAVEOPT)); + } + + int handle_xsetbv(u32 index, u64 new_bv) +--- 2013-08-30.orig/xen/include/asm-x86/xstate.h 2013-07-09 20:57:12.000000000 +0200 ++++ 2013-08-30/xen/include/asm-x86/xstate.h 2013-09-09 11:24:05.000000000 +0200 +@@ -81,6 +81,6 @@ int __must_check handle_xsetbv(u32 index + /* extended state init and cleanup functions */ + void xstate_free_save_area(struct vcpu *v); + int xstate_alloc_save_area(struct vcpu *v); +-void xstate_init(void); ++void xstate_init(bool_t bsp); + + #endif /* __ASM_XSTATE_H */ diff --git a/5226020f-xend-handle-extended-PCI-configuration-space-when-saving-state.patch b/5226020f-xend-handle-extended-PCI-configuration-space-when-saving-state.patch new file mode 100644 index 0000000..795b865 --- /dev/null +++ b/5226020f-xend-handle-extended-PCI-configuration-space-when-saving-state.patch @@ -0,0 +1,31 @@ +# Commit 1893cf77992cc0ce9d827a8d345437fa2494b540 +# Date 2013-09-03 16:36:47 +0100 +# Author Steven Noonan +# Committer Ian Campbell +xend: handle extended PCI configuration space when saving state + +Newer PCI standards (e.g., PCI-X 2.0 and PCIe) introduce extended +configuration space which is larger than 256 bytes. This patch uses +stat() to determine the amount of space used to correctly save all of +the PCI configuration space. Resets handled by the xen-pciback driver +don't have this problem, as that code correctly handles saving +extended configuration space. 
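+
+(Illustration only, not part of the change: conventional PCI config
+space is 256 bytes, while PCI-X 2.0/PCIe extended config space is 4096
+bytes; sysfs exposes whichever the device supports as the size of its
+"config" file, so a saver should take the loop bound from fstat()
+rather than hard-coding 256. A minimal C sketch, hypothetical helper
+name, written to avoid leaking the descriptor on error:
+
+    #include <sys/stat.h>
+    #include <fcntl.h>
+    #include <unistd.h>
+
+    static long config_space_size(const char *path)
+    {
+        struct stat st;
+        long size = -1;
+        int fd = open(path, O_RDONLY);
+
+        if ( fd >= 0 )
+        {
+            if ( fstat(fd, &st) == 0 )
+                size = st.st_size;  /* 256 or 4096 */
+            close(fd);
+        }
+        return size;
+    }
+)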
+ +Signed-off-by: Steven Noonan +Reviewed-by: Matt Wilson +[msw: adjusted commit message] +Signed-off-by: Matt Wilson + +--- 2013-08-30.orig/tools/python/xen/util/pci.py 2013-09-09 11:21:53.000000000 +0200 ++++ 2013-08-30/tools/python/xen/util/pci.py 2013-09-09 11:24:09.000000000 +0200 +@@ -521,8 +521,9 @@ def save_pci_conf_space(devs_string): + pci_path = sysfs_mnt + SYSFS_PCI_DEVS_PATH + '/' + pci_str + \ + SYSFS_PCI_DEV_CONFIG_PATH + fd = os.open(pci_path, os.O_RDONLY) ++ size = os.fstat(fd).st_size + configs = [] +- for i in range(0, 256, 4): ++ for i in range(0, size, 4): + configs = configs + [os.read(fd,4)] + os.close(fd) + pci_list = pci_list + [pci_path] diff --git a/52260214-xend-fix-file-descriptor-leak-in-pci-utilities.patch b/52260214-xend-fix-file-descriptor-leak-in-pci-utilities.patch new file mode 100644 index 0000000..acf1bbf --- /dev/null +++ b/52260214-xend-fix-file-descriptor-leak-in-pci-utilities.patch @@ -0,0 +1,48 @@ +# Commit 749019afca4fd002d36856bad002cc11f7d0ddda +# Date 2013-09-03 16:36:52 +0100 +# Author Xi Xiong +# Committer Ian Campbell +xend: fix file descriptor leak in pci utilities + +A file descriptor leak was detected after creating multiple domUs with +pass-through PCI devices. This patch fixes the issue. + +Signed-off-by: Xi Xiong +Reviewed-by: Matt Wilson +[msw: adjusted commit message] +Signed-off-by: Matt Wilson + +--- 2013-08-30.orig/tools/python/xen/util/pci.py 2013-09-09 11:24:09.000000000 +0200 ++++ 2013-08-30/tools/python/xen/util/pci.py 2013-09-09 11:24:14.000000000 +0200 +@@ -969,18 +969,22 @@ class PciDevice: + ttl = 480; # 3840 bytes, minimum 8 bytes per capability + pos = 0x100 + ++ fd = None + try: + fd = os.open(path, os.O_RDONLY) + os.lseek(fd, pos, 0) + h = os.read(fd, 4) + if len(h) == 0: # MMCONF is not enabled? ++ os.close(fd) + return 0 + header = struct.unpack('I', h)[0] + if header == 0 or header == -1: ++ os.close(fd) + return 0 + + while ttl > 0: + if (header & 0x0000ffff) == cap: ++ os.close(fd) + return pos + pos = (header >> 20) & 0xffc + if pos < 0x100: +@@ -990,6 +994,8 @@ class PciDevice: + ttl = ttl - 1 + os.close(fd) + except OSError, (errno, strerr): ++ if fd is not None: ++ os.close(fd) + raise PciDeviceParseError(('Error when accessing sysfs: %s (%d)' % + (strerr, errno))) + return 0 diff --git a/52285317-hvmloader-fix-SeaBIOS-interface.patch b/52285317-hvmloader-fix-SeaBIOS-interface.patch new file mode 100644 index 0000000..1f7e73b --- /dev/null +++ b/52285317-hvmloader-fix-SeaBIOS-interface.patch @@ -0,0 +1,84 @@ +# Commit 5f2875739beef3a75c7a7e8579b6cbcb464e61b3 +# Date 2013-09-05 11:47:03 +0200 +# Author Jan Beulich +# Committer Jan Beulich +hvmloader: fix SeaBIOS interface + +The SeaBIOS ROM image may validly exceed 128k in size, it's only our +interface code that so far assumed that it wouldn't. Remove that +restriction by setting the base address depending on image size. + +Add a check to HVM loader so that too big images won't result in silent +guest failure anymore. + +Uncomment the intended build-time size check for rombios, moving it +into a function so that it would actually compile. 
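+
+(Illustration only, not part of the change: with the old fixed base of
+0xE0000 the image had to fit in 0x100000 - 0xE0000 = 128k; deriving the
+base from the image size lifts that limit. A worked example:
+
+    #include <stdint.h>
+    #include <assert.h>
+
+    int main(void)
+    {
+        uint32_t image_size = 160 * 1024;        /* example: a 160k image */
+        uint32_t base = 0x100000 - image_size;   /* as in the patch below */
+        assert(base == 0xd8000);                 /* loads at 0xD8000... */
+        assert(base + image_size == 0x100000);   /* ...and ends at 1MB */
+        return 0;
+    }
+)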
+ +Signed-off-by: Jan Beulich +Acked-by: Ian Campbell + +--- 2013-08-30.orig/tools/firmware/hvmloader/config-seabios.h 2013-07-09 20:57:12.000000000 +0200 ++++ 2013-08-30/tools/firmware/hvmloader/config-seabios.h 2013-09-09 11:24:23.000000000 +0200 +@@ -3,8 +3,6 @@ + + #define BIOS_INFO_PHYSICAL_ADDRESS 0x00001000 + +-#define SEABIOS_PHYSICAL_ADDRESS 0x000E0000 +- + #endif /* __HVMLOADER_CONFIG_SEABIOS_H__ */ + + /* +--- 2013-08-30.orig/tools/firmware/hvmloader/hvmloader.c 2013-07-09 20:57:12.000000000 +0200 ++++ 2013-08-30/tools/firmware/hvmloader/hvmloader.c 2013-09-09 11:24:23.000000000 +0200 +@@ -292,8 +292,12 @@ int main(void) + if ( bios->bios_load ) + bios->bios_load(bios); + else ++ { ++ BUG_ON(bios->bios_address + bios->image_size > ++ HVMLOADER_PHYSICAL_ADDRESS); + memcpy((void *)bios->bios_address, bios->image, + bios->image_size); ++ } + + if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode ) + { +--- 2013-08-30.orig/tools/firmware/hvmloader/rombios.c 2013-07-09 20:57:12.000000000 +0200 ++++ 2013-08-30/tools/firmware/hvmloader/rombios.c 2013-09-09 11:24:23.000000000 +0200 +@@ -127,6 +127,8 @@ static void rombios_load(const struct bi + uint32_t bioshigh; + struct rombios_info *info; + ++ BUILD_BUG_ON(sizeof(rombios) > 0x100000 - ROMBIOS_PHYSICAL_ADDRESS); ++ + memcpy((void *)config->bios_address, config->image, + config->image_size); + +@@ -206,8 +208,6 @@ static void rombios_create_smbios_tables + SMBIOS_PHYSICAL_END); + } + +-//BUILD_BUG_ON(sizeof(rombios) > (0x00100000U - ROMBIOS_PHYSICAL_ADDRESS)); +- + struct bios_config rombios_config = { + .name = "ROMBIOS", + +--- 2013-08-30.orig/tools/firmware/hvmloader/seabios.c 2013-07-09 20:57:12.000000000 +0200 ++++ 2013-08-30/tools/firmware/hvmloader/seabios.c 2013-09-09 11:24:23.000000000 +0200 +@@ -133,15 +133,13 @@ static void seabios_setup_e820(void) + dump_e820_table(e820, info->e820_nr); + } + +-//BUILD_BUG_ON(sizeof(seabios) > (0x00100000U - SEABIOS_PHYSICAL_ADDRESS)); +- + struct bios_config seabios_config = { + .name = "SeaBIOS", + + .image = seabios, + .image_size = sizeof(seabios), + +- .bios_address = SEABIOS_PHYSICAL_ADDRESS, ++ .bios_address = 0x100000 - sizeof(seabios), + + .load_roms = NULL, + diff --git a/blktapctrl.service b/blktapctrl.service deleted file mode 100644 index e5fdf30..0000000 --- a/blktapctrl.service +++ /dev/null @@ -1,14 +0,0 @@ -[Unit] -Description=blktapctrl daemon -RefuseManualStop=true -ConditionPathExists=/proc/xen - -[Service] -Type=forking -Environment=BLKTAPCTRL_ARGS= -EnvironmentFile=-/etc/sysconfig/blktapctrl -ExecStartPre=/bin/grep -q control_d /proc/xen/capabilities -ExecStart=/usr/sbin/blktapctrl $BLKTAPCTRL_ARGS - -[Install] -WantedBy=multi-user.target diff --git a/disable_emulated_device.patch b/disable_emulated_device.patch index 486d61f..3a5dc09 100644 --- a/disable_emulated_device.patch +++ b/disable_emulated_device.patch @@ -1,14 +1,39 @@ ---- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c -+++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c -@@ -419,6 +419,11 @@ static int __devinit platform_pci_init(s - platform_mmio = mmio_addr; - platform_mmiolen = mmio_len; +From: Olaf Hering +Subject: [PATCH v2] unmodified_drivers: enable unplug per default + +Since xen-3.3 an official unplug protocol for emulated hardware is +available in the toolstack. The pvops kernel does the unplug per +default, so it is safe to do it also in the drivers for forward ported +xenlinux. 
+Currently it is required to load xen-platform-pci with the module +parameter dev_unplug=all, which is cumbersome. + +Signed-off-by: Olaf Hering +--- + unmodified_drivers/linux-2.6/platform-pci/platform-pci.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +Index: xen-4.3.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c +=================================================================== +--- xen-4.3.0-testing.orig/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c ++++ xen-4.3.0-testing/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c +@@ -66,7 +66,7 @@ MODULE_LICENSE("GPL"); + static char *dev_unplug; + module_param(dev_unplug, charp, 0644); + MODULE_PARM_DESC(dev_unplug, "Emulated devices to unplug: " +- "[all,][ide-disks,][aux-ide-disks,][nics]\n"); ++ "[all,][ide-disks,][aux-ide-disks,][nics] (default is 'all')\n"); + + struct pci_dev *xen_platform_pdev; + +@@ -290,6 +290,10 @@ static int check_platform_magic(struct d + short magic, unplug = 0; + char protocol, *p, *q, *err; + ++ /* Unconditionally unplug everything */ ++ if (!dev_unplug) ++ unplug = UNPLUG_ALL; + + for (p = dev_unplug; p; p = q) { + q = strchr(dev_unplug, ','); + if (q) diff --git a/xen.changes b/xen.changes index 76967c6..7f62ee5 100644 --- a/xen.changes +++ b/xen.changes @@ -1,3 +1,119 @@ +------------------------------------------------------------------- +Mon Sep 9 09:26:18 MDT 2013 - carnold@suse.com + +- Upstream patches from Jan + 521c6d4a-x86-don-t-allow-Dom0-access-to-the-MSI-address-range.patch + 521c6d6c-x86-don-t-allow-Dom0-access-to-the-HT-address-range.patch + 521c6e23-x86-Intel-add-support-for-Haswell-CPU-models.patch + 521db25f-Fix-inactive-timer-list-corruption-on-second-S3-resume.patch + 521e1156-x86-AVX-instruction-emulation-fixes.patch + 521ef8d9-AMD-IOMMU-add-missing-checks.patch + 52205a7d-hvmloader-smbios-Correctly-count-the-number-of-tables-written.patch + 52205a90-public-hvm_xs_strings.h-Fix-ABI-regression-for-OEM-SMBios-strings.patch + 52205e27-x86-xsave-initialization-improvements.patch + 5226020f-xend-handle-extended-PCI-configuration-space-when-saving-state.patch + 52260214-xend-fix-file-descriptor-leak-in-pci-utilities.patch + 52285317-hvmloader-fix-SeaBIOS-interface.patch + +------------------------------------------------------------------- +Tue Sep 3 16:23:16 MDT 2013 - carnold@suse.com + +- bnc#837585 - xen* pkg update DISables `xencommons` and + `xendomains` systemd services + xen.spec + +------------------------------------------------------------------- +Fri Aug 30 20:11:46 CEST 2013 - ohering@suse.de + +- remove unneeded patch; autoload is handled by the PCI device, without + the PCI device xen_platform_pci would not work anyway + xen.sles11sp1.fate311487.xen_platform_pci.dmistring.patch + +------------------------------------------------------------------- +Fri Aug 30 20:07:41 CEST 2013 - ohering@suse.de + +- Update our xen-3.0.4 version of unplug code in qemu-trad + add comments about the usage of the code + rename handler function + reenable handlers for writing/reading from emulated PCI device + +------------------------------------------------------------------- +Fri Aug 30 19:51:03 CEST 2013 - ohering@suse.de + +- Change unplugging of emulated devices in PVonHVM guests + Since 3.0.4 xen-platform-pci.ko triggered the unplug by writing + to the PCI space of the emulated PCI device.
3.3 introduced an + official unplug protocol. The option to unplug with the official + protocol is disabled by default. + Remove our version and enable the unplug via the official protocol + +------------------------------------------------------------------- +Fri Aug 30 08:11:55 MDT 2013 - carnold@suse.com + +- Upstream patches from Jan + 51e517e6-AMD-IOMMU-allocate-IRTEs.patch + 51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch + 51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch + 52146070-ACPI-fix-acpi_os_map_memory.patch + 5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch + 5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch + 5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch + 5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch + 5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch + 5215d8b0-Correct-X2-APIC-HVM-emulation.patch +- Dropped 520d417d-xen-Add-stdbool.h-workaround-for-BSD.patch + +------------------------------------------------------------------- +Mon Aug 26 15:48:57 MDT 2013 - carnold@suse.com + +- bnc#836239 - SLES 11 SP3 Xen security patch does not + automatically update UEFI boot binary + xen.spec + +------------------------------------------------------------------- +Tue Aug 20 07:56:13 MDT 2013 - carnold@suse.com + +- Upstream patches from Jan + 51d5334e-x86-mm-Ensure-useful-progress-in-alloc_l2_table.patch + 51dd155c-adjust-x86-EFI-build.patch + 51e63d80-x86-cpuidle-Change-logging-for-unknown-APIC-IDs.patch + 51e6540d-x86-don-t-use-destroy_xen_mappings-for-vunmap.patch + 51e7963f-x86-time-Update-wallclock-in-shared-info-when-altering-domain-time-offset.patch + 51ffd577-fix-off-by-one-mistakes-in-vm_alloc.patch + 51ffd5fd-x86-refine-FPU-selector-handling-code-for-XSAVEOPT.patch + 520114bb-Nested-VMX-Flush-TLBs-and-Caches-if-paging-mode-changed.patch + 520a5504-VMX-add-boot-parameter-to-enable-disable-APIC-v-dynamically.patch + 520a24f6-x86-AMD-Fix-nested-svm-crash-due-to-assertion-in-__virt_to_maddr.patch + 520a2570-x86-AMD-Inject-GP-instead-of-UD-when-unable-to-map-vmcb.patch + 520b4b60-VT-d-protect-against-bogus-information-coming-from-BIOS.patch + 520b4bda-x86-MTRR-fix-range-check-in-mtrr_add_page.patch + 520cb8b6-x86-time-fix-check-for-negative-time-in-__update_vcpu_system_time.patch + 520d417d-xen-Add-stdbool.h-workaround-for-BSD.patch + +------------------------------------------------------------------- +Fri Aug 16 14:54:53 MDT 2013 - carnold@suse.com + +- The xencommons.service file handles the starting of xenstored + and xenconsoled. Drop the following service files as + unnecessary. Update xendomains.service to reflect these changes.
+ xenstored.service + xenconsoled.service + blktapctrl.service + +------------------------------------------------------------------- +Thu Aug 16 08:54:04 MDT 2013 - carnold@suse.com + +- Add xencommons.service to xendomains.service 'After' tag + xendomains.service + +------------------------------------------------------------------- +Thu Aug 15 14:54:04 MDT 2013 - carnold@suse.com + +- Change the default bridge in xl.conf from xenbr0 to just br0 + xl-conf-default-bridge.patch +- Add network.target to xendomains.service 'After' tag + xendomains.service + ------------------------------------------------------------------- Wed Jul 31 11:34:14 MDT 2013 - carnold@suse.com diff --git a/xen.sles11sp1.fate311487.xen_platform_pci.dmistring.patch b/xen.sles11sp1.fate311487.xen_platform_pci.dmistring.patch deleted file mode 100644 index 75980f4..0000000 --- a/xen.sles11sp1.fate311487.xen_platform_pci.dmistring.patch +++ /dev/null @@ -1,40 +0,0 @@ -References: fate#311487 - -Provide a modalias entry in xen-plaform-pci.ko to allow early autoloading in -initrd based on /sys/class/dmi/id/modalias - -Signed-off-by: Olaf Hering - ---- - unmodified_drivers/linux-2.6/platform-pci/platform-pci.c | 13 +++++++++++++ - 1 file changed, 13 insertions(+) - ---- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c -+++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c -@@ -27,6 +27,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -472,6 +473,18 @@ static struct pci_device_id platform_pci - - MODULE_DEVICE_TABLE(pci, platform_pci_tbl); - -+static const struct dmi_system_id platform_dmi_tbl[] = { -+ { -+ .ident = "Xen PV-on-HVM", -+ .matches = { -+ DMI_MATCH(DMI_SYS_VENDOR, "Xen"), -+ DMI_MATCH(DMI_PRODUCT_NAME, "HVM domU"), -+ }, -+ }, -+ { }, -+}; -+MODULE_DEVICE_TABLE(dmi, platform_dmi_tbl); -+ - static struct pci_driver platform_driver = { - name: DRV_NAME, - probe: platform_pci_init, diff --git a/xen.spec b/xen.spec index 5fe63af..497c34d 100644 --- a/xen.spec +++ b/xen.spec @@ -138,7 +138,7 @@ BuildRequires: xorg-x11 BuildRequires: lndir %endif %endif -Version: 4.3.0_08 +Version: 4.3.0_10 Release: 0 PreReq: %insserv_prereq %fillup_prereq Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) @@ -185,13 +185,10 @@ Source34: init.pciback Source35: sysconfig.pciback Source36: xnloader.py # Systemd service files -Source40: xenstored.service -Source41: blktapctrl.service -Source42: xend.service -Source43: xenconsoled.service -Source44: xen-watchdog.service -Source45: xendomains.service -Source46: xencommons.service +Source40: xend.service +Source41: xencommons.service +Source42: xendomains.service +Source43: xen-watchdog.service Source99: baselibs.conf # http://xenbits.xensource.com/ext/xenalyze Source20000: xenalyze.hg.tar.bz2 @@ -199,7 +196,43 @@ Source20000: xenalyze.hg.tar.bz2 Patch1: 51d277a3-x86-don-t-pass-negative-time-to-gtime_to_gtsc-try-2.patch Patch2: 51d27807-iommu-amd-Fix-logic-for-clearing-the-IOMMU-interrupt-bits.patch Patch3: 51d27841-iommu-amd-Workaround-for-erratum-787.patch -Patch4: 51daa074-Revert-hvmloader-always-include-HPET-table.patch +Patch4: 51d5334e-x86-mm-Ensure-useful-progress-in-alloc_l2_table.patch +Patch5: 51daa074-Revert-hvmloader-always-include-HPET-table.patch +Patch6: 51dd155c-adjust-x86-EFI-build.patch +Patch7: 51e517e6-AMD-IOMMU-allocate-IRTEs.patch +Patch8: 51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch +Patch9: 51e63d80-x86-cpuidle-Change-logging-for-unknown-APIC-IDs.patch +Patch10: 
51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch +Patch11: 51e6540d-x86-don-t-use-destroy_xen_mappings-for-vunmap.patch +Patch12: 51e7963f-x86-time-Update-wallclock-in-shared-info-when-altering-domain-time-offset.patch +Patch13: 51ffd577-fix-off-by-one-mistakes-in-vm_alloc.patch +Patch14: 51ffd5fd-x86-refine-FPU-selector-handling-code-for-XSAVEOPT.patch +Patch15: 520114bb-Nested-VMX-Flush-TLBs-and-Caches-if-paging-mode-changed.patch +Patch16: 520a24f6-x86-AMD-Fix-nested-svm-crash-due-to-assertion-in-__virt_to_maddr.patch +Patch17: 520a2570-x86-AMD-Inject-GP-instead-of-UD-when-unable-to-map-vmcb.patch +Patch18: 520a5504-VMX-add-boot-parameter-to-enable-disable-APIC-v-dynamically.patch +Patch19: 520b4b60-VT-d-protect-against-bogus-information-coming-from-BIOS.patch +Patch20: 520b4bda-x86-MTRR-fix-range-check-in-mtrr_add_page.patch +Patch21: 520cb8b6-x86-time-fix-check-for-negative-time-in-__update_vcpu_system_time.patch +Patch22: 52146070-ACPI-fix-acpi_os_map_memory.patch +Patch23: 5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch +Patch24: 5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch +Patch25: 5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch +Patch26: 5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch +Patch27: 5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch +Patch28: 5215d8b0-Correct-X2-APIC-HVM-emulation.patch +Patch29: 521c6d4a-x86-don-t-allow-Dom0-access-to-the-MSI-address-range.patch +Patch30: 521c6d6c-x86-don-t-allow-Dom0-access-to-the-HT-address-range.patch +Patch31: 521c6e23-x86-Intel-add-support-for-Haswell-CPU-models.patch +Patch32: 521db25f-Fix-inactive-timer-list-corruption-on-second-S3-resume.patch +Patch33: 521e1156-x86-AVX-instruction-emulation-fixes.patch +Patch34: 521ef8d9-AMD-IOMMU-add-missing-checks.patch +Patch35: 52205a7d-hvmloader-smbios-Correctly-count-the-number-of-tables-written.patch +Patch36: 52205a90-public-hvm_xs_strings.h-Fix-ABI-regression-for-OEM-SMBios-strings.patch +Patch37: 52205e27-x86-xsave-initialization-improvements.patch +Patch38: 5226020f-xend-handle-extended-PCI-configuration-space-when-saving-state.patch +Patch39: 52260214-xend-fix-file-descriptor-leak-in-pci-utilities.patch +Patch40: 52285317-hvmloader-fix-SeaBIOS-interface.patch # Upstream qemu patches # Our patches Patch301: xen-destdir.patch @@ -212,6 +245,7 @@ Patch312: bridge-bonding.patch Patch313: bridge-record-creation.patch Patch314: vif-bridge-no-iptables.patch Patch315: vif-bridge-tap-fix.patch +Patch316: xl-conf-default-bridge.patch Patch320: network-nat-open-SuSEfirewall2-FORWARD.patch Patch321: udev-rules.patch Patch322: libxen_permissive.patch @@ -236,7 +270,6 @@ Patch503: x86-dom-print.patch Patch504: x86-extra-trap-info.patch Patch520: supported_module.patch Patch521: magic_ioport_compat.patch -Patch522: xen.sles11sp1.fate311487.xen_platform_pci.dmistring.patch Patch523: disable_emulated_device.patch # Legacy Xend and Qemu patches Patch800: xend-traditional-qemu.patch @@ -492,6 +525,42 @@ Authors %patch2 -p1 %patch3 -p1 %patch4 -p1 +%patch5 -p1 +%patch6 -p1 +%patch7 -p1 +%patch8 -p1 +%patch9 -p1 +%patch10 -p1 +%patch11 -p1 +%patch12 -p1 +%patch13 -p1 +%patch14 -p1 +%patch15 -p1 +%patch16 -p1 +%patch17 -p1 +%patch18 -p1 +%patch19 -p1 +%patch20 -p1 +%patch21 -p1 +%patch22 -p1 +%patch23 -p1 +%patch24 -p1 +%patch25 -p1 +%patch26 -p1 +%patch27 -p1 +%patch28 -p1 +%patch29 -p1 +%patch30 -p1 +%patch31 -p1 +%patch32 -p1 +%patch33 -p1 +%patch34 -p1 +%patch35 -p1 +%patch36 -p1 +%patch37 -p1 +%patch38 
-p1 +%patch39 -p1 +%patch40 -p1 %patch301 -p1 %patch302 -p1 %patch303 -p1 @@ -502,6 +571,7 @@ Authors %patch313 -p1 %patch314 -p1 %patch315 -p1 +%patch316 -p1 %patch320 -p1 %patch321 -p1 %patch322 -p1 @@ -523,7 +593,6 @@ Authors %patch504 -p1 %patch520 -p1 %patch521 -p1 -%patch522 -p1 %patch523 -p1 %patch800 -p1 %patch99997 -p1 @@ -764,13 +833,10 @@ mv $RPM_BUILD_ROOT/etc/udev/rules.d/xend.rules $RPM_BUILD_ROOT/etc/udev/rules.d/ # Systemd %if %{?with_systemd}0 mkdir -p %{buildroot}%{_unitdir} -install -m 644 %{SOURCE40} %{buildroot}%{_unitdir}/xenstored.service -install -m 644 %{SOURCE41} %{buildroot}%{_unitdir}/blktapctrl.service -install -m 644 %{SOURCE42} %{buildroot}%{_unitdir}/xend.service -install -m 644 %{SOURCE43} %{buildroot}%{_unitdir}/xenconsoled.service -install -m 644 %{SOURCE44} %{buildroot}%{_unitdir}/xen-watchdog.service -install -m 644 %{SOURCE45} %{buildroot}%{_unitdir}/xendomains.service -install -m 644 %{SOURCE46} %{buildroot}%{_unitdir}/xencommons.service +install -m 644 %{SOURCE40} %{buildroot}%{_unitdir}/xend.service +install -m 644 %{SOURCE41} %{buildroot}%{_unitdir}/xencommons.service +install -m 644 %{SOURCE42} %{buildroot}%{_unitdir}/xendomains.service +install -m 644 %{SOURCE43} %{buildroot}%{_unitdir}/xen-watchdog.service %endif # Xen utils @@ -888,12 +954,10 @@ rm -f $RPM_BUILD_ROOT/usr/libexec/qemu-bridge-helper %endif /usr/sbin/xenconsoled /usr/sbin/xencov -/usr/sbin/xen-destroy %ifnarch %arm aarch64 /usr/sbin/xen-hptool /usr/sbin/xen-hvmcrash /usr/sbin/xen-hvmctx -/usr/sbin/xen-list /usr/sbin/xenlockprof /usr/sbin/xen-lowmemd /usr/sbin/xenmon.py @@ -909,7 +973,6 @@ rm -f $RPM_BUILD_ROOT/usr/libexec/qemu-bridge-helper /usr/sbin/xentop %ifnarch %arm aarch64 /usr/sbin/xentrace_setmask -/usr/sbin/xen-vmresync %endif /usr/sbin/xenwatchdogd /usr/sbin/xsview @@ -930,7 +993,6 @@ rm -f $RPM_BUILD_ROOT/usr/libexec/qemu-bridge-helper %dir /etc/xen/scripts /etc/xen/scripts/blktap /etc/xen/scripts/block* -/etc/xen/scripts/domain-lock* /etc/xen/scripts/external-device-migrate /etc/xen/scripts/hotplugpath.sh /etc/xen/scripts/locking.sh @@ -939,10 +1001,8 @@ rm -f $RPM_BUILD_ROOT/usr/libexec/qemu-bridge-helper %ifnarch %arm aarch64 /etc/xen/scripts/qemu-ifup %endif -/etc/xen/scripts/set-lock /etc/xen/scripts/vif2 /etc/xen/scripts/vif-* -/etc/xen/scripts/vm-monitor /etc/xen/scripts/vscsi /etc/xen/scripts/xen-hotplug-* /etc/xen/scripts/xen-network-common.sh @@ -985,11 +1045,8 @@ rm -f $RPM_BUILD_ROOT/usr/libexec/qemu-bridge-helper %config /etc/init.d/pciback %endif %if %{?with_systemd}0 -%{_unitdir}/xendomains.service %{_unitdir}/xencommons.service -%{_unitdir}/xenstored.service -%{_unitdir}/blktapctrl.service -%{_unitdir}/xenconsoled.service +%{_unitdir}/xendomains.service %{_unitdir}/xen-watchdog.service %endif %dir /etc/modprobe.d @@ -1026,7 +1083,6 @@ rm -f $RPM_BUILD_ROOT/usr/libexec/qemu-bridge-helper %{_defaultdocdir}/xen/boot.local.xenU %{_defaultdocdir}/xen/boot.xen %{_defaultdocdir}/xen/misc -%{_mandir}/man1/xen-list.1.gz %{_mandir}/man1/xentop.1.gz %{_mandir}/man1/xentrace_format.1.gz %{_mandir}/man1/xl.1.gz @@ -1046,6 +1102,9 @@ rm -f $RPM_BUILD_ROOT/usr/libexec/qemu-bridge-helper /usr/sbin/xend /usr/sbin/xen-bugtool /usr/sbin/xen-python-path +/usr/sbin/xen-list +/usr/sbin/xen-destroy +/usr/sbin/xen-vmresync %dir /var/lib/xen/xend-db %dir /var/lib/xen/xend-db/domain %dir /var/lib/xen/xend-db/migrate @@ -1066,6 +1125,9 @@ rm -f $RPM_BUILD_ROOT/usr/libexec/qemu-bridge-helper %config(noreplace) /etc/xen/*.xml %ifnarch %arm aarch64 /etc/xen/scripts/xend-relocation.sh 
diff --git a/xenconsoled.service b/xenconsoled.service
deleted file mode 100644
index e4dc9a4..0000000
--- a/xenconsoled.service
+++ /dev/null
@@ -1,17 +0,0 @@
-[Unit]
-Description=Xenconsoled - handles logging from guest consoles and hypervisor
-After=xenstored.service
-ConditionPathExists=/proc/xen
-
-[Service]
-Type=simple
-Environment=XENCONSOLED_ARGS=
-Environment=XENCONSOLED_LOG=none
-Environment=XENCONSOLED_LOG_DIR=/var/log/xen/console
-EnvironmentFile=-/etc/sysconfig/xenconsoled
-PIDFile=/var/run/xenconsoled.pid
-ExecStartPre=/bin/grep -q control_d /proc/xen/capabilities
-ExecStart=/usr/sbin/xenconsoled --log=${XENCONSOLED_LOG} --log-dir=${XENCONSOLED_LOG_DIR} $XENCONSOLED_ARGS
-
-[Install]
-WantedBy=multi-user.target
diff --git a/xend-traditional-qemu.patch b/xend-traditional-qemu.patch
index 14175ea..76f1cf9 100644
--- a/xend-traditional-qemu.patch
+++ b/xend-traditional-qemu.patch
@@ -4022,13 +4022,13 @@ Index: xen-4.3.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
  #include
  #include
 
-@@ -335,11 +337,66 @@ static void xen_platform_ioport_writeb(v
+@@ -335,11 +337,71 @@ static void xen_platform_ioport_writeb(v
      }
  }
 
 +static uint32_t ioport_base;
 +
-+static void platform_ioport_write(void *opaque, uint32_t addr, uint32_t val)
++static void suse_platform_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 +{
 +    DECLARE_DOMCTL;
 +    int rc;
@@ -4038,6 +4038,7 @@ Index: xen-4.3.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
 +
 +    switch (addr - ioport_base) {
 +    case 0:
++        /* FIXME: unknown who makes use of this code! */
 +        fprintf(logfile, "Init hypercall page %x, addr %x.\n", val, addr);
 +        domctl.domain = (domid_t)domid;
 +        domctl.u.hypercall_init.gmfn = val;
@@ -4046,6 +4047,10 @@ Index: xen-4.3.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
 +        fprintf(logfile, "result -> %d.\n", rc);
 +        break;
 +    case 4:
++        /* xen-kmp has used this port since xen-3.0.4, instead of the official protocol from xen-3.3+.
++         * Pre-1.7 vmdp made use of ports 4 and 8, depending on how vmdp was configured:
++         * if vmdp was to control both disk and LAN, it would use 4;
++         * if it controlled just disk or just LAN, it would use 8 below. */
 +        fprintf(logfile, "Disconnect IDE hard disk...\n");
 +        ide_unplug_harddisks();
 +        fprintf(logfile, "Disconnect SCSI hard disk...\n");
@@ -4070,8 +4075,8 @@ Index: xen-4.3.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
 +        }
 +        break;
 +    default:
-+        fprintf(logfile, "Write to bad port %x (base %x) on evtchn device.\n",
-+                addr, ioport_base);
++        fprintf(logfile, "Write %x to bad port %x (base %x) on evtchn device.\n",
++                val, addr, ioport_base);
 +        break;
 +    }
 +}
@@ -4080,12 +4085,12 @@ Index: xen-4.3.0-testing/tools/qemu-xen-traditional-dir-remote/hw/xen_platform.c
 {
 +    ioport_base = addr;
 +
-+    register_ioport_write(addr, 16, 4, platform_ioport_write, NULL);
-+/*
++    register_ioport_write(addr, 16, 4, suse_platform_ioport_write, NULL);
++
     PCIXenPlatformState *d = (PCIXenPlatformState *)pci_dev;
     register_ioport_write(addr, size, 1, xen_platform_ioport_writeb, d);
     register_ioport_read(addr, size, 1, xen_platform_ioport_readb, d);
-+*/
++
 }
 
 static uint32_t platform_mmio_read(void *opaque, target_phys_addr_t addr)
@@ -5307,7 +5312,7 @@ Index: xen-4.3.0-testing/tools/python/xen/util/pci.py
 
 def pci_dict_to_xc_str(dev):
     return __pci_dict_to_fmt_str('0x%x, 0x%x, 0x%x, 0x%x', dev)
 
-@@ -560,6 +567,115 @@ def find_all_assignable_devices():
+@@ -561,6 +568,115 @@ def find_all_assignable_devices():
          dev_list = dev_list + [dev]
      return dev_list
 
diff --git a/xendomains.service b/xendomains.service
index 96adcbe..5804bc7 100644
--- a/xendomains.service
+++ b/xendomains.service
@@ -1,7 +1,6 @@
 [Unit]
 Description=Xendomains - start and stop Xen VMs on boot and shutdown
-Requires=xenstored.service xenconsoled.service
-After=xenstored.service xenconsoled.service
+After=xencommons.service network.target
 ConditionPathExists=/proc/xen
 
 [Service]
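The xendomains change swaps the hard Requires= dependency for pure ordering: After= only sequences units that happen to be queued in the same transaction, it does not pull xencommons in by itself. Since the daemons from the deleted per-service units are now expected to be started via xencommons, that is normally sufficient; a site that wants the hard dependency back could add a local drop-in along these lines (path and contents are illustrative, not shipped by this package):

    # /etc/systemd/system/xendomains.service.d/50-requires.conf (hypothetical)
    [Unit]
    Requires=xencommons.service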
diff --git a/xenstored.service b/xenstored.service
deleted file mode 100644
index 9d0ea83..0000000
--- a/xenstored.service
+++ /dev/null
@@ -1,16 +0,0 @@
-[Unit]
-Description=Xenstored - daemon managing xenstore file system
-Before=libvirtd.service libvirt-guests.service
-RefuseManualStop=true
-ConditionPathExists=/proc/xen
-
-[Service]
-Type=forking
-Environment=XENSTORED_ARGS=
-EnvironmentFile=-/etc/sysconfig/xenstored
-PIDFile=/var/run/xenstored.pid
-ExecStartPre=/bin/grep -q control_d /proc/xen/capabilities
-ExecStart=/usr/sbin/xenstored --pid-file /var/run/xenstored.pid $XENSTORED_ARGS
-
-[Install]
-WantedBy=multi-user.target
diff --git a/xl-conf-default-bridge.patch b/xl-conf-default-bridge.patch
new file mode 100644
index 0000000..9d146f6
--- /dev/null
+++ b/xl-conf-default-bridge.patch
@@ -0,0 +1,11 @@
+--- xen-4.3.0-testing/tools/examples/xl.conf.orig	2013-08-15 12:00:06.000000000 -0600
++++ xen-4.3.0-testing/tools/examples/xl.conf	2013-08-15 12:00:56.000000000 -0600
+@@ -26,7 +26,7 @@
+ #vif.default.script="vif-bridge"
+ 
+ # default bridge device to use with vif-bridge hotplug scripts
+-#vif.default.bridge="xenbr0"
++vif.default.bridge="br0"
+ 
+ # Reserve a claim of memory when launching a guest. This guarantees immediate
+ # feedback whether the guest can be launched due to memory exhaustion
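With vif.default.bridge preset to "br0", a domU configuration can omit bridge= from its vif specification and still attach to the default bridge. A minimal illustrative guest config, assuming a hypothetical name, disk path, and MAC address:

    # /etc/xen/vm/example.cfg (hypothetical)
    name = "example"
    memory = 1024
    vcpus = 2
    disk = [ 'file:/var/lib/xen/images/example/disk0.raw,xvda,w' ]
    # bridge= is omitted: xl falls back to vif.default.bridge ("br0") from xl.conf
    vif = [ 'mac=00:16:3e:00:12:34' ]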