diff --git a/51d277a3-x86-don-t-pass-negative-time-to-gtime_to_gtsc-try-2.patch b/51d277a3-x86-don-t-pass-negative-time-to-gtime_to_gtsc-try-2.patch deleted file mode 100644 index 57bd9a7..0000000 --- a/51d277a3-x86-don-t-pass-negative-time-to-gtime_to_gtsc-try-2.patch +++ /dev/null @@ -1,44 +0,0 @@ -# Commit 5ad914bc867c5a6a4957869c89918f4e1f9dd9c4 -# Date 2013-07-02 08:48:03 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86: don't pass negative time to gtime_to_gtsc() (try 2) - -This mostly reverts commit eb60be3d ("x86: don't pass negative time to -gtime_to_gtsc()") and instead corrects __update_vcpu_system_time()'s -handling of this_cpu(cpu_time).stime_local_stamp dating back before the -start of a HVM guest (which would otherwise lead to a negative value -getting passed to gtime_to_gtsc(), causing scale_delta() to produce -meaningless output). - -Flushing the value to zero was wrong, and printing a message for -something that can validly happen wasn't very useful either. - -Signed-off-by: Jan Beulich -Acked-by: Keir Fraser - ---- a/xen/arch/x86/time.c -+++ b/xen/arch/x86/time.c -@@ -823,16 +823,13 @@ static void __update_vcpu_system_time(st - struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time; - - stime += pl->stime_offset + v->arch.hvm_vcpu.stime_offset; -- if ( (s64)stime < 0 ) -- { -- printk(XENLOG_G_WARNING "d%dv%d: bogus time %" PRId64 -- " (offsets %" PRId64 "/%" PRId64 ")\n", -- d->domain_id, v->vcpu_id, stime, -- pl->stime_offset, v->arch.hvm_vcpu.stime_offset); -- stime = 0; -- } -+ if ( stime >= 0 ) -+ tsc_stamp = gtime_to_gtsc(d, stime); -+ else -+ tsc_stamp = -gtime_to_gtsc(d, -stime); - } -- tsc_stamp = gtime_to_gtsc(d, stime); -+ else -+ tsc_stamp = gtime_to_gtsc(d, stime); - } - else - { diff --git a/51d27807-iommu-amd-Fix-logic-for-clearing-the-IOMMU-interrupt-bits.patch b/51d27807-iommu-amd-Fix-logic-for-clearing-the-IOMMU-interrupt-bits.patch deleted file mode 100644 index 86a467e..0000000 --- a/51d27807-iommu-amd-Fix-logic-for-clearing-the-IOMMU-interrupt-bits.patch +++ /dev/null @@ -1,285 +0,0 @@ -# Commit 2823a0c7dfc979db316787e1dd42a8845e5825c0 -# Date 2013-07-02 08:49:43 +0200 -# Author Suravee Suthikulpanit -# Committer Jan Beulich -iommu/amd: Fix logic for clearing the IOMMU interrupt bits - -The IOMMU interrupt bits in the IOMMU status registers are -"read-only, and write-1-to-clear (RW1C). Therefore, the existing -logic which reads the register, set the bit, and then writing back -the values could accidentally clear certain bits if it has been set. - -The correct logic would just be writing only the value which only -set the interrupt bits, and leave the rest to zeros. - -This patch also, clean up #define masks as Jan has suggested. - -Signed-off-by: Suravee Suthikulpanit - -With iommu_interrupt_handler() properly having got switched its readl() -from status to control register, the subsequent writel() needed to be -switched too (and the RW1C comment there was bogus). - -Some of the cleanup went too far - undone. - -Further, with iommu_interrupt_handler() now actually disabling the -interrupt sources, they also need to get re-enabled by the tasklet once -it finished processing the respective log. This also implies re-running -the tasklet so that log entries added between reading the log and re- -enabling the interrupt will get handled in a timely manner. - -Finally, guest write emulation to the status register needs to be done -with the RW1C (and RO for all other bits) semantics in mind too. 
- -Signed-off-by: Jan Beulich -Reviewed-by: Tim Deegan -Acked-by: Suravee Suthikulpanit - ---- a/xen/drivers/passthrough/amd/iommu_cmd.c -+++ b/xen/drivers/passthrough/amd/iommu_cmd.c -@@ -75,11 +75,9 @@ static void flush_command_buffer(struct - u32 cmd[4], status; - int loop_count, comp_wait; - -- /* clear 'ComWaitInt' in status register (WIC) */ -- set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0, -- IOMMU_STATUS_COMP_WAIT_INT_MASK, -- IOMMU_STATUS_COMP_WAIT_INT_SHIFT, &status); -- writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -+ /* RW1C 'ComWaitInt' in status register */ -+ writel(IOMMU_STATUS_COMP_WAIT_INT_MASK, -+ iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - - /* send an empty COMPLETION_WAIT command to flush command buffer */ - cmd[3] = cmd[2] = 0; -@@ -103,9 +101,9 @@ static void flush_command_buffer(struct - - if ( comp_wait ) - { -- /* clear 'ComWaitInt' in status register (WIC) */ -- status &= IOMMU_STATUS_COMP_WAIT_INT_MASK; -- writel(status, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -+ /* RW1C 'ComWaitInt' in status register */ -+ writel(IOMMU_STATUS_COMP_WAIT_INT_MASK, -+ iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - return; - } - AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n"); ---- a/xen/drivers/passthrough/amd/iommu_guest.c -+++ b/xen/drivers/passthrough/amd/iommu_guest.c -@@ -754,7 +754,14 @@ static void guest_iommu_mmio_write64(str - u64_to_reg(&iommu->ppr_log.reg_tail, val); - break; - case IOMMU_STATUS_MMIO_OFFSET: -- u64_to_reg(&iommu->reg_status, val); -+ val &= IOMMU_STATUS_EVENT_OVERFLOW_MASK | -+ IOMMU_STATUS_EVENT_LOG_INT_MASK | -+ IOMMU_STATUS_COMP_WAIT_INT_MASK | -+ IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK | -+ IOMMU_STATUS_PPR_LOG_INT_MASK | -+ IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK | -+ IOMMU_STATUS_GAPIC_LOG_INT_MASK; -+ u64_to_reg(&iommu->reg_status, reg_to_u64(iommu->reg_status) & ~val); - break; - - default: ---- a/xen/drivers/passthrough/amd/iommu_init.c -+++ b/xen/drivers/passthrough/amd/iommu_init.c -@@ -344,13 +344,13 @@ static void set_iommu_ppr_log_control(st - writeq(0, iommu->mmio_base + IOMMU_PPR_LOG_TAIL_OFFSET); - - iommu_set_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT); -- iommu_set_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT); -+ iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); - iommu_set_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); - } - else - { - iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_ENABLE_SHIFT); -- iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_INT_SHIFT); -+ iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); - iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT); - } - -@@ -410,7 +410,7 @@ static void iommu_reset_log(struct amd_i - void (*ctrl_func)(struct amd_iommu *iommu, int)) - { - u32 entry; -- int log_run, run_bit, of_bit; -+ int log_run, run_bit; - int loop_count = 1000; - - BUG_ON(!iommu || ((log != &iommu->event_log) && (log != &iommu->ppr_log))); -@@ -419,10 +419,6 @@ static void iommu_reset_log(struct amd_i - IOMMU_STATUS_EVENT_LOG_RUN_SHIFT : - IOMMU_STATUS_PPR_LOG_RUN_SHIFT; - -- of_bit = ( log == &iommu->event_log ) ? 
-- IOMMU_STATUS_EVENT_OVERFLOW_SHIFT : -- IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT; -- - /* wait until EventLogRun bit = 0 */ - do { - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -@@ -439,9 +435,10 @@ static void iommu_reset_log(struct amd_i - - ctrl_func(iommu, IOMMU_CONTROL_DISABLED); - -- /*clear overflow bit */ -- iommu_clear_bit(&entry, of_bit); -- writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -+ /* RW1C overflow bit */ -+ writel(log == &iommu->event_log ? IOMMU_STATUS_EVENT_OVERFLOW_MASK -+ : IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK, -+ iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); - - /*reset event log base address */ - log->head = 0; -@@ -611,22 +608,33 @@ static void iommu_check_event_log(struct - u32 entry; - unsigned long flags; - -+ /* RW1C interrupt status bit */ -+ writel(IOMMU_STATUS_EVENT_LOG_INT_MASK, -+ iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -+ - iommu_read_log(iommu, &iommu->event_log, - sizeof(event_entry_t), parse_event_log_entry); - - spin_lock_irqsave(&iommu->lock, flags); - -- /*check event overflow */ -+ /* Check event overflow. */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -- - if ( iommu_get_bit(entry, IOMMU_STATUS_EVENT_OVERFLOW_SHIFT) ) - iommu_reset_log(iommu, &iommu->event_log, set_iommu_event_log_control); -- -- /* reset interrupt status bit */ -- entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -- iommu_set_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); -- -- writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -+ else -+ { -+ entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); -+ if ( !(entry & IOMMU_CONTROL_EVENT_LOG_INT_MASK) ) -+ { -+ entry |= IOMMU_CONTROL_EVENT_LOG_INT_MASK; -+ writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); -+ /* -+ * Re-schedule the tasklet to handle eventual log entries added -+ * between reading the log above and re-enabling the interrupt. -+ */ -+ tasklet_schedule(&amd_iommu_irq_tasklet); -+ } -+ } - - spin_unlock_irqrestore(&iommu->lock, flags); - } -@@ -681,22 +689,33 @@ static void iommu_check_ppr_log(struct a - u32 entry; - unsigned long flags; - -+ /* RW1C interrupt status bit */ -+ writel(IOMMU_STATUS_PPR_LOG_INT_MASK, -+ iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -+ - iommu_read_log(iommu, &iommu->ppr_log, - sizeof(ppr_entry_t), parse_ppr_log_entry); - - spin_lock_irqsave(&iommu->lock, flags); - -- /*check event overflow */ -+ /* Check event overflow. */ - entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -- - if ( iommu_get_bit(entry, IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT) ) - iommu_reset_log(iommu, &iommu->ppr_log, set_iommu_ppr_log_control); -- -- /* reset interrupt status bit */ -- entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -- iommu_set_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); -- -- writel(entry, iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -+ else -+ { -+ entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); -+ if ( !(entry & IOMMU_CONTROL_PPR_LOG_INT_MASK) ) -+ { -+ entry |= IOMMU_CONTROL_PPR_LOG_INT_MASK; -+ writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); -+ /* -+ * Re-schedule the tasklet to handle eventual log entries added -+ * between reading the log above and re-enabling the interrupt. 
-+ */ -+ tasklet_schedule(&amd_iommu_irq_tasklet); -+ } -+ } - - spin_unlock_irqrestore(&iommu->lock, flags); - } -@@ -733,11 +752,14 @@ static void iommu_interrupt_handler(int - - spin_lock_irqsave(&iommu->lock, flags); - -- /* Silence interrupts from both event and PPR logging */ -- entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -- iommu_clear_bit(&entry, IOMMU_STATUS_EVENT_LOG_INT_SHIFT); -- iommu_clear_bit(&entry, IOMMU_STATUS_PPR_LOG_INT_SHIFT); -- writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET); -+ /* -+ * Silence interrupts from both event and PPR by clearing the -+ * enable logging bits in the control register -+ */ -+ entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); -+ iommu_clear_bit(&entry, IOMMU_CONTROL_EVENT_LOG_INT_SHIFT); -+ iommu_clear_bit(&entry, IOMMU_CONTROL_PPR_LOG_INT_SHIFT); -+ writel(entry, iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET); - - spin_unlock_irqrestore(&iommu->lock, flags); - ---- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h -+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h -@@ -336,14 +336,17 @@ - #define IOMMU_CONTROL_ISOCHRONOUS_SHIFT 11 - #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK 0x00001000 - #define IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT 12 -+#define IOMMU_CONTROL_PPR_LOG_ENABLE_MASK 0x00002000 -+#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 -+#define IOMMU_CONTROL_PPR_LOG_INT_MASK 0x00004000 -+#define IOMMU_CONTROL_PPR_LOG_INT_SHIFT 14 -+#define IOMMU_CONTROL_PPR_ENABLE_MASK 0x00008000 -+#define IOMMU_CONTROL_PPR_ENABLE_SHIFT 15 -+#define IOMMU_CONTROL_GT_ENABLE_MASK 0x00010000 -+#define IOMMU_CONTROL_GT_ENABLE_SHIFT 16 - #define IOMMU_CONTROL_RESTART_MASK 0x80000000 - #define IOMMU_CONTROL_RESTART_SHIFT 31 - --#define IOMMU_CONTROL_PPR_LOG_ENABLE_SHIFT 13 --#define IOMMU_CONTROL_PPR_INT_SHIFT 14 --#define IOMMU_CONTROL_PPR_ENABLE_SHIFT 15 --#define IOMMU_CONTROL_GT_ENABLE_SHIFT 16 -- - /* Exclusion Register */ - #define IOMMU_EXCLUSION_BASE_LOW_OFFSET 0x20 - #define IOMMU_EXCLUSION_BASE_HIGH_OFFSET 0x24 -@@ -395,9 +398,18 @@ - #define IOMMU_STATUS_EVENT_LOG_RUN_SHIFT 3 - #define IOMMU_STATUS_CMD_BUFFER_RUN_MASK 0x00000010 - #define IOMMU_STATUS_CMD_BUFFER_RUN_SHIFT 4 -+#define IOMMU_STATUS_PPR_LOG_OVERFLOW_MASK 0x00000020 - #define IOMMU_STATUS_PPR_LOG_OVERFLOW_SHIFT 5 -+#define IOMMU_STATUS_PPR_LOG_INT_MASK 0x00000040 - #define IOMMU_STATUS_PPR_LOG_INT_SHIFT 6 -+#define IOMMU_STATUS_PPR_LOG_RUN_MASK 0x00000080 - #define IOMMU_STATUS_PPR_LOG_RUN_SHIFT 7 -+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_MASK 0x00000100 -+#define IOMMU_STATUS_GAPIC_LOG_OVERFLOW_SHIFT 8 -+#define IOMMU_STATUS_GAPIC_LOG_INT_MASK 0x00000200 -+#define IOMMU_STATUS_GAPIC_LOG_INT_SHIFT 9 -+#define IOMMU_STATUS_GAPIC_LOG_RUN_MASK 0x00000400 -+#define IOMMU_STATUS_GAPIC_LOG_RUN_SHIFT 10 - - /* I/O Page Table */ - #define IOMMU_PAGE_TABLE_ENTRY_SIZE 8 diff --git a/51d27841-iommu-amd-Workaround-for-erratum-787.patch b/51d27841-iommu-amd-Workaround-for-erratum-787.patch deleted file mode 100644 index ee9ef4a..0000000 --- a/51d27841-iommu-amd-Workaround-for-erratum-787.patch +++ /dev/null @@ -1,57 +0,0 @@ -# Commit 9eabb0735400e2b6059dfa3f0b47a426f61f570a -# Date 2013-07-02 08:50:41 +0200 -# Author Suravee Suthikulpanit -# Committer Jan Beulich -iommu/amd: Workaround for erratum 787 - -The IOMMU interrupt handling in bottom half must clear the PPR log interrupt -and event log interrupt bits to re-enable the interrupt. This is done by -writing 1 to the memory mapped register to clear the bit. 
Due to hardware bug, -if the driver tries to clear this bit while the IOMMU hardware also setting -this bit, the conflict will result with the bit being set. If the interrupt -handling code does not make sure to clear this bit, subsequent changes in the -event/PPR logs will no longer generating interrupts, and would result if -buffer overflow. After clearing the bits, the driver must read back -the register to verify. - -Signed-off-by: Suravee Suthikulpanit - -Adjust to apply on top of heavily modified patch 1. Adjust flow to get away -with a single readl() in each instance of the status register checks. - -Signed-off-by: Jan Beulich -Reviewed-by: Tim Deegan -Acked-by: Suravee Suthikulpanit - ---- a/xen/drivers/passthrough/amd/iommu_init.c -+++ b/xen/drivers/passthrough/amd/iommu_init.c -@@ -636,6 +636,14 @@ static void iommu_check_event_log(struct - } - } - -+ /* -+ * Workaround for erratum787: -+ * Re-check to make sure the bit has been cleared. -+ */ -+ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -+ if ( entry & IOMMU_STATUS_EVENT_LOG_INT_MASK ) -+ tasklet_schedule(&amd_iommu_irq_tasklet); -+ - spin_unlock_irqrestore(&iommu->lock, flags); - } - -@@ -717,6 +725,14 @@ static void iommu_check_ppr_log(struct a - } - } - -+ /* -+ * Workaround for erratum787: -+ * Re-check to make sure the bit has been cleared. -+ */ -+ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET); -+ if ( entry & IOMMU_STATUS_PPR_LOG_INT_MASK ) -+ tasklet_schedule(&amd_iommu_irq_tasklet); -+ - spin_unlock_irqrestore(&iommu->lock, flags); - } - diff --git a/51d5334e-x86-mm-Ensure-useful-progress-in-alloc_l2_table.patch b/51d5334e-x86-mm-Ensure-useful-progress-in-alloc_l2_table.patch deleted file mode 100644 index a433513..0000000 --- a/51d5334e-x86-mm-Ensure-useful-progress-in-alloc_l2_table.patch +++ /dev/null @@ -1,30 +0,0 @@ -# Commit d3a55d7d9bb518efe08143d050deff9f4ee80ec1 -# Date 2013-07-04 10:33:18 +0200 -# Author Andrew Cooper -# Committer Jan Beulich -x86/mm: Ensure useful progress in alloc_l2_table() - -While debugging the issue which turned out to be XSA-58, a printk in this loop -showed that it was quite easy to never make useful progress, because of -consistently failing the preemption check. - -One single l2 entry is a reasonable amount of work to do, even if an action is -pending, and also assures forwards progress across repeat continuations. - -Tweak the continuation criteria to fail on the first iteration of the loop. - -Signed-off-by: Andrew Cooper -Acked-by: Keir Fraser - ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -1278,7 +1278,8 @@ static int alloc_l2_table(struct page_in - - for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ ) - { -- if ( preemptible && i && hypercall_preempt_check() ) -+ if ( preemptible && i > page->nr_validated_ptes -+ && hypercall_preempt_check() ) - { - page->nr_validated_ptes = i; - rc = -EAGAIN; diff --git a/51daa074-Revert-hvmloader-always-include-HPET-table.patch b/51daa074-Revert-hvmloader-always-include-HPET-table.patch deleted file mode 100644 index c2cb60f..0000000 --- a/51daa074-Revert-hvmloader-always-include-HPET-table.patch +++ /dev/null @@ -1,37 +0,0 @@ -References: bnc#817799 - -# Commit 4867685f7916bb594a67f2f64a28bbf5ecb4949c -# Date 2013-07-08 13:20:20 +0200 -# Author Jan Beulich -# Committer Jan Beulich -Revert "hvmloader: always include HPET table" - -This reverts commit e4fd0475a08fda414da27c4e57b568f147cfc07e. 
- -Conflicts: - tools/firmware/hvmloader/acpi/build.c - -Signed-off-by: Jan Beulich -Acked-by: Keir Fraser - ---- a/tools/firmware/hvmloader/acpi/build.c -+++ b/tools/firmware/hvmloader/acpi/build.c -@@ -268,11 +268,13 @@ static int construct_secondary_tables(un - table_ptrs[nr_tables++] = (unsigned long)madt; - } - -- /* HPET. Always included in DSDT, so always include it here too. */ -- /* (And it's unconditionally required by Windows SVVP tests.) */ -- hpet = construct_hpet(); -- if (!hpet) return -1; -- table_ptrs[nr_tables++] = (unsigned long)hpet; -+ /* HPET. */ -+ if ( hpet_exists(ACPI_HPET_ADDRESS) ) -+ { -+ hpet = construct_hpet(); -+ if (!hpet) return -1; -+ table_ptrs[nr_tables++] = (unsigned long)hpet; -+ } - - /* WAET. */ - waet = construct_waet(); diff --git a/51dd155c-adjust-x86-EFI-build.patch b/51dd155c-adjust-x86-EFI-build.patch deleted file mode 100644 index 72d99e2..0000000 --- a/51dd155c-adjust-x86-EFI-build.patch +++ /dev/null @@ -1,27 +0,0 @@ -# Commit 5656b93d215d7c5160790ea87758625ba1de16b1 -# Date 2013-07-10 10:03:40 +0200 -# Author Jan Beulich -# Committer Jan Beulich -adjust x86 EFI build - -While the rule to generate .init.o files from .o ones already correctly -included $(extra-y), the setting of the necessary compiler flag didn't -have the same. With some yet to be posted patch this resulted in build -breakage because of the compiler deciding not to inline a few functions -(which then results in .text not being empty as required for these -object files). - -Signed-off-by: Jan Beulich -Acked-by: Keir Fraser - ---- a/xen/Rules.mk -+++ b/xen/Rules.mk -@@ -101,7 +101,7 @@ obj-y := $(patsubst %/,%/built-in.o,$ - - subdir-all := $(subdir-y) $(subdir-n) - --$(filter %.init.o,$(obj-y) $(obj-bin-y)): CFLAGS += -DINIT_SECTIONS_ONLY -+$(filter %.init.o,$(obj-y) $(obj-bin-y) $(extra-y)): CFLAGS += -DINIT_SECTIONS_ONLY - - $(obj-$(coverage)): CFLAGS += -fprofile-arcs -ftest-coverage -DTEST_COVERAGE - diff --git a/51e517e6-AMD-IOMMU-allocate-IRTEs.patch b/51e517e6-AMD-IOMMU-allocate-IRTEs.patch index b928ab6..d924369 100644 --- a/51e517e6-AMD-IOMMU-allocate-IRTEs.patch +++ b/51e517e6-AMD-IOMMU-allocate-IRTEs.patch @@ -31,8 +31,10 @@ Reported-by: Sander Eikelenboom Signed-off-by: Jan Beulich Acked-by: Suravee Suthikulpanit ---- a/xen/drivers/passthrough/amd/iommu_acpi.c -+++ b/xen/drivers/passthrough/amd/iommu_acpi.c +Index: xen-4.3.1-testing/xen/drivers/passthrough/amd/iommu_acpi.c +=================================================================== +--- xen-4.3.1-testing.orig/xen/drivers/passthrough/amd/iommu_acpi.c ++++ xen-4.3.1-testing/xen/drivers/passthrough/amd/iommu_acpi.c @@ -72,12 +72,15 @@ static void __init add_ivrs_mapping_entr /* allocate per-device interrupt remapping table */ if ( amd_iommu_perdev_intremap ) @@ -51,16 +53,16 @@ Acked-by: Suravee Suthikulpanit } } /* assgin iommu hardware */ -@@ -671,7 +674,7 @@ static u16 __init parse_ivhd_device_spec - if ( IO_APIC_ID(apic) != special->handle ) - continue; +@@ -678,7 +681,7 @@ static u16 __init parse_ivhd_device_spec + return 0; + } - if ( ioapic_sbdf[special->handle].pin_setup ) + if ( ioapic_sbdf[special->handle].pin_2_idx ) { if ( ioapic_sbdf[special->handle].bdf == bdf && ioapic_sbdf[special->handle].seg == seg ) -@@ -691,14 +694,17 @@ static u16 __init parse_ivhd_device_spec +@@ -698,14 +701,17 @@ static u16 __init parse_ivhd_device_spec ioapic_sbdf[special->handle].bdf = bdf; ioapic_sbdf[special->handle].seg = seg; @@ -81,7 +83,7 @@ Acked-by: Suravee Suthikulpanit } break; } -@@ -926,7 +932,7 @@ 
static int __init parse_ivrs_table(struc +@@ -933,7 +939,7 @@ static int __init parse_ivrs_table(struc for ( apic = 0; !error && iommu_intremap && apic < nr_ioapics; ++apic ) { if ( !nr_ioapic_entries[apic] || @@ -90,7 +92,7 @@ Acked-by: Suravee Suthikulpanit continue; printk(XENLOG_ERR "IVHD Error: no information for IO-APIC %#x\n", -@@ -935,9 +941,12 @@ static int __init parse_ivrs_table(struc +@@ -942,9 +948,12 @@ static int __init parse_ivrs_table(struc error = -ENXIO; else { @@ -106,8 +108,10 @@ Acked-by: Suravee Suthikulpanit { printk(XENLOG_ERR "IVHD Error: Out of memory\n"); error = -ENOMEM; ---- a/xen/drivers/passthrough/amd/iommu_intr.c -+++ b/xen/drivers/passthrough/amd/iommu_intr.c +Index: xen-4.3.1-testing/xen/drivers/passthrough/amd/iommu_intr.c +=================================================================== +--- xen-4.3.1-testing.orig/xen/drivers/passthrough/amd/iommu_intr.c ++++ xen-4.3.1-testing/xen/drivers/passthrough/amd/iommu_intr.c @@ -31,6 +31,7 @@ struct ioapic_sbdf ioapic_sbdf[MAX_IO_APICS]; struct hpet_sbdf hpet_sbdf; @@ -580,8 +584,10 @@ Acked-by: Suravee Suthikulpanit + + return rc; } ---- a/xen/drivers/passthrough/amd/pci_amd_iommu.c -+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c +Index: xen-4.3.1-testing/xen/drivers/passthrough/amd/pci_amd_iommu.c +=================================================================== +--- xen-4.3.1-testing.orig/xen/drivers/passthrough/amd/pci_amd_iommu.c ++++ xen-4.3.1-testing/xen/drivers/passthrough/amd/pci_amd_iommu.c @@ -637,7 +637,7 @@ const struct iommu_ops amd_iommu_ops = { .get_device_group_id = amd_iommu_group_id, .update_ire_from_apic = amd_iommu_ioapic_update_ire, @@ -591,8 +597,10 @@ Acked-by: Suravee Suthikulpanit .read_msi_from_ire = amd_iommu_read_msi_from_ire, .setup_hpet_msi = amd_setup_hpet_msi, .suspend = amd_iommu_suspend, ---- a/xen/include/asm-x86/amd-iommu.h -+++ b/xen/include/asm-x86/amd-iommu.h +Index: xen-4.3.1-testing/xen/include/asm-x86/amd-iommu.h +=================================================================== +--- xen-4.3.1-testing.orig/xen/include/asm-x86/amd-iommu.h ++++ xen-4.3.1-testing/xen/include/asm-x86/amd-iommu.h @@ -119,6 +119,7 @@ struct ivrs_mappings { /* per device interrupt remapping table */ @@ -601,8 +609,10 @@ Acked-by: Suravee Suthikulpanit spinlock_t intremap_lock; /* ivhd device data settings */ ---- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h -+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h +Index: xen-4.3.1-testing/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h +=================================================================== +--- xen-4.3.1-testing.orig/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h ++++ xen-4.3.1-testing/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h @@ -470,10 +470,6 @@ #define MAX_AMD_IOMMUS 32 @@ -614,8 +624,10 @@ Acked-by: Suravee Suthikulpanit #define INT_REMAP_ENTRY_REMAPEN_MASK 0x00000001 #define INT_REMAP_ENTRY_REMAPEN_SHIFT 0 #define INT_REMAP_ENTRY_SUPIOPF_MASK 0x00000002 ---- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h -+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h +Index: xen-4.3.1-testing/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h +=================================================================== +--- xen-4.3.1-testing.orig/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h ++++ xen-4.3.1-testing/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h @@ -89,10 +89,12 @@ struct amd_iommu *find_iommu_for_device( /* interrupt remapping */ diff --git a/51e63d80-x86-cpuidle-Change-logging-for-unknown-APIC-IDs.patch 
b/51e63d80-x86-cpuidle-Change-logging-for-unknown-APIC-IDs.patch deleted file mode 100644 index 5c1c604..0000000 --- a/51e63d80-x86-cpuidle-Change-logging-for-unknown-APIC-IDs.patch +++ /dev/null @@ -1,44 +0,0 @@ -# Commit 85047d9e4f4afeb73bca1e98f705a2f4f1d51c03 -# Date 2013-07-17 08:45:20 +0200 -# Author Andrew Cooper -# Committer Jan Beulich -x86/cpuidle: Change logging for unknown APIC IDs - -Dom0 uses this hypercall to pass ACPI information to Xen. It is not very -uncommon for more cpus to be listed in the ACPI tables than are present on the -system, particularly on systems with a common BIOS for a 2 and 4 socket server -varients. - -As Dom0 does not control the number of entries in the ACPI tables, and is -required to pass everything it finds to Xen, change the logging. - -There is now an single unconditional warning for the first unknown ID, and -further warnings if "cpuinfo" is requested by the user on the command line. - -Signed-off-by: Andrew Cooper - ---- a/xen/arch/x86/acpi/cpu_idle.c -+++ b/xen/arch/x86/acpi/cpu_idle.c -@@ -1031,7 +1031,10 @@ long set_cx_pminfo(uint32_t cpu, struct - cpu_id = get_cpu_id(cpu); - if ( cpu_id == -1 ) - { -- printk(XENLOG_ERR "no cpu_id for acpi_id %d\n", cpu); -+ static bool_t warn_once = 1; -+ if ( warn_once || opt_cpu_info ) -+ printk(XENLOG_WARNING "No CPU ID for APIC ID %#x\n", cpu); -+ warn_once = 0; - return -EINVAL; - } - ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -63,7 +63,7 @@ static struct cpu_dev default_cpu = { - }; - static struct cpu_dev * this_cpu = &default_cpu; - --bool_t __cpuinitdata opt_cpu_info; -+bool_t opt_cpu_info; - boolean_param("cpuinfo", opt_cpu_info); - - int __cpuinit get_model_name(struct cpuinfo_x86 *c) diff --git a/51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch b/51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch index 1ff5d47..2f4f4a1 100644 --- a/51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch +++ b/51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch @@ -15,9 +15,11 @@ Signed-off-by: Jan Beulich Reviewed-by: Andrew Cooper Reviewed-by: Yang Zhang ---- a/xen/arch/x86/hvm/vlapic.c -+++ b/xen/arch/x86/hvm/vlapic.c -@@ -386,6 +386,9 @@ void vlapic_EOI_set(struct vlapic *vlapi +Index: xen-4.3.1-testing/xen/arch/x86/hvm/vlapic.c +=================================================================== +--- xen-4.3.1-testing.orig/xen/arch/x86/hvm/vlapic.c ++++ xen-4.3.1-testing/xen/arch/x86/hvm/vlapic.c +@@ -395,6 +395,9 @@ void vlapic_EOI_set(struct vlapic *vlapi vlapic_clear_vector(vector, &vlapic->regs->data[APIC_ISR]); @@ -27,9 +29,11 @@ Reviewed-by: Yang Zhang if ( vlapic_test_and_clear_vector(vector, &vlapic->regs->data[APIC_TMR]) ) vioapic_update_EOI(vlapic_domain(vlapic), vector); ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -1502,6 +1502,15 @@ static void vmx_sync_pir_to_irr(struct v +Index: xen-4.3.1-testing/xen/arch/x86/hvm/vmx/vmx.c +=================================================================== +--- xen-4.3.1-testing.orig/xen/arch/x86/hvm/vmx/vmx.c ++++ xen-4.3.1-testing/xen/arch/x86/hvm/vmx/vmx.c +@@ -1507,6 +1507,15 @@ static void vmx_sync_pir_to_irr(struct v vlapic_set_vector(i, &vlapic->regs->data[APIC_IRR]); } @@ -45,7 +49,7 @@ Reviewed-by: Yang Zhang static struct hvm_function_table __initdata vmx_function_table = { .name = "VMX", .cpu_up_prepare = vmx_cpu_up_prepare, -@@ -1554,6 +1563,7 @@ static struct hvm_function_table __initd +@@ -1559,6 +1568,7 @@ static struct 
hvm_function_table __initd .process_isr = vmx_process_isr, .deliver_posted_intr = vmx_deliver_posted_intr, .sync_pir_to_irr = vmx_sync_pir_to_irr, @@ -53,7 +57,7 @@ Reviewed-by: Yang Zhang .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m, }; -@@ -1580,7 +1590,10 @@ const struct hvm_function_table * __init +@@ -1585,7 +1595,10 @@ const struct hvm_function_table * __init setup_ept_dump(); } @@ -65,8 +69,10 @@ Reviewed-by: Yang Zhang if ( cpu_has_vmx_posted_intr_processing ) alloc_direct_apic_vector(&posted_intr_vector, event_check_interrupt); else ---- a/xen/include/asm-x86/hvm/hvm.h -+++ b/xen/include/asm-x86/hvm/hvm.h +Index: xen-4.3.1-testing/xen/include/asm-x86/hvm/hvm.h +=================================================================== +--- xen-4.3.1-testing.orig/xen/include/asm-x86/hvm/hvm.h ++++ xen-4.3.1-testing/xen/include/asm-x86/hvm/hvm.h @@ -186,6 +186,7 @@ struct hvm_function_table { void (*process_isr)(int isr, struct vcpu *v); void (*deliver_posted_intr)(struct vcpu *v, u8 vector); diff --git a/51e6540d-x86-don-t-use-destroy_xen_mappings-for-vunmap.patch b/51e6540d-x86-don-t-use-destroy_xen_mappings-for-vunmap.patch deleted file mode 100644 index 39ee3f7..0000000 --- a/51e6540d-x86-don-t-use-destroy_xen_mappings-for-vunmap.patch +++ /dev/null @@ -1,41 +0,0 @@ -# Commit 68caac7f6f4687241a24e804a9fca19aa26fe183 -# Date 2013-07-17 10:21:33 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86: don't use destroy_xen_mappings() for vunmap() - -Its attempt to tear down intermediate page table levels may race with -map_pages_to_xen() establishing them, and now that -map_domain_page_global() is backed by vmap() this teardown is also -wasteful (as it's very likely to need the same address space populated -again within foreseeable time). - -Signed-off-by: Jan Beulich -Acked-by: Keir Fraser - ---- a/xen/common/vmap.c -+++ b/xen/common/vmap.c -@@ -196,9 +196,13 @@ void *vmap(const unsigned long *mfn, uns - - void vunmap(const void *va) - { -+#ifndef _PAGE_NONE - unsigned long addr = (unsigned long)va; - - destroy_xen_mappings(addr, addr + PAGE_SIZE * vm_size(va)); -+#else /* Avoid tearing down intermediate page tables. */ -+ map_pages_to_xen((unsigned long)va, 0, vm_size(va), _PAGE_NONE); -+#endif - vm_free(va); - } - #endif ---- a/xen/include/asm-x86/page.h -+++ b/xen/include/asm-x86/page.h -@@ -288,6 +288,7 @@ extern l1_pgentry_t l1_identmap[L1_PAGET - void paging_init(void); - #endif /* !defined(__ASSEMBLY__) */ - -+#define _PAGE_NONE _AC(0x000,U) - #define _PAGE_PRESENT _AC(0x001,U) - #define _PAGE_RW _AC(0x002,U) - #define _PAGE_USER _AC(0x004,U) diff --git a/51e7963f-x86-time-Update-wallclock-in-shared-info-when-altering-domain-time-offset.patch b/51e7963f-x86-time-Update-wallclock-in-shared-info-when-altering-domain-time-offset.patch deleted file mode 100644 index 165f9ab..0000000 --- a/51e7963f-x86-time-Update-wallclock-in-shared-info-when-altering-domain-time-offset.patch +++ /dev/null @@ -1,24 +0,0 @@ -# Commit 915a59f25c5eddd86bc2cae6389d0ed2ab87e69e -# Date 2013-07-18 09:16:15 +0200 -# Author Andrew Cooper -# Committer Jan Beulich -x86/time: Update wallclock in shared info when altering domain time offset - -domain_set_time_offset() udpates d->time_offset_seconds, but does not correct -the wallclock in the shared info, meaning that it is incorrect until the next -XENPF_settime hypercall from dom0 which resynchronises the wallclock for all -domains. 
- -Signed-off-by: Andrew Cooper -Acked-by: Keir Fraser - ---- a/xen/arch/x86/time.c -+++ b/xen/arch/x86/time.c -@@ -931,6 +931,7 @@ void domain_set_time_offset(struct domai - d->time_offset_seconds = time_offset_seconds; - if ( is_hvm_domain(d) ) - rtc_update_clock(d); -+ update_domain_wallclock_time(d); - } - - int cpu_frequency_change(u64 freq) diff --git a/51ffd577-fix-off-by-one-mistakes-in-vm_alloc.patch b/51ffd577-fix-off-by-one-mistakes-in-vm_alloc.patch deleted file mode 100644 index b97e081..0000000 --- a/51ffd577-fix-off-by-one-mistakes-in-vm_alloc.patch +++ /dev/null @@ -1,62 +0,0 @@ -# Commit b0e55bd49725c7c0183eb18670997b9e5930adac -# Date 2013-08-05 18:40:23 +0200 -# Author Jan Beulich -# Committer Jan Beulich -fix off-by-one mistakes in vm_alloc() - -Also add another pair of assertions to catch eventual further cases of -incorrect accounting. - -Signed-off-by: Jan Beulich -Reviewed-by Andrew Cooper -Acked-by: Keir Fraser - ---- a/xen/common/vmap.c -+++ b/xen/common/vmap.c -@@ -57,8 +57,8 @@ void *vm_alloc(unsigned int nr, unsigned - { - struct page_info *pg; - -- ASSERT(!test_bit(vm_low, vm_bitmap)); -- for ( start = vm_low; ; ) -+ ASSERT(vm_low == vm_top || !test_bit(vm_low, vm_bitmap)); -+ for ( start = vm_low; start < vm_top; ) - { - bit = find_next_bit(vm_bitmap, vm_top, start + 1); - if ( bit > vm_top ) -@@ -68,12 +68,18 @@ void *vm_alloc(unsigned int nr, unsigned - * corresponding page a guard one. - */ - start = (start + align) & ~(align - 1); -- if ( start + nr <= bit ) -- break; -- start = bit < vm_top ? -- find_next_zero_bit(vm_bitmap, vm_top, bit + 1) : bit; -- if ( start >= vm_top ) -- break; -+ if ( bit < vm_top ) -+ { -+ if ( start + nr < bit ) -+ break; -+ start = find_next_zero_bit(vm_bitmap, vm_top, bit + 1); -+ } -+ else -+ { -+ if ( start + nr <= bit ) -+ break; -+ start = bit; -+ } - } - - if ( start < vm_top ) -@@ -115,6 +121,10 @@ void *vm_alloc(unsigned int nr, unsigned - - for ( bit = start; bit < start + nr; ++bit ) - __set_bit(bit, vm_bitmap); -+ if ( bit < vm_top ) -+ ASSERT(!test_bit(bit, vm_bitmap)); -+ else -+ ASSERT(bit == vm_top); - if ( start <= vm_low + 2 ) - vm_low = bit; - spin_unlock(&vm_lock); diff --git a/51ffd5fd-x86-refine-FPU-selector-handling-code-for-XSAVEOPT.patch b/51ffd5fd-x86-refine-FPU-selector-handling-code-for-XSAVEOPT.patch deleted file mode 100644 index 1dabcc0..0000000 --- a/51ffd5fd-x86-refine-FPU-selector-handling-code-for-XSAVEOPT.patch +++ /dev/null @@ -1,60 +0,0 @@ -# Commit c58d9f2f4844c2ce8859a8d0f26a54cd058eb51f -# Date 2013-08-05 18:42:37 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86: refine FPU selector handling code for XSAVEOPT - -Some extra tweaks are necessary to deal with the situation of XSAVEOPT -not writing the FPU portion of the save image (due to it detecting that -the register state did not get modified since the last XRSTOR). - -Signed-off-by: Jan Beulich -Tested-by: Ben Guthro -Acked-by: Keir Fraser - ---- a/xen/arch/x86/xstate.c -+++ b/xen/arch/x86/xstate.c -@@ -71,10 +71,28 @@ void xsave(struct vcpu *v, uint64_t mask - - if ( word_size <= 0 || !is_pv_32bit_vcpu(v) ) - { -+ typeof(ptr->fpu_sse.fip.sel) fcs = ptr->fpu_sse.fip.sel; -+ typeof(ptr->fpu_sse.fdp.sel) fds = ptr->fpu_sse.fdp.sel; -+ - if ( cpu_has_xsaveopt ) -+ { -+ /* -+ * xsaveopt may not write the FPU portion even when the respective -+ * mask bit is set. For the check further down to work we hence -+ * need to put the save image back into the state that it was in -+ * right after the previous xsaveopt. 
-+ */ -+ if ( word_size > 0 && -+ (ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 4 || -+ ptr->fpu_sse.x[FPU_WORD_SIZE_OFFSET] == 2) ) -+ { -+ ptr->fpu_sse.fip.sel = 0; -+ ptr->fpu_sse.fdp.sel = 0; -+ } - asm volatile ( ".byte 0x48,0x0f,0xae,0x37" - : "=m" (*ptr) - : "a" (lmask), "d" (hmask), "D" (ptr) ); -+ } - else - asm volatile ( ".byte 0x48,0x0f,0xae,0x27" - : "=m" (*ptr) -@@ -87,7 +105,14 @@ void xsave(struct vcpu *v, uint64_t mask - */ - (!(ptr->fpu_sse.fsw & 0x0080) && - boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ) -+ { -+ if ( cpu_has_xsaveopt && word_size > 0 ) -+ { -+ ptr->fpu_sse.fip.sel = fcs; -+ ptr->fpu_sse.fdp.sel = fds; -+ } - return; -+ } - - if ( word_size > 0 && - !((ptr->fpu_sse.fip.addr | ptr->fpu_sse.fdp.addr) >> 32) ) diff --git a/520114bb-Nested-VMX-Flush-TLBs-and-Caches-if-paging-mode-changed.patch b/520114bb-Nested-VMX-Flush-TLBs-and-Caches-if-paging-mode-changed.patch deleted file mode 100644 index 75b6356..0000000 --- a/520114bb-Nested-VMX-Flush-TLBs-and-Caches-if-paging-mode-changed.patch +++ /dev/null @@ -1,23 +0,0 @@ -# Commit e1ab5c77b44b7bd835a2c032fa4963b36545fdb3 -# Date 2013-08-06 17:22:35 +0200 -# Author Yang Zhang -# Committer Jan Beulich -Nested VMX: Flush TLBs and Caches if paging mode changed - -According to SDM, if paging mode is changed, then whole TLBs and caches will -be flushed. This is missed in nested handle logic. Also this fixed the issue -that 64 bits windows cannot boot up on top of L1 kvm. - -Signed-off-by: Yang Zhang -Acked-by: Keir Fraser - ---- a/xen/arch/x86/mm/paging.c -+++ b/xen/arch/x86/mm/paging.c -@@ -709,6 +709,7 @@ void paging_update_nestedmode(struct vcp - else - /* TODO: shadow-on-shadow */ - v->arch.paging.nestedmode = NULL; -+ hvm_asid_flush_vcpu(v); - } - - void paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn, diff --git a/520119fc-xen-conring-Write-to-console-ring-even-if-console-lock-is-busted.patch b/520119fc-xen-conring-Write-to-console-ring-even-if-console-lock-is-busted.patch deleted file mode 100644 index d62b8bb..0000000 --- a/520119fc-xen-conring-Write-to-console-ring-even-if-console-lock-is-busted.patch +++ /dev/null @@ -1,40 +0,0 @@ -# Commit 66450c1d1ab3c4480bbba949113b95d1ab6a943a -# Date 2013-08-06 17:45:00 +0200 -# Author Andrew Cooper -# Committer Jan Beulich -xen/conring: Write to console ring even if console lock is busted - -console_lock_busted gets set when an NMI/MCE/Double Fault handler decides to -bring Xen down in an emergency. conring_puts() cannot block and does -not have problematic interactions with the console_lock. - -Therefore, choosing to not put the string into the console ring simply means -that the kexec environment cant find any panic() message caused by an IST -interrupt, which is unhelpful for debugging purposes. - -In the case that two pcpus fight with console_force_unlock(), having slightly -garbled strings in the console ring is far more useful than having nothing at -all. 
- -Signed-off-by: Andrew Cooper -Acked-by: Matt Wilson -Acked-by: Keir Fraser - ---- a/xen/drivers/char/console.c -+++ b/xen/drivers/char/console.c -@@ -463,12 +463,11 @@ static void __putstr(const char *str) - sercon_puts(str); - video_puts(str); - -+ while ( (c = *str++) != '\0' ) -+ putchar_console_ring(c); -+ - if ( !console_locks_busted ) -- { -- while ( (c = *str++) != '\0' ) -- putchar_console_ring(c); - tasklet_schedule(¬ify_dom0_con_ring_tasklet); -- } - } - - static int printk_prefix_check(char *p, char **pp) diff --git a/520a24f6-x86-AMD-Fix-nested-svm-crash-due-to-assertion-in-__virt_to_maddr.patch b/520a24f6-x86-AMD-Fix-nested-svm-crash-due-to-assertion-in-__virt_to_maddr.patch deleted file mode 100644 index 1d781af..0000000 --- a/520a24f6-x86-AMD-Fix-nested-svm-crash-due-to-assertion-in-__virt_to_maddr.patch +++ /dev/null @@ -1,138 +0,0 @@ -# Commit 85fc517ec3055e8e8d9c9e36e15a81e630237252 -# Date 2013-08-13 14:22:14 +0200 -# Author Suravee Suthikulpanit -# Committer Jan Beulich -x86/AMD: Fix nested svm crash due to assertion in __virt_to_maddr - -Fix assertion in __virt_to_maddr when starting nested SVM guest -in debug mode. Investigation has shown that svm_vmsave/svm_vmload -make use of __pa() with invalid address. - -Signed-off-by: Suravee Suthikulpanit -Reviewed-by: Tim Deegan - ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -1792,6 +1792,32 @@ svm_vmexit_do_vmrun(struct cpu_user_regs - return; - } - -+static struct page_info * -+nsvm_get_nvmcb_page(struct vcpu *v, uint64_t vmcbaddr) -+{ -+ p2m_type_t p2mt; -+ struct page_info *page; -+ struct nestedvcpu *nv = &vcpu_nestedhvm(v); -+ -+ if ( !nestedsvm_vmcb_map(v, vmcbaddr) ) -+ return NULL; -+ -+ /* Need to translate L1-GPA to MPA */ -+ page = get_page_from_gfn(v->domain, -+ nv->nv_vvmcxaddr >> PAGE_SHIFT, -+ &p2mt, P2M_ALLOC | P2M_UNSHARE); -+ if ( !page ) -+ return NULL; -+ -+ if ( !p2m_is_ram(p2mt) || p2m_is_readonly(p2mt) ) -+ { -+ put_page(page); -+ return NULL; -+ } -+ -+ return page; -+} -+ - static void - svm_vmexit_do_vmload(struct vmcb_struct *vmcb, - struct cpu_user_regs *regs, -@@ -1799,7 +1825,7 @@ svm_vmexit_do_vmload(struct vmcb_struct - { - int ret; - unsigned int inst_len; -- struct nestedvcpu *nv = &vcpu_nestedhvm(v); -+ struct page_info *page; - - if ( (inst_len = __get_instruction_length(v, INSTR_VMLOAD)) == 0 ) - return; -@@ -1810,13 +1836,18 @@ svm_vmexit_do_vmload(struct vmcb_struct - goto inject; - } - -- if (!nestedsvm_vmcb_map(v, vmcbaddr)) { -- gdprintk(XENLOG_ERR, "VMLOAD: mapping vmcb failed, injecting #UD\n"); -+ page = nsvm_get_nvmcb_page(v, vmcbaddr); -+ if ( !page ) -+ { -+ gdprintk(XENLOG_ERR, -+ "VMLOAD: mapping failed, injecting #UD\n"); - ret = TRAP_invalid_op; - goto inject; - } - -- svm_vmload(nv->nv_vvmcx); -+ svm_vmload_pa(page_to_maddr(page)); -+ put_page(page); -+ - /* State in L1 VMCB is stale now */ - v->arch.hvm_svm.vmcb_in_sync = 0; - -@@ -1835,7 +1866,7 @@ svm_vmexit_do_vmsave(struct vmcb_struct - { - int ret; - unsigned int inst_len; -- struct nestedvcpu *nv = &vcpu_nestedhvm(v); -+ struct page_info *page; - - if ( (inst_len = __get_instruction_length(v, INSTR_VMSAVE)) == 0 ) - return; -@@ -1846,14 +1877,17 @@ svm_vmexit_do_vmsave(struct vmcb_struct - goto inject; - } - -- if (!nestedsvm_vmcb_map(v, vmcbaddr)) { -- gdprintk(XENLOG_ERR, "VMSAVE: mapping vmcb failed, injecting #UD\n"); -+ page = nsvm_get_nvmcb_page(v, vmcbaddr); -+ if ( !page ) -+ { -+ gdprintk(XENLOG_ERR, -+ "VMSAVE: mapping vmcb failed, injecting #UD\n"); - ret = TRAP_invalid_op; - 
goto inject; - } - -- svm_vmsave(nv->nv_vvmcx); -- -+ svm_vmsave_pa(page_to_maddr(page)); -+ put_page(page); - __update_guest_eip(regs, inst_len); - return; - ---- a/xen/include/asm-x86/hvm/svm/svm.h -+++ b/xen/include/asm-x86/hvm/svm/svm.h -@@ -41,18 +41,21 @@ - #define SVM_REG_R14 (14) - #define SVM_REG_R15 (15) - --static inline void svm_vmload(void *vmcb) -+#define svm_vmload(x) svm_vmload_pa(__pa(x)) -+#define svm_vmsave(x) svm_vmsave_pa(__pa(x)) -+ -+static inline void svm_vmload_pa(paddr_t vmcb) - { - asm volatile ( - ".byte 0x0f,0x01,0xda" /* vmload */ -- : : "a" (__pa(vmcb)) : "memory" ); -+ : : "a" (vmcb) : "memory" ); - } - --static inline void svm_vmsave(void *vmcb) -+static inline void svm_vmsave_pa(paddr_t vmcb) - { - asm volatile ( - ".byte 0x0f,0x01,0xdb" /* vmsave */ -- : : "a" (__pa(vmcb)) : "memory" ); -+ : : "a" (vmcb) : "memory" ); - } - - static inline void svm_invlpga(unsigned long vaddr, uint32_t asid) diff --git a/520a2570-x86-AMD-Inject-GP-instead-of-UD-when-unable-to-map-vmcb.patch b/520a2570-x86-AMD-Inject-GP-instead-of-UD-when-unable-to-map-vmcb.patch deleted file mode 100644 index 8e2977b..0000000 --- a/520a2570-x86-AMD-Inject-GP-instead-of-UD-when-unable-to-map-vmcb.patch +++ /dev/null @@ -1,91 +0,0 @@ -# Commit 910daaf5aaa837624099c0fc5c373bea7202ff43 -# Date 2013-08-13 14:24:16 +0200 -# Author Suravee Suthikulpanit -# Committer Jan Beulich -x86/AMD: Inject #GP instead of #UD when unable to map vmcb - -According to AMD Programmer's Manual vol2, vmrun, vmsave and vmload -should inject #GP instead of #UD when unable to access memory -location for vmcb. Also, the code should make sure that L1 guest -EFER.SVME is not zero. Otherwise, #UD should be injected. - -Signed-off-by: Suravee Suthikulpanit -Reviewed-by: Tim Deegan - ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -1776,15 +1776,17 @@ static void - svm_vmexit_do_vmrun(struct cpu_user_regs *regs, - struct vcpu *v, uint64_t vmcbaddr) - { -- if (!nestedhvm_enabled(v->domain)) { -+ if ( !nsvm_efer_svm_enabled(v) ) -+ { - gdprintk(XENLOG_ERR, "VMRUN: nestedhvm disabled, injecting #UD\n"); - hvm_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE); - return; - } - -- if (!nestedsvm_vmcb_map(v, vmcbaddr)) { -- gdprintk(XENLOG_ERR, "VMRUN: mapping vmcb failed, injecting #UD\n"); -- hvm_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE); -+ if ( !nestedsvm_vmcb_map(v, vmcbaddr) ) -+ { -+ gdprintk(XENLOG_ERR, "VMRUN: mapping vmcb failed, injecting #GP\n"); -+ hvm_inject_hw_exception(TRAP_gp_fault, HVM_DELIVER_NO_ERROR_CODE); - return; - } - -@@ -1830,7 +1832,8 @@ svm_vmexit_do_vmload(struct vmcb_struct - if ( (inst_len = __get_instruction_length(v, INSTR_VMLOAD)) == 0 ) - return; - -- if (!nestedhvm_enabled(v->domain)) { -+ if ( !nsvm_efer_svm_enabled(v) ) -+ { - gdprintk(XENLOG_ERR, "VMLOAD: nestedhvm disabled, injecting #UD\n"); - ret = TRAP_invalid_op; - goto inject; -@@ -1840,8 +1843,8 @@ svm_vmexit_do_vmload(struct vmcb_struct - if ( !page ) - { - gdprintk(XENLOG_ERR, -- "VMLOAD: mapping failed, injecting #UD\n"); -- ret = TRAP_invalid_op; -+ "VMLOAD: mapping failed, injecting #GP\n"); -+ ret = TRAP_gp_fault; - goto inject; - } - -@@ -1871,7 +1874,8 @@ svm_vmexit_do_vmsave(struct vmcb_struct - if ( (inst_len = __get_instruction_length(v, INSTR_VMSAVE)) == 0 ) - return; - -- if (!nestedhvm_enabled(v->domain)) { -+ if ( !nsvm_efer_svm_enabled(v) ) -+ { - gdprintk(XENLOG_ERR, "VMSAVE: nestedhvm disabled, injecting #UD\n"); - ret = TRAP_invalid_op; - goto inject; -@@ 
-1881,8 +1885,8 @@ svm_vmexit_do_vmsave(struct vmcb_struct - if ( !page ) - { - gdprintk(XENLOG_ERR, -- "VMSAVE: mapping vmcb failed, injecting #UD\n"); -- ret = TRAP_invalid_op; -+ "VMSAVE: mapping vmcb failed, injecting #GP\n"); -+ ret = TRAP_gp_fault; - goto inject; - } - ---- a/xen/include/asm-x86/hvm/svm/nestedsvm.h -+++ b/xen/include/asm-x86/hvm/svm/nestedsvm.h -@@ -94,7 +94,7 @@ struct nestedsvm { - #define vcpu_nestedsvm(v) (vcpu_nestedhvm(v).u.nsvm) - - /* True when l1 guest enabled SVM in EFER */ --#define hvm_svm_enabled(v) \ -+#define nsvm_efer_svm_enabled(v) \ - (!!((v)->arch.hvm_vcpu.guest_efer & EFER_SVME)) - - int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr); diff --git a/520a2705-watchdog-crash-Always-disable-watchdog-in-console_force_unlock.patch b/520a2705-watchdog-crash-Always-disable-watchdog-in-console_force_unlock.patch deleted file mode 100644 index 3960102..0000000 --- a/520a2705-watchdog-crash-Always-disable-watchdog-in-console_force_unlock.patch +++ /dev/null @@ -1,49 +0,0 @@ -# Commit 7b9fa702ca323164d6b49e8b639a57f880454a8c -# Date 2013-08-13 14:31:01 +0200 -# Author Andrew Cooper -# Committer Jan Beulich -watchdog/crash: Always disable watchdog in console_force_unlock() - -Depending on the state of the conring and serial_tx_buffer, -console_force_unlock() can be a long running operation, usually because of -serial_start_sync() - -XenServer testing has found a reliable case where console_force_unlock() on -one PCPU takes long enough for another PCPU to timeout due to the watchdog -(such as waiting for a tlb flush callin). - -The watchdog timeout causes the second PCPU to repeat the -console_force_unlock(), at which point the first PCPU typically fails an -assertion in spin_unlock_irqrestore(&port->tx_lock) (because the tx_lock has -been unlocked behind itself). - -console_force_unlock() is only on emergency paths, so one way or another the -host is going down. Disable the watchdog before forcing the console lock to -help prevent having pcpus completing with each other to bring the host down. - -Signed-off-by: Andrew Cooper -Acked-by: Keir Fraser - ---- a/xen/arch/x86/x86_64/traps.c -+++ b/xen/arch/x86/x86_64/traps.c -@@ -226,8 +226,6 @@ void do_double_fault(struct cpu_user_reg - unsigned int cpu; - unsigned long crs[8]; - -- watchdog_disable(); -- - console_force_unlock(); - - asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) ); ---- a/xen/drivers/char/console.c -+++ b/xen/drivers/char/console.c -@@ -736,6 +736,9 @@ void console_end_log_everything(void) - - void console_force_unlock(void) - { -+#ifdef CONFIG_X86 -+ watchdog_disable(); -+#endif - spin_lock_init(&console_lock); - serial_force_unlock(sercon_handle); - console_locks_busted = 1; diff --git a/520a5504-VMX-add-boot-parameter-to-enable-disable-APIC-v-dynamically.patch b/520a5504-VMX-add-boot-parameter-to-enable-disable-APIC-v-dynamically.patch deleted file mode 100644 index 2f6e767..0000000 --- a/520a5504-VMX-add-boot-parameter-to-enable-disable-APIC-v-dynamically.patch +++ /dev/null @@ -1,38 +0,0 @@ -# Commit 0c006b41a283a0a569c863d44abde5aa5750ae01 -# Date 2013-08-13 17:47:16 +0200 -# Author Yang Zhang -# Committer Jan Beulich -VMX: add boot parameter to enable/disable APIC-v dynamically - -Add a boot parameter to enable/disable the APIC-v dynamically. APIC-v is -enabled by default. User can use apicv=0 to disable it. 
- -Signed-off-by: Yang Zhang - ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -46,6 +46,9 @@ boolean_param("vpid", opt_vpid_enabled); - static bool_t __read_mostly opt_unrestricted_guest_enabled = 1; - boolean_param("unrestricted_guest", opt_unrestricted_guest_enabled); - -+static bool_t __read_mostly opt_apicv_enabled = 1; -+boolean_param("apicv", opt_apicv_enabled); -+ - /* - * These two parameters are used to config the controls for Pause-Loop Exiting: - * ple_gap: upper bound on the amount of time between two successive -@@ -196,12 +199,12 @@ static int vmx_init_vmcs_config(void) - * "APIC Register Virtualization" and "Virtual Interrupt Delivery" - * can be set only when "use TPR shadow" is set - */ -- if ( _vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW ) -+ if ( (_vmx_cpu_based_exec_control & CPU_BASED_TPR_SHADOW) && -+ opt_apicv_enabled ) - opt |= SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; - -- - _vmx_secondary_exec_control = adjust_vmx_controls( - "Secondary Exec Control", min, opt, - MSR_IA32_VMX_PROCBASED_CTLS2, &mismatch); diff --git a/520b4b60-VT-d-protect-against-bogus-information-coming-from-BIOS.patch b/520b4b60-VT-d-protect-against-bogus-information-coming-from-BIOS.patch deleted file mode 100644 index 979c64b..0000000 --- a/520b4b60-VT-d-protect-against-bogus-information-coming-from-BIOS.patch +++ /dev/null @@ -1,41 +0,0 @@ -# Commit e8e8b030ecf916fea19639f0b6a446c1c9dbe174 -# Date 2013-08-14 11:18:24 +0200 -# Author Jan Beulich -# Committer Jan Beulich -VT-d: protect against bogus information coming from BIOS - -Add checks similar to those done by Linux: The DRHD address must not -be all zeros or all ones (Linux only checks for zero), and capabilities -as well as extended capabilities must not be all ones. - -Signed-off-by: Jan Beulich -Reviewed-by: Ben Guthro -Reviewed-by: Andrew Cooper -Tested-by: Ben Guthro -Acked by: Yang Zhang -Acked-by: Xiantao Zhang - ---- a/xen/drivers/passthrough/vtd/dmar.c -+++ b/xen/drivers/passthrough/vtd/dmar.c -@@ -447,6 +447,9 @@ acpi_parse_one_drhd(struct acpi_dmar_hea - if ( (ret = acpi_dmar_check_length(header, sizeof(*drhd))) != 0 ) - return ret; - -+ if ( !drhd->address || !(drhd->address + 1) ) -+ return -ENODEV; -+ - dmaru = xzalloc(struct acpi_drhd_unit); - if ( !dmaru ) - return -ENOMEM; ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -1159,6 +1159,9 @@ int __init iommu_alloc(struct acpi_drhd_ - dprintk(VTDPREFIX, - "cap = %"PRIx64" ecap = %"PRIx64"\n", iommu->cap, iommu->ecap); - } -+ if ( !(iommu->cap + 1) || !(iommu->ecap + 1) ) -+ return -ENODEV; -+ - if ( cap_fault_reg_offset(iommu->cap) + - cap_num_fault_regs(iommu->cap) * PRIMARY_FAULT_REG_LEN >= PAGE_SIZE || - ecap_iotlb_offset(iommu->ecap) >= PAGE_SIZE ) diff --git a/520b4bda-x86-MTRR-fix-range-check-in-mtrr_add_page.patch b/520b4bda-x86-MTRR-fix-range-check-in-mtrr_add_page.patch deleted file mode 100644 index 8770e53..0000000 --- a/520b4bda-x86-MTRR-fix-range-check-in-mtrr_add_page.patch +++ /dev/null @@ -1,24 +0,0 @@ -# Commit f67af6d5803b6a015e30cb490a94f9547cb0437c -# Date 2013-08-14 11:20:26 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86/MTRR: fix range check in mtrr_add_page() - -Extracted from Yinghai Lu's Linux commit d5c78673 ("x86: Fix /proc/mtrr -with base/size more than 44bits"). 
- -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Acked-by: Keir Fraser - ---- a/xen/arch/x86/cpu/mtrr/main.c -+++ b/xen/arch/x86/cpu/mtrr/main.c -@@ -340,7 +340,7 @@ int mtrr_add_page(unsigned long base, un - return -EINVAL; - } - -- if (base & size_or_mask || size & size_or_mask) { -+ if ((base | (base + size - 1)) >> (paddr_bits - PAGE_SHIFT)) { - printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); - return -EINVAL; - } diff --git a/520cb8b6-x86-time-fix-check-for-negative-time-in-__update_vcpu_system_time.patch b/520cb8b6-x86-time-fix-check-for-negative-time-in-__update_vcpu_system_time.patch deleted file mode 100644 index 5bebae5..0000000 --- a/520cb8b6-x86-time-fix-check-for-negative-time-in-__update_vcpu_system_time.patch +++ /dev/null @@ -1,22 +0,0 @@ -# Commit ab7f9a793c78dfea81c037b34b0dd2db7070d8f8 -# Date 2013-08-15 13:17:10 +0200 -# Author Tim Deegan -# Committer Jan Beulich -x86/time: fix check for negative time in __update_vcpu_system_time() - -Clang points out that u64 stime variable is always >= 0. - -Signed-off-by: Tim Deegan - ---- a/xen/arch/x86/time.c -+++ b/xen/arch/x86/time.c -@@ -817,7 +817,8 @@ static void __update_vcpu_system_time(st - - if ( d->arch.vtsc ) - { -- u64 stime = t->stime_local_stamp; -+ s_time_t stime = t->stime_local_stamp; -+ - if ( is_hvm_domain(d) ) - { - struct pl_time *pl = &v->domain->arch.hvm_domain.pl_time; diff --git a/52146070-ACPI-fix-acpi_os_map_memory.patch b/52146070-ACPI-fix-acpi_os_map_memory.patch deleted file mode 100644 index bf4e38c..0000000 --- a/52146070-ACPI-fix-acpi_os_map_memory.patch +++ /dev/null @@ -1,132 +0,0 @@ -References: bnc#833251, bnc#834751 - -# Commit 2ee9cbf9d8eaeff6e21222905d22dbd58dc5fe29 -# Date 2013-08-21 08:38:40 +0200 -# Author Jan Beulich -# Committer Jan Beulich -ACPI: fix acpi_os_map_memory() - -It using map_domain_page() was entirely wrong. Use __acpi_map_table() -instead for the time being, with locking added as the mappings it -produces get replaced with subsequent invocations. Using locking in -this way is acceptable here since the only two runtime callers are -acpi_os_{read,write}_memory(), which don't leave mappings pending upon -returning to their callers. - -Also fix __acpi_map_table()'s first parameter's type - while benign for -unstable, backports to pre-4.3 trees will need this. - -Signed-off-by: Jan Beulich - -# Commit c5ba8ed4c6f005d332a49d93a3ef8ff2b690b256 -# Date 2013-08-21 08:40:22 +0200 -# Author Jan Beulich -# Committer Jan Beulich -ACPI: use ioremap() in acpi_os_map_memory() - -This drops the post-boot use of __acpi_map_table() here again (together -with the somewhat awkward locking), in favor of using ioremap(). - -Signed-off-by: Jan Beulich - ---- a/xen/arch/x86/acpi/lib.c -+++ b/xen/arch/x86/acpi/lib.c -@@ -39,7 +39,7 @@ u32 __read_mostly x86_acpiid_to_apicid[M - * from the fixed base. That's why we start at FIX_ACPI_END and - * count idx down while incrementing the phys address. 
- */ --char *__acpi_map_table(unsigned long phys, unsigned long size) -+char *__acpi_map_table(paddr_t phys, unsigned long size) - { - unsigned long base, offset, mapped_size; - int idx; ---- a/xen/drivers/acpi/osl.c -+++ b/xen/drivers/acpi/osl.c -@@ -38,6 +38,7 @@ - #include - #include - #include -+#include - - #define _COMPONENT ACPI_OS_SERVICES - ACPI_MODULE_NAME("osl") -@@ -83,14 +84,25 @@ acpi_physical_address __init acpi_os_get - } - } - --void __iomem *__init -+void __iomem * - acpi_os_map_memory(acpi_physical_address phys, acpi_size size) - { -- return __acpi_map_table((unsigned long)phys, size); -+ if (system_state >= SYS_STATE_active) { -+ unsigned long pfn = PFN_DOWN(phys); -+ unsigned int offs = phys & (PAGE_SIZE - 1); -+ -+ /* The low first Mb is always mapped. */ -+ if ( !((phys + size - 1) >> 20) ) -+ return __va(phys); -+ return __vmap(&pfn, PFN_UP(offs + size), 1, 1, PAGE_HYPERVISOR_NOCACHE) + offs; -+ } -+ return __acpi_map_table(phys, size); - } - --void __init acpi_os_unmap_memory(void __iomem * virt, acpi_size size) -+void acpi_os_unmap_memory(void __iomem * virt, acpi_size size) - { -+ if (system_state >= SYS_STATE_active) -+ vunmap((void *)((unsigned long)virt & PAGE_MASK)); - } - - acpi_status acpi_os_read_port(acpi_io_address port, u32 * value, u32 width) -@@ -133,9 +145,8 @@ acpi_status - acpi_os_read_memory(acpi_physical_address phys_addr, u32 * value, u32 width) - { - u32 dummy; -- void __iomem *virt_addr; -+ void __iomem *virt_addr = acpi_os_map_memory(phys_addr, width >> 3); - -- virt_addr = map_domain_page(phys_addr>>PAGE_SHIFT); - if (!value) - value = &dummy; - -@@ -153,7 +164,7 @@ acpi_os_read_memory(acpi_physical_addres - BUG(); - } - -- unmap_domain_page(virt_addr); -+ acpi_os_unmap_memory(virt_addr, width >> 3); - - return AE_OK; - } -@@ -161,9 +172,7 @@ acpi_os_read_memory(acpi_physical_addres - acpi_status - acpi_os_write_memory(acpi_physical_address phys_addr, u32 value, u32 width) - { -- void __iomem *virt_addr; -- -- virt_addr = map_domain_page(phys_addr>>PAGE_SHIFT); -+ void __iomem *virt_addr = acpi_os_map_memory(phys_addr, width >> 3); - - switch (width) { - case 8: -@@ -179,7 +188,7 @@ acpi_os_write_memory(acpi_physical_addre - BUG(); - } - -- unmap_domain_page(virt_addr); -+ acpi_os_unmap_memory(virt_addr, width >> 3); - - return AE_OK; - } ---- a/xen/include/xen/acpi.h -+++ b/xen/include/xen/acpi.h -@@ -56,7 +56,7 @@ typedef int (*acpi_table_handler) (struc - typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); - - unsigned int acpi_get_processor_id (unsigned int cpu); --char * __acpi_map_table (unsigned long phys_addr, unsigned long size); -+char * __acpi_map_table (paddr_t phys_addr, unsigned long size); - int acpi_boot_init (void); - int acpi_boot_table_init (void); - int acpi_numa_init (void); diff --git a/5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch b/5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch deleted file mode 100644 index 834f182..0000000 --- a/5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch +++ /dev/null @@ -1,50 +0,0 @@ -# Commit c9c6abab583d27fdca1d979a7f1d18ae30f54e9b -# Date 2013-08-21 16:44:58 +0200 -# Author Jan Beulich -# Committer Jan Beulich -VT-d: warn about Compatibility Format Interrupts being enabled by firmware - -... as being insecure. - -Also drop the second (redundant) read DMAR_GSTS_REG from enable_intremap(). 
- -Signed-off-by: Jan Beulich -Acked-by: Xiantao Zhang - ---- a/xen/drivers/passthrough/vtd/intremap.c -+++ b/xen/drivers/passthrough/vtd/intremap.c -@@ -706,8 +706,8 @@ int enable_intremap(struct iommu *iommu, - - if ( !platform_supports_intremap() ) - { -- dprintk(XENLOG_ERR VTDPREFIX, -- "Platform firmware does not support interrupt remapping\n"); -+ printk(XENLOG_ERR VTDPREFIX -+ " Platform firmware does not support interrupt remapping\n"); - return -EINVAL; - } - -@@ -718,15 +718,19 @@ int enable_intremap(struct iommu *iommu, - if ( (sts & DMA_GSTS_IRES) && ir_ctrl->iremap_maddr ) - return 0; - -- sts = dmar_readl(iommu->reg, DMAR_GSTS_REG); - if ( !(sts & DMA_GSTS_QIES) ) - { -- dprintk(XENLOG_ERR VTDPREFIX, -- "Queued invalidation is not enabled, should not enable " -- "interrupt remapping\n"); -+ printk(XENLOG_ERR VTDPREFIX -+ " Queued invalidation is not enabled on IOMMU #%u:" -+ " Should not enable interrupt remapping\n", iommu->index); - return -EINVAL; - } - -+ if ( !eim && (sts & DMA_GSTS_CFIS) ) -+ printk(XENLOG_WARNING VTDPREFIX -+ " Compatibility Format Interrupts permitted on IOMMU #%u:" -+ " Device pass-through will be insecure\n", iommu->index); -+ - if ( ir_ctrl->iremap_maddr == 0 ) - { - drhd = iommu_to_drhd(iommu); diff --git a/5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch b/5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch deleted file mode 100644 index 5f93087..0000000 --- a/5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch +++ /dev/null @@ -1,26 +0,0 @@ -# Commit 7fb5c6b9ef22915e3fcac95cd44857f4457ba783 -# Date 2013-08-22 10:49:24 +0200 -# Author Yang Zhang -# Committer Jan Beulich -Nested VMX: Check whether interrupt is blocked by TPR - -If an interrupt is blocked by L1's TPR, L2 should not see it and should keep -running. Add the check before L2 retrieves an interrupt. - -Signed-off-by: Yang Zhang -Acked-by: "Dong, Eddie" - ---- a/xen/arch/x86/hvm/vmx/intr.c -+++ b/xen/arch/x86/hvm/vmx/intr.c -@@ -165,6 +165,11 @@ static int nvmx_intr_intercept(struct vc - { - u32 ctrl; - -+ /* If blocked by L1's tpr, then nothing to do. */ -+ if ( nestedhvm_vcpu_in_guestmode(v) && -+ hvm_interrupt_blocked(v, intack) == hvm_intblk_tpr ) -+ return 1; -+ - if ( nvmx_intr_blocked(v) != hvm_intblk_none ) - { - enable_intr_window(v, intack); diff --git a/5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch b/5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch deleted file mode 100644 index eda8b87..0000000 --- a/5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch +++ /dev/null @@ -1,36 +0,0 @@ -# Commit b35d0a26983843c092bfa353fd6b9aa8c3bf4886 -# Date 2013-08-22 10:50:13 +0200 -# Author Yang Zhang -# Committer Jan Beulich -Nested VMX: Force check ISR when L2 is running - -An external interrupt is allowed to notify the CPU only when it has higher -priority than the interrupt currently being serviced. With APIC-v, the priority -comparison is done by hardware, and hardware will inject the interrupt into the -VCPU when it recognizes one. Currently, there is no virtual -APIC-v feature available for L1 to use, so when L2 is running, we still need -to compare the interrupt priority with the ISR in the hypervisor instead of via hardware.
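The check the hypervisor has to make in that case follows the usual xAPIC priority rules; a minimal sketch (the helper name and its parameters are illustrative, not part of the patch):

    /* A pending vector is deliverable only if its priority class (the upper
     * nibble of the vector) exceeds that of the highest in-service vector;
     * isr is -1 when no interrupt is currently being serviced. */
    static bool_t irr_deliverable(int irr, int isr)
    {
        return (isr < 0) || ((irr & 0xf0) > (isr & 0xf0));
    }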
- -Signed-off-by: Yang Zhang -Acked-by: "Dong, Eddie" - ---- a/xen/arch/x86/hvm/vlapic.c -+++ b/xen/arch/x86/hvm/vlapic.c -@@ -37,6 +37,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -1037,7 +1038,8 @@ int vlapic_has_pending_irq(struct vcpu * - if ( irr == -1 ) - return -1; - -- if ( vlapic_virtual_intr_delivery_enabled() ) -+ if ( vlapic_virtual_intr_delivery_enabled() && -+ !nestedhvm_vcpu_in_guestmode(v) ) - return irr; - - isr = vlapic_find_highest_isr(vlapic); diff --git a/5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch b/5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch deleted file mode 100644 index ed714cb..0000000 --- a/5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch +++ /dev/null @@ -1,43 +0,0 @@ -# Commit 375a1035002fb257087756a86e6caeda649fc0f1 -# Date 2013-08-22 10:52:05 +0200 -# Author Yang Zhang -# Committer Jan Beulich -Nested VMX: Clear APIC-v control bit in vmcs02 - -There is no vAPIC-v support, so mask the APIC-v control bits when -constructing vmcs02. - -Signed-off-by: Yang Zhang -Acked-by: "Dong, Eddie" - ---- a/xen/arch/x86/hvm/vmx/vvmx.c -+++ b/xen/arch/x86/hvm/vmx/vvmx.c -@@ -613,8 +613,15 @@ void nvmx_update_secondary_exec_control( - u32 shadow_cntrl; - struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); - struct nestedvmx *nvmx = &vcpu_2_nvmx(v); -+ u32 apicv_bit = SECONDARY_EXEC_APIC_REGISTER_VIRT | -+ SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; - -+ host_cntrl &= ~apicv_bit; - shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, SECONDARY_VM_EXEC_CONTROL); -+ -+ /* No vAPIC-v support, so it shouldn't be set in vmcs12. */ -+ ASSERT(!(shadow_cntrl & apicv_bit)); -+ - nvmx->ept.enabled = !!(shadow_cntrl & SECONDARY_EXEC_ENABLE_EPT); - shadow_cntrl |= host_cntrl; - __vmwrite(SECONDARY_VM_EXEC_CONTROL, shadow_cntrl); -@@ -625,7 +632,12 @@ static void nvmx_update_pin_control(stru - u32 shadow_cntrl; - struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); - -+ host_cntrl &= ~PIN_BASED_POSTED_INTERRUPT; - shadow_cntrl = __get_vvmcs(nvcpu->nv_vvmcx, PIN_BASED_VM_EXEC_CONTROL); -+ -+ /* No vAPIC-v support, so it shouldn't be set in vmcs12. */ -+ ASSERT(!(shadow_cntrl & PIN_BASED_POSTED_INTERRUPT)); -+ - shadow_cntrl |= host_cntrl; - __vmwrite(PIN_BASED_VM_EXEC_CONTROL, shadow_cntrl); - } diff --git a/5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch b/5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch deleted file mode 100644 index 529ea7a..0000000 --- a/5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch +++ /dev/null @@ -1,247 +0,0 @@ -# Commit 84e6af58707520baf59c1c86c29237419e439afb -# Date 2013-08-22 10:59:01 +0200 -# Author Yang Zhang -# Committer Jan Beulich -Nested VMX: Update APIC-v(RVI/SVI) when vmexit to L1 - -If APIC-v is enabled, all interrupts to L1 are delivered through APIC-v. -But when L2 is running, an external interrupt will cause an L1 vmexit with -reason "external interrupt". Then L1 will pick up the interrupt through -vmcs12. When L1 acks the interrupt, since APIC-v is enabled while -L1 is running, the APIC-v hardware will still do the vEOI update. The problem -is that the interrupt is not delivered through the APIC-v hardware; this means -SVI/RVI/vPPR are not set, but hardware requires them when doing the vEOI -update. The solution is that, when L1 tries to pick up the interrupt -from vmcs12, the hypervisor will help to update SVI/RVI/vPPR to make -sure the subsequent vEOI update and vPPR update are done correctly.
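For reference, the guest interrupt status field updated by the hunks below is a 16-bit value with SVI (the in-service vector) in bits 15:8 and RVI (the highest pending vector) in bits 7:0; a sketch of the packing, with names as used in the patch:

    uint16_t status = vector << 8;      /* SVI: the vector L1 just acked */
    rvi = vlapic_has_pending_irq(v);    /* highest vector still pending in vIRR */
    if ( rvi != -1 )
        status |= rvi & 0xff;           /* RVI goes in the low byte */
    __vmwrite(GUEST_INTR_STATUS, status);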
- -Also, since the interrupt is delivered through vmcs12, the APIC-v hardware will -not clear the vIRR, and the hypervisor needs to clear it before L1 runs. - -Signed-off-by: Yang Zhang -Acked-by: "Dong, Eddie" - ---- a/xen/arch/x86/hvm/irq.c -+++ b/xen/arch/x86/hvm/irq.c -@@ -437,7 +437,7 @@ struct hvm_intack hvm_vcpu_ack_pending_i - intack.vector = (uint8_t)vector; - break; - case hvm_intsrc_lapic: -- if ( !vlapic_ack_pending_irq(v, intack.vector) ) -+ if ( !vlapic_ack_pending_irq(v, intack.vector, 0) ) - intack = hvm_intack_none; - break; - case hvm_intsrc_vector: ---- a/xen/arch/x86/hvm/vlapic.c -+++ b/xen/arch/x86/hvm/vlapic.c -@@ -168,6 +168,14 @@ static uint32_t vlapic_get_ppr(struct vl - return ppr; - } - -+uint32_t vlapic_set_ppr(struct vlapic *vlapic) -+{ -+ uint32_t ppr = vlapic_get_ppr(vlapic); -+ -+ vlapic_set_reg(vlapic, APIC_PROCPRI, ppr); -+ return ppr; -+} -+ - static int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda) - { - int result = 0; -@@ -1050,15 +1058,15 @@ int vlapic_has_pending_irq(struct vcpu * - return irr; - } - --int vlapic_ack_pending_irq(struct vcpu *v, int vector) -+int vlapic_ack_pending_irq(struct vcpu *v, int vector, bool_t force_ack) - { - struct vlapic *vlapic = vcpu_vlapic(v); - -- if ( vlapic_virtual_intr_delivery_enabled() ) -- return 1; -- -- vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]); -- vlapic_clear_irr(vector, vlapic); -+ if ( force_ack || !vlapic_virtual_intr_delivery_enabled() ) -+ { -+ vlapic_set_vector(vector, &vlapic->regs->data[APIC_ISR]); -+ vlapic_clear_irr(vector, vlapic); -+ } - - return 1; - } ---- a/xen/arch/x86/hvm/vmx/intr.c -+++ b/xen/arch/x86/hvm/vmx/intr.c -@@ -185,7 +185,7 @@ static int nvmx_intr_intercept(struct vc - if ( !(ctrl & PIN_BASED_EXT_INTR_MASK) ) - return 0; - -- vmx_inject_extint(intack.vector); -+ vmx_inject_extint(intack.vector, intack.source); - - ctrl = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, VM_EXIT_CONTROLS); - if ( ctrl & VM_EXIT_ACK_INTR_ON_EXIT ) -@@ -314,7 +314,7 @@ void vmx_intr_assist(void) - else - { - HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0); -- vmx_inject_extint(intack.vector); -+ vmx_inject_extint(intack.vector, intack.source); - pt_intr_post(v, intack); - } - ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -1205,7 +1205,7 @@ static void vmx_update_guest_efer(struct - } - - void nvmx_enqueue_n2_exceptions(struct vcpu *v, -- unsigned long intr_fields, int error_code) -+ unsigned long intr_fields, int error_code, uint8_t source) - { - struct nestedvmx *nvmx = &vcpu_2_nvmx(v); - -@@ -1213,6 +1213,7 @@ void nvmx_enqueue_n2_exceptions(struct v - /* enqueue the exception till the VMCS switch back to L1 */ - nvmx->intr.intr_info = intr_fields; - nvmx->intr.error_code = error_code; -+ nvmx->intr.source = source; - vcpu_nestedhvm(v).nv_vmexit_pending = 1; - return; - } -@@ -1224,7 +1225,8 @@ void nvmx_enqueue_n2_exceptions(struct v - - static int nvmx_vmexit_trap(struct vcpu *v, struct hvm_trap *trap) - { -- nvmx_enqueue_n2_exceptions(v, trap->vector, trap->error_code); -+ nvmx_enqueue_n2_exceptions(v, trap->vector, trap->error_code, -+ hvm_intsrc_none); - return NESTEDHVM_VMEXIT_DONE; - } - -@@ -1255,7 +1257,7 @@ static void __vmx_inject_exception(int t - curr->arch.hvm_vmx.vmx_emulate = 1; - } - --void vmx_inject_extint(int trap) -+void vmx_inject_extint(int trap, uint8_t source) - { - struct vcpu *v = current; - u32 pin_based_cntrl; -@@ -1266,7 +1268,7 @@ void vmx_inject_extint(int trap) - if ( pin_based_cntrl & PIN_BASED_EXT_INTR_MASK ) { -
nvmx_enqueue_n2_exceptions (v, - INTR_INFO_VALID_MASK | (X86_EVENTTYPE_EXT_INTR<<8) | trap, -- HVM_DELIVER_NO_ERROR_CODE); -+ HVM_DELIVER_NO_ERROR_CODE, source); - return; - } - } -@@ -1285,7 +1287,7 @@ void vmx_inject_nmi(void) - if ( pin_based_cntrl & PIN_BASED_NMI_EXITING ) { - nvmx_enqueue_n2_exceptions (v, - INTR_INFO_VALID_MASK | (X86_EVENTTYPE_NMI<<8) | TRAP_nmi, -- HVM_DELIVER_NO_ERROR_CODE); -+ HVM_DELIVER_NO_ERROR_CODE, hvm_intsrc_nmi); - return; - } - } -@@ -1353,7 +1355,7 @@ static void vmx_inject_trap(struct hvm_t - { - nvmx_enqueue_n2_exceptions (curr, - INTR_INFO_VALID_MASK | (_trap.type<<8) | _trap.vector, -- _trap.error_code); -+ _trap.error_code, hvm_intsrc_none); - return; - } - else ---- a/xen/arch/x86/hvm/vmx/vvmx.c -+++ b/xen/arch/x86/hvm/vmx/vvmx.c -@@ -1295,6 +1295,36 @@ static void sync_exception_state(struct - } - } - -+static void nvmx_update_apicv(struct vcpu *v) -+{ -+ struct nestedvmx *nvmx = &vcpu_2_nvmx(v); -+ struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); -+ unsigned long reason = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_REASON); -+ uint32_t intr_info = __get_vvmcs(nvcpu->nv_vvmcx, VM_EXIT_INTR_INFO); -+ -+ if ( reason == EXIT_REASON_EXTERNAL_INTERRUPT && -+ nvmx->intr.source == hvm_intsrc_lapic && -+ (intr_info & INTR_INFO_VALID_MASK) ) -+ { -+ uint16_t status; -+ uint32_t rvi, ppr; -+ uint32_t vector = intr_info & 0xff; -+ struct vlapic *vlapic = vcpu_vlapic(v); -+ -+ vlapic_ack_pending_irq(v, vector, 1); -+ -+ ppr = vlapic_set_ppr(vlapic); -+ WARN_ON((ppr & 0xf0) != (vector & 0xf0)); -+ -+ status = vector << 8; -+ rvi = vlapic_has_pending_irq(v); -+ if ( rvi != -1 ) -+ status |= rvi & 0xff; -+ -+ __vmwrite(GUEST_INTR_STATUS, status); -+ } -+} -+ - static void virtual_vmexit(struct cpu_user_regs *regs) - { - struct vcpu *v = current; -@@ -1340,6 +1370,9 @@ static void virtual_vmexit(struct cpu_us - /* updating host cr0 to sync TS bit */ - __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0); - -+ if ( cpu_has_vmx_virtual_intr_delivery ) -+ nvmx_update_apicv(v); -+ - vmreturn(regs, VMSUCCEED); - } - ---- a/xen/include/asm-x86/hvm/vlapic.h -+++ b/xen/include/asm-x86/hvm/vlapic.h -@@ -98,7 +98,7 @@ bool_t is_vlapic_lvtpc_enabled(struct vl - void vlapic_set_irq(struct vlapic *vlapic, uint8_t vec, uint8_t trig); - - int vlapic_has_pending_irq(struct vcpu *v); --int vlapic_ack_pending_irq(struct vcpu *v, int vector); -+int vlapic_ack_pending_irq(struct vcpu *v, int vector, bool_t force_ack); - - int vlapic_init(struct vcpu *v); - void vlapic_destroy(struct vcpu *v); -@@ -110,6 +110,7 @@ void vlapic_tdt_msr_set(struct vlapic *v - uint64_t vlapic_tdt_msr_get(struct vlapic *vlapic); - - int vlapic_accept_pic_intr(struct vcpu *v); -+uint32_t vlapic_set_ppr(struct vlapic *vlapic); - - void vlapic_adjust_i8259_target(struct domain *d); - ---- a/xen/include/asm-x86/hvm/vmx/vmx.h -+++ b/xen/include/asm-x86/hvm/vmx/vmx.h -@@ -448,7 +448,7 @@ static inline int __vmxon(u64 addr) - - void vmx_get_segment_register(struct vcpu *, enum x86_segment, - struct segment_register *); --void vmx_inject_extint(int trap); -+void vmx_inject_extint(int trap, uint8_t source); - void vmx_inject_nmi(void); - - int ept_p2m_init(struct p2m_domain *p2m); ---- a/xen/include/asm-x86/hvm/vmx/vvmx.h -+++ b/xen/include/asm-x86/hvm/vmx/vvmx.h -@@ -36,6 +36,7 @@ struct nestedvmx { - struct { - unsigned long intr_info; - u32 error_code; -+ u8 source; - } intr; - struct { - bool_t enabled; diff --git a/5215d8b0-Correct-X2-APIC-HVM-emulation.patch b/5215d8b0-Correct-X2-APIC-HVM-emulation.patch deleted file mode 
100644 index 014de12..0000000 --- a/5215d8b0-Correct-X2-APIC-HVM-emulation.patch +++ /dev/null @@ -1,24 +0,0 @@ -References: bnc#835896 - -# Commit 69962e19ed432570f6cdcfdb5f6f22d6e3c54e6c -# Date 2013-08-22 11:24:00 +0200 -# Author Juergen Gross -# Committer Jan Beulich -Correct X2-APIC HVM emulation - -commit 6859874b61d5ddaf5289e72ed2b2157739b72ca5 ("x86/HVM: fix x2APIC -APIC_ID read emulation") introduced an error in the hvm emulation of -x2apic. Any attempt to write to the APIC_ICR MSR will result in a GP fault. - -Signed-off-by: Juergen Gross - ---- a/xen/arch/x86/hvm/vlapic.c -+++ b/xen/arch/x86/hvm/vlapic.c -@@ -868,6 +868,7 @@ int hvm_x2apic_msr_write(struct vcpu *v, - rc = vlapic_reg_write(v, APIC_ICR2, (uint32_t)(msr_content >> 32)); - if ( rc ) - return rc; -+ break; - - case APIC_ICR2: - return X86EMUL_UNHANDLEABLE; diff --git a/521c6d4a-x86-don-t-allow-Dom0-access-to-the-MSI-address-range.patch b/521c6d4a-x86-don-t-allow-Dom0-access-to-the-MSI-address-range.patch deleted file mode 100644 index 6259d1b..0000000 --- a/521c6d4a-x86-don-t-allow-Dom0-access-to-the-MSI-address-range.patch +++ /dev/null @@ -1,24 +0,0 @@ -# Commit 850188e1278cecd1dfb9b936024bee2d8dfdcc18 -# Date 2013-08-27 11:11:38 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86: don't allow Dom0 access to the MSI address range - -In particular, MMIO assignments should not be done using this area. - -Signed-off-by: Jan Beulich -Acked-by: Xiantao Zhang - ---- a/xen/arch/x86/domain_build.c -+++ b/xen/arch/x86/domain_build.c -@@ -1122,6 +1122,10 @@ int __init construct_dom0( - if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn) ) - rc |= iomem_deny_access(dom0, mfn, mfn); - } -+ /* MSI range. */ -+ rc |= iomem_deny_access(dom0, paddr_to_pfn(MSI_ADDR_BASE_LO), -+ paddr_to_pfn(MSI_ADDR_BASE_LO + -+ MSI_ADDR_DEST_ID_MASK)); - - /* Remove access to E820_UNUSABLE I/O regions above 1MB. */ - for ( i = 0; i < e820.nr_map; i++ ) diff --git a/521c6e23-x86-Intel-add-support-for-Haswell-CPU-models.patch b/521c6e23-x86-Intel-add-support-for-Haswell-CPU-models.patch deleted file mode 100644 index 966c5d5..0000000 --- a/521c6e23-x86-Intel-add-support-for-Haswell-CPU-models.patch +++ /dev/null @@ -1,52 +0,0 @@ -# Commit 3e787021fb2420851c7bdc3911ea53c728ba5ac0 -# Date 2013-08-27 11:15:15 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86/Intel: add support for Haswell CPU models - -... according to their most recent public documentation.
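The model numbers matched in the hunks below are CPUID display models; a sketch of how such a value is derived from leaf 1, assuming the usual cpuid_eax() helper (standard encoding, not code from the patch):

    uint32_t eax = cpuid_eax(1);
    unsigned int model = (eax >> 4) & 0xf;

    if ( ((eax >> 8) & 0xf) == 6 )         /* family 6: fold in the extended model */
        model |= ((eax >> 16) & 0xf) << 4; /* e.g. 0x3c, 0x3f, 0x45, 0x46 for Haswell */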
- -Signed-off-by: Jan Beulich -Acked-by: Keir Fraser - ---- a/xen/arch/x86/acpi/cpu_idle.c -+++ b/xen/arch/x86/acpi/cpu_idle.c -@@ -135,8 +135,10 @@ static void do_get_hw_residencies(void * - case 0x3A: - case 0x3E: - /* Haswell */ -- case 0x3c: -+ case 0x3C: -+ case 0x3F: - case 0x45: -+ case 0x46: - GET_PC2_RES(hw_res->pc2); - GET_CC7_RES(hw_res->cc7); - /* fall through */ ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -1814,7 +1814,7 @@ static const struct lbr_info *last_branc - /* Ivy Bridge */ - case 58: case 62: - /* Haswell */ -- case 60: case 69: -+ case 60: case 63: case 69: case 70: - return nh_lbr; - break; - /* Atom */ ---- a/xen/arch/x86/hvm/vmx/vpmu_core2.c -+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c -@@ -878,7 +878,12 @@ int vmx_vpmu_initialise(struct vcpu *v, - - case 0x3a: /* IvyBridge */ - case 0x3e: /* IvyBridge EP */ -- case 0x3c: /* Haswell */ -+ -+ /* Haswell: */ -+ case 0x3c: -+ case 0x3f: -+ case 0x45: -+ case 0x46: - ret = core2_vpmu_initialise(v, vpmu_flags); - if ( !ret ) - vpmu->arch_vpmu_ops = &core2_vpmu_ops; diff --git a/521db25f-Fix-inactive-timer-list-corruption-on-second-S3-resume.patch b/521db25f-Fix-inactive-timer-list-corruption-on-second-S3-resume.patch deleted file mode 100644 index fdd9ba2..0000000 --- a/521db25f-Fix-inactive-timer-list-corruption-on-second-S3-resume.patch +++ /dev/null @@ -1,42 +0,0 @@ -# Commit 9e2c5938246546a5b3f698b7421640d85602b994 -# Date 2013-08-28 10:18:39 +0200 -# Author Tomasz Wroblewski -# Committer Jan Beulich -Fix inactive timer list corruption on second S3 resume - -init_timer cannot be safely called multiple times on same timer since it does memset(0) -on the structure, erasing the auxiliary member used by linked list code. This breaks -inactive timer list in common/timer.c. - -Moved resume_timer initialisation to ns16550_init_postirq, so it's only done once. - -Signed-off-by: Tomasz Wroblewski -Acked-by: Keir Fraser - ---- a/xen/drivers/char/ns16550.c -+++ b/xen/drivers/char/ns16550.c -@@ -128,6 +128,8 @@ static struct ns16550 { - #define RESUME_DELAY MILLISECS(10) - #define RESUME_RETRIES 100 - -+static void ns16550_delayed_resume(void *data); -+ - static char ns_read_reg(struct ns16550 *uart, int reg) - { - if ( uart->remapped_io_base == NULL ) -@@ -323,6 +325,7 @@ static void __init ns16550_init_postirq( - serial_async_transmit(port); - - init_timer(&uart->timer, ns16550_poll, port, 0); -+ init_timer(&uart->resume_timer, ns16550_delayed_resume, port, 0); - - /* Calculate time to fill RX FIFO and/or empty TX FIFO for polling. 
*/ - bits = uart->data_bits + uart->stop_bits + !!uart->parity; -@@ -413,7 +416,6 @@ static void ns16550_resume(struct serial - if ( ns16550_ioport_invalid(uart) ) - { - delayed_resume_tries = RESUME_RETRIES; -- init_timer(&uart->resume_timer, ns16550_delayed_resume, port, 0); - set_timer(&uart->resume_timer, NOW() + RESUME_DELAY); - } - else diff --git a/521e1156-x86-AVX-instruction-emulation-fixes.patch b/521e1156-x86-AVX-instruction-emulation-fixes.patch deleted file mode 100644 index 4a51069..0000000 --- a/521e1156-x86-AVX-instruction-emulation-fixes.patch +++ /dev/null @@ -1,254 +0,0 @@ -# Commit 062919448e2f4b127c9c3c085b1a8e1d56a33051 -# Date 2013-08-28 17:03:50 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86: AVX instruction emulation fixes - -- we used the C4/C5 (first prefix) byte instead of the apparent ModR/M - one as the second prefix byte -- early decoding normalized vex.reg, thus corrupting it for the main - consumer (copy_REX_VEX()), resulting in #UD on the two-operand - instructions we emulate - -Also add respective test cases to the testing utility plus -- fix get_fpu() (the fall-through order was inverted) -- add cpu_has_avx2, even if it's currently unused (as in the new test - cases I decided to refrain from using AVX2 instructions in order to - be able to actually run all the tests on the hardware I have) -- slightly tweak cpu_has_avx to more consistently express the outputs - we don't care about (sinking them all into the same variable) - -Signed-off-by: Jan Beulich -Acked-by: Keir Fraser - ---- a/tools/tests/x86_emulator/test_x86_emulator.c -+++ b/tools/tests/x86_emulator/test_x86_emulator.c -@@ -94,13 +94,25 @@ static inline uint64_t xgetbv(uint32_t x - } - - #define cpu_has_avx ({ \ -- unsigned int eax = 1, ecx = 0, edx; \ -- cpuid(&eax, &edx, &ecx, &edx, NULL); \ -+ unsigned int eax = 1, ecx = 0; \ -+ cpuid(&eax, &eax, &ecx, &eax, NULL); \ - if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ - ecx = 0; \ - (ecx & (1U << 28)) != 0; \ - }) - -+#define cpu_has_avx2 ({ \ -+ unsigned int eax = 1, ebx, ecx = 0; \ -+ cpuid(&eax, &ebx, &ecx, &eax, NULL); \ -+ if ( !(ecx & (1U << 27)) || ((xgetbv(0) & 6) != 6) ) \ -+ ebx = 0; \ -+ else { \ -+ eax = 7, ecx = 0; \ -+ cpuid(&eax, &ebx, &ecx, &eax, NULL); \ -+ } \ -+ (ebx & (1U << 5)) != 0; \ -+}) -+ - int get_fpu( - void (*exception_callback)(void *, struct cpu_user_regs *), - void *exception_callback_arg, -@@ -111,14 +123,14 @@ int get_fpu( - { - case X86EMUL_FPU_fpu: - break; -- case X86EMUL_FPU_ymm: -- if ( cpu_has_avx ) -+ case X86EMUL_FPU_mmx: -+ if ( cpu_has_mmx ) - break; - case X86EMUL_FPU_xmm: - if ( cpu_has_sse ) - break; -- case X86EMUL_FPU_mmx: -- if ( cpu_has_mmx ) -+ case X86EMUL_FPU_ymm: -+ if ( cpu_has_avx ) - break; - default: - return X86EMUL_UNHANDLEABLE; -@@ -629,6 +641,73 @@ int main(int argc, char **argv) - else - printf("skipped\n"); - -+ printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)..."); -+ if ( stack_exec && cpu_has_avx ) -+ { -+ extern const unsigned char vmovdqu_to_mem[]; -+ -+ asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n" -+ ".pushsection .test, \"a\", @progbits\n" -+ "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n" -+ ".popsection" :: "c" (NULL) ); -+ -+ memcpy(instr, vmovdqu_to_mem, 15); -+ memset(res, 0x55, 128); -+ memset(res + 16, 0xff, 16); -+ memset(res + 20, 0x00, 16); -+ regs.eip = (unsigned long)&instr[0]; -+ regs.ecx = (unsigned long)res; -+ rc = x86_emulate(&ctxt, &emulops); -+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) ) -+ goto fail; -+ printf("okay\n"); -+ } 
-+ else -+ printf("skipped\n"); -+ -+ printf("%-40s", "Testing vmovdqu (%edx),%ymm4..."); -+ if ( stack_exec && cpu_has_avx ) -+ { -+ extern const unsigned char vmovdqu_from_mem[]; -+ -+#if 0 /* Don't use AVX2 instructions for now */ -+ asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n" -+#else -+ asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t" -+ "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n" -+#endif -+ ".pushsection .test, \"a\", @progbits\n" -+ "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n" -+ ".popsection" :: "d" (NULL) ); -+ -+ memcpy(instr, vmovdqu_from_mem, 15); -+ memset(res + 4, 0xff, 16); -+ regs.eip = (unsigned long)&instr[0]; -+ regs.ecx = 0; -+ regs.edx = (unsigned long)res; -+ rc = x86_emulate(&ctxt, &emulops); -+ if ( rc != X86EMUL_OKAY ) -+ goto fail; -+#if 0 /* Don't use AVX2 instructions for now */ -+ asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t" -+ "vpcmpeqb %%ymm4, %%ymm2, %%ymm0\n\t" -+ "vpmovmskb %%ymm1, %0" : "=r" (rc) ); -+#else -+ asm ( "vextractf128 $1, %%ymm4, %%xmm3\n\t" -+ "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n\t" -+ "vpcmpeqb %%xmm4, %%xmm2, %%xmm0\n\t" -+ "vpcmpeqb %%xmm3, %%xmm2, %%xmm1\n\t" -+ "vpmovmskb %%xmm0, %0\n\t" -+ "vpmovmskb %%xmm1, %1" : "=r" (rc), "=r" (i) ); -+ rc |= i << 16; -+#endif -+ if ( rc != 0xffffffff ) -+ goto fail; -+ printf("okay\n"); -+ } -+ else -+ printf("skipped\n"); -+ - printf("%-40s", "Testing movsd %xmm5,(%ecx)..."); - memset(res, 0x77, 64); - memset(res + 10, 0x66, 8); -@@ -683,6 +762,59 @@ int main(int argc, char **argv) - else - printf("skipped\n"); - -+ printf("%-40s", "Testing vmovsd %xmm5,(%ecx)..."); -+ memset(res, 0x88, 64); -+ memset(res + 10, 0x77, 8); -+ if ( stack_exec && cpu_has_avx ) -+ { -+ extern const unsigned char vmovsd_to_mem[]; -+ -+ asm volatile ( "vbroadcastsd %0, %%ymm5\n" -+ ".pushsection .test, \"a\", @progbits\n" -+ "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n" -+ ".popsection" :: "m" (res[10]), "c" (NULL) ); -+ -+ memcpy(instr, vmovsd_to_mem, 15); -+ regs.eip = (unsigned long)&instr[0]; -+ regs.ecx = (unsigned long)(res + 2); -+ regs.edx = 0; -+ rc = x86_emulate(&ctxt, &emulops); -+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) -+ goto fail; -+ printf("okay\n"); -+ } -+ else -+ { -+ printf("skipped\n"); -+ memset(res + 2, 0x77, 8); -+ } -+ -+ printf("%-40s", "Testing vmovaps (%edx),%ymm7..."); -+ if ( stack_exec && cpu_has_avx ) -+ { -+ extern const unsigned char vmovaps_from_mem[]; -+ -+ asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n" -+ ".pushsection .test, \"a\", @progbits\n" -+ "vmovaps_from_mem: vmovaps (%0), %%ymm7\n" -+ ".popsection" :: "d" (NULL) ); -+ -+ memcpy(instr, vmovaps_from_mem, 15); -+ regs.eip = (unsigned long)&instr[0]; -+ regs.ecx = 0; -+ regs.edx = (unsigned long)res; -+ rc = x86_emulate(&ctxt, &emulops); -+ if ( rc != X86EMUL_OKAY ) -+ goto fail; -+ asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t" -+ "vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) ); -+ if ( rc != 0xff ) -+ goto fail; -+ printf("okay\n"); -+ } -+ else -+ printf("skipped\n"); -+ - for ( j = 1; j <= 2; j++ ) - { - #if defined(__i386__) ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -1454,10 +1454,10 @@ x86_emulate( - /* VEX */ - generate_exception_if(rex_prefix || vex.pfx, EXC_UD, -1); - -- vex.raw[0] = b; -+ vex.raw[0] = modrm; - if ( b & 1 ) - { -- vex.raw[1] = b; -+ vex.raw[1] = modrm; - vex.opcx = vex_0f; - vex.x = 1; - vex.b = 1; -@@ -1479,10 +1479,7 @@ x86_emulate( - } - } - } -- vex.reg ^= 0xf; -- if ( !mode_64bit() ) -- vex.reg &= 0x7; -- else if ( !vex.r ) -+ if ( 
mode_64bit() && !vex.r ) - rex_prefix |= REX_R; - - fail_if(vex.opcx != vex_0f); -@@ -3899,8 +3896,9 @@ x86_emulate( - else - { - fail_if((vex.opcx != vex_0f) || -- (vex.reg && ((ea.type == OP_MEM) || -- !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); -+ ((vex.reg != 0xf) && -+ ((ea.type == OP_MEM) || -+ !(vex.pfx & VEX_PREFIX_SCALAR_MASK)))); - vcpu_must_have_avx(); - get_fpu(X86EMUL_FPU_ymm, &fic); - ea.bytes = 16 << vex.l; -@@ -4168,7 +4166,7 @@ x86_emulate( - } - else - { -- fail_if((vex.opcx != vex_0f) || vex.reg || -+ fail_if((vex.opcx != vex_0f) || (vex.reg != 0xf) || - ((vex.pfx != vex_66) && (vex.pfx != vex_f3))); - vcpu_must_have_avx(); - get_fpu(X86EMUL_FPU_ymm, &fic); diff --git a/521ef8d9-AMD-IOMMU-add-missing-checks.patch b/521ef8d9-AMD-IOMMU-add-missing-checks.patch deleted file mode 100644 index 950201a..0000000 --- a/521ef8d9-AMD-IOMMU-add-missing-checks.patch +++ /dev/null @@ -1,29 +0,0 @@ -# Commit 3785d30efe8264b899499e0883b10cc434bd0959 -# Date 2013-08-29 09:31:37 +0200 -# Author Jan Beulich -# Committer Jan Beulich -AMD IOMMU: add missing check - -We shouldn't accept IVHD tables specifying IO-APIC IDs beyond the limit -we support (MAX_IO_APICS, currently 128). - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Acked-by: Suravee Suthikulpanit - ---- a/xen/drivers/passthrough/amd/iommu_acpi.c -+++ b/xen/drivers/passthrough/amd/iommu_acpi.c -@@ -674,6 +674,13 @@ static u16 __init parse_ivhd_device_spec - if ( IO_APIC_ID(apic) != special->handle ) - continue; - -+ if ( special->handle >= ARRAY_SIZE(ioapic_sbdf) ) -+ { -+ printk(XENLOG_ERR "IVHD Error: IO-APIC %#x entry beyond bounds\n", -+ special->handle); -+ return 0; -+ } -+ - if ( ioapic_sbdf[special->handle].pin_2_idx ) - { - if ( ioapic_sbdf[special->handle].bdf == bdf && diff --git a/52205a7d-hvmloader-smbios-Correctly-count-the-number-of-tables-written.patch b/52205a7d-hvmloader-smbios-Correctly-count-the-number-of-tables-written.patch deleted file mode 100644 index 983c37a..0000000 --- a/52205a7d-hvmloader-smbios-Correctly-count-the-number-of-tables-written.patch +++ /dev/null @@ -1,28 +0,0 @@ -# Commit 4aa19549e17650b9bfe2b31d7f52a95696d388f0 -# Date 2013-08-30 10:40:29 +0200 -# Author Andrew Cooper -# Committer Jan Beulich -hvmloader/smbios: Correctly count the number of tables written - -Fixes a regression indirectly introduced by c/s 4d23036e709627 - -That changeset added some smbios tables which were optional, based on the -toolstack providing appropriate xenstore keys. The do_struct() macro would -unconditionally increment nr_structs, even if a table was not actually -written.
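The counting rule the fix establishes, shown in isolation (write_table() is a hypothetical table writer, not a name from the patch): a writer that emits nothing returns its input pointer unchanged and must not be counted:

    q = write_table(p);   /* may legitimately emit no table at all */
    if ( q != p )         /* count only when the pointer actually advanced */
        (*nr_structs)++;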
- -Signed-off-by: Andrew Cooper -Acked-by: Keir Fraser - ---- a/tools/firmware/hvmloader/smbios.c -+++ b/tools/firmware/hvmloader/smbios.c -@@ -192,7 +192,8 @@ write_smbios_tables(void *ep, void *star - - #define do_struct(fn) do { \ - q = (fn); \ -- (*nr_structs)++; \ -+ if ( q != p ) \ -+ (*nr_structs)++; \ - if ( (q - p) > *max_struct_size ) \ - *max_struct_size = q - p; \ - p = q; \ diff --git a/52205a90-public-hvm_xs_strings.h-Fix-ABI-regression-for-OEM-SMBios-strings.patch b/52205a90-public-hvm_xs_strings.h-Fix-ABI-regression-for-OEM-SMBios-strings.patch deleted file mode 100644 index 6585c0c..0000000 --- a/52205a90-public-hvm_xs_strings.h-Fix-ABI-regression-for-OEM-SMBios-strings.patch +++ /dev/null @@ -1,42 +0,0 @@ -# Commit 0f4cb23c3ea5b987c49c9a9368e7a0d505ec064f -# Date 2013-08-30 10:40:48 +0200 -# Author Andrew Cooper -# Committer Jan Beulich -public/hvm_xs_strings.h: Fix ABI regression for OEM SMBios strings - -The old code for OEM SMBios strings was: - - char path[20] = "bios-strings/oem-XX"; - path[(sizeof path) - 3] = '0' + ((i < 10) ? i : i / 10); - path[(sizeof path) - 2] = (i < 10) ? '\0' : '0' + (i % 10); - -Where oem-1 thru 9 specifically had no leading 0. - -However, the definition of HVM_XS_OEM_STRINGS specifically requires leading -0s. - -This regression was introduced by the combination of c/s 4d23036e709627 and -e64c3f71ceb662 - -I realise that this patch causes a change to the public headers. However I -feel it is justified as: - -* All toolstacks used to have to embed the magic string (and almost certainly - still do) -* If by some miracle a new toolstack has started using the new define, it will - continue to work. -* The only in-tree consumer of the define is hvmloader itself. - -Signed-off-by: Andrew Cooper -Acked-by: Keir Fraser - ---- a/xen/include/public/hvm/hvm_xs_strings.h -+++ b/xen/include/public/hvm/hvm_xs_strings.h -@@ -75,6 +75,6 @@ - /* 1 to 99 OEM strings can be set in xenstore using values of the form - * below. These strings will be loaded into the SMBIOS type 11 structure. - */ --#define HVM_XS_OEM_STRINGS "bios-strings/oem-%02d" -+#define HVM_XS_OEM_STRINGS "bios-strings/oem-%d" - - #endif /* __XEN_PUBLIC_HVM_HVM_XS_STRINGS_H__ */ diff --git a/52205e27-x86-xsave-initialization-improvements.patch b/52205e27-x86-xsave-initialization-improvements.patch deleted file mode 100644 index 1384556..0000000 --- a/52205e27-x86-xsave-initialization-improvements.patch +++ /dev/null @@ -1,105 +0,0 @@ -References: bnc#833796 - -# Commit c6066e78f4a66005b0d5d86c6ade32e2ab78923a -# Date 2013-08-30 10:56:07 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86/xsave: initialization improvements - -- properly validate available feature set on APs -- also validate xsaveopt availability on APs -- properly indicate whether the initialization is on the BSP (we - shouldn't be using "cpu == 0" checks for this) - -Signed-off-by: Jan Beulich -Acked-by: Keir Fraser - ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -304,7 +304,7 @@ void __cpuinit identify_cpu(struct cpuin - clear_bit(X86_FEATURE_XSAVE, boot_cpu_data.x86_capability); - - if ( cpu_has_xsave ) -- xstate_init(); -+ xstate_init(c == &boot_cpu_data); - - /* - * The vendor-specific functions might have changed features.
Now ---- a/xen/arch/x86/xstate.c -+++ b/xen/arch/x86/xstate.c -@@ -247,11 +247,10 @@ void xstate_free_save_area(struct vcpu * - } - - /* Collect the information of processor's extended state */ --void xstate_init(void) -+void xstate_init(bool_t bsp) - { -- u32 eax, ebx, ecx, edx; -- int cpu = smp_processor_id(); -- u32 min_size; -+ u32 eax, ebx, ecx, edx, min_size; -+ u64 feature_mask; - - if ( boot_cpu_data.cpuid_level < XSTATE_CPUID ) - return; -@@ -260,6 +259,7 @@ void xstate_init(void) - - BUG_ON((eax & XSTATE_FP_SSE) != XSTATE_FP_SSE); - BUG_ON((eax & XSTATE_YMM) && !(eax & XSTATE_SSE)); -+ feature_mask = (((u64)edx << 32) | eax) & XCNTXT_MASK; - - /* FP/SSE, XSAVE.HEADER, YMM */ - min_size = XSTATE_AREA_MIN_SIZE; -@@ -271,31 +271,33 @@ void xstate_init(void) - * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size. - */ - set_in_cr4(X86_CR4_OSXSAVE); -- if ( !set_xcr0((((u64)edx << 32) | eax) & XCNTXT_MASK) ) -+ if ( !set_xcr0(feature_mask) ) - BUG(); - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - -- if ( cpu == 0 ) -+ if ( bsp ) - { -+ xfeature_mask = feature_mask; - /* - * xsave_cntxt_size is the max size required by enabled features. - * We know FP/SSE and YMM about eax, and nothing about edx at present. - */ - xsave_cntxt_size = ebx; -- xfeature_mask = eax + ((u64)edx << 32); -- xfeature_mask &= XCNTXT_MASK; - printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n", - __func__, xsave_cntxt_size, xfeature_mask); -- -- /* Check XSAVEOPT feature. */ -- cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); -- cpu_has_xsaveopt = !!(eax & XSTATE_FEATURE_XSAVEOPT); - } - else - { -+ BUG_ON(xfeature_mask != feature_mask); - BUG_ON(xsave_cntxt_size != ebx); -- BUG_ON(xfeature_mask != (xfeature_mask & XCNTXT_MASK)); - } -+ -+ /* Check XSAVEOPT feature. */ -+ cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); -+ if ( bsp ) -+ cpu_has_xsaveopt = !!(eax & XSTATE_FEATURE_XSAVEOPT); -+ else -+ BUG_ON(!cpu_has_xsaveopt != !(eax & XSTATE_FEATURE_XSAVEOPT)); - } - - int handle_xsetbv(u32 index, u64 new_bv) ---- a/xen/include/asm-x86/xstate.h -+++ b/xen/include/asm-x86/xstate.h -@@ -81,6 +81,6 @@ int __must_check handle_xsetbv(u32 index - /* extended state init and cleanup functions */ - void xstate_free_save_area(struct vcpu *v); - int xstate_alloc_save_area(struct vcpu *v); --void xstate_init(void); -+void xstate_init(bool_t bsp); - - #endif /* __ASM_XSTATE_H */ diff --git a/5226020f-xend-handle-extended-PCI-configuration-space-when-saving-state.patch b/5226020f-xend-handle-extended-PCI-configuration-space-when-saving-state.patch deleted file mode 100644 index 78b3ca9..0000000 --- a/5226020f-xend-handle-extended-PCI-configuration-space-when-saving-state.patch +++ /dev/null @@ -1,31 +0,0 @@ -# Commit 1893cf77992cc0ce9d827a8d345437fa2494b540 -# Date 2013-09-03 16:36:47 +0100 -# Author Steven Noonan -# Committer Ian Campbell -xend: handle extended PCI configuration space when saving state - -Newer PCI standards (e.g., PCI-X 2.0 and PCIe) introduce extended -configuration space which is larger than 256 bytes. This patch uses -stat() to determine the amount of space used to correctly save all of -the PCI configuration space. Resets handled by the xen-pciback driver -don't have this problem, as that code correctly handles saving -extended configuration space. 
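The same sizing idea expressed as a C sketch for illustration (the Python hunk below is the actual change; the function name and buffer handling here are assumptions):

    #include <fcntl.h>
    #include <sys/stat.h>
    #include <unistd.h>

    /* Size the sysfs "config" file instead of assuming 256 bytes, so PCIe
     * extended configuration space (4096 bytes) gets saved as well. */
    static void save_cfg_space(const char *path, unsigned char *buf)
    {
        struct stat st;
        int fd = open(path, O_RDONLY);

        if ( fd >= 0 && fstat(fd, &st) == 0 )
            for ( off_t off = 0; off < st.st_size; off += 4 )
                if ( read(fd, buf + off, 4) != 4 )  /* buf assumed large enough */
                    break;
        if ( fd >= 0 )
            close(fd);
    }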
- -Signed-off-by: Steven Noonan -Reviewed-by: Matt Wilson -[msw: adjusted commit message] -Signed-off-by: Matt Wilson - ---- a/tools/python/xen/util/pci.py -+++ b/tools/python/xen/util/pci.py -@@ -521,8 +521,9 @@ def save_pci_conf_space(devs_string): - pci_path = sysfs_mnt + SYSFS_PCI_DEVS_PATH + '/' + pci_str + \ - SYSFS_PCI_DEV_CONFIG_PATH - fd = os.open(pci_path, os.O_RDONLY) -+ size = os.fstat(fd).st_size - configs = [] -- for i in range(0, 256, 4): -+ for i in range(0, size, 4): - configs = configs + [os.read(fd,4)] - os.close(fd) - pci_list = pci_list + [pci_path] diff --git a/52260214-xend-fix-file-descriptor-leak-in-pci-utilities.patch b/52260214-xend-fix-file-descriptor-leak-in-pci-utilities.patch deleted file mode 100644 index 0f2f8e7..0000000 --- a/52260214-xend-fix-file-descriptor-leak-in-pci-utilities.patch +++ /dev/null @@ -1,48 +0,0 @@ -# Commit 749019afca4fd002d36856bad002cc11f7d0ddda -# Date 2013-09-03 16:36:52 +0100 -# Author Xi Xiong -# Committer Ian Campbell -xend: fix file descriptor leak in pci utilities - -A file descriptor leak was detected after creating multiple domUs with -pass-through PCI devices. This patch fixes the issue. - -Signed-off-by: Xi Xiong -Reviewed-by: Matt Wilson -[msw: adjusted commit message] -Signed-off-by: Matt Wilson - ---- a/tools/python/xen/util/pci.py -+++ b/tools/python/xen/util/pci.py -@@ -969,18 +969,22 @@ class PciDevice: - ttl = 480; # 3840 bytes, minimum 8 bytes per capability - pos = 0x100 - -+ fd = None - try: - fd = os.open(path, os.O_RDONLY) - os.lseek(fd, pos, 0) - h = os.read(fd, 4) - if len(h) == 0: # MMCONF is not enabled? -+ os.close(fd) - return 0 - header = struct.unpack('I', h)[0] - if header == 0 or header == -1: -+ os.close(fd) - return 0 - - while ttl > 0: - if (header & 0x0000ffff) == cap: -+ os.close(fd) - return pos - pos = (header >> 20) & 0xffc - if pos < 0x100: -@@ -990,6 +994,8 @@ class PciDevice: - ttl = ttl - 1 - os.close(fd) - except OSError, (errno, strerr): -+ if fd is not None: -+ os.close(fd) - raise PciDeviceParseError(('Error when accessing sysfs: %s (%d)' % - (strerr, errno))) - return 0 diff --git a/52285317-hvmloader-fix-SeaBIOS-interface.patch b/52285317-hvmloader-fix-SeaBIOS-interface.patch deleted file mode 100644 index 9b97af2..0000000 --- a/52285317-hvmloader-fix-SeaBIOS-interface.patch +++ /dev/null @@ -1,84 +0,0 @@ -# Commit 5f2875739beef3a75c7a7e8579b6cbcb464e61b3 -# Date 2013-09-05 11:47:03 +0200 -# Author Jan Beulich -# Committer Jan Beulich -hvmloader: fix SeaBIOS interface - -The SeaBIOS ROM image may validly exceed 128k in size, it's only our -interface code that so far assumed that it wouldn't. Remove that -restriction by setting the base address depending on image size. - -Add a check to HVM loader so that too big images won't result in silent -guest failure anymore. - -Uncomment the intended build-time size check for rombios, moving it -into a function so that it would actually compile. 
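The placement rule the patch adopts can be stated in two lines (mirroring the seabios_config and hvmloader hunks below; names as used there):

    /* Anchor the end of the image at the 1MiB boundary instead of assuming
     * it fits in a fixed 128k window at 0xE0000. */
    bios_address = 0x100000 - image_size;
    BUG_ON(bios_address + image_size > HVMLOADER_PHYSICAL_ADDRESS);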
- -Signed-off-by: Jan Beulich -Acked-by: Ian Campbell - ---- a/tools/firmware/hvmloader/config-seabios.h -+++ b/tools/firmware/hvmloader/config-seabios.h -@@ -3,8 +3,6 @@ - - #define BIOS_INFO_PHYSICAL_ADDRESS 0x00001000 - --#define SEABIOS_PHYSICAL_ADDRESS 0x000E0000 -- - #endif /* __HVMLOADER_CONFIG_SEABIOS_H__ */ - - /* ---- a/tools/firmware/hvmloader/hvmloader.c -+++ b/tools/firmware/hvmloader/hvmloader.c -@@ -292,8 +292,12 @@ int main(void) - if ( bios->bios_load ) - bios->bios_load(bios); - else -+ { -+ BUG_ON(bios->bios_address + bios->image_size > -+ HVMLOADER_PHYSICAL_ADDRESS); - memcpy((void *)bios->bios_address, bios->image, - bios->image_size); -+ } - - if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode ) - { ---- a/tools/firmware/hvmloader/rombios.c -+++ b/tools/firmware/hvmloader/rombios.c -@@ -127,6 +127,8 @@ static void rombios_load(const struct bi - uint32_t bioshigh; - struct rombios_info *info; - -+ BUILD_BUG_ON(sizeof(rombios) > 0x100000 - ROMBIOS_PHYSICAL_ADDRESS); -+ - memcpy((void *)config->bios_address, config->image, - config->image_size); - -@@ -206,8 +208,6 @@ static void rombios_create_smbios_tables - SMBIOS_PHYSICAL_END); - } - --//BUILD_BUG_ON(sizeof(rombios) > (0x00100000U - ROMBIOS_PHYSICAL_ADDRESS)); -- - struct bios_config rombios_config = { - .name = "ROMBIOS", - ---- a/tools/firmware/hvmloader/seabios.c -+++ b/tools/firmware/hvmloader/seabios.c -@@ -133,15 +133,13 @@ static void seabios_setup_e820(void) - dump_e820_table(e820, info->e820_nr); - } - --//BUILD_BUG_ON(sizeof(seabios) > (0x00100000U - SEABIOS_PHYSICAL_ADDRESS)); -- - struct bios_config seabios_config = { - .name = "SeaBIOS", - - .image = seabios, - .image_size = sizeof(seabios), - -- .bios_address = SEABIOS_PHYSICAL_ADDRESS, -+ .bios_address = 0x100000 - sizeof(seabios), - - .load_roms = NULL, - diff --git a/522d896b-x86-EFI-properly-handle-run-time-memory-regions-outside-the-1-1-map.patch b/522d896b-x86-EFI-properly-handle-run-time-memory-regions-outside-the-1-1-map.patch deleted file mode 100644 index e4b079b..0000000 --- a/522d896b-x86-EFI-properly-handle-run-time-memory-regions-outside-the-1-1-map.patch +++ /dev/null @@ -1,195 +0,0 @@ -References: bnc#833251, bnc#834751 - -# Commit a350f3f43bcfac9c1591e28d8e43c505fcb172a5 -# Date 2013-09-09 10:40:11 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86/EFI: properly handle run time memory regions outside the 1:1 map - -Namely with PFN compression, MMIO ranges that the firmware may need -runtime access to can live in the holes that get shrunk/eliminated by -PFN compression, and hence no mappings would result from simply -copying Xen's direct mapping table's L3 page table entries. Build -mappings for this "manually" in the EFI runtime call 1:1 page tables. - -Use the opportunity to also properly identify (via a forcibly undefined -manifest constant) all the disabled code regions associated with it not -being acceptable for us to call SetVirtualAddressMap(). - -Signed-off-by: Jan Beulich - ---- a/xen/arch/x86/efi/boot.c -+++ b/xen/arch/x86/efi/boot.c -@@ -26,6 +26,9 @@ - #include - #include - #include -+ -+/* Using SetVirtualAddressMap() is incompatible with kexec: */ -+#undef USE_SET_VIRTUAL_ADDRESS_MAP -+ - #define SHIM_LOCK_PROTOCOL_GUID \ - { 0x605dab50, 0xe046, 0x4300, {0xab, 0xb6, 0x3d, 0xd8, 0x10, 0xdd, 0x8b, 0x23} } - -@@ -1434,7 +1437,7 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY - - /* Adjust pointers into EFI. */ - efi_ct = (void *)efi_ct + DIRECTMAP_VIRT_START; --#if 0 /* Only needed when using virtual mode (see efi_init_memory()).
*/ -+#ifdef USE_SET_VIRTUAL_ADDRESS_MAP - efi_rs = (void *)efi_rs + DIRECTMAP_VIRT_START; - #endif - efi_memmap = (void *)efi_memmap + DIRECTMAP_VIRT_START; -@@ -1477,6 +1480,7 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY - for( ; ; ); /* not reached */ - } - -+#ifndef USE_SET_VIRTUAL_ADDRESS_MAP - static __init void copy_mapping(unsigned long mfn, unsigned long end, - bool_t (*is_valid)(unsigned long smfn, - unsigned long emfn)) -@@ -1520,6 +1524,7 @@ static bool_t __init rt_range_valid(unsi - { - return 1; - } -+#endif - - #define INVALID_VIRTUAL_ADDRESS (0xBAAADUL << \ - (EFI_PAGE_SHIFT + BITS_PER_LONG - 32)) -@@ -1527,6 +1532,13 @@ static bool_t __init rt_range_valid(unsi - void __init efi_init_memory(void) - { - unsigned int i; -+#ifndef USE_SET_VIRTUAL_ADDRESS_MAP -+ struct rt_extra { -+ struct rt_extra *next; -+ unsigned long smfn, emfn; -+ unsigned int prot; -+ } *extra, *extra_head = NULL; -+#endif - - printk(XENLOG_INFO "EFI memory map:\n"); - for ( i = 0; i < efi_memmap_size; i += efi_mdesc_size ) -@@ -1573,6 +1585,8 @@ void __init efi_init_memory(void) - !(smfn & pfn_hole_mask) && - !((smfn ^ (emfn - 1)) & ~pfn_pdx_bottom_mask) ) - { -+ if ( (unsigned long)mfn_to_virt(emfn - 1) >= HYPERVISOR_VIRT_END ) -+ prot &= ~_PAGE_GLOBAL; - if ( map_pages_to_xen((unsigned long)mfn_to_virt(smfn), - smfn, emfn - smfn, prot) == 0 ) - desc->VirtualStart = -@@ -1581,15 +1595,29 @@ void __init efi_init_memory(void) - printk(XENLOG_ERR "Could not map MFNs %#lx-%#lx\n", - smfn, emfn - 1); - } -+#ifndef USE_SET_VIRTUAL_ADDRESS_MAP -+ else if ( !((desc->PhysicalStart + len - 1) >> (VADDR_BITS - 1)) && -+ (extra = xmalloc(struct rt_extra)) != NULL ) -+ { -+ extra->smfn = smfn; -+ extra->emfn = emfn; -+ extra->prot = prot & ~_PAGE_GLOBAL; -+ extra->next = extra_head; -+ extra_head = extra; -+ desc->VirtualStart = desc->PhysicalStart; -+ } -+#endif - else - { -+#ifdef USE_SET_VIRTUAL_ADDRESS_MAP - /* XXX allocate e.g. down from FIXADDR_START */ -+#endif - printk(XENLOG_ERR "No mapping for MFNs %#lx-%#lx\n", - smfn, emfn - 1); - } - } - --#if 0 /* Incompatible with kexec. */ -+#ifdef USE_SET_VIRTUAL_ADDRESS_MAP - efi_rs->SetVirtualAddressMap(efi_memmap_size, efi_mdesc_size, - mdesc_ver, efi_memmap); - #else -@@ -1600,20 +1628,74 @@ void __init efi_init_memory(void) - - copy_mapping(0, max_page, ram_range_valid); - -- /* Insert non-RAM runtime mappings. */ -+ /* Insert non-RAM runtime mappings inside the direct map. */ - for ( i = 0; i < efi_memmap_size; i += efi_mdesc_size ) - { - const EFI_MEMORY_DESCRIPTOR *desc = efi_memmap + i; - -- if ( desc->Attribute & EFI_MEMORY_RUNTIME ) -+ if ( (desc->Attribute & EFI_MEMORY_RUNTIME) && -+ desc->VirtualStart != INVALID_VIRTUAL_ADDRESS && -+ desc->VirtualStart != desc->PhysicalStart ) -+ copy_mapping(PFN_DOWN(desc->PhysicalStart), -+ PFN_UP(desc->PhysicalStart + -+ (desc->NumberOfPages << EFI_PAGE_SHIFT)), -+ rt_range_valid); -+ } -+ -+ /* Insert non-RAM runtime mappings outside of the direct map. 
*/ -+ while ( (extra = extra_head) != NULL ) -+ { -+ unsigned long addr = extra->smfn << PAGE_SHIFT; -+ l4_pgentry_t l4e = efi_l4_pgtable[l4_table_offset(addr)]; -+ l3_pgentry_t *pl3e; -+ l2_pgentry_t *pl2e; -+ l1_pgentry_t *l1t; -+ -+ if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ) - { -- if ( desc->VirtualStart != INVALID_VIRTUAL_ADDRESS ) -- copy_mapping(PFN_DOWN(desc->PhysicalStart), -- PFN_UP(desc->PhysicalStart + -- (desc->NumberOfPages << EFI_PAGE_SHIFT)), -- rt_range_valid); -- else -- /* XXX */; -+ pl3e = alloc_xen_pagetable(); -+ BUG_ON(!pl3e); -+ clear_page(pl3e); -+ efi_l4_pgtable[l4_table_offset(addr)] = -+ l4e_from_paddr(virt_to_maddr(pl3e), __PAGE_HYPERVISOR); -+ } -+ else -+ pl3e = l4e_to_l3e(l4e); -+ pl3e += l3_table_offset(addr); -+ if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) -+ { -+ pl2e = alloc_xen_pagetable(); -+ BUG_ON(!pl2e); -+ clear_page(pl2e); -+ *pl3e = l3e_from_paddr(virt_to_maddr(pl2e), __PAGE_HYPERVISOR); -+ } -+ else -+ { -+ BUG_ON(l3e_get_flags(*pl3e) & _PAGE_PSE); -+ pl2e = l3e_to_l2e(*pl3e); -+ } -+ pl2e += l2_table_offset(addr); -+ if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) -+ { -+ l1t = alloc_xen_pagetable(); -+ BUG_ON(!l1t); -+ clear_page(l1t); -+ *pl2e = l2e_from_paddr(virt_to_maddr(l1t), __PAGE_HYPERVISOR); -+ } -+ else -+ { -+ BUG_ON(l2e_get_flags(*pl2e) & _PAGE_PSE); -+ l1t = l2e_to_l1e(*pl2e); -+ } -+ for ( i = l1_table_offset(addr); -+ i < L1_PAGETABLE_ENTRIES && extra->smfn < extra->emfn; -+ ++i, ++extra->smfn ) -+ l1t[i] = l1e_from_pfn(extra->smfn, extra->prot); -+ -+ if ( extra->smfn == extra->emfn ) -+ { -+ extra_head = extra->next; -+ xfree(extra); - } - } - diff --git a/522d8a1f-x86-allow-guest-to-set-clear-MSI-X-mask-bit-try-2.patch b/522d8a1f-x86-allow-guest-to-set-clear-MSI-X-mask-bit-try-2.patch deleted file mode 100644 index 940b87c..0000000 --- a/522d8a1f-x86-allow-guest-to-set-clear-MSI-X-mask-bit-try-2.patch +++ /dev/null @@ -1,145 +0,0 @@ -# Commit a35137373aa9042424565e5ee76dc0a3bb7642ae -# Date 2013-09-09 10:43:11 +0200 -# Author Joby Poriyath -# Committer Jan Beulich -x86: allow guest to set/clear MSI-X mask bit (try 2) - -Guest needs the ability to enable and disable MSI-X interrupts -by setting the MSI-X control bit, for a passed-through device. -Guest is allowed to write the MSI-X mask bit only if Xen *thinks* -that mask is clear (interrupts enabled). If the mask is set by -Xen (interrupts disabled), writes to the mask bit by the guest are -ignored. - -Currently, a write to the MSI-X mask bit by the guest is silently -ignored. - -A likely scenario is where we have an 82599 SR-IOV nic passed -through to a guest. From the guest if you do - - ifconfig down - ifconfig up - -the interrupts remain masked. On VF reset, the mask bit is set -by the controller. At this point, Xen is not aware that mask is set. -However, interrupts are enabled by the VF driver by clearing the mask -bit by writing directly to BAR3 region containing the MSI-X table. - -From dom0, we can verify that -interrupts are being masked using 'xl debug-keys M'. - -Initially, guest was allowed to modify the MSI-X bit. -Later this behaviour was changed. -See changeset 74c213c506afcd74a8556dd092995fd4dc38b225.
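The write policy the patch implements, in outline (field and constant names as in the hunk below):

    if ( msi_desc->msi_attrib.masked )
        goto unlock;    /* masked by Xen: ignore the guest's write (warn if
                         * the hardware unexpectedly disagrees) */

    /* Xen believes the vector is unmasked: apply the guest's mask bit while
     * preserving the reserved bits of the word. */
    val = (val & PCI_MSIX_VECTOR_BITMASK) | (orig & ~PCI_MSIX_VECTOR_BITMASK);
    writel(val, virt);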
- -Signed-off-by: Joby Poriyath - ---- a/xen/arch/x86/hvm/vmsi.c -+++ b/xen/arch/x86/hvm/vmsi.c -@@ -187,6 +187,19 @@ static struct msixtbl_entry *msixtbl_fin - return NULL; - } - -+static struct msi_desc *virt_to_msi_desc(struct pci_dev *dev, void *virt) -+{ -+ struct msi_desc *desc; -+ -+ list_for_each_entry( desc, &dev->msi_list, list ) -+ if ( desc->msi_attrib.type == PCI_CAP_ID_MSIX && -+ virt >= desc->mask_base && -+ virt < desc->mask_base + PCI_MSIX_ENTRY_SIZE ) -+ return desc; -+ -+ return NULL; -+} -+ - static void __iomem *msixtbl_addr_to_virt( - struct msixtbl_entry *entry, unsigned long addr) - { -@@ -247,13 +260,16 @@ out: - } - - static int msixtbl_write(struct vcpu *v, unsigned long address, -- unsigned long len, unsigned long val) -+ unsigned long len, unsigned long val) - { - unsigned long offset; - struct msixtbl_entry *entry; -+ const struct msi_desc *msi_desc; - void *virt; - unsigned int nr_entry, index; - int r = X86EMUL_UNHANDLEABLE; -+ unsigned long flags, orig; -+ struct irq_desc *desc; - - if ( len != 4 || (address & 3) ) - return r; -@@ -283,22 +299,57 @@ static int msixtbl_write(struct vcpu *v, - if ( !virt ) - goto out; - -- /* Do not allow the mask bit to be changed. */ --#if 0 /* XXX -- * As the mask bit is the only defined bit in the word, and as the -- * host MSI-X code doesn't preserve the other bits anyway, doing -- * this is pointless. So for now just discard the write (also -- * saving us from having to determine the matching irq_desc). -- */ -+ msi_desc = virt_to_msi_desc(entry->pdev, virt); -+ if ( !msi_desc || msi_desc->irq < 0 ) -+ goto out; -+ -+ desc = irq_to_desc(msi_desc->irq); -+ if ( !desc ) -+ goto out; -+ - spin_lock_irqsave(&desc->lock, flags); -+ -+ if ( !desc->msi_desc ) -+ goto unlock; -+ -+ ASSERT(msi_desc == desc->msi_desc); -+ - orig = readl(virt); -- val &= ~PCI_MSIX_VECTOR_BITMASK; -- val |= orig & PCI_MSIX_VECTOR_BITMASK; -+ -+ /* -+ * Do not allow guest to modify MSI-X control bit if it is masked -+ * by Xen. We'll only handle the case where Xen thinks that -+ * bit is unmasked, but hardware has silently masked the bit -+ * (in case of SR-IOV VF reset, etc). On the other hand, if Xen -+ * thinks that the bit is masked, but it's really not, -+ * we log a warning. -+ */ -+ if ( msi_desc->msi_attrib.masked ) -+ { -+ if ( !(orig & PCI_MSIX_VECTOR_BITMASK) ) -+ printk(XENLOG_WARNING "MSI-X control bit is unmasked when" -+ " it is expected to be masked [%04x:%02x:%02x.%u]\n", -+ entry->pdev->seg, entry->pdev->bus, -+ PCI_SLOT(entry->pdev->devfn), -+ PCI_FUNC(entry->pdev->devfn)); -+ -+ goto unlock; -+ } -+ -+ /* -+ * The mask bit is the only defined bit in the word. But we -+ * ought to preserve the reserved bits. Clearing the reserved -+ * bits can result in undefined behaviour (see PCI Local Bus -+ * Specification revision 2.3). 
-+ */ -+ val &= PCI_MSIX_VECTOR_BITMASK; -+ val |= (orig & ~PCI_MSIX_VECTOR_BITMASK); - writel(val, virt); -- spin_unlock_irqrestore(&desc->lock, flags); --#endif - -+unlock: -+ spin_unlock_irqrestore(&desc->lock, flags); - r = X86EMUL_OKAY; -+ - out: - rcu_read_unlock(&msixtbl_rcu_lock); - return r; diff --git a/522dc044-xmalloc-make-whole-pages-xfree-clear-the-order-field-ab-used-by-xmalloc.patch b/522dc044-xmalloc-make-whole-pages-xfree-clear-the-order-field-ab-used-by-xmalloc.patch deleted file mode 100644 index 93950c8..0000000 --- a/522dc044-xmalloc-make-whole-pages-xfree-clear-the-order-field-ab-used-by-xmalloc.patch +++ /dev/null @@ -1,27 +0,0 @@ -# Commit 0fbf3208d9c1a568aeeb61d9f4fbca03b1cfa1f8 -# Date 2013-09-09 14:34:12 +0200 -# Author Jan Beulich -# Committer Jan Beulich -xmalloc: make whole pages xfree() clear the order field (ab)used by xmalloc() - -Not doing this was found to cause problems with sequences of allocation -(multi-page), freeing, and then again allocation of the same page upon -boot when interrupts are still disabled (causing the owner field to be -non-zero, thus making the allocator attempt a TLB flush and, in its -processing, triggering an assertion). - -Reported-by: Tomasz Wroblewski -Signed-off-by: Jan Beulich -Tested-by: Tomasz Wroblewski -Acked-by: Keir Fraser - ---- a/xen/common/xmalloc_tlsf.c -+++ b/xen/common/xmalloc_tlsf.c -@@ -629,6 +629,7 @@ void xfree(void *p) - unsigned int i, order = get_order_from_pages(size); - - BUG_ON((unsigned long)p & ((PAGE_SIZE << order) - 1)); -+ PFN_ORDER(virt_to_page(p)) = 0; - for ( i = 0; ; ++i ) - { - if ( !(size & (1 << i)) ) diff --git a/522dc0e6-x86-xsave-fix-migration-from-xsave-capable-to-xsave-incapable-host.patch b/522dc0e6-x86-xsave-fix-migration-from-xsave-capable-to-xsave-incapable-host.patch deleted file mode 100644 index ace1ed8..0000000 --- a/522dc0e6-x86-xsave-fix-migration-from-xsave-capable-to-xsave-incapable-host.patch +++ /dev/null @@ -1,629 +0,0 @@ -References: bnc#833796 - -# Commit 4cc1344447a0458df5d222960f2adf1b65084fa8 -# Date 2013-09-09 14:36:54 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86/xsave: fix migration from xsave-capable to xsave-incapable host - -With CPUID features suitably masked this is supposed to work, but was -completely broken (i.e. the case wasn't even considered when the -original xsave save/restore code was written). - -First of all, xsave_enabled() wrongly returned the value of -cpu_has_xsave, i.e. not even taking into consideration attributes of -the vCPU in question. Instead this function ought to check whether the -guest ever enabled xsave support (by writing a [non-zero] value to -XCR0). As a result of this, a vCPU's xcr0 and xcr0_accum must no longer -be initialized to XSTATE_FP_SSE (since that's a valid value a guest -could write to XCR0), and the xsave/xrstor as well as the context -switch code need to suitably account for this (by always enforcing at -least this part of the state to be saved/loaded). - -This involves undoing large parts of c/s 22945:13a7d1f7f62c ("x86: add -strictly sanity check for XSAVE/XRSTOR") - we need to cleanly -distinguish between hardware capabilities and vCPU used features. - -Next both HVM and PV save code needed tweaking to not always save the -full state supported by the underlying hardware, but just the parts -that the guest actually used. 
Similarly the restore code should bail -not just on state being restored that the hardware cannot handle, but -also on inconsistent save state (inconsistent XCR0 settings or size of -saved state not in line with XCR0). - -And finally the PV extended context get/set code needs to use slightly -different logic than the HVM one, as here we can't just key off of -xsave_enabled() (i.e. avoid doing anything if a guest doesn't use -xsave) because the tools use this function to determine host -capabilities as well as read/write vCPU state. The set operation in -particular needs to be capable of cleanly dealing with input that -consists of only the xcr0 and xcr0_accum values (if they're both zero -then no further data is required). - -While for things to work correctly both sides (saving _and_ restoring -host) need to run with the fixed code, afaict no breakage should occur -if either side isn't up to date (other than the breakage that this -patch attempts to fix). - -Signed-off-by: Jan Beulich -Reviewed-by: Yang Zhang -Acked-by: Keir Fraser - ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -618,7 +618,7 @@ unsigned long pv_guest_cr4_fixup(const s - hv_cr4_mask &= ~X86_CR4_DE; - if ( cpu_has_fsgsbase && !is_pv_32bit_domain(v->domain) ) - hv_cr4_mask &= ~X86_CR4_FSGSBASE; -- if ( xsave_enabled(v) ) -+ if ( cpu_has_xsave ) - hv_cr4_mask &= ~X86_CR4_OSXSAVE; - - if ( (guest_cr4 & hv_cr4_mask) != (hv_cr4 & hv_cr4_mask) ) -@@ -1347,9 +1347,13 @@ static void __context_switch(void) - if ( !is_idle_vcpu(n) ) - { - memcpy(stack_regs, &n->arch.user_regs, CTXT_SWITCH_STACK_BYTES); -- if ( xsave_enabled(n) && n->arch.xcr0 != get_xcr0() && -- !set_xcr0(n->arch.xcr0) ) -- BUG(); -+ if ( cpu_has_xsave ) -+ { -+ u64 xcr0 = n->arch.xcr0 ?: XSTATE_FP_SSE; -+ -+ if ( xcr0 != get_xcr0() && !set_xcr0(xcr0) ) -+ BUG(); -+ } - vcpu_restore_fpu_eager(n); - n->arch.ctxt_switch_to(n); - } ---- a/xen/arch/x86/domctl.c -+++ b/xen/arch/x86/domctl.c -@@ -1047,11 +1047,8 @@ long arch_do_domctl( - struct xen_domctl_vcpuextstate *evc; - struct vcpu *v; - uint32_t offset = 0; -- uint64_t _xfeature_mask = 0; -- uint64_t _xcr0, _xcr0_accum; -- void *receive_buf = NULL, *_xsave_area; - --#define PV_XSAVE_SIZE (2 * sizeof(uint64_t) + xsave_cntxt_size) -+#define PV_XSAVE_SIZE(xcr0) (2 * sizeof(uint64_t) + xstate_ctxt_size(xcr0)) - - evc = &domctl->u.vcpuextstate; - -@@ -1062,15 +1059,16 @@ long arch_do_domctl( - - if ( domctl->cmd == XEN_DOMCTL_getvcpuextstate ) - { -+ unsigned int size = PV_XSAVE_SIZE(v->arch.xcr0_accum); -+ - if ( !evc->size && !evc->xfeature_mask ) - { - evc->xfeature_mask = xfeature_mask; -- evc->size = PV_XSAVE_SIZE; -+ evc->size = size; - ret = 0; - goto vcpuextstate_out; - } -- if ( evc->size != PV_XSAVE_SIZE || -- evc->xfeature_mask != xfeature_mask ) -+ if ( evc->size != size || evc->xfeature_mask != xfeature_mask ) - { - ret = -EINVAL; - goto vcpuextstate_out; -@@ -1093,7 +1091,7 @@ long arch_do_domctl( - offset += sizeof(v->arch.xcr0_accum); - if ( copy_to_guest_offset(domctl->u.vcpuextstate.buffer, - offset, (void *)v->arch.xsave_area, -- xsave_cntxt_size) ) -+ size - 2 * sizeof(uint64_t)) ) - { - ret = -EFAULT; - goto vcpuextstate_out; -@@ -1101,13 +1099,14 @@ long arch_do_domctl( - } - else - { -- ret = -EINVAL; -+ void *receive_buf; -+ uint64_t _xcr0, _xcr0_accum; -+ const struct xsave_struct *_xsave_area; - -- _xfeature_mask = evc->xfeature_mask; -- /* xsave context must be restored on compatible target CPUs */ -- if ( (_xfeature_mask & xfeature_mask) != _xfeature_mask ) -- goto 
vcpuextstate_out; -- if ( evc->size > PV_XSAVE_SIZE || evc->size < 2 * sizeof(uint64_t) ) -+ ret = -EINVAL; -+ if ( evc->size < 2 * sizeof(uint64_t) || -+ evc->size > 2 * sizeof(uint64_t) + -+ xstate_ctxt_size(xfeature_mask) ) - goto vcpuextstate_out; - - receive_buf = xmalloc_bytes(evc->size); -@@ -1128,20 +1127,30 @@ long arch_do_domctl( - _xcr0_accum = *(uint64_t *)(receive_buf + sizeof(uint64_t)); - _xsave_area = receive_buf + 2 * sizeof(uint64_t); - -- if ( !(_xcr0 & XSTATE_FP) || _xcr0 & ~xfeature_mask ) -+ if ( _xcr0_accum ) - { -- xfree(receive_buf); -- goto vcpuextstate_out; -+ if ( evc->size >= 2 * sizeof(uint64_t) + XSTATE_AREA_MIN_SIZE ) -+ ret = validate_xstate(_xcr0, _xcr0_accum, -+ _xsave_area->xsave_hdr.xstate_bv, -+ evc->xfeature_mask); - } -- if ( (_xcr0 & _xcr0_accum) != _xcr0 ) -+ else if ( !_xcr0 ) -+ ret = 0; -+ if ( ret ) - { - xfree(receive_buf); - goto vcpuextstate_out; - } - -- v->arch.xcr0 = _xcr0; -- v->arch.xcr0_accum = _xcr0_accum; -- memcpy(v->arch.xsave_area, _xsave_area, evc->size - 2 * sizeof(uint64_t) ); -+ if ( evc->size <= PV_XSAVE_SIZE(_xcr0_accum) ) -+ { -+ v->arch.xcr0 = _xcr0; -+ v->arch.xcr0_accum = _xcr0_accum; -+ memcpy(v->arch.xsave_area, _xsave_area, -+ evc->size - 2 * sizeof(uint64_t)); -+ } -+ else -+ ret = -EINVAL; - - xfree(receive_buf); - } ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -906,14 +906,12 @@ static int hvm_load_cpu_ctxt(struct doma - hvm_set_segment_register(v, x86_seg_ldtr, &seg); - - /* In case xsave-absent save file is restored on a xsave-capable host */ -- if ( xsave_enabled(v) ) -+ if ( cpu_has_xsave && !xsave_enabled(v) ) - { - struct xsave_struct *xsave_area = v->arch.xsave_area; - - memcpy(v->arch.xsave_area, ctxt.fpu_regs, sizeof(ctxt.fpu_regs)); - xsave_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE; -- v->arch.xcr0_accum = XSTATE_FP_SSE; -- v->arch.xcr0 = XSTATE_FP_SSE; - } - else - memcpy(v->arch.fpu_ctxt, ctxt.fpu_regs, sizeof(ctxt.fpu_regs)); -@@ -957,7 +955,9 @@ static int hvm_load_cpu_ctxt(struct doma - HVM_REGISTER_SAVE_RESTORE(CPU, hvm_save_cpu_ctxt, hvm_load_cpu_ctxt, - 1, HVMSR_PER_VCPU); - --#define HVM_CPU_XSAVE_SIZE (3 * sizeof(uint64_t) + xsave_cntxt_size) -+#define HVM_CPU_XSAVE_SIZE(xcr0) (offsetof(struct hvm_hw_cpu_xsave, \ -+ save_area) + \ -+ xstate_ctxt_size(xcr0)) - - static int hvm_save_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h) - { -@@ -969,20 +969,20 @@ static int hvm_save_cpu_xsave_states(str - - for_each_vcpu ( d, v ) - { -+ unsigned int size = HVM_CPU_XSAVE_SIZE(v->arch.xcr0_accum); -+ - if ( !xsave_enabled(v) ) - continue; -- if ( _hvm_init_entry(h, CPU_XSAVE_CODE, v->vcpu_id, HVM_CPU_XSAVE_SIZE) ) -+ if ( _hvm_init_entry(h, CPU_XSAVE_CODE, v->vcpu_id, size) ) - return 1; - ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur]; -- h->cur += HVM_CPU_XSAVE_SIZE; -- memset(ctxt, 0, HVM_CPU_XSAVE_SIZE); -+ h->cur += size; - - ctxt->xfeature_mask = xfeature_mask; - ctxt->xcr0 = v->arch.xcr0; - ctxt->xcr0_accum = v->arch.xcr0_accum; -- if ( v->fpu_initialised ) -- memcpy(&ctxt->save_area, -- v->arch.xsave_area, xsave_cntxt_size); -+ memcpy(&ctxt->save_area, v->arch.xsave_area, -+ size - offsetof(struct hvm_hw_cpu_xsave, save_area)); - } - - return 0; -@@ -990,11 +990,11 @@ static int hvm_save_cpu_xsave_states(str - - static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h) - { -- int vcpuid; -+ unsigned int vcpuid, size; -+ int err; - struct vcpu *v; - struct hvm_hw_cpu_xsave *ctxt; - struct hvm_save_descriptor *desc; -- uint64_t _xfeature_mask; 
- - /* Which vcpu is this? */ - vcpuid = hvm_load_instance(h); -@@ -1006,47 +1006,74 @@ static int hvm_load_cpu_xsave_states(str - } - - /* Fails since we can't restore an img saved on xsave-capable host. */ -- if ( !xsave_enabled(v) ) -- return -EINVAL; -+ if ( !cpu_has_xsave ) -+ return -EOPNOTSUPP; - - /* Customized checking for entry since our entry is of variable length */ - desc = (struct hvm_save_descriptor *)&h->data[h->cur]; - if ( sizeof (*desc) > h->size - h->cur) - { - printk(XENLOG_G_WARNING -- "HVM%d restore: not enough data left to read descriptor" -- "for type %u\n", d->domain_id, CPU_XSAVE_CODE); -- return -1; -+ "HVM%d.%d restore: not enough data left to read xsave descriptor\n", -+ d->domain_id, vcpuid); -+ return -ENODATA; - } - if ( desc->length + sizeof (*desc) > h->size - h->cur) - { - printk(XENLOG_G_WARNING -- "HVM%d restore: not enough data left to read %u bytes " -- "for type %u\n", d->domain_id, desc->length, CPU_XSAVE_CODE); -- return -1; -+ "HVM%d.%d restore: not enough data left to read %u xsave bytes\n", -+ d->domain_id, vcpuid, desc->length); -+ return -ENODATA; -+ } -+ if ( desc->length < offsetof(struct hvm_hw_cpu_xsave, save_area) + -+ XSTATE_AREA_MIN_SIZE ) -+ { -+ printk(XENLOG_G_WARNING -+ "HVM%d.%d restore mismatch: xsave length %u < %zu\n", -+ d->domain_id, vcpuid, desc->length, -+ offsetof(struct hvm_hw_cpu_xsave, -+ save_area) + XSTATE_AREA_MIN_SIZE); -+ return -EINVAL; - } -- if ( CPU_XSAVE_CODE != desc->typecode || (desc->length > HVM_CPU_XSAVE_SIZE) ) -+ size = HVM_CPU_XSAVE_SIZE(xfeature_mask); -+ if ( desc->length > size ) - { - printk(XENLOG_G_WARNING -- "HVM%d restore mismatch: expected type %u with max length %u, " -- "saw type %u length %u\n", d->domain_id, CPU_XSAVE_CODE, -- (unsigned int)HVM_CPU_XSAVE_SIZE, -- desc->typecode, desc->length); -- return -1; -+ "HVM%d.%d restore mismatch: xsave length %u > %u\n", -+ d->domain_id, vcpuid, desc->length, size); -+ return -EOPNOTSUPP; - } - h->cur += sizeof (*desc); -- /* Checking finished */ - - ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur]; - h->cur += desc->length; - -- _xfeature_mask = ctxt->xfeature_mask; -- if ( (_xfeature_mask & xfeature_mask) != _xfeature_mask ) -- return -EINVAL; -+ err = validate_xstate(ctxt->xcr0, ctxt->xcr0_accum, -+ ctxt->save_area.xsave_hdr.xstate_bv, -+ ctxt->xfeature_mask); -+ if ( err ) -+ { -+ printk(XENLOG_G_WARNING -+ "HVM%d.%d restore: inconsistent xsave state (feat=%#"PRIx64 -+ " accum=%#"PRIx64" xcr0=%#"PRIx64" bv=%#"PRIx64" err=%d)\n", -+ d->domain_id, vcpuid, ctxt->xfeature_mask, ctxt->xcr0_accum, -+ ctxt->xcr0, ctxt->save_area.xsave_hdr.xstate_bv, err); -+ return err; -+ } -+ size = HVM_CPU_XSAVE_SIZE(ctxt->xcr0_accum); -+ if ( desc->length > size ) -+ { -+ printk(XENLOG_G_WARNING -+ "HVM%d.%d restore mismatch: xsave length %u > %u\n", -+ d->domain_id, vcpuid, desc->length, size); -+ return -EOPNOTSUPP; -+ } -+ /* Checking finished */ - - v->arch.xcr0 = ctxt->xcr0; - v->arch.xcr0_accum = ctxt->xcr0_accum; -- memcpy(v->arch.xsave_area, &ctxt->save_area, xsave_cntxt_size); -+ memcpy(v->arch.xsave_area, &ctxt->save_area, -+ desc->length - offsetof(struct hvm_hw_cpu_xsave, save_area)); - - return 0; - } -@@ -1060,7 +1087,8 @@ static int __init __hvm_register_CPU_XSA - "CPU_XSAVE", - hvm_save_cpu_xsave_states, - hvm_load_cpu_xsave_states, -- HVM_CPU_XSAVE_SIZE + sizeof (struct hvm_save_descriptor), -+ HVM_CPU_XSAVE_SIZE(xfeature_mask) + -+ sizeof(struct hvm_save_descriptor), - HVMSR_PER_VCPU); - return 0; - } -@@ -2767,7 +2795,7 @@ void 
hvm_cpuid(unsigned int input, unsig - __clear_bit(X86_FEATURE_APIC & 31, edx); - - /* Fix up OSXSAVE. */ -- if ( xsave_enabled(v) ) -+ if ( cpu_has_xsave ) - *ecx |= (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_OSXSAVE) ? - cpufeat_mask(X86_FEATURE_OSXSAVE) : 0; - ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -947,8 +947,7 @@ static int construct_vmcs(struct vcpu *v - /* Host control registers. */ - v->arch.hvm_vmx.host_cr0 = read_cr0() | X86_CR0_TS; - __vmwrite(HOST_CR0, v->arch.hvm_vmx.host_cr0); -- __vmwrite(HOST_CR4, -- mmu_cr4_features | (xsave_enabled(v) ? X86_CR4_OSXSAVE : 0)); -+ __vmwrite(HOST_CR4, mmu_cr4_features); - - /* Host CS:RIP. */ - __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS); ---- a/xen/arch/x86/i387.c -+++ b/xen/arch/x86/i387.c -@@ -38,14 +38,15 @@ static inline void fpu_xrstor(struct vcp - { - bool_t ok; - -+ ASSERT(v->arch.xsave_area); - /* - * XCR0 normally represents what guest OS set. In case of Xen itself, -- * we set all supported feature mask before doing save/restore. -+ * we set the accumulated feature mask before doing save/restore. - */ -- ok = set_xcr0(v->arch.xcr0_accum); -+ ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); - ASSERT(ok); - xrstor(v, mask); -- ok = set_xcr0(v->arch.xcr0); -+ ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE); - ASSERT(ok); - } - -@@ -124,13 +125,15 @@ static inline void fpu_xsave(struct vcpu - { - bool_t ok; - -- /* XCR0 normally represents what guest OS set. In case of Xen itself, -- * we set all accumulated feature mask before doing save/restore. -+ ASSERT(v->arch.xsave_area); -+ /* -+ * XCR0 normally represents what guest OS set. In case of Xen itself, -+ * we set the accumulated feature mask before doing save/restore. - */ -- ok = set_xcr0(v->arch.xcr0_accum); -+ ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); - ASSERT(ok); - xsave(v, v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY); -- ok = set_xcr0(v->arch.xcr0); -+ ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE); - ASSERT(ok); - } - -@@ -238,7 +241,7 @@ void vcpu_restore_fpu_lazy(struct vcpu * - if ( v->fpu_dirtied ) - return; - -- if ( xsave_enabled(v) ) -+ if ( cpu_has_xsave ) - fpu_xrstor(v, XSTATE_LAZY); - else if ( v->fpu_initialised ) - { -@@ -268,7 +271,7 @@ void vcpu_save_fpu(struct vcpu *v) - /* This can happen, if a paravirtualised guest OS has set its CR0.TS. */ - clts(); - -- if ( xsave_enabled(v) ) -+ if ( cpu_has_xsave ) - fpu_xsave(v); - else if ( cpu_has_fxsr ) - fpu_fxsave(v); ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -816,7 +816,7 @@ static void pv_cpuid(struct cpu_user_reg - __clear_bit(X86_FEATURE_PDCM % 32, &c); - __clear_bit(X86_FEATURE_PCID % 32, &c); - __clear_bit(X86_FEATURE_DCA % 32, &c); -- if ( !xsave_enabled(current) ) -+ if ( !cpu_has_xsave ) - { - __clear_bit(X86_FEATURE_XSAVE % 32, &c); - __clear_bit(X86_FEATURE_AVX % 32, &c); -@@ -841,7 +841,7 @@ static void pv_cpuid(struct cpu_user_reg - break; - - case 0x0000000d: /* XSAVE */ -- if ( !xsave_enabled(current) ) -+ if ( !cpu_has_xsave ) - goto unsupported; - break; - ---- a/xen/arch/x86/xstate.c -+++ b/xen/arch/x86/xstate.c -@@ -21,7 +21,7 @@ bool_t __read_mostly cpu_has_xsaveopt; - * the supported and enabled features on the processor, including the - * XSAVE.HEADER. We only enable XCNTXT_MASK that we have known. - */ --u32 xsave_cntxt_size; -+static u32 __read_mostly xsave_cntxt_size; - - /* A 64-bit bitmask of the XSAVE/XRSTOR features supported by processor. 
*/ - u64 xfeature_mask; -@@ -206,13 +206,13 @@ void xrstor(struct vcpu *v, uint64_t mas - - bool_t xsave_enabled(const struct vcpu *v) - { -- if ( cpu_has_xsave ) -- { -- ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE); -- ASSERT(v->arch.xsave_area); -- } -+ if ( !cpu_has_xsave ) -+ return 0; - -- return cpu_has_xsave; -+ ASSERT(xsave_cntxt_size >= XSTATE_AREA_MIN_SIZE); -+ ASSERT(v->arch.xsave_area); -+ -+ return !!v->arch.xcr0_accum; - } - - int xstate_alloc_save_area(struct vcpu *v) -@@ -234,8 +234,8 @@ int xstate_alloc_save_area(struct vcpu * - save_area->xsave_hdr.xstate_bv = XSTATE_FP_SSE; - - v->arch.xsave_area = save_area; -- v->arch.xcr0 = XSTATE_FP_SSE; -- v->arch.xcr0_accum = XSTATE_FP_SSE; -+ v->arch.xcr0 = 0; -+ v->arch.xcr0_accum = 0; - - return 0; - } -@@ -253,7 +253,11 @@ void xstate_init(bool_t bsp) - u64 feature_mask; - - if ( boot_cpu_data.cpuid_level < XSTATE_CPUID ) -+ { -+ BUG_ON(!bsp); -+ setup_clear_cpu_cap(X86_FEATURE_XSAVE); - return; -+ } - - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - -@@ -273,7 +277,6 @@ void xstate_init(bool_t bsp) - set_in_cr4(X86_CR4_OSXSAVE); - if ( !set_xcr0(feature_mask) ) - BUG(); -- cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - - if ( bsp ) - { -@@ -282,14 +285,14 @@ void xstate_init(bool_t bsp) - * xsave_cntxt_size is the max size required by enabled features. - * We know FP/SSE and YMM about eax, and nothing about edx at present. - */ -- xsave_cntxt_size = ebx; -+ xsave_cntxt_size = xstate_ctxt_size(feature_mask); - printk("%s: using cntxt_size: %#x and states: %#"PRIx64"\n", - __func__, xsave_cntxt_size, xfeature_mask); - } - else - { - BUG_ON(xfeature_mask != feature_mask); -- BUG_ON(xsave_cntxt_size != ebx); -+ BUG_ON(xsave_cntxt_size != xstate_ctxt_size(feature_mask)); - } - - /* Check XSAVEOPT feature. */ -@@ -300,6 +303,42 @@ void xstate_init(bool_t bsp) - BUG_ON(!cpu_has_xsaveopt != !(eax & XSTATE_FEATURE_XSAVEOPT)); - } - -+unsigned int xstate_ctxt_size(u64 xcr0) -+{ -+ u32 ebx = 0; -+ -+ if ( xcr0 ) -+ { -+ u64 act_xcr0 = get_xcr0(); -+ u32 eax, ecx, edx; -+ bool_t ok = set_xcr0(xcr0); -+ -+ ASSERT(ok); -+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); -+ ASSERT(ebx <= ecx); -+ ok = set_xcr0(act_xcr0); -+ ASSERT(ok); -+ } -+ -+ return ebx; -+} -+ -+int validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv, u64 xfeat_mask) -+{ -+ if ( (xcr0_accum & ~xfeat_mask) || -+ (xstate_bv & ~xcr0_accum) || -+ (xcr0 & ~xcr0_accum) || -+ !(xcr0 & XSTATE_FP) || -+ ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) || -+ ((xcr0_accum & XSTATE_YMM) && !(xcr0_accum & XSTATE_SSE)) ) -+ return -EINVAL; -+ -+ if ( xcr0_accum & ~xfeature_mask ) -+ return -EOPNOTSUPP; -+ -+ return 0; -+} -+ - int handle_xsetbv(u32 index, u64 new_bv) - { - struct vcpu *curr = current; ---- a/xen/include/asm-x86/domain.h -+++ b/xen/include/asm-x86/domain.h -@@ -456,9 +456,9 @@ unsigned long pv_guest_cr4_fixup(const s - #define pv_guest_cr4_to_real_cr4(v) \ - (((v)->arch.pv_vcpu.ctrlreg[4] \ - | (mmu_cr4_features \ -- & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP)) \ -- | ((v)->domain->arch.vtsc ? X86_CR4_TSD : 0) \ -- | ((xsave_enabled(v))? X86_CR4_OSXSAVE : 0)) \ -+ & (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_SMEP | \ -+ X86_CR4_OSXSAVE)) \ -+ | ((v)->domain->arch.vtsc ? 
X86_CR4_TSD : 0)) \ - & ~X86_CR4_DE) - #define real_cr4_to_pv_guest_cr4(c) \ - ((c) & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_TSD \ ---- a/xen/include/asm-x86/hvm/hvm.h -+++ b/xen/include/asm-x86/hvm/hvm.h -@@ -368,7 +368,7 @@ static inline int hvm_event_pending(stru - ((nestedhvm_enabled((_v)->domain) && cpu_has_vmx)\ - ? X86_CR4_VMXE : 0) | \ - (cpu_has_pcid ? X86_CR4_PCIDE : 0) | \ -- (xsave_enabled(_v) ? X86_CR4_OSXSAVE : 0)))) -+ (cpu_has_xsave ? X86_CR4_OSXSAVE : 0)))) - - /* These exceptions must always be intercepted. */ - #define HVM_TRAP_MASK ((1U << TRAP_machine_check) | (1U << TRAP_invalid_op)) ---- a/xen/include/asm-x86/xstate.h -+++ b/xen/include/asm-x86/xstate.h -@@ -33,7 +33,6 @@ - #define XSTATE_NONLAZY (XSTATE_LWP) - #define XSTATE_LAZY (XSTATE_ALL & ~XSTATE_NONLAZY) - --extern unsigned int xsave_cntxt_size; - extern u64 xfeature_mask; - - /* extended state save area */ -@@ -76,11 +75,14 @@ uint64_t get_xcr0(void); - void xsave(struct vcpu *v, uint64_t mask); - void xrstor(struct vcpu *v, uint64_t mask); - bool_t xsave_enabled(const struct vcpu *v); -+int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum, u64 xstate_bv, -+ u64 xfeat_mask); - int __must_check handle_xsetbv(u32 index, u64 new_bv); - - /* extended state init and cleanup functions */ - void xstate_free_save_area(struct vcpu *v); - int xstate_alloc_save_area(struct vcpu *v); - void xstate_init(bool_t bsp); -+unsigned int xstate_ctxt_size(u64 xcr0); - - #endif /* __ASM_XSTATE_H */ diff --git a/522f2f9f-Nested-VMX-Clear-bit-31-of-IA32_VMX_BASIC-MSR.patch b/522f2f9f-Nested-VMX-Clear-bit-31-of-IA32_VMX_BASIC-MSR.patch deleted file mode 100644 index e220677..0000000 --- a/522f2f9f-Nested-VMX-Clear-bit-31-of-IA32_VMX_BASIC-MSR.patch +++ /dev/null @@ -1,25 +0,0 @@ -# Commit f3a4eb9253826d1e49e682314c8666b28fa0b717 -# Date 2013-09-10 16:41:35 +0200 -# Author Yang Zhang -# Committer Jan Beulich -Nested VMX: Clear bit 31 of IA32_VMX_BASIC MSR - -The bit 31 of revision_id will set to 1 if vmcs shadowing enabled. And -according intel SDM, the bit 31 of IA32_VMX_BASIC MSR is always 0. So we -cannot set low 32 bit of IA32_VMX_BASIC to revision_id directly. Must clear -the bit 31 to 0. 
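For illustration, a minimal standalone sketch of the masking this patch describes; the names stand in for the Xen fields and the constants are the architectural ones from the message above:

    #include <stdint.h>

    /* Compose the guest-visible IA32_VMX_BASIC value: keep the host's upper
     * 32 bits, splice in the VMCS revision id, and clear bit 31, which the
     * SDM defines as always 0 but which VMCS shadowing sets in revision_id. */
    static uint64_t vmx_basic_for_guest(uint64_t host_data,
                                        uint32_t vmcs_revision_id)
    {
        return (host_data & (~0ull << 32)) | (vmcs_revision_id & 0x7fffffffu);
    }
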
- -Signed-off-by: Yang Zhang -Reviewed-by: Andrew Cooper - ---- a/xen/arch/x86/hvm/vmx/vvmx.c -+++ b/xen/arch/x86/hvm/vmx/vvmx.c -@@ -1828,7 +1828,7 @@ int nvmx_msr_read_intercept(unsigned int - switch (msr) { - case MSR_IA32_VMX_BASIC: - data = (host_data & (~0ul << 32)) | -- ((v->arch.hvm_vmx.vmcs)->vmcs_revision_id); -+ (v->arch.hvm_vmx.vmcs->vmcs_revision_id & 0x7fffffff); - break; - case MSR_IA32_VMX_PINBASED_CTLS: - case MSR_IA32_VMX_TRUE_PINBASED_CTLS: diff --git a/522f37b2-sched-arinc653-check-for-guest-data-transfer-failures.patch b/522f37b2-sched-arinc653-check-for-guest-data-transfer-failures.patch deleted file mode 100644 index 053cf63..0000000 --- a/522f37b2-sched-arinc653-check-for-guest-data-transfer-failures.patch +++ /dev/null @@ -1,41 +0,0 @@ -# Commit 546ba2f17008387cf9821df46e6dac04f0883a9b -# Date 2013-09-10 17:16:02 +0200 -# Author Matthew Daley -# Committer Jan Beulich -sched/arinc653: check for guest data transfer failures - -Coverity-ID: 1055121 -Coverity-ID: 1055122 -Coverity-ID: 1055123 -Coverity-ID: 1055124 -Signed-off-by: Matthew Daley -Reviewed-by: Andrew Cooper -Acked-by: George Dunlap -Acked-by: Keir Fraser - ---- a/xen/common/sched_arinc653.c -+++ b/xen/common/sched_arinc653.c -@@ -635,12 +635,21 @@ a653sched_adjust_global(const struct sch - switch ( sc->cmd ) - { - case XEN_SYSCTL_SCHEDOP_putinfo: -- copy_from_guest(&local_sched, sc->u.sched_arinc653.schedule, 1); -+ if ( copy_from_guest(&local_sched, sc->u.sched_arinc653.schedule, 1) ) -+ { -+ rc = -EFAULT; -+ break; -+ } -+ - rc = arinc653_sched_set(ops, &local_sched); - break; - case XEN_SYSCTL_SCHEDOP_getinfo: - rc = arinc653_sched_get(ops, &local_sched); -- copy_to_guest(sc->u.sched_arinc653.schedule, &local_sched, 1); -+ if ( rc ) -+ break; -+ -+ if ( copy_to_guest(sc->u.sched_arinc653.schedule, &local_sched, 1) ) -+ rc = -EFAULT; - break; - } - diff --git a/523172d5-x86-fix-memory-cut-off-when-using-PFN-compression.patch b/523172d5-x86-fix-memory-cut-off-when-using-PFN-compression.patch deleted file mode 100644 index a938210..0000000 --- a/523172d5-x86-fix-memory-cut-off-when-using-PFN-compression.patch +++ /dev/null @@ -1,116 +0,0 @@ -References: bnc#839600 - -# Commit 8efce9d69998a3d3c720ac7dbdb9b7e240369957 -# Date 2013-09-12 09:52:53 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86: fix memory cut-off when using PFN compression - -For one setup_max_pdx(), when invoked a second time (after SRAT got -parsed), needs to start from the original max_page value again (using -the already adjusted one from the first invocation would not allow the -cut-off boundary to be moved up). - -Second, _if_ we need to cut off some part of memory, we must not allow -this to also propagate into the NUMA accounting. Otherwise -cutoff_node() results in nodes_cover_memory() to find some parts of -memory apparently not having a PXM association, causing all SRAT info -to be ignored. - -The only possibly problematic consumer of node_spanned_pages (the -meaning of which gets altered here in that it now also includes memory -Xen can't actively make use of) is XEN_SYSCTL_numainfo: At a first -glance the potentially larger reported memory size shouldn't confuse -tool stacks. - -And finally we must not put our boot time modules at addresses which -(at that time) can't be guaranteed to be accessible later. This applies -to both the EFI boot loader and the module relocation code. 
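A rough sketch of the placement clamp the last paragraph describes, under assumed constants (DIRECTMAP_SPAN here is illustrative, not Xen's actual direct-map size):

    #include <stdint.h>

    #define PAGE_SHIFT      12
    #define DIRECTMAP_SPAN  (1ull << 46)   /* assumed span, not Xen's value */

    /* Cap a candidate module end address so the module stays both below the
     * 32-bit frame-number boundary and inside the direct-mapped region that
     * is guaranteed to remain accessible after boot. */
    static uint64_t clamp_module_end(uint64_t e)
    {
        uint64_t limit = 1ull << (PAGE_SHIFT + 32);

        if (limit > DIRECTMAP_SPAN)
            limit = DIRECTMAP_SPAN;
        return e < limit ? e : limit;
    }
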
- -Signed-off-by: Jan Beulich -Acked-by: Keir Fraser -Acked-by: Dario Faggioli - ---- a/xen/arch/x86/efi/boot.c -+++ b/xen/arch/x86/efi/boot.c -@@ -459,7 +459,8 @@ static bool_t __init read_file(EFI_FILE_ - what = what ?: L"Seek"; - else - { -- file->addr = (EFI_PHYSICAL_ADDRESS)1 << (32 + PAGE_SHIFT); -+ file->addr = min(1UL << (32 + PAGE_SHIFT), -+ HYPERVISOR_VIRT_END - DIRECTMAP_VIRT_START); - ret = efi_bs->AllocatePages(AllocateMaxAddress, EfiLoaderData, - PFN_UP(size), &file->addr); - } ---- a/xen/arch/x86/setup.c -+++ b/xen/arch/x86/setup.c -@@ -377,9 +377,9 @@ static uint64_t __init consider_modules( - return e; - } - --static void __init setup_max_pdx(void) -+static void __init setup_max_pdx(unsigned long top_page) - { -- max_pdx = pfn_to_pdx(max_page - 1) + 1; -+ max_pdx = pfn_to_pdx(top_page - 1) + 1; - - if ( max_pdx > (DIRECTMAP_SIZE >> PAGE_SHIFT) ) - max_pdx = DIRECTMAP_SIZE >> PAGE_SHIFT; -@@ -547,7 +547,7 @@ void __init __start_xen(unsigned long mb - unsigned int initrdidx; - multiboot_info_t *mbi = __va(mbi_p); - module_t *mod = (module_t *)__va(mbi->mods_addr); -- unsigned long nr_pages, modules_headroom, *module_map; -+ unsigned long nr_pages, raw_max_page, modules_headroom, *module_map; - int i, j, e820_warn = 0, bytes = 0; - bool_t acpi_boot_table_init_done = 0; - struct ns16550_defaults ns16550 = { -@@ -751,7 +751,7 @@ void __init __start_xen(unsigned long mb - } - - /* Sanitise the raw E820 map to produce a final clean version. */ -- max_page = init_e820(memmap_type, e820_raw, &e820_raw_nr); -+ max_page = raw_max_page = init_e820(memmap_type, e820_raw, &e820_raw_nr); - - /* Create a temporary copy of the E820 map. */ - memcpy(&boot_e820, &e820, sizeof(e820)); -@@ -820,7 +820,10 @@ void __init __start_xen(unsigned long mb - (end - s) >> PAGE_SHIFT, PAGE_HYPERVISOR); - } - -- e = min_t(uint64_t, e, 1ULL << (PAGE_SHIFT + 32)); -+ if ( e > min(HYPERVISOR_VIRT_END - DIRECTMAP_VIRT_START, -+ 1UL << (PAGE_SHIFT + 32)) ) -+ e = min(HYPERVISOR_VIRT_END - DIRECTMAP_VIRT_START, -+ 1UL << (PAGE_SHIFT + 32)); - #define reloc_size ((__pa(&_end) + mask) & ~mask) - /* Is the region suitable for relocating Xen? */ - if ( !xen_phys_start && e <= limit ) -@@ -969,7 +972,7 @@ void __init __start_xen(unsigned long mb - /* Late kexec reservation (dynamic start address). 
*/ - kexec_reserve_area(&boot_e820); - -- setup_max_pdx(); -+ setup_max_pdx(raw_max_page); - if ( highmem_start ) - xenheap_max_mfn(PFN_DOWN(highmem_start)); - -@@ -995,7 +998,7 @@ void __init __start_xen(unsigned long mb - { - acpi_boot_table_init_done = 1; - srat_parse_regions(s); -- setup_max_pdx(); -+ setup_max_pdx(raw_max_page); - } - - if ( pfn_to_pdx((e - 1) >> PAGE_SHIFT) >= max_pdx ) -@@ -1133,7 +1136,7 @@ void __init __start_xen(unsigned long mb - - acpi_numa_init(); - -- numa_initmem_init(0, max_page); -+ numa_initmem_init(0, raw_max_page); - - end_boot_allocator(); - system_state = SYS_STATE_boot; diff --git a/5231e090-libxc-x86-fix-page-table-creation-for-huge-guests.patch b/5231e090-libxc-x86-fix-page-table-creation-for-huge-guests.patch deleted file mode 100644 index 42037ad..0000000 --- a/5231e090-libxc-x86-fix-page-table-creation-for-huge-guests.patch +++ /dev/null @@ -1,94 +0,0 @@ -# Commit 06d086832155fc7f5344e9d108b979de34674d11 -# Date 2013-09-12 17:41:04 +0200 -# Author Jan Beulich -# Committer Jan Beulich -libxc/x86: fix page table creation for huge guests - -The switch-over logic from one page directory to the next was wrong; -it needs to be deferred until we actually reach the last page within -a given region, instead of being done when the last entry of a page -directory gets started with. - -Signed-off-by: Jan Beulich -Tested-by: Konrad Rzeszutek Wilk -Acked-by: Ian Jackson - ---- a/tools/libxc/xc_dom_x86.c -+++ b/tools/libxc/xc_dom_x86.c -@@ -251,7 +251,7 @@ static int setup_pgtables_x86_32_pae(str - l3_pgentry_64_t *l3tab; - l2_pgentry_64_t *l2tab = NULL; - l1_pgentry_64_t *l1tab = NULL; -- unsigned long l3off, l2off, l1off; -+ unsigned long l3off, l2off = 0, l1off; - xen_vaddr_t addr; - xen_pfn_t pgpfn; - xen_pfn_t l3mfn = xc_dom_p2m_guest(dom, l3pfn); -@@ -299,8 +299,6 @@ static int setup_pgtables_x86_32_pae(str - l2off = l2_table_offset_pae(addr); - l2tab[l2off] = - pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT; -- if ( l2off == (L2_PAGETABLE_ENTRIES_PAE - 1) ) -- l2tab = NULL; - l1pfn++; - } - -@@ -312,8 +310,13 @@ static int setup_pgtables_x86_32_pae(str - if ( (addr >= dom->pgtables_seg.vstart) && - (addr < dom->pgtables_seg.vend) ) - l1tab[l1off] &= ~_PAGE_RW; /* page tables are r/o */ -+ - if ( l1off == (L1_PAGETABLE_ENTRIES_PAE - 1) ) -+ { - l1tab = NULL; -+ if ( l2off == (L2_PAGETABLE_ENTRIES_PAE - 1) ) -+ l2tab = NULL; -+ } - } - - if ( dom->virt_pgtab_end <= 0xc0000000 ) -@@ -360,7 +363,7 @@ static int setup_pgtables_x86_64(struct - l3_pgentry_64_t *l3tab = NULL; - l2_pgentry_64_t *l2tab = NULL; - l1_pgentry_64_t *l1tab = NULL; -- uint64_t l4off, l3off, l2off, l1off; -+ uint64_t l4off, l3off = 0, l2off = 0, l1off; - uint64_t addr; - xen_pfn_t pgpfn; - -@@ -391,8 +394,6 @@ static int setup_pgtables_x86_64(struct - l3off = l3_table_offset_x86_64(addr); - l3tab[l3off] = - pfn_to_paddr(xc_dom_p2m_guest(dom, l2pfn)) | L3_PROT; -- if ( l3off == (L3_PAGETABLE_ENTRIES_X86_64 - 1) ) -- l3tab = NULL; - l2pfn++; - } - -@@ -405,8 +406,6 @@ static int setup_pgtables_x86_64(struct - l2off = l2_table_offset_x86_64(addr); - l2tab[l2off] = - pfn_to_paddr(xc_dom_p2m_guest(dom, l1pfn)) | L2_PROT; -- if ( l2off == (L2_PAGETABLE_ENTRIES_X86_64 - 1) ) -- l2tab = NULL; - l1pfn++; - } - -@@ -418,8 +417,17 @@ static int setup_pgtables_x86_64(struct - if ( (addr >= dom->pgtables_seg.vstart) && - (addr < dom->pgtables_seg.vend) ) - l1tab[l1off] &= ~_PAGE_RW; /* page tables are r/o */ -+ - if ( l1off == (L1_PAGETABLE_ENTRIES_X86_64 - 1) ) -+ { - l1tab = NULL; -+ if ( 
l2off == (L2_PAGETABLE_ENTRIES_X86_64 - 1) ) -+ { -+ l2tab = NULL; -+ if ( l3off == (L3_PAGETABLE_ENTRIES_X86_64 - 1) ) -+ l3tab = NULL; -+ } -+ } - } - return 0; - diff --git a/5231f00c-cpufreq-missing-check-of-copy_from_guest.patch b/5231f00c-cpufreq-missing-check-of-copy_from_guest.patch deleted file mode 100644 index 5ab3a0d..0000000 --- a/5231f00c-cpufreq-missing-check-of-copy_from_guest.patch +++ /dev/null @@ -1,30 +0,0 @@ -# Commit 803f9a6cdfeda64beee908576de0ad02d6b0c480 -# Date 2013-09-12 17:47:08 +0100 -# Author Tim Deegan -# Committer Tim Deegan -cpufreq: missing check of copy_from_guest() - -Coverity CID 1055131 -Coverity CID 1055132 - -Signed-off-by: Tim Deegan -Reviewed-by: Andrew Cooper -Acked-by: Jan Beulich - ---- a/xen/drivers/cpufreq/cpufreq.c -+++ b/xen/drivers/cpufreq/cpufreq.c -@@ -471,8 +471,12 @@ int set_px_pminfo(uint32_t acpi_id, stru - ret = -ENOMEM; - goto out; - } -- copy_from_guest(pxpt->states, dom0_px_info->states, -- dom0_px_info->state_count); -+ if ( copy_from_guest(pxpt->states, dom0_px_info->states, -+ dom0_px_info->state_count) ) -+ { -+ ret = -EFAULT; -+ goto out; -+ } - pxpt->state_count = dom0_px_info->state_count; - - if ( cpufreq_verbose ) diff --git a/523304b6-x86-machine_restart-must-not-call-acpi_dmar_reinstate-twice.patch b/523304b6-x86-machine_restart-must-not-call-acpi_dmar_reinstate-twice.patch deleted file mode 100644 index 5485a8f..0000000 --- a/523304b6-x86-machine_restart-must-not-call-acpi_dmar_reinstate-twice.patch +++ /dev/null @@ -1,40 +0,0 @@ -# Commit a54dc5f4fe1eae6b1beb21326ef0338cd3969cd1 -# Date 2013-09-13 14:27:34 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86: machine_restart() must not call acpi_dmar_reinstate() twice - -.. as that function is not idempotent (it always alters the table -checksum). The (generally) duplicate call was a result from it being -made before machine_restart() re-invoking itself on the boot CPU. - -Considering that no problem arose so far from the table corruption I -doubt that we need to restore the correct table signature on the -reboot path in general. The only case I can see this as potentially -necessary is the tboot one, hence do the call just in that case. - -Signed-off-by: Jan Beulich -Acked-by: Keir Fraser - ---- a/xen/arch/x86/shutdown.c -+++ b/xen/arch/x86/shutdown.c -@@ -115,8 +115,6 @@ void machine_restart(unsigned int delay_ - console_start_sync(); - spin_debug_disable(); - -- acpi_dmar_reinstate(); -- - local_irq_enable(); - - /* Ensure we are the boot CPU. */ -@@ -141,7 +139,10 @@ void machine_restart(unsigned int delay_ - mdelay(delay_millisecs); - - if ( tboot_in_measured_env() ) -+ { -+ acpi_dmar_reinstate(); - tboot_shutdown(TB_SHUTDOWN_REBOOT); -+ } - - efi_reset_system(reboot_mode != 0); - diff --git a/5239a064-x86-HVM-fix-failure-path-in-hvm_vcpu_initialise.patch b/5239a064-x86-HVM-fix-failure-path-in-hvm_vcpu_initialise.patch deleted file mode 100644 index 7a523e4..0000000 --- a/5239a064-x86-HVM-fix-failure-path-in-hvm_vcpu_initialise.patch +++ /dev/null @@ -1,29 +0,0 @@ -# Commit 925fbcb7fdd6238f26b1576dc1f3e297f1f24f1e -# Date 2013-09-18 14:45:24 +0200 -# Author George Dunlap -# Committer Jan Beulich -x86/HVM: fix failure path in hvm_vcpu_initialise - -It looks like one of the failure cases in hvm_vcpu_initialise jumps to -the wrong label; this could lead to slow leaks if something isn't -cleaned up properly. - -I will probably change these labels in a future patch, but I figured -it was better to have this fix separately. 
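The bug class fixed here is a mismatched cleanup label. A self-contained sketch of the cascading-label idiom, with hypothetical resources rather than the HVM ones:

    #include <stdlib.h>

    struct state { void *a, *b, *c; };

    /* Each failure must jump to the label that unwinds exactly what has been
     * set up so far; jumping to an earlier label (as the buggy code did)
     * leaks everything initialised after that point. */
    static int state_init(struct state *s)
    {
        if ( (s->a = malloc(32)) == NULL )
            goto fail1;
        if ( (s->b = malloc(64)) == NULL )
            goto fail2;
        if ( (s->c = malloc(128)) == NULL )
            goto fail3;          /* not fail2: s->b would be leaked */
        return 0;

     fail3:
        free(s->b);
     fail2:
        free(s->a);
     fail1:
        return -1;
    }
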
- -This is also a candidate for backport. - -Signed-off-by: George Dunlap -Signed-off-by: Mukesh Rathor - ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -1125,7 +1125,7 @@ int hvm_vcpu_initialise(struct vcpu *v) - /* Create bufioreq event channel. */ - rc = alloc_unbound_xen_event_channel(v, dm_domid, NULL); - if ( rc < 0 ) -- goto fail2; -+ goto fail4; - d->arch.hvm_domain.params[HVM_PARAM_BUFIOREQ_EVTCHN] = rc; - } - diff --git a/5239a076-VMX-fix-failure-path-in-construct_vmcs.patch b/5239a076-VMX-fix-failure-path-in-construct_vmcs.patch deleted file mode 100644 index 111cbbb..0000000 --- a/5239a076-VMX-fix-failure-path-in-construct_vmcs.patch +++ /dev/null @@ -1,26 +0,0 @@ -# Commit dad7e45bf44c0569546a3ed7d0fa4182a4a73f0a -# Date 2013-09-18 14:45:42 +0200 -# Author George Dunlap -# Committer Jan Beulich -VMX: fix failure path in construct_vmcs - -If the allocation fails, make sure to call vmx_vmcs_exit(). - -This is a candidate for backport. - -Signed-off-by: George Dunlap -Signed-off-by: Mukesh Rathor - ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -894,7 +894,10 @@ static int construct_vmcs(struct vcpu *v - unsigned long *msr_bitmap = alloc_xenheap_page(); - - if ( msr_bitmap == NULL ) -+ { -+ vmx_vmcs_exit(v); - return -ENOMEM; -+ } - - memset(msr_bitmap, ~0, PAGE_SIZE); - v->arch.hvm_vmx.msr_bitmap = msr_bitmap; diff --git a/523c0ed4-x86-HVM-properly-handle-wide-MMIO.patch b/523c0ed4-x86-HVM-properly-handle-wide-MMIO.patch deleted file mode 100644 index dd1464f..0000000 --- a/523c0ed4-x86-HVM-properly-handle-wide-MMIO.patch +++ /dev/null @@ -1,184 +0,0 @@ -# Commit 3b89f08a498ddac09d4002d9849e329018ceb107 -# Date 2013-09-20 11:01:08 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86/HVM: properly handle MMIO reads and writes wider than a machine word - -Just like real hardware we ought to split such accesses transparently -to the caller. With little extra effort we can at once even handle page -crossing accesses correctly. - -Signed-off-by: Jan Beulich -Acked-by: Keir Fraser - ---- a/xen/arch/x86/hvm/emulate.c -+++ b/xen/arch/x86/hvm/emulate.c -@@ -438,6 +438,7 @@ static int __hvmemul_read( - { - struct vcpu *curr = current; - unsigned long addr, reps = 1; -+ unsigned int off, chunk = min(bytes, 1U << LONG_BYTEORDER); - uint32_t pfec = PFEC_page_present; - struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io; - paddr_t gpa; -@@ -447,16 +448,38 @@ static int __hvmemul_read( - seg, offset, bytes, &reps, access_type, hvmemul_ctxt, &addr); - if ( rc != X86EMUL_OKAY ) - return rc; -+ off = addr & (PAGE_SIZE - 1); -+ /* -+ * We only need to handle sizes actual instruction operands can have. All -+ * such sizes are either powers of 2 or the sum of two powers of 2. Thus -+ * picking as initial chunk size the largest power of 2 not greater than -+ * the total size will always result in only power-of-2 size requests -+ * issued to hvmemul_do_mmio() (hvmemul_do_io() rejects non-powers-of-2). 
-+ */ -+ while ( chunk & (chunk - 1) ) -+ chunk &= chunk - 1; -+ if ( off + bytes > PAGE_SIZE ) -+ while ( off & (chunk - 1) ) -+ chunk >>= 1; - - if ( unlikely(vio->mmio_gva == (addr & PAGE_MASK)) && vio->mmio_gva ) - { -- unsigned int off = addr & (PAGE_SIZE - 1); - if ( access_type == hvm_access_insn_fetch ) - return X86EMUL_UNHANDLEABLE; - gpa = (((paddr_t)vio->mmio_gpfn << PAGE_SHIFT) | off); -- if ( (off + bytes) <= PAGE_SIZE ) -- return hvmemul_do_mmio(gpa, &reps, bytes, 0, -- IOREQ_READ, 0, p_data); -+ while ( (off + chunk) <= PAGE_SIZE ) -+ { -+ rc = hvmemul_do_mmio(gpa, &reps, chunk, 0, IOREQ_READ, 0, p_data); -+ if ( rc != X86EMUL_OKAY || bytes == chunk ) -+ return rc; -+ addr += chunk; -+ off += chunk; -+ gpa += chunk; -+ p_data += chunk; -+ bytes -= chunk; -+ if ( bytes < chunk ) -+ chunk = bytes; -+ } - } - - if ( (seg != x86_seg_none) && -@@ -473,14 +496,32 @@ static int __hvmemul_read( - return X86EMUL_EXCEPTION; - case HVMCOPY_unhandleable: - return X86EMUL_UNHANDLEABLE; -- case HVMCOPY_bad_gfn_to_mfn: -+ case HVMCOPY_bad_gfn_to_mfn: - if ( access_type == hvm_access_insn_fetch ) - return X86EMUL_UNHANDLEABLE; -- rc = hvmemul_linear_to_phys( -- addr, &gpa, bytes, &reps, pfec, hvmemul_ctxt); -- if ( rc != X86EMUL_OKAY ) -- return rc; -- return hvmemul_do_mmio(gpa, &reps, bytes, 0, IOREQ_READ, 0, p_data); -+ rc = hvmemul_linear_to_phys(addr, &gpa, chunk, &reps, pfec, -+ hvmemul_ctxt); -+ while ( rc == X86EMUL_OKAY ) -+ { -+ rc = hvmemul_do_mmio(gpa, &reps, chunk, 0, IOREQ_READ, 0, p_data); -+ if ( rc != X86EMUL_OKAY || bytes == chunk ) -+ break; -+ addr += chunk; -+ off += chunk; -+ p_data += chunk; -+ bytes -= chunk; -+ if ( bytes < chunk ) -+ chunk = bytes; -+ if ( off < PAGE_SIZE ) -+ gpa += chunk; -+ else -+ { -+ rc = hvmemul_linear_to_phys(addr, &gpa, chunk, &reps, pfec, -+ hvmemul_ctxt); -+ off = 0; -+ } -+ } -+ return rc; - case HVMCOPY_gfn_paged_out: - return X86EMUL_RETRY; - case HVMCOPY_gfn_shared: -@@ -537,6 +578,7 @@ static int hvmemul_write( - container_of(ctxt, struct hvm_emulate_ctxt, ctxt); - struct vcpu *curr = current; - unsigned long addr, reps = 1; -+ unsigned int off, chunk = min(bytes, 1U << LONG_BYTEORDER); - uint32_t pfec = PFEC_page_present | PFEC_write_access; - struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io; - paddr_t gpa; -@@ -546,14 +588,30 @@ static int hvmemul_write( - seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr); - if ( rc != X86EMUL_OKAY ) - return rc; -+ off = addr & (PAGE_SIZE - 1); -+ /* See the respective comment in __hvmemul_read(). 
*/ -+ while ( chunk & (chunk - 1) ) -+ chunk &= chunk - 1; -+ if ( off + bytes > PAGE_SIZE ) -+ while ( off & (chunk - 1) ) -+ chunk >>= 1; - - if ( unlikely(vio->mmio_gva == (addr & PAGE_MASK)) && vio->mmio_gva ) - { -- unsigned int off = addr & (PAGE_SIZE - 1); - gpa = (((paddr_t)vio->mmio_gpfn << PAGE_SHIFT) | off); -- if ( (off + bytes) <= PAGE_SIZE ) -- return hvmemul_do_mmio(gpa, &reps, bytes, 0, -- IOREQ_WRITE, 0, p_data); -+ while ( (off + chunk) <= PAGE_SIZE ) -+ { -+ rc = hvmemul_do_mmio(gpa, &reps, chunk, 0, IOREQ_WRITE, 0, p_data); -+ if ( rc != X86EMUL_OKAY || bytes == chunk ) -+ return rc; -+ addr += chunk; -+ off += chunk; -+ gpa += chunk; -+ p_data += chunk; -+ bytes -= chunk; -+ if ( bytes < chunk ) -+ chunk = bytes; -+ } - } - - if ( (seg != x86_seg_none) && -@@ -569,12 +627,29 @@ static int hvmemul_write( - case HVMCOPY_unhandleable: - return X86EMUL_UNHANDLEABLE; - case HVMCOPY_bad_gfn_to_mfn: -- rc = hvmemul_linear_to_phys( -- addr, &gpa, bytes, &reps, pfec, hvmemul_ctxt); -- if ( rc != X86EMUL_OKAY ) -- return rc; -- return hvmemul_do_mmio(gpa, &reps, bytes, 0, -- IOREQ_WRITE, 0, p_data); -+ rc = hvmemul_linear_to_phys(addr, &gpa, chunk, &reps, pfec, -+ hvmemul_ctxt); -+ while ( rc == X86EMUL_OKAY ) -+ { -+ rc = hvmemul_do_mmio(gpa, &reps, chunk, 0, IOREQ_WRITE, 0, p_data); -+ if ( rc != X86EMUL_OKAY || bytes == chunk ) -+ break; -+ addr += chunk; -+ off += chunk; -+ p_data += chunk; -+ bytes -= chunk; -+ if ( bytes < chunk ) -+ chunk = bytes; -+ if ( off < PAGE_SIZE ) -+ gpa += chunk; -+ else -+ { -+ rc = hvmemul_linear_to_phys(addr, &gpa, chunk, &reps, pfec, -+ hvmemul_ctxt); -+ off = 0; -+ } -+ } -+ return rc; - case HVMCOPY_gfn_paged_out: - return X86EMUL_RETRY; - case HVMCOPY_gfn_shared: diff --git a/523c1758-sched_credit-filter-node-affinity-mask-against-online-cpus.patch b/523c1758-sched_credit-filter-node-affinity-mask-against-online-cpus.patch deleted file mode 100644 index 03d7bc4..0000000 --- a/523c1758-sched_credit-filter-node-affinity-mask-against-online-cpus.patch +++ /dev/null @@ -1,155 +0,0 @@ -# Commit 5e5a44b6c942d6ea47f15d6f1ed02b03e0d69445 -# Date 2013-09-20 11:37:28 +0200 -# Author Dario Faggioli -# Committer Jan Beulich -sched_credit: filter node-affinity mask against online cpus - -in _csched_cpu_pick(), as not doing so may result in the domain's -node-affinity mask (as retrieved by csched_balance_cpumask() ) -and online mask (as retrieved by cpupool_scheduler_cpumask() ) -having an empty intersection. - -Therefore, when attempting a node-affinity load balancing step -and running this: - - ... - /* Pick an online CPU from the proper affinity mask */ - csched_balance_cpumask(vc, balance_step, &cpus); - cpumask_and(&cpus, &cpus, online); - ... - -we end up with an empty cpumask (in cpus). At this point, in -the following code: - - .... - /* If present, prefer vc's current processor */ - cpu = cpumask_test_cpu(vc->processor, &cpus) - ? vc->processor - : cpumask_cycle(vc->processor, &cpus); - .... 
- -an ASSERT (from inside cpumask_cycle() ) triggers like this: - -(XEN) Xen call trace: -(XEN) [] _csched_cpu_pick+0x1d2/0x652 -(XEN) [] csched_cpu_pick+0xe/0x10 -(XEN) [] vcpu_migrate+0x167/0x31e -(XEN) [] cpu_disable_scheduler+0x1c8/0x287 -(XEN) [] cpupool_unassign_cpu_helper+0x20/0xb4 -(XEN) [] continue_hypercall_tasklet_handler+0x4a/0xb1 -(XEN) [] do_tasklet_work+0x78/0xab -(XEN) [] do_tasklet+0x5f/0x8b -(XEN) [] idle_loop+0x57/0x5e -(XEN) -(XEN) -(XEN) **************************************** -(XEN) Panic on CPU 1: -(XEN) Assertion 'cpu < nr_cpu_ids' failed at /home/dario/Sources/xen/xen/xen.git/xen/include/xe:16481 - -It is for example sufficient to have a domain with node-affinity -to NUMA node 1 running, and issueing a `xl cpupool-numa-split' -would make the above happen. That is because, by default, all -the existing domains remain assigned to the first cpupool, and -it now (after the cpupool-numa-split) only includes NUMA node 0. - -This change prevents that by generalizing the function used -for figuring out whether a node-affinity load balancing step -is legit or not. This way we can, in _csched_cpu_pick(), -figure out early enough that the mask would end up empty, -skip the step all together and avoid the splat. - -Signed-off-by: Dario Faggioli -Reviewed-by: George Dunlap - ---- a/xen/common/sched_credit.c -+++ b/xen/common/sched_credit.c -@@ -296,15 +296,28 @@ static void csched_set_node_affinity( - * vcpu-affinity balancing is always necessary and must never be skipped. - * OTOH, if a domain's node-affinity is said to be automatically computed - * (or if it just spans all the nodes), we can safely avoid dealing with -- * node-affinity entirely. Ah, node-affinity is also deemed meaningless -- * in case it has empty intersection with the vcpu's vcpu-affinity, as it -- * would mean trying to schedule it on _no_ pcpu! -+ * node-affinity entirely. -+ * -+ * Node-affinity is also deemed meaningless in case it has empty -+ * intersection with mask, to cover the cases where using the node-affinity -+ * mask seems legit, but would instead led to trying to schedule the vcpu -+ * on _no_ pcpu! Typical use cases are for mask to be equal to the vcpu's -+ * vcpu-affinity, or to the && of vcpu-affinity and the set of online cpus -+ * in the domain's cpupool. - */ --#define __vcpu_has_node_affinity(vc) \ -- ( !(cpumask_full(CSCHED_DOM(vc->domain)->node_affinity_cpumask) \ -- || !cpumask_intersects(vc->cpu_affinity, \ -- CSCHED_DOM(vc->domain)->node_affinity_cpumask) \ -- || vc->domain->auto_node_affinity == 1) ) -+static inline int __vcpu_has_node_affinity(const struct vcpu *vc, -+ const cpumask_t *mask) -+{ -+ const struct domain *d = vc->domain; -+ const struct csched_dom *sdom = CSCHED_DOM(d); -+ -+ if ( d->auto_node_affinity -+ || cpumask_full(sdom->node_affinity_cpumask) -+ || !cpumask_intersects(sdom->node_affinity_cpumask, mask) ) -+ return 0; -+ -+ return 1; -+} - - /* - * Each csched-balance step uses its own cpumask. This function determines -@@ -393,7 +406,8 @@ __runq_tickle(unsigned int cpu, struct c - int new_idlers_empty; - - if ( balance_step == CSCHED_BALANCE_NODE_AFFINITY -- && !__vcpu_has_node_affinity(new->vcpu) ) -+ && !__vcpu_has_node_affinity(new->vcpu, -+ new->vcpu->cpu_affinity) ) - continue; - - /* Are there idlers suitable for new (for this balance step)? 
*/ -@@ -626,11 +640,32 @@ _csched_cpu_pick(const struct scheduler - int cpu = vc->processor; - int balance_step; - -+ /* Store in cpus the mask of online cpus on which the domain can run */ - online = cpupool_scheduler_cpumask(vc->domain->cpupool); -+ cpumask_and(&cpus, vc->cpu_affinity, online); -+ - for_each_csched_balance_step( balance_step ) - { -+ /* -+ * We want to pick up a pcpu among the ones that are online and -+ * can accommodate vc, which is basically what we computed above -+ * and stored in cpus. As far as vcpu-affinity is concerned, -+ * there always will be at least one of these pcpus, hence cpus -+ * is never empty and the calls to cpumask_cycle() and -+ * cpumask_test_cpu() below are ok. -+ * -+ * On the other hand, when considering node-affinity too, it -+ * is possible for the mask to become empty (for instance, if the -+ * domain has been put in a cpupool that does not contain any of the -+ * nodes in its node-affinity), which would result in the ASSERT()-s -+ * inside cpumask_*() operations triggering (in debug builds). -+ * -+ * Therefore, in this case, we filter the node-affinity mask against -+ * cpus and, if the result is empty, we just skip the node-affinity -+ * balancing step all together. -+ */ - if ( balance_step == CSCHED_BALANCE_NODE_AFFINITY -- && !__vcpu_has_node_affinity(vc) ) -+ && !__vcpu_has_node_affinity(vc, &cpus) ) - continue; - - /* Pick an online CPU from the proper affinity mask */ -@@ -1449,7 +1484,7 @@ csched_runq_steal(int peer_cpu, int cpu, - * or counter. - */ - if ( balance_step == CSCHED_BALANCE_NODE_AFFINITY -- && !__vcpu_has_node_affinity(vc) ) -+ && !__vcpu_has_node_affinity(vc, vc->cpu_affinity) ) - continue; - - csched_balance_cpumask(vc, balance_step, csched_balance_mask); diff --git a/523c1834-unmodified_drivers-enable-unplug-per-default.patch b/523c1834-unmodified_drivers-enable-unplug-per-default.patch deleted file mode 100644 index 6016a7a..0000000 --- a/523c1834-unmodified_drivers-enable-unplug-per-default.patch +++ /dev/null @@ -1,48 +0,0 @@ -# Commit df17e9c889c48c9c10aa3f9dd0bb11077f54efc4 -# Date 2013-09-20 11:41:08 +0200 -# Author Olaf Hering -# Committer Jan Beulich -unmodified_drivers: enable unplug per default - -Since xen-3.3 an official unplug protocol for emulated hardware is -available in the toolstack. The pvops kernel does the unplug per -default, so it is safe to do it also in the drivers for forward ported -xenlinux. -Currently its required to load xen-platform-pci with the module -parameter dev_unplug=all, which is cumbersome. -Also recognize the dev_unplug=never parameter, which provides the -default before this patch. 
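A standalone sketch of the resulting option handling, using the same keyword set as the driver; the helper and bit values are illustrative stand-ins for the module's own:

    #include <string.h>

    #define UNPLUG_ALL_IDE_DISKS 1
    #define UNPLUG_ALL_NICS      2
    #define UNPLUG_AUX_IDE_DISKS 4
    #define UNPLUG_ALL           7

    /* NULL (module parameter absent) now means "unplug everything"; the new
     * "never" keyword restores the old default of unplugging nothing. */
    static int parse_dev_unplug(const char *p)
    {
        int unplug = p ? 0 : UNPLUG_ALL;

        while (p && *p) {
            const char *q = strchr(p, ',');
            size_t n = q ? (size_t)(q - p) : strlen(p);

            if (n == 3 && !strncmp(p, "all", n))
                unplug |= UNPLUG_ALL;
            else if (n == 9 && !strncmp(p, "ide-disks", n))
                unplug |= UNPLUG_ALL_IDE_DISKS;
            else if (n == 13 && !strncmp(p, "aux-ide-disks", n))
                unplug |= UNPLUG_AUX_IDE_DISKS;
            else if (n == 4 && !strncmp(p, "nics", n))
                unplug |= UNPLUG_ALL_NICS;
            else if (n == 5 && !strncmp(p, "never", n))
                unplug = 0;
            p = q ? q + 1 : NULL;
        }
        return unplug;
    }
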
- -Signed-off-by: Olaf Hering - ---- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c -+++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c -@@ -66,7 +66,7 @@ MODULE_LICENSE("GPL"); - static char *dev_unplug; - module_param(dev_unplug, charp, 0644); - MODULE_PARM_DESC(dev_unplug, "Emulated devices to unplug: " -- "[all,][ide-disks,][aux-ide-disks,][nics]\n"); -+ "[all,][ide-disks,][aux-ide-disks,][nics,][never] (default is 'all')\n"); - - struct pci_dev *xen_platform_pdev; - -@@ -290,6 +290,10 @@ static int check_platform_magic(struct d - short magic, unplug = 0; - char protocol, *p, *q, *err; - -+ /* Unconditionally unplug everything */ -+ if (!dev_unplug) -+ unplug = UNPLUG_ALL; -+ - for (p = dev_unplug; p; p = q) { - q = strchr(dev_unplug, ','); - if (q) -@@ -302,6 +306,8 @@ static int check_platform_magic(struct d - unplug |= UNPLUG_AUX_IDE_DISKS; - else if (!strcmp(p, "nics")) - unplug |= UNPLUG_ALL_NICS; -+ else if (!strcmp(p, "never")) -+ unplug = 0; - else - dev_warn(dev, "unrecognised option '%s' " - "in module parameter 'dev_unplug'\n", p); diff --git a/523ff393-x86-HVM-linear-address-must-be-canonical-for-the-whole-accessed-range.patch b/523ff393-x86-HVM-linear-address-must-be-canonical-for-the-whole-accessed-range.patch deleted file mode 100644 index dd5fee5..0000000 --- a/523ff393-x86-HVM-linear-address-must-be-canonical-for-the-whole-accessed-range.patch +++ /dev/null @@ -1,92 +0,0 @@ -# Commit 7f12732670b31b2fea899a4160d455574658474f -# Date 2013-09-23 09:53:55 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86/HVM: linear address must be canonical for the whole accessed range - -... rather than just for the first byte. - -While at it, also -- make the real mode case at least dpo a wrap around check -- drop the mis-named "gpf" label (we're not generating faults here) - and use in-place returns instead - -Signed-off-by: Jan Beulich -Acked-by: Keir Fraser - ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -1938,8 +1938,7 @@ int hvm_virtual_to_linear_addr( - unsigned int addr_size, - unsigned long *linear_addr) - { -- unsigned long addr = offset; -- uint32_t last_byte; -+ unsigned long addr = offset, last_byte; - - if ( !(current->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) ) - { -@@ -1948,6 +1947,9 @@ int hvm_virtual_to_linear_addr( - * Certain of them are not done in native real mode anyway. - */ - addr = (uint32_t)(addr + reg->base); -+ last_byte = (uint32_t)addr + bytes - 1; -+ if ( last_byte < addr ) -+ return 0; - } - else if ( addr_size != 64 ) - { -@@ -1959,17 +1961,17 @@ int hvm_virtual_to_linear_addr( - { - case hvm_access_read: - if ( (reg->attr.fields.type & 0xa) == 0x8 ) -- goto gpf; /* execute-only code segment */ -+ return 0; /* execute-only code segment */ - break; - case hvm_access_write: - if ( (reg->attr.fields.type & 0xa) != 0x2 ) -- goto gpf; /* not a writable data segment */ -+ return 0; /* not a writable data segment */ - break; - default: - break; - } - -- last_byte = offset + bytes - 1; -+ last_byte = (uint32_t)offset + bytes - 1; - - /* Is this a grows-down data segment? Special limit check if so. */ - if ( (reg->attr.fields.type & 0xc) == 0x4 ) -@@ -1980,10 +1982,10 @@ int hvm_virtual_to_linear_addr( - - /* Check first byte and last byte against respective bounds. 
*/ - if ( (offset <= reg->limit) || (last_byte < offset) ) -- goto gpf; -+ return 0; - } - else if ( (last_byte > reg->limit) || (last_byte < offset) ) -- goto gpf; /* last byte is beyond limit or wraps 0xFFFFFFFF */ -+ return 0; /* last byte is beyond limit or wraps 0xFFFFFFFF */ - - /* - * Hardware truncates to 32 bits in compatibility mode. -@@ -2000,15 +2002,14 @@ int hvm_virtual_to_linear_addr( - if ( (seg == x86_seg_fs) || (seg == x86_seg_gs) ) - addr += reg->base; - -- if ( !is_canonical_address(addr) ) -- goto gpf; -+ last_byte = addr + bytes - 1; -+ if ( !is_canonical_address(addr) || last_byte < addr || -+ !is_canonical_address(last_byte) ) -+ return 0; - } - - *linear_addr = addr; - return 1; -- -- gpf: -- return 0; - } - - /* On non-NULL return, we leave this function holding an additional diff --git a/523ff3e2-x86-HVM-refuse-doing-string-operations-in-certain-situations.patch b/523ff3e2-x86-HVM-refuse-doing-string-operations-in-certain-situations.patch deleted file mode 100644 index 7c1eeca..0000000 --- a/523ff3e2-x86-HVM-refuse-doing-string-operations-in-certain-situations.patch +++ /dev/null @@ -1,65 +0,0 @@ -# Commit 14fcce2fa883405bab26b60821a6cc5f2c770833 -# Date 2013-09-23 09:55:14 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86/HVM: refuse doing string operations in certain situations - -We shouldn't do any acceleration for -- "rep movs" when either side is passed through MMIO or when both sides - are handled by qemu -- "rep ins" and "rep outs" when the memory operand is any kind of MMIO - -Signed-off-by: Jan Beulich -Acked-by: Keir Fraser - ---- a/xen/arch/x86/hvm/emulate.c -+++ b/xen/arch/x86/hvm/emulate.c -@@ -686,6 +686,7 @@ static int hvmemul_rep_ins( - unsigned long addr; - uint32_t pfec = PFEC_page_present | PFEC_write_access; - paddr_t gpa; -+ p2m_type_t p2mt; - int rc; - - rc = hvmemul_virtual_to_linear( -@@ -702,6 +703,10 @@ static int hvmemul_rep_ins( - if ( rc != X86EMUL_OKAY ) - return rc; - -+ (void) get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt); -+ if ( p2mt == p2m_mmio_direct || p2mt == p2m_mmio_dm ) -+ return X86EMUL_UNHANDLEABLE; -+ - return hvmemul_do_pio(src_port, reps, bytes_per_rep, gpa, IOREQ_READ, - !!(ctxt->regs->eflags & X86_EFLAGS_DF), NULL); - } -@@ -719,6 +724,7 @@ static int hvmemul_rep_outs( - unsigned long addr; - uint32_t pfec = PFEC_page_present; - paddr_t gpa; -+ p2m_type_t p2mt; - int rc; - - rc = hvmemul_virtual_to_linear( -@@ -735,6 +741,10 @@ static int hvmemul_rep_outs( - if ( rc != X86EMUL_OKAY ) - return rc; - -+ (void) get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt); -+ if ( p2mt == p2m_mmio_direct || p2mt == p2m_mmio_dm ) -+ return X86EMUL_UNHANDLEABLE; -+ - return hvmemul_do_pio(dst_port, reps, bytes_per_rep, gpa, IOREQ_WRITE, - !!(ctxt->regs->eflags & X86_EFLAGS_DF), NULL); - } -@@ -787,6 +797,10 @@ static int hvmemul_rep_movs( - (void) get_gfn_query_unlocked(current->domain, sgpa >> PAGE_SHIFT, &sp2mt); - (void) get_gfn_query_unlocked(current->domain, dgpa >> PAGE_SHIFT, &dp2mt); - -+ if ( sp2mt == p2m_mmio_direct || dp2mt == p2m_mmio_direct || -+ (sp2mt == p2m_mmio_dm && dp2mt == p2m_mmio_dm) ) -+ return X86EMUL_UNHANDLEABLE; -+ - if ( sp2mt == p2m_mmio_dm ) - return hvmemul_do_mmio( - sgpa, reps, bytes_per_rep, dgpa, IOREQ_READ, df, NULL); diff --git a/5242a1b5-x86-xsave-initialize-extended-register-state-when-guests-enable-it.patch b/5242a1b5-x86-xsave-initialize-extended-register-state-when-guests-enable-it.patch deleted file mode 100644 index 045e433..0000000 --- 
a/5242a1b5-x86-xsave-initialize-extended-register-state-when-guests-enable-it.patch +++ /dev/null @@ -1,52 +0,0 @@ -References: bnc#839596 CVE-2013-1442 XSA-62 - -# Commit 63a75ba0de817d6f384f96d25427a05c313e2179 -# Date 2013-09-25 10:41:25 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86/xsave: initialize extended register state when guests enable it - -Till now, when setting previously unset bits in XCR0 we wouldn't touch -the active register state, thus leaving in the newly enabled registers -whatever a prior user of it left there, i.e. potentially leaking -information between guests. - -This is CVE-2013-1442 / XSA-62. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper - ---- a/xen/arch/x86/xstate.c -+++ b/xen/arch/x86/xstate.c -@@ -342,6 +342,7 @@ int validate_xstate(u64 xcr0, u64 xcr0_a - int handle_xsetbv(u32 index, u64 new_bv) - { - struct vcpu *curr = current; -+ u64 mask; - - if ( index != XCR_XFEATURE_ENABLED_MASK ) - return -EOPNOTSUPP; -@@ -355,9 +356,23 @@ int handle_xsetbv(u32 index, u64 new_bv) - if ( !set_xcr0(new_bv) ) - return -EFAULT; - -+ mask = new_bv & ~curr->arch.xcr0_accum; - curr->arch.xcr0 = new_bv; - curr->arch.xcr0_accum |= new_bv; - -+ mask &= curr->fpu_dirtied ? ~XSTATE_FP_SSE : XSTATE_NONLAZY; -+ if ( mask ) -+ { -+ unsigned long cr0 = read_cr0(); -+ -+ clts(); -+ if ( curr->fpu_dirtied ) -+ asm ( "stmxcsr %0" : "=m" (curr->arch.xsave_area->fpu_sse.mxcsr) ); -+ xrstor(curr, mask); -+ if ( cr0 & X86_CR0_TS ) -+ write_cr0(cr0); -+ } -+ - return 0; - } - diff --git a/52496bea-x86-properly-handle-hvm_copy_from_guest_-phys-virt-errors.patch b/52496bea-x86-properly-handle-hvm_copy_from_guest_-phys-virt-errors.patch deleted file mode 100644 index 1aa9ec5..0000000 --- a/52496bea-x86-properly-handle-hvm_copy_from_guest_-phys-virt-errors.patch +++ /dev/null @@ -1,177 +0,0 @@ -References: bnc#840592 CVE-2013-4355 XSA-63 - -# Commit 6bb838e7375f5b031e9ac346b353775c90de45dc -# Date 2013-09-30 14:17:46 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86: properly handle hvm_copy_from_guest_{phys,virt}() errors - -Ignoring them generally implies using uninitialized data and, in all -but two of the cases dealt with here, potentially leaking hypervisor -stack contents to guests. - -This is CVE-2013-4355 / XSA-63. - -Signed-off-by: Jan Beulich -Reviewed-by: Tim Deegan -Reviewed-by: Andrew Cooper - ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -2316,11 +2316,7 @@ void hvm_task_switch( - - rc = hvm_copy_from_guest_virt( - &tss, prev_tr.base, sizeof(tss), PFEC_page_present); -- if ( rc == HVMCOPY_bad_gva_to_gfn ) -- goto out; -- if ( rc == HVMCOPY_gfn_paged_out ) -- goto out; -- if ( rc == HVMCOPY_gfn_shared ) -+ if ( rc != HVMCOPY_okay ) - goto out; - - eflags = regs->eflags; -@@ -2365,13 +2361,11 @@ void hvm_task_switch( - - rc = hvm_copy_from_guest_virt( - &tss, tr.base, sizeof(tss), PFEC_page_present); -- if ( rc == HVMCOPY_bad_gva_to_gfn ) -- goto out; -- if ( rc == HVMCOPY_gfn_paged_out ) -- goto out; -- /* Note: this could be optimised, if the callee functions knew we want RO -- * access */ -- if ( rc == HVMCOPY_gfn_shared ) -+ /* -+ * Note: The HVMCOPY_gfn_shared case could be optimised, if the callee -+ * functions knew we want RO access. 
-+ */ -+ if ( rc != HVMCOPY_okay ) - goto out; - - ---- a/xen/arch/x86/hvm/intercept.c -+++ b/xen/arch/x86/hvm/intercept.c -@@ -87,17 +87,28 @@ static int hvm_mmio_access(struct vcpu * - { - for ( i = 0; i < p->count; i++ ) - { -- int ret; -- -- ret = hvm_copy_from_guest_phys(&data, -- p->data + (sign * i * p->size), -- p->size); -- if ( (ret == HVMCOPY_gfn_paged_out) || -- (ret == HVMCOPY_gfn_shared) ) -+ switch ( hvm_copy_from_guest_phys(&data, -+ p->data + sign * i * p->size, -+ p->size) ) - { -+ case HVMCOPY_okay: -+ break; -+ case HVMCOPY_gfn_paged_out: -+ case HVMCOPY_gfn_shared: - rc = X86EMUL_RETRY; - break; -+ case HVMCOPY_bad_gfn_to_mfn: -+ data = ~0; -+ break; -+ case HVMCOPY_bad_gva_to_gfn: -+ ASSERT(0); -+ /* fall through */ -+ default: -+ rc = X86EMUL_UNHANDLEABLE; -+ break; - } -+ if ( rc != X86EMUL_OKAY ) -+ break; - rc = write_handler(v, p->addr + (sign * i * p->size), p->size, - data); - if ( rc != X86EMUL_OKAY ) -@@ -165,8 +176,28 @@ static int process_portio_intercept(port - for ( i = 0; i < p->count; i++ ) - { - data = 0; -- (void)hvm_copy_from_guest_phys(&data, p->data + sign*i*p->size, -- p->size); -+ switch ( hvm_copy_from_guest_phys(&data, -+ p->data + sign * i * p->size, -+ p->size) ) -+ { -+ case HVMCOPY_okay: -+ break; -+ case HVMCOPY_gfn_paged_out: -+ case HVMCOPY_gfn_shared: -+ rc = X86EMUL_RETRY; -+ break; -+ case HVMCOPY_bad_gfn_to_mfn: -+ data = ~0; -+ break; -+ case HVMCOPY_bad_gva_to_gfn: -+ ASSERT(0); -+ /* fall through */ -+ default: -+ rc = X86EMUL_UNHANDLEABLE; -+ break; -+ } -+ if ( rc != X86EMUL_OKAY ) -+ break; - rc = action(IOREQ_WRITE, p->addr, p->size, &data); - if ( rc != X86EMUL_OKAY ) - break; ---- a/xen/arch/x86/hvm/io.c -+++ b/xen/arch/x86/hvm/io.c -@@ -340,14 +340,24 @@ static int dpci_ioport_write(uint32_t mp - data = p->data; - if ( p->data_is_ptr ) - { -- int ret; -- -- ret = hvm_copy_from_guest_phys(&data, -- p->data + (sign * i * p->size), -- p->size); -- if ( (ret == HVMCOPY_gfn_paged_out) && -- (ret == HVMCOPY_gfn_shared) ) -+ switch ( hvm_copy_from_guest_phys(&data, -+ p->data + sign * i * p->size, -+ p->size) ) -+ { -+ case HVMCOPY_okay: -+ break; -+ case HVMCOPY_gfn_paged_out: -+ case HVMCOPY_gfn_shared: - return X86EMUL_RETRY; -+ case HVMCOPY_bad_gfn_to_mfn: -+ data = ~0; -+ break; -+ case HVMCOPY_bad_gva_to_gfn: -+ ASSERT(0); -+ /* fall through */ -+ default: -+ return X86EMUL_UNHANDLEABLE; -+ } - } - - switch ( p->size ) ---- a/xen/arch/x86/hvm/vmx/realmode.c -+++ b/xen/arch/x86/hvm/vmx/realmode.c -@@ -39,7 +39,9 @@ static void realmode_deliver_exception( - - again: - last_byte = (vector * 4) + 3; -- if ( idtr->limit < last_byte ) -+ if ( idtr->limit < last_byte || -+ hvm_copy_from_guest_phys(&cs_eip, idtr->base + vector * 4, 4) != -+ HVMCOPY_okay ) - { - /* Software interrupt? 
*/ - if ( insn_len != 0 ) -@@ -64,8 +66,6 @@ static void realmode_deliver_exception( - } - } - -- (void)hvm_copy_from_guest_phys(&cs_eip, idtr->base + vector * 4, 4); -- - frame[0] = regs->eip + insn_len; - frame[1] = csr->sel; - frame[2] = regs->eflags & ~X86_EFLAGS_RF; diff --git a/52496c11-x86-mm-shadow-Fix-initialization-of-PV-shadow-L4-tables.patch b/52496c11-x86-mm-shadow-Fix-initialization-of-PV-shadow-L4-tables.patch deleted file mode 100644 index 5aa4b7e..0000000 --- a/52496c11-x86-mm-shadow-Fix-initialization-of-PV-shadow-L4-tables.patch +++ /dev/null @@ -1,56 +0,0 @@ -References: bnc#840593 CVE-2013-4356 XSA-64 - -# Commit f46befdd825c8a459c5eb21adb7d5b0dc6e30ad5 -# Date 2013-09-30 14:18:25 +0200 -# Author Tim Deegan -# Committer Jan Beulich -x86/mm/shadow: Fix initialization of PV shadow L4 tables. - -Shadowed PV L4 tables must have the same Xen mappings as their -unshadowed equivalent. This is done by copying the Xen entries -verbatim from the idle pagetable, and then using guest_l4_slot() -in the SHADOW_FOREACH_L4E() iterator to avoid touching those entries. - -adc5afbf1c70ef55c260fb93e4b8ce5ccb918706 (x86: support up to 16Tb) -changed the definition of ROOT_PAGETABLE_XEN_SLOTS to extend right to -the top of the address space, which causes the shadow code to -copy Xen mappings into guest-kernel-address slots too. - -In the common case, all those slots are zero in the idle pagetable, -and no harm is done. But if any slot above #271 is non-zero, Xen will -crash when that slot is later cleared (it attempts to drop -shadow-pagetable refcounts on its own L4 pagetables). - -Fix by using the new ROOT_PAGETABLE_PV_XEN_SLOTS when appropriate. -Monitor pagetables need the full Xen mappings, so they keep using the -old name (with its new semantics). - -This is CVE-2013-4356 / XSA-64. - -Signed-off-by: Tim Deegan -Reviewed-by: Jan Beulich - ---- a/xen/arch/x86/mm/shadow/multi.c -+++ b/xen/arch/x86/mm/shadow/multi.c -@@ -1433,15 +1433,19 @@ void sh_install_xen_entries_in_l4(struct - { - struct domain *d = v->domain; - shadow_l4e_t *sl4e; -+ unsigned int slots; - - sl4e = sh_map_domain_page(sl4mfn); - ASSERT(sl4e != NULL); - ASSERT(sizeof (l4_pgentry_t) == sizeof (shadow_l4e_t)); -- -+ - /* Copy the common Xen mappings from the idle domain */ -+ slots = (shadow_mode_external(d) -+ ? ROOT_PAGETABLE_XEN_SLOTS -+ : ROOT_PAGETABLE_PV_XEN_SLOTS); - memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT], - &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT], -- ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t)); -+ slots * sizeof(l4_pgentry_t)); - - /* Install the per-domain mappings for this domain */ - sl4e[shadow_l4_table_offset(PERDOMAIN_VIRT_START)] = diff --git a/52496c32-x86-properly-set-up-fbld-emulation-operand-address.patch b/52496c32-x86-properly-set-up-fbld-emulation-operand-address.patch deleted file mode 100644 index f65afa3..0000000 --- a/52496c32-x86-properly-set-up-fbld-emulation-operand-address.patch +++ /dev/null @@ -1,29 +0,0 @@ -References: bnc#841766 CVE-2013-4361 XSA-66 - -# Commit 28b706efb6abb637fabfd74cde70a50935a5640b -# Date 2013-09-30 14:18:58 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86: properly set up fbld emulation operand address - -This is CVE-2013-4361 / XSA-66. 
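The operand mix-up matters because fbld only reads: its m80dec operand is an 80-bit packed-BCD source the emulator must fetch before running the FPU stub. A sketch of that format (the decoder is illustrative, not emulator code):

    #include <stdint.h>

    /* fbld's m80dec operand: bytes 0-8 hold 18 packed BCD digits, least
     * significant first; bit 7 of byte 9 is the sign. Hence the emulator
     * has to read 10 bytes into the *source* operand, never the destination. */
    static long long bcd80_to_int(const uint8_t buf[10])
    {
        long long v = 0;

        for (int i = 8; i >= 0; i--)
            v = v * 100 + (buf[i] >> 4) * 10 + (buf[i] & 0x0f);
        return (buf[9] & 0x80) ? -v : v;
    }
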
- -Signed-off-by: Jan Beulich -Acked-by: Ian Jackson - ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -3156,11 +3156,11 @@ x86_emulate( - break; - case 4: /* fbld m80dec */ - ea.bytes = 10; -- dst = ea; -+ src = ea; - if ( (rc = ops->read(src.mem.seg, src.mem.off, - &src.val, src.bytes, ctxt)) != 0 ) - goto done; -- emulate_fpu_insn_memdst("fbld", src.val); -+ emulate_fpu_insn_memsrc("fbld", src.val); - break; - case 5: /* fild m64i */ - ea.bytes = 8; diff --git a/52497c6c-x86-don-t-blindly-create-L3-tables-for-the-direct-map.patch b/52497c6c-x86-don-t-blindly-create-L3-tables-for-the-direct-map.patch deleted file mode 100644 index fe8318d..0000000 --- a/52497c6c-x86-don-t-blindly-create-L3-tables-for-the-direct-map.patch +++ /dev/null @@ -1,116 +0,0 @@ -# Commit ca145fe70bad3a25ad54c6ded1ef237e45a2311e -# Date 2013-09-30 15:28:12 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86: don't blindly create L3 tables for the direct map - -Now that the direct map area can extend all the way up to almost the -end of address space, this is wasteful. - -Also fold two almost redundant messages in SRAT parsing into one. - -Signed-off-by: Jan Beulich -Tested-by: Malcolm Crossley -Reviewed-by: Andrew Cooper -Acked-by: Keir Fraser - ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -137,7 +137,7 @@ l1_pgentry_t __attribute__ ((__section__ - #define PTE_UPDATE_WITH_CMPXCHG - #endif - --bool_t __read_mostly mem_hotplug = 0; -+paddr_t __read_mostly mem_hotplug; - - /* Private domain structs for DOMID_XEN and DOMID_IO. */ - struct domain *dom_xen, *dom_io, *dom_cow; ---- a/xen/arch/x86/srat.c -+++ b/xen/arch/x86/srat.c -@@ -113,6 +113,7 @@ static __init void bad_srat(void) - apicid_to_node[i] = NUMA_NO_NODE; - for (i = 0; i < ARRAY_SIZE(pxm2node); i++) - pxm2node[i] = NUMA_NO_NODE; -+ mem_hotplug = 0; - } - - /* -@@ -257,13 +258,6 @@ acpi_numa_memory_affinity_init(struct ac - return; - } - /* It is fine to add this area to the nodes data it will be used later*/ -- if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) -- { -- printk(KERN_INFO "SRAT: hot plug zone found %"PRIx64" - %"PRIx64" \n", -- start, end); -- mem_hotplug = 1; -- } -- - i = conflicting_memblks(start, end); - if (i == node) { - printk(KERN_WARNING -@@ -287,8 +281,11 @@ acpi_numa_memory_affinity_init(struct ac - if (nd->end < end) - nd->end = end; - } -- printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIx64"-%"PRIx64"\n", node, pxm, -- start, end); -+ if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && end > mem_hotplug) -+ mem_hotplug = end; -+ printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIx64"-%"PRIx64"%s\n", -+ node, pxm, start, end, -+ ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ? 
" (hotplug)" : ""); - - node_memblk_range[num_node_memblks].start = start; - node_memblk_range[num_node_memblks].end = end; ---- a/xen/arch/x86/x86_64/mm.c -+++ b/xen/arch/x86/x86_64/mm.c -@@ -621,25 +621,20 @@ void __init paging_init(void) - * We setup the L3s for 1:1 mapping if host support memory hotplug - * to avoid sync the 1:1 mapping on page fault handler - */ -- if ( mem_hotplug ) -+ for ( va = DIRECTMAP_VIRT_START; -+ va < DIRECTMAP_VIRT_END && (void *)va < __va(mem_hotplug); -+ va += (1UL << L4_PAGETABLE_SHIFT) ) - { -- unsigned long va; -- -- for ( va = DIRECTMAP_VIRT_START; -- va < DIRECTMAP_VIRT_END; -- va += (1UL << L4_PAGETABLE_SHIFT) ) -+ if ( !(l4e_get_flags(idle_pg_table[l4_table_offset(va)]) & -+ _PAGE_PRESENT) ) - { -- if ( !(l4e_get_flags(idle_pg_table[l4_table_offset(va)]) & -- _PAGE_PRESENT) ) -- { -- l3_pg = alloc_domheap_page(NULL, 0); -- if ( !l3_pg ) -- goto nomem; -- l3_ro_mpt = page_to_virt(l3_pg); -- clear_page(l3_ro_mpt); -- l4e_write(&idle_pg_table[l4_table_offset(va)], -- l4e_from_page(l3_pg, __PAGE_HYPERVISOR)); -- } -+ l3_pg = alloc_domheap_page(NULL, 0); -+ if ( !l3_pg ) -+ goto nomem; -+ l3_ro_mpt = page_to_virt(l3_pg); -+ clear_page(l3_ro_mpt); -+ l4e_write(&idle_pg_table[l4_table_offset(va)], -+ l4e_from_page(l3_pg, __PAGE_HYPERVISOR)); - } - } - ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -399,7 +399,7 @@ static inline int get_page_and_type(stru - int check_descriptor(const struct domain *, struct desc_struct *d); - - extern bool_t opt_allow_superpage; --extern bool_t mem_hotplug; -+extern paddr_t mem_hotplug; - - /****************************************************************************** - * With shadow pagetables, the different kinds of address start diff --git a/524e971b-x86-idle-Fix-get_cpu_idle_time-s-interaction-with-offline-pcpus.patch b/524e971b-x86-idle-Fix-get_cpu_idle_time-s-interaction-with-offline-pcpus.patch deleted file mode 100644 index 24f0f16..0000000 --- a/524e971b-x86-idle-Fix-get_cpu_idle_time-s-interaction-with-offline-pcpus.patch +++ /dev/null @@ -1,82 +0,0 @@ -# Commit 0aa27ce3351f7eb09d13e863a1d5f303086aa32a -# Date 2013-10-04 12:23:23 +0200 -# Author Andrew Cooper -# Committer Jan Beulich -x86/idle: Fix get_cpu_idle_time()'s interaction with offline pcpus - -Checking for "idle_vcpu[cpu] != NULL" is insufficient protection against -offline pcpus. From a hypercall, vcpu_runstate_get() will determine "v != -current", and try to take the vcpu_schedule_lock(). This will try to look up -per_cpu(schedule_data, v->processor) and promptly suffer a NULL structure -deference as v->processors' __per_cpu_offset is INVALID_PERCPU_AREA. - -One example might look like this: - -... -Xen call trace: - [] vcpu_runstate_get+0x50/0x113 - [] get_cpu_idle_time+0x28/0x2e - [] do_sysctl+0x3db/0xeb8 - [] compat_hypercall+0xbd/0x116 - -Pagetable walk from 0000000000000040: - L4[0x000] = 0000000186df8027 0000000000028207 - L3[0x000] = 0000000188e36027 00000000000261c9 - L2[0x000] = 0000000000000000 ffffffffffffffff - -**************************************** -Panic on CPU 11: -... - -get_cpu_idle_time() has been updated to correctly deal with offline pcpus -itself by returning 0, in the same way as it would if it was missing the -idle_vcpu[] pointer. - -In doing so, XENPF_getidletime needed updating to correctly retain its -described behaviour of clearing bits in the cpumap for offline pcpus. - -As this crash can only be triggered with toolstack hypercalls, it is not a -security issue and just a simple bug. 
- -Signed-off-by: Andrew Cooper -Acked-by: Keir Fraser - ---- a/xen/arch/x86/platform_hypercall.c -+++ b/xen/arch/x86/platform_hypercall.c -@@ -355,10 +355,14 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PA - - for_each_cpu ( cpu, cpumap ) - { -- if ( idle_vcpu[cpu] == NULL ) -- cpumask_clear_cpu(cpu, cpumap); - idletime = get_cpu_idle_time(cpu); - -+ if ( !idletime ) -+ { -+ cpumask_clear_cpu(cpu, cpumap); -+ continue; -+ } -+ - if ( copy_to_guest_offset(idletimes, cpu, &idletime, 1) ) - { - ret = -EFAULT; ---- a/xen/common/schedule.c -+++ b/xen/common/schedule.c -@@ -176,13 +176,12 @@ void vcpu_runstate_get(struct vcpu *v, s - - uint64_t get_cpu_idle_time(unsigned int cpu) - { -- struct vcpu_runstate_info state; -- struct vcpu *v; -+ struct vcpu_runstate_info state = { 0 }; -+ struct vcpu *v = idle_vcpu[cpu]; - -- if ( (v = idle_vcpu[cpu]) == NULL ) -- return 0; -+ if ( cpu_online(cpu) && v ) -+ vcpu_runstate_get(v, &state); - -- vcpu_runstate_get(v, &state); - return state.time[RUNSTATE_running]; - } - diff --git a/524e9762-x86-percpu-Force-INVALID_PERCPU_AREA-to-non-canonical.patch b/524e9762-x86-percpu-Force-INVALID_PERCPU_AREA-to-non-canonical.patch deleted file mode 100644 index 7102ff3..0000000 --- a/524e9762-x86-percpu-Force-INVALID_PERCPU_AREA-to-non-canonical.patch +++ /dev/null @@ -1,35 +0,0 @@ -# Commit 7cfb0053629c4dd1a6f01dc43cca7c0c25b8b7bf -# Date 2013-10-04 12:24:34 +0200 -# Author Andrew Cooper -# Committer Jan Beulich -x86/percpu: Force INVALID_PERCPU_AREA into the non-canonical address region - -This causes accidental uses of per_cpu() on a pcpu with an INVALID_PERCPU_AREA -to result in a #GF for attempting to access the middle of the non-canonical -virtual address region. - -This is preferable to the current behaviour, where incorrect use of per_cpu() -will result in an effective NULL structure dereference which has security -implication in the context of PV guests. - -Signed-off-by: Andrew Cooper -Acked-by: Keir Fraser - ---- a/xen/arch/x86/percpu.c -+++ b/xen/arch/x86/percpu.c -@@ -6,7 +6,14 @@ - #include - - unsigned long __per_cpu_offset[NR_CPUS]; --#define INVALID_PERCPU_AREA (-(long)__per_cpu_start) -+ -+/* -+ * Force uses of per_cpu() with an invalid area to attempt to access the -+ * middle of the non-canonical address space resulting in a #GP, rather than a -+ * possible #PF at (NULL + a little) which has security implications in the -+ * context of PV guests. -+ */ -+#define INVALID_PERCPU_AREA (0x8000000000000000L - (long)__per_cpu_start) - #define PERCPU_ORDER (get_order_from_bytes(__per_cpu_data_end-__per_cpu_start)) - - void __init percpu_init_areas(void) diff --git a/524e983e-Nested-VMX-check-VMX-capability-before-read-VMX-related-MSRs.patch b/524e983e-Nested-VMX-check-VMX-capability-before-read-VMX-related-MSRs.patch deleted file mode 100644 index 1532797..0000000 --- a/524e983e-Nested-VMX-check-VMX-capability-before-read-VMX-related-MSRs.patch +++ /dev/null @@ -1,82 +0,0 @@ -# Commit 190b667ac20e8175758f4a3a0f13c4d990e6af7e -# Date 2013-10-04 12:28:14 +0200 -# Author Yang Zhang -# Committer Jan Beulich -Nested VMX: check VMX capability before read VMX related MSRs - -VMX MSRs only available when the CPU support the VMX feature. In addition, -VMX_TRUE* MSRs only available when bit 55 of VMX_BASIC MSR is set. - -Signed-off-by: Yang Zhang - -Cleanup. 
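-
-The added gate, reduced to its essentials (guest_has_vmx() is an
-invented wrapper; hvm_cpuid() and cpufeat_mask() are the helpers the
-patch itself uses):
-
-    static bool_t guest_has_vmx(void)
-    {
-        unsigned int ecx, dummy;
-
-        hvm_cpuid(1, &dummy, &dummy, &ecx, &dummy);
-        return !!(ecx & cpufeat_mask(X86_FEATURE_VMXE));
-    }
-
-Only if this holds may any VMX MSR be read at all, and the TRUE_*
-variants additionally require VMX_BASIC_DEFAULT1_ZERO (bit 55).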
- -Signed-off-by: Jan Beulich -Acked-by: Jun Nakajima - ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -78,6 +78,7 @@ static DEFINE_PER_CPU(struct list_head, - static DEFINE_PER_CPU(bool_t, vmxon); - - static u32 vmcs_revision_id __read_mostly; -+u64 __read_mostly vmx_basic_msr; - - static void __init vmx_display_features(void) - { -@@ -301,6 +302,8 @@ static int vmx_init_vmcs_config(void) - vmx_vmexit_control = _vmx_vmexit_control; - vmx_vmentry_control = _vmx_vmentry_control; - cpu_has_vmx_ins_outs_instr_info = !!(vmx_basic_msr_high & (1U<<22)); -+ vmx_basic_msr = ((u64)vmx_basic_msr_high << 32) | -+ vmx_basic_msr_low; - vmx_display_features(); - } - else ---- a/xen/arch/x86/hvm/vmx/vvmx.c -+++ b/xen/arch/x86/hvm/vmx/vvmx.c -@@ -1814,12 +1814,33 @@ int nvmx_handle_invvpid(struct cpu_user_ - int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content) - { - struct vcpu *v = current; -+ unsigned int ecx, dummy; - u64 data = 0, host_data = 0; - int r = 1; - - if ( !nestedhvm_enabled(v->domain) ) - return 0; - -+ /* VMX capablity MSRs are available only when guest supports VMX. */ -+ hvm_cpuid(0x1, &dummy, &dummy, &ecx, &dummy); -+ if ( !(ecx & cpufeat_mask(X86_FEATURE_VMXE)) ) -+ return 0; -+ -+ /* -+ * Those MSRs are available only when bit 55 of -+ * MSR_IA32_VMX_BASIC is set. -+ */ -+ switch ( msr ) -+ { -+ case MSR_IA32_VMX_TRUE_PINBASED_CTLS: -+ case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: -+ case MSR_IA32_VMX_TRUE_EXIT_CTLS: -+ case MSR_IA32_VMX_TRUE_ENTRY_CTLS: -+ if ( !(vmx_basic_msr & VMX_BASIC_DEFAULT1_ZERO) ) -+ return 0; -+ break; -+ } -+ - rdmsrl(msr, host_data); - - /* ---- a/xen/include/asm-x86/hvm/vmx/vmcs.h -+++ b/xen/include/asm-x86/hvm/vmx/vmcs.h -@@ -284,6 +284,8 @@ extern bool_t cpu_has_vmx_ins_outs_instr - */ - #define VMX_BASIC_DEFAULT1_ZERO (1ULL << 55) - -+extern u64 vmx_basic_msr; -+ - /* Guest interrupt status */ - #define VMX_GUEST_INTR_STATUS_SUBFIELD_BITMASK 0x0FF - #define VMX_GUEST_INTR_STATUS_SVI_OFFSET 8 diff --git a/524e98b1-Nested-VMX-fix-IA32_VMX_CR4_FIXED1-msr-emulation.patch b/524e98b1-Nested-VMX-fix-IA32_VMX_CR4_FIXED1-msr-emulation.patch deleted file mode 100644 index f4dd99f..0000000 --- a/524e98b1-Nested-VMX-fix-IA32_VMX_CR4_FIXED1-msr-emulation.patch +++ /dev/null @@ -1,115 +0,0 @@ -# Commit c6f92aed0e209df823d2cb5780dbb1ea12fc6d4a -# Date 2013-10-04 12:30:09 +0200 -# Author Yang Zhang -# Committer Jan Beulich -Nested VMX: fix IA32_VMX_CR4_FIXED1 msr emulation - -Currently, it use hardcode value for IA32_VMX_CR4_FIXED1. This is wrong. -We should check guest's cpuid to know which bits are writeable in CR4 by guest -and allow the guest to set the corresponding bit only when guest has the feature. - -Signed-off-by: Yang Zhang - -Cleanup. - -Signed-off-by: Jan Beulich -Acked-by: Jun Nakajima - ---- a/xen/arch/x86/hvm/vmx/vvmx.c -+++ b/xen/arch/x86/hvm/vmx/vvmx.c -@@ -1814,7 +1814,7 @@ int nvmx_handle_invvpid(struct cpu_user_ - int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content) - { - struct vcpu *v = current; -- unsigned int ecx, dummy; -+ unsigned int eax, ebx, ecx, edx, dummy; - u64 data = 0, host_data = 0; - int r = 1; - -@@ -1822,7 +1822,7 @@ int nvmx_msr_read_intercept(unsigned int - return 0; - - /* VMX capablity MSRs are available only when guest supports VMX. 
*/ -- hvm_cpuid(0x1, &dummy, &dummy, &ecx, &dummy); -+ hvm_cpuid(0x1, &dummy, &dummy, &ecx, &edx); - if ( !(ecx & cpufeat_mask(X86_FEATURE_VMXE)) ) - return 0; - -@@ -1946,8 +1946,55 @@ int nvmx_msr_read_intercept(unsigned int - data = X86_CR4_VMXE; - break; - case MSR_IA32_VMX_CR4_FIXED1: -- /* allow 0-settings except SMXE */ -- data = 0x267ff & ~X86_CR4_SMXE; -+ if ( edx & cpufeat_mask(X86_FEATURE_VME) ) -+ data |= X86_CR4_VME | X86_CR4_PVI; -+ if ( edx & cpufeat_mask(X86_FEATURE_TSC) ) -+ data |= X86_CR4_TSD; -+ if ( edx & cpufeat_mask(X86_FEATURE_DE) ) -+ data |= X86_CR4_DE; -+ if ( edx & cpufeat_mask(X86_FEATURE_PSE) ) -+ data |= X86_CR4_PSE; -+ if ( edx & cpufeat_mask(X86_FEATURE_PAE) ) -+ data |= X86_CR4_PAE; -+ if ( edx & cpufeat_mask(X86_FEATURE_MCE) ) -+ data |= X86_CR4_MCE; -+ if ( edx & cpufeat_mask(X86_FEATURE_PGE) ) -+ data |= X86_CR4_PGE; -+ if ( edx & cpufeat_mask(X86_FEATURE_FXSR) ) -+ data |= X86_CR4_OSFXSR; -+ if ( edx & cpufeat_mask(X86_FEATURE_XMM) ) -+ data |= X86_CR4_OSXMMEXCPT; -+ if ( ecx & cpufeat_mask(X86_FEATURE_VMXE) ) -+ data |= X86_CR4_VMXE; -+ if ( ecx & cpufeat_mask(X86_FEATURE_SMXE) ) -+ data |= X86_CR4_SMXE; -+ if ( ecx & cpufeat_mask(X86_FEATURE_PCID) ) -+ data |= X86_CR4_PCIDE; -+ if ( ecx & cpufeat_mask(X86_FEATURE_XSAVE) ) -+ data |= X86_CR4_OSXSAVE; -+ -+ hvm_cpuid(0x0, &eax, &dummy, &dummy, &dummy); -+ switch ( eax ) -+ { -+ default: -+ hvm_cpuid(0xa, &eax, &dummy, &dummy, &dummy); -+ /* Check whether guest has the perf monitor feature. */ -+ if ( (eax & 0xff) && (eax & 0xff00) ) -+ data |= X86_CR4_PCE; -+ /* fall through */ -+ case 0x7 ... 0x9: -+ ecx = 0; -+ hvm_cpuid(0x7, &dummy, &ebx, &ecx, &dummy); -+ if ( ebx & cpufeat_mask(X86_FEATURE_FSGSBASE) ) -+ data |= X86_CR4_FSGSBASE; -+ if ( ebx & cpufeat_mask(X86_FEATURE_SMEP) ) -+ data |= X86_CR4_SMEP; -+ if ( ebx & cpufeat_mask(X86_FEATURE_SMAP) ) -+ data |= X86_CR4_SMAP; -+ /* fall through */ -+ case 0x0 ... 0x6: -+ break; -+ } - break; - case MSR_IA32_VMX_MISC: - /* Do not support CR3-target feature now */ ---- a/xen/include/asm-x86/cpufeature.h -+++ b/xen/include/asm-x86/cpufeature.h -@@ -148,6 +148,7 @@ - #define X86_FEATURE_INVPCID (7*32+10) /* Invalidate Process Context ID */ - #define X86_FEATURE_RTM (7*32+11) /* Restricted Transactional Memory */ - #define X86_FEATURE_NO_FPU_SEL (7*32+13) /* FPU CS/DS stored as zero */ -+#define X86_FEATURE_SMAP (7*32+20) /* Supervisor Mode Access Prevention */ - - #define cpu_has(c, bit) test_bit(bit, (c)->x86_capability) - #define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability) ---- a/xen/include/asm-x86/processor.h -+++ b/xen/include/asm-x86/processor.h -@@ -87,6 +87,7 @@ - #define X86_CR4_PCIDE 0x20000 /* enable PCID */ - #define X86_CR4_OSXSAVE 0x40000 /* enable XSAVE/XRSTOR */ - #define X86_CR4_SMEP 0x100000/* enable SMEP */ -+#define X86_CR4_SMAP 0x200000/* enable SMAP */ - - /* - * Trap/fault mnemonics. diff --git a/524e9dc0-xsm-forbid-PV-guest-console-reads.patch b/524e9dc0-xsm-forbid-PV-guest-console-reads.patch deleted file mode 100644 index fac4aa4..0000000 --- a/524e9dc0-xsm-forbid-PV-guest-console-reads.patch +++ /dev/null @@ -1,28 +0,0 @@ -# Commit 65ba631bcb62c79eb33ebfde8a0471fd012c37a8 -# Date 2013-10-04 12:51:44 +0200 -# Author Daniel De Graaf -# Committer Jan Beulich -xsm: forbid PV guest console reads - -The CONSOLEIO_read operation was incorrectly allowed to PV guests if the -hypervisor was compiled in debug mode (with VERBOSE defined). 
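-
-An equivalent formulation of the corrected hook, which may make the
-policy easier to see (sketch only; the actual change follows):
-
-    static int console_io_policy(struct domain *d, int cmd)
-    {
-        bool_t dbg = 0;
-
-    #ifdef VERBOSE
-        dbg = (cmd == CONSOLEIO_write);
-    #endif
-        return xsm_default_action(dbg ? XSM_HOOK : XSM_PRIV, d, NULL);
-    }
-
-Reads are thus always privileged; writes remain open in debug builds.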
- -Reported-by: Jan Beulich -Signed-off-by: Daniel De Graaf - ---- a/xen/include/xsm/dummy.h -+++ b/xen/include/xsm/dummy.h -@@ -222,10 +222,10 @@ static XSM_INLINE int xsm_console_io(XSM - { - XSM_ASSERT_ACTION(XSM_OTHER); - #ifdef VERBOSE -- return xsm_default_action(XSM_HOOK, current->domain, NULL); --#else -- return xsm_default_action(XSM_PRIV, current->domain, NULL); -+ if ( cmd == CONSOLEIO_write ) -+ return xsm_default_action(XSM_HOOK, d, NULL); - #endif -+ return xsm_default_action(XSM_PRIV, d, NULL); - } - - static XSM_INLINE int xsm_profile(XSM_DEFAULT_ARG struct domain *d, int op) diff --git a/5256a979-x86-check-segment-descriptor-read-result-in-64-bit-OUTS-emulation.patch b/5256a979-x86-check-segment-descriptor-read-result-in-64-bit-OUTS-emulation.patch deleted file mode 100644 index b298a33..0000000 --- a/5256a979-x86-check-segment-descriptor-read-result-in-64-bit-OUTS-emulation.patch +++ /dev/null @@ -1,43 +0,0 @@ -References: bnc#842511 CVE-2013-4368 XSA-67 - -# Commit 0771faba163769089c9f05f7f76b63e397677613 -# Date 2013-10-10 15:19:53 +0200 -# Author Matthew Daley -# Committer Jan Beulich -x86: check segment descriptor read result in 64-bit OUTS emulation - -When emulating such an operation from a 64-bit context (CS has long -mode set), and the data segment is overridden to FS/GS, the result of -reading the overridden segment's descriptor (read_descriptor) is not -checked. If it fails, data_base is left uninitialized. - -This can lead to 8 bytes of Xen's stack being leaked to the guest -(implicitly, i.e. via the address given in a #PF). - -Coverity-ID: 1055116 - -This is CVE-2013-4368 / XSA-67. - -Signed-off-by: Matthew Daley - -Fix formatting. - -Signed-off-by: Jan Beulich - ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -1990,10 +1990,10 @@ static int emulate_privileged_op(struct - break; - } - } -- else -- read_descriptor(data_sel, v, regs, -- &data_base, &data_limit, &ar, -- 0); -+ else if ( !read_descriptor(data_sel, v, regs, -+ &data_base, &data_limit, &ar, 0) || -+ !(ar & _SEGMENT_S) || !(ar & _SEGMENT_P) ) -+ goto fail; - data_limit = ~0UL; - ar = _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P; - } diff --git a/5256be57-libxl-fix-vif-rate-parsing.patch b/5256be57-libxl-fix-vif-rate-parsing.patch deleted file mode 100644 index c9f16b3..0000000 --- a/5256be57-libxl-fix-vif-rate-parsing.patch +++ /dev/null @@ -1,71 +0,0 @@ -References: bnc#842512 CVE-2013-4369 XSA-68 - -# Commit c53702cee1d6f9f1b72f0cae0b412e21bcda8724 -# Date 2013-10-10 15:48:55 +0100 -# Author Ian Jackson -# Committer Ian Jackson -libxl: fix vif rate parsing - -strtok can return NULL here. We don't need to use strtok anyway, so just -use a simple strchr method. - -Coverity-ID: 1055642 - -This is CVE-2013-4369 / XSA-68 - -Signed-off-by: Matthew Daley - -Fix type. Add test case - -Signed-off-by: Ian Campbell - ---- a/tools/libxl/check-xl-vif-parse -+++ b/tools/libxl/check-xl-vif-parse -@@ -206,4 +206,8 @@ expected -# Committer Ian Jackson -tools/ocaml: fix erroneous free of cpumap in stub_xc_vcpu_getaffinity - -Not sure how it got there... 
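-
-The pre-fix flow, spelled out (condensed from the hunk below):
-
-    retval = xc_vcpu_getaffinity(_H(xch), _D(domid), Int_val(vcpu),
-                                 c_cpumap);
-    free(c_cpumap);          /* first free, unconditional */
-    if (retval < 0) {
-        free(c_cpumap);      /* second free on the error path: UB */
-        failwith_xc(_H(xch));
-    }
-
-Dropping the unconditional free leaves exactly one release per path.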
- -Coverity-ID: 1056196 - -This is CVE-2013-4370 / XSA-69 - -Signed-off-by: Matthew Daley -Acked-by: Ian Campbell - ---- a/tools/ocaml/libs/xc/xenctrl_stubs.c -+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c -@@ -461,8 +461,6 @@ CAMLprim value stub_xc_vcpu_getaffinity( - - retval = xc_vcpu_getaffinity(_H(xch), _D(domid), - Int_val(vcpu), c_cpumap); -- free(c_cpumap); -- - if (retval < 0) { - free(c_cpumap); - failwith_xc(_H(xch)); diff --git a/5256be92-libxl-fix-out-of-memory-error-handling-in-libxl_list_cpupool.patch b/5256be92-libxl-fix-out-of-memory-error-handling-in-libxl_list_cpupool.patch deleted file mode 100644 index 5585156..0000000 --- a/5256be92-libxl-fix-out-of-memory-error-handling-in-libxl_list_cpupool.patch +++ /dev/null @@ -1,28 +0,0 @@ -References: bnc#842514 CVE-2013-4371 XSA-70 - -# Commit 4c37ed562224295c0f8b00211287d57cae629782 -# Date 2013-10-10 15:49:54 +0100 -# Author Matthew Daley -# Committer Ian Jackson -libxl: fix out-of-memory error handling in libxl_list_cpupool - -...otherwise it will return freed memory. All the current users of this -function check already for a NULL return, so use that. - -Coverity-ID: 1056194 - -This is CVE-2013-4371 / XSA-70 - -Signed-off-by: Matthew Daley -Acked-by: Ian Campbell - ---- a/tools/libxl/libxl.c -+++ b/tools/libxl/libxl.c -@@ -649,6 +649,7 @@ libxl_cpupoolinfo * libxl_list_cpupool(l - if (!tmp) { - LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "allocating cpupool info"); - libxl_cpupoolinfo_list_free(ptr, i); -+ ptr = NULL; - goto out; - } - ptr = tmp; diff --git a/5257a89a-x86-correct-LDT-checks.patch b/5257a89a-x86-correct-LDT-checks.patch deleted file mode 100644 index b0bb2bf..0000000 --- a/5257a89a-x86-correct-LDT-checks.patch +++ /dev/null @@ -1,176 +0,0 @@ -# Commit 40d66baa46ca8a9ffa6df3e063a967d08ec92bcf -# Date 2013-10-11 09:28:26 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86: correct LDT checks - -- MMUEXT_SET_LDT should behave as similarly to the LLDT instruction as - possible: fail only if the base address is non-canonical -- instead LDT descriptor accesses should fault if the descriptor - address ends up being non-canonical (by ensuring this we at once - avoid reading an entry from the mach-to-phys table and consider it a - page table entry) -- fault propagation on using LDT selectors must distinguish #PF and #GP - (the latter must be raised for a non-canonical descriptor address, - which also applies to several other uses of propagate_page_fault(), - and hence the problem is being fixed there) -- map_ldt_shadow_page() should properly wrap addresses for 32-bit VMs - -At once remove the odd invokation of map_ldt_shadow_page() from the -MMUEXT_SET_LDT handler: There's nothing really telling us that the -first LDT page is going to be preferred over others. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Acked-by: Keir Fraser - ---- a/xen/arch/x86/domain.c -+++ b/xen/arch/x86/domain.c -@@ -674,12 +674,7 @@ int arch_set_info_guest( - fixup_guest_code_selector(d, c.nat->trap_ctxt[i].cs); - } - -- /* LDT safety checks. */ -- if ( ((c.nat->ldt_base & (PAGE_SIZE-1)) != 0) || -- (c.nat->ldt_ents > 8192) || -- !array_access_ok(c.nat->ldt_base, -- c.nat->ldt_ents, -- LDT_ENTRY_SIZE) ) -+ if ( !__addr_ok(c.nat->ldt_base) ) - return -EINVAL; - } - else -@@ -692,15 +687,12 @@ int arch_set_info_guest( - - for ( i = 0; i < ARRAY_SIZE(c.cmp->trap_ctxt); i++ ) - fixup_guest_code_selector(d, c.cmp->trap_ctxt[i].cs); -- -- /* LDT safety checks. 
*/ -- if ( ((c.cmp->ldt_base & (PAGE_SIZE-1)) != 0) || -- (c.cmp->ldt_ents > 8192) || -- !compat_array_access_ok(c.cmp->ldt_base, -- c.cmp->ldt_ents, -- LDT_ENTRY_SIZE) ) -- return -EINVAL; - } -+ -+ /* LDT safety checks. */ -+ if ( ((c(ldt_base) & (PAGE_SIZE - 1)) != 0) || -+ (c(ldt_ents) > 8192) ) -+ return -EINVAL; - } - - v->fpu_initialised = !!(flags & VGCF_I387_VALID); ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -582,6 +582,8 @@ int map_ldt_shadow_page(unsigned int off - - BUG_ON(unlikely(in_irq())); - -+ if ( is_pv_32bit_domain(d) ) -+ gva = (u32)gva; - guest_get_eff_kern_l1e(v, gva, &l1e); - if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) ) - return 0; -@@ -3229,9 +3231,8 @@ long do_mmuext_op( - MEM_LOG("ignoring SET_LDT hypercall from external domain"); - okay = 0; - } -- else if ( ((ptr & (PAGE_SIZE-1)) != 0) || -- (ents > 8192) || -- !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) ) -+ else if ( ((ptr & (PAGE_SIZE - 1)) != 0) || !__addr_ok(ptr) || -+ (ents > 8192) ) - { - okay = 0; - MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents); -@@ -3244,8 +3245,6 @@ long do_mmuext_op( - curr->arch.pv_vcpu.ldt_base = ptr; - curr->arch.pv_vcpu.ldt_ents = ents; - load_LDT(curr); -- if ( ents != 0 ) -- (void)map_ldt_shadow_page(0); - } - break; - } ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -1070,12 +1070,24 @@ static void reserved_bit_page_fault( - show_execution_state(regs); - } - --void propagate_page_fault(unsigned long addr, u16 error_code) -+struct trap_bounce *propagate_page_fault(unsigned long addr, u16 error_code) - { - struct trap_info *ti; - struct vcpu *v = current; - struct trap_bounce *tb = &v->arch.pv_vcpu.trap_bounce; - -+ if ( unlikely(!is_canonical_address(addr)) ) -+ { -+ ti = &v->arch.pv_vcpu.trap_ctxt[TRAP_gp_fault]; -+ tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE; -+ tb->error_code = 0; -+ tb->cs = ti->cs; -+ tb->eip = ti->address; -+ if ( TI_GET_IF(ti) ) -+ tb->flags |= TBF_INTERRUPT; -+ return tb; -+ } -+ - v->arch.pv_vcpu.ctrlreg[2] = addr; - arch_set_cr2(v, addr); - -@@ -1102,6 +1114,8 @@ void propagate_page_fault(unsigned long - - if ( unlikely(error_code & PFEC_reserved_bit) ) - reserved_bit_page_fault(addr, guest_cpu_user_regs()); -+ -+ return NULL; - } - - static int handle_gdt_ldt_mapping_fault( -@@ -1135,13 +1149,16 @@ static int handle_gdt_ldt_mapping_fault( - } - else - { -+ struct trap_bounce *tb; -+ - /* In hypervisor mode? Leave it to the #PF handler to fix up. */ - if ( !guest_mode(regs) ) - return 0; -- /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */ -- propagate_page_fault( -- curr->arch.pv_vcpu.ldt_base + offset, -- regs->error_code); -+ /* In guest mode? Propagate fault to guest, with adjusted %cr2. 
*/ -+ tb = propagate_page_fault(curr->arch.pv_vcpu.ldt_base + offset, -+ regs->error_code); -+ if ( tb ) -+ tb->error_code = ((u16)offset & ~3) | 4; - } - } - else ---- a/xen/include/asm-x86/mm.h -+++ b/xen/include/asm-x86/mm.h -@@ -555,7 +555,7 @@ int new_guest_cr3(unsigned long pfn); - void make_cr3(struct vcpu *v, unsigned long mfn); - void update_cr3(struct vcpu *v); - int vcpu_destroy_pagetables(struct vcpu *); --void propagate_page_fault(unsigned long addr, u16 error_code); -+struct trap_bounce *propagate_page_fault(unsigned long addr, u16 error_code); - void *do_page_walk(struct vcpu *v, unsigned long addr); - - int __sync_local_execstate(void); ---- a/xen/include/asm-x86/paging.h -+++ b/xen/include/asm-x86/paging.h -@@ -386,7 +386,8 @@ guest_get_eff_l1e(struct vcpu *v, unsign - if ( likely(!paging_mode_translate(v->domain)) ) - { - ASSERT(!paging_mode_external(v->domain)); -- if ( __copy_from_user(eff_l1e, -+ if ( !__addr_ok(addr) || -+ __copy_from_user(eff_l1e, - &__linear_l1_table[l1_linear_offset(addr)], - sizeof(l1_pgentry_t)) != 0 ) - *(l1_pgentry_t *)eff_l1e = l1e_empty(); diff --git a/5257a8e7-x86-add-address-validity-check-to-guest_map_l1e.patch b/5257a8e7-x86-add-address-validity-check-to-guest_map_l1e.patch deleted file mode 100644 index 1a99973..0000000 --- a/5257a8e7-x86-add-address-validity-check-to-guest_map_l1e.patch +++ /dev/null @@ -1,26 +0,0 @@ -# Commit d06a0d715ec1423b6c42141ab1b0ff69a3effb56 -# Date 2013-10-11 09:29:43 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86: add address validity check to guest_map_l1e() - -Just like for guest_get_eff_l1e() this prevents accessing as page -tables (and with the wrong memory attribute) internal data inside Xen -happening to be mapped with 1Gb pages. - -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Acked-by: Keir Fraser - ---- a/xen/include/asm-x86/paging.h -+++ b/xen/include/asm-x86/paging.h -@@ -360,7 +360,8 @@ guest_map_l1e(struct vcpu *v, unsigned l - return paging_get_hostmode(v)->guest_map_l1e(v, addr, gl1mfn); - - /* Find this l1e and its enclosing l1mfn in the linear map */ -- if ( __copy_from_user(&l2e, -+ if ( !__addr_ok(addr) || -+ __copy_from_user(&l2e, - &__linear_l2_table[l2_linear_offset(addr)], - sizeof(l2_pgentry_t)) != 0 ) - return NULL; diff --git a/5257a944-x86-check-for-canonical-address-before-doing-page-walks.patch b/5257a944-x86-check-for-canonical-address-before-doing-page-walks.patch deleted file mode 100644 index 1720d85..0000000 --- a/5257a944-x86-check-for-canonical-address-before-doing-page-walks.patch +++ /dev/null @@ -1,38 +0,0 @@ -# Commit 6fd9b0361e2eb5a7f12bdd5cbf7e42c0d1937d26 -# Date 2013-10-11 09:31:16 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86: check for canonical address before doing page walks - -... as there doesn't really exists any valid mapping for them. - -Particularly in the case of do_page_walk() this also avoids returning -non-NULL for such invalid input. 
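-
-For reference, "canonical" for 48-bit virtual addresses means bits
-63..47 all equal bit 47. One way to write the test, equivalent in
-effect to the is_canonical_address() check used below
-(is_canonical_va48() is an invented name):
-
-    static inline bool_t is_canonical_va48(uint64_t addr)
-    {
-        return ((int64_t)(addr << 16) >> 16) == (int64_t)addr;
-    }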
- -Suggested-by: Andrew Cooper -Signed-off-by: Jan Beulich -Reviewed-by: Andrew Cooper -Acked-by: Keir Fraser - ---- a/xen/arch/x86/x86_64/mm.c -+++ b/xen/arch/x86/x86_64/mm.c -@@ -135,7 +135,7 @@ void *do_page_walk(struct vcpu *v, unsig - l2_pgentry_t l2e, *l2t; - l1_pgentry_t l1e, *l1t; - -- if ( is_hvm_vcpu(v) ) -+ if ( is_hvm_vcpu(v) || !is_canonical_address(addr) ) - return NULL; - - l4t = map_domain_page(mfn); ---- a/xen/arch/x86/x86_64/traps.c -+++ b/xen/arch/x86/x86_64/traps.c -@@ -169,6 +169,8 @@ void show_page_walk(unsigned long addr) - l1_pgentry_t l1e, *l1t; - - printk("Pagetable walk from %016lx:\n", addr); -+ if ( !is_canonical_address(addr) ) -+ return; - - l4t = map_domain_page(mfn); - l4e = l4t[l4_table_offset(addr)]; diff --git a/52654798-x86-xsave-also-save-restore-XCR0-across-suspend-ACPI-S3.patch b/52654798-x86-xsave-also-save-restore-XCR0-across-suspend-ACPI-S3.patch new file mode 100644 index 0000000..a390c8f --- /dev/null +++ b/52654798-x86-xsave-also-save-restore-XCR0-across-suspend-ACPI-S3.patch @@ -0,0 +1,46 @@ +# Commit e47a90e6dca491c0ceea6ffa18055e7e32565e8e +# Date 2013-10-21 17:26:16 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/xsave: also save/restore XCR0 across suspend (ACPI S3) + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Acked-by: Keir Fraser + +--- a/xen/arch/x86/acpi/suspend.c ++++ b/xen/arch/x86/acpi/suspend.c +@@ -13,12 +13,14 @@ + #include + #include + #include ++#include + #include + + static unsigned long saved_lstar, saved_cstar; + static unsigned long saved_sysenter_esp, saved_sysenter_eip; + static unsigned long saved_fs_base, saved_gs_base, saved_kernel_gs_base; + static uint16_t saved_segs[4]; ++static uint64_t saved_xcr0; + + void save_rest_processor_state(void) + { +@@ -38,6 +40,8 @@ void save_rest_processor_state(void) + rdmsrl(MSR_IA32_SYSENTER_ESP, saved_sysenter_esp); + rdmsrl(MSR_IA32_SYSENTER_EIP, saved_sysenter_eip); + } ++ if ( cpu_has_xsave ) ++ saved_xcr0 = get_xcr0(); + } + + +@@ -77,6 +81,9 @@ void restore_rest_processor_state(void) + do_set_segment_base(SEGBASE_GS_USER_SEL, saved_segs[3]); + } + ++ if ( cpu_has_xsave && !set_xcr0(saved_xcr0) ) ++ BUG(); ++ + /* Maybe load the debug registers. */ + BUG_ON(is_hvm_vcpu(curr)); + if ( !is_idle_vcpu(curr) && curr->arch.debugreg[7] ) diff --git a/526e43d4-x86-refine-address-validity-checks-before-accessing-page-tables.patch b/526e43d4-x86-refine-address-validity-checks-before-accessing-page-tables.patch new file mode 100644 index 0000000..3b9fba3 --- /dev/null +++ b/526e43d4-x86-refine-address-validity-checks-before-accessing-page-tables.patch @@ -0,0 +1,62 @@ +# Commit 343cad8c70585c4dba8afc75e1ec1b7610605ab2 +# Date 2013-10-28 12:00:36 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86: refine address validity checks before accessing page tables + +In commit 40d66baa ("x86: correct LDT checks") and d06a0d71 ("x86: add +address validity check to guest_map_l1e()") I didn't really pay +attention to the fact that these checks would better be done before the +paging_mode_translate() ones, as there's also no equivalent check down +the shadow code paths involved here (at least not up to the first use +of the address), and such generic checks shouldn't really be done by +particular backend functions anyway. 
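+
+For context, a sketch of the hoisted predicate's x86-64 shape (shown
+only to make the ordering argument concrete, not its verbatim
+definition):
+
+    #define __addr_ok(addr) \
+        (((unsigned long)(addr) < (1UL<<47)) || \
+         ((unsigned long)(addr) >= HYPERVISOR_VIRT_END))
+
+Both the non-canonical hole and Xen's own range are thus rejected
+before any mode-specific walk can touch them.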
+ +Signed-off-by: Jan Beulich +Acked-by: Tim Deegan + +--- a/xen/include/asm-x86/paging.h ++++ b/xen/include/asm-x86/paging.h +@@ -356,12 +356,14 @@ guest_map_l1e(struct vcpu *v, unsigned l + { + l2_pgentry_t l2e; + ++ if ( unlikely(!__addr_ok(addr)) ) ++ return NULL; ++ + if ( unlikely(paging_mode_translate(v->domain)) ) + return paging_get_hostmode(v)->guest_map_l1e(v, addr, gl1mfn); + + /* Find this l1e and its enclosing l1mfn in the linear map */ +- if ( !__addr_ok(addr) || +- __copy_from_user(&l2e, ++ if ( __copy_from_user(&l2e, + &__linear_l2_table[l2_linear_offset(addr)], + sizeof(l2_pgentry_t)) != 0 ) + return NULL; +@@ -382,16 +384,21 @@ guest_unmap_l1e(struct vcpu *v, void *p) + + /* Read the guest's l1e that maps this address. */ + static inline void +-guest_get_eff_l1e(struct vcpu *v, unsigned long addr, void *eff_l1e) ++guest_get_eff_l1e(struct vcpu *v, unsigned long addr, l1_pgentry_t *eff_l1e) + { ++ if ( unlikely(!__addr_ok(addr)) ) ++ { ++ *eff_l1e = l1e_empty(); ++ return; ++ } ++ + if ( likely(!paging_mode_translate(v->domain)) ) + { + ASSERT(!paging_mode_external(v->domain)); +- if ( !__addr_ok(addr) || +- __copy_from_user(eff_l1e, ++ if ( __copy_from_user(eff_l1e, + &__linear_l1_table[l1_linear_offset(addr)], + sizeof(l1_pgentry_t)) != 0 ) +- *(l1_pgentry_t *)eff_l1e = l1e_empty(); ++ *eff_l1e = l1e_empty(); + return; + } + diff --git a/526f786a-fix-locking-in-cpu_disable_scheduler.patch b/526f786a-fix-locking-in-cpu_disable_scheduler.patch new file mode 100644 index 0000000..855fbd7 --- /dev/null +++ b/526f786a-fix-locking-in-cpu_disable_scheduler.patch @@ -0,0 +1,74 @@ +# Commit 41a0cc9e26160a89245c9ba3233e3f70bf9cd4b4 +# Date 2013-10-29 09:57:14 +0100 +# Author Jan Beulich +# Committer Jan Beulich +fix locking in cpu_disable_scheduler() + +So commit eedd6039 ("scheduler: adjust internal locking interface") +uncovered - by now using proper spin lock constructs - a bug after all: +When bringing down a CPU, cpu_disable_scheduler() gets called with +interrupts disabled, and hence the use of vcpu_schedule_lock_irq() was +never really correct (i.e. the caller ended up with interrupts enabled +despite having disabled them explicitly). + +Fixing this however surfaced another problem: The call path +vcpu_migrate() -> evtchn_move_pirqs() wants to acquire the event lock, +which however is a non-IRQ-safe once, and hence check_lock() doesn't +like this lock to be acquired when interrupts are already off. As we're +in stop-machine context here, getting things wrong wrt interrupt state +management during lock acquire/release is out of question though, so +the simple solution to this appears to be to just suppress spin lock +debugging for the period of time while the stop machine callback gets +run. 
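+
+The interrupt-state hazard in miniature (schematic, using the generic
+primitives rather than the scheduler wrappers):
+
+    local_irq_disable();       /* stop-machine context: IRQs off */
+    spin_lock_irq(&lock);      /* fine: disables IRQs once more */
+    /* ... critical section ... */
+    spin_unlock_irq(&lock);    /* BUG: unconditionally re-enables */
+
+The irqsave/irqrestore pair switched to below preserves the caller's
+interrupt state instead of assuming interrupts were on.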
+ +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Acked-by: Keir Fraser + +--- a/xen/common/schedule.c ++++ b/xen/common/schedule.c +@@ -600,7 +600,8 @@ int cpu_disable_scheduler(unsigned int c + { + for_each_vcpu ( d, v ) + { +- spinlock_t *lock = vcpu_schedule_lock_irq(v); ++ unsigned long flags; ++ spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags); + + cpumask_and(&online_affinity, v->cpu_affinity, c->cpu_valid); + if ( cpumask_empty(&online_affinity) && +@@ -621,14 +622,12 @@ int cpu_disable_scheduler(unsigned int c + if ( v->processor == cpu ) + { + set_bit(_VPF_migrating, &v->pause_flags); +- vcpu_schedule_unlock_irq(lock, v); ++ vcpu_schedule_unlock_irqrestore(lock, flags, v); + vcpu_sleep_nosync(v); + vcpu_migrate(v); + } + else +- { +- vcpu_schedule_unlock_irq(lock, v); +- } ++ vcpu_schedule_unlock_irqrestore(lock, flags, v); + + /* + * A vcpu active in the hypervisor will not be migratable. +--- a/xen/common/stop_machine.c ++++ b/xen/common/stop_machine.c +@@ -110,6 +110,7 @@ int stop_machine_run(int (*fn)(void *), + local_irq_disable(); + stopmachine_set_state(STOPMACHINE_DISABLE_IRQ); + stopmachine_wait_state(); ++ spin_debug_disable(); + + stopmachine_set_state(STOPMACHINE_INVOKE); + if ( (cpu == smp_processor_id()) || (cpu == NR_CPUS) ) +@@ -117,6 +118,7 @@ int stop_machine_run(int (*fn)(void *), + stopmachine_wait_state(); + ret = stopmachine_data.fn_result; + ++ spin_debug_enable(); + stopmachine_set_state(STOPMACHINE_EXIT); + stopmachine_wait_state(); + local_irq_enable(); diff --git a/5277639c-gnttab-correct-locking-order-reversal.patch b/5277639c-gnttab-correct-locking-order-reversal.patch new file mode 100644 index 0000000..7266759 --- /dev/null +++ b/5277639c-gnttab-correct-locking-order-reversal.patch @@ -0,0 +1,101 @@ +References: bnc#848657 CVE-2013-4494 XSA-73 + +# HG changeset patch +# User Andrew Cooper +# Date 1383556439 -3600 +# Node ID f63cb4c06a991a69b0f11789c88ef069eb39f64c +# Parent c30539bc5b235c9ce657f483c2305212ad1cdfba +gnttab: correct locking order reversal + +Coverity ID 1087189 + +Correct a lock order reversal between a domains page allocation and grant +table locks. + +This is CVE-2013-4494 / XSA-73. + +Signed-off-by: Andrew Cooper + +Consolidate error handling. + +Signed-off-by: Jan Beulich +Reviewed-by: Keir Fraser +Tested-by: Matthew Daley + +--- a/xen/common/grant_table.c ++++ b/xen/common/grant_table.c +@@ -1518,6 +1518,8 @@ gnttab_transfer( + + for ( i = 0; i < count; i++ ) + { ++ bool_t okay; ++ + if (i && hypercall_preempt_check()) + return i; + +@@ -1626,16 +1628,18 @@ gnttab_transfer( + * pages when it is dying. + */ + if ( unlikely(e->is_dying) || +- unlikely(e->tot_pages >= e->max_pages) || +- unlikely(!gnttab_prepare_for_transfer(e, d, gop.ref)) ) ++ unlikely(e->tot_pages >= e->max_pages) ) + { +- if ( !e->is_dying ) +- gdprintk(XENLOG_INFO, "gnttab_transfer: " +- "Transferee has no reservation " +- "headroom (%d,%d) or provided a bad grant ref (%08x) " +- "or is dying (%d)\n", +- e->tot_pages, e->max_pages, gop.ref, e->is_dying); + spin_unlock(&e->page_alloc_lock); ++ ++ if ( e->is_dying ) ++ gdprintk(XENLOG_INFO, "gnttab_transfer: " ++ "Transferee (d%d) is dying\n", e->domain_id); ++ else ++ gdprintk(XENLOG_INFO, "gnttab_transfer: " ++ "Transferee (d%d) has no headroom (tot %u, max %u)\n", ++ e->domain_id, e->tot_pages, e->max_pages); ++ + rcu_unlock_domain(e); + put_gfn(d, gop.mfn); + page->count_info &= ~(PGC_count_mask|PGC_allocated); +@@ -1647,6 +1651,38 @@ gnttab_transfer( + /* Okay, add the page to 'e'. 
*/ + if ( unlikely(domain_adjust_tot_pages(e, 1) == 1) ) + get_knownalive_domain(e); ++ ++ /* ++ * We must drop the lock to avoid a possible deadlock in ++ * gnttab_prepare_for_transfer. We have reserved a page in e so can ++ * safely drop the lock and re-aquire it later to add page to the ++ * pagelist. ++ */ ++ spin_unlock(&e->page_alloc_lock); ++ okay = gnttab_prepare_for_transfer(e, d, gop.ref); ++ spin_lock(&e->page_alloc_lock); ++ ++ if ( unlikely(!okay) || unlikely(e->is_dying) ) ++ { ++ bool_t drop_dom_ref = !domain_adjust_tot_pages(e, -1); ++ ++ spin_unlock(&e->page_alloc_lock); ++ ++ if ( okay /* i.e. e->is_dying due to the surrounding if() */ ) ++ gdprintk(XENLOG_INFO, "gnttab_transfer: " ++ "Transferee (d%d) is now dying\n", e->domain_id); ++ ++ if ( drop_dom_ref ) ++ put_domain(e); ++ rcu_unlock_domain(e); ++ ++ put_gfn(d, gop.mfn); ++ page->count_info &= ~(PGC_count_mask|PGC_allocated); ++ free_domheap_page(page); ++ gop.status = GNTST_general_error; ++ goto copyback; ++ } ++ + page_list_add_tail(page, &e->page_list); + page_set_owner(page, e); + diff --git a/5277646c-x86-ACPI-x2APIC-guard-against-out-of-range-ACPI-or-APIC-IDs.patch b/5277646c-x86-ACPI-x2APIC-guard-against-out-of-range-ACPI-or-APIC-IDs.patch new file mode 100644 index 0000000..bc95d92 --- /dev/null +++ b/5277646c-x86-ACPI-x2APIC-guard-against-out-of-range-ACPI-or-APIC-IDs.patch @@ -0,0 +1,39 @@ +References: bnc#848014 + +# Commit 2c24cdcce3269f3286790c63821951a1de93c66a +# Date 2013-11-04 10:10:04 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86/ACPI/x2APIC: guard against out of range ACPI or APIC IDs + +Other than for the legacy APIC, the x2APIC MADT entries have valid +ranges possibly extending beyond what our internal arrays can handle, +and hence we need to guard ourselves against corrupting memory here. + +Signed-off-by: Jan Beulich +Reviewed-by: Keir Fraser + +--- a/xen/arch/x86/acpi/boot.c ++++ b/xen/arch/x86/acpi/boot.c +@@ -97,7 +97,20 @@ acpi_parse_x2apic(struct acpi_subtable_h + + acpi_table_print_madt_entry(header); + +- /* Record local apic id only when enabled */ ++ /* Record local apic id only when enabled and fitting. */ ++ if (processor->local_apic_id >= MAX_APICS || ++ processor->uid >= MAX_MADT_ENTRIES) { ++ printk("%sAPIC ID %#x and/or ACPI ID %#x beyond limit" ++ " - processor ignored\n", ++ processor->lapic_flags & ACPI_MADT_ENABLED ? ++ KERN_WARNING "WARNING: " : KERN_INFO, ++ processor->local_apic_id, processor->uid); ++ /* ++ * Must not return an error here, to prevent ++ * acpi_table_parse_entries() from terminating early. ++ */ ++ return 0 /* -ENOSPC */; ++ } + if (processor->lapic_flags & ACPI_MADT_ENABLED) { + x86_acpiid_to_apicid[processor->uid] = + processor->local_apic_id; diff --git a/5277a134-x86-make-sure-memory-block-is-RAM-before-passing-to-the-allocator.patch b/5277a134-x86-make-sure-memory-block-is-RAM-before-passing-to-the-allocator.patch new file mode 100644 index 0000000..f25437b --- /dev/null +++ b/5277a134-x86-make-sure-memory-block-is-RAM-before-passing-to-the-allocator.patch @@ -0,0 +1,27 @@ +References: bnc#842417 + +# Commit 227258983401b7e6091967ffaf22ad83f4ebaf6f +# Date 2013-11-04 14:29:24 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86: make sure memory block is RAM before passing to the allocator + +Memory blocks outside of the always visible 1:1 mapping range get +passed to the allocator separately (once enough other setup was done). +Skipping non-RAM regions, however, was forgotten in adc5afbf ("x86: +support up to 16Tb"). 
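+
+For context, the e820 type values involved (standard values; only the
+first may legitimately reach the heap allocator):
+
+    #define E820_RAM        1
+    #define E820_RESERVED   2
+    #define E820_ACPI       3
+    #define E820_NVS        4
+    #define E820_UNUSABLE   5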
+ +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -1154,6 +1154,8 @@ void __init __start_xen(unsigned long mb + { + uint64_t s, e; + ++ if ( boot_e820.map[i].type != E820_RAM ) ++ continue; + s = (boot_e820.map[i].addr + mask) & ~mask; + e = (boot_e820.map[i].addr + boot_e820.map[i].size) & ~mask; + if ( PFN_DOWN(e) <= limit ) diff --git a/5278f7f9-x86-HVM-32-bit-IN-result-must-be-zero-extended-to-64-bits.patch b/5278f7f9-x86-HVM-32-bit-IN-result-must-be-zero-extended-to-64-bits.patch new file mode 100644 index 0000000..4a3e488 --- /dev/null +++ b/5278f7f9-x86-HVM-32-bit-IN-result-must-be-zero-extended-to-64-bits.patch @@ -0,0 +1,82 @@ +# Commit 9d89100ba8b7b02adb7c2e89ef7c81e734942e7c +# Date 2013-11-05 14:51:53 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86/HVM: 32-bit IN result must be zero-extended to 64 bits + +Just like for all other operations with 32-bit operand size. + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Acked-by: Keir Fraser + +# Commit 1e521eddeb51a9f1bf0e4dd1d17efc873eafae41 +# Date 2013-11-15 11:01:49 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86/HVM: 32-bit IN result must be zero-extended to 64 bits (part 2) + +Just spotted a counterpart of what commit 9d89100b (same title) dealt +with. + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Acked-by: Keir Fraser + +--- a/xen/arch/x86/hvm/io.c ++++ b/xen/arch/x86/hvm/io.c +@@ -221,13 +221,15 @@ int handle_mmio_with_translation(unsigne + return handle_mmio(); + } + +-int handle_pio(uint16_t port, int size, int dir) ++int handle_pio(uint16_t port, unsigned int size, int dir) + { + struct vcpu *curr = current; + struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io; + unsigned long data, reps = 1; + int rc; + ++ ASSERT((size - 1) < 4 && size != 3); ++ + if ( dir == IOREQ_WRITE ) + data = guest_cpu_user_regs()->eax; + +@@ -237,7 +239,12 @@ int handle_pio(uint16_t port, int size, + { + case X86EMUL_OKAY: + if ( dir == IOREQ_READ ) +- memcpy(&guest_cpu_user_regs()->eax, &data, vio->io_size); ++ { ++ if ( size == 4 ) /* Needs zero extension. */ ++ guest_cpu_user_regs()->rax = (uint32_t)data; ++ else ++ memcpy(&guest_cpu_user_regs()->rax, &data, size); ++ } + break; + case X86EMUL_RETRY: + if ( vio->io_state != HVMIO_awaiting_completion ) +@@ -281,8 +288,10 @@ void hvm_io_assist(void) + (void)handle_mmio(); + break; + case HVMIO_handle_pio_awaiting_completion: +- memcpy(&guest_cpu_user_regs()->eax, +- &p->data, vio->io_size); ++ if ( vio->io_size == 4 ) /* Needs zero extension. 
*/ ++ guest_cpu_user_regs()->rax = (uint32_t)p->data; ++ else ++ memcpy(&guest_cpu_user_regs()->rax, &p->data, vio->io_size); + break; + default: + break; +--- a/xen/include/asm-x86/hvm/io.h ++++ b/xen/include/asm-x86/hvm/io.h +@@ -119,7 +119,7 @@ void send_timeoffset_req(unsigned long t + void send_invalidate_req(void); + int handle_mmio(void); + int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn); +-int handle_pio(uint16_t port, int size, int dir); ++int handle_pio(uint16_t port, unsigned int size, int dir); + void hvm_interrupt_post(struct vcpu *v, int vector, int type); + void hvm_io_assist(void); + void hvm_dpci_eoi(struct domain *d, unsigned int guest_irq, diff --git a/527a0a05-call-sched_destroy_domain-before-cpupool_rm_domain.patch b/527a0a05-call-sched_destroy_domain-before-cpupool_rm_domain.patch new file mode 100644 index 0000000..7bc3ffe --- /dev/null +++ b/527a0a05-call-sched_destroy_domain-before-cpupool_rm_domain.patch @@ -0,0 +1,38 @@ +# Commit 117f67350fd18b11ab09d628b4edea3364b09441 +# Date 2013-11-06 10:21:09 +0100 +# Author Nathan Studer +# Committer Jan Beulich +call sched_destroy_domain before cpupool_rm_domain + +The domain destruction code, removes a domain from its cpupool +before attempting to destroy its scheduler information. Since +the scheduler framework uses the domain's cpupool information +to decide on which scheduler ops to use, this results in the +the wrong scheduler's destroy domain function being called +when the cpupool scheduler and the initial scheduler are +different. + +Correct this by destroying the domain's scheduling information +before removing it from the pool. + +Signed-off-by: Nathan Studer +Reviewed-by: Juergen Gross +Reviewed-by: Andrew Cooper +Reviewed-by: George Dunlap +Acked-by: Keir Fraser + +--- a/xen/common/domain.c ++++ b/xen/common/domain.c +@@ -720,10 +720,10 @@ static void complete_domain_destroy(stru + + rangeset_domain_destroy(d); + +- cpupool_rm_domain(d); +- + sched_destroy_domain(d); + ++ cpupool_rm_domain(d); ++ + /* Free page used by xen oprofile buffer. */ + #ifdef CONFIG_XENOPROF + free_xenoprof_pages(d); diff --git a/527cb7d2-x86-hvm-fix-restart-of-RTC-periodic-timer-with-vpt_align-1.patch b/527cb7d2-x86-hvm-fix-restart-of-RTC-periodic-timer-with-vpt_align-1.patch new file mode 100644 index 0000000..ea5ec64 --- /dev/null +++ b/527cb7d2-x86-hvm-fix-restart-of-RTC-periodic-timer-with-vpt_align-1.patch @@ -0,0 +1,29 @@ +# Commit 48535f5798e3e237d9920a74c1ce3802958136c0 +# Date 2013-11-08 11:07:14 +0100 +# Author Kouya Shimura +# Committer Jan Beulich +x86/hvm: fix restart of RTC periodic timer with vpt_align=1 + +The commit 58afa7ef "x86/hvm: Run the RTC periodic timer on a +consistent time series" aligns the RTC periodic timer to the VM's boot time. +However, it's aligned later again to the system time in create_periodic_time() +with vpt_align=1. The next tick might be skipped. 
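+
+A worked example with made-up numbers: period = 10ms, guest booted at
+system time 3ms. The boot-series alignment below schedules the first
+tick for 13ms; create_periodic_time() with vpt_align=1 then rounds
+that expiry up to the 10ms system grid, i.e. 20ms, and the 13ms tick
+is lost. Passing delta = 0 leaves only the vpt_align rounding:
+
+    delta = vpt_align_enabled        /* the HVM_PARAM_VPT_ALIGN test */
+            ? 0 : period - ((NOW() - s->start_time) % period);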
+ +Signed-off-by: Kouya Shimura +Reviewed-by: Jan Beulich +Acked-by: Tim Deegan + +--- a/xen/arch/x86/hvm/rtc.c ++++ b/xen/arch/x86/hvm/rtc.c +@@ -130,7 +130,10 @@ static void rtc_timer_update(RTCState *s + s->pt_code = period_code; + period = 1 << (period_code - 1); /* period in 32 Khz cycles */ + period = DIV_ROUND(period * 1000000000ULL, 32768); /* in ns */ +- delta = period - ((NOW() - s->start_time) % period); ++ if ( v->domain->arch.hvm_domain.params[HVM_PARAM_VPT_ALIGN] ) ++ delta = 0; ++ else ++ delta = period - ((NOW() - s->start_time) % period); + create_periodic_time(v, &s->pt, delta, period, + RTC_IRQ, NULL, s); + } diff --git a/527cb820-x86-EFI-make-trampoline-allocation-more-flexible.patch b/527cb820-x86-EFI-make-trampoline-allocation-more-flexible.patch new file mode 100644 index 0000000..5c9972c --- /dev/null +++ b/527cb820-x86-EFI-make-trampoline-allocation-more-flexible.patch @@ -0,0 +1,107 @@ +References: bnc#833483 + +# Commit c1f2dfe8f6a559bc28935f24e31bb33d17d9713d +# Date 2013-11-08 11:08:32 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86/EFI: make trampoline allocation more flexible + +Certain UEFI implementations reserve all memory below 1Mb at boot time, +making it impossible to properly allocate the chunk necessary for the +trampoline. Fall back to simply grabbing a chunk from EfiBootServices* +regions immediately prior to calling ExitBootServices(). + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/xen/arch/x86/efi/boot.c ++++ b/xen/arch/x86/efi/boot.c +@@ -746,6 +746,22 @@ static void __init relocate_image(unsign + extern const s32 __trampoline_rel_start[], __trampoline_rel_stop[]; + extern const s32 __trampoline_seg_start[], __trampoline_seg_stop[]; + ++static void __init relocate_trampoline(unsigned long phys) ++{ ++ const s32 *trampoline_ptr; ++ ++ trampoline_phys = phys; ++ /* Apply relocations to trampoline. */ ++ for ( trampoline_ptr = __trampoline_rel_start; ++ trampoline_ptr < __trampoline_rel_stop; ++ ++trampoline_ptr ) ++ *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += phys; ++ for ( trampoline_ptr = __trampoline_seg_start; ++ trampoline_ptr < __trampoline_seg_stop; ++ ++trampoline_ptr ) ++ *(u16 *)(*trampoline_ptr + (long)trampoline_ptr) = phys >> 4; ++} ++ + void EFIAPI __init __attribute__((__noreturn__)) + efi_start(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable) + { +@@ -765,7 +781,6 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY + EFI_GRAPHICS_OUTPUT_MODE_INFORMATION *mode_info; + EFI_FILE_HANDLE dir_handle; + union string section = { NULL }, name; +- const s32 *trampoline_ptr; + struct e820entry *e; + u64 efer; + bool_t base_video = 0; +@@ -1268,23 +1283,13 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY + cfg.size = trampoline_end - trampoline_start; + status = efi_bs->AllocatePages(AllocateMaxAddress, EfiLoaderData, + PFN_UP(cfg.size), &cfg.addr); +- if ( EFI_ERROR(status) ) ++ if ( status == EFI_SUCCESS ) ++ relocate_trampoline(cfg.addr); ++ else + { + cfg.addr = 0; +- blexit(L"No memory for trampoline\r\n"); ++ PrintStr(L"Trampoline space cannot be allocated; will try fallback.\r\n"); + } +- trampoline_phys = cfg.addr; +- /* Apply relocations to trampoline. 
*/ +- for ( trampoline_ptr = __trampoline_rel_start; +- trampoline_ptr < __trampoline_rel_stop; +- ++trampoline_ptr ) +- *(u32 *)(*trampoline_ptr + (long)trampoline_ptr) += +- trampoline_phys; +- for ( trampoline_ptr = __trampoline_seg_start; +- trampoline_ptr < __trampoline_seg_stop; +- ++trampoline_ptr ) +- *(u16 *)(*trampoline_ptr + (long)trampoline_ptr) = +- trampoline_phys >> 4; + + /* Initialise L2 identity-map and boot-map page table entries (16MB). */ + for ( i = 0; i < 8; ++i ) +@@ -1400,10 +1405,14 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY + type = E820_RESERVED; + break; + case EfiConventionalMemory: +- case EfiLoaderCode: +- case EfiLoaderData: + case EfiBootServicesCode: + case EfiBootServicesData: ++ if ( !trampoline_phys && desc->PhysicalStart + len <= 0x100000 && ++ len >= cfg.size && desc->PhysicalStart + len > cfg.addr ) ++ cfg.addr = (desc->PhysicalStart + len - cfg.size) & PAGE_MASK; ++ /* fall through */ ++ case EfiLoaderCode: ++ case EfiLoaderData: + if ( desc->Attribute & EFI_MEMORY_WB ) + type = E820_RAM; + else +@@ -1431,6 +1440,12 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SY + ++e820nr; + } + } ++ if ( !trampoline_phys ) ++ { ++ if ( !cfg.addr ) ++ blexit(L"No memory for trampoline"); ++ relocate_trampoline(cfg.addr); ++ } + + status = efi_bs->ExitBootServices(ImageHandle, map_key); + if ( EFI_ERROR(status) ) diff --git a/52809208-nested-VMX-VMLANUCH-VMRESUME-emulation-must-check-permission-1st.patch b/52809208-nested-VMX-VMLANUCH-VMRESUME-emulation-must-check-permission-1st.patch new file mode 100644 index 0000000..4184637 --- /dev/null +++ b/52809208-nested-VMX-VMLANUCH-VMRESUME-emulation-must-check-permission-1st.patch @@ -0,0 +1,62 @@ +References: bnc#849665 CVE-2013-4551 XSA-75 + +# Commit 4e87bc5b03e05123ba5c888f77969140c8ebd1bf +# Date 2013-11-11 09:15:04 +0100 +# Author Jan Beulich +# Committer Jan Beulich +nested VMX: VMLANUCH/VMRESUME emulation must check permission first thing + +Otherwise uninitialized data may be used, leading to crashes. + +This is CVE-2013-4551 / XSA-75. 
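+
+The rule being enforced, reduced to a sketch: perform the privilege
+check before reading any emulation state, so no failure path can
+observe uninitialized data.
+
+    int rc = vmx_inst_check_privilege(regs, 0);
+
+    if ( rc != X86EMUL_OKAY )
+        return rc;    /* fault already injected; nothing read yet */
+    /* only now may nv_vvmcxaddr and the rest be examined */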
+ +Reported-and-tested-by: Jeff Zimmerman +Signed-off-by: Jan Beulich +Reviewed-and-tested-by: Andrew Cooper +Acked-by: Ian Campbell + +--- a/xen/arch/x86/hvm/vmx/vvmx.c ++++ b/xen/arch/x86/hvm/vmx/vvmx.c +@@ -1508,15 +1508,10 @@ static void clear_vvmcs_launched(struct + } + } + +-int nvmx_vmresume(struct vcpu *v, struct cpu_user_regs *regs) ++static int nvmx_vmresume(struct vcpu *v, struct cpu_user_regs *regs) + { + struct nestedvmx *nvmx = &vcpu_2_nvmx(v); + struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); +- int rc; +- +- rc = vmx_inst_check_privilege(regs, 0); +- if ( rc != X86EMUL_OKAY ) +- return rc; + + /* check VMCS is valid and IO BITMAP is set */ + if ( (nvcpu->nv_vvmcxaddr != VMCX_EADDR) && +@@ -1535,6 +1530,10 @@ int nvmx_handle_vmresume(struct cpu_user + struct vcpu *v = current; + struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); + struct nestedvmx *nvmx = &vcpu_2_nvmx(v); ++ int rc = vmx_inst_check_privilege(regs, 0); ++ ++ if ( rc != X86EMUL_OKAY ) ++ return rc; + + if ( vcpu_nestedhvm(v).nv_vvmcxaddr == VMCX_EADDR ) + { +@@ -1554,10 +1553,13 @@ int nvmx_handle_vmresume(struct cpu_user + int nvmx_handle_vmlaunch(struct cpu_user_regs *regs) + { + bool_t launched; +- int rc; + struct vcpu *v = current; + struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); + struct nestedvmx *nvmx = &vcpu_2_nvmx(v); ++ int rc = vmx_inst_check_privilege(regs, 0); ++ ++ if ( rc != X86EMUL_OKAY ) ++ return rc; + + if ( vcpu_nestedhvm(v).nv_vvmcxaddr == VMCX_EADDR ) + { diff --git a/5280aae0-x86-idle-reduce-contention-on-ACPI-register-accesses.patch b/5280aae0-x86-idle-reduce-contention-on-ACPI-register-accesses.patch new file mode 100644 index 0000000..42860bc --- /dev/null +++ b/5280aae0-x86-idle-reduce-contention-on-ACPI-register-accesses.patch @@ -0,0 +1,101 @@ +References: bnc#842417 + +# Commit 178fd279dc138243b514b4ecd48509e4bf5d1ede +# Date 2013-11-11 11:01:04 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86/idle: reduce contention on ACPI register accesses + +Other than when they're located in I/O port space, accessing them when +in MMIO space (currently) implies usage of some sort of global lock: In +-unstable this would be due to the use of vmap(), is older trees the +necessary locking was introduced by 2ee9cbf9 ("ACPI: fix +acpi_os_map_memory()"). This contention was observed to result in Dom0 +kernel soft lockups during the loading of the ACPI processor driver +there on systems with very many CPU cores. 
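+
+The cheapest of the changes relies purely on &&'s left-to-right
+short-circuit evaluation; in miniature (taken from the first item in
+the list that follows):
+
+    if ( cx->type == ACPI_STATE_C3 &&    /* cheap field tests first */
+         power->flags.bm_check &&
+         acpi_idle_bm_check() )          /* ACPI register read last */
+        cx = power->safe_state;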
+ +There are a couple of things being done for this: +- re-order elements of an if() condition so that the register access + only happens when we really need it +- turn off arbitration disabling only when the first CPU leaves C3 + (paralleling how arbitration disabling gets turned on) +- only set the (global) bus master reload flag once (when the first + target CPU gets processed) + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/xen/arch/x86/acpi/cpu_idle.c ++++ b/xen/arch/x86/acpi/cpu_idle.c +@@ -439,8 +439,8 @@ static void acpi_processor_idle(void) + (next_state = cpuidle_current_governor->select(power)) > 0 ) + { + cx = &power->states[next_state]; +- if ( power->flags.bm_check && acpi_idle_bm_check() +- && cx->type == ACPI_STATE_C3 ) ++ if ( cx->type == ACPI_STATE_C3 && power->flags.bm_check && ++ acpi_idle_bm_check() ) + cx = power->safe_state; + if ( cx->idx > max_cstate ) + cx = &power->states[max_cstate]; +@@ -563,8 +563,8 @@ static void acpi_processor_idle(void) + { + /* Enable bus master arbitration */ + spin_lock(&c3_cpu_status.lock); +- acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); +- c3_cpu_status.count--; ++ if ( c3_cpu_status.count-- == num_online_cpus() ) ++ acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); + spin_unlock(&c3_cpu_status.lock); + } + +@@ -821,12 +821,10 @@ static int check_cx(struct acpi_processo + return -EINVAL; + + /* All the logic here assumes flags.bm_check is same across all CPUs */ +- if ( bm_check_flag == -1 ) ++ if ( bm_check_flag < 0 ) + { + /* Determine whether bm_check is needed based on CPU */ + acpi_processor_power_init_bm_check(&(power->flags)); +- bm_check_flag = power->flags.bm_check; +- bm_control_flag = power->flags.bm_control; + } + else + { +@@ -853,14 +851,13 @@ static int check_cx(struct acpi_processo + } + } + /* +- * On older chipsets, BM_RLD needs to be set +- * in order for Bus Master activity to wake the +- * system from C3. Newer chipsets handle DMA +- * during C3 automatically and BM_RLD is a NOP. +- * In either case, the proper way to +- * handle BM_RLD is to set it and leave it set. ++ * On older chipsets, BM_RLD needs to be set in order for Bus ++ * Master activity to wake the system from C3, hence ++ * acpi_set_register() is always being called once below. Newer ++ * chipsets handle DMA during C3 automatically and BM_RLD is a ++ * NOP. In either case, the proper way to handle BM_RLD is to ++ * set it and leave it set. 
+ */ +- acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1); + } + else + { +@@ -875,7 +872,13 @@ static int check_cx(struct acpi_processo + " for C3 to be enabled on SMP systems\n")); + return -EINVAL; + } +- acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); ++ } ++ ++ if ( bm_check_flag < 0 ) ++ { ++ bm_check_flag = power->flags.bm_check; ++ bm_control_flag = power->flags.bm_control; ++ acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, bm_check_flag); + } + + break; diff --git a/5281fad4-numa-sched-leave-node-affinity-alone-if-not-in-auto-mode.patch b/5281fad4-numa-sched-leave-node-affinity-alone-if-not-in-auto-mode.patch new file mode 100644 index 0000000..3026aef --- /dev/null +++ b/5281fad4-numa-sched-leave-node-affinity-alone-if-not-in-auto-mode.patch @@ -0,0 +1,75 @@ +# Commit 67348c3ac700b8bc9147638c719c3035c5ef20f5 +# Date 2013-11-12 10:54:28 +0100 +# Author Dario Faggioli +# Committer Jan Beulich +numa-sched: leave node-affinity alone if not in "auto" mode + +If the domain's NUMA node-affinity is being specified by the +user/toolstack (instead of being automatically computed by Xen), +we really should stick to that. This means domain_update_node_affinity() +is wrong when it filters out some stuff from there even in "!auto" +mode. + +This commit fixes that. Of course, this does not mean node-affinity +is always honoured (e.g., a vcpu won't run on a pcpu of a different +cpupool) but the necessary logic for taking into account all the +possible situations lives in the scheduler code, where it belongs. + +What could happen without this change is that, under certain +circumstances, the node-affinity of a domain may change when the +user modifies the vcpu-affinity of the domain's vcpus. This, even +if probably not a real bug, is at least something the user does +not expect, so let's avoid it. + +Signed-off-by: Dario Faggioli +Reviewed-by: George Dunlap +Acked-by: Keir Fraser + +--- a/xen/common/domain.c ++++ b/xen/common/domain.c +@@ -345,7 +345,6 @@ void domain_update_node_affinity(struct + cpumask_var_t cpumask; + cpumask_var_t online_affinity; + const cpumask_t *online; +- nodemask_t nodemask = NODE_MASK_NONE; + struct vcpu *v; + unsigned int node; + +@@ -367,28 +366,19 @@ void domain_update_node_affinity(struct + cpumask_or(cpumask, cpumask, online_affinity); + } + ++ /* ++ * If d->auto_node_affinity is true, the domain's node-affinity mask ++ * (d->node_affinity) is automaically computed from all the domain's ++ * vcpus' vcpu-affinity masks (the union of which we have just built ++ * above in cpumask). OTOH, if d->auto_node_affinity is false, we ++ * must leave the node-affinity of the domain alone. ++ */ + if ( d->auto_node_affinity ) + { +- /* Node-affinity is automaically computed from all vcpu-affinities */ ++ nodes_clear(d->node_affinity); + for_each_online_node ( node ) + if ( cpumask_intersects(&node_to_cpumask(node), cpumask) ) +- node_set(node, nodemask); +- +- d->node_affinity = nodemask; +- } +- else +- { +- /* Node-affinity is provided by someone else, just filter out cpus +- * that are either offline or not in the affinity of any vcpus. */ +- nodemask = d->node_affinity; +- for_each_node_mask ( node, d->node_affinity ) +- if ( !cpumask_intersects(&node_to_cpumask(node), cpumask) ) +- node_clear(node, nodemask);//d->node_affinity); +- +- /* Avoid loosing track of node-affinity because of a bad +- * vcpu-affinity has been specified. 
*/ +- if ( !nodes_empty(nodemask) ) +- d->node_affinity = nodemask; ++ node_set(node, d->node_affinity); + } + + sched_set_node_affinity(d, &d->node_affinity); diff --git a/52820823-nested-SVM-adjust-guest-handling-of-structure-mappings.patch b/52820823-nested-SVM-adjust-guest-handling-of-structure-mappings.patch new file mode 100644 index 0000000..bcb1d41 --- /dev/null +++ b/52820823-nested-SVM-adjust-guest-handling-of-structure-mappings.patch @@ -0,0 +1,132 @@ +# Commit b1e87805bf37b446dade93a7eb922bb7d1269756 +# Date 2013-11-12 11:51:15 +0100 +# Author Jan Beulich +# Committer Jan Beulich +nested SVM: adjust guest handling of structure mappings + +For one, nestedsvm_vmcb_map() error checking must not consist of using +assertions: Global (permanent) mappings can fail, and hence failure +needs to be dealt with properly. And non-global (transient) mappings +can't fail anyway. + +And then the I/O port access bitmap handling was broken: It checked +only the first of the accessed ports rather than each of them. + +Signed-off-by: Jan Beulich +Reviewed-by: Christoph Egger +Reviewed-by: Andrew Cooper +Acked-by: Suravee Suthikulpanit + +--- a/xen/arch/x86/hvm/svm/nestedsvm.c ++++ b/xen/arch/x86/hvm/svm/nestedsvm.c +@@ -342,7 +342,7 @@ static int nsvm_vmrun_permissionmap(stru + unsigned int i; + enum hvm_copy_result ret; + unsigned long *ns_viomap; +- bool_t ioport_80, ioport_ed; ++ bool_t ioport_80 = 1, ioport_ed = 1; + + ns_msrpm_ptr = (unsigned long *)svm->ns_cached_msrpm; + +@@ -360,10 +360,12 @@ static int nsvm_vmrun_permissionmap(stru + svm->ns_iomap_pa = ns_vmcb->_iopm_base_pa; + + ns_viomap = hvm_map_guest_frame_ro(svm->ns_iomap_pa >> PAGE_SHIFT, 0); +- ASSERT(ns_viomap != NULL); +- ioport_80 = test_bit(0x80, ns_viomap); +- ioport_ed = test_bit(0xed, ns_viomap); +- hvm_unmap_guest_frame(ns_viomap, 0); ++ if ( ns_viomap ) ++ { ++ ioport_80 = test_bit(0x80, ns_viomap); ++ ioport_ed = test_bit(0xed, ns_viomap); ++ hvm_unmap_guest_frame(ns_viomap, 0); ++ } + + svm->ns_iomap = nestedhvm_vcpu_iomap_get(ioport_80, ioport_ed); + +@@ -866,40 +868,45 @@ nsvm_vmcb_guest_intercepts_msr(unsigned + static int + nsvm_vmcb_guest_intercepts_ioio(paddr_t iopm_pa, uint64_t exitinfo1) + { +- unsigned long iopm_gfn = iopm_pa >> PAGE_SHIFT; +- unsigned long *io_bitmap = NULL; ++ unsigned long gfn = iopm_pa >> PAGE_SHIFT; ++ unsigned long *io_bitmap; + ioio_info_t ioinfo; + uint16_t port; ++ unsigned int size; + bool_t enabled; +- unsigned long gfn = 0; /* gcc ... */ + + ioinfo.bytes = exitinfo1; + port = ioinfo.fields.port; ++ size = ioinfo.fields.sz32 ? 4 : ioinfo.fields.sz16 ? 2 : 1; + +- switch (port) { +- case 0 ... 32767: /* first 4KB page */ +- gfn = iopm_gfn; ++ switch ( port ) ++ { ++ case 0 ... 8 * PAGE_SIZE - 1: /* first 4KB page */ + break; +- case 32768 ... 65535: /* second 4KB page */ +- port -= 32768; +- gfn = iopm_gfn + 1; ++ case 8 * PAGE_SIZE ...
2 * 8 * PAGE_SIZE - 1: /* second 4KB page */ ++ port -= 8 * PAGE_SIZE; ++ ++gfn; + break; + default: + BUG(); + break; + } + +- io_bitmap = hvm_map_guest_frame_ro(gfn, 0); +- if (io_bitmap == NULL) { +- gdprintk(XENLOG_ERR, +- "IOIO intercept: mapping of permission map failed\n"); +- return NESTEDHVM_VMEXIT_ERROR; ++ for ( io_bitmap = hvm_map_guest_frame_ro(gfn, 0); ; ) ++ { ++ enabled = io_bitmap && test_bit(port, io_bitmap); ++ if ( !enabled || !--size ) ++ break; ++ if ( unlikely(++port == 8 * PAGE_SIZE) ) ++ { ++ hvm_unmap_guest_frame(io_bitmap, 0); ++ io_bitmap = hvm_map_guest_frame_ro(++gfn, 0); ++ port -= 8 * PAGE_SIZE; ++ } + } +- +- enabled = test_bit(port, io_bitmap); + hvm_unmap_guest_frame(io_bitmap, 0); + +- if (!enabled) ++ if ( !enabled ) + return NESTEDHVM_VMEXIT_HOST; + + return NESTEDHVM_VMEXIT_INJECT; +@@ -966,8 +973,8 @@ nsvm_vmcb_guest_intercepts_exitcode(stru + switch (exitcode) { + case VMEXIT_MSR: + ASSERT(regs != NULL); +- nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr); +- ASSERT(nv->nv_vvmcx != NULL); ++ if ( !nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr) ) ++ break; + ns_vmcb = nv->nv_vvmcx; + vmexits = nsvm_vmcb_guest_intercepts_msr(svm->ns_cached_msrpm, + regs->ecx, ns_vmcb->exitinfo1 != 0); +@@ -975,8 +982,8 @@ nsvm_vmcb_guest_intercepts_exitcode(stru + return 0; + break; + case VMEXIT_IOIO: +- nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr); +- ASSERT(nv->nv_vvmcx != NULL); ++ if ( !nestedsvm_vmcb_map(v, nv->nv_vvmcxaddr) ) ++ break; + ns_vmcb = nv->nv_vvmcx; + vmexits = nsvm_vmcb_guest_intercepts_ioio(ns_vmcb->_iopm_base_pa, + ns_vmcb->exitinfo1); diff --git a/52820863-VMX-don-t-crash-processing-d-debug-key.patch b/52820863-VMX-don-t-crash-processing-d-debug-key.patch new file mode 100644 index 0000000..0219fff --- /dev/null +++ b/52820863-VMX-don-t-crash-processing-d-debug-key.patch @@ -0,0 +1,105 @@ +References: bnc#846849 + +# Commit 58929248461ecadce13e92eb5a5d9ef718a7c88e +# Date 2013-11-12 11:52:19 +0100 +# Author Jan Beulich +# Committer Jan Beulich +VMX: don't crash processing 'd' debug key + +There's a window during scheduling where "current" and the active VMCS +may disagree: The former gets set much earlier than the latter. Since +both vmx_vmcs_enter() and vmx_vmcs_exit() immediately return when the +subject vCPU is "current", accessing VMCS fields would, depending on +whether there is any currently active VMCS, either read wrong data, or +cause a crash. + +Going forward we might want to consider reducing the window during +which vmx_vmcs_enter() might fail (e.g. doing a plain __vmptrld() when +v->arch.hvm_vmx.vmcs != this_cpu(current_vmcs) but arch_vmx->active_cpu +== -1), but that would add complexities (acquiring and - more +importantly - properly dropping v->arch.hvm_vmx.vmcs_lock) that don't +look worthwhile adding right now. + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Acked-by: Keir Fraser + +--- a/xen/arch/x86/hvm/vmx/vmcs.c ++++ b/xen/arch/x86/hvm/vmx/vmcs.c +@@ -591,16 +591,16 @@ struct foreign_vmcs { + }; + static DEFINE_PER_CPU(struct foreign_vmcs, foreign_vmcs); + +-void vmx_vmcs_enter(struct vcpu *v) ++bool_t vmx_vmcs_try_enter(struct vcpu *v) + { + struct foreign_vmcs *fv; + + /* + * NB. We must *always* run an HVM VCPU on its own VMCS, except for +- * vmx_vmcs_enter/exit critical regions. ++ * vmx_vmcs_enter/exit and scheduling tail critical regions. 
+ */ + if ( likely(v == current) ) +- return; ++ return v->arch.hvm_vmx.vmcs == this_cpu(current_vmcs); + + fv = &this_cpu(foreign_vmcs); + +@@ -623,6 +623,15 @@ void vmx_vmcs_enter(struct vcpu *v) + } + + fv->count++; ++ ++ return 1; ++} ++ ++void vmx_vmcs_enter(struct vcpu *v) ++{ ++ bool_t okay = vmx_vmcs_try_enter(v); ++ ++ ASSERT(okay); + } + + void vmx_vmcs_exit(struct vcpu *v) +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -669,7 +669,27 @@ void vmx_get_segment_register(struct vcp + { + uint32_t attr = 0; + +- vmx_vmcs_enter(v); ++ /* ++ * We may get here in the context of dump_execstate(), which may have ++ * interrupted context switching between setting "current" and ++ * vmx_do_resume() reaching the end of vmx_load_vmcs(). That would make ++ * all the VMREADs below fail if we don't bail right away. ++ */ ++ if ( unlikely(!vmx_vmcs_try_enter(v)) ) ++ { ++ static bool_t warned; ++ ++ if ( !warned ) ++ { ++ warned = 1; ++ printk(XENLOG_WARNING "Segment register inaccessible for d%dv%d\n" ++ "(If you see this outside of debugging activity," ++ " please report to xen-devel@lists.xenproject.org)\n", ++ v->domain->domain_id, v->vcpu_id); ++ } ++ memset(reg, 0, sizeof(*reg)); ++ return; ++ } + + switch ( seg ) + { +--- a/xen/include/asm-x86/hvm/vmx/vmcs.h ++++ b/xen/include/asm-x86/hvm/vmx/vmcs.h +@@ -144,6 +144,7 @@ struct arch_vmx_struct { + int vmx_create_vmcs(struct vcpu *v); + void vmx_destroy_vmcs(struct vcpu *v); + void vmx_vmcs_enter(struct vcpu *v); ++bool_t __must_check vmx_vmcs_try_enter(struct vcpu *v); + void vmx_vmcs_exit(struct vcpu *v); + + #define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 diff --git a/5282492f-x86-eliminate-has_arch_mmios.patch b/5282492f-x86-eliminate-has_arch_mmios.patch new file mode 100644 index 0000000..c3092d3 --- /dev/null +++ b/5282492f-x86-eliminate-has_arch_mmios.patch @@ -0,0 +1,84 @@ +# Commit 79233938ab2a8f273fd5dcdbf8e8381b9eb3a461 +# Date 2013-11-12 16:28:47 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86: eliminate has_arch_mmios() + +... as being generally insufficient: Either has_arch_pdevs() or +cache_flush_permitted() should be used (in particular, it is +insufficient to consider MMIO ranges alone - I/O port ranges have the +same requirements if available to a guest). 
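+
+A rough standalone sketch of that distinction (assumed semantics only,
+with hypothetical names, not the actual Xen macro definitions): the
+replacement predicate has to consider I/O port capabilities alongside
+MMIO ranges, which a has_arch_mmios()-style check misses.
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    struct dom {
+        bool has_iomem;    /* models !rangeset_is_empty(iomem_caps) */
+        bool has_ioports;  /* models a non-empty I/O port rangeset  */
+    };
+
+    /* guest-visible caching effects need both kinds of ranges checked */
+    static bool may_touch_hardware(const struct dom *d)
+    {
+        return d->has_iomem || d->has_ioports;
+    }
+
+    int main(void)
+    {
+        struct dom d = { .has_iomem = false, .has_ioports = true };
+
+        /* an MMIO-only check would wrongly report 0 here */
+        printf("%d\n", may_touch_hardware(&d));
+        return 0;
+    }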
+ +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1792,7 +1793,7 @@ int hvm_set_cr0(unsigned long value) + } + } + +- if ( has_arch_mmios(v->domain) ) ++ if ( cache_flush_permitted(v->domain) ) + { + if ( (value & X86_CR0_CD) && !(value & X86_CR0_NW) ) + { +--- a/xen/arch/x86/hvm/svm/svm.c ++++ b/xen/arch/x86/hvm/svm/svm.c +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1973,7 +1974,7 @@ static void wbinvd_ipi(void *info) + + static void svm_wbinvd_intercept(void) + { +- if ( has_arch_mmios(current->domain) ) ++ if ( cache_flush_permitted(current->domain) ) + on_each_cpu(wbinvd_ipi, NULL, 1); + } + +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2173,10 +2174,7 @@ static void wbinvd_ipi(void *info) + + static void vmx_wbinvd_intercept(void) + { +- if ( !has_arch_mmios(current->domain) ) +- return; +- +- if ( iommu_snoop ) ++ if ( !cache_flush_permitted(current->domain) || iommu_snoop ) + return; + + if ( cpu_has_wbinvd_exiting ) +--- a/xen/include/asm-x86/domain.h ++++ b/xen/include/asm-x86/domain.h +@@ -316,7 +316,6 @@ struct arch_domain + } __cacheline_aligned; + + #define has_arch_pdevs(d) (!list_empty(&(d)->arch.pdev_list)) +-#define has_arch_mmios(d) (!rangeset_is_empty((d)->iomem_caps)) + + #define gdt_ldt_pt_idx(v) \ + ((v)->vcpu_id >> (PAGETABLE_ORDER - GDT_LDT_VCPU_SHIFT)) diff --git a/52864df2-credit-Update-other-parameters-when-setting-tslice_ms.patch b/52864df2-credit-Update-other-parameters-when-setting-tslice_ms.patch new file mode 100644 index 0000000..97ebf09 --- /dev/null +++ b/52864df2-credit-Update-other-parameters-when-setting-tslice_ms.patch @@ -0,0 +1,63 @@ +# Commit 1320b8100c2ed390fc640557a050f5c700d8338d +# Date 2013-11-15 17:38:10 +0100 +# Author Nate Studer +# Committer Jan Beulich +credit: Update other parameters when setting tslice_ms + +Add a utility function to update the rest of the timeslice +accounting fields when updating the timeslice of the +credit scheduler, so that capped CPUs behave correctly. + +Before this patch changing the timeslice to a value higher +than the default would result in a domain not utilizing +its full capacity and changing the timeslice to a value +lower than the default would result in a domain exceeding +its capacity. 
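+
+To see the coupling numerically, here is a standalone sketch of the
+derived-field arithmetic; the constants are assumptions matching the
+scheduler's apparent defaults (CSCHED_TICKS_PER_TSLICE and
+CSCHED_CREDITS_PER_MSEC), not authoritative values:
+
+    #include <stdio.h>
+
+    #define TICKS_PER_TSLICE 3   /* assumed CSCHED_TICKS_PER_TSLICE */
+    #define CREDITS_PER_MSEC 10  /* assumed CSCHED_CREDITS_PER_MSEC */
+
+    static void show(unsigned int tslice_ms)
+    {
+        unsigned int ticks = tslice_ms < TICKS_PER_TSLICE
+                             ? 1 : TICKS_PER_TSLICE;
+
+        printf("tslice=%ums ticks=%u tick_period=%uus credits=%u\n",
+               tslice_ms, ticks, tslice_ms * 1000 / ticks,
+               CREDITS_PER_MSEC * tslice_ms);
+    }
+
+    int main(void)
+    {
+        show(30);   /* default: stale derived values happen to match */
+        show(120);  /* longer slice: credits must grow with it       */
+        show(2);    /* sub-tick slice: tick count collapses to 1     */
+        return 0;
+    }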
+ +Signed-off-by: Nate Studer +Reviewed-by: Dario Faggioli +Reviewed-by: George Dunlap + +--- a/xen/common/sched_credit.c ++++ b/xen/common/sched_credit.c +@@ -1073,6 +1073,17 @@ csched_dom_cntl( + return 0; + } + ++static inline void ++__csched_set_tslice(struct csched_private *prv, unsigned timeslice) ++{ ++ prv->tslice_ms = timeslice; ++ prv->ticks_per_tslice = CSCHED_TICKS_PER_TSLICE; ++ if ( prv->tslice_ms < prv->ticks_per_tslice ) ++ prv->ticks_per_tslice = 1; ++ prv->tick_period_us = prv->tslice_ms * 1000 / prv->ticks_per_tslice; ++ prv->credits_per_tslice = CSCHED_CREDITS_PER_MSEC * prv->tslice_ms; ++} ++ + static int + csched_sys_cntl(const struct scheduler *ops, + struct xen_sysctl_scheduler_op *sc) +@@ -1091,7 +1102,7 @@ csched_sys_cntl(const struct scheduler * + || params->ratelimit_us < XEN_SYSCTL_SCHED_RATELIMIT_MIN)) + || MICROSECS(params->ratelimit_us) > MILLISECS(params->tslice_ms) ) + goto out; +- prv->tslice_ms = params->tslice_ms; ++ __csched_set_tslice(prv, params->tslice_ms); + prv->ratelimit_us = params->ratelimit_us; + /* FALLTHRU */ + case XEN_SYSCTL_SCHEDOP_getinfo: +@@ -1903,12 +1914,7 @@ csched_init(struct scheduler *ops) + sched_credit_tslice_ms = CSCHED_DEFAULT_TSLICE_MS; + } + +- prv->tslice_ms = sched_credit_tslice_ms; +- prv->ticks_per_tslice = CSCHED_TICKS_PER_TSLICE; +- if ( prv->tslice_ms < prv->ticks_per_tslice ) +- prv->ticks_per_tslice = 1; +- prv->tick_period_us = prv->tslice_ms * 1000 / prv->ticks_per_tslice; +- prv->credits_per_tslice = CSCHED_CREDITS_PER_MSEC * prv->tslice_ms; ++ __csched_set_tslice(prv, sched_credit_tslice_ms); + + if ( MICROSECS(sched_ratelimit_us) > MILLISECS(sched_credit_tslice_ms) ) + { diff --git a/52864f30-fix-leaking-of-v-cpu_affinity_saved-on-domain-destruction.patch b/52864f30-fix-leaking-of-v-cpu_affinity_saved-on-domain-destruction.patch new file mode 100644 index 0000000..e41c4c9 --- /dev/null +++ b/52864f30-fix-leaking-of-v-cpu_affinity_saved-on-domain-destruction.patch @@ -0,0 +1,21 @@ +# Commit 6757efe1bf50ac7ff68fa4dd7d9333529f70ae9a +# Date 2013-11-15 17:43:28 +0100 +# Author Dario Faggioli +# Committer Jan Beulich +fix leaking of v->cpu_affinity_saved on domain destruction + +Signed-off-by: Dario Faggioli +Reviewed-by: George Dunlap +Acked-by: Ian Jackson +Reviewed-by: Jan Beulich + +--- a/xen/common/domain.c ++++ b/xen/common/domain.c +@@ -726,6 +726,7 @@ static void complete_domain_destroy(stru + { + free_cpumask_var(v->cpu_affinity); + free_cpumask_var(v->cpu_affinity_tmp); ++ free_cpumask_var(v->cpu_affinity_saved); + free_cpumask_var(v->vcpu_dirty_cpumask); + free_vcpu_struct(v); + } diff --git a/5289d225-nested-VMX-don-t-ignore-mapping-errors.patch b/5289d225-nested-VMX-don-t-ignore-mapping-errors.patch new file mode 100644 index 0000000..97cc177 --- /dev/null +++ b/5289d225-nested-VMX-don-t-ignore-mapping-errors.patch @@ -0,0 +1,115 @@ +# Commit e02b14e531a95399fc9d8647ec3cc6f310a7d455 +# Date 2013-11-18 09:39:01 +0100 +# Author Jan Beulich +# Committer Jan Beulich +nested VMX: don't ignore mapping errors + +Rather than ignoring failures to map the virtual VMCS as well as MSR or +I/O port bitmaps, convert those into failures of the respective +instructions (avoiding dereferencing NULL pointers). Ultimately such +failures should be handled transparently (by using transient mappings +when they actually need to be accessed, just like nested SVM does).
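+
+The error-conversion pattern, as a minimal standalone model (stubbed
+mapping and VM-instruction return primitives, not the hypervisor's real
+signatures):
+
+    #include <stdio.h>
+
+    enum vmret { VMSUCCEED, VMFAIL_VALID };
+
+    static void *map_guest_frame(unsigned long gfn)
+    {
+        (void)gfn;
+        return NULL;              /* model a failed (permanent) mapping */
+    }
+
+    static enum vmret emulate_vmptrld(unsigned long gfn)
+    {
+        void *vvmcs = map_guest_frame(gfn);
+
+        if ( !vvmcs )
+            return VMFAIL_VALID;  /* fail the instruction, don't crash */
+        /* ... otherwise operate on vvmcs ... */
+        return VMSUCCEED;
+    }
+
+    int main(void)
+    {
+        printf("%d\n", emulate_vmptrld(0x1234));
+        return 0;
+    }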
+ +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Acked-by: Eddie Dong + +--- a/xen/arch/x86/hvm/vmx/vvmx.c ++++ b/xen/arch/x86/hvm/vmx/vvmx.c +@@ -746,7 +746,7 @@ static void __clear_current_vvmcs(struct + __vmpclear(virt_to_maddr(nvcpu->nv_n2vmcx)); + } + +-static void __map_msr_bitmap(struct vcpu *v) ++static bool_t __must_check _map_msr_bitmap(struct vcpu *v) + { + struct nestedvmx *nvmx = &vcpu_2_nvmx(v); + unsigned long gpa; +@@ -755,9 +755,11 @@ static void __map_msr_bitmap(struct vcpu + hvm_unmap_guest_frame(nvmx->msrbitmap, 1); + gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, MSR_BITMAP); + nvmx->msrbitmap = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1); ++ ++ return nvmx->msrbitmap != NULL; + } + +-static void __map_io_bitmap(struct vcpu *v, u64 vmcs_reg) ++static bool_t __must_check _map_io_bitmap(struct vcpu *v, u64 vmcs_reg) + { + struct nestedvmx *nvmx = &vcpu_2_nvmx(v); + unsigned long gpa; +@@ -768,12 +770,14 @@ static void __map_io_bitmap(struct vcpu + hvm_unmap_guest_frame(nvmx->iobitmap[index], 1); + gpa = __get_vvmcs(vcpu_nestedhvm(v).nv_vvmcx, vmcs_reg); + nvmx->iobitmap[index] = hvm_map_guest_frame_ro(gpa >> PAGE_SHIFT, 1); ++ ++ return nvmx->iobitmap[index] != NULL; + } + +-static inline void map_io_bitmap_all(struct vcpu *v) ++static inline bool_t __must_check map_io_bitmap_all(struct vcpu *v) + { +- __map_io_bitmap (v, IO_BITMAP_A); +- __map_io_bitmap (v, IO_BITMAP_B); ++ return _map_io_bitmap(v, IO_BITMAP_A) && ++ _map_io_bitmap(v, IO_BITMAP_B); + } + + static void nvmx_purge_vvmcs(struct vcpu *v) +@@ -1609,9 +1613,15 @@ int nvmx_handle_vmptrld(struct cpu_user_ + if ( nvcpu->nv_vvmcxaddr == VMCX_EADDR ) + { + nvcpu->nv_vvmcx = hvm_map_guest_frame_rw(gpa >> PAGE_SHIFT, 1); +- nvcpu->nv_vvmcxaddr = gpa; +- map_io_bitmap_all (v); +- __map_msr_bitmap(v); ++ if ( nvcpu->nv_vvmcx ) ++ nvcpu->nv_vvmcxaddr = gpa; ++ if ( !nvcpu->nv_vvmcx || ++ !map_io_bitmap_all(v) || ++ !_map_msr_bitmap(v) ) ++ { ++ vmreturn(regs, VMFAIL_VALID); ++ goto out; ++ } + } + + if ( cpu_has_vmx_vmcs_shadowing ) +@@ -1723,6 +1733,7 @@ int nvmx_handle_vmwrite(struct cpu_user_ + struct nestedvcpu *nvcpu = &vcpu_nestedhvm(v); + unsigned long operand; + u64 vmcs_encoding; ++ bool_t okay = 1; + + if ( decode_vmx_inst(regs, &decode, &operand, 0) + != X86EMUL_OKAY ) +@@ -1731,16 +1742,21 @@ int nvmx_handle_vmwrite(struct cpu_user_ + vmcs_encoding = reg_read(regs, decode.reg2); + __set_vvmcs(nvcpu->nv_vvmcx, vmcs_encoding, operand); + +- if ( vmcs_encoding == IO_BITMAP_A || vmcs_encoding == IO_BITMAP_A_HIGH ) +- __map_io_bitmap (v, IO_BITMAP_A); +- else if ( vmcs_encoding == IO_BITMAP_B || +- vmcs_encoding == IO_BITMAP_B_HIGH ) +- __map_io_bitmap (v, IO_BITMAP_B); ++ switch ( vmcs_encoding ) ++ { ++ case IO_BITMAP_A: case IO_BITMAP_A_HIGH: ++ okay = _map_io_bitmap(v, IO_BITMAP_A); ++ break; ++ case IO_BITMAP_B: case IO_BITMAP_B_HIGH: ++ okay = _map_io_bitmap(v, IO_BITMAP_B); ++ break; ++ case MSR_BITMAP: case MSR_BITMAP_HIGH: ++ okay = _map_msr_bitmap(v); ++ break; ++ } + +- if ( vmcs_encoding == MSR_BITMAP || vmcs_encoding == MSR_BITMAP_HIGH ) +- __map_msr_bitmap(v); ++ vmreturn(regs, okay ? 
VMSUCCEED : VMFAIL_VALID); + +- vmreturn(regs, VMSUCCEED); + return X86EMUL_OKAY; + } + diff --git a/528a0e5b-TLB-flushing-in-dma_pte_clear_one.patch b/528a0e5b-TLB-flushing-in-dma_pte_clear_one.patch new file mode 100644 index 0000000..7fae43d --- /dev/null +++ b/528a0e5b-TLB-flushing-in-dma_pte_clear_one.patch @@ -0,0 +1,32 @@ +References: bnc#851386 CVE-2013-6375 XSA-78 + +# HG changeset patch +# User Jan Beulich +# Date 1384779355 -3600 +# Node ID 81fec8e36840041ca5779a4c4f2eed98180eda2e +# Parent de9b11c80e2d3bd795d6329e0979c4734c3b4f96 +VT-d: fix TLB flushing in dma_pte_clear_one() + +The third parameter of __intel_iommu_iotlb_flush() is to indicate +whether the to be flushed entry was a present one. A few lines before, +we bailed if !dma_pte_present(*pte), so there's no need to check the +flag here again - we can simply always pass TRUE here. + +This is CVE-2013-6375 / XSA-78. + +Suggested-by: Cheng Yueqiang +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Acked-by: Keir Fraser + +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -646,7 +646,7 @@ static void dma_pte_clear_one(struct dom + iommu_flush_cache_entry(pte, sizeof(struct dma_pte)); + + if ( !this_cpu(iommu_dont_flush_iotlb) ) +- __intel_iommu_iotlb_flush(domain, addr >> PAGE_SHIFT_4K , 0, 1); ++ __intel_iommu_iotlb_flush(domain, addr >> PAGE_SHIFT_4K, 1, 1); + + unmap_vtd_domain_page(page); + diff --git a/528a0eb0-x86-consider-modules-when-cutting-off-memory.patch b/528a0eb0-x86-consider-modules-when-cutting-off-memory.patch new file mode 100644 index 0000000..71270dd --- /dev/null +++ b/528a0eb0-x86-consider-modules-when-cutting-off-memory.patch @@ -0,0 +1,40 @@ +References: bnc#848014 + +# Commit a5db2c7aab7a638d84f22ac8fe5089d81175438b +# Date 2013-11-18 13:57:20 +0100 +# Author Jan Beulich +# Committer Jan Beulich +x86: consider modules when cutting off memory + +The code in question runs after module ranges got already removed from +the E820 table, so when determining the new maximum page/PDX we need to +explicitly take them into account. + +Furthermore we need to round up the ending addresses here, in order to +fully cover eventual partial trailing pages. + +Signed-off-by: Jan Beulich +Acked-by: Keir Fraser + +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -1012,9 +1012,17 @@ void __init __start_xen(unsigned long mb + ASSERT(j); + } + map_e = boot_e820.map[j].addr + boot_e820.map[j].size; +- if ( (map_e >> PAGE_SHIFT) < max_page ) ++ for ( j = 0; j < mbi->mods_count; ++j ) + { +- max_page = map_e >> PAGE_SHIFT; ++ uint64_t end = pfn_to_paddr(mod[j].mod_start) + ++ mod[j].mod_end; ++ ++ if ( map_e < end ) ++ map_e = end; ++ } ++ if ( PFN_UP(map_e) < max_page ) ++ { ++ max_page = PFN_UP(map_e); + max_pdx = pfn_to_pdx(max_page - 1) + 1; + } + printk(XENLOG_WARNING "Ignoring inaccessible memory range" diff --git a/528f606c-x86-hvm-reset-TSC-to-0-after-domain-resume-from-S3.patch b/528f606c-x86-hvm-reset-TSC-to-0-after-domain-resume-from-S3.patch new file mode 100644 index 0000000..983c976 --- /dev/null +++ b/528f606c-x86-hvm-reset-TSC-to-0-after-domain-resume-from-S3.patch @@ -0,0 +1,30 @@ +# Commit e95dc6ba69daef6468b3ae5912710727244d6e2f +# Date 2013-11-22 14:47:24 +0100 +# Author Tomasz Wroblewski +# Committer Jan Beulich +x86/hvm: reset TSC to 0 after domain resume from S3 + +Host S3 implicitly resets the host TSC to 0, but the tsc offset for hvm +domains is not recalculated when they resume, causing it to go into +negative values. 
In a Linux guest using the tsc clocksource, this results in +a hang after the wrap back to positive values, since the tsc clocksource +implementation expects it to be reset. + +Signed-off-by: Tomasz Wroblewski + +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -3607,7 +3607,13 @@ static void hvm_s3_suspend(struct domain + static void hvm_s3_resume(struct domain *d) + { + if ( test_and_clear_bool(d->arch.hvm_domain.is_s3_suspended) ) ++ { ++ struct vcpu *v; ++ ++ for_each_vcpu( d, v ) ++ hvm_set_guest_tsc(v, 0); + domain_unpause(d); ++ } + } + + static int hvmop_set_isa_irq_level( diff --git a/528f609c-x86-crash-disable-the-watchdog-NMIs-on-the-crashing-cpu.patch b/528f609c-x86-crash-disable-the-watchdog-NMIs-on-the-crashing-cpu.patch new file mode 100644 index 0000000..ac247e3 --- /dev/null +++ b/528f609c-x86-crash-disable-the-watchdog-NMIs-on-the-crashing-cpu.patch @@ -0,0 +1,60 @@ +# Commit 2a16fcd5ba0244fef764886211452acc69c0ed00 +# Date 2013-11-22 14:48:12 +0100 +# Author David Vrabel +# Committer Jan Beulich +x86/crash: disable the watchdog NMIs on the crashing cpu + +nmi_shootdown_cpus() is called during a crash to park all the other +CPUs. This changes the NMI trap handlers, which means there's no point +in having the watchdog still running. + +This also disables the watchdog before executing any crash kexec image +and prevents the image from receiving unexpected NMIs. + +Signed-off-by: David Vrabel + +PVOps Linux as a kexec image shoots itself in the foot otherwise. + +On a Core2 system, Linux declares a firmware bug and tries to invert some bits +in the performance counter register. It ends up setting the number of retired +instructions to generate another NMI to fewer instructions than the NMI +interrupt path itself, and ceases to make any useful progress. + +The call to disable_lapic_nmi_watchdog() must be made this late in the kexec +path to be sure that this cpu is the one which will execute the kexec image. +Otherwise there are race conditions where the NMIs might be disabled on the +wrong cpu, resulting in the kexec image still receiving NMIs.
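+
+That ordering constraint, modeled as a standalone sketch with stubbed
+primitives (the names mirror the patch, but the bodies are placeholders,
+not the real Xen call signatures):
+
+    #include <stdio.h>
+
+    static void disable_lapic_nmi_watchdog(void) { puts("watchdog off"); }
+    static void local_irq_disable(void)          { puts("irqs off"); }
+    static void park_other_cpus(void)            { puts("NMI shootdown"); }
+    static void start_kexec_image(void)          { puts("kexec image"); }
+
+    static void nmi_shootdown_cpus(void)
+    {
+        /*
+         * Runs on the CPU that will launch the kexec image; killing the
+         * watchdog first ensures neither this CPU nor the image can see
+         * a stray watchdog NMI afterwards.
+         */
+        disable_lapic_nmi_watchdog();
+        local_irq_disable();
+        park_other_cpus();
+        start_kexec_image();
+    }
+
+    int main(void)
+    {
+        nmi_shootdown_cpus();
+        return 0;
+    }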
+ +Signed-off-by: Andrew Cooper + +--- a/xen/arch/x86/crash.c ++++ b/xen/arch/x86/crash.c +@@ -117,6 +117,7 @@ static void nmi_shootdown_cpus(void) + unsigned long msecs; + int i, cpu = smp_processor_id(); + ++ disable_lapic_nmi_watchdog(); + local_irq_disable(); + + crashing_cpu = cpu; +--- a/xen/arch/x86/nmi.c ++++ b/xen/arch/x86/nmi.c +@@ -165,7 +165,7 @@ static void nmi_timer_fn(void *unused) + set_timer(&this_cpu(nmi_timer), NOW() + MILLISECS(1000)); + } + +-static void disable_lapic_nmi_watchdog(void) ++void disable_lapic_nmi_watchdog(void) + { + if (nmi_active <= 0) + return; +--- a/xen/include/asm-x86/apic.h ++++ b/xen/include/asm-x86/apic.h +@@ -200,6 +200,7 @@ extern void smp_local_timer_interrupt (s + extern void setup_boot_APIC_clock (void); + extern void setup_secondary_APIC_clock (void); + extern void setup_apic_nmi_watchdog (void); ++extern void disable_lapic_nmi_watchdog(void); + extern int reserve_lapic_nmi(void); + extern void release_lapic_nmi(void); + extern void self_nmi(void); diff --git a/52932418-x86-xsave-fix-nonlazy-state-handling.patch b/52932418-x86-xsave-fix-nonlazy-state-handling.patch new file mode 100644 index 0000000..65c5abb --- /dev/null +++ b/52932418-x86-xsave-fix-nonlazy-state-handling.patch @@ -0,0 +1,89 @@ +# Commit 7d8b5dd98463524686bdee8b973b53c00c232122 +# Date 2013-11-25 11:19:04 +0100 +# Author Liu Jinsong +# Committer Jan Beulich +x86/xsave: fix nonlazy state handling + +Nonlazy xstates should be xsaved each time vcpu_save_fpu() runs. +Operations on nonlazy xstates do not trigger a #NM exception, so this +state must be restored whenever a vcpu is scheduled in and saved +whenever it is scheduled out. + +Currently this bug affects the AMD LWP feature, and later the Intel +MPX feature. With the bugfix both LWP and MPX work fine. + +Signed-off-by: Liu Jinsong + +Furthermore, during restore we also need to set nonlazy_xstate_used +according to the incoming accumulated XCR0. + +Also adjust the changes to i387.c such that there won't be a pointless +clts()/stts() pair. + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -1146,6 +1146,8 @@ long arch_do_domctl( + { + v->arch.xcr0 = _xcr0; + v->arch.xcr0_accum = _xcr0_accum; ++ if ( _xcr0_accum & XSTATE_NONLAZY ) ++ v->arch.nonlazy_xstate_used = 1; + memcpy(v->arch.xsave_area, _xsave_area, + evc->size - 2 * sizeof(uint64_t)); + } +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -1073,6 +1073,8 @@ static int hvm_load_cpu_xsave_states(str + + v->arch.xcr0 = ctxt->xcr0; + v->arch.xcr0_accum = ctxt->xcr0_accum; ++ if ( ctxt->xcr0_accum & XSTATE_NONLAZY ) ++ v->arch.nonlazy_xstate_used = 1; + memcpy(v->arch.xsave_area, &ctxt->save_area, + desc->length - offsetof(struct hvm_hw_cpu_xsave, save_area)); + +--- a/xen/arch/x86/i387.c ++++ b/xen/arch/x86/i387.c +@@ -120,11 +120,22 @@ static inline void fpu_frstor(struct vcp + /*******************************/ + /* FPU Save Functions */ + /*******************************/ ++ ++static inline uint64_t vcpu_xsave_mask(const struct vcpu *v) ++{ ++ if ( v->fpu_dirtied ) ++ return v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY; ++ ++ return v->arch.nonlazy_xstate_used ? XSTATE_NONLAZY : 0; ++} ++ + /* Save x87 extended state */ + static inline void fpu_xsave(struct vcpu *v) + { + bool_t ok; ++ uint64_t mask = vcpu_xsave_mask(v); + ++ ASSERT(mask); + ASSERT(v->arch.xsave_area); + /* + * XCR0 normally represents what guest OS set.
In case of Xen itself, +@@ -132,7 +143,7 @@ static inline void fpu_xsave(struct vcpu + */ + ok = set_xcr0(v->arch.xcr0_accum | XSTATE_FP_SSE); + ASSERT(ok); +- xsave(v, v->arch.nonlazy_xstate_used ? XSTATE_ALL : XSTATE_LAZY); ++ xsave(v, mask); + ok = set_xcr0(v->arch.xcr0 ?: XSTATE_FP_SSE); + ASSERT(ok); + } +@@ -263,7 +274,7 @@ void vcpu_restore_fpu_lazy(struct vcpu * + */ + void vcpu_save_fpu(struct vcpu *v) + { +- if ( !v->fpu_dirtied ) ++ if ( !v->fpu_dirtied && !v->arch.nonlazy_xstate_used ) + return; + + ASSERT(!is_idle_vcpu(v)); diff --git a/CVE-2013-4375-xsa71.patch b/CVE-2013-4375-xsa71.patch deleted file mode 100644 index 6b59cd3..0000000 --- a/CVE-2013-4375-xsa71.patch +++ /dev/null @@ -1,33 +0,0 @@ -References: bnc#842515 CVE-2013-4375 XSA-71 - -xen_disk: mark ioreq as mapped before unmapping in error case - -Commit c6961b7d ("xen_disk: use bdrv_aio_flush instead of bdrv_flush") -modified the semantics of ioreq_{un,}map so that they are idempotent if -called when they're not needed (ie., twice in a row). However, it neglected -to handle the case where batch mapping is not being used (the default), and -one of the grants fails to map. In this case, ioreq_unmap will be called to -unwind and unmap any mappings already performed, but ioreq_unmap simply -returns due to the aforementioned change (the ioreq has not already been -marked as mapped). - -The frontend user can therefore force xen_disk to leak grant mappings, a -per-backend-domain limited resource. - -Fix by marking the ioreq as mapped before calling ioreq_unmap in this -situation. - -This is XSA-71 / CVE-2013-4375 - -Signed-off-by: Matthew Daley - ---- a/tools/qemu-xen-dir-remote/hw/xen_disk.c -+++ b/tools/qemu-xen-dir-remote/hw/xen_disk.c -@@ -406,6 +406,7 @@ static int ioreq_map(struct ioreq *ioreq - xen_be_printf(&ioreq->blkdev->xendev, 0, - "can't map grant ref %d (%s, %d maps)\n", - refs[i], strerror(errno), ioreq->blkdev->cnt_map); -+ ioreq->mapped = 1; - ioreq_unmap(ioreq); - return -1; - } diff --git a/CVE-2013-4553-xsa74.patch b/CVE-2013-4553-xsa74.patch new file mode 100644 index 0000000..f2e4fbe --- /dev/null +++ b/CVE-2013-4553-xsa74.patch @@ -0,0 +1,43 @@ +References: bnc#849667 CVE-2013-4553 XSA-74 + +x86: restrict XEN_DOMCTL_getmemlist + +Coverity ID 1055652 + +(See the code comment.) + +This is CVE-2013-4553 / XSA-74. + +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Reviewed-by: Tim Deegan + +--- a/xen/arch/x86/domctl.c ++++ b/xen/arch/x86/domctl.c +@@ -329,6 +329,26 @@ long arch_do_domctl( + break; + } + ++ /* ++ * XSA-74: This sub-hypercall is broken in several ways: ++ * - lock order inversion (p2m locks inside page_alloc_lock) ++ * - no preemption on huge max_pfns input ++ * - not (re-)checking d->is_dying with page_alloc_lock held ++ * - not honoring start_pfn input (which libxc also doesn't set) ++ * Additionally it is rather useless, as the result is stale by the ++ * time the caller gets to look at it. ++ * As it only has a single, non-production consumer (xen-mceinj), ++ * rather than trying to fix it we restrict it for the time being. ++ */ ++ if ( /* No nested locks inside copy_to_guest_offset(). */ ++ paging_mode_external(current->domain) || ++ /* Arbitrary limit capping processing time. 
*/ ++ max_pfns > GB(4) / PAGE_SIZE ) ++ { ++ ret = -EOPNOTSUPP; ++ break; ++ } ++ + spin_lock(&d->page_alloc_lock); + + ret = i = 0; diff --git a/CVE-2013-4554-xsa76.patch b/CVE-2013-4554-xsa76.patch new file mode 100644 index 0000000..20a5fd1 --- /dev/null +++ b/CVE-2013-4554-xsa76.patch @@ -0,0 +1,22 @@ +References: bnc#849668 CVE-2013-4554 XSA-76 + +x86/HVM: only allow ring 0 guest code to make hypercalls + +Anything else would allow for privilege escalation. + +This is CVE-2013-4554 / XSA-76. + +Signed-off-by: Jan Beulich +Acked-by: Ian Campbell + +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -3345,7 +3345,7 @@ int hvm_do_hypercall(struct cpu_user_reg + case 4: + case 2: + hvm_get_segment_register(curr, x86_seg_ss, &sreg); +- if ( unlikely(sreg.attr.fields.dpl == 3) ) ++ if ( unlikely(sreg.attr.fields.dpl) ) + { + default: + regs->eax = -EPERM; diff --git a/pygrub-boot-legacy-sles.patch b/pygrub-boot-legacy-sles.patch index 8aa62cf..bd4388f 100644 --- a/pygrub-boot-legacy-sles.patch +++ b/pygrub-boot-legacy-sles.patch @@ -1,8 +1,8 @@ -Index: xen-4.3.0-testing/tools/pygrub/src/pygrub +Index: xen-4.3.1-testing/tools/pygrub/src/pygrub =================================================================== ---- xen-4.3.0-testing.orig/tools/pygrub/src/pygrub -+++ xen-4.3.0-testing/tools/pygrub/src/pygrub -@@ -606,6 +606,14 @@ def run_grub(file, entry, fs, cfg_args): +--- xen-4.3.1-testing.orig/tools/pygrub/src/pygrub ++++ xen-4.3.1-testing/tools/pygrub/src/pygrub +@@ -607,6 +607,14 @@ def run_grub(file, entry, fs, cfg_args): print " args: %s" % img.args print " initrd: %s" % img.initrd[1] @@ -17,7 +17,7 @@ Index: xen-4.3.0-testing/tools/pygrub/src/pygrub if interactive and not list_entries: curses.wrapper(run_main) else: -@@ -692,6 +700,14 @@ def sniff_netware(fs, cfg): +@@ -693,6 +701,14 @@ def sniff_netware(fs, cfg): return cfg diff --git a/pygrub-netware-xnloader.patch b/pygrub-netware-xnloader.patch index 4f89619..3e1b5bc 100644 --- a/pygrub-netware-xnloader.patch +++ b/pygrub-netware-xnloader.patch @@ -1,7 +1,7 @@ -Index: xen-4.2.1-testing/tools/pygrub/src/pygrub +Index: xen-4.3.1-testing/tools/pygrub/src/pygrub =================================================================== ---- xen-4.2.1-testing.orig/tools/pygrub/src/pygrub -+++ xen-4.2.1-testing/tools/pygrub/src/pygrub +--- xen-4.3.1-testing.orig/tools/pygrub/src/pygrub ++++ xen-4.3.1-testing/tools/pygrub/src/pygrub @@ -26,6 +26,7 @@ import fsimage import grub.GrubConf import grub.LiloConf @@ -10,7 +10,7 @@ Index: xen-4.2.1-testing/tools/pygrub/src/pygrub PYGRUB_VER = 0.6 FS_READ_MAX = 1024 * 1024 -@@ -734,6 +735,8 @@ if __name__ == "__main__": +@@ -735,6 +736,8 @@ if __name__ == "__main__": if len(data) == 0: os.close(tfd) del datafile diff --git a/qemu-xen-dir-remote.tar.bz2 b/qemu-xen-dir-remote.tar.bz2 index 5f1aad4..f8c623e 100644 --- a/qemu-xen-dir-remote.tar.bz2 +++ b/qemu-xen-dir-remote.tar.bz2 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:866f1faaf1289e513c60a20f93b75fc34302435f39fa337d4ad57cc7958e7640 -size 6037141 +oid sha256:4b43f14e9cb63a52647fcde22a087606b723ba9b96b7b1a9226826f4896d7f99 +size 6037766 diff --git a/xen-4.3.0-testing-src.tar.bz2 b/xen-4.3.0-testing-src.tar.bz2 deleted file mode 100644 index cf7e03c..0000000 --- a/xen-4.3.0-testing-src.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7c25d11d99f7dfbb15987746699f22b213f5b977150ae4bf4c767325430c2d98 -size 4357242 diff --git a/xen-4.3.1-testing-src.tar.bz2 
b/xen-4.3.1-testing-src.tar.bz2 new file mode 100644 index 0000000..62ccc67 --- /dev/null +++ b/xen-4.3.1-testing-src.tar.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7298c50445e274cdc34a36aaf295a015fb626249b1c8158ad8b37da2f141d930 +size 4359136 diff --git a/xen.changes b/xen.changes index 37b04ff..d79c50a 100644 --- a/xen.changes +++ b/xen.changes @@ -1,9 +1,79 @@ +------------------------------------------------------------------- +Tue Nov 26 08:26:07 MST 2013 - carnold@suse.com + +- Upstream patches from Jan + 5281fad4-numa-sched-leave-node-affinity-alone-if-not-in-auto-mode.patch + 52820823-nested-SVM-adjust-guest-handling-of-structure-mappings.patch + 52820863-VMX-don-t-crash-processing-d-debug-key.patch + 5282492f-x86-eliminate-has_arch_mmios.patch + 52864df2-credit-Update-other-parameters-when-setting-tslice_ms.patch + 52864f30-fix-leaking-of-v-cpu_affinity_saved-on-domain-destruction.patch + 5289d225-nested-VMX-don-t-ignore-mapping-errors.patch + 528a0eb0-x86-consider-modules-when-cutting-off-memory.patch + 528f606c-x86-hvm-reset-TSC-to-0-after-domain-resume-from-S3.patch + 528f609c-x86-crash-disable-the-watchdog-NMIs-on-the-crashing-cpu.patch + 52932418-x86-xsave-fix-nonlazy-state-handling.patch + +------------------------------------------------------------------- +Fri Nov 22 14:36:12 MST 2013 - carnold@suse.com + +- bnc#851749 - Xen service file does not call xend properly + xend.service + +------------------------------------------------------------------- +Fri Nov 22 08:15:04 UTC 2013 - adrian@suse.de + +- Add missing requires to pciutils package for xend-tools + +------------------------------------------------------------------- +Tue Nov 19 11:52:31 MST 2013 - carnold@suse.com + +- bnc#851386 - VUL-0: xen: XSA-78: Insufficient TLB flushing in + VT-d (iommu) code + 528a0e5b-TLB-flushing-in-dma_pte_clear_one.patch + ------------------------------------------------------------------- Tue Nov 19 10:42:54 CET 2013 - tbehrens@suse.com - Make -devel package depend on libuuid-devel, since libxl.h includes uuid.h +------------------------------------------------------------------- +Mon Nov 11 10:22:20 MST 2013 - carnold@suse.com + +- bnc#849667 - VUL-0: xen: XSA-74: Lock order reversal between + page_alloc_lock and mm_rwlock + CVE-2013-4553-xsa74.patch +- bnc#849665 - VUL-0: CVE-2013-4551: xen: XSA-75: Host crash due to + guest VMX instruction execution + 52809208-nested-VMX-VMLANUCH-VMRESUME-emulation-must-check-permission-1st.patch +- bnc#849668 - VUL-0: xen: XSA-76: Hypercalls exposed to privilege + rings 1 and 2 of HVM guests + CVE-2013-4554-xsa76.patch +- Upstream patches from Jan + 52654798-x86-xsave-also-save-restore-XCR0-across-suspend-ACPI-S3.patch + 526e43d4-x86-refine-address-validity-checks-before-accessing-page-tables.patch + 526f786a-fix-locking-in-cpu_disable_scheduler.patch + 5277646c-x86-ACPI-x2APIC-guard-against-out-of-range-ACPI-or-APIC-IDs.patch + 5277a134-x86-make-sure-memory-block-is-RAM-before-passing-to-the-allocator.patch + 5278f7f9-x86-HVM-32-bit-IN-result-must-be-zero-extended-to-64-bits.patch + 527a0a05-call-sched_destroy_domain-before-cpupool_rm_domain.patch + 527cb7d2-x86-hvm-fix-restart-of-RTC-periodic-timer-with-vpt_align-1.patch + 527cb820-x86-EFI-make-trampoline-allocation-more-flexible.patch + 5280aae0-x86-idle-reduce-contention-on-ACPI-register-accesses.patch + +------------------------------------------------------------------- +Wed Nov 4 09:42:36 MDT 2013 - carnold@suse.com + +- bnc#848657 - VUL-0: xen: 
CVE-2013-4494: XSA-73: Lock order + reversal between page allocation and grant table locks + 5277639c-gnttab-correct-locking-order-reversal.patch + +------------------------------------------------------------------- +Thu Oct 31 17:07:49 MDT 2013 - carnold@suse.com + +- Update to Xen 4.3.1 + ------------------------------------------------------------------- Tue Oct 22 13:42:54 MDT 2013 - carnold@suse.com @@ -17,6 +87,9 @@ Mon Oct 21 09:57:54 MDT 2013 - carnold@suse.com - bnc#842515 - VUL-0: CVE-2013-4375: XSA-71: xen: qemu disk backend (qdisk) resource leak CVE-2013-4375-xsa71.patch +- bnc#845520 - VUL-0: CVE-2013-4416: xen: ocaml xenstored + mishandles oversized message replies + CVE-2013-4416-xsa72.patch - Upstream patches from Jan 52496bea-x86-properly-handle-hvm_copy_from_guest_-phys-virt-errors.patch (Replaces CVE-2013-4355-xsa63.patch) 52496c11-x86-mm-shadow-Fix-initialization-of-PV-shadow-L4-tables.patch (Replaces CVE-2013-4356-xsa64.patch) diff --git a/xen.spec b/xen.spec index aa77e25..6e881bb 100644 --- a/xen.spec +++ b/xen.spec @@ -15,13 +15,12 @@ # Please submit bugfixes or comments via http://bugs.opensuse.org/ # - Name: xen ExclusiveArch: %ix86 x86_64 %arm aarch64 %define xvers 4.3 %define xvermaj 4 -%define changeset 27204 -%define xen_build_dir xen-4.3.0-testing +%define changeset 27302 +%define xen_build_dir xen-4.3.1-testing %ifarch %arm aarch64 %define with_kmp 0 %define with_stubdom 0 @@ -139,13 +138,13 @@ BuildRequires: xorg-x11 BuildRequires: lndir %endif %endif -Version: 4.3.0_14 +Version: 4.3.1_02 Release: 0 PreReq: %insserv_prereq %fillup_prereq Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) License: GPL-2.0+ Group: System/Kernel -Source0: xen-4.3.0-testing-src.tar.bz2 +Source0: xen-4.3.1-testing-src.tar.bz2 Source1: stubdom.tar.bz2 Source2: qemu-xen-traditional-dir-remote.tar.bz2 Source3: qemu-xen-dir-remote.tar.bz2 @@ -194,87 +193,40 @@ Source99: baselibs.conf # http://xenbits.xensource.com/ext/xenalyze Source20000: xenalyze.hg.tar.bz2 # Upstream patches -Patch1: 51d277a3-x86-don-t-pass-negative-time-to-gtime_to_gtsc-try-2.patch -Patch2: 51d27807-iommu-amd-Fix-logic-for-clearing-the-IOMMU-interrupt-bits.patch -Patch3: 51d27841-iommu-amd-Workaround-for-erratum-787.patch -Patch4: 51d5334e-x86-mm-Ensure-useful-progress-in-alloc_l2_table.patch -Patch5: 51daa074-Revert-hvmloader-always-include-HPET-table.patch -Patch6: 51dd155c-adjust-x86-EFI-build.patch -Patch7: 51e517e6-AMD-IOMMU-allocate-IRTEs.patch -Patch8: 51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch -Patch9: 51e63d80-x86-cpuidle-Change-logging-for-unknown-APIC-IDs.patch -Patch10: 51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch -Patch11: 51e6540d-x86-don-t-use-destroy_xen_mappings-for-vunmap.patch -Patch12: 51e7963f-x86-time-Update-wallclock-in-shared-info-when-altering-domain-time-offset.patch -Patch13: 51ffd577-fix-off-by-one-mistakes-in-vm_alloc.patch -Patch14: 51ffd5fd-x86-refine-FPU-selector-handling-code-for-XSAVEOPT.patch -Patch15: 520114bb-Nested-VMX-Flush-TLBs-and-Caches-if-paging-mode-changed.patch -Patch16: 520119fc-xen-conring-Write-to-console-ring-even-if-console-lock-is-busted.patch -Patch17: 520a24f6-x86-AMD-Fix-nested-svm-crash-due-to-assertion-in-__virt_to_maddr.patch -Patch18: 520a2570-x86-AMD-Inject-GP-instead-of-UD-when-unable-to-map-vmcb.patch -Patch19: 520a2705-watchdog-crash-Always-disable-watchdog-in-console_force_unlock.patch -Patch20: 520a5504-VMX-add-boot-parameter-to-enable-disable-APIC-v-dynamically.patch -Patch21: 
520b4b60-VT-d-protect-against-bogus-information-coming-from-BIOS.patch -Patch22: 520b4bda-x86-MTRR-fix-range-check-in-mtrr_add_page.patch -Patch23: 520cb8b6-x86-time-fix-check-for-negative-time-in-__update_vcpu_system_time.patch -Patch24: 52146070-ACPI-fix-acpi_os_map_memory.patch -Patch25: 5214d26a-VT-d-warn-about-CFI-being-enabled-by-firmware.patch -Patch26: 5215d094-Nested-VMX-Check-whether-interrupt-is-blocked-by-TPR.patch -Patch27: 5215d0c5-Nested-VMX-Force-check-ISR-when-L2-is-running.patch -Patch28: 5215d135-Nested-VMX-Clear-APIC-v-control-bit-in-vmcs02.patch -Patch29: 5215d2d5-Nested-VMX-Update-APIC-v-RVI-SVI-when-vmexit-to-L1.patch -Patch30: 5215d8b0-Correct-X2-APIC-HVM-emulation.patch -Patch31: 521c6d4a-x86-don-t-allow-Dom0-access-to-the-MSI-address-range.patch -Patch32: 521c6d6c-x86-don-t-allow-Dom0-access-to-the-HT-address-range.patch -Patch33: 521c6e23-x86-Intel-add-support-for-Haswell-CPU-models.patch -Patch34: 521db25f-Fix-inactive-timer-list-corruption-on-second-S3-resume.patch -Patch35: 521e1156-x86-AVX-instruction-emulation-fixes.patch -Patch36: 521ef8d9-AMD-IOMMU-add-missing-checks.patch -Patch37: 52205a7d-hvmloader-smbios-Correctly-count-the-number-of-tables-written.patch -Patch38: 52205a90-public-hvm_xs_strings.h-Fix-ABI-regression-for-OEM-SMBios-strings.patch -Patch39: 52205e27-x86-xsave-initialization-improvements.patch -Patch40: 5226020f-xend-handle-extended-PCI-configuration-space-when-saving-state.patch -Patch41: 52260214-xend-fix-file-descriptor-leak-in-pci-utilities.patch -Patch42: 52285317-hvmloader-fix-SeaBIOS-interface.patch -Patch43: 522d896b-x86-EFI-properly-handle-run-time-memory-regions-outside-the-1-1-map.patch -Patch44: 522d8a1f-x86-allow-guest-to-set-clear-MSI-X-mask-bit-try-2.patch -Patch45: 522dc044-xmalloc-make-whole-pages-xfree-clear-the-order-field-ab-used-by-xmalloc.patch -Patch46: 522dc0e6-x86-xsave-fix-migration-from-xsave-capable-to-xsave-incapable-host.patch -Patch47: 522f2f9f-Nested-VMX-Clear-bit-31-of-IA32_VMX_BASIC-MSR.patch -Patch48: 522f37b2-sched-arinc653-check-for-guest-data-transfer-failures.patch -Patch49: 523172d5-x86-fix-memory-cut-off-when-using-PFN-compression.patch -Patch50: 5231e090-libxc-x86-fix-page-table-creation-for-huge-guests.patch -Patch51: 5231f00c-cpufreq-missing-check-of-copy_from_guest.patch -Patch52: 523304b6-x86-machine_restart-must-not-call-acpi_dmar_reinstate-twice.patch -Patch53: 5239a064-x86-HVM-fix-failure-path-in-hvm_vcpu_initialise.patch -Patch54: 5239a076-VMX-fix-failure-path-in-construct_vmcs.patch -Patch55: 523c0ed4-x86-HVM-properly-handle-wide-MMIO.patch -Patch56: 523c1758-sched_credit-filter-node-affinity-mask-against-online-cpus.patch -Patch57: 523c1834-unmodified_drivers-enable-unplug-per-default.patch -Patch58: 523ff393-x86-HVM-linear-address-must-be-canonical-for-the-whole-accessed-range.patch -Patch59: 523ff3e2-x86-HVM-refuse-doing-string-operations-in-certain-situations.patch -Patch60: 5242a1b5-x86-xsave-initialize-extended-register-state-when-guests-enable-it.patch -Patch61: 52496bea-x86-properly-handle-hvm_copy_from_guest_-phys-virt-errors.patch -Patch62: 52496c11-x86-mm-shadow-Fix-initialization-of-PV-shadow-L4-tables.patch -Patch63: 52496c32-x86-properly-set-up-fbld-emulation-operand-address.patch -Patch64: 52497c6c-x86-don-t-blindly-create-L3-tables-for-the-direct-map.patch -Patch65: 524e971b-x86-idle-Fix-get_cpu_idle_time-s-interaction-with-offline-pcpus.patch -Patch66: 524e9762-x86-percpu-Force-INVALID_PERCPU_AREA-to-non-canonical.patch -Patch67: 
524e983e-Nested-VMX-check-VMX-capability-before-read-VMX-related-MSRs.patch -Patch68: 524e98b1-Nested-VMX-fix-IA32_VMX_CR4_FIXED1-msr-emulation.patch -Patch69: 524e9dc0-xsm-forbid-PV-guest-console-reads.patch -Patch70: 5256a979-x86-check-segment-descriptor-read-result-in-64-bit-OUTS-emulation.patch -Patch71: 5256be57-libxl-fix-vif-rate-parsing.patch -Patch72: 5256be84-tools-ocaml-fix-erroneous-free-of-cpumap-in-stub_xc_vcpu_getaffinity.patch -Patch73: 5256be92-libxl-fix-out-of-memory-error-handling-in-libxl_list_cpupool.patch -Patch74: 5257a89a-x86-correct-LDT-checks.patch -Patch75: 5257a8e7-x86-add-address-validity-check-to-guest_map_l1e.patch -Patch76: 5257a944-x86-check-for-canonical-address-before-doing-page-walks.patch -Patch77: 525b95f4-scheduler-adjust-internal-locking-interface.patch -Patch78: 525b9617-sched-fix-race-between-sched_move_domain-and-vcpu_wake.patch -Patch79: 525e69e8-credit-unpause-parked-vcpu-before-destroying-it.patch -Patch80: 525faf5e-x86-print-relevant-tail-part-of-filename-for-warnings-and-crashes.patch -Patch7100: CVE-2013-4375-xsa71.patch +Patch1: 51e517e6-AMD-IOMMU-allocate-IRTEs.patch +Patch2: 51e5183f-AMD-IOMMU-untie-remap-and-vector-maps.patch +Patch3: 51e63df6-VMX-fix-interaction-of-APIC-V-and-Viridian-emulation.patch +Patch4: 521c6d6c-x86-don-t-allow-Dom0-access-to-the-HT-address-range.patch +Patch5: 525b95f4-scheduler-adjust-internal-locking-interface.patch +Patch6: 525b9617-sched-fix-race-between-sched_move_domain-and-vcpu_wake.patch +Patch7: 525e69e8-credit-unpause-parked-vcpu-before-destroying-it.patch +Patch8: 525faf5e-x86-print-relevant-tail-part-of-filename-for-warnings-and-crashes.patch +Patch9: 52654798-x86-xsave-also-save-restore-XCR0-across-suspend-ACPI-S3.patch +Patch10: 526e43d4-x86-refine-address-validity-checks-before-accessing-page-tables.patch +Patch11: 526f786a-fix-locking-in-cpu_disable_scheduler.patch +Patch12: 5277639c-gnttab-correct-locking-order-reversal.patch +Patch13: 5277646c-x86-ACPI-x2APIC-guard-against-out-of-range-ACPI-or-APIC-IDs.patch +Patch14: 5277a134-x86-make-sure-memory-block-is-RAM-before-passing-to-the-allocator.patch +Patch15: 5278f7f9-x86-HVM-32-bit-IN-result-must-be-zero-extended-to-64-bits.patch +Patch16: 527a0a05-call-sched_destroy_domain-before-cpupool_rm_domain.patch +Patch17: 527cb7d2-x86-hvm-fix-restart-of-RTC-periodic-timer-with-vpt_align-1.patch +Patch18: 527cb820-x86-EFI-make-trampoline-allocation-more-flexible.patch +Patch19: 52809208-nested-VMX-VMLANUCH-VMRESUME-emulation-must-check-permission-1st.patch +Patch20: 5280aae0-x86-idle-reduce-contention-on-ACPI-register-accesses.patch +Patch21: 5281fad4-numa-sched-leave-node-affinity-alone-if-not-in-auto-mode.patch +Patch22: 52820823-nested-SVM-adjust-guest-handling-of-structure-mappings.patch +Patch23: 52820863-VMX-don-t-crash-processing-d-debug-key.patch +Patch24: 5282492f-x86-eliminate-has_arch_mmios.patch +Patch25: 52864df2-credit-Update-other-parameters-when-setting-tslice_ms.patch +Patch26: 52864f30-fix-leaking-of-v-cpu_affinity_saved-on-domain-destruction.patch +Patch27: 5289d225-nested-VMX-don-t-ignore-mapping-errors.patch +Patch28: 528a0e5b-TLB-flushing-in-dma_pte_clear_one.patch +Patch29: 528a0eb0-x86-consider-modules-when-cutting-off-memory.patch +Patch30: 528f606c-x86-hvm-reset-TSC-to-0-after-domain-resume-from-S3.patch +Patch31: 528f609c-x86-crash-disable-the-watchdog-NMIs-on-the-crashing-cpu.patch +Patch32: 52932418-x86-xsave-fix-nonlazy-state-handling.patch +Patch7400: CVE-2013-4553-xsa74.patch +Patch7600: CVE-2013-4554-xsa76.patch # Upstream 
qemu patches # Our patches Patch301: xen-destdir.patch @@ -438,6 +390,7 @@ Authors: %package xend-tools Summary: Xen Virtualization: Control tools for domain 0 Group: System/Kernel +Requires: pciutils Requires: python Requires: python-curses Requires: python-lxml @@ -598,55 +551,8 @@ Authors %patch30 -p1 %patch31 -p1 %patch32 -p1 -%patch33 -p1 -%patch34 -p1 -%patch35 -p1 -%patch36 -p1 -%patch37 -p1 -%patch38 -p1 -%patch39 -p1 -%patch40 -p1 -%patch41 -p1 -%patch42 -p1 -%patch43 -p1 -%patch44 -p1 -%patch45 -p1 -%patch46 -p1 -%patch47 -p1 -%patch48 -p1 -%patch49 -p1 -%patch50 -p1 -%patch51 -p1 -%patch52 -p1 -%patch53 -p1 -%patch54 -p1 -%patch55 -p1 -%patch56 -p1 -%patch57 -p1 -%patch58 -p1 -%patch59 -p1 -%patch60 -p1 -%patch61 -p1 -%patch62 -p1 -%patch63 -p1 -%patch64 -p1 -%patch65 -p1 -%patch66 -p1 -%patch67 -p1 -%patch68 -p1 -%patch69 -p1 -%patch70 -p1 -%patch71 -p1 -%patch72 -p1 -%patch73 -p1 -%patch74 -p1 -%patch75 -p1 -%patch76 -p1 -%patch77 -p1 -%patch78 -p1 -%patch79 -p1 -%patch80 -p1 -%patch7100 -p1 +%patch7400 -p1 +%patch7600 -p1 %patch301 -p1 %patch302 -p1 %patch303 -p1 diff --git a/xend.service b/xend.service index 6ef546d..3b828fa 100644 --- a/xend.service +++ b/xend.service @@ -8,7 +8,7 @@ Type=forking PIDFile=/var/run/xend.pid Environment=HOME=/root ExecStartPre=/bin/grep -q control_d /proc/xen/capabilities -ExecStart=/usr/sbin/xend +ExecStart=/usr/sbin/xend start [Install] WantedBy=multi-user.target