diff --git a/53d7b781-x86-cpu-undo-BIOS-CPUID-max_leaf-limit-earlier.patch b/53d7b781-x86-cpu-undo-BIOS-CPUID-max_leaf-limit-earlier.patch deleted file mode 100644 index 643a6c9..0000000 --- a/53d7b781-x86-cpu-undo-BIOS-CPUID-max_leaf-limit-earlier.patch +++ /dev/null @@ -1,46 +0,0 @@ -# Commit a1ac4cf52e38386bac7ac3440c7da0099662ca5c -# Date 2014-07-29 17:02:25 +0200 -# Author Andrew Cooper -# Committer Jan Beulich -x86/cpu: undo BIOS CPUID max_leaf limit before querying for features - -If IA32_MISC_ENABLE[22] is set by the BIOS, CPUID.0.EAX will be limited to 3. -Lift this limit before considering whether to query CPUID.7[ECX=0].EBX for -features. - -Without this change, dom0 is able to see this feature leaf (as the limit was -subsequently lifted), and will set features appropriately in HVM domain cpuid -policies. - -The specific bug XenServer observed was the advertisement of the FSGSBASE -feature, but an inability to set CR4.FSGSBASE as Xen considered the bit to be -reserved as cpu_has_fsgsbase incorrectly evaluated as false. - -This is a regression introduced by c/s 44e24f8567 "x86: don't call -generic_identify() redundantly" where the redundant call actually resampled -CPUID.7[ECX=0] properly to obtain the feature flags. - -Signed-off-by: Andrew Cooper -Reviewed-by: Jan Beulich - ---- a/xen/arch/x86/cpu/common.c -+++ b/xen/arch/x86/cpu/common.c -@@ -234,6 +234,9 @@ static void __cpuinit generic_identify(s - paddr_bits = cpuid_eax(0x80000008) & 0xff; - } - -+ /* Might lift BIOS max_leaf=3 limit. */ -+ early_intel_workaround(c); -+ - /* Intel-defined flags: level 0x00000007 */ - if ( c->cpuid_level >= 0x00000007 ) { - u32 dummy; -@@ -241,8 +244,6 @@ static void __cpuinit generic_identify(s - c->x86_capability[X86_FEATURE_FSGSBASE / 32] = ebx; - } - -- early_intel_workaround(c); -- - #ifdef CONFIG_X86_HT - c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff; - #endif diff --git a/53df71c7-lz4-check-for-underruns.patch b/53df71c7-lz4-check-for-underruns.patch deleted file mode 100644 index 78e06d4..0000000 --- a/53df71c7-lz4-check-for-underruns.patch +++ /dev/null @@ -1,54 +0,0 @@ -# Commit 9143a6c55ef7e8f630857cb08c03844d372c2345 -# Date 2014-08-04 13:43:03 +0200 -# Author Jan Beulich -# Committer Jan Beulich -lz4: check for underruns - -While overruns are already being taken care of, underruns (resulting -from overflows in the respective "op + length" (or similar) operations -weren't. - -This is CVE-2014-4611. 
- -Signed-off-by: Jan Beulich -Acked-by: Ian Campbell - - ---- a/xen/common/lz4/decompress.c -+++ b/xen/common/lz4/decompress.c -@@ -84,6 +84,8 @@ static int INIT lz4_uncompress(const uns - ip += length; - break; /* EOF */ - } -+ if (unlikely((unsigned long)cpy < (unsigned long)op)) -+ goto _output_error; - LZ4_WILDCOPY(ip, op, cpy); - ip -= (op - cpy); - op = cpy; -@@ -142,6 +144,8 @@ static int INIT lz4_uncompress(const uns - goto _output_error; - continue; - } -+ if (unlikely((unsigned long)cpy < (unsigned long)op)) -+ goto _output_error; - LZ4_SECURECOPY(ref, op, cpy); - op = cpy; /* correction */ - } -@@ -207,6 +211,8 @@ static int lz4_uncompress_unknownoutputs - op += length; - break;/* Necessarily EOF, due to parsing restrictions */ - } -+ if (unlikely((unsigned long)cpy < (unsigned long)op)) -+ goto _output_error; - LZ4_WILDCOPY(ip, op, cpy); - ip -= (op - cpy); - op = cpy; -@@ -270,6 +276,8 @@ static int lz4_uncompress_unknownoutputs - goto _output_error; - continue; - } -+ if (unlikely((unsigned long)cpy < (unsigned long)op)) -+ goto _output_error; - LZ4_SECURECOPY(ref, op, cpy); - op = cpy; /* correction */ - } diff --git a/53df727b-x86-HVM-extend-LAPIC-shortcuts-around-P2M-lookups.patch b/53df727b-x86-HVM-extend-LAPIC-shortcuts-around-P2M-lookups.patch index 274cb1f..2e1c94a 100644 --- a/53df727b-x86-HVM-extend-LAPIC-shortcuts-around-P2M-lookups.patch +++ b/53df727b-x86-HVM-extend-LAPIC-shortcuts-around-P2M-lookups.patch @@ -1,3 +1,5 @@ +References: bnc#882089 + # Commit fd1863847af15c3676348447755e1a1801f9d394 # Date 2014-08-04 13:46:03 +0200 # Author Jan Beulich diff --git a/53e47d6b-x86_emulate-properly-do-IP-updates-and-other-side-effects.patch b/53e47d6b-x86_emulate-properly-do-IP-updates-and-other-side-effects.patch deleted file mode 100644 index 1487b33..0000000 --- a/53e47d6b-x86_emulate-properly-do-IP-updates-and-other-side-effects.patch +++ /dev/null @@ -1,378 +0,0 @@ -# Commit 3af450fd2d9403f208d3ac6459716f027b8597ad -# Date 2014-08-08 09:34:03 +0200 -# Author Jan Beulich -# Committer Jan Beulich -x86_emulate: properly do IP updates and other side effects on success - -The two MMX/SSE/AVX code blocks failed to update IP properly, and these -as well as get_reg_refix(), which "manually" updated IP so far, failed -to do the TF and RF processing needed at the end of successfully -emulated instructions. - -Fix the test utility at once to check IP is properly getting updated, -and while at it macroize the respective code quite a bit, hopefully -making it easier to add further tests when the need arises. 
- -Reported-by: Andrei LUTAS -Signed-off-by: Jan Beulich -Tested-by: Razvan Cojocaru -Reviewed-by: Andrew Cooper - - ---- a/tools/tests/x86_emulator/test_x86_emulator.c -+++ b/tools/tests/x86_emulator/test_x86_emulator.c -@@ -597,23 +597,32 @@ int main(int argc, char **argv) - printf("skipped\n"); - #endif - -+#define decl_insn(which) extern const unsigned char which[], which##_len[] -+#define put_insn(which, insn) ".pushsection .test, \"ax\", @progbits\n" \ -+ #which ": " insn "\n" \ -+ ".equ " #which "_len, .-" #which "\n" \ -+ ".popsection" -+#define set_insn(which) (regs.eip = (unsigned long)memcpy(instr, which, \ -+ (unsigned long)which##_len)) -+#define check_eip(which) (regs.eip == (unsigned long)instr + \ -+ (unsigned long)which##_len) -+ - printf("%-40s", "Testing movq %mm3,(%ecx)..."); - if ( stack_exec && cpu_has_mmx ) - { -- extern const unsigned char movq_to_mem[]; -+ decl_insn(movq_to_mem); - - asm volatile ( "pcmpeqb %%mm3, %%mm3\n" -- ".pushsection .test, \"a\", @progbits\n" -- "movq_to_mem: movq %%mm3, (%0)\n" -- ".popsection" :: "c" (NULL) ); -+ put_insn(movq_to_mem, "movq %%mm3, (%0)") -+ :: "c" (NULL) ); - -- memcpy(instr, movq_to_mem, 15); -+ set_insn(movq_to_mem); - memset(res, 0x33, 64); - memset(res + 8, 0xff, 8); -- regs.eip = (unsigned long)&instr[0]; - regs.ecx = (unsigned long)res; - rc = x86_emulate(&ctxt, &emulops); -- if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) -+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) || -+ !check_eip(movq_to_mem) ) - goto fail; - printf("okay\n"); - } -@@ -623,19 +632,17 @@ int main(int argc, char **argv) - printf("%-40s", "Testing movq (%edx),%mm5..."); - if ( stack_exec && cpu_has_mmx ) - { -- extern const unsigned char movq_from_mem[]; -+ decl_insn(movq_from_mem); - - asm volatile ( "pcmpgtb %%mm5, %%mm5\n" -- ".pushsection .test, \"a\", @progbits\n" -- "movq_from_mem: movq (%0), %%mm5\n" -- ".popsection" :: "d" (NULL) ); -+ put_insn(movq_from_mem, "movq (%0), %%mm5") -+ :: "d" (NULL) ); - -- memcpy(instr, movq_from_mem, 15); -- regs.eip = (unsigned long)&instr[0]; -+ set_insn(movq_from_mem); - regs.ecx = 0; - regs.edx = (unsigned long)res; - rc = x86_emulate(&ctxt, &emulops); -- if ( rc != X86EMUL_OKAY ) -+ if ( rc != X86EMUL_OKAY || !check_eip(movq_from_mem) ) - goto fail; - asm ( "pcmpeqb %%mm3, %%mm3\n\t" - "pcmpeqb %%mm5, %%mm3\n\t" -@@ -650,20 +657,19 @@ int main(int argc, char **argv) - printf("%-40s", "Testing movdqu %xmm2,(%ecx)..."); - if ( stack_exec && cpu_has_sse2 ) - { -- extern const unsigned char movdqu_to_mem[]; -+ decl_insn(movdqu_to_mem); - - asm volatile ( "pcmpeqb %%xmm2, %%xmm2\n" -- ".pushsection .test, \"a\", @progbits\n" -- "movdqu_to_mem: movdqu %%xmm2, (%0)\n" -- ".popsection" :: "c" (NULL) ); -+ put_insn(movdqu_to_mem, "movdqu %%xmm2, (%0)") -+ :: "c" (NULL) ); - -- memcpy(instr, movdqu_to_mem, 15); -+ set_insn(movdqu_to_mem); - memset(res, 0x55, 64); - memset(res + 8, 0xff, 16); -- regs.eip = (unsigned long)&instr[0]; - regs.ecx = (unsigned long)res; - rc = x86_emulate(&ctxt, &emulops); -- if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) -+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) || -+ !check_eip(movdqu_to_mem) ) - goto fail; - printf("okay\n"); - } -@@ -673,19 +679,17 @@ int main(int argc, char **argv) - printf("%-40s", "Testing movdqu (%edx),%xmm4..."); - if ( stack_exec && cpu_has_sse2 ) - { -- extern const unsigned char movdqu_from_mem[]; -+ decl_insn(movdqu_from_mem); - - asm volatile ( "pcmpgtb %%xmm4, %%xmm4\n" -- ".pushsection .test, \"a\", @progbits\n" 
-- "movdqu_from_mem: movdqu (%0), %%xmm4\n" -- ".popsection" :: "d" (NULL) ); -+ put_insn(movdqu_from_mem, "movdqu (%0), %%xmm4") -+ :: "d" (NULL) ); - -- memcpy(instr, movdqu_from_mem, 15); -- regs.eip = (unsigned long)&instr[0]; -+ set_insn(movdqu_from_mem); - regs.ecx = 0; - regs.edx = (unsigned long)res; - rc = x86_emulate(&ctxt, &emulops); -- if ( rc != X86EMUL_OKAY ) -+ if ( rc != X86EMUL_OKAY || !check_eip(movdqu_from_mem) ) - goto fail; - asm ( "pcmpeqb %%xmm2, %%xmm2\n\t" - "pcmpeqb %%xmm4, %%xmm2\n\t" -@@ -700,21 +704,20 @@ int main(int argc, char **argv) - printf("%-40s", "Testing vmovdqu %ymm2,(%ecx)..."); - if ( stack_exec && cpu_has_avx ) - { -- extern const unsigned char vmovdqu_to_mem[]; -+ decl_insn(vmovdqu_to_mem); - - asm volatile ( "vpcmpeqb %%xmm2, %%xmm2, %%xmm2\n" -- ".pushsection .test, \"a\", @progbits\n" -- "vmovdqu_to_mem: vmovdqu %%ymm2, (%0)\n" -- ".popsection" :: "c" (NULL) ); -+ put_insn(vmovdqu_to_mem, "vmovdqu %%ymm2, (%0)") -+ :: "c" (NULL) ); - -- memcpy(instr, vmovdqu_to_mem, 15); -+ set_insn(vmovdqu_to_mem); - memset(res, 0x55, 128); - memset(res + 16, 0xff, 16); - memset(res + 20, 0x00, 16); -- regs.eip = (unsigned long)&instr[0]; - regs.ecx = (unsigned long)res; - rc = x86_emulate(&ctxt, &emulops); -- if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) ) -+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 16, 64) || -+ !check_eip(vmovdqu_to_mem) ) - goto fail; - printf("okay\n"); - } -@@ -724,7 +727,7 @@ int main(int argc, char **argv) - printf("%-40s", "Testing vmovdqu (%edx),%ymm4..."); - if ( stack_exec && cpu_has_avx ) - { -- extern const unsigned char vmovdqu_from_mem[]; -+ decl_insn(vmovdqu_from_mem); - - #if 0 /* Don't use AVX2 instructions for now */ - asm volatile ( "vpcmpgtb %%ymm4, %%ymm4, %%ymm4\n" -@@ -732,17 +735,15 @@ int main(int argc, char **argv) - asm volatile ( "vpcmpgtb %%xmm4, %%xmm4, %%xmm4\n\t" - "vinsertf128 $1, %%xmm4, %%ymm4, %%ymm4\n" - #endif -- ".pushsection .test, \"a\", @progbits\n" -- "vmovdqu_from_mem: vmovdqu (%0), %%ymm4\n" -- ".popsection" :: "d" (NULL) ); -+ put_insn(vmovdqu_from_mem, "vmovdqu (%0), %%ymm4") -+ :: "d" (NULL) ); - -- memcpy(instr, vmovdqu_from_mem, 15); -+ set_insn(vmovdqu_from_mem); - memset(res + 4, 0xff, 16); -- regs.eip = (unsigned long)&instr[0]; - regs.ecx = 0; - regs.edx = (unsigned long)res; - rc = x86_emulate(&ctxt, &emulops); -- if ( rc != X86EMUL_OKAY ) -+ if ( rc != X86EMUL_OKAY || !check_eip(vmovdqu_from_mem) ) - goto fail; - #if 0 /* Don't use AVX2 instructions for now */ - asm ( "vpcmpeqb %%ymm2, %%ymm2, %%ymm2\n\t" -@@ -769,20 +770,19 @@ int main(int argc, char **argv) - memset(res + 10, 0x66, 8); - if ( stack_exec && cpu_has_sse2 ) - { -- extern const unsigned char movsd_to_mem[]; -+ decl_insn(movsd_to_mem); - - asm volatile ( "movlpd %0, %%xmm5\n\t" - "movhpd %0, %%xmm5\n" -- ".pushsection .test, \"a\", @progbits\n" -- "movsd_to_mem: movsd %%xmm5, (%1)\n" -- ".popsection" :: "m" (res[10]), "c" (NULL) ); -+ put_insn(movsd_to_mem, "movsd %%xmm5, (%1)") -+ :: "m" (res[10]), "c" (NULL) ); - -- memcpy(instr, movsd_to_mem, 15); -- regs.eip = (unsigned long)&instr[0]; -+ set_insn(movsd_to_mem); - regs.ecx = (unsigned long)(res + 2); - regs.edx = 0; - rc = x86_emulate(&ctxt, &emulops); -- if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) -+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) || -+ !check_eip(movsd_to_mem) ) - goto fail; - printf("okay\n"); - } -@@ -795,19 +795,17 @@ int main(int argc, char **argv) - printf("%-40s", "Testing movaps (%edx),%xmm7..."); - if ( 
stack_exec && cpu_has_sse ) - { -- extern const unsigned char movaps_from_mem[]; -+ decl_insn(movaps_from_mem); - - asm volatile ( "xorps %%xmm7, %%xmm7\n" -- ".pushsection .test, \"a\", @progbits\n" -- "movaps_from_mem: movaps (%0), %%xmm7\n" -- ".popsection" :: "d" (NULL) ); -+ put_insn(movaps_from_mem, "movaps (%0), %%xmm7") -+ :: "d" (NULL) ); - -- memcpy(instr, movaps_from_mem, 15); -- regs.eip = (unsigned long)&instr[0]; -+ set_insn(movaps_from_mem); - regs.ecx = 0; - regs.edx = (unsigned long)res; - rc = x86_emulate(&ctxt, &emulops); -- if ( rc != X86EMUL_OKAY ) -+ if ( rc != X86EMUL_OKAY || !check_eip(movaps_from_mem) ) - goto fail; - asm ( "cmpeqps %1, %%xmm7\n\t" - "movmskps %%xmm7, %0" : "=r" (rc) : "m" (res[8]) ); -@@ -823,19 +821,18 @@ int main(int argc, char **argv) - memset(res + 10, 0x77, 8); - if ( stack_exec && cpu_has_avx ) - { -- extern const unsigned char vmovsd_to_mem[]; -+ decl_insn(vmovsd_to_mem); - - asm volatile ( "vbroadcastsd %0, %%ymm5\n" -- ".pushsection .test, \"a\", @progbits\n" -- "vmovsd_to_mem: vmovsd %%xmm5, (%1)\n" -- ".popsection" :: "m" (res[10]), "c" (NULL) ); -+ put_insn(vmovsd_to_mem, "vmovsd %%xmm5, (%1)") -+ :: "m" (res[10]), "c" (NULL) ); - -- memcpy(instr, vmovsd_to_mem, 15); -- regs.eip = (unsigned long)&instr[0]; -+ set_insn(vmovsd_to_mem); - regs.ecx = (unsigned long)(res + 2); - regs.edx = 0; - rc = x86_emulate(&ctxt, &emulops); -- if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) ) -+ if ( (rc != X86EMUL_OKAY) || memcmp(res, res + 8, 32) || -+ !check_eip(vmovsd_to_mem) ) - goto fail; - printf("okay\n"); - } -@@ -848,19 +845,17 @@ int main(int argc, char **argv) - printf("%-40s", "Testing vmovaps (%edx),%ymm7..."); - if ( stack_exec && cpu_has_avx ) - { -- extern const unsigned char vmovaps_from_mem[]; -+ decl_insn(vmovaps_from_mem); - - asm volatile ( "vxorps %%ymm7, %%ymm7, %%ymm7\n" -- ".pushsection .test, \"a\", @progbits\n" -- "vmovaps_from_mem: vmovaps (%0), %%ymm7\n" -- ".popsection" :: "d" (NULL) ); -+ put_insn(vmovaps_from_mem, "vmovaps (%0), %%ymm7") -+ :: "d" (NULL) ); - -- memcpy(instr, vmovaps_from_mem, 15); -- regs.eip = (unsigned long)&instr[0]; -+ set_insn(vmovaps_from_mem); - regs.ecx = 0; - regs.edx = (unsigned long)res; - rc = x86_emulate(&ctxt, &emulops); -- if ( rc != X86EMUL_OKAY ) -+ if ( rc != X86EMUL_OKAY || !check_eip(vmovaps_from_mem) ) - goto fail; - asm ( "vcmpeqps %1, %%ymm7, %%ymm0\n\t" - "vmovmskps %%ymm0, %0" : "=r" (rc) : "m" (res[8]) ); -@@ -871,6 +866,11 @@ int main(int argc, char **argv) - else - printf("skipped\n"); - -+#undef decl_insn -+#undef put_insn -+#undef set_insn -+#undef check_eip -+ - for ( j = 1; j <= 2; j++ ) - { - #if defined(__i386__) ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -720,29 +720,26 @@ do{ uint8_t stub[] = { _bytes, 0xc3 }; - put_fpu(&fic); \ - } while (0) - --static unsigned long __get_rep_prefix( -- struct cpu_user_regs *int_regs, -- struct cpu_user_regs *ext_regs, -+static unsigned long _get_rep_prefix( -+ const struct cpu_user_regs *int_regs, - int ad_bytes) - { -- unsigned long ecx = ((ad_bytes == 2) ? (uint16_t)int_regs->ecx : -- (ad_bytes == 4) ? (uint32_t)int_regs->ecx : -- int_regs->ecx); -- -- /* Skip the instruction if no repetitions are required. */ -- if ( ecx == 0 ) -- ext_regs->eip = int_regs->eip; -- -- return ecx; -+ return (ad_bytes == 2) ? (uint16_t)int_regs->ecx : -+ (ad_bytes == 4) ? 
(uint32_t)int_regs->ecx : -+ int_regs->ecx; - } - - #define get_rep_prefix() ({ \ - unsigned long max_reps = 1; \ - if ( rep_prefix() ) \ -- max_reps = __get_rep_prefix(&_regs, ctxt->regs, ad_bytes); \ -+ max_reps = _get_rep_prefix(&_regs, ad_bytes); \ - if ( max_reps == 0 ) \ -- goto done; \ -- max_reps; \ -+ { \ -+ /* Skip the instruction if no repetitions are required. */ \ -+ dst.type = OP_NONE; \ -+ goto writeback; \ -+ } \ -+ max_reps; \ - }) - - static void __put_rep_prefix( -@@ -3921,7 +3918,8 @@ x86_emulate( - if ( !rc && (b & 1) && (ea.type == OP_MEM) ) - rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp, - ea.bytes, ctxt); -- goto done; -+ dst.type = OP_NONE; -+ break; - } - - case 0x20: /* mov cr,reg */ -@@ -4188,7 +4186,8 @@ x86_emulate( - if ( !rc && (b != 0x6f) && (ea.type == OP_MEM) ) - rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp, - ea.bytes, ctxt); -- goto done; -+ dst.type = OP_NONE; -+ break; - } - - case 0x80 ... 0x8f: /* jcc (near) */ { diff --git a/53e8be5f-x86-vHPET-use-rwlock-instead-of-simple-one.patch b/53e8be5f-x86-vHPET-use-rwlock-instead-of-simple-one.patch new file mode 100644 index 0000000..938ff7f --- /dev/null +++ b/53e8be5f-x86-vHPET-use-rwlock-instead-of-simple-one.patch @@ -0,0 +1,194 @@ +References: bnc#882089 + +# Commit ded2100990d1688b96c2edc7221887c56c1a8e04 +# Date 2014-08-11 15:00:15 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/vHPET: use rwlock instead of simple one + +This namely benefits guests heavily reading the main counter, but not +touching the HPET much otherwise. Note that due to the way +hpet_get_comparator() works hpet_read() has to special cases reads from +the comparator registers and use a write lock there instead of the read +one used for all other registers. + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/hvm/hpet.c ++++ b/xen/arch/x86/hvm/hpet.c +@@ -75,7 +75,7 @@ + + static inline uint64_t hpet_read_maincounter(HPETState *h) + { +- ASSERT(spin_is_locked(&h->lock)); ++ ASSERT(rw_is_locked(&h->lock)); + + if ( hpet_enabled(h) ) + return guest_time_hpet(h) + h->mc_offset; +@@ -88,6 +88,8 @@ static uint64_t hpet_get_comparator(HPET + uint64_t comparator; + uint64_t elapsed; + ++ ASSERT(rw_is_write_locked(&h->lock)); ++ + comparator = h->hpet.comparator64[tn]; + if ( timer_is_periodic(h, tn) ) + { +@@ -172,16 +174,24 @@ static int hpet_read( + goto out; + } + +- spin_lock(&h->lock); ++ result = addr < HPET_Tn_CMP(0) || ++ ((addr - HPET_Tn_CMP(0)) % (HPET_Tn_CMP(1) - HPET_Tn_CMP(0))) > 7; ++ if ( result ) ++ read_lock(&h->lock); ++ else ++ write_lock(&h->lock); + + val = hpet_read64(h, addr); + ++ if ( result ) ++ read_unlock(&h->lock); ++ else ++ write_unlock(&h->lock); ++ + result = val; + if ( length != 8 ) + result = (val >> ((addr & 7) * 8)) & ((1ULL << (length * 8)) - 1); + +- spin_unlock(&h->lock); +- + out: + *pval = result; + return X86EMUL_OKAY; +@@ -190,7 +200,7 @@ static int hpet_read( + static void hpet_stop_timer(HPETState *h, unsigned int tn) + { + ASSERT(tn < HPET_TIMER_NUM); +- ASSERT(spin_is_locked(&h->lock)); ++ ASSERT(rw_is_write_locked(&h->lock)); + destroy_periodic_time(&h->pt[tn]); + /* read the comparator to get it updated so a read while stopped will + * return the expected value. 
*/ +@@ -208,7 +218,7 @@ static void hpet_set_timer(HPETState *h, + unsigned int oneshot; + + ASSERT(tn < HPET_TIMER_NUM); +- ASSERT(spin_is_locked(&h->lock)); ++ ASSERT(rw_is_write_locked(&h->lock)); + + if ( (tn == 0) && (h->hpet.config & HPET_CFG_LEGACY) ) + { +@@ -289,7 +299,7 @@ static int hpet_write( + if ( hpet_check_access_length(addr, length) != 0 ) + goto out; + +- spin_lock(&h->lock); ++ write_lock(&h->lock); + + old_val = hpet_read64(h, addr); + new_val = val; +@@ -448,7 +458,7 @@ static int hpet_write( + #undef set_start_timer + #undef set_restart_timer + +- spin_unlock(&h->lock); ++ write_unlock(&h->lock); + + out: + return X86EMUL_OKAY; +@@ -473,7 +483,7 @@ static int hpet_save(struct domain *d, h + HPETState *hp = domain_vhpet(d); + int rc; + +- spin_lock(&hp->lock); ++ write_lock(&hp->lock); + + /* Write the proper value into the main counter */ + hp->hpet.mc64 = hp->mc_offset + guest_time_hpet(hp); +@@ -507,7 +517,7 @@ static int hpet_save(struct domain *d, h + rec->timers[2].cmp = hp->hpet.comparator64[2]; + } + +- spin_unlock(&hp->lock); ++ write_unlock(&hp->lock); + + return rc; + } +@@ -519,12 +529,12 @@ static int hpet_load(struct domain *d, h + uint64_t cmp; + int i; + +- spin_lock(&hp->lock); ++ write_lock(&hp->lock); + + /* Reload the HPET registers */ + if ( _hvm_check_entry(h, HVM_SAVE_CODE(HPET), HVM_SAVE_LENGTH(HPET), 1) ) + { +- spin_unlock(&hp->lock); ++ write_unlock(&hp->lock); + return -EINVAL; + } + +@@ -564,7 +574,7 @@ static int hpet_load(struct domain *d, h + if ( timer_enabled(hp, i) ) + hpet_set_timer(hp, i); + +- spin_unlock(&hp->lock); ++ write_unlock(&hp->lock); + + return 0; + } +@@ -578,7 +588,7 @@ void hpet_init(struct vcpu *v) + + memset(h, 0, sizeof(HPETState)); + +- spin_lock_init(&h->lock); ++ rwlock_init(&h->lock); + + h->stime_freq = S_TO_NS; + +@@ -607,14 +617,14 @@ void hpet_deinit(struct domain *d) + int i; + HPETState *h = domain_vhpet(d); + +- spin_lock(&h->lock); ++ write_lock(&h->lock); + + if ( hpet_enabled(h) ) + for ( i = 0; i < HPET_TIMER_NUM; i++ ) + if ( timer_enabled(h, i) ) + hpet_stop_timer(h, i); + +- spin_unlock(&h->lock); ++ write_unlock(&h->lock); + } + + void hpet_reset(struct domain *d) +--- a/xen/arch/x86/hvm/vpt.c ++++ b/xen/arch/x86/hvm/vpt.c +@@ -508,10 +508,10 @@ void pt_adjust_global_vcpu_target(struct + pt_adjust_vcpu(&pl_time->vrtc.pt, v); + spin_unlock(&pl_time->vrtc.lock); + +- spin_lock(&pl_time->vhpet.lock); ++ write_lock(&pl_time->vhpet.lock); + for ( i = 0; i < HPET_TIMER_NUM; i++ ) + pt_adjust_vcpu(&pl_time->vhpet.pt[i], v); +- spin_unlock(&pl_time->vhpet.lock); ++ write_unlock(&pl_time->vhpet.lock); + } + + +--- a/xen/include/asm-x86/hvm/vpt.h ++++ b/xen/include/asm-x86/hvm/vpt.h +@@ -96,7 +96,7 @@ typedef struct HPETState { + uint64_t hpet_to_ns_limit; /* max hpet ticks convertable to ns */ + uint64_t mc_offset; + struct periodic_time pt[HPET_TIMER_NUM]; +- spinlock_t lock; ++ rwlock_t lock; + } HPETState; + + typedef struct RTCState { diff --git a/53f737b1-VMX-fix-DebugCtl-MSR-clearing.patch b/53f737b1-VMX-fix-DebugCtl-MSR-clearing.patch new file mode 100644 index 0000000..97d07b3 --- /dev/null +++ b/53f737b1-VMX-fix-DebugCtl-MSR-clearing.patch @@ -0,0 +1,39 @@ +# Commit dfa625e15f3d6c374637f2bb789e1f444c2781c3 +# Date 2014-08-22 14:29:37 +0200 +# Author Jan Beulich +# Committer Jan Beulich +VMX: fix DebugCtl MSR clearing + +The previous shortcut was wrong, as it bypassed the necessary vmwrite: +All we really want to avoid if the guest writes zero is to add the MSR +to the host-load list. 
+ +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Acked-by: Kevin Tian + +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -2170,8 +2170,6 @@ static int vmx_msr_write_intercept(unsig + int i, rc = 0; + uint64_t supported = IA32_DEBUGCTLMSR_LBR | IA32_DEBUGCTLMSR_BTF; + +- if ( !msr_content ) +- break; + if ( msr_content & ~supported ) + { + /* Perhaps some other bits are supported in vpmu. */ +@@ -2191,12 +2189,10 @@ static int vmx_msr_write_intercept(unsig + } + + if ( (rc < 0) || +- (vmx_add_host_load_msr(msr) < 0) ) ++ (msr_content && (vmx_add_host_load_msr(msr) < 0)) ) + hvm_inject_hw_exception(TRAP_machine_check, 0); + else +- { + __vmwrite(GUEST_IA32_DEBUGCTL, msr_content); +- } + + break; + } diff --git a/53f7386d-x86-irq-process-softirqs-in-irq-keyhandlers.patch b/53f7386d-x86-irq-process-softirqs-in-irq-keyhandlers.patch new file mode 100644 index 0000000..bd23790 --- /dev/null +++ b/53f7386d-x86-irq-process-softirqs-in-irq-keyhandlers.patch @@ -0,0 +1,119 @@ +# Commit e13b3203990706db1313ec2aadd9a30b249ee793 +# Date 2014-08-22 14:32:45 +0200 +# Author Andrew Cooper +# Committer Jan Beulich +x86/irq: process softirqs in irq keyhandlers + +Large machines with lots of interrupts can trip over the Xen watchdog. + +Suggested-by: Santosh Jodh +Signed-off-by: Andrew Cooper +Tested-by: Santosh Jodh + +# Commit bd083922f9e78ed19ef98e7de372e5f568402ed3 +# Date 2014-08-26 17:56:52 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/IO-APIC: don't process softirqs during early boot + +Commit e13b320399 ("x86/irq: process softirqs in irq keyhandlers") +made this unconditional, but the boot time use of __print_IO_APIC() +(when "apic_verbosity=debug" was given) can't tolerate that. + +Reported-by: Sander Eikelenboom +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper +Tested-by: Sander Eikelenboom + +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1091,7 +1092,7 @@ static inline void UNEXPECTED_IO_APIC(vo + { + } + +-static void /*__init*/ __print_IO_APIC(void) ++static void /*__init*/ __print_IO_APIC(bool_t boot) + { + int apic, i; + union IO_APIC_reg_00 reg_00; +@@ -1112,6 +1113,9 @@ static void /*__init*/ __print_IO_APIC(v + printk(KERN_INFO "testing the IO APIC.......................\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { ++ if ( !boot ) ++ process_pending_softirqs(); ++ + if (!nr_ioapic_entries[apic]) + continue; + +@@ -1215,6 +1219,10 @@ static void /*__init*/ __print_IO_APIC(v + printk(KERN_DEBUG "IRQ to pin mappings:\n"); + for (i = 0; i < nr_irqs_gsi; i++) { + struct irq_pin_list *entry = irq_2_pin + i; ++ ++ if ( !boot && !(i & 0x1f) ) ++ process_pending_softirqs(); ++ + if (entry->pin < 0) + continue; + printk(KERN_DEBUG "IRQ%d ", irq_to_desc(i)->arch.vector); +@@ -1235,12 +1243,12 @@ static void /*__init*/ __print_IO_APIC(v + static void __init print_IO_APIC(void) + { + if (apic_verbosity != APIC_QUIET) +- __print_IO_APIC(); ++ __print_IO_APIC(1); + } + + static void _print_IO_APIC_keyhandler(unsigned char key) + { +- __print_IO_APIC(); ++ __print_IO_APIC(0); + } + static struct keyhandler print_IO_APIC_keyhandler = { + .diagnostic = 1, +@@ -2454,6 +2462,9 @@ void dump_ioapic_irq_info(void) + + for ( irq = 0; irq < nr_irqs_gsi; irq++ ) + { ++ if ( !(irq & 0x1f) ) ++ process_pending_softirqs(); ++ + entry = &irq_2_pin[irq]; + if ( entry->pin == -1 ) + continue; +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ 
-19,6 +19,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -2231,6 +2232,8 @@ static void dump_irqs(unsigned char key) + + for ( irq = 0; irq < nr_irqs; irq++ ) + { ++ if ( !(irq & 0x1f) ) ++ process_pending_softirqs(); + + desc = irq_to_desc(irq); + +@@ -2284,6 +2287,7 @@ static void dump_irqs(unsigned char key) + xfree(ssid); + } + ++ process_pending_softirqs(); + printk("Direct vector information:\n"); + for ( i = FIRST_DYNAMIC_VECTOR; i < NR_VECTORS; ++i ) + if ( direct_apic_vector[i] ) diff --git a/53ff3659-x86-consolidate-boolean-inputs-in-hvm-and-p2m.patch b/53ff3659-x86-consolidate-boolean-inputs-in-hvm-and-p2m.patch new file mode 100644 index 0000000..c02cf0e --- /dev/null +++ b/53ff3659-x86-consolidate-boolean-inputs-in-hvm-and-p2m.patch @@ -0,0 +1,331 @@ +References: bnc#882089 + +# Commit 3d4d4f9336159f3f77a7b480ce9984fd3ff7949f +# Date 2014-08-28 16:02:01 +0200 +# Author Tamas K Lengyel +# Committer Jan Beulich +x86: consolidate boolean inputs in hvm and p2m into a shared bitmap + +This patch consolidates the boolean input parameters of +hvm_hap_nested_page_fault and p2m_mem_access_check into a common bitmap +and defines the bitmap members accordingly. + +Signed-off-by: Tamas K Lengyel +Reviewed-by: Jan Beulich +Acked-by: Kevin Tian +Reviewed-by: Tim Deegan + +# Commit 24857896a30105b7947e2cd36d63768054538bbc +# Date 2014-09-03 15:06:06 +0200 +# Author Andrew Cooper +# Committer Jan Beulich +x86/hvm: fix operator precedence bug introduced by 3d4d4f9336 + +Bitwise or has greater precedence than the ternary operator, making the result +of the expression a constant P2M_UNSHARE. + +Coverity-ID: 1234633 +Signed-off-by: Andrew Cooper +Signed-off-by: Jan Beulich +Reviewed-by: Don Slutz + +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -1464,12 +1464,8 @@ void hvm_inject_page_fault(int errcode, + hvm_inject_trap(&trap); + } + +-int hvm_hap_nested_page_fault(paddr_t gpa, +- bool_t gla_valid, +- unsigned long gla, +- bool_t access_r, +- bool_t access_w, +- bool_t access_x) ++int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla, ++ struct npfec npfec) + { + unsigned long gfn = gpa >> PAGE_SHIFT; + p2m_type_t p2mt; +@@ -1498,8 +1494,11 @@ int hvm_hap_nested_page_fault(paddr_t gp + * into l1 guest if not fixable. The algorithm is + * the same as for shadow paging. + */ +- rv = nestedhvm_hap_nested_page_fault(v, &gpa, +- access_r, access_w, access_x); ++ ++ rv = nestedhvm_hap_nested_page_fault(v, &gpa, ++ npfec.read_access, ++ npfec.write_access, ++ npfec.insn_fetch); + switch (rv) { + case NESTEDHVM_PAGEFAULT_DONE: + case NESTEDHVM_PAGEFAULT_RETRY: +@@ -1538,47 +1537,49 @@ int hvm_hap_nested_page_fault(paddr_t gp + + p2m = p2m_get_hostp2m(v->domain); + mfn = get_gfn_type_access(p2m, gfn, &p2mt, &p2ma, +- P2M_ALLOC | (access_w ? P2M_UNSHARE : 0), NULL); ++ P2M_ALLOC | (npfec.write_access ? 
P2M_UNSHARE : 0), ++ NULL); + + /* Check access permissions first, then handle faults */ + if ( mfn_x(mfn) != INVALID_MFN ) + { +- int violation = 0; ++ bool_t violation; ++ + /* If the access is against the permissions, then send to mem_event */ +- switch (p2ma) ++ switch (p2ma) + { + case p2m_access_n: + case p2m_access_n2rwx: + default: +- violation = access_r || access_w || access_x; ++ violation = npfec.read_access || npfec.write_access || npfec.insn_fetch; + break; + case p2m_access_r: +- violation = access_w || access_x; ++ violation = npfec.write_access || npfec.insn_fetch; + break; + case p2m_access_w: +- violation = access_r || access_x; ++ violation = npfec.read_access || npfec.insn_fetch; + break; + case p2m_access_x: +- violation = access_r || access_w; ++ violation = npfec.read_access || npfec.write_access; + break; + case p2m_access_rx: + case p2m_access_rx2rw: +- violation = access_w; ++ violation = npfec.write_access; + break; + case p2m_access_wx: +- violation = access_r; ++ violation = npfec.read_access; + break; + case p2m_access_rw: +- violation = access_x; ++ violation = npfec.insn_fetch; + break; + case p2m_access_rwx: ++ violation = 0; + break; + } + + if ( violation ) + { +- if ( p2m_mem_access_check(gpa, gla_valid, gla, access_r, +- access_w, access_x, &req_ptr) ) ++ if ( p2m_mem_access_check(gpa, gla, npfec, &req_ptr) ) + { + fall_through = 1; + } else { +@@ -1594,7 +1595,7 @@ int hvm_hap_nested_page_fault(paddr_t gp + * to the mmio handler. + */ + if ( (p2mt == p2m_mmio_dm) || +- (access_w && (p2mt == p2m_ram_ro)) ) ++ (npfec.write_access && (p2mt == p2m_ram_ro)) ) + { + put_gfn(p2m->domain, gfn); + +@@ -1613,7 +1614,7 @@ int hvm_hap_nested_page_fault(paddr_t gp + paged = 1; + + /* Mem sharing: unshare the page and try again */ +- if ( access_w && (p2mt == p2m_ram_shared) ) ++ if ( npfec.write_access && (p2mt == p2m_ram_shared) ) + { + ASSERT(!p2m_is_nestedp2m(p2m)); + sharing_enomem = +@@ -1630,7 +1631,7 @@ int hvm_hap_nested_page_fault(paddr_t gp + * a large page, we do not change other pages type within that large + * page. + */ +- if ( access_w ) ++ if ( npfec.write_access ) + { + paging_mark_dirty(v->domain, mfn_x(mfn)); + p2m_change_type(v->domain, gfn, p2m_ram_logdirty, p2m_ram_rw); +@@ -1640,7 +1641,7 @@ int hvm_hap_nested_page_fault(paddr_t gp + } + + /* Shouldn't happen: Maybe the guest was writing to a r/o grant mapping? 
*/ +- if ( access_w && (p2mt == p2m_grant_map_ro) ) ++ if ( npfec.write_access && (p2mt == p2m_grant_map_ro) ) + { + gdprintk(XENLOG_WARNING, + "trying to write to read-only grant mapping\n"); +--- a/xen/arch/x86/hvm/svm/svm.c ++++ b/xen/arch/x86/hvm/svm/svm.c +@@ -1289,7 +1289,7 @@ const struct hvm_function_table * __init + } + + static void svm_do_nested_pgfault(struct vcpu *v, +- struct cpu_user_regs *regs, uint32_t npfec, paddr_t gpa) ++ struct cpu_user_regs *regs, uint32_t pfec, paddr_t gpa) + { + int ret; + unsigned long gfn = gpa >> PAGE_SHIFT; +@@ -1298,10 +1298,13 @@ static void svm_do_nested_pgfault(struct + p2m_access_t p2ma; + struct p2m_domain *p2m = NULL; + +- ret = hvm_hap_nested_page_fault(gpa, 0, ~0ul, +- 1, /* All NPFs count as reads */ +- npfec & PFEC_write_access, +- npfec & PFEC_insn_fetch); ++ struct npfec npfec = { ++ .read_access = 1, /* All NPFs count as reads */ ++ .write_access = !!(pfec & PFEC_write_access), ++ .insn_fetch = !!(pfec & PFEC_insn_fetch) ++ }; ++ ++ ret = hvm_hap_nested_page_fault(gpa, ~0ul, npfec); + + if ( tb_init_done ) + { +@@ -1329,7 +1332,7 @@ static void svm_do_nested_pgfault(struct + case -1: + ASSERT(nestedhvm_enabled(v->domain) && nestedhvm_vcpu_in_guestmode(v)); + /* inject #VMEXIT(NPF) into guest. */ +- nestedsvm_vmexit_defer(v, VMEXIT_NPF, npfec, gpa); ++ nestedsvm_vmexit_defer(v, VMEXIT_NPF, pfec, gpa); + return; + } + +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -2278,6 +2278,11 @@ static void ept_handle_violation(unsigne + p2m_type_t p2mt; + int ret; + struct domain *d = current->domain; ++ struct npfec npfec = { ++ .read_access = !!(qualification & EPT_READ_VIOLATION), ++ .write_access = !!(qualification & EPT_WRITE_VIOLATION), ++ .insn_fetch = !!(qualification & EPT_EXEC_VIOLATION) ++ }; + + if ( tb_init_done ) + { +@@ -2296,14 +2301,14 @@ static void ept_handle_violation(unsigne + } + + if ( qualification & EPT_GLA_VALID ) ++ { + __vmread(GUEST_LINEAR_ADDRESS, &gla); ++ npfec.gla_valid = 1; ++ } + else + gla = ~0ull; +- ret = hvm_hap_nested_page_fault(gpa, +- !!(qualification & EPT_GLA_VALID), gla, +- !!(qualification & EPT_READ_VIOLATION), +- !!(qualification & EPT_WRITE_VIOLATION), +- !!(qualification & EPT_EXEC_VIOLATION)); ++ ++ ret = hvm_hap_nested_page_fault(gpa, gla, npfec); + switch ( ret ) + { + case 0: // Unhandled L1 EPT violation +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -1261,9 +1261,9 @@ void p2m_mem_paging_resume(struct domain + } + } + +-bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla, +- bool_t access_r, bool_t access_w, bool_t access_x, +- mem_event_request_t **req_ptr) ++bool_t p2m_mem_access_check(paddr_t gpa, unsigned long gla, ++ struct npfec npfec, ++ mem_event_request_t **req_ptr) + { + struct vcpu *v = current; + unsigned long gfn = gpa >> PAGE_SHIFT; +@@ -1281,7 +1281,7 @@ bool_t p2m_mem_access_check(paddr_t gpa, + gfn_lock(p2m, gfn, 0); + mfn = p2m->get_entry(p2m, gfn, &p2mt, &p2ma, 0, NULL); + +- if ( access_w && p2ma == p2m_access_rx2rw ) ++ if ( npfec.write_access && p2ma == p2m_access_rx2rw ) + { + rc = p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2mt, p2m_access_rw); + ASSERT(rc); +@@ -1290,7 +1290,7 @@ bool_t p2m_mem_access_check(paddr_t gpa, + } + else if ( p2ma == p2m_access_n2rwx ) + { +- ASSERT(access_w || access_r || access_x); ++ ASSERT(npfec.write_access || npfec.read_access || npfec.insn_fetch); + rc = p2m->set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, + p2mt, p2m_access_rwx); + ASSERT(rc); +@@ -1341,11 +1341,11 @@ 
bool_t p2m_mem_access_check(paddr_t gpa, + /* Send request to mem event */ + req->gfn = gfn; + req->offset = gpa & ((1 << PAGE_SHIFT) - 1); +- req->gla_valid = gla_valid; ++ req->gla_valid = npfec.gla_valid; + req->gla = gla; +- req->access_r = access_r; +- req->access_w = access_w; +- req->access_x = access_x; ++ req->access_r = npfec.read_access; ++ req->access_w = npfec.write_access; ++ req->access_x = npfec.insn_fetch; + + req->vcpu_id = v->vcpu_id; + } +--- a/xen/include/asm-x86/hvm/hvm.h ++++ b/xen/include/asm-x86/hvm/hvm.h +@@ -435,11 +435,8 @@ static inline void hvm_invalidate_regs_f + #endif + } + +-int hvm_hap_nested_page_fault(paddr_t gpa, +- bool_t gla_valid, unsigned long gla, +- bool_t access_r, +- bool_t access_w, +- bool_t access_x); ++int hvm_hap_nested_page_fault(paddr_t gpa, unsigned long gla, ++ struct npfec npfec); + + #define hvm_msr_tsc_aux(v) ({ \ + struct domain *__d = (v)->domain; \ +--- a/xen/include/asm-x86/mm.h ++++ b/xen/include/asm-x86/mm.h +@@ -551,6 +551,16 @@ void audit_domains(void); + + #endif + ++/* ++ * Nested page fault exception codes. ++ */ ++struct npfec { ++ unsigned int read_access:1; ++ unsigned int write_access:1; ++ unsigned int insn_fetch:1; ++ unsigned int gla_valid:1; ++}; ++ + int new_guest_cr3(unsigned long pfn); + void make_cr3(struct vcpu *v, unsigned long mfn); + void update_cr3(struct vcpu *v); +--- a/xen/include/asm-x86/p2m.h ++++ b/xen/include/asm-x86/p2m.h +@@ -568,9 +568,9 @@ void p2m_mem_paging_resume(struct domain + * been promoted with no underlying vcpu pause. If the req_ptr has been populated, + * then the caller must put the event in the ring (once having released get_gfn* + * locks -- caller must also xfree the request. */ +-bool_t p2m_mem_access_check(paddr_t gpa, bool_t gla_valid, unsigned long gla, +- bool_t access_r, bool_t access_w, bool_t access_x, +- mem_event_request_t **req_ptr); ++bool_t p2m_mem_access_check(paddr_t gpa, unsigned long gla, ++ struct npfec npfec, ++ mem_event_request_t **req_ptr); + /* Resumes the running of the VCPU, restarting the last instruction */ + void p2m_mem_access_resume(struct domain *d); + diff --git a/53ff36ae-x86-hvm-treat-non-insn-fetch-NPF-also-as-read-violations.patch b/53ff36ae-x86-hvm-treat-non-insn-fetch-NPF-also-as-read-violations.patch new file mode 100644 index 0000000..9ed5b94 --- /dev/null +++ b/53ff36ae-x86-hvm-treat-non-insn-fetch-NPF-also-as-read-violations.patch @@ -0,0 +1,69 @@ +References: bnc#882089 + +# Commit 401d5c5cc5a780cad160aa0e3c282c11ac11dd0c +# Date 2014-08-28 16:03:26 +0200 +# Author Tamas K Lengyel +# Committer Jan Beulich +x86/hvm: treat non-instruction fetch nested page faults also as read violations + +As pointed out by Jan Beulich in +http://lists.xen.org/archives/html/xen-devel/2014-08/msg01269.html: +"Read-modify-write instructions absolutely need to be treated as read +accesses, yet hardware doesn't guarantee to tell us so (they may +surface as just write accesses)." This patch addresses the issue in +both the VMX and the SVM side. + +VMX: Treat all write data access violations also as read violations (in + addition to those that were already reported as read violations). +SVM: Refine the meaning of read data access violations to distinguish + between read/write and instruction fetch access violations. + +With this patch both VMX and SVM specific nested page fault handling code reports violations the same way, thus abstracting the hardware specific behaviour from the layers above. 
+ +Suggested-by: Jan Beulich +Signed-off-by: Tamas K Lengyel +Reviewed-by: Boris Ostrovsky +Reviewed-by: Tim Deegan + +--- a/xen/arch/x86/hvm/svm/svm.c ++++ b/xen/arch/x86/hvm/svm/svm.c +@@ -1298,8 +1298,13 @@ static void svm_do_nested_pgfault(struct + p2m_access_t p2ma; + struct p2m_domain *p2m = NULL; + ++ /* ++ * Since HW doesn't explicitly provide a read access bit and we need to ++ * somehow describe read-modify-write instructions we will conservatively ++ * set read_access for all memory accesses that are not instruction fetches. ++ */ + struct npfec npfec = { +- .read_access = 1, /* All NPFs count as reads */ ++ .read_access = !(pfec & PFEC_insn_fetch), + .write_access = !!(pfec & PFEC_write_access), + .insn_fetch = !!(pfec & PFEC_insn_fetch) + }; +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -2278,8 +2278,21 @@ static void ept_handle_violation(unsigne + p2m_type_t p2mt; + int ret; + struct domain *d = current->domain; ++ ++ /* ++ * We treat all write violations also as read violations. ++ * The reason why this is required is the following warning: ++ * "An EPT violation that occurs during as a result of execution of a ++ * read-modify-write operation sets bit 1 (data write). Whether it also ++ * sets bit 0 (data read) is implementation-specific and, for a given ++ * implementation, may differ for different kinds of read-modify-write ++ * operations." ++ * - Intel(R) 64 and IA-32 Architectures Software Developer's Manual ++ * Volume 3C: System Programming Guide, Part 3 ++ */ + struct npfec npfec = { +- .read_access = !!(qualification & EPT_READ_VIOLATION), ++ .read_access = !!(qualification & EPT_READ_VIOLATION) || ++ !!(qualification & EPT_WRITE_VIOLATION), + .write_access = !!(qualification & EPT_WRITE_VIOLATION), + .insn_fetch = !!(qualification & EPT_EXEC_VIOLATION) + }; diff --git a/53ff36d5-x86-mem_event-deliver-gla-fault-EPT-violation-information.patch b/53ff36d5-x86-mem_event-deliver-gla-fault-EPT-violation-information.patch new file mode 100644 index 0000000..8e18e60 --- /dev/null +++ b/53ff36d5-x86-mem_event-deliver-gla-fault-EPT-violation-information.patch @@ -0,0 +1,133 @@ +References: bnc#882089 + +# Commit 692f3cc7dd05b80dbd027e46372b1c25d7975332 +# Date 2014-08-28 16:04:05 +0200 +# Author Tamas K Lengyel +# Committer Jan Beulich +x86/mem_event: deliver gla fault EPT violation information + +On Intel EPT the exit qualification generated by a violation also +includes a bit (EPT_GLA_FAULT) which describes the following +information: Set if the access causing the EPT violation is to a +guest-physical address that is the translation of a linear address. +Clear if the access causing the EPT violation is to a paging-structure +entry as part of a page walk or the update of an accessed or dirty bit. + +For more information see Table 27-7 in the Intel SDM. + +This patch extends the mem_event system to deliver this extra +information, which could be useful for determining the cause of a +violation. 
+ +Signed-off-by: Tamas K Lengyel +Reviewed-by: Jan Beulich +Acked-by: Kevin Tian +Acked-by: Tim Deegan + +--- a/xen/arch/x86/hvm/svm/svm.c ++++ b/xen/arch/x86/hvm/svm/svm.c +@@ -1289,7 +1289,7 @@ const struct hvm_function_table * __init + } + + static void svm_do_nested_pgfault(struct vcpu *v, +- struct cpu_user_regs *regs, uint32_t pfec, paddr_t gpa) ++ struct cpu_user_regs *regs, uint64_t pfec, paddr_t gpa) + { + int ret; + unsigned long gfn = gpa >> PAGE_SHIFT; +@@ -1309,6 +1309,12 @@ static void svm_do_nested_pgfault(struct + .insn_fetch = !!(pfec & PFEC_insn_fetch) + }; + ++ /* These bits are mutually exclusive */ ++ if ( pfec & NPT_PFEC_with_gla ) ++ npfec.kind = npfec_kind_with_gla; ++ else if ( pfec & NPT_PFEC_in_gpt ) ++ npfec.kind = npfec_kind_in_gpt; ++ + ret = hvm_hap_nested_page_fault(gpa, ~0ul, npfec); + + if ( tb_init_done ) +--- a/xen/arch/x86/hvm/vmx/vmx.c ++++ b/xen/arch/x86/hvm/vmx/vmx.c +@@ -2317,6 +2317,10 @@ static void ept_handle_violation(unsigne + { + __vmread(GUEST_LINEAR_ADDRESS, &gla); + npfec.gla_valid = 1; ++ if( qualification & EPT_GLA_FAULT ) ++ npfec.kind = npfec_kind_with_gla; ++ else ++ npfec.kind = npfec_kind_in_gpt; + } + else + gla = ~0ull; +--- a/xen/arch/x86/mm/p2m.c ++++ b/xen/arch/x86/mm/p2m.c +@@ -1343,10 +1343,13 @@ bool_t p2m_mem_access_check(paddr_t gpa, + req->offset = gpa & ((1 << PAGE_SHIFT) - 1); + req->gla_valid = npfec.gla_valid; + req->gla = gla; ++ if ( npfec.kind == npfec_kind_with_gla ) ++ req->fault_with_gla = 1; ++ else if ( npfec.kind == npfec_kind_in_gpt ) ++ req->fault_in_gpt = 1; + req->access_r = npfec.read_access; + req->access_w = npfec.write_access; + req->access_x = npfec.insn_fetch; +- + req->vcpu_id = v->vcpu_id; + } + +--- a/xen/include/asm-x86/hvm/svm/svm.h ++++ b/xen/include/asm-x86/hvm/svm/svm.h +@@ -105,4 +105,10 @@ extern u32 svm_feature_flags; + extern void svm_host_osvw_reset(void); + extern void svm_host_osvw_init(void); + ++/* EXITINFO1 fields on NPT faults */ ++#define _NPT_PFEC_with_gla 32 ++#define NPT_PFEC_with_gla (1UL<<_NPT_PFEC_with_gla) ++#define _NPT_PFEC_in_gpt 33 ++#define NPT_PFEC_in_gpt (1UL<<_NPT_PFEC_in_gpt) ++ + #endif /* __ASM_X86_HVM_SVM_H__ */ +--- a/xen/include/asm-x86/mm.h ++++ b/xen/include/asm-x86/mm.h +@@ -552,6 +552,16 @@ void audit_domains(void); + #endif + + /* ++ * Extra fault info types which are used to further describe ++ * the source of an access violation. ++ */ ++typedef enum { ++ npfec_kind_unknown, /* must be first */ ++ npfec_kind_in_gpt, /* violation in guest page table */ ++ npfec_kind_with_gla /* violation with guest linear address */ ++} npfec_kind_t; ++ ++/* + * Nested page fault exception codes. 
+ */ + struct npfec { +@@ -559,6 +569,7 @@ struct npfec { + unsigned int write_access:1; + unsigned int insn_fetch:1; + unsigned int gla_valid:1; ++ unsigned int kind:2; /* npfec_kind_t */ + }; + + int new_guest_cr3(unsigned long pfn); +--- a/xen/include/public/mem_event.h ++++ b/xen/include/public/mem_event.h +@@ -62,7 +62,9 @@ typedef struct mem_event_st { + uint16_t access_w:1; + uint16_t access_x:1; + uint16_t gla_valid:1; +- uint16_t available:12; ++ uint16_t fault_with_gla:1; ++ uint16_t fault_in_gpt:1; ++ uint16_t available:10; + + uint16_t reason; + } mem_event_request_t, mem_event_response_t; diff --git a/53ff3716-x86-ats-Disable-Address-Translation-Services-by-default.patch b/53ff3716-x86-ats-Disable-Address-Translation-Services-by-default.patch new file mode 100644 index 0000000..4281d26 --- /dev/null +++ b/53ff3716-x86-ats-Disable-Address-Translation-Services-by-default.patch @@ -0,0 +1,49 @@ +# Commit ad6eddb742577d182e634785bcfaf92732a50024 +# Date 2014-08-28 16:05:10 +0200 +# Author Andrew Cooper +# Committer Jan Beulich +x86/ats: Disable Address Translation Services by default + +Xen cannot safely use any ATS functionality until it gains asynchronous queued +invalidation support, because of the current synchronous wait for completion. + +Do not turn ATS on by default. + +While editing the default in the command line documentation, correct the +statement regarding PCI Passthrough. ATS is purely a performance +optimisation, and is certainly not required for PCI Passthrough to function. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +Acked-by: Suravee Suthikulpanit + +--- a/docs/misc/xen-command-line.markdown ++++ b/docs/misc/xen-command-line.markdown +@@ -167,10 +167,13 @@ developers wishing Xen to fall back to o + ### ats + > `= ` + +-> Default: `true` ++> Default: `false` ++ ++Permits Xen to set up and use PCI Address Translation Services. This is a ++performance optimisation for PCI Passthrough. + +-Permits Xen to set up and use PCI Address Translation Services, which +-is required for PCI Passthrough. ++**WARNING: Xen cannot currently safely use ATS because of its synchronous wait ++loops for Queued Invalidation completions.** + + ### availmem + > `= ` +--- a/xen/drivers/passthrough/x86/ats.c ++++ b/xen/drivers/passthrough/x86/ats.c +@@ -20,7 +20,7 @@ + + LIST_HEAD(ats_devices); + +-bool_t __read_mostly ats_enabled = 1; ++bool_t __read_mostly ats_enabled = 0; + boolean_param("ats", ats_enabled); + + int enable_ats_device(int seg, int bus, int devfn, const void *iommu) diff --git a/53ff3899-x86-NMI-allow-processing-unknown-NMIs-with-watchdog.patch b/53ff3899-x86-NMI-allow-processing-unknown-NMIs-with-watchdog.patch new file mode 100644 index 0000000..bea3784 --- /dev/null +++ b/53ff3899-x86-NMI-allow-processing-unknown-NMIs-with-watchdog.patch @@ -0,0 +1,220 @@ +# Commit 3ea2ba980afe7356c613c8e1ba00d223d1c25412 +# Date 2014-08-28 16:11:37 +0200 +# Author Ross Lagerwall +# Committer Jan Beulich +x86/NMI: allow processing unknown NMIs when watchdog is enabled + +Change NMI processing so that if watchdog=force is passed on the +command-line and the NMI is not caused by a perf counter overflow (i.e. +likely not a watchdog "tick"), the NMI is handled by the unknown NMI +handler. + +This allows injection of NMIs from IPMI controllers that don't set the +IOCK/SERR bits to trigger the unknown NMI handler rather than be +ignored. + +Signed-off-by: Ross Lagerwall +Reviewed-by: Andrew Cooper + +Fix command line parsing (don't enable the watchdog on e.g. +"watchdog=xyz"). 
+ +Signed-off-by: Jan Beulich + +# Commit fd553ae5f0f57baa63d033bedee84f607de57d33 +# Date 2014-09-03 15:09:59 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/NMI: allow passing just "watchdog" again + +This capability got inadvertently lost in commit 3ea2ba980a ("x86/NMI: +allow processing unknown NMIs when watchdog is enabled") due to an +oversight of mine. + +Reported-by: Ross Lagerwall +Signed-off-by: Jan Beulich +Reviewed-by: Andrew Cooper + +--- a/docs/misc/xen-command-line.markdown ++++ b/docs/misc/xen-command-line.markdown +@@ -1039,12 +1039,14 @@ As the BTS virtualisation is not 100% sa + don't use the vpmu flag on production systems with Intel cpus! + + ### watchdog +-> `= ` ++> `= force | ` + + > Default: `false` + + Run an NMI watchdog on each processor. If a processor is stuck for +-longer than the **watchdog\_timeout**, a panic occurs. ++longer than the **watchdog\_timeout**, a panic occurs. When `force` is ++specified, in addition to running an NMI watchdog on each processor, ++unknown NMIs will still be processed. + + ### watchdog\_timeout + > `= ` +--- a/xen/arch/x86/nmi.c ++++ b/xen/arch/x86/nmi.c +@@ -43,7 +43,32 @@ static DEFINE_PER_CPU(unsigned int, nmi_ + + /* opt_watchdog: If true, run a watchdog NMI on each processor. */ + bool_t __initdata opt_watchdog = 0; +-boolean_param("watchdog", opt_watchdog); ++ ++/* watchdog_force: If true, process unknown NMIs when running the watchdog. */ ++bool_t watchdog_force = 0; ++ ++static void __init parse_watchdog(char *s) ++{ ++ if ( !*s ) ++ { ++ opt_watchdog = 1; ++ return; ++ } ++ ++ switch ( parse_bool(s) ) ++ { ++ case 0: ++ opt_watchdog = 0; ++ return; ++ case 1: ++ opt_watchdog = 1; ++ return; ++ } ++ ++ if ( !strcmp(s, "force") ) ++ watchdog_force = opt_watchdog = 1; ++} ++custom_param("watchdog", parse_watchdog); + + /* opt_watchdog_timeout: Number of seconds to wait before panic. */ + static unsigned int opt_watchdog_timeout = 5; +@@ -82,6 +107,7 @@ int nmi_active; + #define K7_EVNTSEL_USR (1 << 16) + #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 + #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING ++#define K7_EVENT_WIDTH 32 + + #define P6_EVNTSEL0_ENABLE (1 << 22) + #define P6_EVNTSEL_INT (1 << 20) +@@ -89,10 +115,12 @@ int nmi_active; + #define P6_EVNTSEL_USR (1 << 16) + #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 + #define CORE_EVENT_CPU_CLOCKS_NOT_HALTED 0x3c ++#define P6_EVENT_WIDTH 32 + + #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) + #define P4_CCCR_OVF_PMI0 (1<<26) + #define P4_CCCR_OVF_PMI1 (1<<27) ++#define P4_CCCR_OVF (1<<31) + #define P4_CCCR_THRESHOLD(N) ((N)<<20) + #define P4_CCCR_COMPLEMENT (1<<19) + #define P4_CCCR_COMPARE (1<<18) +@@ -433,8 +461,10 @@ int __init watchdog_setup(void) + return 0; + } + +-void nmi_watchdog_tick(struct cpu_user_regs * regs) ++/* Returns false if this was not a watchdog NMI, true otherwise */ ++bool_t nmi_watchdog_tick(struct cpu_user_regs *regs) + { ++ bool_t watchdog_tick = 1; + unsigned int sum = this_cpu(nmi_timer_ticks); + + if ( (this_cpu(last_irq_sums) == sum) && watchdog_enabled() ) +@@ -460,8 +490,15 @@ void nmi_watchdog_tick(struct cpu_user_r + + if ( nmi_perfctr_msr ) + { ++ uint64_t msr_content; ++ ++ /* Work out if this is a watchdog tick by checking for overflow. 
*/ + if ( nmi_perfctr_msr == MSR_P4_IQ_PERFCTR0 ) + { ++ rdmsrl(MSR_P4_IQ_CCCR0, msr_content); ++ if ( !(msr_content & P4_CCCR_OVF) ) ++ watchdog_tick = 0; ++ + /* + * P4 quirks: + * - An overflown perfctr will assert its interrupt +@@ -474,14 +511,26 @@ void nmi_watchdog_tick(struct cpu_user_r + } + else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 ) + { ++ rdmsrl(MSR_P6_PERFCTR0, msr_content); ++ if ( msr_content & (1ULL << P6_EVENT_WIDTH) ) ++ watchdog_tick = 0; ++ + /* + * Only P6 based Pentium M need to re-unmask the apic vector but + * it doesn't hurt other P6 variants. + */ + apic_write(APIC_LVTPC, APIC_DM_NMI); + } ++ else if ( nmi_perfctr_msr == MSR_K7_PERFCTR0 ) ++ { ++ rdmsrl(MSR_K7_PERFCTR0, msr_content); ++ if ( msr_content & (1ULL << K7_EVENT_WIDTH) ) ++ watchdog_tick = 0; ++ } + write_watchdog_counter(NULL); + } ++ ++ return watchdog_tick; + } + + /* +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -3226,14 +3226,15 @@ void do_nmi(struct cpu_user_regs *regs) + { + unsigned int cpu = smp_processor_id(); + unsigned char reason; ++ bool_t handle_unknown = 0; + + ++nmi_count(cpu); + + if ( nmi_callback(regs, cpu) ) + return; + +- if ( nmi_watchdog ) +- nmi_watchdog_tick(regs); ++ if ( !nmi_watchdog || (!nmi_watchdog_tick(regs) && watchdog_force) ) ++ handle_unknown = 1; + + /* Only the BSP gets external NMIs from the system. */ + if ( cpu == 0 ) +@@ -3243,7 +3244,7 @@ void do_nmi(struct cpu_user_regs *regs) + pci_serr_error(regs); + if ( reason & 0x40 ) + io_check_error(regs); +- if ( !(reason & 0xc0) && !nmi_watchdog ) ++ if ( !(reason & 0xc0) && handle_unknown ) + unknown_nmi_error(regs, reason); + } + } +--- a/xen/include/asm-x86/apic.h ++++ b/xen/include/asm-x86/apic.h +@@ -206,7 +206,7 @@ extern void release_lapic_nmi(void); + extern void self_nmi(void); + extern void disable_timer_nmi_watchdog(void); + extern void enable_timer_nmi_watchdog(void); +-extern void nmi_watchdog_tick (struct cpu_user_regs *regs); ++extern bool_t nmi_watchdog_tick (struct cpu_user_regs *regs); + extern int APIC_init_uniprocessor (void); + extern void disable_APIC_timer(void); + extern void enable_APIC_timer(void); +--- a/xen/include/asm-x86/nmi.h ++++ b/xen/include/asm-x86/nmi.h +@@ -8,6 +8,9 @@ struct cpu_user_regs; + + /* Watchdog boolean from the command line */ + extern bool_t opt_watchdog; ++ ++/* Watchdog force parameter from the command line */ ++extern bool_t watchdog_force; + + typedef int (*nmi_callback_t)(struct cpu_user_regs *regs, int cpu); + diff --git a/54005472-EPT-utilize-GLA-GPA-translation-known-for-certain-faults.patch b/54005472-EPT-utilize-GLA-GPA-translation-known-for-certain-faults.patch new file mode 100644 index 0000000..951608a --- /dev/null +++ b/54005472-EPT-utilize-GLA-GPA-translation-known-for-certain-faults.patch @@ -0,0 +1,163 @@ +References: bnc#882089 + +# Commit ecb69533582e51999e5d76bce513be870222908f +# Date 2014-08-29 12:22:42 +0200 +# Author Jan Beulich +# Committer Jan Beulich +EPT: utilize GLA->GPA translation known for certain faults + +Rather than doing the translation ourselves in __hvmemul_{read,write}() +leverage that we know the association for faults other than such having +occurred when translating addresses of page tables. 
+ +There is one intentional but not necessarily obvious (and possibly +subtle) adjustment to behavior: __hvmemul_read() no longer blindly +bails on instruction fetches matching the MMIO GVA (the callers of +handle_mmio_with_translation() now control the behavior via the struct +npfec they pass, and it didn't seem right to bail here rather than just +falling through to the unaccelerated path) + +Signed-off-by: Jan Beulich +Reviewed-by: Tim Deegan + +--- a/xen/arch/x86/hvm/emulate.c ++++ b/xen/arch/x86/hvm/emulate.c +@@ -496,10 +496,11 @@ static int __hvmemul_read( + while ( off & (chunk - 1) ) + chunk >>= 1; + +- if ( unlikely(vio->mmio_gva == (addr & PAGE_MASK)) && vio->mmio_gva ) ++ if ( ((access_type != hvm_access_insn_fetch ++ ? vio->mmio_access.read_access ++ : vio->mmio_access.insn_fetch)) && ++ (vio->mmio_gva == (addr & PAGE_MASK)) ) + { +- if ( access_type == hvm_access_insn_fetch ) +- return X86EMUL_UNHANDLEABLE; + gpa = (((paddr_t)vio->mmio_gpfn << PAGE_SHIFT) | off); + while ( (off + chunk) <= PAGE_SIZE ) + { +@@ -639,7 +640,8 @@ static int hvmemul_write( + while ( off & (chunk - 1) ) + chunk >>= 1; + +- if ( unlikely(vio->mmio_gva == (addr & PAGE_MASK)) && vio->mmio_gva ) ++ if ( vio->mmio_access.write_access && ++ (vio->mmio_gva == (addr & PAGE_MASK)) ) + { + gpa = (((paddr_t)vio->mmio_gpfn << PAGE_SHIFT) | off); + while ( (off + chunk) <= PAGE_SIZE ) +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -1529,7 +1529,7 @@ int hvm_hap_nested_page_fault(paddr_t gp + && is_hvm_vcpu(v) + && hvm_mmio_internal(gpa) ) + { +- if ( !handle_mmio() ) ++ if ( !handle_mmio_with_translation(gla, gpa >> PAGE_SHIFT, npfec) ) + hvm_inject_hw_exception(TRAP_gp_fault, 0); + rc = 1; + goto out; +@@ -1603,7 +1603,7 @@ int hvm_hap_nested_page_fault(paddr_t gp + if ( unlikely(is_pvh_vcpu(v)) ) + goto out; + +- if ( !handle_mmio() ) ++ if ( !handle_mmio_with_translation(gla, gpa >> PAGE_SHIFT, npfec) ) + hvm_inject_hw_exception(TRAP_gp_fault, 0); + rc = 1; + goto out; +--- a/xen/arch/x86/hvm/io.c ++++ b/xen/arch/x86/hvm/io.c +@@ -189,7 +189,7 @@ int handle_mmio(void) + if ( vio->io_state == HVMIO_awaiting_completion ) + vio->io_state = HVMIO_handle_mmio_awaiting_completion; + else +- vio->mmio_gva = 0; ++ vio->mmio_access = (struct npfec){}; + + switch ( rc ) + { +@@ -218,9 +218,14 @@ int handle_mmio(void) + return 1; + } + +-int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn) ++int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn, ++ struct npfec access) + { + struct hvm_vcpu_io *vio = ¤t->arch.hvm_vcpu.hvm_io; ++ ++ vio->mmio_access = access.gla_valid && ++ access.kind == npfec_kind_with_gla ++ ? access : (struct npfec){}; + vio->mmio_gva = gva & PAGE_MASK; + vio->mmio_gpfn = gpfn; + return handle_mmio(); +--- a/xen/arch/x86/mm/shadow/multi.c ++++ b/xen/arch/x86/mm/shadow/multi.c +@@ -2839,6 +2839,11 @@ static int sh_page_fault(struct vcpu *v, + p2m_type_t p2mt; + uint32_t rc; + int version; ++ struct npfec access = { ++ .read_access = 1, ++ .gla_valid = 1, ++ .kind = npfec_kind_with_gla ++ }; + #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION + int fast_emul = 0; + #endif +@@ -2849,6 +2854,9 @@ static int sh_page_fault(struct vcpu *v, + + perfc_incr(shadow_fault); + ++ if ( regs->error_code & PFEC_write_access ) ++ access.write_access = 1; ++ + #if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION + /* If faulting frame is successfully emulated in last shadow fault + * it's highly likely to reach same emulation action for this frame. 
+@@ -2950,7 +2958,7 @@ static int sh_page_fault(struct vcpu *v, + SHADOW_PRINTK("fast path mmio %#"PRIpaddr"\n", gpa); + reset_early_unshadow(v); + trace_shadow_gen(TRC_SHADOW_FAST_MMIO, va); +- return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT) ++ return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT, access) + ? EXCRET_fault_fixed : 0); + } + else +@@ -3447,7 +3455,7 @@ static int sh_page_fault(struct vcpu *v, + paging_unlock(d); + put_gfn(d, gfn_x(gfn)); + trace_shadow_gen(TRC_SHADOW_MMIO, va); +- return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT) ++ return (handle_mmio_with_translation(va, gpa >> PAGE_SHIFT, access) + ? EXCRET_fault_fixed : 0); + + not_a_shadow_fault: +--- a/xen/include/asm-x86/hvm/io.h ++++ b/xen/include/asm-x86/hvm/io.h +@@ -119,7 +119,8 @@ static inline void register_buffered_io_ + void send_timeoffset_req(unsigned long timeoff); + void send_invalidate_req(void); + int handle_mmio(void); +-int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn); ++int handle_mmio_with_translation(unsigned long gva, unsigned long gpfn, ++ struct npfec); + int handle_pio(uint16_t port, unsigned int size, int dir); + void hvm_interrupt_post(struct vcpu *v, int vector, int type); + void hvm_io_assist(ioreq_t *p); +--- a/xen/include/asm-x86/hvm/vcpu.h ++++ b/xen/include/asm-x86/hvm/vcpu.h +@@ -54,8 +54,9 @@ struct hvm_vcpu_io { + * HVM emulation: + * Virtual address @mmio_gva maps to MMIO physical frame @mmio_gpfn. + * The latter is known to be an MMIO frame (not RAM). +- * This translation is only valid if @mmio_gva is non-zero. ++ * This translation is only valid for accesses as per @mmio_access. + */ ++ struct npfec mmio_access; + unsigned long mmio_gva; + unsigned long mmio_gpfn; + diff --git a/CVE-2013-4540-qemu.patch b/CVE-2013-4540-qemu.patch new file mode 100644 index 0000000..b8e7c62 --- /dev/null +++ b/CVE-2013-4540-qemu.patch @@ -0,0 +1,52 @@ +References: bnc#864801 + +Subject: zaurus: fix buffer overrun on invalid state load +From: Michael S. Tsirkin mst@redhat.com Thu Apr 3 19:52:13 2014 +0300 +Date: Mon May 5 22:15:02 2014 +0200: +Git: 52f91c3723932f8340fe36c8ec8b18a757c37b2b + +CVE-2013-4540 + +Within scoop_gpio_handler_update, if prev_level has a high bit set, then +we get bit > 16 and that causes a buffer overrun. + +Since prev_level comes from wire indirectly, this can +happen on invalid state load. + +Similarly for gpio_level and gpio_dir. + +To fix, limit to 16 bit. + +Reported-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Dr. 
David Alan Gilbert +Signed-off-by: Juan Quintela + +Index: xen-4.4.1-testing/tools/qemu-xen-dir-remote/hw/gpio/zaurus.c +=================================================================== +--- xen-4.4.1-testing.orig/tools/qemu-xen-dir-remote/hw/gpio/zaurus.c ++++ xen-4.4.1-testing/tools/qemu-xen-dir-remote/hw/gpio/zaurus.c +@@ -203,6 +203,15 @@ static bool is_version_0 (void *opaque, + return version_id == 0; + } + ++static bool vmstate_scoop_validate(void *opaque, int version_id) ++{ ++ ScoopInfo *s = opaque; ++ ++ return !(s->prev_level & 0xffff0000) && ++ !(s->gpio_level & 0xffff0000) && ++ !(s->gpio_dir & 0xffff0000); ++} ++ + static const VMStateDescription vmstate_scoop_regs = { + .name = "scoop", + .version_id = 1, +@@ -215,6 +224,7 @@ static const VMStateDescription vmstate_ + VMSTATE_UINT32(gpio_level, ScoopInfo), + VMSTATE_UINT32(gpio_dir, ScoopInfo), + VMSTATE_UINT32(prev_level, ScoopInfo), ++ VMSTATE_VALIDATE("irq levels are 16 bit", vmstate_scoop_validate), + VMSTATE_UINT16(mcr, ScoopInfo), + VMSTATE_UINT16(cdr, ScoopInfo), + VMSTATE_UINT16(ccr, ScoopInfo), diff --git a/README.SUSE b/README.SUSE index 0636f6f..66758ca 100644 --- a/README.SUSE +++ b/README.SUSE @@ -550,16 +550,16 @@ the 'xl dmesg' command will produce more output to better analyze problems. Grub2 Example: Edit /etc/default/grub and add, - GRUB_CMDLINE_XEN_DEFAULT="loglvl=all loglvl_guest=all" + GRUB_CMDLINE_XEN_DEFAULT="loglvl=all guest_loglvl=all" and then run, grub2-mkconfig -o /boot/grub2/grub.cfg Grub1 Example: Edit /boot/grub/menu.lst and edit the line containing xen.gz - kernel /boot/xen.gz loglvl=all loglvl_guest=all + kernel /boot/xen.gz loglvl=all guest_loglvl=all 2) With the log levels specified above and the host rebooted, more useful -information about domain 0 and running VMs can be obtained using using the +information about domain 0 and running VMs can be obtained using the 'xl dmesg' and 'xl debug-keys' commands. For example, from the command line run: xl debug-keys h @@ -581,7 +581,7 @@ following to prepare Xen to send its output over this serial line. Grub2 Example: Edit /etc/default/grub and add, - GRUB_CMDLINE_XEN_DEFAULT="loglvl=all loglvl_guest=all console=com1 com1=115200,8n1" + GRUB_CMDLINE_XEN_DEFAULT="loglvl=all guest_loglvl=all console=com1 com1=115200,8n1" Also append additional serial flags to the option below such that it appears as, GRUB_CMDLINE_LINUX_DEFAULT=" console=ttyS0, 115200" where pre-existing flags are those options already present and then run, @@ -600,7 +600,7 @@ use the debug-enabled hypervisor, and/or to prevent automatic rebooting. Grub2 Example: Edit /etc/default/grub and add, - GRUB_CMDLINE_XEN_DEFAULT="noreboot loglvl=all loglvl_guest=all" + GRUB_CMDLINE_XEN_DEFAULT="noreboot loglvl=all guest_loglvl=all" Edit /etc/grub.d/20_linux_xen file. Look for this line: while [ "x${xen_list}" != "x" ] ; do and add *before* the above line something like this: @@ -616,7 +616,7 @@ Grub1 Example: Edit your menu.lst configuration from something like this: kernel (hd0,5)/xen.gz To something like this: - kernel (hd0,5)/xen-dbg.gz noreboot loglvl=all loglvl_guest=all + kernel (hd0,5)/xen-dbg.gz noreboot loglvl=all guest_loglvl=all All hypervisor options require a reboot to take effect. 
After rebooting, the Xen hypervisor will write any error messages to the log file (viewable with diff --git a/x86-ioapic-ack-default.patch b/x86-ioapic-ack-default.patch index a42145a..9db4115 100644 --- a/x86-ioapic-ack-default.patch +++ b/x86-ioapic-ack-default.patch @@ -1,10 +1,8 @@ Change default IO-APIC ack mode for single IO-APIC systems to old-style. -Index: xen-4.3.0-testing/xen/arch/x86/io_apic.c -=================================================================== ---- xen-4.3.0-testing.orig/xen/arch/x86/io_apic.c -+++ xen-4.3.0-testing/xen/arch/x86/io_apic.c -@@ -2026,7 +2026,10 @@ void __init setup_IO_APIC(void) +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -2034,7 +2034,10 @@ void __init setup_IO_APIC(void) io_apic_irqs = ~PIC_IRQS; printk("ENABLING IO-APIC IRQs\n"); diff --git a/xen-4.4.1-testing-src.tar.bz2 b/xen-4.4.1-testing-src.tar.bz2 index 91f2a22..df9a935 100644 --- a/xen-4.4.1-testing-src.tar.bz2 +++ b/xen-4.4.1-testing-src.tar.bz2 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f277f2bd5714773978d9a3688fd1f65577178aaac1add3b7d66c01e3648267b1 -size 4432926 +oid sha256:57062330e02f3d4d2911bb184ed89717a007e8e276e5922a1e50d6e90e72662d +size 4368624 diff --git a/xen.changes b/xen.changes index 6568666..2944d3f 100644 --- a/xen.changes +++ b/xen.changes @@ -1,3 +1,37 @@ +------------------------------------------------------------------- +Tue Sep 2 09:01:24 MDT 2014 - carnold@suse.com + +- Update to Xen Version 4.4.1 FCS + xen-4.4.1-testing-src.tar.bz2 +- Dropped patches now contained in tarball + 53d7b781-x86-cpu-undo-BIOS-CPUID-max_leaf-limit-earlier.patch + 53df71c7-lz4-check-for-underruns.patch + 53e47d6b-x86_emulate-properly-do-IP-updates-and-other-side-effects.patch + +------------------------------------------------------------------- +Mon Sep 1 15:20:20 MDT 2014 - carnold@suse.com + +- bnc#882089 - Windows 2012 R2 fails to boot up with greater than + 60 vcpus + 53df727b-x86-HVM-extend-LAPIC-shortcuts-around-P2M-lookups.patch + 53e8be5f-x86-vHPET-use-rwlock-instead-of-simple-one.patch + 53ff3659-x86-consolidate-boolean-inputs-in-hvm-and-p2m.patch + 53ff36ae-x86-hvm-treat-non-insn-fetch-NPF-also-as-read-violations.patch + 53ff36d5-x86-mem_event-deliver-gla-fault-EPT-violation-information.patch + 54005472-EPT-utilize-GLA-GPA-translation-known-for-certain-faults.patch +- Upstream patches from Jan + 53f737b1-VMX-fix-DebugCtl-MSR-clearing.patch + 53f7386d-x86-irq-process-softirqs-in-irq-keyhandlers.patch + 53ff3716-x86-ats-Disable-Address-Translation-Services-by-default.patch + 53ff3899-x86-NMI-allow-processing-unknown-NMIs-with-watchdog.patch + +------------------------------------------------------------------- +Fri Aug 29 09:25:47 MDT 2014 - carnold@suse.com + +- bnc#864801 - VUL-0: CVE-2013-4540: qemu: zaurus: buffer overrun + on invalid state load + CVE-2013-4540-qemu.patch + ------------------------------------------------------------------- Fri Aug 15 15:09:27 MDT 2014 - carnold@suse.com diff --git a/xen.spec b/xen.spec index cd2eaf5..e24dabf 100644 --- a/xen.spec +++ b/xen.spec @@ -21,7 +21,7 @@ Name: xen ExclusiveArch: %ix86 x86_64 %arm aarch64 %define xvers 4.4 %define xvermaj 4 -%define changeset 28531 +%define changeset 28541 %define xen_build_dir xen-4.4.1-testing # %define with_kmp 0 @@ -153,7 +153,7 @@ BuildRequires: xorg-x11-util-devel %endif %endif -Version: 4.4.1_02 +Version: 4.4.1_04 Release: 0 PreReq: %insserv_prereq %fillup_prereq Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) @@ -224,11 
+224,17 @@ Patch9: 539ebe62-x86-EFI-improve-boot-time-diagnostics.patch Patch10: 53aac342-x86-HVM-consolidate-and-sanitize-CR4-guest-reserved-bit-determination.patch Patch11: 53c9151b-Fix-xl-vncviewer-accesses-port-0-by-any-invalid-domid.patch Patch12: 53d124e7-fix-list_domain_details-check-config-data-length-0.patch -Patch13: 53d7b781-x86-cpu-undo-BIOS-CPUID-max_leaf-limit-earlier.patch -Patch14: 53dba447-x86-ACPI-allow-CMOS-RTC-use-even-when-ACPI-says-there-is-none.patch -Patch15: 53df71c7-lz4-check-for-underruns.patch -Patch16: 53df727b-x86-HVM-extend-LAPIC-shortcuts-around-P2M-lookups.patch -Patch17: 53e47d6b-x86_emulate-properly-do-IP-updates-and-other-side-effects.patch +Patch13: 53dba447-x86-ACPI-allow-CMOS-RTC-use-even-when-ACPI-says-there-is-none.patch +Patch14: 53df727b-x86-HVM-extend-LAPIC-shortcuts-around-P2M-lookups.patch +Patch15: 53e8be5f-x86-vHPET-use-rwlock-instead-of-simple-one.patch +Patch16: 53f737b1-VMX-fix-DebugCtl-MSR-clearing.patch +Patch17: 53f7386d-x86-irq-process-softirqs-in-irq-keyhandlers.patch +Patch18: 53ff3659-x86-consolidate-boolean-inputs-in-hvm-and-p2m.patch +Patch19: 53ff36ae-x86-hvm-treat-non-insn-fetch-NPF-also-as-read-violations.patch +Patch20: 53ff36d5-x86-mem_event-deliver-gla-fault-EPT-violation-information.patch +Patch21: 53ff3716-x86-ats-Disable-Address-Translation-Services-by-default.patch +Patch22: 53ff3899-x86-NMI-allow-processing-unknown-NMIs-with-watchdog.patch +Patch23: 54005472-EPT-utilize-GLA-GPA-translation-known-for-certain-faults.patch # Upstream qemu Patch250: VNC-Support-for-ExtendedKeyEvent-client-message.patch Patch251: 0001-net-move-the-tap-buffer-into-TAPState.patch @@ -239,6 +245,7 @@ Patch255: 0005-e1000-multi-buffer-packet-support.patch Patch256: 0006-e1000-clear-EOP-for-multi-buffer-descriptors.patch Patch257: 0007-e1000-verify-we-have-buffers-upfront.patch Patch258: 0008-e1000-check-buffer-availability.patch +Patch259: CVE-2013-4540-qemu.patch # Our platform specific patches Patch301: xen-destdir.patch Patch302: xen-xmexample.patch @@ -611,6 +618,12 @@ Authors: %patch15 -p1 %patch16 -p1 %patch17 -p1 +%patch18 -p1 +%patch19 -p1 +%patch20 -p1 +%patch21 -p1 +%patch22 -p1 +%patch23 -p1 # Upstream qemu patches %patch250 -p1 %patch251 -p1 @@ -621,6 +634,7 @@ Authors: %patch256 -p1 %patch257 -p1 %patch258 -p1 +%patch259 -p1 # Our platform specific patches %patch301 -p1 %patch302 -p1
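
Note on the spec changes above: the renumbering must stay consistent in both
places, i.e. the Patch13-Patch23 and Patch259 declarations and the matching
%patch13-%patch23 and %patch259 applications in %prep. A quick way to confirm
the refreshed queue still applies after the 4.4.1 rebase is to run only the
%prep stage. This is a sketch, not part of the package itself; it assumes an
rpmbuild host with xen.spec and all sources/patches in the current directory
(paths and macros may differ in an osc checkout):

    # Run %setup plus every %patchNN line, then stop before %build;
    # any patch that no longer applies to xen-4.4.1-testing fails here.
    rpmbuild --nodeps --define "_sourcedir $PWD" -bp xen.spec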