From 4d852d5e7f624a2ecdbad0015bef69596fcef1ac590e3a996713315a89066ec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20Schr=C3=B6ter?= Date: Mon, 22 Jul 2024 17:48:38 +0200 Subject: [PATCH] Sync from SUSE:SLFO:Main xen revision 62bbe8a36d5946ad90b6ab41036b5be3 --- ...c-x86-hvm-Misra-Rule-19-1-regression.patch | 45 +++++ ...RTC-UIP-set-for-longer-than-expected.patch | 47 +++++ 6627a5fc-x86-MTRR-inverted-WC-check.patch | 24 +++ ...c-x86-spec-reporting-of-BHB-clearing.patch | 56 ++++++ ...86-spec-adjust-logic-to-elide-LFENCE.patch | 60 ++++++ 663090fd-x86-gen-cpuid-syntax.patch | 32 ++++ ...c-libxs-open-xenbus-fds-as-O_CLOEXEC.patch | 38 ++++ ...icy-migration-IceLake-to-CascadeLake.patch | 79 ++++++++ ...5b5-x86-ucode-distinguish-up-to-date.patch | 43 +++++ ...re-error-handling-in-device-creation.patch | 176 ++++++++++++++++++ ...ll-sched_resource-data-inside-locked.patch | 74 ++++++++ ...respect-mapcache_domain_init-failing.patch | 26 +++ ...-further-identify-already-up-to-date.patch | 73 ++++++++ ...-offline-CPUs-from-old-CPU-mask-when.patch | 34 ++++ ...-x86-SMP-no-shorthand-IPI-in-hotplug.patch | 79 ++++++++ ...mit-interrupt-movement-in-fixup_irqs.patch | 92 +++++++++ ...T-special-page-in-epte_get_entry_emt.patch | 33 ++++ ...T-avoid-marking-np-ents-for-reconfig.patch | 72 +++++++ ...PT-drop-questionable-mfn_valid-from-.patch | 34 ++++ 667187cc-x86-Intel-unlock-CPUID-earlier.patch | 87 +++++++++ ...9-x86-IRQ-old_cpu_mask-in-fixup_irqs.patch | 74 ++++++++ ...-handle-moving-in-_assign_irq_vector.patch | 162 ++++++++++++++++ ...6-xstate-initialisation-of-XSS-cache.patch | 65 +++++++ 6672c847-x86-CPUID-XSAVE-dynamic-leaves.patch | 63 +++++++ ...rd-pending-to-new-dest-in-fixup_irqs.patch | 130 +++++++++++++ gcc14-fixes.patch | 81 ++++++++ libxl.LIBXL_HOTPLUG_TIMEOUT.patch | 40 ++-- xen.changes | 53 ++++++ xen.libxl.dmmd.patch | 32 ++-- xen.spec | 29 ++- xsa458.patch | 38 ++++ 31 files changed, 1923 insertions(+), 48 deletions(-) create mode 100644 
6617d62c-x86-hvm-Misra-Rule-19-1-regression.patch create mode 100644 6627a4ee-vRTC-UIP-set-for-longer-than-expected.patch create mode 100644 6627a5fc-x86-MTRR-inverted-WC-check.patch create mode 100644 662a6a4c-x86-spec-reporting-of-BHB-clearing.patch create mode 100644 662a6a8d-x86-spec-adjust-logic-to-elide-LFENCE.patch create mode 100644 663090fd-x86-gen-cpuid-syntax.patch create mode 100644 663a383c-libxs-open-xenbus-fds-as-O_CLOEXEC.patch create mode 100644 663a4f3e-x86-cpu-policy-migration-IceLake-to-CascadeLake.patch create mode 100644 663d05b5-x86-ucode-distinguish-up-to-date.patch create mode 100644 663eaa27-libxl-XenStore-error-handling-in-device-creation.patch create mode 100644 66450626-sched-set-all-sched_resource-data-inside-locked.patch create mode 100644 66450627-x86-respect-mapcache_domain_init-failing.patch create mode 100644 6646031f-x86-ucode-further-identify-already-up-to-date.patch create mode 100644 6666ba52-x86-irq-remove-offline-CPUs-from-old-CPU-mask-when.patch create mode 100644 666994ab-x86-SMP-no-shorthand-IPI-in-hotplug.patch create mode 100644 666994f0-x86-IRQ-limit-interrupt-movement-in-fixup_irqs.patch create mode 100644 666b07ee-x86-EPT-special-page-in-epte_get_entry_emt.patch create mode 100644 666b0819-x86-EPT-avoid-marking-np-ents-for-reconfig.patch create mode 100644 666b085a-x86-EPT-drop-questionable-mfn_valid-from-.patch create mode 100644 667187cc-x86-Intel-unlock-CPUID-earlier.patch create mode 100644 66718849-x86-IRQ-old_cpu_mask-in-fixup_irqs.patch create mode 100644 6671885e-x86-IRQ-handle-moving-in-_assign_irq_vector.patch create mode 100644 6672c846-x86-xstate-initialisation-of-XSS-cache.patch create mode 100644 6672c847-x86-CPUID-XSAVE-dynamic-leaves.patch create mode 100644 6673ffdc-x86-IRQ-forward-pending-to-new-dest-in-fixup_irqs.patch create mode 100644 gcc14-fixes.patch create mode 100644 xsa458.patch diff --git a/6617d62c-x86-hvm-Misra-Rule-19-1-regression.patch 
b/6617d62c-x86-hvm-Misra-Rule-19-1-regression.patch new file mode 100644 index 0000000..a506255 --- /dev/null +++ b/6617d62c-x86-hvm-Misra-Rule-19-1-regression.patch @@ -0,0 +1,45 @@ +# Commit d0a718a45f14b86471d8eb3083acd72760963470 +# Date 2024-04-11 13:23:08 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +x86/hvm: Fix Misra Rule 19.1 regression + +Despite noticing an impending Rule 19.1 violation, the adjustment made (the +uint32_t cast) wasn't sufficient to avoid it. Try again. + +Subsequently noticed by Coverity too. + +Fixes: 6a98383b0877 ("x86/HVM: clear upper halves of GPRs upon entry from 32-bit code") +Coverity-IDs: 1596289 thru 1596298 +Signed-off-by: Andrew Cooper +Reviewed-by: Stefano Stabellini + +--- a/xen/arch/x86/include/asm/hvm/hvm.h ++++ b/xen/arch/x86/include/asm/hvm/hvm.h +@@ -585,16 +585,16 @@ static inline void hvm_sanitize_regs_fie + if ( compat ) + { + /* Clear GPR upper halves, to counteract guests playing games. */ +- regs->rbp = (uint32_t)regs->ebp; +- regs->rbx = (uint32_t)regs->ebx; +- regs->rax = (uint32_t)regs->eax; +- regs->rcx = (uint32_t)regs->ecx; +- regs->rdx = (uint32_t)regs->edx; +- regs->rsi = (uint32_t)regs->esi; +- regs->rdi = (uint32_t)regs->edi; +- regs->rip = (uint32_t)regs->eip; +- regs->rflags = (uint32_t)regs->eflags; +- regs->rsp = (uint32_t)regs->esp; ++ regs->rbp = (uint32_t)regs->rbp; ++ regs->rbx = (uint32_t)regs->rbx; ++ regs->rax = (uint32_t)regs->rax; ++ regs->rcx = (uint32_t)regs->rcx; ++ regs->rdx = (uint32_t)regs->rdx; ++ regs->rsi = (uint32_t)regs->rsi; ++ regs->rdi = (uint32_t)regs->rdi; ++ regs->rip = (uint32_t)regs->rip; ++ regs->rflags = (uint32_t)regs->rflags; ++ regs->rsp = (uint32_t)regs->rsp; + } + + #ifndef NDEBUG diff --git a/6627a4ee-vRTC-UIP-set-for-longer-than-expected.patch b/6627a4ee-vRTC-UIP-set-for-longer-than-expected.patch new file mode 100644 index 0000000..a0e3217 --- /dev/null +++ b/6627a4ee-vRTC-UIP-set-for-longer-than-expected.patch @@ -0,0 +1,47 @@ +# Commit 
43a07069863b419433dee12c9b58c1f7ce70aa97 +# Date 2024-04-23 14:09:18 +0200 +# Author Ross Lagerwall +# Committer Jan Beulich +x86/rtc: Avoid UIP flag being set for longer than expected + +In a test, OVMF reported an error initializing the RTC without +indicating the precise nature of the error. The only plausible +explanation I can find is as follows: + +As part of the initialization, OVMF reads register C and then reads +register A repeatedly until the UIP flag is not set. If this takes longer +than 100 ms, OVMF fails and reports an error. This may happen with the +following sequence of events: + +At guest time=0s, rtc_init() calls check_update_timer() which schedules +update_timer for t=(1 - 244us). + +At t=1s, the update_timer function happens to have been called >= 244us +late. In the timer callback, it sets the UIP flag and schedules +update_timer2 for t=1s. + +Before update_timer2 runs, the guest reads register C which calls +check_update_timer(). check_update_timer() stops the scheduled +update_timer2 and since the guest time is now outside of the update +cycle, it schedules update_timer for t=(2 - 244us). + +The UIP flag will therefore be set for a whole second from t=1 to t=2 +while the guest repeatedly reads register A waiting for the UIP flag to +clear. Fix it by clearing the UIP flag when scheduling update_timer. + +I was able to reproduce this issue with a synthetic test and this +resolves the issue. 
+ +Signed-off-by: Ross Lagerwall +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/hvm/rtc.c ++++ b/xen/arch/x86/hvm/rtc.c +@@ -202,6 +202,7 @@ static void check_update_timer(RTCState + } + else + { ++ s->hw.cmos_data[RTC_REG_A] &= ~RTC_UIP; + next_update_time = (USEC_PER_SEC - guest_usec - 244) * NS_PER_USEC; + expire_time = NOW() + next_update_time; + s->next_update_time = expire_time; diff --git a/6627a5fc-x86-MTRR-inverted-WC-check.patch b/6627a5fc-x86-MTRR-inverted-WC-check.patch new file mode 100644 index 0000000..44a40d5 --- /dev/null +++ b/6627a5fc-x86-MTRR-inverted-WC-check.patch @@ -0,0 +1,24 @@ +# Commit 77e25f0e30ddd11e043e6fce84bf108ce7de5b6f +# Date 2024-04-23 14:13:48 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/MTRR: correct inadvertently inverted WC check + +The ! clearly got lost by mistake. + +Fixes: e9e0eb30d4d6 ("x86/MTRR: avoid several indirect calls") +Reported-by: Marek Marczykowski-Górecki +Signed-off-by: Jan Beulich +Acked-by: Roger Pau Monné + +--- a/xen/arch/x86/cpu/mtrr/main.c ++++ b/xen/arch/x86/cpu/mtrr/main.c +@@ -316,7 +316,7 @@ int mtrr_add_page(unsigned long base, un + } + + /* If the type is WC, check that this processor supports it */ +- if ((type == X86_MT_WC) && mtrr_have_wrcomb()) { ++ if ((type == X86_MT_WC) && !mtrr_have_wrcomb()) { + printk(KERN_WARNING + "mtrr: your processor doesn't support write-combining\n"); + return -EOPNOTSUPP; diff --git a/662a6a4c-x86-spec-reporting-of-BHB-clearing.patch b/662a6a4c-x86-spec-reporting-of-BHB-clearing.patch new file mode 100644 index 0000000..a5d3929 --- /dev/null +++ b/662a6a4c-x86-spec-reporting-of-BHB-clearing.patch @@ -0,0 +1,56 @@ +# Commit 049ab0b2c9f1f5edb54b505fef0bc575787dafe9 +# Date 2024-04-25 16:35:56 +0200 +# Author Roger Pau Monné +# Committer Jan Beulich +x86/spec: fix reporting of BHB clearing usage from guest entry points + +Reporting whether the BHB clearing on entry is done for the different domains +types based on cpu_has_bhb_seq is unhelpful, as that 
variable signals whether +there's a BHB clearing sequence selected, but that alone doesn't imply that +such sequence is used from the PV and/or HVM entry points. + +Instead use opt_bhb_entry_{pv,hvm} which do signal whether BHB clearing is +performed on entry from PV/HVM. + +Fixes: 689ad48ce9cf ('x86/spec-ctrl: Wire up the Native-BHI software sequences') +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -634,7 +634,7 @@ static void __init print_details(enum in + (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || + boot_cpu_has(X86_FEATURE_SC_RSB_HVM) || + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) || +- cpu_has_bhb_seq || amd_virt_spec_ctrl || ++ opt_bhb_entry_hvm || amd_virt_spec_ctrl || + opt_eager_fpu || opt_verw_hvm) ? "" : " None", + boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", + (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || +@@ -643,7 +643,7 @@ static void __init print_details(enum in + opt_eager_fpu ? " EAGER_FPU" : "", + opt_verw_hvm ? " VERW" : "", + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : "", +- cpu_has_bhb_seq ? " BHB-entry" : ""); ++ opt_bhb_entry_hvm ? " BHB-entry" : ""); + + #endif + #ifdef CONFIG_PV +@@ -651,14 +651,14 @@ static void __init print_details(enum in + (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || + boot_cpu_has(X86_FEATURE_SC_RSB_PV) || + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) || +- cpu_has_bhb_seq || ++ opt_bhb_entry_pv || + opt_eager_fpu || opt_verw_pv) ? "" : " None", + boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", + boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", + opt_eager_fpu ? " EAGER_FPU" : "", + opt_verw_pv ? " VERW" : "", + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : "", +- cpu_has_bhb_seq ? " BHB-entry" : ""); ++ opt_bhb_entry_pv ? " BHB-entry" : ""); + + printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n", + opt_xpti_hwdom ? 
"enabled" : "disabled", diff --git a/662a6a8d-x86-spec-adjust-logic-to-elide-LFENCE.patch b/662a6a8d-x86-spec-adjust-logic-to-elide-LFENCE.patch new file mode 100644 index 0000000..fdf650c --- /dev/null +++ b/662a6a8d-x86-spec-adjust-logic-to-elide-LFENCE.patch @@ -0,0 +1,60 @@ +# Commit 656ae8f1091bcefec9c46ec3ea3ac2118742d4f6 +# Date 2024-04-25 16:37:01 +0200 +# Author Roger Pau Monné +# Committer Jan Beulich +x86/spec: adjust logic that elides lfence + +It's currently too restrictive by just checking whether there's a BHB clearing +sequence selected. It should instead check whether BHB clearing is used on +entry from PV or HVM specifically. + +Switch to use opt_bhb_entry_{pv,hvm} instead, and then remove cpu_has_bhb_seq +since it no longer has any users. + +Reported-by: Jan Beulich +Fixes: 954c983abcee ('x86/spec-ctrl: Software BHB-clearing sequences') +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich +Reviewed-by: Andrew Cooper + +--- a/xen/arch/x86/include/asm/cpufeature.h ++++ b/xen/arch/x86/include/asm/cpufeature.h +@@ -228,9 +228,6 @@ static inline bool boot_cpu_has(unsigned + #define cpu_bug_fpu_ptrs boot_cpu_has(X86_BUG_FPU_PTRS) + #define cpu_bug_null_seg boot_cpu_has(X86_BUG_NULL_SEG) + +-#define cpu_has_bhb_seq (boot_cpu_has(X86_SPEC_BHB_TSX) || \ +- boot_cpu_has(X86_SPEC_BHB_LOOPS)) +- + enum _cache_type { + CACHE_TYPE_NULL = 0, + CACHE_TYPE_DATA = 1, +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -2328,7 +2328,7 @@ void __init init_speculation_mitigations + * unconditional WRMSR. If we do have it, or we're not using any + * prior conditional block, then it's safe to drop the LFENCE. 
+ */ +- if ( !cpu_has_bhb_seq && ++ if ( !opt_bhb_entry_pv && + (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || + !boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV)) ) + setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_PV); +@@ -2344,7 +2344,7 @@ void __init init_speculation_mitigations + * active in the block that is skipped when interrupting guest + * context, then it's safe to drop the LFENCE. + */ +- if ( !cpu_has_bhb_seq && ++ if ( !opt_bhb_entry_pv && + (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || + (!boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) && + !boot_cpu_has(X86_FEATURE_SC_RSB_PV))) ) +@@ -2356,7 +2356,7 @@ void __init init_speculation_mitigations + * A BHB sequence, if used, is the only conditional action, so if we + * don't have it, we don't need the safety LFENCE. + */ +- if ( !cpu_has_bhb_seq ) ++ if ( !opt_bhb_entry_hvm ) + setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_VMX); + } + diff --git a/663090fd-x86-gen-cpuid-syntax.patch b/663090fd-x86-gen-cpuid-syntax.patch new file mode 100644 index 0000000..98c67df --- /dev/null +++ b/663090fd-x86-gen-cpuid-syntax.patch @@ -0,0 +1,32 @@ +# Commit 08e79bba73d74a85d3ce6ff0f91c5205f1e05eda +# Date 2024-04-30 08:34:37 +0200 +# Author Jason Andryuk +# Committer Jan Beulich +xen/x86: Fix Syntax warning in gen-cpuid.py + +Python 3.12.2 warns: + +xen/tools/gen-cpuid.py:50: SyntaxWarning: invalid escape sequence '\s' + "\s+([\s\d]+\*[\s\d]+\+[\s\d]+)\)" +xen/tools/gen-cpuid.py:51: SyntaxWarning: invalid escape sequence '\s' + "\s+/\*([\w!]*) .*$") + +Specify the strings as raw strings so '\s' is read as literal '\' + 's'. +This avoids escaping all the '\'s in the strings. 
+ +Signed-off-by: Jason Andryuk +Acked-by: Andrew Cooper + +--- a/xen/tools/gen-cpuid.py ++++ b/xen/tools/gen-cpuid.py +@@ -47,8 +47,8 @@ def parse_definitions(state): + """ + feat_regex = re.compile( + r"^XEN_CPUFEATURE\(([A-Z0-9_]+)," +- "\s+([\s\d]+\*[\s\d]+\+[\s\d]+)\)" +- "\s+/\*([\w!]*) .*$") ++ r"\s+([\s\d]+\*[\s\d]+\+[\s\d]+)\)" ++ r"\s+/\*([\w!]*) .*$") + + word_regex = re.compile( + r"^/\* .* word (\d*) \*/$") diff --git a/663a383c-libxs-open-xenbus-fds-as-O_CLOEXEC.patch b/663a383c-libxs-open-xenbus-fds-as-O_CLOEXEC.patch new file mode 100644 index 0000000..2bc0672 --- /dev/null +++ b/663a383c-libxs-open-xenbus-fds-as-O_CLOEXEC.patch @@ -0,0 +1,38 @@ +# Commit f4f2f3402b2f4985d69ffc0d46f845d05fd0b60f +# Date 2024-05-07 15:18:36 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +tools/libxs: Open /dev/xen/xenbus fds as O_CLOEXEC + +The header description for xs_open() goes as far as to suggest that the fd is +O_CLOEXEC, but it isn't actually. + +`xl devd` has been observed leaking /dev/xen/xenbus into children. + +Link: https://github.com/QubesOS/qubes-issues/issues/8292 +Reported-by: Demi Marie Obenour +Signed-off-by: Andrew Cooper +Reviewed-by: Juergen Gross + +--- a/tools/libs/store/xs.c ++++ b/tools/libs/store/xs.c +@@ -54,6 +54,10 @@ struct xs_stored_msg { + #include + #endif + ++#ifndef O_CLOEXEC ++#define O_CLOEXEC 0 ++#endif ++ + struct xs_handle { + /* Communications channel to xenstore daemon. 
*/ + int fd; +@@ -227,7 +231,7 @@ error: + static int get_dev(const char *connect_to) + { + /* We cannot open read-only because requests are writes */ +- return open(connect_to, O_RDWR); ++ return open(connect_to, O_RDWR | O_CLOEXEC); + } + + static int all_restrict_cb(Xentoolcore__Active_Handle *ah, domid_t domid) { diff --git a/663a4f3e-x86-cpu-policy-migration-IceLake-to-CascadeLake.patch b/663a4f3e-x86-cpu-policy-migration-IceLake-to-CascadeLake.patch new file mode 100644 index 0000000..d53924e --- /dev/null +++ b/663a4f3e-x86-cpu-policy-migration-IceLake-to-CascadeLake.patch @@ -0,0 +1,79 @@ +# Commit a2330b51df267e20e66bbba6c5bf08f0570ed58b +# Date 2024-05-07 16:56:46 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +x86/cpu-policy: Fix migration from Ice Lake to Cascade Lake + +Ever since Xen 4.14, there has been a latent bug with migration. + +While some toolstacks can level the features properly, they don't shrink +feat.max_subleaf when all features have been dropped. This is because +we *still* have not completed the toolstack side work for full CPU Policy +objects. + +As a consequence, even when properly feature levelled, VMs can't migrate +"backwards" across hardware which reduces feat.max_subleaf. One such example +is Ice Lake (max_subleaf=2 for INTEL_PSFD) to Cascade Lake (max_subleaf=0). + +Extend the max policies feat.max_subleaf to the highest number Xen knows +about, but leave the default policies matching the host. This will allow VMs +with a higher feat.max_subleaf than strictly necessary to migrate in. + +Eventually we'll manage to teach the toolstack how to avoid creating such VMs +in the first place, but there's still more work to do there. 
+ +Signed-off-by: Andrew Cooper +Acked-by: Roger Pau Monné + +--- a/xen/arch/x86/cpu-policy.c ++++ b/xen/arch/x86/cpu-policy.c +@@ -603,6 +603,13 @@ static void __init calculate_pv_max_poli + unsigned int i; + + *p = host_cpu_policy; ++ ++ /* ++ * Some VMs may have a larger-than-necessary feat max_subleaf. Allow them ++ * to migrate in. ++ */ ++ p->feat.max_subleaf = ARRAY_SIZE(p->feat.raw) - 1; ++ + x86_cpu_policy_to_featureset(p, fs); + + for ( i = 0; i < ARRAY_SIZE(fs); ++i ) +@@ -643,6 +650,10 @@ static void __init calculate_pv_def_poli + unsigned int i; + + *p = pv_max_cpu_policy; ++ ++ /* Default to the same max_subleaf as the host. */ ++ p->feat.max_subleaf = host_cpu_policy.feat.max_subleaf; ++ + x86_cpu_policy_to_featureset(p, fs); + + for ( i = 0; i < ARRAY_SIZE(fs); ++i ) +@@ -679,6 +690,13 @@ static void __init calculate_hvm_max_pol + const uint32_t *mask; + + *p = host_cpu_policy; ++ ++ /* ++ * Some VMs may have a larger-than-necessary feat max_subleaf. Allow them ++ * to migrate in. ++ */ ++ p->feat.max_subleaf = ARRAY_SIZE(p->feat.raw) - 1; ++ + x86_cpu_policy_to_featureset(p, fs); + + mask = hvm_hap_supported() ? +@@ -780,6 +798,10 @@ static void __init calculate_hvm_def_pol + const uint32_t *mask; + + *p = hvm_max_cpu_policy; ++ ++ /* Default to the same max_subleaf as the host. */ ++ p->feat.max_subleaf = host_cpu_policy.feat.max_subleaf; ++ + x86_cpu_policy_to_featureset(p, fs); + + mask = hvm_hap_supported() ? 
diff --git a/663d05b5-x86-ucode-distinguish-up-to-date.patch b/663d05b5-x86-ucode-distinguish-up-to-date.patch new file mode 100644 index 0000000..783fa12 --- /dev/null +++ b/663d05b5-x86-ucode-distinguish-up-to-date.patch @@ -0,0 +1,43 @@ +# Commit 648db37a155aca6f66d4cf3bb118417a728c3579 +# Date 2024-05-09 18:19:49 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +x86/ucode: Distinguish "ucode already up to date" + +Right now, Xen returns -ENOENT for both "the provided blob isn't correct for +this CPU", and "the blob isn't newer than what's loaded". + +This in turn causes xen-ucode to exit with an error, when "nothing to do" is +more commonly a success condition. + +Handle EEXIST specially and exit cleanly. + +Signed-off-by: Andrew Cooper +Acked-by: Roger Pau Monné + +--- a/tools/misc/xen-ucode.c ++++ b/tools/misc/xen-ucode.c +@@ -125,8 +125,11 @@ int main(int argc, char *argv[]) + exit(1); + } + ++ errno = 0; + ret = xc_microcode_update(xch, buf, len); +- if ( ret ) ++ if ( ret == -1 && errno == EEXIST ) ++ printf("Microcode already up to date\n"); ++ else if ( ret ) + { + fprintf(stderr, "Failed to update microcode. (err: %s)\n", + strerror(errno)); +--- a/xen/arch/x86/cpu/microcode/core.c ++++ b/xen/arch/x86/cpu/microcode/core.c +@@ -640,7 +640,7 @@ static long cf_check microcode_update_he + "microcode: couldn't find any newer%s revision in the provided blob!\n", + opt_ucode_allow_same ? 
" (or the same)" : ""); + microcode_free_patch(patch); +- ret = -ENOENT; ++ ret = -EEXIST; + + goto put; + } diff --git a/663eaa27-libxl-XenStore-error-handling-in-device-creation.patch b/663eaa27-libxl-XenStore-error-handling-in-device-creation.patch new file mode 100644 index 0000000..29a2297 --- /dev/null +++ b/663eaa27-libxl-XenStore-error-handling-in-device-creation.patch @@ -0,0 +1,176 @@ +# Commit 531d3bea5e9357357eaf6d40f5784a1b4c29b910 +# Date 2024-05-11 00:13:43 +0100 +# Author Demi Marie Obenour +# Committer Andrew Cooper +libxl: Fix handling XenStore errors in device creation + +If xenstored runs out of memory it is possible for it to fail operations +that should succeed. libxl wasn't robust against this, and could fail +to ensure that the TTY path of a non-initial console was created and +read-only for guests. This doesn't qualify for an XSA because guests +should not be able to run xenstored out of memory, but it still needs to +be fixed. + +Add the missing error checks to ensure that all errors are properly +handled and that at no point can a guest make the TTY path of its +frontend directory writable. 
+ +Signed-off-by: Demi Marie Obenour +Reviewed-by: Juergen Gross + +--- a/tools/libs/light/libxl_console.c ++++ b/tools/libs/light/libxl_console.c +@@ -351,11 +351,10 @@ int libxl__device_console_add(libxl__gc + flexarray_append(front, "protocol"); + flexarray_append(front, LIBXL_XENCONSOLE_PROTOCOL); + } +- libxl__device_generic_add(gc, XBT_NULL, device, +- libxl__xs_kvs_of_flexarray(gc, back), +- libxl__xs_kvs_of_flexarray(gc, front), +- libxl__xs_kvs_of_flexarray(gc, ro_front)); +- rc = 0; ++ rc = libxl__device_generic_add(gc, XBT_NULL, device, ++ libxl__xs_kvs_of_flexarray(gc, back), ++ libxl__xs_kvs_of_flexarray(gc, front), ++ libxl__xs_kvs_of_flexarray(gc, ro_front)); + out: + return rc; + } +@@ -665,6 +664,8 @@ int libxl_device_channel_getinfo(libxl_c + */ + if (!val) val = "/NO-SUCH-PATH"; + channelinfo->u.pty.path = strdup(val); ++ if (channelinfo->u.pty.path == NULL) ++ abort(); + break; + default: + break; +--- a/tools/libs/light/libxl_device.c ++++ b/tools/libs/light/libxl_device.c +@@ -177,8 +177,13 @@ int libxl__device_generic_add(libxl__gc + ro_frontend_perms[1].perms = backend_perms[1].perms = XS_PERM_READ; + + retry_transaction: +- if (create_transaction) ++ if (create_transaction) { + t = xs_transaction_start(ctx->xsh); ++ if (t == XBT_NULL) { ++ LOGED(ERROR, device->domid, "xs_transaction_start failed"); ++ return ERROR_FAIL; ++ } ++ } + + /* FIXME: read frontend_path and check state before removing stuff */ + +@@ -195,42 +200,55 @@ retry_transaction: + if (rc) goto out; + } + +- /* xxx much of this function lacks error checks! */ +- + if (fents || ro_fents) { +- xs_rm(ctx->xsh, t, frontend_path); +- xs_mkdir(ctx->xsh, t, frontend_path); ++ if (!xs_rm(ctx->xsh, t, frontend_path) && errno != ENOENT) ++ goto out; ++ if (!xs_mkdir(ctx->xsh, t, frontend_path)) ++ goto out; + /* Console 0 is a special case. 
It doesn't use the regular PV + * state machine but also the frontend directory has + * historically contained other information, such as the + * vnc-port, which we don't want the guest fiddling with. + */ + if ((device->kind == LIBXL__DEVICE_KIND_CONSOLE && device->devid == 0) || +- (device->kind == LIBXL__DEVICE_KIND_VUART)) +- xs_set_permissions(ctx->xsh, t, frontend_path, +- ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms)); +- else +- xs_set_permissions(ctx->xsh, t, frontend_path, +- frontend_perms, ARRAY_SIZE(frontend_perms)); +- xs_write(ctx->xsh, t, GCSPRINTF("%s/backend", frontend_path), +- backend_path, strlen(backend_path)); +- if (fents) +- libxl__xs_writev_perms(gc, t, frontend_path, fents, +- frontend_perms, ARRAY_SIZE(frontend_perms)); +- if (ro_fents) +- libxl__xs_writev_perms(gc, t, frontend_path, ro_fents, +- ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms)); ++ (device->kind == LIBXL__DEVICE_KIND_VUART)) { ++ if (!xs_set_permissions(ctx->xsh, t, frontend_path, ++ ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms))) ++ goto out; ++ } else { ++ if (!xs_set_permissions(ctx->xsh, t, frontend_path, ++ frontend_perms, ARRAY_SIZE(frontend_perms))) ++ goto out; ++ } ++ if (!xs_write(ctx->xsh, t, GCSPRINTF("%s/backend", frontend_path), ++ backend_path, strlen(backend_path))) ++ goto out; ++ if (fents) { ++ rc = libxl__xs_writev_perms(gc, t, frontend_path, fents, ++ frontend_perms, ARRAY_SIZE(frontend_perms)); ++ if (rc) goto out; ++ } ++ if (ro_fents) { ++ rc = libxl__xs_writev_perms(gc, t, frontend_path, ro_fents, ++ ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms)); ++ if (rc) goto out; ++ } + } + + if (bents) { + if (!libxl_only) { +- xs_rm(ctx->xsh, t, backend_path); +- xs_mkdir(ctx->xsh, t, backend_path); +- xs_set_permissions(ctx->xsh, t, backend_path, backend_perms, +- ARRAY_SIZE(backend_perms)); +- xs_write(ctx->xsh, t, GCSPRINTF("%s/frontend", backend_path), +- frontend_path, strlen(frontend_path)); +- libxl__xs_writev(gc, t, backend_path, bents); 
++ if (!xs_rm(ctx->xsh, t, backend_path) && errno != ENOENT) ++ goto out; ++ if (!xs_mkdir(ctx->xsh, t, backend_path)) ++ goto out; ++ if (!xs_set_permissions(ctx->xsh, t, backend_path, backend_perms, ++ ARRAY_SIZE(backend_perms))) ++ goto out; ++ if (!xs_write(ctx->xsh, t, GCSPRINTF("%s/frontend", backend_path), ++ frontend_path, strlen(frontend_path))) ++ goto out; ++ rc = libxl__xs_writev(gc, t, backend_path, bents); ++ if (rc) goto out; + } + + /* +@@ -276,7 +294,7 @@ retry_transaction: + out: + if (create_transaction && t) + libxl__xs_transaction_abort(gc, &t); +- return rc; ++ return rc != 0 ? rc : ERROR_FAIL; + } + + typedef struct { +--- a/tools/libs/light/libxl_xshelp.c ++++ b/tools/libs/light/libxl_xshelp.c +@@ -60,10 +60,15 @@ int libxl__xs_writev_perms(libxl__gc *gc + for (i = 0; kvs[i] != NULL; i += 2) { + path = GCSPRINTF("%s/%s", dir, kvs[i]); + if (path && kvs[i + 1]) { +- int length = strlen(kvs[i + 1]); +- xs_write(ctx->xsh, t, path, kvs[i + 1], length); +- if (perms) +- xs_set_permissions(ctx->xsh, t, path, perms, num_perms); ++ size_t length = strlen(kvs[i + 1]); ++ if (length > UINT_MAX) ++ return ERROR_FAIL; ++ if (!xs_write(ctx->xsh, t, path, kvs[i + 1], length)) ++ return ERROR_FAIL; ++ if (perms) { ++ if (!xs_set_permissions(ctx->xsh, t, path, perms, num_perms)) ++ return ERROR_FAIL; ++ } + } + } + return 0; diff --git a/66450626-sched-set-all-sched_resource-data-inside-locked.patch b/66450626-sched-set-all-sched_resource-data-inside-locked.patch new file mode 100644 index 0000000..35e18d2 --- /dev/null +++ b/66450626-sched-set-all-sched_resource-data-inside-locked.patch @@ -0,0 +1,74 @@ +# Commit d104a07524ffc92ae7a70dfe192c291de2a563cc +# Date 2024-05-15 19:59:52 +0100 +# Author Juergen Gross +# Committer Andrew Cooper +xen/sched: set all sched_resource data inside locked region for new cpu + +When adding a cpu to a scheduler, set all data items of struct +sched_resource inside the locked region, as otherwise a race might +happen (e.g. 
when trying to access the cpupool of the cpu): + + (XEN) ----[ Xen-4.19.0-1-d x86_64 debug=y Tainted: H ]---- + (XEN) CPU: 45 + (XEN) RIP: e008:[] common/sched/credit.c#csched_load_balance+0x41/0x877 + (XEN) RFLAGS: 0000000000010092 CONTEXT: hypervisor + (XEN) rax: ffff82d040981618 rbx: ffff82d040981618 rcx: 0000000000000000 + (XEN) rdx: 0000003ff68cd000 rsi: 000000000000002d rdi: ffff83103723d450 + (XEN) rbp: ffff83207caa7d48 rsp: ffff83207caa7b98 r8: 0000000000000000 + (XEN) r9: ffff831037253cf0 r10: ffff83103767c3f0 r11: 0000000000000009 + (XEN) r12: ffff831037237990 r13: ffff831037237990 r14: ffff831037253720 + (XEN) r15: 0000000000000000 cr0: 000000008005003b cr4: 0000000000f526e0 + (XEN) cr3: 000000005bc2f000 cr2: 0000000000000010 + (XEN) fsb: 0000000000000000 gsb: 0000000000000000 gss: 0000000000000000 + (XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: 0000 cs: e008 + (XEN) Xen code around (common/sched/credit.c#csched_load_balance+0x41/0x877): + (XEN) 48 8b 0c 10 48 8b 49 08 <48> 8b 79 10 48 89 bd b8 fe ff ff 49 8b 4e 28 48 + + (XEN) Xen call trace: + (XEN) [] R common/sched/credit.c#csched_load_balance+0x41/0x877 + (XEN) [] F common/sched/credit.c#csched_schedule+0x36a/0x69f + (XEN) [] F common/sched/core.c#do_schedule+0xe8/0x433 + (XEN) [] F common/sched/core.c#schedule+0x2e5/0x2f9 + (XEN) [] F common/softirq.c#__do_softirq+0x94/0xbe + (XEN) [] F do_softirq+0x13/0x15 + (XEN) [] F arch/x86/domain.c#idle_loop+0x92/0xe6 + (XEN) + (XEN) Pagetable walk from 0000000000000010: + (XEN) L4[0x000] = 000000103ff61063 ffffffffffffffff + (XEN) L3[0x000] = 000000103ff60063 ffffffffffffffff + (XEN) L2[0x000] = 0000001033dff063 ffffffffffffffff + (XEN) L1[0x000] = 0000000000000000 ffffffffffffffff + (XEN) + (XEN) **************************************** + (XEN) Panic on CPU 45: + (XEN) FATAL PAGE FAULT + (XEN) [error_code=0000] + (XEN) Faulting linear address: 0000000000000010 + (XEN) **************************************** + +Reported-by: Andrew Cooper +Fixes: 
a8c6c623192e ("sched: clarify use cases of schedule_cpu_switch()") +Signed-off-by: Juergen Gross +Acked-by: Andrew Cooper +Tested-by: Andrew Cooper + +--- a/xen/common/sched/core.c ++++ b/xen/common/sched/core.c +@@ -3179,6 +3179,8 @@ int schedule_cpu_add(unsigned int cpu, s + + sr->scheduler = new_ops; + sr->sched_priv = ppriv; ++ sr->granularity = cpupool_get_granularity(c); ++ sr->cpupool = c; + + /* + * Reroute the lock to the per pCPU lock as /last/ thing. In fact, +@@ -3191,8 +3193,6 @@ int schedule_cpu_add(unsigned int cpu, s + /* _Not_ pcpu_schedule_unlock(): schedule_lock has changed! */ + spin_unlock_irqrestore(old_lock, flags); + +- sr->granularity = cpupool_get_granularity(c); +- sr->cpupool = c; + /* The cpu is added to a pool, trigger it to go pick up some work */ + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + diff --git a/66450627-x86-respect-mapcache_domain_init-failing.patch b/66450627-x86-respect-mapcache_domain_init-failing.patch new file mode 100644 index 0000000..c09094a --- /dev/null +++ b/66450627-x86-respect-mapcache_domain_init-failing.patch @@ -0,0 +1,26 @@ +# Commit 7270fdc7a0028d4b7b26fd1b36c6b9e97abcf3da +# Date 2024-05-15 19:59:52 +0100 +# Author Jan Beulich +# Committer Andrew Cooper +x86: respect mapcache_domain_init() failing + +The function itself properly handles and hands onwards failure from +create_perdomain_mapping(). Therefore its caller should respect possible +failure, too. 
+ +Fixes: 4b28bf6ae90b ("x86: re-introduce map_domain_page() et al") +Signed-off-by: Jan Beulich +Acked-by: Roger Pau Monné + +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -850,7 +850,8 @@ int arch_domain_create(struct domain *d, + } + else if ( is_pv_domain(d) ) + { +- mapcache_domain_init(d); ++ if ( (rc = mapcache_domain_init(d)) != 0 ) ++ goto fail; + + if ( (rc = pv_domain_initialise(d)) != 0 ) + goto fail; diff --git a/6646031f-x86-ucode-further-identify-already-up-to-date.patch b/6646031f-x86-ucode-further-identify-already-up-to-date.patch new file mode 100644 index 0000000..42633c5 --- /dev/null +++ b/6646031f-x86-ucode-further-identify-already-up-to-date.patch @@ -0,0 +1,73 @@ +# Commit 977d98e67c2e929c62aa1f495fc4c6341c45abb5 +# Date 2024-05-16 13:59:11 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +x86/ucode: Further fixes to identify "ucode already up to date" + +When the revision in hardware is newer than anything Xen has to hand, +'microcode_cache' isn't set up. Then, `xen-ucode` initiates the update +because it doesn't know whether the revisions across the system are symmetric +or not. This involves the patch getting all the way into the +apply_microcode() hooks before being found to be too old. + +This is all a giant mess and needs an overhaul, but in the short term simply +adjust the apply_microcode() to return -EEXIST. + +Also, unconditionally print the preexisting microcode revision on boot. It's +relevant information which is otherwise unavailable if Xen doesn't find new +microcode to use. 
+ +Fixes: 648db37a155a ("x86/ucode: Distinguish "ucode already up to date"") +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +Acked-by: Roger Pau Monné + +--- a/xen/arch/x86/cpu/microcode/amd.c ++++ b/xen/arch/x86/cpu/microcode/amd.c +@@ -222,12 +222,15 @@ static int cf_check apply_microcode(cons + uint32_t rev, old_rev = sig->rev; + enum microcode_match_result result = microcode_fits(patch); + ++ if ( result == MIS_UCODE ) ++ return -EINVAL; ++ + /* + * Allow application of the same revision to pick up SMT-specific changes + * even if the revision of the other SMT thread is already up-to-date. + */ +- if ( result != NEW_UCODE && result != SAME_UCODE ) +- return -EINVAL; ++ if ( result == OLD_UCODE ) ++ return -EEXIST; + + if ( check_final_patch_levels(sig) ) + { +--- a/xen/arch/x86/cpu/microcode/core.c ++++ b/xen/arch/x86/cpu/microcode/core.c +@@ -887,6 +887,8 @@ int __init early_microcode_init(unsigned + + ucode_ops.collect_cpu_info(); + ++ printk(XENLOG_INFO "BSP microcode revision: 0x%08x\n", this_cpu(cpu_sig).rev); ++ + /* + * Some hypervisors deliberately report a microcode revision of -1 to + * mean that they will not accept microcode updates. 
+--- a/xen/arch/x86/cpu/microcode/intel.c ++++ b/xen/arch/x86/cpu/microcode/intel.c +@@ -294,10 +294,13 @@ static int cf_check apply_microcode(cons + + result = microcode_update_match(patch); + +- if ( result != NEW_UCODE && +- !(opt_ucode_allow_same && result == SAME_UCODE) ) ++ if ( result == MIS_UCODE ) + return -EINVAL; + ++ if ( result == OLD_UCODE || ++ (result == SAME_UCODE && !opt_ucode_allow_same) ) ++ return -EEXIST; ++ + wbinvd(); + + wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)patch->data); diff --git a/6666ba52-x86-irq-remove-offline-CPUs-from-old-CPU-mask-when.patch b/6666ba52-x86-irq-remove-offline-CPUs-from-old-CPU-mask-when.patch new file mode 100644 index 0000000..64cd89d --- /dev/null +++ b/6666ba52-x86-irq-remove-offline-CPUs-from-old-CPU-mask-when.patch @@ -0,0 +1,34 @@ + +References: bsc#1214718 + +# Commit e63209d3ba2fd1b2f232babd14c9c679ffa7b09a +# Date 2024-06-10 10:33:22 +0200 +# Author Roger Pau Monné +# Committer Jan Beulich +x86/irq: remove offline CPUs from old CPU mask when adjusting move_cleanup_count + +When adjusting move_cleanup_count to account for CPUs that are offline also +adjust old_cpu_mask, otherwise further calls to fixup_irqs() could subtract +those again and create an imbalance in move_cleanup_count. + +Fixes: 472e0b74c5c4 ('x86/IRQ: deal with move cleanup count state in fixup_irqs()') +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2576,6 +2576,14 @@ void fixup_irqs(const cpumask_t *mask, b + desc->arch.move_cleanup_count -= cpumask_weight(affinity); + if ( !desc->arch.move_cleanup_count ) + release_old_vec(desc); ++ else ++ /* ++ * Adjust old_cpu_mask to account for the offline CPUs, ++ * otherwise further calls to fixup_irqs() could subtract those ++ * again and possibly underflow the counter. 
++ */ ++ cpumask_andnot(desc->arch.old_cpu_mask, desc->arch.old_cpu_mask, ++ affinity); + } + + if ( !desc->action || cpumask_subset(desc->affinity, mask) ) diff --git a/666994ab-x86-SMP-no-shorthand-IPI-in-hotplug.patch b/666994ab-x86-SMP-no-shorthand-IPI-in-hotplug.patch new file mode 100644 index 0000000..7a2274b --- /dev/null +++ b/666994ab-x86-SMP-no-shorthand-IPI-in-hotplug.patch @@ -0,0 +1,79 @@ + +References: bsc#1214718 + +# Commit 171c52fba5d94e050d704770480dcb983490d0ad +# Date 2024-06-12 14:29:31 +0200 +# Author Roger Pau Monné +# Committer Jan Beulich +x86/smp: do not use shorthand IPI destinations in CPU hot{,un}plug contexts + +Due to the current rwlock logic, if the CPU calling get_cpu_maps() does +so from a cpu_hotplug_{begin,done}() region the function will still +return success, because a CPU taking the rwlock in read mode after +having taken it in write mode is allowed. Such corner case makes using +get_cpu_maps() alone not enough to prevent using the shorthand in CPU +hotplug regions. + +Introduce a new helper to detect whether the current caller is between a +cpu_hotplug_{begin,done}() region and use it in send_IPI_mask() to restrict +shorthand usage. + +Fixes: 5500d265a2a8 ('x86/smp: use APIC ALLBUT destination shorthand when possible') +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/smp.c ++++ b/xen/arch/x86/smp.c +@@ -88,7 +88,7 @@ void send_IPI_mask(const cpumask_t *mask + * the system have been accounted for. + */ + if ( system_state > SYS_STATE_smp_boot && +- !unaccounted_cpus && !disabled_cpus && ++ !unaccounted_cpus && !disabled_cpus && !cpu_in_hotplug_context() && + /* NB: get_cpu_maps lock requires enabled interrupts. 
*/ + local_irq_is_enabled() && (cpus_locked = get_cpu_maps()) && + (park_offline_cpus || +--- a/xen/common/cpu.c ++++ b/xen/common/cpu.c +@@ -68,6 +68,11 @@ void cpu_hotplug_done(void) + write_unlock(&cpu_add_remove_lock); + } + ++bool cpu_in_hotplug_context(void) ++{ ++ return rw_is_write_locked_by_me(&cpu_add_remove_lock); ++} ++ + static NOTIFIER_HEAD(cpu_chain); + + void __init register_cpu_notifier(struct notifier_block *nb) +--- a/xen/include/xen/cpu.h ++++ b/xen/include/xen/cpu.h +@@ -13,6 +13,16 @@ void put_cpu_maps(void); + void cpu_hotplug_begin(void); + void cpu_hotplug_done(void); + ++/* ++ * Returns true when the caller CPU is between a cpu_hotplug_{begin,done}() ++ * region. ++ * ++ * This is required to safely identify hotplug contexts, as get_cpu_maps() ++ * would otherwise succeed because a caller holding the lock in write mode is ++ * allowed to acquire the same lock in read mode. ++ */ ++bool cpu_in_hotplug_context(void); ++ + /* Receive notification of CPU hotplug events. 
*/ + void register_cpu_notifier(struct notifier_block *nb); + +--- a/xen/include/xen/rwlock.h ++++ b/xen/include/xen/rwlock.h +@@ -309,6 +309,8 @@ static always_inline void write_lock_irq + + #define rw_is_locked(l) _rw_is_locked(l) + #define rw_is_write_locked(l) _rw_is_write_locked(l) ++#define rw_is_write_locked_by_me(l) \ ++ lock_evaluate_nospec(_is_write_locked_by_me(atomic_read(&(l)->cnts))) + + + typedef struct percpu_rwlock percpu_rwlock_t; diff --git a/666994f0-x86-IRQ-limit-interrupt-movement-in-fixup_irqs.patch b/666994f0-x86-IRQ-limit-interrupt-movement-in-fixup_irqs.patch new file mode 100644 index 0000000..bc1bdb4 --- /dev/null +++ b/666994f0-x86-IRQ-limit-interrupt-movement-in-fixup_irqs.patch @@ -0,0 +1,92 @@ + +References: bsc#1214718 + +# Commit c7564d7366d865cc407e3d64bca816d07edee174 +# Date 2024-06-12 14:30:40 +0200 +# Author Roger Pau Monné +# Committer Jan Beulich +x86/irq: limit interrupt movement done by fixup_irqs() + +The current check used in fixup_irqs() to decide whether to move around +interrupts is based on the affinity mask, but such mask can have all bits set, +and hence is unlikely to be a subset of the input mask. For example if an +interrupt has an affinity mask of all 1s, any input to fixup_irqs() that's not +an all set CPU mask would cause that interrupt to be shuffled around +unconditionally. + +What fixup_irqs() cares about is evacuating interrupts from CPUs not set on the +input CPU mask, and for that purpose it should check whether the interrupt is +assigned to a CPU not present in the input mask. Assume that ->arch.cpu_mask +is a subset of the ->affinity mask, and keep the current logic that resets the +->affinity mask if the interrupt has to be shuffled around. + +Doing the affinity movement based on ->arch.cpu_mask requires removing the +special handling to ->arch.cpu_mask done for high priority vectors, otherwise +the adjustment done to cpu_mask makes them always skip the CPU interrupt +movement. 
+ +While there also adjust the comment as to the purpose of fixup_irqs(). + +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/include/asm/irq.h ++++ b/xen/arch/x86/include/asm/irq.h +@@ -132,7 +132,7 @@ void free_domain_pirqs(struct domain *d) + int map_domain_emuirq_pirq(struct domain *d, int pirq, int emuirq); + int unmap_domain_pirq_emuirq(struct domain *d, int pirq); + +-/* Reset irq affinities to match the given CPU mask. */ ++/* Evacuate interrupts assigned to CPUs not present in the input CPU mask. */ + void fixup_irqs(const cpumask_t *mask, bool verbose); + void fixup_eoi(void); + +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2529,7 +2529,7 @@ static int __init cf_check setup_dump_ir + } + __initcall(setup_dump_irqs); + +-/* Reset irq affinities to match the given CPU mask. */ ++/* Evacuate interrupts assigned to CPUs not present in the input CPU mask. */ + void fixup_irqs(const cpumask_t *mask, bool verbose) + { + unsigned int irq; +@@ -2553,19 +2553,15 @@ void fixup_irqs(const cpumask_t *mask, b + + vector = irq_to_vector(irq); + if ( vector >= FIRST_HIPRIORITY_VECTOR && +- vector <= LAST_HIPRIORITY_VECTOR ) ++ vector <= LAST_HIPRIORITY_VECTOR && ++ desc->handler == &no_irq_type ) + { +- cpumask_and(desc->arch.cpu_mask, desc->arch.cpu_mask, mask); +- + /* + * This can in particular happen when parking secondary threads + * during boot and when the serial console wants to use a PCI IRQ. + */ +- if ( desc->handler == &no_irq_type ) +- { +- spin_unlock(&desc->lock); +- continue; +- } ++ spin_unlock(&desc->lock); ++ continue; + } + + if ( desc->arch.move_cleanup_count ) +@@ -2586,7 +2582,12 @@ void fixup_irqs(const cpumask_t *mask, b + affinity); + } + +- if ( !desc->action || cpumask_subset(desc->affinity, mask) ) ++ /* ++ * Avoid shuffling the interrupt around as long as current target CPUs ++ * are a subset of the input mask. 
What fixup_irqs() cares about is ++ * evacuating interrupts from CPUs not in the input mask. ++ */ ++ if ( !desc->action || cpumask_subset(desc->arch.cpu_mask, mask) ) + { + spin_unlock(&desc->lock); + continue; diff --git a/666b07ee-x86-EPT-special-page-in-epte_get_entry_emt.patch b/666b07ee-x86-EPT-special-page-in-epte_get_entry_emt.patch new file mode 100644 index 0000000..ad3ff69 --- /dev/null +++ b/666b07ee-x86-EPT-special-page-in-epte_get_entry_emt.patch @@ -0,0 +1,33 @@ +# Commit 5540b94e8191059eb9cbbe98ac316232a42208f6 +# Date 2024-06-13 16:53:34 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/EPT: correct special page checking in epte_get_entry_emt() + +mfn_valid() granularity is (currently) 256Mb. Therefore the start of a +1Gb page passing the test doesn't necessarily mean all parts of such a +range would also pass. Yet using the result of mfn_to_page() on an MFN +which doesn't pass mfn_valid() checking is liable to result in a crash +(the invocation of mfn_to_page() alone is presumably "just" UB in such a +case). 
+ +Fixes: ca24b2ffdbd9 ("x86/hvm: set 'ipat' in EPT for special pages") +Signed-off-by: Jan Beulich +Reviewed-by: Roger Pau Monné + +--- a/xen/arch/x86/mm/p2m-ept.c ++++ b/xen/arch/x86/mm/p2m-ept.c +@@ -518,8 +518,12 @@ int epte_get_entry_emt(struct domain *d, + } + + for ( special_pgs = i = 0; i < (1ul << order); i++ ) +- if ( is_special_page(mfn_to_page(mfn_add(mfn, i))) ) ++ { ++ mfn_t cur = mfn_add(mfn, i); ++ ++ if ( mfn_valid(cur) && is_special_page(mfn_to_page(cur)) ) + special_pgs++; ++ } + + if ( special_pgs ) + { diff --git a/666b0819-x86-EPT-avoid-marking-np-ents-for-reconfig.patch b/666b0819-x86-EPT-avoid-marking-np-ents-for-reconfig.patch new file mode 100644 index 0000000..8e8c27c --- /dev/null +++ b/666b0819-x86-EPT-avoid-marking-np-ents-for-reconfig.patch @@ -0,0 +1,72 @@ +# Commit 777c71d31325bc55ba1cc3f317d4155fe519ab0b +# Date 2024-06-13 16:54:17 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/EPT: avoid marking non-present entries for re-configuring + +For non-present entries EMT, like most other fields, is meaningless to +hardware. Make the logic in ept_set_entry() setting the field (and iPAT) +conditional upon dealing with a present entry, leaving the value at 0 +otherwise. This has two effects for epte_get_entry_emt() which we'll +want to leverage subsequently: +1) The call moved here now won't be issued with INVALID_MFN anymore (a + respective BUG_ON() is being added). +2) Neither of the other two calls could now be issued with a truncated + form of INVALID_MFN anymore (as long as there's no bug anywhere + marking an entry present when that was populated using INVALID_MFN). 
+ +Signed-off-by: Jan Beulich +Reviewed-by: Roger Pau Monné + +--- a/xen/arch/x86/mm/p2m-ept.c ++++ b/xen/arch/x86/mm/p2m-ept.c +@@ -649,6 +649,8 @@ static int cf_check resolve_misconfig(st + if ( e.emt != MTRR_NUM_TYPES ) + break; + ++ ASSERT(is_epte_present(&e)); ++ + if ( level == 0 ) + { + for ( gfn -= i, i = 0; i < EPT_PAGETABLE_ENTRIES; ++i ) +@@ -914,17 +916,6 @@ ept_set_entry(struct p2m_domain *p2m, gf + + if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) ) + { +- bool ipat; +- int emt = epte_get_entry_emt(p2m->domain, _gfn(gfn), mfn, +- i * EPT_TABLE_ORDER, &ipat, +- p2mt); +- +- if ( emt >= 0 ) +- new_entry.emt = emt; +- else /* ept_handle_misconfig() will need to take care of this. */ +- new_entry.emt = MTRR_NUM_TYPES; +- +- new_entry.ipat = ipat; + new_entry.sp = !!i; + new_entry.sa_p2mt = p2mt; + new_entry.access = p2ma; +@@ -940,6 +931,22 @@ ept_set_entry(struct p2m_domain *p2m, gf + need_modify_vtd_table = 0; + + ept_p2m_type_to_flags(p2m, &new_entry); ++ ++ if ( is_epte_present(&new_entry) ) ++ { ++ bool ipat; ++ int emt = epte_get_entry_emt(p2m->domain, _gfn(gfn), mfn, ++ i * EPT_TABLE_ORDER, &ipat, ++ p2mt); ++ ++ BUG_ON(mfn_eq(mfn, INVALID_MFN)); ++ ++ if ( emt >= 0 ) ++ new_entry.emt = emt; ++ else /* ept_handle_misconfig() will need to take care of this. */ ++ new_entry.emt = MTRR_NUM_TYPES; ++ new_entry.ipat = ipat; ++ } + } + + if ( sve != -1 ) diff --git a/666b085a-x86-EPT-drop-questionable-mfn_valid-from-.patch b/666b085a-x86-EPT-drop-questionable-mfn_valid-from-.patch new file mode 100644 index 0000000..c972ec8 --- /dev/null +++ b/666b085a-x86-EPT-drop-questionable-mfn_valid-from-.patch @@ -0,0 +1,34 @@ +# Commit 4fdd8d75566fdad06667a79ec0ce6f43cc466c54 +# Date 2024-06-13 16:55:22 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/EPT: drop questionable mfn_valid() from epte_get_entry_emt() + +mfn_valid() is RAM-focused; it will often return false for MMIO. 
Yet +access to actual MMIO space should not generally be restricted to UC +only; especially video frame buffer accesses are unduly affected by such +a restriction. + +Since, as of 777c71d31325 ("x86/EPT: avoid marking non-present entries +for re-configuring"), the function won't be called with INVALID_MFN or, +worse, truncated forms thereof anymore, we can fully drop that check. + +Fixes: 81fd0d3ca4b2 ("x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()") +Signed-off-by: Jan Beulich +Reviewed-by: Roger Pau Monné + +--- a/xen/arch/x86/mm/p2m-ept.c ++++ b/xen/arch/x86/mm/p2m-ept.c +@@ -500,12 +500,6 @@ int epte_get_entry_emt(struct domain *d, + return -1; + } + +- if ( !mfn_valid(mfn) ) +- { +- *ipat = true; +- return X86_MT_UC; +- } +- + /* + * Conditional must be kept in sync with the code in + * {iomem,ioports}_{permit,deny}_access(). diff --git a/667187cc-x86-Intel-unlock-CPUID-earlier.patch b/667187cc-x86-Intel-unlock-CPUID-earlier.patch new file mode 100644 index 0000000..f5aeb6a --- /dev/null +++ b/667187cc-x86-Intel-unlock-CPUID-earlier.patch @@ -0,0 +1,87 @@ +# Commit fa4d026737a47cd1d66ffb797a29150b4453aa9f +# Date 2024-06-18 15:12:44 +0200 +# Author Jan Beulich +# Committer Jan Beulich +x86/Intel: unlock CPUID earlier for the BSP + +Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. If +this bit is set by the BIOS then CPUID evaluation does not work when +data from any leaf greater than two is needed; early_cpu_init() in +particular wants to collect leaf 7 data. + +Cure this by unlocking CPUID right before evaluating anything which +depends on the maximum CPUID leaf being greater than two. + +Inspired by (and description cloned from) Linux commit 0c2f6d04619e +("x86/topology/intel: Unlock CPUID before evaluating anything"). 
+ +Signed-off-by: Jan Beulich +Reviewed-by: Roger Pau Monné + +--- a/xen/arch/x86/cpu/common.c ++++ b/xen/arch/x86/cpu/common.c +@@ -336,7 +336,8 @@ void __init early_cpu_init(bool verbose) + + c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx); + switch (c->x86_vendor) { +- case X86_VENDOR_INTEL: actual_cpu = intel_cpu_dev; break; ++ case X86_VENDOR_INTEL: intel_unlock_cpuid_leaves(c); ++ actual_cpu = intel_cpu_dev; break; + case X86_VENDOR_AMD: actual_cpu = amd_cpu_dev; break; + case X86_VENDOR_CENTAUR: actual_cpu = centaur_cpu_dev; break; + case X86_VENDOR_SHANGHAI: actual_cpu = shanghai_cpu_dev; break; +--- a/xen/arch/x86/cpu/cpu.h ++++ b/xen/arch/x86/cpu/cpu.h +@@ -24,3 +24,5 @@ void amd_init_lfence(struct cpuinfo_x86 + void amd_init_ssbd(const struct cpuinfo_x86 *c); + void amd_init_spectral_chicken(void); + void detect_zen2_null_seg_behaviour(void); ++ ++void intel_unlock_cpuid_leaves(struct cpuinfo_x86 *c); +--- a/xen/arch/x86/cpu/intel.c ++++ b/xen/arch/x86/cpu/intel.c +@@ -303,10 +303,24 @@ static void __init noinline intel_init_l + ctxt_switch_masking = intel_ctxt_switch_masking; + } + +-static void cf_check early_init_intel(struct cpuinfo_x86 *c) ++/* Unmask CPUID levels if masked. 
*/ ++void intel_unlock_cpuid_leaves(struct cpuinfo_x86 *c) + { +- u64 misc_enable, disable; ++ uint64_t misc_enable, disable; ++ ++ rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); ++ ++ disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID; ++ if (disable) { ++ wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable); ++ bootsym(trampoline_misc_enable_off) |= disable; ++ c->cpuid_level = cpuid_eax(0); ++ printk(KERN_INFO "revised cpuid level: %u\n", c->cpuid_level); ++ } ++} + ++static void cf_check early_init_intel(struct cpuinfo_x86 *c) ++{ + /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ + if (c->x86 == 15 && c->x86_cache_alignment == 64) + c->x86_cache_alignment = 128; +@@ -315,16 +329,7 @@ static void cf_check early_init_intel(st + bootsym(trampoline_misc_enable_off) & MSR_IA32_MISC_ENABLE_XD_DISABLE) + printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n"); + +- /* Unmask CPUID levels and NX if masked: */ +- rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); +- +- disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID; +- if (disable) { +- wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable); +- bootsym(trampoline_misc_enable_off) |= disable; +- printk(KERN_INFO "revised cpuid level: %d\n", +- cpuid_eax(0)); +- } ++ intel_unlock_cpuid_leaves(c); + + /* CPUID workaround for Intel 0F33/0F34 CPU */ + if (boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 3 && diff --git a/66718849-x86-IRQ-old_cpu_mask-in-fixup_irqs.patch b/66718849-x86-IRQ-old_cpu_mask-in-fixup_irqs.patch new file mode 100644 index 0000000..b7e141c --- /dev/null +++ b/66718849-x86-IRQ-old_cpu_mask-in-fixup_irqs.patch @@ -0,0 +1,74 @@ + +References: bsc#1214718 + +# Commit 817d1cd627be668c358d038f0fadbf7d24d417d3 +# Date 2024-06-18 15:14:49 +0200 +# Author Roger Pau Monné +# Committer Jan Beulich +x86/irq: deal with old_cpu_mask for interrupts in movement in fixup_irqs() + +Given the current logic it's possible for ->arch.old_cpu_mask to get out of +sync: if a CPU set in 
old_cpu_mask is offlined and then onlined +again without old_cpu_mask having been updated the data in the mask will no +longer be accurate, as when brought back online the CPU will no longer have +old_vector configured to handle the old interrupt source. + +If there's an interrupt movement in progress, and the to be offlined CPU (which +is the call context) is in the old_cpu_mask, clear it and update the mask, so +it doesn't contain stale data. + +Note that when the system is going down fixup_irqs() will be called by +smp_send_stop() from CPU 0 with a mask with only CPU 0 on it, effectively +asking to move all interrupts to the current caller (CPU 0) which is the only +CPU to remain online. In that case we don't care to migrate interrupts that +are in the process of being moved, as it's likely we won't be able to move all +interrupts to CPU 0 due to vector shortage anyway. + +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2539,7 +2539,7 @@ void fixup_irqs(const cpumask_t *mask, b + for ( irq = 0; irq < nr_irqs; irq++ ) + { + bool break_affinity = false, set_affinity = true; +- unsigned int vector; ++ unsigned int vector, cpu = smp_processor_id(); + cpumask_t *affinity = this_cpu(scratch_cpumask); + + if ( irq == 2 ) +@@ -2582,6 +2582,33 @@ void fixup_irqs(const cpumask_t *mask, b + affinity); + } + ++ if ( desc->arch.move_in_progress && ++ /* ++ * Only attempt to adjust the mask if the current CPU is going ++ * offline, otherwise the whole system is going down and leaving ++ * stale data in the masks is fine. ++ */ ++ !cpu_online(cpu) && ++ cpumask_test_cpu(cpu, desc->arch.old_cpu_mask) ) ++ { ++ /* ++ * This CPU is going offline, remove it from ->arch.old_cpu_mask ++ * and possibly release the old vector if the old mask becomes ++ * empty. 
++ * ++ * Note cleaning ->arch.old_cpu_mask is required if the CPU is ++ * brought offline and then online again, as when re-onlined the ++ * per-cpu vector table will no longer have ->arch.old_vector ++ * setup, and hence ->arch.old_cpu_mask would be stale. ++ */ ++ cpumask_clear_cpu(cpu, desc->arch.old_cpu_mask); ++ if ( cpumask_empty(desc->arch.old_cpu_mask) ) ++ { ++ desc->arch.move_in_progress = 0; ++ release_old_vec(desc); ++ } ++ } ++ + /* + * Avoid shuffling the interrupt around as long as current target CPUs + * are a subset of the input mask. What fixup_irqs() cares about is diff --git a/6671885e-x86-IRQ-handle-moving-in-_assign_irq_vector.patch b/6671885e-x86-IRQ-handle-moving-in-_assign_irq_vector.patch new file mode 100644 index 0000000..3b831b5 --- /dev/null +++ b/6671885e-x86-IRQ-handle-moving-in-_assign_irq_vector.patch @@ -0,0 +1,162 @@ + +References: bsc#1214718 + +# Commit 369558924a642bbb0cb731e9a3375958867cb17b +# Date 2024-06-18 15:15:10 +0200 +# Author Roger Pau Monné +# Committer Jan Beulich +x86/irq: handle moving interrupts in _assign_irq_vector() + +Currently there's logic in fixup_irqs() that attempts to prevent +_assign_irq_vector() from failing, as fixup_irqs() is required to evacuate all +interrupts from the CPUs not present in the input mask. The current logic in +fixup_irqs() is incomplete, as it doesn't deal with interrupts that have +move_cleanup_count > 0 and a non-empty ->arch.old_cpu_mask field. + +Instead of attempting to fixup the interrupt descriptor in fixup_irqs() so that +_assign_irq_vector() cannot fail, introduce logic in _assign_irq_vector() +to deal with interrupts that have either move_{in_progress,cleanup_count} set +and no remaining online CPUs in ->arch.cpu_mask. 
+ +If _assign_irq_vector() is requested to move an interrupt in the state +described above, first attempt to see if ->arch.old_cpu_mask contains any valid +CPUs that could be used as fallback, and if that's the case do move the +interrupt back to the previous destination. Note this is easier because the +vector hasn't been released yet, so there's no need to allocate and setup a new +vector on the destination. + +Due to the logic in fixup_irqs() that clears offline CPUs from +->arch.old_cpu_mask (and releases the old vector if the mask becomes empty) it +shouldn't be possible to get into _assign_irq_vector() with +->arch.move_{in_progress,cleanup_count} set but no online CPUs in +->arch.old_cpu_mask. + +However if ->arch.move_{in_progress,cleanup_count} is set and the interrupt has +also changed affinity, it's possible the members of ->arch.old_cpu_mask are no +longer part of the affinity set, move the interrupt to a different CPU part of +the provided mask and keep the current ->arch.old_{cpu_mask,vector} for the +pending interrupt movement to be completed. + +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -553,7 +553,58 @@ static int _assign_irq_vector(struct irq + } + + if ( desc->arch.move_in_progress || desc->arch.move_cleanup_count ) +- return -EAGAIN; ++ { ++ /* ++ * If the current destination is online refuse to shuffle. Retry after ++ * the in-progress movement has finished. ++ */ ++ if ( cpumask_intersects(desc->arch.cpu_mask, &cpu_online_map) ) ++ return -EAGAIN; ++ ++ /* ++ * Due to the logic in fixup_irqs() that clears offlined CPUs from ++ * ->arch.old_cpu_mask it shouldn't be possible to get here with ++ * ->arch.move_{in_progress,cleanup_count} set and no online CPUs in ++ * ->arch.old_cpu_mask. 
++ */ ++ ASSERT(valid_irq_vector(desc->arch.old_vector)); ++ ASSERT(cpumask_intersects(desc->arch.old_cpu_mask, &cpu_online_map)); ++ ++ if ( cpumask_intersects(desc->arch.old_cpu_mask, mask) ) ++ { ++ /* ++ * Fallback to the old destination if moving is in progress and the ++ * current destination is to be offlined. This is only possible if ++ * the CPUs in old_cpu_mask intersect with the affinity mask passed ++ * in the 'mask' parameter. ++ */ ++ desc->arch.vector = desc->arch.old_vector; ++ cpumask_and(desc->arch.cpu_mask, desc->arch.old_cpu_mask, mask); ++ ++ /* Undo any possibly done cleanup. */ ++ for_each_cpu(cpu, desc->arch.cpu_mask) ++ per_cpu(vector_irq, cpu)[desc->arch.vector] = irq; ++ ++ /* Cancel the pending move and release the current vector. */ ++ desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED; ++ cpumask_clear(desc->arch.old_cpu_mask); ++ desc->arch.move_in_progress = 0; ++ desc->arch.move_cleanup_count = 0; ++ if ( desc->arch.used_vectors ) ++ { ++ ASSERT(test_bit(old_vector, desc->arch.used_vectors)); ++ clear_bit(old_vector, desc->arch.used_vectors); ++ } ++ ++ return 0; ++ } ++ ++ /* ++ * There's an interrupt movement in progress but the destination(s) in ++ * ->arch.old_cpu_mask are not suitable given the 'mask' parameter, go ++ * through the full logic to find a new vector in a suitable CPU. ++ */ ++ } + + err = -ENOSPC; + +@@ -609,7 +660,22 @@ next: + current_vector = vector; + current_offset = offset; + +- if ( valid_irq_vector(old_vector) ) ++ if ( desc->arch.move_in_progress || desc->arch.move_cleanup_count ) ++ { ++ ASSERT(!cpumask_intersects(desc->arch.cpu_mask, &cpu_online_map)); ++ /* ++ * Special case when evacuating an interrupt from a CPU to be ++ * offlined and the interrupt was already in the process of being ++ * moved. Leave ->arch.old_{vector,cpu_mask} as-is and just ++ * replace ->arch.{cpu_mask,vector} with the new destination. ++ * Cleanup will be done normally for the old fields, just release ++ * the current vector here. 
++ */ ++ if ( desc->arch.used_vectors && ++ !test_and_clear_bit(old_vector, desc->arch.used_vectors) ) ++ ASSERT_UNREACHABLE(); ++ } ++ else if ( valid_irq_vector(old_vector) ) + { + cpumask_and(desc->arch.old_cpu_mask, desc->arch.cpu_mask, + &cpu_online_map); +@@ -2620,33 +2686,6 @@ void fixup_irqs(const cpumask_t *mask, b + continue; + } + +- /* +- * In order for the affinity adjustment below to be successful, we +- * need _assign_irq_vector() to succeed. This in particular means +- * clearing desc->arch.move_in_progress if this would otherwise +- * prevent the function from succeeding. Since there's no way for the +- * flag to get cleared anymore when there's no possible destination +- * left (the only possibility then would be the IRQs enabled window +- * after this loop), there's then also no race with us doing it here. +- * +- * Therefore the logic here and there need to remain in sync. +- */ +- if ( desc->arch.move_in_progress && +- !cpumask_intersects(mask, desc->arch.cpu_mask) ) +- { +- unsigned int cpu; +- +- cpumask_and(affinity, desc->arch.old_cpu_mask, &cpu_online_map); +- +- spin_lock(&vector_lock); +- for_each_cpu(cpu, affinity) +- per_cpu(vector_irq, cpu)[desc->arch.old_vector] = ~irq; +- spin_unlock(&vector_lock); +- +- release_old_vec(desc); +- desc->arch.move_in_progress = 0; +- } +- + if ( !cpumask_intersects(mask, desc->affinity) ) + { + break_affinity = true; diff --git a/6672c846-x86-xstate-initialisation-of-XSS-cache.patch b/6672c846-x86-xstate-initialisation-of-XSS-cache.patch new file mode 100644 index 0000000..662db86 --- /dev/null +++ b/6672c846-x86-xstate-initialisation-of-XSS-cache.patch @@ -0,0 +1,65 @@ +# Commit 9e6dbbe8bf400aacb99009ddffa91d2a0c312b39 +# Date 2024-06-19 13:00:06 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +x86/xstate: Fix initialisation of XSS cache + +The clobbering of this_cpu(xcr0) and this_cpu(xss) to architecturally invalid +values is to force the subsequent set_xcr0() and set_msr_xss() to reload 
the +hardware register. + +While XCR0 is reloaded in xstate_init(), MSR_XSS isn't. This causes +get_msr_xss() to return the invalid value, and logic of the form: + + old = get_msr_xss(); + set_msr_xss(new); + ... + set_msr_xss(old); + +to try and restore said invalid value. + +The architecturally invalid value must be purged from the cache, meaning the +hardware register must be written at least once. This in turn highlights that +the invalid value must only be used in the case that the hardware register is +available. + +Fixes: f7f4a523927f ("x86/xstate: reset cached register values on resume") +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/xstate.c ++++ b/xen/arch/x86/xstate.c +@@ -641,13 +641,6 @@ void xstate_init(struct cpuinfo_x86 *c) + return; + } + +- /* +- * Zap the cached values to make set_xcr0() and set_msr_xss() really +- * write it. +- */ +- this_cpu(xcr0) = 0; +- this_cpu(xss) = ~0; +- + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + feature_mask = (((u64)edx << 32) | eax) & XCNTXT_MASK; + BUG_ON(!valid_xcr0(feature_mask)); +@@ -657,8 +650,19 @@ void xstate_init(struct cpuinfo_x86 *c) + * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size. + */ + set_in_cr4(X86_CR4_OSXSAVE); ++ ++ /* ++ * Zap the cached values to make set_xcr0() and set_msr_xss() really write ++ * the hardware register. 
++ */ ++ this_cpu(xcr0) = 0; + if ( !set_xcr0(feature_mask) ) + BUG(); ++ if ( cpu_has_xsaves ) ++ { ++ this_cpu(xss) = ~0; ++ set_msr_xss(0); ++ } + + if ( bsp ) + { diff --git a/6672c847-x86-CPUID-XSAVE-dynamic-leaves.patch b/6672c847-x86-CPUID-XSAVE-dynamic-leaves.patch new file mode 100644 index 0000000..84c892c --- /dev/null +++ b/6672c847-x86-CPUID-XSAVE-dynamic-leaves.patch @@ -0,0 +1,63 @@ +# Commit 71cacfb035f4a78ee10970dc38a3baa04d387451 +# Date 2024-06-19 13:00:06 +0100 +# Author Andrew Cooper +# Committer Andrew Cooper +x86/cpuid: Fix handling of XSAVE dynamic leaves + +[ This is a minimal backport of commit 71cacfb035f4 ("x86/cpuid: Fix handling + of XSAVE dynamic leaves") to fix the bugs without depending on the large + rework of XSTATE handling in Xen 4.19 ] + +First, if XSAVE is available in hardware but not visible to the guest, the +dynamic leaves shouldn't be filled in. + +Second, the comment concerning XSS state is wrong. VT-x doesn't manage +host/guest state automatically, but there is provision for "host only" bits to +be set, so the implications are still accurate. + +In Xen 4.18, no XSS states are supported, so it's safe to keep deferring to +real hardware. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -330,24 +330,20 @@ void guest_cpuid(const struct vcpu *v, u + case XSTATE_CPUID: + switch ( subleaf ) + { +- case 1: +- if ( p->xstate.xsavec || p->xstate.xsaves ) +- { +- /* +- * TODO: Figure out what to do for XSS state. VT-x manages +- * host vs guest MSR_XSS automatically, so as soon as we start +- * supporting any XSS states, the wrong XSS will be in +- * context. +- */ +- BUILD_BUG_ON(XSTATE_XSAVES_ONLY != 0); +- +- /* +- * Read CPUID[0xD,0/1].EBX from hardware. They vary with +- * enabled XSTATE, and appropraite XCR0|XSS are in context. +- */ ++ /* ++ * Read CPUID[0xd,0/1].EBX from hardware. 
They vary with enabled ++ * XSTATE, and the appropriate XCR0 is in context. ++ */ + case 0: +- res->b = cpuid_count_ebx(leaf, subleaf); +- } ++ if ( p->basic.xsave ) ++ res->b = cpuid_count_ebx(0xd, 0); ++ break; ++ ++ case 1: ++ /* This only works because Xen doesn't support XSS states yet. */ ++ BUILD_BUG_ON(XSTATE_XSAVES_ONLY != 0); ++ if ( p->xstate.xsavec ) ++ res->b = cpuid_count_ebx(0xd, 1); + break; + } + break; diff --git a/6673ffdc-x86-IRQ-forward-pending-to-new-dest-in-fixup_irqs.patch b/6673ffdc-x86-IRQ-forward-pending-to-new-dest-in-fixup_irqs.patch new file mode 100644 index 0000000..ab30fb7 --- /dev/null +++ b/6673ffdc-x86-IRQ-forward-pending-to-new-dest-in-fixup_irqs.patch @@ -0,0 +1,130 @@ + +References: bsc#1214718 + +# Commit e2bb28d621584fce15c907002ddc7c6772644b64 +# Date 2024-06-20 12:09:32 +0200 +# Author Roger Pau Monné +# Committer Jan Beulich +x86/irq: forward pending interrupts to new destination in fixup_irqs() + +fixup_irqs() is used to evacuate interrupts from to be offlined CPUs. Given +the CPU is to become offline, the normal migration logic used by Xen where the +vector in the previous target(s) is left configured until the interrupt is +received on the new destination is not suitable. + +Instead attempt to do as much as possible in order to prevent losing +interrupts. If fixup_irqs() is called from the CPU to be offlined (as is +currently the case for CPU hot unplug) attempt to forward pending vectors when +interrupts that target the current CPU are migrated to a different destination. + +Additionally, for interrupts that have already been moved from the current CPU +prior to the call to fixup_irqs() but that haven't been delivered to the new +destination (iow: interrupts with move_in_progress set and the current CPU set +in ->arch.old_cpu_mask) also check whether the previous vector is pending and +forward it to the new destination. + +This allows us to remove the window with interrupts enabled at the bottom of +fixup_irqs(). 
Such window wasn't safe anyway: references to the CPU to become +offline are removed from interrupts masks, but the per-CPU vector_irq[] array +is not updated to reflect those changes (as the CPU is going offline anyway). + +Signed-off-by: Roger Pau Monné +Reviewed-by: Jan Beulich + +--- a/xen/arch/x86/include/asm/apic.h ++++ b/xen/arch/x86/include/asm/apic.h +@@ -145,6 +145,11 @@ static __inline bool_t apic_isr_read(u8 + (vector & 0x1f)) & 1; + } + ++static inline bool apic_irr_read(unsigned int vector) ++{ ++ return apic_read(APIC_IRR + (vector / 32 * 0x10)) & (1U << (vector % 32)); ++} ++ + static __inline u32 get_apic_id(void) /* Get the physical APIC id */ + { + u32 id = apic_read(APIC_ID); +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2604,7 +2604,7 @@ void fixup_irqs(const cpumask_t *mask, b + + for ( irq = 0; irq < nr_irqs; irq++ ) + { +- bool break_affinity = false, set_affinity = true; ++ bool break_affinity = false, set_affinity = true, check_irr = false; + unsigned int vector, cpu = smp_processor_id(); + cpumask_t *affinity = this_cpu(scratch_cpumask); + +@@ -2658,6 +2658,25 @@ void fixup_irqs(const cpumask_t *mask, b + cpumask_test_cpu(cpu, desc->arch.old_cpu_mask) ) + { + /* ++ * This to be offlined CPU was the target of an interrupt that's ++ * been moved, and the new destination target hasn't yet ++ * acknowledged any interrupt from it. ++ * ++ * We know the interrupt is configured to target the new CPU at ++ * this point, so we can check IRR for any pending vectors and ++ * forward them to the new destination. ++ * ++ * Note that for the other case of an interrupt movement being in ++ * progress (move_cleanup_count being non-zero) we know the new ++ * destination has already acked at least one interrupt from this ++ * source, and hence there's no need to forward any stale ++ * interrupts. 
++ */ ++ if ( apic_irr_read(desc->arch.old_vector) ) ++ send_IPI_mask(cpumask_of(cpumask_any(desc->arch.cpu_mask)), ++ desc->arch.vector); ++ ++ /* + * This CPU is going offline, remove it from ->arch.old_cpu_mask + * and possibly release the old vector if the old mask becomes + * empty. +@@ -2697,6 +2716,14 @@ void fixup_irqs(const cpumask_t *mask, b + if ( desc->handler->disable ) + desc->handler->disable(desc); + ++ /* ++ * If the current CPU is going offline and is (one of) the target(s) of ++ * the interrupt, signal to check whether there are any pending vectors ++ * to be handled in the local APIC after the interrupt has been moved. ++ */ ++ if ( !cpu_online(cpu) && cpumask_test_cpu(cpu, desc->arch.cpu_mask) ) ++ check_irr = true; ++ + if ( desc->handler->set_affinity ) + desc->handler->set_affinity(desc, affinity); + else if ( !(warned++) ) +@@ -2707,6 +2734,18 @@ void fixup_irqs(const cpumask_t *mask, b + + cpumask_copy(affinity, desc->affinity); + ++ if ( check_irr && apic_irr_read(vector) ) ++ /* ++ * Forward pending interrupt to the new destination, this CPU is ++ * going offline and otherwise the interrupt would be lost. ++ * ++ * Do the IRR check as late as possible before releasing the irq ++ * desc in order for any in-flight interrupts to be delivered to ++ * the lapic. ++ */ ++ send_IPI_mask(cpumask_of(cpumask_any(desc->arch.cpu_mask)), ++ desc->arch.vector); ++ + spin_unlock(&desc->lock); + + if ( !verbose ) +@@ -2718,11 +2757,6 @@ void fixup_irqs(const cpumask_t *mask, b + printk("Broke affinity for IRQ%u, new: %*pb\n", + irq, CPUMASK_PR(affinity)); + } +- +- /* That doesn't seem sufficient. Give it 1ms. */ +- local_irq_enable(); +- mdelay(1); +- local_irq_disable(); + } + + void fixup_eoi(void) diff --git a/gcc14-fixes.patch b/gcc14-fixes.patch new file mode 100644 index 0000000..4803331 --- /dev/null +++ b/gcc14-fixes.patch @@ -0,0 +1,81 @@ +References: bsc#1225953 + +Compiling against gcc14. 
+ ../../../../../newlib-1.16.0/newlib/libc/stdlib/wcstoull.c: In function ‘wcstoull’: + ../../../../../newlib-1.16.0/newlib/libc/stdlib/wcstoull.c:136:16: error: implicit declaration of function ‘_wcstoull_r’; did you mean ‘wcstoull’? [-Wimplicit-function-declaration] + 136 | return _wcstoull_r (_REENT, s, ptr, base); + | ^~~~~~~~~~~ + | wcstoull + + In file included from ../../../../../newlib-1.16.0/newlib/libc/reent/signalr.c:7: + ../../../../../newlib-1.16.0/newlib/libc/reent/signalr.c: In function ‘_kill_r’: + ../../../../../newlib-1.16.0/newlib/libc/reent/signalr.c:61:14: error: implicit declaration of function ‘kill’; did you mean ‘_kill’? [-Wimplicit-function-declaration] + 61 | if ((ret = _kill (pid, sig)) == -1 && errno != 0) + | ^~~~~ + + +Index: xen-4.18.2-testing/stubdom/Makefile +=================================================================== +--- xen-4.18.2-testing.orig/stubdom/Makefile ++++ xen-4.18.2-testing/stubdom/Makefile +@@ -97,6 +97,7 @@ newlib-$(NEWLIB_VERSION): newlib-$(NEWLI + patch -d $@ -p1 < newlib-disable-texinfo.patch + patch -d $@ -p1 < newlib-cygmon-gmon.patch + patch -d $@ -p1 < newlib-makedoc.patch ++ patch -d $@ -p1 < newlib-gcc14-pragmas.patch + find $@ -type f | xargs perl -i.bak \ + -pe 's/\b_(tzname|daylight|timezone)\b/$$1/g' + touch $@ +Index: xen-4.18.2-testing/stubdom/newlib-gcc14-pragmas.patch +=================================================================== +--- /dev/null ++++ xen-4.18.2-testing/stubdom/newlib-gcc14-pragmas.patch +@@ -0,0 +1,36 @@ ++--- newlib-1.16.0/newlib/libc/stdlib/wcstoull.c.orig 2024-06-04 15:32:01.495146632 -0600 +++++ newlib-1.16.0/newlib/libc/stdlib/wcstoull.c 2024-06-04 15:38:56.627156524 -0600 ++@@ -127,6 +127,10 @@ PORTABILITY ++ ++ #ifndef _REENT_ONLY ++ +++#if __GNUC__ >= 14 +++#pragma GCC diagnostic ignored "-Wimplicit-function-declaration" +++#endif +++ ++ unsigned long long ++ _DEFUN (wcstoull, (s, ptr, base), ++ _CONST wchar_t *s _AND ++--- 
newlib-1.16.0/newlib/libc/reent/signalr.c.orig 2024-06-04 15:39:15.139156966 -0600 +++++ newlib-1.16.0/newlib/libc/reent/signalr.c 2024-06-04 15:40:24.899158628 -0600 ++@@ -49,6 +49,10 @@ DESCRIPTION ++ <>. ++ */ ++ +++#if __GNUC__ >= 14 +++#pragma GCC diagnostic ignored "-Wimplicit-function-declaration" +++#endif +++ ++ int ++ _DEFUN (_kill_r, (ptr, pid, sig), ++ struct _reent *ptr _AND ++--- newlib-1.16.0/newlib/doc/makedoc.c.orig 2024-06-04 16:07:54.423197934 -0600 +++++ newlib-1.16.0/newlib/doc/makedoc.c 2024-06-04 16:15:15.395208441 -0600 ++@@ -798,6 +798,7 @@ DEFUN( iscommand,(ptr, idx), ++ } ++ ++ +++static unsigned int ++ DEFUN(copy_past_newline,(ptr, idx, dst), ++ string_type *ptr AND ++ unsigned int idx AND +--- xen-4.18.2-testing/extras/mini-os-remote/include/posix/sys/mman.h.orig 2024-06-04 16:27:35.155226069 -0600 ++++ xen-4.18.2-testing/extras/mini-os-remote/include/posix/sys/mman.h 2024-06-04 16:31:46.591232060 -0600 +@@ -16,7 +16,7 @@ + + void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) asm("mmap64"); + int munmap(void *start, size_t length); +-static inline mlock(const void *addr, size_t len) { return 0; } +-static inline munlock(const void *addr, size_t len) { return 0; } ++static inline int mlock(const void *addr, size_t len) { return 0; } ++static inline int munlock(const void *addr, size_t len) { return 0; } + + #endif /* _POSIX_SYS_MMAN_H */ diff --git a/libxl.LIBXL_HOTPLUG_TIMEOUT.patch b/libxl.LIBXL_HOTPLUG_TIMEOUT.patch index cc01f91..0786765 100644 --- a/libxl.LIBXL_HOTPLUG_TIMEOUT.patch +++ b/libxl.LIBXL_HOTPLUG_TIMEOUT.patch @@ -52,10 +52,8 @@ In this example the per-device value will be set to 5 seconds. The change for libxl which handles this xenstore value will enable additional logging if the key is found. That extra logging will show how the execution time of each script. 
-Index: xen-4.18.0-testing/tools/libs/light/libxl_aoutils.c -=================================================================== ---- xen-4.18.0-testing.orig/tools/libs/light/libxl_aoutils.c -+++ xen-4.18.0-testing/tools/libs/light/libxl_aoutils.c +--- a/tools/libs/light/libxl_aoutils.c ++++ b/tools/libs/light/libxl_aoutils.c @@ -529,6 +529,8 @@ static void async_exec_timeout(libxl__eg { libxl__async_exec_state *aes = CONTAINER_OF(ev, *aes, time); @@ -85,10 +83,8 @@ Index: xen-4.18.0-testing/tools/libs/light/libxl_aoutils.c libxl__ev_time_deregister(gc, &aes->time); -Index: xen-4.18.0-testing/tools/libs/light/libxl_create.c -=================================================================== ---- xen-4.18.0-testing.orig/tools/libs/light/libxl_create.c -+++ xen-4.18.0-testing/tools/libs/light/libxl_create.c +--- a/tools/libs/light/libxl_create.c ++++ b/tools/libs/light/libxl_create.c @@ -1323,6 +1323,7 @@ static void initiate_domain_create(libxl * build info around just to know if the domain has a device model or not. 
*/ @@ -97,11 +93,9 @@ Index: xen-4.18.0-testing/tools/libs/light/libxl_create.c for (i = 0; i < d_config->num_disks; i++) { ret = libxl__disk_devtype.set_default(gc, domid, &d_config->disks[i], -Index: xen-4.18.0-testing/tools/libs/light/libxl_device.c -=================================================================== ---- xen-4.18.0-testing.orig/tools/libs/light/libxl_device.c -+++ xen-4.18.0-testing/tools/libs/light/libxl_device.c -@@ -1278,7 +1278,7 @@ static void device_hotplug(libxl__egc *e +--- a/tools/libs/light/libxl_device.c ++++ b/tools/libs/light/libxl_device.c +@@ -1296,7 +1296,7 @@ static void device_hotplug(libxl__egc *e } aes->ao = ao; @@ -110,7 +104,7 @@ Index: xen-4.18.0-testing/tools/libs/light/libxl_device.c aes->env = env; aes->args = args; aes->callback = device_hotplug_child_death_cb; -@@ -1287,6 +1287,15 @@ static void device_hotplug(libxl__egc *e +@@ -1305,6 +1305,15 @@ static void device_hotplug(libxl__egc *e aes->stdfds[1] = 2; aes->stdfds[2] = -1; @@ -126,10 +120,8 @@ Index: xen-4.18.0-testing/tools/libs/light/libxl_device.c rc = libxl__async_exec_start(aes); if (rc) goto out; -Index: xen-4.18.0-testing/tools/libs/light/libxl_event.c -=================================================================== ---- xen-4.18.0-testing.orig/tools/libs/light/libxl_event.c -+++ xen-4.18.0-testing/tools/libs/light/libxl_event.c +--- a/tools/libs/light/libxl_event.c ++++ b/tools/libs/light/libxl_event.c @@ -1032,27 +1032,29 @@ static void devstate_callback(libxl__egc { EGC_GC; @@ -176,10 +168,8 @@ Index: xen-4.18.0-testing/tools/libs/light/libxl_event.c rc = libxl__xswait_start(gc, &ds->w); if (rc) goto out; -Index: xen-4.18.0-testing/tools/libs/light/libxl_internal.c -=================================================================== ---- xen-4.18.0-testing.orig/tools/libs/light/libxl_internal.c -+++ xen-4.18.0-testing/tools/libs/light/libxl_internal.c +--- a/tools/libs/light/libxl_internal.c ++++ b/tools/libs/light/libxl_internal.c @@ -18,6 +18,97 
@@ #include "libxl_internal.h" #include "libxl_arch.h" @@ -278,10 +268,8 @@ Index: xen-4.18.0-testing/tools/libs/light/libxl_internal.c void libxl__alloc_failed(libxl_ctx *ctx, const char *func, size_t nmemb, size_t size) { #define M "libxl: FATAL ERROR: memory allocation failure" -Index: xen-4.18.0-testing/tools/libs/light/libxl_internal.h -=================================================================== ---- xen-4.18.0-testing.orig/tools/libs/light/libxl_internal.h -+++ xen-4.18.0-testing/tools/libs/light/libxl_internal.h +--- a/tools/libs/light/libxl_internal.h ++++ b/tools/libs/light/libxl_internal.h @@ -50,6 +50,7 @@ #include #include diff --git a/xen.changes b/xen.changes index aeb8a03..98508d9 100644 --- a/xen.changes +++ b/xen.changes @@ -1,3 +1,56 @@ +------------------------------------------------------------------- +Wed Jul 3 12:41:39 MDT 2024 - carnold@suse.com + +- bsc#1227355 - VUL-0: CVE-2024-31143: xen: double unlock in x86 + guest IRQ handling (XSA-458) + xsa458.patch + +------------------------------------------------------------------- +Mon Jun 24 16:20:00 CEST 2024 - jbeulich@suse.com + +- bsc#1214718 - The system hangs intermittently when Power Control + Mode is set to Minimum Power on SLES15SP5 Xen + 6666ba52-x86-irq-remove-offline-CPUs-from-old-CPU-mask-when.patch + 666994ab-x86-SMP-no-shorthand-IPI-in-hotplug.patch + 666994f0-x86-IRQ-limit-interrupt-movement-in-fixup_irqs.patch + 66718849-x86-IRQ-old_cpu_mask-in-fixup_irqs.patch + 6671885e-x86-IRQ-handle-moving-in-_assign_irq_vector.patch + 6673ffdc-x86-IRQ-forward-pending-to-new-dest-in-fixup_irqs.patch +- Upstream bug fixes (bsc#1027519) + 66450626-sched-set-all-sched_resource-data-inside-locked.patch + 66450627-x86-respect-mapcache_domain_init-failing.patch + 6646031f-x86-ucode-further-identify-already-up-to-date.patch + 666b07ee-x86-EPT-special-page-in-epte_get_entry_emt.patch + 666b0819-x86-EPT-avoid-marking-np-ents-for-reconfig.patch + 
666b085a-x86-EPT-drop-questionable-mfn_valid-from-.patch + 667187cc-x86-Intel-unlock-CPUID-earlier.patch + 6672c846-x86-xstate-initialisation-of-XSS-cache.patch + 6672c847-x86-CPUID-XSAVE-dynamic-leaves.patch + +------------------------------------------------------------------- +Tue Jun 4 18:09:00 MDT 2024 - carnold@suse.com + +- bsc#1225953 - Package xen does not build with gcc14 because of + new errors + gcc14-fixes.patch + +------------------------------------------------------------------- +Wed May 15 11:15:00 CEST 2024 - jbeulich@suse.com + +- bsc#1221984 - VUL-0: CVE-2023-46842: xen: x86 HVM hypercalls may + trigger Xen bug check (XSA-454) + 6617d62c-x86-hvm-Misra-Rule-19-1-regression.patch +- Upstream bug fixes (bsc#1027519) + 6627a4ee-vRTC-UIP-set-for-longer-than-expected.patch + 6627a5fc-x86-MTRR-inverted-WC-check.patch + 662a6a4c-x86-spec-reporting-of-BHB-clearing.patch + 662a6a8d-x86-spec-adjust-logic-to-elide-LFENCE.patch + 663090fd-x86-gen-cpuid-syntax.patch + 663a383c-libxs-open-xenbus-fds-as-O_CLOEXEC.patch + 663a4f3e-x86-cpu-policy-migration-IceLake-to-CascadeLake.patch + 663d05b5-x86-ucode-distinguish-up-to-date.patch + 663eaa27-libxl-XenStore-error-handling-in-device-creation.patch + ------------------------------------------------------------------- Tue Apr 9 14:11:15 MDT 2024 - carnold@suse.com diff --git a/xen.libxl.dmmd.patch b/xen.libxl.dmmd.patch index 4d12172..096d920 100644 --- a/xen.libxl.dmmd.patch +++ b/xen.libxl.dmmd.patch @@ -7,10 +7,8 @@ References: bsc#954872 tools/libxl/libxlu_disk_l.l | 2 ++ 4 files changed, 37 insertions(+), 6 deletions(-) -Index: xen-4.18.0-testing/tools/libs/light/libxl_disk.c -=================================================================== ---- xen-4.18.0-testing.orig/tools/libs/light/libxl_disk.c -+++ xen-4.18.0-testing/tools/libs/light/libxl_disk.c +--- a/tools/libs/light/libxl_disk.c ++++ b/tools/libs/light/libxl_disk.c @@ -203,7 +203,7 @@ static int libxl__device_disk_setdefault return rc; } @@ -31,11 
+29,9 @@ Index: xen-4.18.0-testing/tools/libs/light/libxl_disk.c flexarray_append(back, "params"); flexarray_append(back, GCSPRINTF("%s:%s", libxl__device_disk_string_of_format(disk->format), -Index: xen-4.18.0-testing/tools/libs/light/libxl_device.c -=================================================================== ---- xen-4.18.0-testing.orig/tools/libs/light/libxl_device.c -+++ xen-4.18.0-testing/tools/libs/light/libxl_device.c -@@ -333,7 +333,8 @@ static int disk_try_backend(disk_try_bac +--- a/tools/libs/light/libxl_device.c ++++ b/tools/libs/light/libxl_device.c +@@ -351,7 +351,8 @@ static int disk_try_backend(disk_try_bac return 0; case LIBXL_DISK_BACKEND_QDISK: @@ -45,10 +41,8 @@ Index: xen-4.18.0-testing/tools/libs/light/libxl_device.c return backend; case LIBXL_DISK_BACKEND_STANDALONE: -Index: xen-4.18.0-testing/tools/libs/light/libxl_dm.c -=================================================================== ---- xen-4.18.0-testing.orig/tools/libs/light/libxl_dm.c -+++ xen-4.18.0-testing/tools/libs/light/libxl_dm.c +--- a/tools/libs/light/libxl_dm.c ++++ b/tools/libs/light/libxl_dm.c @@ -1197,6 +1197,30 @@ out: return rc; } @@ -93,10 +87,8 @@ Index: xen-4.18.0-testing/tools/libs/light/libxl_dm.c if (dev_number == -1) { LOGD(WARN, guest_domid, "unable to determine"" disk number for %s", disks[i].vdev); -Index: xen-4.18.0-testing/tools/libs/util/libxlu_disk_l.l -=================================================================== ---- xen-4.18.0-testing.orig/tools/libs/util/libxlu_disk_l.l -+++ xen-4.18.0-testing/tools/libs/util/libxlu_disk_l.l +--- a/tools/libs/util/libxlu_disk_l.l ++++ b/tools/libs/util/libxlu_disk_l.l @@ -253,6 +253,8 @@ target=.* { STRIP(','); SAVESTRING("targ free(newscript); } @@ -106,10 +98,8 @@ Index: xen-4.18.0-testing/tools/libs/util/libxlu_disk_l.l tapdisk:/.* { DPC->had_depr_prefix=1; DEPRECATE(0); } tap2?:/.* { DPC->had_depr_prefix=1; DEPRECATE(0); } aio:/.* { DPC->had_depr_prefix=1; DEPRECATE(0); } -Index: 
xen-4.18.0-testing/tools/libs/light/libxl_internal.h -=================================================================== ---- xen-4.18.0-testing.orig/tools/libs/light/libxl_internal.h -+++ xen-4.18.0-testing/tools/libs/light/libxl_internal.h +--- a/tools/libs/light/libxl_internal.h ++++ b/tools/libs/light/libxl_internal.h @@ -2073,6 +2073,10 @@ _hidden char *libxl__object_to_json(libx _hidden int libxl__cpuid_legacy(libxl_ctx *ctx, uint32_t domid, bool retore, libxl_domain_build_info *info); diff --git a/xen.spec b/xen.spec index d36afd8..bc55e48 100644 --- a/xen.spec +++ b/xen.spec @@ -119,7 +119,7 @@ BuildRequires: pesign-obs-integration %endif Provides: installhint(reboot-needed) -Version: 4.18.2_02 +Version: 4.18.2_06 Release: 0 Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) License: GPL-2.0-only @@ -154,7 +154,33 @@ Source10183: xen_maskcalc.py # For xen-libs Source99: baselibs.conf # Upstream patches +Patch1: 6617d62c-x86-hvm-Misra-Rule-19-1-regression.patch +Patch2: 6627a4ee-vRTC-UIP-set-for-longer-than-expected.patch +Patch3: 6627a5fc-x86-MTRR-inverted-WC-check.patch +Patch4: 662a6a4c-x86-spec-reporting-of-BHB-clearing.patch +Patch5: 662a6a8d-x86-spec-adjust-logic-to-elide-LFENCE.patch +Patch6: 663090fd-x86-gen-cpuid-syntax.patch +Patch7: 663a383c-libxs-open-xenbus-fds-as-O_CLOEXEC.patch +Patch8: 663a4f3e-x86-cpu-policy-migration-IceLake-to-CascadeLake.patch +Patch9: 663d05b5-x86-ucode-distinguish-up-to-date.patch +Patch10: 663eaa27-libxl-XenStore-error-handling-in-device-creation.patch +Patch11: 66450626-sched-set-all-sched_resource-data-inside-locked.patch +Patch12: 66450627-x86-respect-mapcache_domain_init-failing.patch +Patch13: 6646031f-x86-ucode-further-identify-already-up-to-date.patch +Patch14: 6666ba52-x86-irq-remove-offline-CPUs-from-old-CPU-mask-when.patch +Patch15: 666994ab-x86-SMP-no-shorthand-IPI-in-hotplug.patch +Patch16: 666994f0-x86-IRQ-limit-interrupt-movement-in-fixup_irqs.patch +Patch17: 
666b07ee-x86-EPT-special-page-in-epte_get_entry_emt.patch +Patch18: 666b0819-x86-EPT-avoid-marking-np-ents-for-reconfig.patch +Patch19: 666b085a-x86-EPT-drop-questionable-mfn_valid-from-.patch +Patch20: 667187cc-x86-Intel-unlock-CPUID-earlier.patch +Patch21: 66718849-x86-IRQ-old_cpu_mask-in-fixup_irqs.patch +Patch22: 6671885e-x86-IRQ-handle-moving-in-_assign_irq_vector.patch +Patch23: 6672c846-x86-xstate-initialisation-of-XSS-cache.patch +Patch24: 6672c847-x86-CPUID-XSAVE-dynamic-leaves.patch +Patch25: 6673ffdc-x86-IRQ-forward-pending-to-new-dest-in-fixup_irqs.patch # EMBARGOED security fixes +Patch100: xsa458.patch # libxc Patch301: libxc-bitmap-long.patch Patch302: libxc-sr-xl-migration-debug.patch @@ -199,6 +225,7 @@ Patch408: ignore-ip-command-script-errors.patch # Needs to go upstream Patch420: suspend_evtchn_lock.patch Patch421: vif-route.patch +Patch422: gcc14-fixes.patch # Other bug fixes or features Patch450: xen.sysconfig-fillup.patch Patch451: xenconsole-no-multiple-connections.patch diff --git a/xsa458.patch b/xsa458.patch new file mode 100644 index 0000000..f0c251d --- /dev/null +++ b/xsa458.patch @@ -0,0 +1,38 @@ +From: Jan Beulich +Subject: x86/IRQ: avoid double unlock in map_domain_pirq() + +Forever since its introduction the main loop in the function dealing +with multi-vector MSI had error exit points ("break") with different +properties: In one case no IRQ descriptor lock is being held. +Nevertheless the subsequent error cleanup path assumed such a lock would +uniformly need releasing. Identify the case by setting "desc" to NULL, +thus allowing the unlock to be skipped as necessary. + +This is CVE-2024-31143 / XSA-458. 
+ +Coverity ID: 1605298 +Fixes: d1b6d0a02489 ("x86: enable multi-vector MSI") +Signed-off-by: Jan Beulich +Reviewed-by: Roger Pau Monné + +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2286,6 +2286,7 @@ int map_domain_pirq( + + set_domain_irq_pirq(d, irq, info); + spin_unlock_irqrestore(&desc->lock, flags); ++ desc = NULL; + + info = NULL; + irq = create_irq(NUMA_NO_NODE, true); +@@ -2321,7 +2322,9 @@ int map_domain_pirq( + + if ( ret ) + { +- spin_unlock_irqrestore(&desc->lock, flags); ++ if ( desc ) ++ spin_unlock_irqrestore(&desc->lock, flags); ++ + pci_disable_msi(msi_desc); + if ( nr ) + {