104 lines
4.9 KiB
Diff
104 lines
4.9 KiB
Diff
|
# Commit 87f37449d586b4d407b75235bb0a171e018e25ec
|
||
|
# Date 2023-11-02 10:50:59 +0100
|
||
|
# Author Roger Pau Monné <roger.pau@citrix.com>
|
||
|
# Committer Jan Beulich <jbeulich@suse.com>
|
||
|
x86/i8259: do not assume interrupts always target CPU0
|
||
|
|
||
|
Sporadically we have seen the following during AP bringup on AMD platforms
|
||
|
only:
|
||
|
|
||
|
microcode: CPU59 updated from revision 0x830107a to 0x830107a, date = 2023-05-17
|
||
|
microcode: CPU60 updated from revision 0x830104d to 0x830107a, date = 2023-05-17
|
||
|
CPU60: No irq handler for vector 27 (IRQ -2147483648)
|
||
|
microcode: CPU61 updated from revision 0x830107a to 0x830107a, date = 2023-05-17
|
||
|
|
||
|
This is similar to the issue raised on Linux commit 36e9e1eab777e, where they
|
||
|
observed i8259 (active) vectors getting delivered to CPUs other than 0.
|
||
|
|
||
|
On AMD or Hygon platforms adjust the target CPU mask of i8259 interrupt
|
||
|
descriptors to contain all possible CPUs, so that APs will reserve the vector
|
||
|
at startup if any legacy IRQ is still delivered through the i8259. Note that
|
||
|
if the IO-APIC takes over those interrupt descriptors the CPU mask will be
|
||
|
reset.
|
||
|
|
||
|
Spurious i8259 interrupt vectors however (IRQ7 and IRQ15) can be injected even
|
||
|
when all i8259 pins are masked, and hence would need to be handled on all CPUs.
|
||
|
|
||
|
Continue to reserve PIC vectors on CPU0 only, but do check for such spurious
|
||
|
interrupts on all CPUs if the vendor is AMD or Hygon. Note that once the
|
||
|
vectors get used by devices, detecting PIC spurious interrupts will no longer be
|
||
|
possible; however, the device driver should be able to cope with spurious
|
||
|
interrupts. Such PIC spurious interrupts occurring when the vector is in use
|
||
|
by a local APIC routed source will lead to an extra EOI, which might
|
||
|
unintentionally clear a different vector from ISR. Note this is already the
|
||
|
current behavior, so assume it's infrequent enough to not cause real issues.
|
||
|
|
||
|
Finally, adjust the printed message to display the CPU where the spurious
|
||
|
interrupt has been received, so it looks like:
|
||
|
|
||
|
microcode: CPU1 updated from revision 0x830107a to 0x830107a, date = 2023-05-17
|
||
|
cpu1: spurious 8259A interrupt: IRQ7
|
||
|
microcode: CPU2 updated from revision 0x830104d to 0x830107a, date = 2023-05-17
|
||
|
|
||
|
Amends: 3fba06ba9f8b ('x86/IRQ: re-use legacy vector ranges on APs')
|
||
|
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
|
||
|
Reviewed-by: Jan Beulich <jbeulich@suse.com>
|
||
|
|
||
|
--- a/xen/arch/x86/i8259.c
|
||
|
+++ b/xen/arch/x86/i8259.c
|
||
|
@@ -222,7 +222,8 @@ static bool _mask_and_ack_8259A_irq(unsi
|
||
|
is_real_irq = false;
|
||
|
/* Report spurious IRQ, once per IRQ line. */
|
||
|
if (!(spurious_irq_mask & irqmask)) {
|
||
|
- printk("spurious 8259A interrupt: IRQ%d.\n", irq);
|
||
|
+ printk("cpu%u: spurious 8259A interrupt: IRQ%u\n",
|
||
|
+ smp_processor_id(), irq);
|
||
|
spurious_irq_mask |= irqmask;
|
||
|
}
|
||
|
/*
|
||
|
@@ -349,7 +350,23 @@ void __init init_IRQ(void)
|
||
|
continue;
|
||
|
desc->handler = &i8259A_irq_type;
|
||
|
per_cpu(vector_irq, cpu)[LEGACY_VECTOR(irq)] = irq;
|
||
|
- cpumask_copy(desc->arch.cpu_mask, cpumask_of(cpu));
|
||
|
+
|
||
|
+ /*
|
||
|
+ * The interrupt affinity logic never targets interrupts to offline
|
||
|
+ * CPUs, hence it's safe to use cpumask_all here.
|
||
|
+ *
|
||
|
+ * Legacy PIC interrupts are only targeted to CPU0, but depending on
|
||
|
+ * the platform they can be distributed to any online CPU in hardware.
|
||
|
+ * Note this behavior has only been observed on AMD hardware. In order
|
||
|
+ * to cope install all active legacy vectors on all CPUs.
|
||
|
+ *
|
||
|
+ * IO-APIC will change the destination mask if/when taking ownership of
|
||
|
+ * the interrupt.
|
||
|
+ */
|
||
|
+ cpumask_copy(desc->arch.cpu_mask,
|
||
|
+ (boot_cpu_data.x86_vendor &
|
||
|
+ (X86_VENDOR_AMD | X86_VENDOR_HYGON) ? &cpumask_all
|
||
|
+ : cpumask_of(cpu)));
|
||
|
desc->arch.vector = LEGACY_VECTOR(irq);
|
||
|
}
|
||
|
|
||
|
--- a/xen/arch/x86/irq.c
|
||
|
+++ b/xen/arch/x86/irq.c
|
||
|
@@ -1920,7 +1920,16 @@ void do_IRQ(struct cpu_user_regs *regs)
|
||
|
kind = "";
|
||
|
if ( !(vector >= FIRST_LEGACY_VECTOR &&
|
||
|
vector <= LAST_LEGACY_VECTOR &&
|
||
|
- !smp_processor_id() &&
|
||
|
+ (!smp_processor_id() ||
|
||
|
+ /*
|
||
|
+ * For AMD/Hygon do spurious PIC interrupt
|
||
|
+ * detection on all CPUs, as it has been observed
|
||
|
+ * that during unknown circumstances spurious PIC
|
||
|
+ * interrupts have been delivered to CPUs
|
||
|
+ * different than the BSP.
|
||
|
+ */
|
||
|
+ (boot_cpu_data.x86_vendor & (X86_VENDOR_AMD |
|
||
|
+ X86_VENDOR_HYGON))) &&
|
||
|
bogus_8259A_irq(vector - FIRST_LEGACY_VECTOR)) )
|
||
|
{
|
||
|
printk("CPU%u: No irq handler for vector %02x (IRQ %d%s)\n",
|