362 lines
12 KiB
Diff
362 lines
12 KiB
Diff
|
References: bnc#713503
|
||
|
|
||
|
# HG changeset patch
|
||
|
# User George Dunlap <george.dunlap@eu.citrix.com>
|
||
|
# Date 1311701836 -3600
|
||
|
# Node ID 2e0cf9428554da666616982cd0074024ff85b221
|
||
|
# Parent ef9ed3d2aa870a37ed5e611be9c524d526a2d604
|
||
|
xen: Option to allow per-device vector maps for MSI IRQs
|
||
|
|
||
|
Add a vector-map to pci_dev, and add an option to point MSI-related
|
||
|
IRQs to the vector-map of the device.
|
||
|
|
||
|
This prevents irqs from the same device from being assigned
|
||
|
the same vector on different pcpus. This is required for systems
|
||
|
using an AMD IOMMU, since the intremap tables on AMD only look at
|
||
|
vector, and not destination ID.
|
||
|
|
||
|
Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
|
||
|
|
||
|
# HG changeset patch
|
||
|
# User George Dunlap <george.dunlap@eu.citrix.com>
|
||
|
# Date 1311701852 -3600
|
||
|
# Node ID fa4e2ca9ecffbc432b451f495ad0a403644a6be8
|
||
|
# Parent 2e0cf9428554da666616982cd0074024ff85b221
|
||
|
xen: AMD IOMMU: Automatically enable per-device vector maps
|
||
|
|
||
|
Automatically enable per-device vector maps when using IOMMU,
|
||
|
unless disabled specifically by an IOMMU parameter.
|
||
|
|
||
|
Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
|
||
|
|
||
|
# HG changeset patch
|
||
|
# User George Dunlap <george.dunlap@eu.citrix.com>
|
||
|
# Date 1315231215 -3600
|
||
|
# Node ID 32814ad7458dc842a7c588eee13e5c4ee11709a3
|
||
|
# Parent f1349a968a5ac5577d67ad4a3f3490c580dbe264
|
||
|
xen: Add global irq_vector_map option, set if using AMD global intremap tables
|
||
|
|
||
|
As mentioned in previous changesets, AMD IOMMU interrupt
|
||
|
remapping tables only look at the vector, not the destination
|
||
|
id of an interrupt. This means that all IRQs going through
|
||
|
the same interrupt remapping table need to *not* share vectors.
|
||
|
|
||
|
The irq "vector map" functionality was originally introduced
|
||
|
after a patch which disabled global AMD IOMMUs entirely. That
|
||
|
patch has since been reverted, meaning that AMD intremap tables
|
||
|
can either be per-device or global.
|
||
|
|
||
|
This patch therefore introduces a global irq vector map option,
|
||
|
and enables it if we're using an AMD IOMMU with a global
|
||
|
interrupt remapping table.
|
||
|
|
||
|
This patch removes the "irq-perdev-vector-map" boolean
|
||
|
command-line option and replaces it with "irq_vector_map",
|
||
|
which can have one of three values: none, global, or per-device.
|
||
|
|
||
|
Setting the irq_vector_map to any value will override the
|
||
|
default that the AMD code sets.
|
||
|
|
||
|
Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
|
||
|
|
||
|
# HG changeset patch
|
||
|
# User Jan Beulich <jbeulich@suse.com>
|
||
|
# Date 1317730316 -7200
|
||
|
# Node ID a99d75671a911f9c0d5d11e0fe88a0a65863cb44
|
||
|
# Parent 3d1664cc9e458809e399320204aca8536e401ee1
|
||
|
AMD-IOMMU: remove dead variable references
|
||
|
|
||
|
These got orphaned by recent changes.
|
||
|
|
||
|
Signed-off-by: Jan Beulich <jbeulich@suse.com>
|
||
|
Acked-by: Keir Fraser <keir@xen.org>
|
||
|
|
||
|
--- a/docs/src/user.tex
|
||
|
+++ b/docs/src/user.tex
|
||
|
@@ -4197,6 +4197,10 @@ writing to the VGA console after domain
|
||
|
\item [ vcpu\_migration\_delay=$<$minimum\_time$>$] Set minimum time of
|
||
|
vcpu migration in microseconds (default 0). This parameter avoids agressive
|
||
|
vcpu migration. For example, the linux kernel uses 0.5ms by default.
|
||
|
+\item [ irq\_vector\_map=xxx ] Enable irq vector non-sharing maps. Setting 'global'
|
||
|
+ will ensure that no IRQs will share vectors. Setting 'per-device' will ensure
|
||
|
+ that no IRQs from the same device will share vectors. Setting to 'none' will
|
||
|
+ disable it entirely, overriding any defaults the IOMMU code may set.
|
||
|
\end{description}
|
||
|
|
||
|
In addition, the following options may be specified on the Xen command
|
||
|
--- a/xen/arch/x86/irq.c
|
||
|
+++ b/xen/arch/x86/irq.c
|
||
|
@@ -24,6 +24,8 @@
|
||
|
#include <asm/mach-generic/mach_apic.h>
|
||
|
#include <public/physdev.h>
|
||
|
|
||
|
+static void parse_irq_vector_map_param(char *s);
|
||
|
+
|
||
|
/* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
|
||
|
bool_t __read_mostly opt_noirqbalance = 0;
|
||
|
boolean_param("noirqbalance", opt_noirqbalance);
|
||
|
@@ -32,6 +34,12 @@ unsigned int __read_mostly nr_irqs_gsi =
|
||
|
unsigned int __read_mostly nr_irqs;
|
||
|
integer_param("nr_irqs", nr_irqs);
|
||
|
|
||
|
+/* This default may be changed by the AMD IOMMU code */
|
||
|
+int __read_mostly opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_DEFAULT;
|
||
|
+custom_param("irq_vector_map", parse_irq_vector_map_param);
|
||
|
+
|
||
|
+vmask_t global_used_vector_map;
|
||
|
+
|
||
|
u8 __read_mostly *irq_vector;
|
||
|
struct irq_desc __read_mostly *irq_desc = NULL;
|
||
|
|
||
|
@@ -60,6 +68,26 @@ static struct timer irq_ratelimit_timer;
|
||
|
static unsigned int __read_mostly irq_ratelimit_threshold = 10000;
|
||
|
integer_param("irq_ratelimit", irq_ratelimit_threshold);
|
||
|
|
||
|
+static void __init parse_irq_vector_map_param(char *s)
|
||
|
+{
|
||
|
+ char *ss;
|
||
|
+
|
||
|
+ do {
|
||
|
+ ss = strchr(s, ',');
|
||
|
+ if ( ss )
|
||
|
+ *ss = '\0';
|
||
|
+
|
||
|
+ if ( !strcmp(s, "none"))
|
||
|
+ opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_NONE;
|
||
|
+ else if ( !strcmp(s, "global"))
|
||
|
+ opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_GLOBAL;
|
||
|
+ else if ( !strcmp(s, "per-device"))
|
||
|
+ opt_irq_vector_map=OPT_IRQ_VECTOR_MAP_PERDEV;
|
||
|
+
|
||
|
+ s = ss + 1;
|
||
|
+ } while ( ss );
|
||
|
+}
|
||
|
+
|
||
|
/* Must be called when irq disabled */
|
||
|
void lock_vector_lock(void)
|
||
|
{
|
||
|
@@ -344,6 +372,41 @@ hw_irq_controller no_irq_type = {
|
||
|
end_none
|
||
|
};
|
||
|
|
||
|
+static vmask_t *irq_get_used_vector_mask(int irq)
|
||
|
+{
|
||
|
+ vmask_t *ret = NULL;
|
||
|
+
|
||
|
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_GLOBAL )
|
||
|
+ {
|
||
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
||
|
+
|
||
|
+ ret = &global_used_vector_map;
|
||
|
+
|
||
|
+ if ( desc->chip_data->used_vectors )
|
||
|
+ {
|
||
|
+ printk(XENLOG_INFO "%s: Strange, unassigned irq %d already has used_vectors!\n",
|
||
|
+ __func__, irq);
|
||
|
+ }
|
||
|
+ else
|
||
|
+ {
|
||
|
+ int vector;
|
||
|
+
|
||
|
+ vector = irq_to_vector(irq);
|
||
|
+ if ( vector > 0 )
|
||
|
+ {
|
||
|
+ printk(XENLOG_INFO "%s: Strange, irq %d already assigned vector %d!\n",
|
||
|
+ __func__, irq, vector);
|
||
|
+
|
||
|
+ ASSERT(!test_bit(vector, ret));
|
||
|
+
|
||
|
+ set_bit(vector, ret);
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
+ return ret;
|
||
|
+}
|
||
|
+
|
||
|
int __assign_irq_vector(int irq, struct irq_cfg *cfg, const cpumask_t *mask)
|
||
|
{
|
||
|
/*
|
||
|
@@ -362,6 +425,7 @@ int __assign_irq_vector(int irq, struct
|
||
|
int cpu, err;
|
||
|
unsigned long flags;
|
||
|
cpumask_t tmp_mask;
|
||
|
+ vmask_t *irq_used_vectors = NULL;
|
||
|
|
||
|
old_vector = irq_to_vector(irq);
|
||
|
if (old_vector) {
|
||
|
@@ -376,6 +440,17 @@ int __assign_irq_vector(int irq, struct
|
||
|
return -EAGAIN;
|
||
|
|
||
|
err = -ENOSPC;
|
||
|
+
|
||
|
+ /* This is the only place normal IRQs are ever marked
|
||
|
+ * as "in use". If they're not in use yet, check to see
|
||
|
+ * if we need to assign a global vector mask. */
|
||
|
+ if ( irq_status[irq] == IRQ_USED )
|
||
|
+ {
|
||
|
+ irq_used_vectors = cfg->used_vectors;
|
||
|
+ }
|
||
|
+ else
|
||
|
+ irq_used_vectors = irq_get_used_vector_mask(irq);
|
||
|
+
|
||
|
for_each_cpu_mask(cpu, *mask) {
|
||
|
int new_cpu;
|
||
|
int vector, offset;
|
||
|
@@ -401,8 +476,8 @@ next:
|
||
|
if (test_bit(vector, used_vectors))
|
||
|
goto next;
|
||
|
|
||
|
- if (cfg->used_vectors
|
||
|
- && test_bit(vector, cfg->used_vectors) )
|
||
|
+ if (irq_used_vectors
|
||
|
+ && test_bit(vector, irq_used_vectors) )
|
||
|
goto next;
|
||
|
|
||
|
for_each_cpu_mask(new_cpu, tmp_mask)
|
||
|
@@ -420,15 +495,22 @@ next:
|
||
|
per_cpu(vector_irq, new_cpu)[vector] = irq;
|
||
|
cfg->vector = vector;
|
||
|
cpus_copy(cfg->cpu_mask, tmp_mask);
|
||
|
+
|
||
|
+ irq_status[irq] = IRQ_USED;
|
||
|
+ ASSERT((cfg->used_vectors == NULL)
|
||
|
+ || (cfg->used_vectors == irq_used_vectors));
|
||
|
+ cfg->used_vectors = irq_used_vectors;
|
||
|
+
|
||
|
+ if (IO_APIC_IRQ(irq))
|
||
|
+ irq_vector[irq] = vector;
|
||
|
+
|
||
|
if ( cfg->used_vectors )
|
||
|
{
|
||
|
ASSERT(!test_bit(vector, cfg->used_vectors));
|
||
|
+
|
||
|
set_bit(vector, cfg->used_vectors);
|
||
|
}
|
||
|
|
||
|
- irq_status[irq] = IRQ_USED;
|
||
|
- if (IO_APIC_IRQ(irq))
|
||
|
- irq_vector[irq] = vector;
|
||
|
err = 0;
|
||
|
local_irq_restore(flags);
|
||
|
break;
|
||
|
@@ -1523,7 +1605,7 @@ int map_domain_pirq(
|
||
|
|
||
|
if ( !IS_PRIV(current->domain) &&
|
||
|
!(IS_PRIV_FOR(current->domain, d) &&
|
||
|
- irq_access_permitted(current->domain, pirq)))
|
||
|
+ irq_access_permitted(current->domain, pirq)))
|
||
|
return -EPERM;
|
||
|
|
||
|
if ( pirq < 0 || pirq >= d->nr_pirqs || irq < 0 || irq >= nr_irqs )
|
||
|
@@ -1571,8 +1653,22 @@ int map_domain_pirq(
|
||
|
|
||
|
if ( desc->handler != &no_irq_type )
|
||
|
dprintk(XENLOG_G_ERR, "dom%d: irq %d in use\n",
|
||
|
- d->domain_id, irq);
|
||
|
+ d->domain_id, irq);
|
||
|
desc->handler = &pci_msi_type;
|
||
|
+
|
||
|
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV
|
||
|
+ && !desc->chip_data->used_vectors )
|
||
|
+ {
|
||
|
+ desc->chip_data->used_vectors = &pdev->info.used_vectors;
|
||
|
+ if ( desc->chip_data->vector != IRQ_VECTOR_UNASSIGNED )
|
||
|
+ {
|
||
|
+ int vector = desc->chip_data->vector;
|
||
|
+ ASSERT(!test_bit(vector, desc->chip_data->used_vectors));
|
||
|
+
|
||
|
+ set_bit(vector, desc->chip_data->used_vectors);
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
d->arch.pirq_irq[pirq] = irq;
|
||
|
d->arch.irq_pirq[irq] = pirq;
|
||
|
setup_msi_irq(pdev, msi_desc, irq);
|
||
|
@@ -1583,9 +1679,12 @@ int map_domain_pirq(
|
||
|
d->arch.pirq_irq[pirq] = irq;
|
||
|
d->arch.irq_pirq[irq] = pirq;
|
||
|
spin_unlock_irqrestore(&desc->lock, flags);
|
||
|
+
|
||
|
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_PERDEV )
|
||
|
+ printk(XENLOG_INFO "Per-device vector maps for GSIs not implemented yet.\n");
|
||
|
}
|
||
|
|
||
|
- done:
|
||
|
+done:
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||
|
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
|
||
|
@@ -166,6 +166,35 @@ int __init amd_iov_detect(void)
|
||
|
return -ENODEV;
|
||
|
}
|
||
|
|
||
|
+ /*
|
||
|
+ * AMD IOMMUs don't distinguish between vectors destined for
|
||
|
+ * different cpus when doing interrupt remapping. This means
|
||
|
+ * that interrupts going through the same intremap table
|
||
|
+ * can't share the same vector.
|
||
|
+ *
|
||
|
+ * If irq_vector_map isn't specified, choose a sensible default:
|
||
|
+ * - If we're using per-device intremap tables, per-device
|
||
|
+ * vector non-sharing maps
|
||
|
+ * - If we're using a global intremap table, global vector
|
||
|
+ * non-sharing map
|
||
|
+ */
|
||
|
+ if ( opt_irq_vector_map == OPT_IRQ_VECTOR_MAP_DEFAULT )
|
||
|
+ {
|
||
|
+ if ( amd_iommu_perdev_intremap )
|
||
|
+ {
|
||
|
+ printk("AMD-Vi: Enabling per-device vector maps\n");
|
||
|
+ opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_PERDEV;
|
||
|
+ }
|
||
|
+ else
|
||
|
+ {
|
||
|
+ printk("AMD-Vi: Enabling global vector map\n");
|
||
|
+ opt_irq_vector_map = OPT_IRQ_VECTOR_MAP_GLOBAL;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ else
|
||
|
+ {
|
||
|
+ printk("AMD-Vi: Not overriding irq_vector_map setting\n");
|
||
|
+ }
|
||
|
return scan_pci_devices();
|
||
|
}
|
||
|
|
||
|
--- a/xen/include/asm-x86/irq.h
|
||
|
+++ b/xen/include/asm-x86/irq.h
|
||
|
@@ -45,6 +45,13 @@ extern u8 *irq_vector;
|
||
|
|
||
|
extern bool_t opt_noirqbalance;
|
||
|
|
||
|
+#define OPT_IRQ_VECTOR_MAP_DEFAULT 0 /* Do the default thing */
|
||
|
+#define OPT_IRQ_VECTOR_MAP_NONE 1 /* None */
|
||
|
+#define OPT_IRQ_VECTOR_MAP_GLOBAL 2 /* One global vector map (no vector sharing) */
|
||
|
+#define OPT_IRQ_VECTOR_MAP_PERDEV 3 /* Per-device vector map (no vector sharing w/in a device) */
|
||
|
+
|
||
|
+extern int opt_irq_vector_map;
|
||
|
+
|
||
|
/*
|
||
|
* Per-cpu current frame pointer - the location of the last exception frame on
|
||
|
* the stack
|
||
|
--- a/xen/include/xen/pci.h
|
||
|
+++ b/xen/include/xen/pci.h
|
||
|
@@ -11,6 +11,7 @@
|
||
|
#include <xen/types.h>
|
||
|
#include <xen/list.h>
|
||
|
#include <xen/spinlock.h>
|
||
|
+#include <xen/irq.h>
|
||
|
|
||
|
/*
|
||
|
* The PCI interface treats multi-function devices as independent
|
||
|
@@ -38,6 +39,7 @@ struct pci_dev_info {
|
||
|
u8 bus;
|
||
|
u8 devfn;
|
||
|
} physfn;
|
||
|
+ vmask_t used_vectors;
|
||
|
};
|
||
|
|
||
|
struct pci_dev {
|