# HG changeset patch # User Keir Fraser # Date 1222256215 -3600 # Node ID 31f09a5e24cf8eb8a9d73acc6c23262fe9d463d7 # Parent 7750906b06b3ebbba529e6d1042d7a2a2712623c x86: Properly synchronise updates to pirq-to-vector mapping. Per-domain irq mappings are now protected by d->evtchn_lock and by the per-vector irq_desc lock. Signed-off-by: Jan Beulich Signed-off-by: Keir Fraser Index: xen-3.3.1-testing/xen/arch/ia64/xen/irq.c =================================================================== --- xen-3.3.1-testing.orig/xen/arch/ia64/xen/irq.c +++ xen-3.3.1-testing/xen/arch/ia64/xen/irq.c @@ -459,20 +459,24 @@ int pirq_guest_bind(struct vcpu *v, int return rc; } -void pirq_guest_unbind(struct domain *d, int irq) +int pirq_guest_unbind(struct domain *d, int irq) { irq_desc_t *desc = &irq_desc[irq]; irq_guest_action_t *action; unsigned long flags; - int i; + int i, rc = 0; spin_lock_irqsave(&desc->lock, flags); action = (irq_guest_action_t *)desc->action; - i = 0; - while ( action->guest[i] && (action->guest[i] != d) ) - i++; + for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ ) + continue; + if ( i == action->nr_guests ) + { + rc = -EINVAL; + goto out; + } memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1); action->nr_guests--; @@ -492,7 +496,9 @@ void pirq_guest_unbind(struct domain *d, desc->handler->shutdown(irq); } + out: spin_unlock_irqrestore(&desc->lock, flags); + return rc; } void Index: xen-3.3.1-testing/xen/arch/x86/domain.c =================================================================== --- xen-3.3.1-testing.orig/xen/arch/x86/domain.c +++ xen-3.3.1-testing/xen/arch/x86/domain.c @@ -414,8 +414,6 @@ int arch_domain_create(struct domain *d, goto fail; } - spin_lock_init(&d->arch.irq_lock); - if ( is_hvm_domain(d) ) { if ( (rc = hvm_domain_initialise(d)) != 0 ) Index: xen-3.3.1-testing/xen/arch/x86/io_apic.c =================================================================== --- xen-3.3.1-testing.orig/xen/arch/x86/io_apic.c +++ xen-3.3.1-testing/xen/arch/x86/io_apic.c @@ -48,22 +48,6 @@ atomic_t irq_mis_count; int msi_enable = 0; boolean_param("msi", msi_enable); -int domain_irq_to_vector(struct domain *d, int irq) -{ - if ( !msi_enable ) - return irq_to_vector(irq); - else - return d->arch.pirq_vector[irq]; -} - -int domain_vector_to_irq(struct domain *d, int vector) -{ - if ( !msi_enable ) - return vector_to_irq(vector); - else - return d->arch.vector_pirq[vector]; -} - /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; @@ -730,7 +714,6 @@ next: static struct hw_interrupt_type ioapic_level_type; static struct hw_interrupt_type ioapic_edge_type; -struct hw_interrupt_type pci_msi_type; #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 Index: xen-3.3.1-testing/xen/arch/x86/irq.c =================================================================== --- xen-3.3.1-testing.orig/xen/arch/x86/irq.c +++ xen-3.3.1-testing/xen/arch/x86/irq.c @@ -277,6 +277,35 @@ static void __do_IRQ_guest(int vector) } } +/* + * Retrieve Xen irq-descriptor corresponding to a domain-specific irq. + * The descriptor is returned locked. This function is safe against changes + * to the per-domain irq-to-vector mapping. + */ +static irq_desc_t *domain_spin_lock_irq_desc( + struct domain *d, int irq, unsigned long *pflags) +{ + unsigned int vector; + unsigned long flags; + irq_desc_t *desc; + + for ( ; ; ) + { + vector = domain_irq_to_vector(d, irq); + if ( vector <= 0 ) + return NULL; + desc = &irq_desc[vector]; + spin_lock_irqsave(&desc->lock, flags); + if ( vector == domain_irq_to_vector(d, irq) ) + break; + spin_unlock_irqrestore(&desc->lock, flags); + } + + if ( pflags != NULL ) + *pflags = flags; + return desc; +} + /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */ static void flush_ready_eoi(void *unused) { @@ -342,11 +371,13 @@ static void __pirq_guest_eoi(struct doma cpumask_t cpu_eoi_map; int vector; - vector = domain_irq_to_vector(d, irq); - desc = &irq_desc[vector]; - action = (irq_guest_action_t *)desc->action; + ASSERT(local_irq_is_enabled()); + desc = domain_spin_lock_irq_desc(d, irq, NULL); + if ( desc == NULL ) + return; - spin_lock_irq(&desc->lock); + action = (irq_guest_action_t *)desc->action; + vector = desc - irq_desc; ASSERT(!test_bit(irq, d->pirq_mask) || (action->ack_type != ACKTYPE_NONE)); @@ -418,7 +449,7 @@ int pirq_acktype(struct domain *d, int i unsigned int vector; vector = domain_irq_to_vector(d, irq); - if ( vector == 0 ) + if ( vector <= 0 ) return ACKTYPE_NONE; desc = &irq_desc[vector]; @@ -447,13 +478,6 @@ int pirq_acktype(struct domain *d, int i if ( !strcmp(desc->handler->typename, "XT-PIC") ) return ACKTYPE_UNMASK; - if ( strstr(desc->handler->typename, "MPIC") ) - { - if ( desc->status & IRQ_LEVEL ) - return (desc->status & IRQ_PER_CPU) ? ACKTYPE_EOI : ACKTYPE_UNMASK; - return ACKTYPE_NONE; /* edge-triggered => no final EOI */ - } - printk("Unknown PIC type '%s' for IRQ %d\n", desc->handler->typename, irq); BUG(); @@ -462,21 +486,18 @@ int pirq_acktype(struct domain *d, int i int pirq_shared(struct domain *d, int irq) { - unsigned int vector; irq_desc_t *desc; irq_guest_action_t *action; unsigned long flags; int shared; - vector = domain_irq_to_vector(d, irq); - if ( vector == 0 ) + desc = domain_spin_lock_irq_desc(d, irq, &flags); + if ( desc == NULL ) return 0; - desc = &irq_desc[vector]; - - spin_lock_irqsave(&desc->lock, flags); action = (irq_guest_action_t *)desc->action; shared = ((desc->status & IRQ_GUEST) && (action->nr_guests > 1)); + spin_unlock_irqrestore(&desc->lock, flags); return shared; @@ -489,20 +510,17 @@ int pirq_guest_bind(struct vcpu *v, int irq_guest_action_t *action, *newaction = NULL; int rc = 0; cpumask_t cpumask = CPU_MASK_NONE; + unsigned long flags; - retry: - vector = domain_irq_to_vector(v->domain, irq); - if ( vector == 0 ) - { - rc = -EINVAL; - goto out; - } - - desc = &irq_desc[vector]; + WARN_ON(!spin_is_locked(&v->domain->evtchn_lock)); - spin_lock_irq(&desc->lock); + retry: + desc = domain_spin_lock_irq_desc(v->domain, irq, &flags); + if ( desc == NULL ) + return -EINVAL; action = (irq_guest_action_t *)desc->action; + vector = desc - irq_desc; if ( !(desc->status & IRQ_GUEST) ) { @@ -517,7 +535,7 @@ int pirq_guest_bind(struct vcpu *v, int if ( newaction == NULL ) { - spin_unlock_irq(&desc->lock); + spin_unlock_irqrestore(&desc->lock, flags); if ( (newaction = xmalloc(irq_guest_action_t)) != NULL ) goto retry; gdprintk(XENLOG_INFO, @@ -563,7 +581,7 @@ int pirq_guest_bind(struct vcpu *v, int */ ASSERT(action->ack_type == ACKTYPE_EOI); ASSERT(desc->status & IRQ_DISABLED); - spin_unlock_irq(&desc->lock); + spin_unlock_irqrestore(&desc->lock, flags); cpu_relax(); goto retry; } @@ -579,32 +597,45 @@ int pirq_guest_bind(struct vcpu *v, int action->guest[action->nr_guests++] = v->domain; unlock_out: - spin_unlock_irq(&desc->lock); + spin_unlock_irqrestore(&desc->lock, flags); out: if ( newaction != NULL ) xfree(newaction); return rc; } -void pirq_guest_unbind(struct domain *d, int irq) +int pirq_guest_unbind(struct domain *d, int irq) { - unsigned int vector; + int vector; irq_desc_t *desc; irq_guest_action_t *action; cpumask_t cpu_eoi_map; - int i; + int i, rc = 0; - vector = domain_irq_to_vector(d, irq); - desc = &irq_desc[vector]; - BUG_ON(vector == 0); + WARN_ON(!spin_is_locked(&d->evtchn_lock)); - spin_lock_irq(&desc->lock); + desc = domain_spin_lock_irq_desc(d, irq, &flags); + if ( unlikely(desc == NULL) ) + { + if ( !msi_enable || (vector = -domain_irq_to_vector(d, irq)) == 0 ) + return -EINVAL; + BUG_ON(vector <= 0); + desc = &irq_desc[vector]; + spin_lock_irqsave(&desc->lock, flags); + d->arch.pirq_vector[irq] = d->arch.vector_pirq[vector] = 0; + goto out; + } action = (irq_guest_action_t *)desc->action; + vector = desc - irq_desc; - i = 0; - while ( action->guest[i] && (action->guest[i] != d) ) - i++; + for ( i = 0; (i < action->nr_guests) && (action->guest[i] != d); i++ ) + continue; + if ( i == action->nr_guests ) + { + rc = -EINVAL; + goto out; + } memmove(&action->guest[i], &action->guest[i+1], IRQ_MAX_GUESTS-i-1); action->nr_guests--; @@ -622,7 +653,7 @@ void pirq_guest_unbind(struct domain *d, (action->nr_guests != 0) ) { cpu_eoi_map = action->cpu_eoi_map; - spin_unlock_irq(&desc->lock); + spin_unlock_irqrestore(&desc->lock, flags); on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 0); spin_lock_irq(&desc->lock); } @@ -659,7 +690,7 @@ void pirq_guest_unbind(struct domain *d, if ( !cpus_empty(cpu_eoi_map) ) { BUG_ON(action->ack_type != ACKTYPE_EOI); - spin_unlock_irq(&desc->lock); + spin_unlock_irqrestore(&desc->lock, flags); on_selected_cpus(cpu_eoi_map, set_eoi_ready, desc, 1, 1); spin_lock_irq(&desc->lock); } @@ -673,9 +704,8 @@ void pirq_guest_unbind(struct domain *d, desc->handler->shutdown(vector); out: - spin_unlock_irq(&desc->lock); - if ( action != NULL ) - xfree(action); + spin_unlock_irqrestore(&desc->lock, flags); + return rc; } extern void dump_ioapic_irq_info(void); Index: xen-3.3.1-testing/xen/arch/x86/msi.c =================================================================== --- xen-3.3.1-testing.orig/xen/arch/x86/msi.c +++ xen-3.3.1-testing/xen/arch/x86/msi.c @@ -728,7 +728,6 @@ void pci_disable_msi(int vector) __pci_disable_msix(vector); } -extern struct hw_interrupt_type pci_msi_type; static void msi_free_vectors(struct pci_dev* dev) { struct msi_desc *entry, *tmp; Index: xen-3.3.1-testing/xen/arch/x86/physdev.c =================================================================== --- xen-3.3.1-testing.orig/xen/arch/x86/physdev.c +++ xen-3.3.1-testing/xen/arch/x86/physdev.c @@ -26,17 +26,11 @@ int ioapic_guest_write( unsigned long physbase, unsigned int reg, u32 pval); - -extern struct hw_interrupt_type pci_msi_type; - static int get_free_pirq(struct domain *d, int type, int index) { int i; - if ( d == NULL ) - return -EINVAL; - - ASSERT(spin_is_locked(&d->arch.irq_lock)); + ASSERT(spin_is_locked(&d->evtchn_lock)); if ( type == MAP_PIRQ_TYPE_GSI ) { @@ -64,11 +58,10 @@ static int map_domain_pirq(struct domain int ret = 0; int old_vector, old_pirq; struct msi_info msi; + irq_desc_t *desc; + unsigned long flags; - if ( d == NULL ) - return -EINVAL; - - ASSERT(spin_is_locked(&d->arch.irq_lock)); + ASSERT(spin_is_locked(&d->evtchn_lock)); if ( !IS_PRIV(current->domain) ) return -EPERM; @@ -88,8 +81,7 @@ static int map_domain_pirq(struct domain { dprintk(XENLOG_G_ERR, "dom%d: pirq %d or vector %d already mapped\n", d->domain_id, pirq, vector); - ret = -EINVAL; - goto done; + return -EINVAL; } ret = irq_permit_access(d, pirq); @@ -97,17 +89,14 @@ static int map_domain_pirq(struct domain { dprintk(XENLOG_G_ERR, "dom%d: could not permit access to irq %d\n", d->domain_id, pirq); - goto done; + return ret; } + desc = &irq_desc[vector]; + spin_lock_irqsave(&desc->lock, flags); + if ( map && MAP_PIRQ_TYPE_MSI == map->type ) { - irq_desc_t *desc; - unsigned long flags; - - desc = &irq_desc[vector]; - - spin_lock_irqsave(&desc->lock, flags); if ( desc->handler != &no_irq_type ) dprintk(XENLOG_G_ERR, "dom%d: vector %d in use\n", d->domain_id, vector); @@ -120,8 +109,6 @@ static int map_domain_pirq(struct domain msi.vector = vector; ret = pci_enable_msi(&msi); - - spin_unlock_irqrestore(&desc->lock, flags); if ( ret ) goto done; } @@ -130,6 +117,7 @@ static int map_domain_pirq(struct domain d->arch.vector_pirq[vector] = pirq; done: + spin_unlock_irqrestore(&desc->lock, flags); return ret; } @@ -145,11 +133,11 @@ static int unmap_domain_pirq(struct doma if ( !IS_PRIV(current->domain) ) return -EINVAL; - ASSERT(spin_is_locked(&d->arch.irq_lock)); + ASSERT(spin_is_locked(&d->evtchn_lock)); vector = d->arch.pirq_vector[pirq]; - if ( !vector ) + if ( vector <= 0 ) { dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n", d->domain_id, pirq); @@ -159,21 +147,35 @@ static int unmap_domain_pirq(struct doma { unsigned long flags; irq_desc_t *desc; + bool_t forced_unbind = (pirq_guest_unbind(d, pirq) == 0); + + if ( forced_unbind ) + dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n", + d->domain_id, pirq); desc = &irq_desc[vector]; spin_lock_irqsave(&desc->lock, flags); + + BUG_ON(vector != d->arch.pirq_vector[pirq]); + if ( desc->msi_desc ) pci_disable_msi(vector); if ( desc->handler == &pci_msi_type ) + desc->handler = &no_irq_type; + + if ( !forced_unbind ) { - /* MSI is not shared, so should be released already */ - BUG_ON(desc->status & IRQ_GUEST); - irq_desc[vector].handler = &no_irq_type; + d->arch.pirq_vector[pirq] = 0; + d->arch.vector_pirq[vector] = 0; + } + else + { + d->arch.pirq_vector[pirq] = -vector; + d->arch.vector_pirq[vector] = -pirq; } - spin_unlock_irqrestore(&desc->lock, flags); - d->arch.pirq_vector[pirq] = d->arch.vector_pirq[vector] = 0; + spin_unlock_irqrestore(&desc->lock, flags); } ret = irq_deny_access(d, pirq); @@ -189,7 +191,6 @@ static int physdev_map_pirq(struct physd { struct domain *d; int vector, pirq, ret = 0; - unsigned long flags; /* if msi_enable is not enabled, map always succeeds */ if ( !msi_enable ) @@ -250,8 +251,8 @@ static int physdev_map_pirq(struct physd goto free_domain; } - spin_lock_irqsave(&d->arch.irq_lock, flags); - if ( map->pirq == -1 ) + spin_lock(&d->evtchn_lock); + if ( map->pirq < 0 ) { if ( d->arch.vector_pirq[vector] ) { @@ -259,6 +260,11 @@ static int physdev_map_pirq(struct physd d->domain_id, map->index, map->pirq, d->arch.vector_pirq[vector]); pirq = d->arch.vector_pirq[vector]; + if ( pirq < 0 ) + { + ret = -EBUSY; + goto done; + } } else { @@ -291,7 +297,7 @@ static int physdev_map_pirq(struct physd if ( !ret ) map->pirq = pirq; done: - spin_unlock_irqrestore(&d->arch.irq_lock, flags); + spin_unlock(&d->evtchn_lock); free_domain: rcu_unlock_domain(d); return ret; @@ -300,7 +306,6 @@ free_domain: static int physdev_unmap_pirq(struct physdev_unmap_pirq *unmap) { struct domain *d; - unsigned long flags; int ret; if ( !msi_enable ) @@ -323,9 +328,10 @@ static int physdev_unmap_pirq(struct phy return -ESRCH; } - spin_lock_irqsave(&d->arch.irq_lock, flags); + spin_lock(&d->evtchn_lock); ret = unmap_domain_pirq(d, unmap->pirq); - spin_unlock_irqrestore(&d->arch.irq_lock, flags); + spin_unlock(&d->evtchn_lock); + rcu_unlock_domain(d); return ret; @@ -431,7 +437,6 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H case PHYSDEVOP_alloc_irq_vector: { struct physdev_irq irq_op; - unsigned long flags; ret = -EFAULT; if ( copy_from_guest(&irq_op, arg, 1) != 0 ) @@ -456,9 +461,9 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H if ( msi_enable ) { - spin_lock_irqsave(&dom0->arch.irq_lock, flags); + spin_lock(&dom0->evtchn_lock); ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector, NULL); - spin_unlock_irqrestore(&dom0->arch.irq_lock, flags); + spin_unlock(&dom0->evtchn_lock); } if ( copy_to_guest(arg, &irq_op, 1) != 0 ) Index: xen-3.3.1-testing/xen/common/event_channel.c =================================================================== --- xen-3.3.1-testing.orig/xen/common/event_channel.c +++ xen-3.3.1-testing/xen/common/event_channel.c @@ -376,7 +376,8 @@ static long __evtchn_close(struct domain break; case ECS_PIRQ: - pirq_guest_unbind(d1, chn1->u.pirq); + if ( pirq_guest_unbind(d1, chn1->u.pirq) != 0 ) + BUG(); d1->pirq_to_evtchn[chn1->u.pirq] = 0; break; Index: xen-3.3.1-testing/xen/include/asm-x86/domain.h =================================================================== --- xen-3.3.1-testing.orig/xen/include/asm-x86/domain.h +++ xen-3.3.1-testing/xen/include/asm-x86/domain.h @@ -235,7 +235,7 @@ struct arch_domain /* Shadow translated domain: P2M mapping */ pagetable_t phys_table; - spinlock_t irq_lock; + /* NB. protected by d->evtchn_lock and by irq_desc[vector].lock */ int vector_pirq[NR_VECTORS]; int pirq_vector[NR_PIRQS]; Index: xen-3.3.1-testing/xen/include/asm-x86/irq.h =================================================================== --- xen-3.3.1-testing.orig/xen/include/asm-x86/irq.h +++ xen-3.3.1-testing/xen/include/asm-x86/irq.h @@ -52,6 +52,7 @@ extern atomic_t irq_mis_count; int pirq_acktype(struct domain *d, int irq); int pirq_shared(struct domain *d , int irq); -extern int domain_irq_to_vector(struct domain *d, int irq); -extern int domain_vector_to_irq(struct domain *d, int vector); +#define domain_irq_to_vector(d, irq) (msi_enable ? (d)->arch.pirq_vector[irq] : irq_to_vector(irq)) +#define domain_vector_to_irq(d, vec) (msi_enable ? (d)->arch.vector_pirq[vec] : vector_to_irq(vec)) + #endif /* _ASM_HW_IRQ_H */ Index: xen-3.3.1-testing/xen/include/asm-x86/msi.h =================================================================== --- xen-3.3.1-testing.orig/xen/include/asm-x86/msi.h +++ xen-3.3.1-testing/xen/include/asm-x86/msi.h @@ -107,7 +107,7 @@ struct msi_desc { */ #define NR_HP_RESERVED_VECTORS 20 -extern int vector_irq[NR_VECTORS]; +extern struct hw_interrupt_type pci_msi_type; /* * MSI-X Address Register Index: xen-3.3.1-testing/xen/include/xen/irq.h =================================================================== --- xen-3.3.1-testing.orig/xen/include/xen/irq.h +++ xen-3.3.1-testing/xen/include/xen/irq.h @@ -22,7 +22,6 @@ struct irqaction #define IRQ_PENDING 4 /* IRQ pending - replay on enable */ #define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */ #define IRQ_GUEST 16 /* IRQ is handled by guest OS(es) */ -#define IRQ_LEVEL 64 /* IRQ level triggered */ #define IRQ_PER_CPU 256 /* IRQ is per CPU */ /* @@ -78,7 +77,7 @@ struct vcpu; extern int pirq_guest_eoi(struct domain *d, int irq); extern int pirq_guest_unmask(struct domain *d); extern int pirq_guest_bind(struct vcpu *v, int irq, int will_share); -extern void pirq_guest_unbind(struct domain *d, int irq); +extern int pirq_guest_unbind(struct domain *d, int irq); static inline void set_native_irq_info(int irq, cpumask_t mask) {