# HG changeset patch # User Keir Fraser # Date 1227878852 0 # Node ID c820bf73a914f643ab48864629c0559e68ceede1 # Parent 8dbf23c89cc6a4fbd7b9063b14e706c065ba1678 x86: add a shared page indicating the need for an EOI notification To simplify the interface for the guest, when a guest uses this new (sub-)hypercall, PHYSDEVOP_eoi behavior changes to unmask the corresponding event channel at once, avoiding the eventual need for a second hypercall from the guest. Signed-off-by: Jan Beulich Signed-off-by: Keir Fraser 18846: x86: Fix PHYSDEVOP_pirq_eoi_mfn, which I modified and broke. Signed-off-by: Keir Fraser 18851: x86: Fix mfn_to_virt() to cast MFN to address size. Signed-off-by: Keir Fraser Index: xen-3.3.1-testing/xen/arch/x86/domain.c =================================================================== --- xen-3.3.1-testing.orig/xen/arch/x86/domain.c +++ xen-3.3.1-testing/xen/arch/x86/domain.c @@ -1812,6 +1812,13 @@ int domain_relinquish_resources(struct d unmap_vcpu_info(v); } + if ( d->arch.pirq_eoi_map != NULL ) + { + unmap_domain_page_global(d->arch.pirq_eoi_map); + put_page_and_type(mfn_to_page(d->arch.pirq_eoi_map_mfn)); + d->arch.pirq_eoi_map = NULL; + } + d->arch.relmem = RELMEM_xen; /* fallthrough */ Index: xen-3.3.1-testing/xen/arch/x86/irq.c =================================================================== --- xen-3.3.1-testing.orig/xen/arch/x86/irq.c +++ xen-3.3.1-testing/xen/arch/x86/irq.c @@ -18,6 +18,7 @@ #include #include #include +#include #include /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */ @@ -206,16 +207,42 @@ struct pending_eoi { static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_VECTORS]); #define pending_eoi_sp(p) ((p)[NR_VECTORS-1].vector) +static inline void set_pirq_eoi(struct domain *d, unsigned int irq) +{ + if ( d->arch.pirq_eoi_map ) + set_bit(irq, d->arch.pirq_eoi_map); +} + +static inline void clear_pirq_eoi(struct domain *d, unsigned int irq) +{ + if ( d->arch.pirq_eoi_map ) + clear_bit(irq, d->arch.pirq_eoi_map); +} + +static void _irq_guest_eoi(irq_desc_t *desc) +{ + irq_guest_action_t *action = (irq_guest_action_t *)desc->action; + unsigned int i, vector = desc - irq_desc; + + if ( !(desc->status & IRQ_GUEST_EOI_PENDING) ) + return; + + for ( i = 0; i < action->nr_guests; ++i ) + clear_pirq_eoi(action->guest[i], + domain_vector_to_irq(action->guest[i], vector)); + + desc->status &= ~(IRQ_INPROGRESS|IRQ_GUEST_EOI_PENDING); + desc->handler->enable(vector); +} + static struct timer irq_guest_eoi_timer[NR_IRQS]; static void irq_guest_eoi_timer_fn(void *data) { irq_desc_t *desc = data; - unsigned vector = desc - irq_desc; unsigned long flags; spin_lock_irqsave(&desc->lock, flags); - desc->status &= ~IRQ_INPROGRESS; - desc->handler->enable(vector); + _irq_guest_eoi(desc); spin_unlock_irqrestore(&desc->lock, flags); } @@ -272,8 +299,22 @@ static void __do_IRQ_guest(int vector) if ( already_pending == action->nr_guests ) { - desc->handler->disable(vector); stop_timer(&irq_guest_eoi_timer[vector]); + desc->handler->disable(vector); + desc->status |= IRQ_GUEST_EOI_PENDING; + for ( i = 0; i < already_pending; ++i ) + { + d = action->guest[i]; + set_pirq_eoi(d, domain_vector_to_irq(d, vector)); + /* + * Could check here whether the guest unmasked the event by now + * (or perhaps just re-issue the send_guest_pirq()), and if it + * can now accept the event, + * - clear all the pirq_eoi bits we already set, + * - re-enable the vector, and + * - skip the timer setup below. + */ + } init_timer(&irq_guest_eoi_timer[vector], irq_guest_eoi_timer_fn, desc, smp_processor_id()); set_timer(&irq_guest_eoi_timer[vector], NOW() + MILLISECS(1)); @@ -382,8 +423,12 @@ static void __pirq_guest_eoi(struct doma action = (irq_guest_action_t *)desc->action; vector = desc - irq_desc; - ASSERT(!test_bit(irq, d->pirq_mask) || - (action->ack_type != ACKTYPE_NONE)); + if ( action->ack_type == ACKTYPE_NONE ) + { + ASSERT(!test_bit(irq, d->pirq_mask)); + stop_timer(&irq_guest_eoi_timer[vector]); + _irq_guest_eoi(desc); + } if ( unlikely(!test_and_clear_bit(irq, d->pirq_mask)) || unlikely(--action->in_flight != 0) ) @@ -604,6 +649,11 @@ int pirq_guest_bind(struct vcpu *v, int action->guest[action->nr_guests++] = v->domain; + if ( action->ack_type != ACKTYPE_NONE ) + set_pirq_eoi(v->domain, irq); + else + clear_pirq_eoi(v->domain, irq); + unlock_out: spin_unlock_irq(&desc->lock); out: Index: xen-3.3.1-testing/xen/arch/x86/physdev.c =================================================================== --- xen-3.3.1-testing.orig/xen/arch/x86/physdev.c +++ xen-3.3.1-testing/xen/arch/x86/physdev.c @@ -204,10 +204,50 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H ret = -EFAULT; if ( copy_from_guest(&eoi, arg, 1) != 0 ) break; + ret = -EINVAL; + if ( eoi.irq < 0 || eoi.irq >= NR_IRQS ) + break; + if ( v->domain->arch.pirq_eoi_map ) + evtchn_unmask(v->domain->pirq_to_evtchn[eoi.irq]); ret = pirq_guest_eoi(v->domain, eoi.irq); break; } + case PHYSDEVOP_pirq_eoi_mfn: { + struct physdev_pirq_eoi_mfn info; + + BUILD_BUG_ON(NR_IRQS > (PAGE_SIZE * 8)); + + ret = -EFAULT; + if ( copy_from_guest(&info, arg, 1) != 0 ) + break; + + ret = -EINVAL; + if ( !mfn_valid(info.mfn) || + !get_page_and_type(mfn_to_page(info.mfn), v->domain, + PGT_writable_page) ) + break; + + if ( cmpxchg(&v->domain->arch.pirq_eoi_map_mfn, 0, info.mfn) != 0 ) + { + put_page_and_type(mfn_to_page(info.mfn)); + ret = -EBUSY; + break; + } + + v->domain->arch.pirq_eoi_map = map_domain_page_global(info.mfn); + if ( v->domain->arch.pirq_eoi_map == NULL ) + { + v->domain->arch.pirq_eoi_map_mfn = 0; + put_page_and_type(mfn_to_page(info.mfn)); + ret = -ENOSPC; + break; + } + + ret = 0; + break; + } + /* Legacy since 0x00030202. */ case PHYSDEVOP_IRQ_UNMASK_NOTIFY: { ret = pirq_guest_unmask(v->domain); Index: xen-3.3.1-testing/xen/arch/x86/x86_64/physdev.c =================================================================== --- xen-3.3.1-testing.orig/xen/arch/x86/x86_64/physdev.c +++ xen-3.3.1-testing/xen/arch/x86/x86_64/physdev.c @@ -18,6 +18,9 @@ #define physdev_eoi compat_physdev_eoi #define physdev_eoi_t physdev_eoi_compat_t +#define physdev_pirq_eoi_mfn compat_physdev_pirq_eoi_mfn +#define physdev_pirq_eoi_mfn_t physdev_pirq_eoi_mfn_compat_t + #define physdev_set_iobitmap compat_physdev_set_iobitmap #define physdev_set_iobitmap_t physdev_set_iobitmap_compat_t Index: xen-3.3.1-testing/xen/common/event_channel.c =================================================================== --- xen-3.3.1-testing.orig/xen/common/event_channel.c +++ xen-3.3.1-testing/xen/common/event_channel.c @@ -762,10 +762,9 @@ long evtchn_bind_vcpu(unsigned int port, } -static long evtchn_unmask(evtchn_unmask_t *unmask) +int evtchn_unmask(unsigned int port) { struct domain *d = current->domain; - int port = unmask->port; struct vcpu *v; spin_lock(&d->event_lock); @@ -916,7 +915,7 @@ long do_event_channel_op(int cmd, XEN_GU struct evtchn_unmask unmask; if ( copy_from_guest(&unmask, arg, 1) != 0 ) return -EFAULT; - rc = evtchn_unmask(&unmask); + rc = evtchn_unmask(unmask.port); break; } Index: xen-3.3.1-testing/xen/include/asm-x86/domain.h =================================================================== --- xen-3.3.1-testing.orig/xen/include/asm-x86/domain.h +++ xen-3.3.1-testing/xen/include/asm-x86/domain.h @@ -239,6 +239,10 @@ struct arch_domain int vector_pirq[NR_VECTORS]; int pirq_vector[NR_PIRQS]; + /* Shared page for notifying that explicit PIRQ EOI is required. */ + unsigned long *pirq_eoi_map; + unsigned long pirq_eoi_map_mfn; + /* Pseudophysical e820 map (XENMEM_memory_map). */ struct e820entry e820[3]; unsigned int nr_e820; Index: xen-3.3.1-testing/xen/include/public/physdev.h =================================================================== --- xen-3.3.1-testing.orig/xen/include/public/physdev.h +++ xen-3.3.1-testing/xen/include/public/physdev.h @@ -41,6 +41,21 @@ typedef struct physdev_eoi physdev_eoi_t DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); /* + * Register a shared page for the hypervisor to indicate whether the guest + * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly + * once the guest used this function in that the associated event channel + * will automatically get unmasked. The page registered is used as a bit + * array indexed by Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_mfn 17 +struct physdev_pirq_eoi_mfn { + /* IN */ + xen_pfn_t mfn; +}; +typedef struct physdev_pirq_eoi_mfn physdev_pirq_eoi_mfn_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_mfn_t); + +/* * Query the status of an IRQ line. * @arg == pointer to physdev_irq_status_query structure. */ Index: xen-3.3.1-testing/xen/include/xen/event.h =================================================================== --- xen-3.3.1-testing.orig/xen/include/xen/event.h +++ xen-3.3.1-testing/xen/include/xen/event.h @@ -44,6 +44,9 @@ int evtchn_send(struct domain *d, unsign /* Bind a local event-channel port to the specified VCPU. */ long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id); +/* Unmask a local event-channel port. */ +int evtchn_unmask(unsigned int port); + /* Allocate/free a Xen-attached event channel port. */ int alloc_unbound_xen_event_channel( struct vcpu *local_vcpu, domid_t remote_domid); Index: xen-3.3.1-testing/xen/include/xen/irq.h =================================================================== --- xen-3.3.1-testing.orig/xen/include/xen/irq.h +++ xen-3.3.1-testing/xen/include/xen/irq.h @@ -22,6 +22,7 @@ struct irqaction #define IRQ_PENDING 4 /* IRQ pending - replay on enable */ #define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */ #define IRQ_GUEST 16 /* IRQ is handled by guest OS(es) */ +#define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */ #define IRQ_PER_CPU 256 /* IRQ is per CPU */ /*