From 08a77ed8c4071c36269f43e1fb5069a3f1831e9a59c65731cf9389f716c7433f Mon Sep 17 00:00:00 2001 From: Charles Arnold Date: Wed, 20 Oct 2010 21:00:35 +0000 Subject: [PATCH] - fate#310510 - fix xenpaging xenpaging.tools_xenpaging_cleanup.patch - fate#310510 - fix xenpaging xenpaging.mem_event_check_ring-free_requests.patch - install /etc/xen/examples/xentrace_formats.txt to get human readable tracedata if xenalyze is not used - fate#310510 - fix xenpaging xenpaging.autostart_delay.patch xenpaging.blacklist.patch xenpaging.MRU_SIZE.patch remove xenpaging.hacks.patch, realmode works - Upstream patches from Jan including fixes for the following bugs bnc#583568 - Xen kernel is not booting bnc#615206 - Xen kernel fails to boot with IO-APIC problem bnc#640773 - Xen kernel crashing right after grub bnc#643477 - issues with PCI hotplug/hotunplug to Xen driver domain 22223-vtd-igd-workaround.patch 22222-x86-timer-extint.patch 22214-x86-msr-misc-enable.patch 22213-x86-xsave-cpuid-check.patch 22194-tmem-check-pv-mfn.patch 22177-i386-irq-safe-map_domain_page.patch 22175-x86-irq-enter-exit.patch 22174-x86-pmtimer-accuracy.patch 22160-Intel-C6-EOI.patch OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=76 --- 22157-x86-debug-key-i.patch | 21 + 22159-notify-evtchn-dying.patch | 27 ++ 22160-Intel-C6-EOI.patch | 91 ++++ 22174-x86-pmtimer-accuracy.patch | 68 +++ 22175-x86-irq-enter-exit.patch | 59 +++ 22177-i386-irq-safe-map_domain_page.patch | 92 ++++ 22194-tmem-check-pv-mfn.patch | 240 +++++++++ 22213-x86-xsave-cpuid-check.patch | 50 ++ 22214-x86-msr-misc-enable.patch | 83 ++++ 22222-x86-timer-extint.patch | 70 +++ 22223-vtd-igd-workaround.patch | 131 +++++ 32on64-extra-mem.patch | 2 +- altgr_2.patch | 56 +++ block-dmmd | 6 +- change_home_server.patch | 16 + check_device_status.patch | 7 + cpupools-core.patch | 351 +++++--------- del_usb_xend_entry.patch | 2 +- ioemu-vnc-resize.patch | 8 +- multi-xvdp.patch | 28 +- x86-cpufreq-report.patch | 20 +- 
x86-ioapic-ack-default.patch | 10 +- xen-4.0.1-testing-src.tar.bz2 | 4 +- xen-disable-libxl.diff | 13 - xen-disable-xenpaging.diff | 13 - xen.changes | 113 ++++- xen.spec | 128 ++++- xend-domain-lock.patch | 4 +- xenpaging.MRU_SIZE.patch | 23 + xenpaging.autostart.patch | 234 +++++++++ xenpaging.autostart_delay.patch | 74 +++ xenpaging.blacklist.patch | 27 ++ xenpaging.get_paged_frame.patch | 170 +++++++ xenpaging.makefile.patch | 13 + ...g.mem_event_check_ring-free_requests.patch | 25 + ...ing.mem_paging_tool_qemu_flush_cache.patch | 29 ++ xenpaging.memory_op.patch | 454 ++++++++++++++++++ xenpaging.pagefile.patch | 48 ++ xenpaging.pageout_policy.patch | 27 ++ xenpaging.policy_linear.patch | 122 +++++ xenpaging.populate_only_if_paged.patch | 114 +++++ xenpaging.signal_handling.patch | 161 +++++++ xenpaging.tools_xenpaging_cleanup.patch | 54 +++ xenpaging.xenpaging_init.patch | 53 ++ xenpaging.xs_daemon_close.patch | 22 + 45 files changed, 3035 insertions(+), 328 deletions(-) create mode 100644 22157-x86-debug-key-i.patch create mode 100644 22159-notify-evtchn-dying.patch create mode 100644 22160-Intel-C6-EOI.patch create mode 100644 22174-x86-pmtimer-accuracy.patch create mode 100644 22175-x86-irq-enter-exit.patch create mode 100644 22177-i386-irq-safe-map_domain_page.patch create mode 100644 22194-tmem-check-pv-mfn.patch create mode 100644 22213-x86-xsave-cpuid-check.patch create mode 100644 22214-x86-msr-misc-enable.patch create mode 100644 22222-x86-timer-extint.patch create mode 100644 22223-vtd-igd-workaround.patch create mode 100644 altgr_2.patch create mode 100644 change_home_server.patch delete mode 100644 xen-disable-libxl.diff delete mode 100644 xen-disable-xenpaging.diff create mode 100644 xenpaging.MRU_SIZE.patch create mode 100644 xenpaging.autostart.patch create mode 100644 xenpaging.autostart_delay.patch create mode 100644 xenpaging.blacklist.patch create mode 100644 xenpaging.get_paged_frame.patch create mode 100644 xenpaging.makefile.patch create 
mode 100644 xenpaging.mem_event_check_ring-free_requests.patch create mode 100644 xenpaging.mem_paging_tool_qemu_flush_cache.patch create mode 100644 xenpaging.memory_op.patch create mode 100644 xenpaging.pagefile.patch create mode 100644 xenpaging.pageout_policy.patch create mode 100644 xenpaging.policy_linear.patch create mode 100644 xenpaging.populate_only_if_paged.patch create mode 100644 xenpaging.signal_handling.patch create mode 100644 xenpaging.tools_xenpaging_cleanup.patch create mode 100644 xenpaging.xenpaging_init.patch create mode 100644 xenpaging.xs_daemon_close.patch diff --git a/22157-x86-debug-key-i.patch b/22157-x86-debug-key-i.patch new file mode 100644 index 0000000..080e29f --- /dev/null +++ b/22157-x86-debug-key-i.patch @@ -0,0 +1,21 @@ +# HG changeset patch +# User Keir Fraser +# Date 1284533274 -3600 +# Node ID d4976434b8bba469fd1d337dc16249a5abfc4e5a +# Parent 14ce571d157e060fdb390e70fa8d0c95b2fd9b76 +x86: fix debug key 'i' handling with no IO-APICs + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -2463,6 +2463,9 @@ void dump_ioapic_irq_info(void) + unsigned int irq, pin, printed = 0; + unsigned long flags; + ++ if ( !irq_2_pin ) ++ return; ++ + for ( irq = 0; irq < nr_irqs_gsi; irq++ ) + { + entry = &irq_2_pin[irq]; diff --git a/22159-notify-evtchn-dying.patch b/22159-notify-evtchn-dying.patch new file mode 100644 index 0000000..1cf5c62 --- /dev/null +++ b/22159-notify-evtchn-dying.patch @@ -0,0 +1,27 @@ +# HG changeset patch +# User Keir Fraser +# Date 1284535133 -3600 +# Node ID 62edd2611cbbe4c50574b6f6f73dda2ae1136dde +# Parent 869a0fdf8686c3dada14122df6d22a38705c2401 +notify_via_xen_event_channel() should check for dying domain. + +Else we can fail on either ASSERTion in that function. 
+ +From: Olaf Hering +Signed-off-by: Keir Fraser + +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c +@@ -994,6 +994,12 @@ void notify_via_xen_event_channel(struct + + spin_lock(&ld->event_lock); + ++ if ( unlikely(ld->is_dying) ) ++ { ++ spin_unlock(&ld->event_lock); ++ return; ++ } ++ + ASSERT(port_is_valid(ld, lport)); + lchn = evtchn_from_port(ld, lport); + ASSERT(lchn->consumer_is_xen); diff --git a/22160-Intel-C6-EOI.patch b/22160-Intel-C6-EOI.patch new file mode 100644 index 0000000..213916c --- /dev/null +++ b/22160-Intel-C6-EOI.patch @@ -0,0 +1,91 @@ +# HG changeset patch +# User Keir Fraser +# Date 1284537635 -3600 +# Node ID 1087f9a03ab61d3a8bb0a1c65e5b09f82f3a4277 +# Parent 62edd2611cbbe4c50574b6f6f73dda2ae1136dde +C6 state with EOI issue fix for some Intel processors + +There is an errata in some of Intel processors. + +AAJ72. EOI Transaction May Not be Sent if Software Enters Core C6 +During an Interrupt Service Routine + +If core C6 is entered after the start of an interrupt service routine +but before a write to the APIC EOI register, the core may not send an +EOI transaction (if needed) and further interrupts from the same +priority level or lower may be blocked. + +This patch fix this issue, by checking if ISR is pending before enter +deep Cx state. If so, it would use power->safe_state instead of deep +Cx state to prevent the above issue happen. + +Signed-off-by: Sheng Yang +Signed-off-by: Keir Fraser + +--- a/xen/arch/x86/acpi/cpu_idle.c ++++ b/xen/arch/x86/acpi/cpu_idle.c +@@ -226,6 +226,31 @@ static int sched_has_urgent_vcpu(void) + return atomic_read(&this_cpu(schedule_data).urgent_count); + } + ++/* ++ * "AAJ72. EOI Transaction May Not be Sent if Software Enters Core C6 During ++ * an Interrupt Service Routine" ++ * ++ * There was an errata with some Core i7 processors that an EOI transaction ++ * may not be sent if software enters core C6 during an interrupt service ++ * routine. 
So we don't enter deep Cx state if there is an EOI pending. ++ */ ++bool_t errata_c6_eoi_workaround(void) ++{ ++ static bool_t fix_needed = -1; ++ ++ if ( unlikely(fix_needed == -1) ) ++ { ++ int model = boot_cpu_data.x86_model; ++ fix_needed = (cpu_has_apic && !directed_eoi_enabled && ++ (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && ++ (boot_cpu_data.x86 == 6) && ++ ((model == 0x1a) || (model == 0x1e) || (model == 0x1f) || ++ (model == 0x25) || (model == 0x2c) || (model == 0x2f))); ++ } ++ ++ return (fix_needed && cpu_has_pending_apic_eoi()); ++} ++ + static void acpi_processor_idle(void) + { + struct acpi_processor_power *power = processor_powers[smp_processor_id()]; +@@ -277,6 +302,9 @@ static void acpi_processor_idle(void) + return; + } + ++ if ( (cx->type == ACPI_STATE_C3) && errata_c6_eoi_workaround() ) ++ cx = power->safe_state; ++ + power->last_state = cx; + + /* +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -752,6 +752,11 @@ struct pending_eoi { + static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_DYNAMIC_VECTORS]); + #define pending_eoi_sp(p) ((p)[NR_DYNAMIC_VECTORS-1].vector) + ++bool_t cpu_has_pending_apic_eoi(void) ++{ ++ return (pending_eoi_sp(this_cpu(pending_eoi)) != 0); ++} ++ + static inline void set_pirq_eoi(struct domain *d, unsigned int irq) + { + if ( d->arch.pirq_eoi_map ) +--- a/xen/include/asm-x86/irq.h ++++ b/xen/include/asm-x86/irq.h +@@ -150,4 +150,6 @@ void irq_set_affinity(int irq, cpumask_t + #define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq]) + #define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq]) + ++bool_t cpu_has_pending_apic_eoi(void); ++ + #endif /* _ASM_HW_IRQ_H */ diff --git a/22174-x86-pmtimer-accuracy.patch b/22174-x86-pmtimer-accuracy.patch new file mode 100644 index 0000000..3de6dec --- /dev/null +++ b/22174-x86-pmtimer-accuracy.patch @@ -0,0 +1,68 @@ +# HG changeset patch +# User Keir Fraser +# Date 1284739161 -3600 +# Node ID 632c02167f97bb2bd25571b2780425b9b75949b4 +# Parent 
1b05090854ba83576aa8399fa70e481f5b602417 +hvm pmtimer: correct pmtimer accuracy + +Several seconds of backward time drift per minute can be seen on a +RHEL6 HVM guest by switching the clocksource to 'acpi_pm' and then +running gettimeofday() in a loop. This is due to the accumulation +of small inaccuracies that are caused by shifting out the lower 32 +bits when pmt_update_time() computes 'tmr_val'. + +The patch makes sure that the lower 32 bits of the computed value +are not lost. They are saved in a new field 'not_accounted' in the +PMTState structure and are accounted the next time pmt_update_time() +is called. + +From: Ulrich Obergfell +Signed-off-by: Keir Fraser + +--- a/xen/arch/x86/hvm/pmtimer.c ++++ b/xen/arch/x86/hvm/pmtimer.c +@@ -83,14 +83,16 @@ void hvm_acpi_sleep_button(struct domain + * since the last time we did that. */ + static void pmt_update_time(PMTState *s) + { +- uint64_t curr_gtime; ++ uint64_t curr_gtime, tmp; + uint32_t msb = s->pm.tmr_val & TMR_VAL_MSB; + + ASSERT(spin_is_locked(&s->lock)); + + /* Update the timer */ + curr_gtime = hvm_get_guest_time(s->vcpu); +- s->pm.tmr_val += ((curr_gtime - s->last_gtime) * s->scale) >> 32; ++ tmp = ((curr_gtime - s->last_gtime) * s->scale) + s->not_accounted; ++ s->not_accounted = (uint32_t)tmp; ++ s->pm.tmr_val += tmp >> 32; + s->pm.tmr_val &= TMR_VAL_MASK; + s->last_gtime = curr_gtime; + +@@ -257,6 +259,7 @@ static int pmtimer_load(struct domain *d + + /* Calculate future counter values from now. 
*/ + s->last_gtime = hvm_get_guest_time(s->vcpu); ++ s->not_accounted = 0; + + /* Set the SCI state from the registers */ + pmt_update_sci(s); +@@ -276,6 +279,7 @@ void pmtimer_init(struct vcpu *v) + spin_lock_init(&s->lock); + + s->scale = ((uint64_t)FREQUENCE_PMTIMER << 32) / SYSTEM_TIME_HZ; ++ s->not_accounted = 0; + s->vcpu = v; + + /* Intercept port I/O (need two handlers because PM1a_CNT is between +--- a/xen/include/asm-x86/hvm/vpt.h ++++ b/xen/include/asm-x86/hvm/vpt.h +@@ -117,6 +117,7 @@ typedef struct PMTState { + struct hvm_hw_pmtimer pm; /* 32bit timer value */ + struct vcpu *vcpu; /* Keeps sync with this vcpu's guest-time */ + uint64_t last_gtime; /* Last (guest) time we updated the timer */ ++ uint32_t not_accounted; /* time not accounted at last update */ + uint64_t scale; /* Multiplier to get from tsc to timer ticks */ + struct timer timer; /* To make sure we send SCIs */ + spinlock_t lock; diff --git a/22175-x86-irq-enter-exit.patch b/22175-x86-irq-enter-exit.patch new file mode 100644 index 0000000..e3c6167 --- /dev/null +++ b/22175-x86-irq-enter-exit.patch @@ -0,0 +1,59 @@ +# HG changeset patch +# User Keir Fraser +# Date 1284795115 -3600 +# Node ID ee3c640732311ef6bc5e2de56c3b4b753cb020fa +# Parent 632c02167f97bb2bd25571b2780425b9b75949b4 +x86: irq_enter()/irq_exit() covers all of do_IRQ(). + +Signed-off-by: Keir Fraser + +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -535,6 +535,8 @@ asmlinkage void do_IRQ(struct cpu_user_r + return; + } + ++ irq_enter(); ++ + desc = irq_to_desc(irq); + + spin_lock(&desc->lock); +@@ -568,14 +570,10 @@ asmlinkage void do_IRQ(struct cpu_user_r + desc->rl_quantum_start = now; + } + +- irq_enter(); + tsc_in = tb_init_done ? 
get_cycles() : 0; + __do_IRQ_guest(irq); + TRACE_3D(TRC_TRACE_IRQ, irq, tsc_in, get_cycles()); +- irq_exit(); +- spin_unlock(&desc->lock); +- set_irq_regs(old_regs); +- return; ++ goto out_no_end; + } + + desc->status &= ~IRQ_REPLAY; +@@ -594,20 +592,20 @@ asmlinkage void do_IRQ(struct cpu_user_r + while ( desc->status & IRQ_PENDING ) + { + desc->status &= ~IRQ_PENDING; +- irq_enter(); + spin_unlock_irq(&desc->lock); + tsc_in = tb_init_done ? get_cycles() : 0; + action->handler(irq, action->dev_id, regs); + TRACE_3D(TRC_TRACE_IRQ, irq, tsc_in, get_cycles()); + spin_lock_irq(&desc->lock); +- irq_exit(); + } + + desc->status &= ~IRQ_INPROGRESS; + + out: + desc->handler->end(irq); ++ out_no_end: + spin_unlock(&desc->lock); ++ irq_exit(); + set_irq_regs(old_regs); + } + diff --git a/22177-i386-irq-safe-map_domain_page.patch b/22177-i386-irq-safe-map_domain_page.patch new file mode 100644 index 0000000..fda402b --- /dev/null +++ b/22177-i386-irq-safe-map_domain_page.patch @@ -0,0 +1,92 @@ +# HG changeset patch +# User Keir Fraser +# Date 1284796635 -3600 +# Node ID 7405e0ddb912a993982e4e4122856965b7c706dd +# Parent 0da4bfd2bc23937d2e1a8bfa6d259be0d9e482ad +x86_32: [un]map_domain_page() is now IRQ safe. 
+ +Signed-off-by: Keir Fraser + +--- a/xen/arch/x86/x86_32/domain_page.c ++++ b/xen/arch/x86/x86_32/domain_page.c +@@ -42,15 +42,13 @@ static inline struct vcpu *mapcache_curr + + void *map_domain_page(unsigned long mfn) + { +- unsigned long va; +- unsigned int idx, i, flags; ++ unsigned long va, flags; ++ unsigned int idx, i; + struct vcpu *v; + struct mapcache_domain *dcache; + struct mapcache_vcpu *vcache; + struct vcpu_maphash_entry *hashent; + +- ASSERT(!in_irq()); +- + perfc_incr(map_domain_page_count); + + v = mapcache_current_vcpu(); +@@ -58,6 +56,8 @@ void *map_domain_page(unsigned long mfn) + dcache = &v->domain->arch.mapcache; + vcache = &v->arch.mapcache; + ++ local_irq_save(flags); ++ + hashent = &vcache->hash[MAPHASH_HASHFN(mfn)]; + if ( hashent->mfn == mfn ) + { +@@ -69,7 +69,7 @@ void *map_domain_page(unsigned long mfn) + goto out; + } + +- spin_lock_irqsave(&dcache->lock, flags); ++ spin_lock(&dcache->lock); + + /* Has some other CPU caused a wrap? We must flush if so. */ + if ( unlikely(dcache->epoch != vcache->shadow_epoch) ) +@@ -105,11 +105,12 @@ void *map_domain_page(unsigned long mfn) + set_bit(idx, dcache->inuse); + dcache->cursor = idx + 1; + +- spin_unlock_irqrestore(&dcache->lock, flags); ++ spin_unlock(&dcache->lock); + + l1e_write(&dcache->l1tab[idx], l1e_from_pfn(mfn, __PAGE_HYPERVISOR)); + + out: ++ local_irq_restore(flags); + va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT); + return (void *)va; + } +@@ -119,11 +120,9 @@ void unmap_domain_page(const void *va) + unsigned int idx; + struct vcpu *v; + struct mapcache_domain *dcache; +- unsigned long mfn; ++ unsigned long mfn, flags; + struct vcpu_maphash_entry *hashent; + +- ASSERT(!in_irq()); +- + ASSERT((void *)MAPCACHE_VIRT_START <= va); + ASSERT(va < (void *)MAPCACHE_VIRT_END); + +@@ -135,6 +134,8 @@ void unmap_domain_page(const void *va) + mfn = l1e_get_pfn(dcache->l1tab[idx]); + hashent = &v->arch.mapcache.hash[MAPHASH_HASHFN(mfn)]; + ++ local_irq_save(flags); ++ + if ( hashent->idx 
== idx ) + { + ASSERT(hashent->mfn == mfn); +@@ -163,6 +164,8 @@ void unmap_domain_page(const void *va) + /* /Second/, mark as garbage. */ + set_bit(idx, dcache->garbage); + } ++ ++ local_irq_restore(flags); + } + + void mapcache_domain_init(struct domain *d) diff --git a/22194-tmem-check-pv-mfn.patch b/22194-tmem-check-pv-mfn.patch new file mode 100644 index 0000000..d23452f --- /dev/null +++ b/22194-tmem-check-pv-mfn.patch @@ -0,0 +1,240 @@ +# HG changeset patch +# User Keir Fraser +# Date 1285142048 -3600 +# Node ID e8e3aeed3ebacac6faa5795f67b195a434562323 +# Parent 35a1a14c408e60eca608a67a79f38ae5fdf3ea19 +tmem: disallow bad gmfns from PV domains + +Mfns for PV domains were not properly checked, potentially +allowing a buggy or malicious PV guest to crash Xen. Also, +use get_page/put_page to claim a reference to the pages +so they can't disappear out from under tmem's feet. + +Signed-off-by: Dan Magenheimer + +--- a/xen/common/tmem_xen.c ++++ b/xen/common/tmem_xen.c +@@ -87,49 +87,88 @@ void tmh_copy_page(char *to, char*from) + } + + #ifdef __ia64__ +-static inline void *cli_mfn_to_va(tmem_cli_mfn_t cmfn, unsigned long *pcli_mfn) ++static inline void *cli_get_page(tmem_cli_mfn_t cmfn, unsigned long *pcli_mfn, ++ pfp_t **pcli_pfp, bool_t cli_write) + { + ASSERT(0); + return NULL; + } +-#define paging_mark_dirty(_x,_y) do {} while(0) ++ ++static inline void cli_put_page(void *cli_va, struct page_info *cli_pfp, ++ bool_t mark_dirty) ++{ ++ ASSERT(0); ++} + #else +-static inline void *cli_mfn_to_va(tmem_cli_mfn_t cmfn, unsigned long *pcli_mfn) ++static inline void *cli_get_page(tmem_cli_mfn_t cmfn, unsigned long *pcli_mfn, ++ pfp_t **pcli_pfp, bool_t cli_write) + { + unsigned long cli_mfn; + p2m_type_t t; ++ struct page_info *page; ++ int ret; + + cli_mfn = mfn_x(gfn_to_mfn(current->domain, cmfn, &t)); +- if (t != p2m_ram_rw || cli_mfn == INVALID_MFN) ++ if ( t != p2m_ram_rw || !mfn_valid(cli_mfn) ) ++ return NULL; ++ page = mfn_to_page(cli_mfn); ++ if ( cli_write 
) ++ ret = get_page_and_type(page, current->domain, PGT_writable_page); ++ else ++ ret = get_page(page, current->domain); ++ if ( !ret ) + return NULL; +- if (pcli_mfn != NULL) +- *pcli_mfn = cli_mfn; ++ *pcli_mfn = cli_mfn; ++ *pcli_pfp = (pfp_t *)page; + return map_domain_page(cli_mfn); + } ++ ++static inline void cli_put_page(void *cli_va, pfp_t *cli_pfp, ++ unsigned long cli_mfn, bool_t mark_dirty) ++{ ++ if ( mark_dirty ) ++ { ++ put_page_and_type((struct page_info *)cli_pfp); ++ paging_mark_dirty(current->domain,cli_mfn); ++ } ++ else ++ put_page((struct page_info *)cli_pfp); ++ unmap_domain_page(cli_va); ++} + #endif + + EXPORT int tmh_copy_from_client(pfp_t *pfp, + tmem_cli_mfn_t cmfn, pagesize_t tmem_offset, + pagesize_t pfn_offset, pagesize_t len, void *cli_va) + { +- unsigned long tmem_mfn; ++ unsigned long tmem_mfn, cli_mfn = 0; + void *tmem_va; ++ pfp_t *cli_pfp = NULL; ++ bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */ + + ASSERT(pfp != NULL); +- if ( tmem_offset || pfn_offset || len ) +- if ( (cli_va == NULL) && ((cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL) ) +- return -EFAULT; + tmem_mfn = page_to_mfn(pfp); + tmem_va = map_domain_page(tmem_mfn); +- mb(); +- if (!len && !tmem_offset && !pfn_offset) ++ if ( tmem_offset == 0 && pfn_offset == 0 && len == 0 ) ++ { + memset(tmem_va, 0, PAGE_SIZE); +- else if (len == PAGE_SIZE && !tmem_offset && !pfn_offset) ++ unmap_domain_page(tmem_va); ++ return 1; ++ } ++ if ( !tmemc ) ++ { ++ cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 0); ++ if ( cli_va == NULL ) ++ return -EFAULT; ++ } ++ mb(); ++ if (len == PAGE_SIZE && !tmem_offset && !pfn_offset) + tmh_copy_page(tmem_va, cli_va); + else if ( (tmem_offset+len <= PAGE_SIZE) && +- (pfn_offset+len <= PAGE_SIZE) ) ++ (pfn_offset+len <= PAGE_SIZE) ) + memcpy((char *)tmem_va+tmem_offset,(char *)cli_va+pfn_offset,len); +- unmap_domain_page(cli_va); ++ if ( !tmemc ) ++ cli_put_page(cli_va, cli_pfp, cli_mfn, 0); + unmap_domain_page(tmem_va); + 
return 1; + } +@@ -140,15 +179,24 @@ EXPORT int tmh_compress_from_client(tmem + int ret = 0; + unsigned char *dmem = this_cpu(dstmem); + unsigned char *wmem = this_cpu(workmem); ++ pfp_t *cli_pfp = NULL; ++ unsigned long cli_mfn = 0; ++ bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */ + +- if ( (cli_va == NULL) && (cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL) +- return -EFAULT; + if ( dmem == NULL || wmem == NULL ) + return 0; /* no buffer, so can't compress */ ++ if ( !tmemc ) ++ { ++ cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 0); ++ if ( cli_va == NULL ) ++ return -EFAULT; ++ } + mb(); + ret = lzo1x_1_compress(cli_va, PAGE_SIZE, dmem, out_len, wmem); + ASSERT(ret == LZO_E_OK); + *out_va = dmem; ++ if ( !tmemc ) ++ cli_put_page(cli_va, cli_pfp, cli_mfn, 0); + unmap_domain_page(cli_va); + return 1; + } +@@ -157,14 +205,17 @@ EXPORT int tmh_copy_to_client(tmem_cli_m + pagesize_t tmem_offset, pagesize_t pfn_offset, pagesize_t len, void *cli_va) + { + unsigned long tmem_mfn, cli_mfn = 0; +- int mark_dirty = 1; + void *tmem_va; ++ pfp_t *cli_pfp = NULL; ++ bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */ + + ASSERT(pfp != NULL); +- if ( cli_va != NULL ) +- mark_dirty = 0; +- else if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL) +- return -EFAULT; ++ if ( !tmemc ) ++ { ++ cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1); ++ if ( cli_va == NULL ) ++ return -EFAULT; ++ } + tmem_mfn = page_to_mfn(pfp); + tmem_va = map_domain_page(tmem_mfn); + if (len == PAGE_SIZE && !tmem_offset && !pfn_offset) +@@ -172,11 +223,8 @@ EXPORT int tmh_copy_to_client(tmem_cli_m + else if ( (tmem_offset+len <= PAGE_SIZE) && (pfn_offset+len <= PAGE_SIZE) ) + memcpy((char *)cli_va+pfn_offset,(char *)tmem_va+tmem_offset,len); + unmap_domain_page(tmem_va); +- if ( mark_dirty ) +- { +- unmap_domain_page(cli_va); +- paging_mark_dirty(current->domain,cli_mfn); +- } ++ if ( !tmemc ) ++ cli_put_page(cli_va, cli_pfp, cli_mfn, 1); + mb(); + 
return 1; + } +@@ -185,22 +233,22 @@ EXPORT int tmh_decompress_to_client(tmem + size_t size, void *cli_va) + { + unsigned long cli_mfn = 0; +- int mark_dirty = 1; ++ pfp_t *cli_pfp = NULL; + size_t out_len = PAGE_SIZE; ++ bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */ + int ret; + +- if ( cli_va != NULL ) +- mark_dirty = 0; +- else if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL) +- return -EFAULT; ++ if ( !tmemc ) ++ { ++ cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1); ++ if ( cli_va == NULL ) ++ return -EFAULT; ++ } + ret = lzo1x_decompress_safe(tmem_va, size, cli_va, &out_len); + ASSERT(ret == LZO_E_OK); + ASSERT(out_len == PAGE_SIZE); +- if ( mark_dirty ) +- { +- unmap_domain_page(cli_va); +- paging_mark_dirty(current->domain,cli_mfn); +- } ++ if ( !tmemc ) ++ cli_put_page(cli_va, cli_pfp, cli_mfn, 1); + mb(); + return 1; + } +@@ -210,18 +258,19 @@ EXPORT int tmh_copy_tze_to_client(tmem_c + { + void *cli_va; + unsigned long cli_mfn; ++ pfp_t *cli_pfp = NULL; + + ASSERT(!(len & (sizeof(uint64_t)-1))); + ASSERT(len <= PAGE_SIZE); + ASSERT(len > 0 || tmem_va == NULL); +- if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL) ++ cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1); ++ if ( cli_va == NULL ) + return -EFAULT; + if ( len > 0 ) + memcpy((char *)cli_va,(char *)tmem_va,len); + if ( len < PAGE_SIZE ) + memset((char *)cli_va+len,0,PAGE_SIZE-len); +- unmap_domain_page(cli_va); +- paging_mark_dirty(current->domain,cli_mfn); ++ cli_put_page(cli_va, cli_pfp, cli_mfn, 1); + mb(); + return 1; + } diff --git a/22213-x86-xsave-cpuid-check.patch b/22213-x86-xsave-cpuid-check.patch new file mode 100644 index 0000000..34efeee --- /dev/null +++ b/22213-x86-xsave-cpuid-check.patch @@ -0,0 +1,50 @@ +# HG changeset patch +# User Keir Fraser +# Date 1285340011 -3600 +# Node ID eb247ea9db8c8b541a7f8c9cdc51c064c4c9e41c +# Parent 105c938eacbbc250447a676bb2088f804033b82b +x86: check CPUID level before enabling xsave +References: bnc#640773 + 
+While not as relevant after c/s 21894, is still seems safer to check +the CPUID level here, just like Linux does. The is particularly +relevant for the 4.0 tree (which doesn't have said c/s), but also +possibly for nested environments where writing MSR_IA32_MISC_ENABLE +may not actually take effect (Xen itself ignores such writes). + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/i387.c ++++ b/xen/arch/x86/i387.c +@@ -132,6 +132,8 @@ void restore_fpu(struct vcpu *v) + } + } + ++#define XSTATE_CPUID 0xd ++ + /* + * Maximum size (in byte) of the XSAVE/XRSTOR save area required by all + * the supported and enabled features on the processor, including the +@@ -148,7 +150,12 @@ void xsave_init(void) + int cpu = smp_processor_id(); + u32 min_size; + +- cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); ++ if ( boot_cpu_data.cpuid_level < XSTATE_CPUID ) { ++ printk(XENLOG_ERR "XSTATE_CPUID missing\n"); ++ return; ++ } ++ ++ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + + printk("%s: cpu%d: cntxt_max_size: 0x%x and states: %08x:%08x\n", + __func__, cpu, ecx, edx, eax); +@@ -169,7 +176,7 @@ void xsave_init(void) + */ + set_in_cr4(X86_CR4_OSXSAVE); + set_xcr0(eax & XCNTXT_MASK); +- cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); ++ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + clear_in_cr4(X86_CR4_OSXSAVE); + + if ( cpu == 0 ) diff --git a/22214-x86-msr-misc-enable.patch b/22214-x86-msr-misc-enable.patch new file mode 100644 index 0000000..e1c3e98 --- /dev/null +++ b/22214-x86-msr-misc-enable.patch @@ -0,0 +1,83 @@ +# HG changeset patch +# User Keir Fraser +# Date 1285340079 -3600 +# Node ID 71f836615ea211ac4e6f3b9793f58c6f6934c030 +# Parent eb247ea9db8c8b541a7f8c9cdc51c064c4c9e41c +x86: adjust MSR_IA32_MISC_ENABLE handling + +In the warning message issued on writes, the Xen-modified value should +be printed (and used to determine whether anything needs to be printed +at all), as the guest kernel will usually do a read-modify-write +cycle. 
+ +A question is whether Dom0 shouldn't be allowed control over some +bits, or whether some bits shouldn't be fully virtualized. I'm +particularly thinking of MSR_IA32_MISC_ENABLE_FAST_STRING, which +recent Linux kernels want to disable for CONFIG_KMEMCHECK. + +While putting this together I also noticed that rdmsr_safe() failed to +initialize its output registers in the failure path, thus leading to +printing of uninitialized data in the guest WRMSR warning message. + +Further, the default case value-changed check can be simplified. + +Signed-off-by: Jan Beulich + +--- a/xen/arch/x86/traps.c ++++ b/xen/arch/x86/traps.c +@@ -1661,6 +1661,16 @@ unsigned long guest_to_host_gpr_switch(u + + void (*pv_post_outb_hook)(unsigned int port, u8 value); + ++static inline uint32_t guest_misc_enable(uint32_t eax) ++{ ++ eax &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL | ++ MSR_IA32_MISC_ENABLE_MONITOR_ENABLE); ++ eax |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL | ++ MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | ++ MSR_IA32_MISC_ENABLE_XTPR_DISABLE; ++ return eax; ++} ++ + /* Instruction fetch with error handling. 
*/ + #define insn_fetch(type, base, eip, limit) \ + ({ unsigned long _rc, _ptr = (base) + (eip); \ +@@ -2258,6 +2268,13 @@ static int emulate_privileged_op(struct + if ( wrmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, eax, edx) != 0 ) + goto fail; + break; ++ case MSR_IA32_MISC_ENABLE: ++ if ( rdmsr_safe(regs->ecx, l, h) ) ++ goto invalid; ++ l = guest_misc_enable(l); ++ if ( eax != l || edx != h ) ++ goto invalid; ++ break; + case MSR_IA32_MPERF: + case MSR_IA32_APERF: + if (( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) && +@@ -2364,11 +2381,7 @@ static int emulate_privileged_op(struct + case MSR_IA32_MISC_ENABLE: + if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) ) + goto fail; +- regs->eax &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL | +- MSR_IA32_MISC_ENABLE_MONITOR_ENABLE); +- regs->eax |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL | +- MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL | +- MSR_IA32_MISC_ENABLE_XTPR_DISABLE; ++ regs->eax = guest_misc_enable(regs->eax); + break; + case MSR_EFER: + case MSR_AMD_PATCHLEVEL: +--- a/xen/include/asm-x86/msr.h ++++ b/xen/include/asm-x86/msr.h +@@ -39,7 +39,8 @@ static inline void wrmsrl(unsigned int m + __asm__ __volatile__( \ + "1: rdmsr\n2:\n" \ + ".section .fixup,\"ax\"\n" \ +- "3: movl %5,%2\n; jmp 2b\n" \ ++ "3: xor %0,%0\n; xor %1,%1\n" \ ++ " movl %5,%2\n; jmp 2b\n" \ + ".previous\n" \ + ".section __ex_table,\"a\"\n" \ + " "__FIXUP_ALIGN"\n" \ diff --git a/22222-x86-timer-extint.patch b/22222-x86-timer-extint.patch new file mode 100644 index 0000000..d1722e3 --- /dev/null +++ b/22222-x86-timer-extint.patch @@ -0,0 +1,70 @@ +# HG changeset patch +# User Keir Fraser +# Date 1286028195 -3600 +# Node ID aed9fd361340158daf2d7160d1b367478b6312d6 +# Parent 3518149c4d5d0d8ce7402a24f95d3badbecc1c17 +x86: fix boot failure (regression from pre-4.0 IRQ handling changes) +References: bnc#583568, bnc#615206 + +With the change to index irq_desc[] by IRQ rather than by vector, the +prior implicit change of the used flow handler when altering the IRQ +routing path to 
go through the 8259A didn't work anymore, and hence +on boards needing the ExtINT delivery workaround failed to boot. + +Make make_8259A_irq() a real function again, thus allowing the flow +handler to be changed there. + +Also eliminate the generally superfluous and (at least theoretically) +dangerous hard coded setting of the flow handler for IRQ0: Earlier +code should have set this already based on information coming from +ACPI/MPS, and non-standard systems may e.g. have this IRQ level +triggered. + +Signed-off-by: Jan Beulich +Tested-by: Markus Schuster + +--- a/xen/arch/x86/i8259.c ++++ b/xen/arch/x86/i8259.c +@@ -367,6 +367,12 @@ void __devinit init_8259A(int auto_eoi) + spin_unlock_irqrestore(&i8259A_lock, flags); + } + ++void __init make_8259A_irq(unsigned int irq) ++{ ++ io_apic_irqs &= ~(1 << irq); ++ irq_to_desc(irq)->handler = &i8259A_irq_type; ++} ++ + static struct irqaction __read_mostly cascade = { no_action, "cascade", NULL}; + + void __init init_IRQ(void) +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -38,9 +38,6 @@ + #include + #include + +-/* Different to Linux: our implementation can be simpler. 
*/ +-#define make_8259A_irq(irq) (io_apic_irqs &= ~(1<<(irq))) +- + int (*ioapic_renumber_irq)(int ioapic, int irq); + atomic_t irq_mis_count; + +@@ -1929,7 +1926,6 @@ static inline void check_timer(void) + + irq_desc[0].depth = 0; + irq_desc[0].status &= ~IRQ_DISABLED; +- irq_desc[0].handler = &ioapic_edge_type; + + /* + * Subtle, code in do_timer_interrupt() expects an AEOI +--- a/xen/include/asm-x86/irq.h ++++ b/xen/include/asm-x86/irq.h +@@ -94,6 +94,7 @@ int i8259A_irq_pending(unsigned int irq) + void mask_8259A(void); + void unmask_8259A(void); + void init_8259A(int aeoi); ++void make_8259A_irq(unsigned int irq); + int i8259A_suspend(void); + int i8259A_resume(void); + diff --git a/22223-vtd-igd-workaround.patch b/22223-vtd-igd-workaround.patch new file mode 100644 index 0000000..cb9ab44 --- /dev/null +++ b/22223-vtd-igd-workaround.patch @@ -0,0 +1,131 @@ +# HG changeset patch +# User Keir Fraser +# Date 1286028261 -3600 +# Node ID 4beee577912215c734b79cb84bfe3fb20c1afbfc +# Parent aed9fd361340158daf2d7160d1b367478b6312d6 +Vt-d: fix dom0 graphics problem on Levnovo T410. +References: bnc#643477 + +The patch is derived from a similar quirk in Linux kernel by David +Woodhouse and Adam Jackson. It checks for VT enabling bit in IGD GGC +register. If VT is not enabled correctly in the IGD, Xen does not +enable VT-d translation for IGD VT-d engine. In case where iommu boot +parameter is set to force, Xen calls panic(). + +Signed-off-by: Allen Kay + +jb: Simplified and switched operands of && in first if() added to +iommu_enable_translation(). 
+ +--- a/xen/drivers/passthrough/vtd/dmar.c ++++ b/xen/drivers/passthrough/vtd/dmar.c +@@ -46,6 +46,7 @@ LIST_HEAD(acpi_rmrr_units); + LIST_HEAD(acpi_atsr_units); + LIST_HEAD(acpi_rhsa_units); + ++static u64 igd_drhd_address; + u8 dmar_host_address_width; + + void dmar_scope_add_buses(struct dmar_scope *scope, u16 sec_bus, u16 sub_bus) +@@ -239,6 +240,11 @@ struct acpi_rhsa_unit * drhd_to_rhsa(str + return NULL; + } + ++int is_igd_drhd(struct acpi_drhd_unit *drhd) ++{ ++ return ( drhd->address == igd_drhd_address ? 1 : 0); ++} ++ + /* + * Count number of devices in device scope. Do not include PCI sub + * hierarchies. +@@ -333,6 +339,15 @@ static int __init acpi_parse_dev_scope(v + if ( iommu_verbose ) + dprintk(VTDPREFIX, " endpoint: %x:%x.%x\n", + bus, path->dev, path->fn); ++ ++ if ( type == DMAR_TYPE ) ++ { ++ struct acpi_drhd_unit *drhd = acpi_entry; ++ ++ if ( (bus == 0) && (path->dev == 2) && (path->fn == 0) ) ++ igd_drhd_address = drhd->address; ++ } ++ + break; + + case ACPI_DEV_IOAPIC: +--- a/xen/drivers/passthrough/vtd/dmar.h ++++ b/xen/drivers/passthrough/vtd/dmar.h +@@ -114,5 +114,6 @@ void *map_to_nocache_virt(int nr_iommus, + int vtd_hw_check(void); + void disable_pmr(struct iommu *iommu); + int is_usb_device(u8 bus, u8 devfn); ++int is_igd_drhd(struct acpi_drhd_unit *drhd); + + #endif /* _DMAR_H_ */ +--- a/xen/drivers/passthrough/vtd/iommu.c ++++ b/xen/drivers/passthrough/vtd/iommu.c +@@ -688,10 +688,34 @@ static int iommu_set_root_entry(struct i + return 0; + } + +-static void iommu_enable_translation(struct iommu *iommu) ++#define GGC 0x52 ++#define GGC_MEMORY_VT_ENABLED (0x8 << 8) ++static int is_igd_vt_enabled(void) ++{ ++ unsigned short ggc; ++ ++ /* integrated graphics on Intel platforms is located at 0:2.0 */ ++ ggc = pci_conf_read16(0, 2, 0, GGC); ++ return ( ggc & GGC_MEMORY_VT_ENABLED ? 
1 : 0 ); ++} ++ ++static void iommu_enable_translation(struct acpi_drhd_unit *drhd) + { + u32 sts; + unsigned long flags; ++ struct iommu *iommu = drhd->iommu; ++ ++ if ( is_igd_drhd(drhd) && !is_igd_vt_enabled() ) ++ { ++ if ( force_iommu ) ++ panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose!\n"); ++ else ++ { ++ dprintk(XENLOG_WARNING VTDPREFIX, ++ "BIOS did not enable IGD for VT properly. Disabling IGD VT-d engine.\n"); ++ return; ++ } ++ } + + if ( iommu_verbose ) + dprintk(VTDPREFIX, +@@ -1178,7 +1202,6 @@ static int intel_iommu_domain_init(struc + + static void intel_iommu_dom0_init(struct domain *d) + { +- struct iommu *iommu; + struct acpi_drhd_unit *drhd; + + if ( !iommu_passthrough && !need_iommu(d) ) +@@ -1194,8 +1217,7 @@ static void intel_iommu_dom0_init(struct + + for_each_drhd_unit ( drhd ) + { +- iommu = drhd->iommu; +- iommu_enable_translation(iommu); ++ iommu_enable_translation(drhd); + } + } + +@@ -2163,7 +2185,7 @@ static void vtd_resume(void) + (u32) iommu_state[i][DMAR_FEUADDR_REG]); + spin_unlock_irqrestore(&iommu->register_lock, flags); + +- iommu_enable_translation(iommu); ++ iommu_enable_translation(drhd); + } + } + diff --git a/32on64-extra-mem.patch b/32on64-extra-mem.patch index 6249b76..2a16398 100644 --- a/32on64-extra-mem.patch +++ b/32on64-extra-mem.patch @@ -2,7 +2,7 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== --- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py +++ xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -2920,7 +2920,7 @@ class XendDomainInfo: +@@ -2917,7 +2917,7 @@ class XendDomainInfo: self.guest_bitsize = self.image.getBitSize() # Make sure there's enough RAM available for the domain diff --git a/altgr_2.patch b/altgr_2.patch new file mode 100644 index 0000000..cca3a6d --- /dev/null +++ b/altgr_2.patch @@ -0,0 +1,56 @@ +When access domU from Windows VNC client, spanish 
keyboard altgr key +doesn't work. According to log info, we found that the keycodes passed +from vncclient to qemu vncserver are wrong. When altgr and "2" +are pressed, the keycodes vncserver receives are: +ALT_R down, +CTRL_L down, +CTRL_L up, +ALT_R up, +"2" down, +"2" up, +... +Since there is no altgr modifier when "2" down is sent, the char displayed +on screen will be "2" but not "@". + +To solve this problem, there is another patch applied by upstream which +sends an additional altgr modifier before "2" down in the above case. +It works well when domU is windows, but on sles10 sp3 domU, sometimes it +displays "@" and sometimes it still displays "2", especially when pressing +altgr+2 continuously. + +For the sles10 sp3 domU problem, this may be because there are too many alt_r (same +keycode as altgr on "es") up and down events and the domU OS couldn't handle +it well. + +To further solve this problem, I wrote this patch: when vncserver +is "es" and receives an alt_r keysym (this is already abnormal since "es" has +no alt_r), it treats the alt_r as alt_l. This can avoid too many altgr +keycodes up and down events and makes sure the intentionally added altgr keycode can take effect. 
+ +Signed-off by Chunyan Liu (cyliu@novell.com) + +diff -r a108300bd904 tools/ioemu-qemu-xen/vnc.c +--- a/tools/ioemu-qemu-xen/vnc.c Mon Sep 27 21:20:36 2010 +0800 ++++ b/tools/ioemu-qemu-xen/vnc.c Wed Sep 29 01:55:55 2010 +0800 +@@ -1279,11 +1279,9 @@ + kbd_put_keycode(0xe0); + if (down){ + kbd_put_keycode(0xb8 & 0x7f); +- vs->modifiers_state[0xb8] = 1; + } + else { + kbd_put_keycode(0xb8 | 0x80); +- vs->modifiers_state[0xb8] = 0; + } + } + +@@ -1310,6 +1308,9 @@ + shift_keys = vs->modifiers_state[0x2a] | vs->modifiers_state[0x36]; + altgr_keys = vs->modifiers_state[0xb8]; + ++ if ( !strcmp(keyboard_layout,"es") && sym == 0xffea ) ++ sym = 0xffe9; ++ + keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF); + if (keycode == 0) { + fprintf(stderr, "Key lost : keysym=0x%x(%d)\n", sym, sym); diff --git a/block-dmmd b/block-dmmd index cfe81f0..752263f 100644 --- a/block-dmmd +++ b/block-dmmd @@ -235,9 +235,11 @@ case "$command" in release_lock "dmmd" exit 1 fi + lastparam=${dmmd##*;} + usedevice=${lastparam%(*} claim_lock "block" - xenstore-write $XENBUS_PATH/node ${dmmd##*;} - write_dev ${dmmd##*;} + xenstore-write $XENBUS_PATH/node "$usedevice" + write_dev "$usedevice" release_lock "block" release_lock "dmmd" exit 0 diff --git a/change_home_server.patch b/change_home_server.patch new file mode 100644 index 0000000..4cddc7c --- /dev/null +++ b/change_home_server.patch @@ -0,0 +1,16 @@ +Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py +=================================================================== +--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py ++++ xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py +@@ -3133,6 +3133,11 @@ class XendDomainInfo: + self._cleanup_phantom_devs(paths) + self._cleanupVm() + ++ if "change_home_server" in self.info: ++ chs = self.info["change_home_server"] ++ if (type(chs) is str and chs == "False") or \ ++ (type(chs) is bool and chs is False): ++ self.setChangeHomeServer(None) + if 
("transient" in self.info["other_config"] and \ + bool(self.info["other_config"]["transient"])) or \ + ("change_home_server" in self.info and \ diff --git a/check_device_status.patch b/check_device_status.patch index 983b104..1ade565 100644 --- a/check_device_status.patch +++ b/check_device_status.patch @@ -1,3 +1,10 @@ +Improve check_device_status to handle HA cases + +In HA environment, sometimes xenstore status has changed but ev.wait() cannot +get the signal, it will wait until timeout, thus incorrect device status is +returned. To fix this problem, we do not depend on ev.wait() result, but read +xenstore directly to get correct device status. + diff -r ce65e0e03a57 tools/python/xen/xend/server/DevController.py --- a/tools/python/xen/xend/server/DevController.py Fri Aug 27 16:53:00 2010 +0800 +++ b/tools/python/xen/xend/server/DevController.py Fri Aug 27 17:13:32 2010 +0800 diff --git a/cpupools-core.patch b/cpupools-core.patch index 48932a4..916383e 100644 --- a/cpupools-core.patch +++ b/cpupools-core.patch @@ -1,9 +1,7 @@ From: Juergen Gross -Index: xen-4.0.1-testing/xen/arch/x86/acpi/power.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/arch/x86/acpi/power.c -+++ xen-4.0.1-testing/xen/arch/x86/acpi/power.c +--- a/xen/arch/x86/acpi/power.c ++++ b/xen/arch/x86/acpi/power.c @@ -234,7 +234,7 @@ static int enter_state(u32 state) return error; } @@ -22,10 +20,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/acpi/power.c } static int acpi_get_wake_status(void) -Index: xen-4.0.1-testing/xen/arch/x86/domain.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/arch/x86/domain.c -+++ xen-4.0.1-testing/xen/arch/x86/domain.c +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c @@ -1522,42 +1522,52 @@ void sync_vcpu_execstate(struct vcpu *v) } @@ -139,10 +135,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/domain.c return 0; } -Index: 
xen-4.0.1-testing/xen/arch/x86/domain_build.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/arch/x86/domain_build.c -+++ xen-4.0.1-testing/xen/arch/x86/domain_build.c +--- a/xen/arch/x86/domain_build.c ++++ b/xen/arch/x86/domain_build.c @@ -9,6 +9,7 @@ #include #include @@ -183,10 +177,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/domain_build.c /* Set up CR3 value for write_ptbase */ if ( paging_mode_enabled(d) ) -Index: xen-4.0.1-testing/xen/arch/x86/microcode.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/arch/x86/microcode.c -+++ xen-4.0.1-testing/xen/arch/x86/microcode.c +--- a/xen/arch/x86/microcode.c ++++ b/xen/arch/x86/microcode.c @@ -114,7 +114,7 @@ static int microcode_update_cpu(const vo return err; } @@ -214,10 +206,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/microcode.c + return continue_hypercall_on_cpu(info->cpu, NULL, + do_microcode_update, info); } -Index: xen-4.0.1-testing/xen/arch/x86/mm.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/arch/x86/mm.c -+++ xen-4.0.1-testing/xen/arch/x86/mm.c +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c @@ -243,7 +243,7 @@ void __init arch_init_memory(void) * Any Xen-heap pages that we will allow to be mapped will have * their domain field set to dom_xen. @@ -244,19 +234,9 @@ Index: xen-4.0.1-testing/xen/arch/x86/mm.c BUG_ON(dom_cow == NULL); /* First 1MB of RAM is historically marked as I/O. 
*/ -Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/arch/x86/platform_hypercall.c -+++ xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c -@@ -19,6 +19,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -48,12 +49,12 @@ static DEFINE_PER_CPU(uint64_t, freq); +--- a/xen/arch/x86/platform_hypercall.c ++++ b/xen/arch/x86/platform_hypercall.c +@@ -48,12 +48,12 @@ static DEFINE_PER_CPU(uint64_t, freq); extern int set_px_pminfo(uint32_t cpu, struct xen_processor_performance *perf); extern long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power); @@ -271,7 +251,7 @@ Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c { int cpu = (unsigned long)data; return cpu_down(cpu); -@@ -314,7 +315,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe +@@ -314,7 +314,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe if ( op->u.change_freq.flags || !cpu_online(op->u.change_freq.cpu) ) break; per_cpu(freq, op->u.change_freq.cpu) = op->u.change_freq.freq; @@ -280,25 +260,7 @@ Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c cpu_frequency_change_helper, NULL); break; -@@ -406,7 +407,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe - g_info = &op->u.pcpu_info; - - /* spin_trylock() avoids deadlock with stop_machine_run(). */ -- if ( !spin_trylock(&cpu_add_remove_lock) ) -+ if ( !spin_trylock(&cpupool_lock) ) - { - ret = -EBUSY; - break; -@@ -429,7 +430,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe - - g_info->max_present = last_cpu(cpu_present_map); - -- spin_unlock(&cpu_add_remove_lock); -+ spin_unlock(&cpupool_lock); - - ret = copy_to_guest(u_xenpf_op, op, 1) ? 
-EFAULT : 0; - } -@@ -470,7 +471,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe +@@ -470,7 +470,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe break; } ret = continue_hypercall_on_cpu( @@ -307,10 +269,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c break; } break; -Index: xen-4.0.1-testing/xen/arch/x86/setup.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/arch/x86/setup.c -+++ xen-4.0.1-testing/xen/arch/x86/setup.c +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c @@ -2,6 +2,7 @@ #include #include @@ -343,10 +303,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/setup.c if ( (dom0 == NULL) || (alloc_dom0_vcpu0() == NULL) ) panic("Error creating domain 0\n"); -Index: xen-4.0.1-testing/xen/arch/x86/smpboot.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/arch/x86/smpboot.c -+++ xen-4.0.1-testing/xen/arch/x86/smpboot.c +--- a/xen/arch/x86/smpboot.c ++++ b/xen/arch/x86/smpboot.c @@ -39,6 +39,7 @@ #include #include @@ -355,24 +313,7 @@ Index: xen-4.0.1-testing/xen/arch/x86/smpboot.c #include #include #include -@@ -104,7 +105,6 @@ static void map_cpu_to_logical_apicid(vo - DEFINE_PER_CPU(int, cpu_state) = { 0 }; - - void *stack_base[NR_CPUS]; --DEFINE_SPINLOCK(cpu_add_remove_lock); - - /* - * The bootstrap kernel entry code has set these up. 
Save them for -@@ -821,7 +821,7 @@ wakeup_secondary_cpu(int phys_apicid, un - - extern cpumask_t cpu_initialized; - /* -- * Caller should hold cpu_add_remove_lock if not called when booting -+ * Caller should hold cpupool_lock if not called when booting - */ - int alloc_cpu_id(void) - { -@@ -1306,10 +1306,11 @@ int __cpu_disable(void) +@@ -1306,10 +1307,11 @@ int __cpu_disable(void) __sync_lazy_execstate(); /* It's now safe to remove this processor from the online map */ @@ -385,82 +326,46 @@ Index: xen-4.0.1-testing/xen/arch/x86/smpboot.c return 0; } -@@ -1343,10 +1344,10 @@ int cpu_down(unsigned int cpu) +@@ -1341,16 +1343,12 @@ static int take_cpu_down(void *unused) + int cpu_down(unsigned int cpu) + { int err = 0; ++ bool_t pool_rm = 0; /* spin_trylock() avoids deadlock with stop_machine_run(). */ -- if (!spin_trylock(&cpu_add_remove_lock)) -+ if (!spin_trylock(&cpupool_lock)) + if (!spin_trylock(&cpu_add_remove_lock)) return -EBUSY; - if (num_online_cpus() == 1) { -+ if ((!cpu_isset(cpu, cpupool0->cpu_valid)) || (cpus_weight(cpupool0->cpu_valid) == 1)) { - err = -EBUSY; - goto out; - } -@@ -1379,7 +1380,7 @@ int cpu_down(unsigned int cpu) +- err = -EBUSY; +- goto out; +- } +- + /* Can not offline BSP */ + if (cpu == 0) { + err = -EINVAL; +@@ -1364,6 +1362,11 @@ int cpu_down(unsigned int cpu) + + printk("Prepare to bring CPU%d down...\n", cpu); + ++ err = cpupool_cpu_remove(cpu); ++ if (err) ++ goto out; ++ pool_rm = 1; ++ + cpufreq_del_cpu(cpu); + + err = stop_machine_run(take_cpu_down, NULL, cpu); +@@ -1379,6 +1382,8 @@ int cpu_down(unsigned int cpu) out: if (!err) send_guest_global_virq(dom0, VIRQ_PCPU_STATE); -- spin_unlock(&cpu_add_remove_lock); -+ spin_unlock(&cpupool_lock); ++ else if (pool_rm) ++ cpupool_cpu_add(cpu); + spin_unlock(&cpu_add_remove_lock); return err; } - -@@ -1388,7 +1389,7 @@ int cpu_up(unsigned int cpu) - int err = 0; - - /* spin_trylock() avoids deadlock with stop_machine_run(). 
*/ -- if (!spin_trylock(&cpu_add_remove_lock)) -+ if (!spin_trylock(&cpupool_lock)) - return -EBUSY; - - if (cpu_online(cpu)) { -@@ -1406,7 +1407,7 @@ int cpu_up(unsigned int cpu) - out: - if (!err) - send_guest_global_virq(dom0, VIRQ_PCPU_STATE); -- spin_unlock(&cpu_add_remove_lock); -+ spin_unlock(&cpupool_lock); - return err; - } - -@@ -1492,14 +1493,14 @@ int cpu_add(uint32_t apic_id, uint32_t a - return -EEXIST; - - /* spin_trylock() avoids deadlock with stop_machine_run(). */ -- if (!spin_trylock(&cpu_add_remove_lock)) -+ if (!spin_trylock(&cpupool_lock)) - return -EBUSY; - - cpu = mp_register_lapic(apic_id, 1); - - if (cpu < 0) - { -- spin_unlock(&cpu_add_remove_lock); -+ spin_unlock(&cpupool_lock); - return cpu; - } - -@@ -1516,7 +1517,7 @@ int cpu_add(uint32_t apic_id, uint32_t a - "Setup node failed for pxm %x\n", pxm); - x86_acpiid_to_apicid[acpi_id] = 0xff; - mp_unregister_lapic(apic_id, cpu); -- spin_unlock(&cpu_add_remove_lock); -+ spin_unlock(&cpupool_lock); - return node; - } - apicid_to_node[apic_id] = node; -@@ -1524,7 +1525,7 @@ int cpu_add(uint32_t apic_id, uint32_t a - - srat_detect_node(cpu); - numa_add_cpu(cpu); -- spin_unlock(&cpu_add_remove_lock); -+ spin_unlock(&cpupool_lock); - dprintk(XENLOG_INFO, "Add CPU %x with index %x\n", apic_id, cpu); - return cpu; - } -@@ -1568,6 +1569,7 @@ int __devinit __cpu_up(unsigned int cpu) +@@ -1568,6 +1573,7 @@ int __devinit __cpu_up(unsigned int cpu) process_pending_softirqs(); } @@ -468,10 +373,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/smpboot.c cpufreq_add_cpu(cpu); return 0; } -Index: xen-4.0.1-testing/xen/arch/x86/sysctl.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/arch/x86/sysctl.c -+++ xen-4.0.1-testing/xen/arch/x86/sysctl.c +--- a/xen/arch/x86/sysctl.c ++++ b/xen/arch/x86/sysctl.c @@ -29,7 +29,7 @@ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) @@ -490,10 +393,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/sysctl.c 
break; case XEN_SYSCTL_CPU_HOTPLUG_STATUS: ret = 0; -Index: xen-4.0.1-testing/xen/common/Makefile -=================================================================== ---- xen-4.0.1-testing.orig/xen/common/Makefile -+++ xen-4.0.1-testing/xen/common/Makefile +--- a/xen/common/Makefile ++++ b/xen/common/Makefile @@ -1,5 +1,6 @@ obj-y += bitmap.o obj-y += cpu.o @@ -501,11 +402,9 @@ Index: xen-4.0.1-testing/xen/common/Makefile obj-y += domctl.o obj-y += domain.o obj-y += event_channel.o -Index: xen-4.0.1-testing/xen/common/cpupool.c -=================================================================== --- /dev/null -+++ xen-4.0.1-testing/xen/common/cpupool.c -@@ -0,0 +1,585 @@ ++++ b/xen/common/cpupool.c +@@ -0,0 +1,609 @@ +/****************************************************************************** + * cpupool.c + * @@ -539,11 +438,12 @@ Index: xen-4.0.1-testing/xen/common/cpupool.c + +static int cpupool_moving_cpu = -1; +static struct cpupool *cpupool_cpu_moving = NULL; ++static cpumask_t cpupool_locked_cpus = CPU_MASK_NONE; + +/* cpupool lock: be carefull, this lock is sometimes released on another cpu + * as it was obtained! + */ -+DEFINE_SPINLOCK(cpupool_lock); ++static DEFINE_SPINLOCK(cpupool_lock); + +DEFINE_PER_CPU(struct cpupool *, cpupool); + @@ -734,8 +634,9 @@ Index: xen-4.0.1-testing/xen/common/cpupool.c + * might be zombies. 
+ * possible failures: + * - last cpu and still active domains in cpupool ++ * - cpu just being unplugged + */ -+int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu) ++static int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu) +{ + int work_cpu; + int ret; @@ -748,6 +649,8 @@ Index: xen-4.0.1-testing/xen/common/cpupool.c + ret = -EBUSY; + if ( (cpupool_moving_cpu != -1) && (cpu != cpupool_moving_cpu) ) + goto out; ++ if ( cpu_isset(cpu, cpupool_locked_cpus) ) ++ goto out; + + ret = 0; + if ( !cpu_isset(cpu, c->cpu_valid) && (cpu != cpupool_moving_cpu) ) @@ -872,6 +775,7 @@ Index: xen-4.0.1-testing/xen/common/cpupool.c + if ( cpupool0 == NULL ) + return; + spin_lock(&cpupool_lock); ++ cpu_clear(cpu, cpupool_locked_cpus); + cpu_set(cpu, cpupool_free_cpus); + (void)cpupool_assign_cpu_locked(cpupool0, cpu); + spin_unlock(&cpupool_lock); @@ -879,6 +783,25 @@ Index: xen-4.0.1-testing/xen/common/cpupool.c +} + +/* ++ * called to remove a cpu from pool admin ++ * the cpu to be removed is locked to avoid removing it from dom0 ++ * returns failure if not in pool0 ++ */ ++int cpupool_cpu_remove(unsigned int cpu) ++{ ++ int ret = 0; ++ ++ spin_lock(&cpupool_lock); ++ if ( !cpu_isset(cpu, cpupool0->cpu_valid)) ++ ret = -EBUSY; ++ else ++ cpu_set(cpu, cpupool_locked_cpus); ++ spin_unlock(&cpupool_lock); ++ ++ return ret; ++} ++ ++/* + * do cpupool related sysctl operations + */ +int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op) @@ -1091,10 +1014,8 @@ Index: xen-4.0.1-testing/xen/common/cpupool.c + * indent-tabs-mode: nil + * End: + */ -Index: xen-4.0.1-testing/xen/common/domain.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/common/domain.c -+++ xen-4.0.1-testing/xen/common/domain.c +--- a/xen/common/domain.c ++++ b/xen/common/domain.c @@ -209,7 +209,7 @@ static void __init parse_extra_guest_irq custom_param("extra_guest_irqs", parse_extra_guest_irqs); @@ -1123,10 +1044,8 @@ Index: 
xen-4.0.1-testing/xen/common/domain.c sched_destroy_domain(d); /* Free page used by xen oprofile buffer. */ -Index: xen-4.0.1-testing/xen/common/domctl.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/common/domctl.c -+++ xen-4.0.1-testing/xen/common/domctl.c +--- a/xen/common/domctl.c ++++ b/xen/common/domctl.c @@ -11,6 +11,7 @@ #include #include @@ -1202,10 +1121,8 @@ Index: xen-4.0.1-testing/xen/common/domctl.c if ( alloc_vcpu(d, i, cpu) == NULL ) goto maxvcpu_out; -Index: xen-4.0.1-testing/xen/common/kexec.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/common/kexec.c -+++ xen-4.0.1-testing/xen/common/kexec.c +--- a/xen/common/kexec.c ++++ b/xen/common/kexec.c @@ -235,7 +235,7 @@ void kexec_crash(void) BUG(); } @@ -1224,10 +1141,8 @@ Index: xen-4.0.1-testing/xen/common/kexec.c break; case KEXEC_TYPE_CRASH: kexec_crash(); /* Does not return */ -Index: xen-4.0.1-testing/xen/common/sched_credit.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/common/sched_credit.c -+++ xen-4.0.1-testing/xen/common/sched_credit.c +--- a/xen/common/sched_credit.c ++++ b/xen/common/sched_credit.c @@ -70,11 +70,15 @@ /* * Useful macros @@ -2116,7 +2031,7 @@ Index: xen-4.0.1-testing/xen/common/sched_credit.c .destroy_vcpu = csched_vcpu_destroy, .sleep = csched_vcpu_sleep, -@@ -1411,6 +1540,13 @@ const struct scheduler sched_credit_def +@@ -1411,6 +1540,13 @@ const struct scheduler sched_credit_def .dump_cpu_state = csched_dump_pcpu, .dump_settings = csched_dump, .init = csched_init, @@ -2130,10 +2045,8 @@ Index: xen-4.0.1-testing/xen/common/sched_credit.c .tick_suspend = csched_tick_suspend, .tick_resume = csched_tick_resume, -Index: xen-4.0.1-testing/xen/common/sched_sedf.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/common/sched_sedf.c -+++ 
xen-4.0.1-testing/xen/common/sched_sedf.c +--- a/xen/common/sched_sedf.c ++++ b/xen/common/sched_sedf.c @@ -21,6 +21,9 @@ printk(_a ); \ } while ( 0 ) @@ -2399,7 +2312,7 @@ Index: xen-4.0.1-testing/xen/common/sched_sedf.c .name = "Simple EDF Scheduler", .opt_name = "sedf", .sched_id = XEN_SCHEDULER_SEDF, -@@ -1464,9 +1509,15 @@ const struct scheduler sched_sedf_def = +@@ -1464,9 +1509,15 @@ const struct scheduler sched_sedf_def = .init_domain = sedf_init_domain, .destroy_domain = sedf_destroy_domain, @@ -2416,10 +2329,8 @@ Index: xen-4.0.1-testing/xen/common/sched_sedf.c .do_schedule = sedf_do_schedule, .pick_cpu = sedf_pick_cpu, .dump_cpu_state = sedf_dump_cpu_state, -Index: xen-4.0.1-testing/xen/common/schedule.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/common/schedule.c -+++ xen-4.0.1-testing/xen/common/schedule.c +--- a/xen/common/schedule.c ++++ b/xen/common/schedule.c @@ -53,10 +53,11 @@ static void poll_timer_fn(void *data); /* This is global for now so that private implementations can reach it */ @@ -2938,10 +2849,8 @@ Index: xen-4.0.1-testing/xen/common/schedule.c } #ifdef CONFIG_COMPAT -Index: xen-4.0.1-testing/xen/common/softirq.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/common/softirq.c -+++ xen-4.0.1-testing/xen/common/softirq.c +--- a/xen/common/softirq.c ++++ b/xen/common/softirq.c @@ -88,9 +88,11 @@ void raise_softirq(unsigned int nr) } @@ -3035,10 +2944,8 @@ Index: xen-4.0.1-testing/xen/common/softirq.c open_softirq(TASKLET_SOFTIRQ, tasklet_action); } -Index: xen-4.0.1-testing/xen/common/sysctl.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/common/sysctl.c -+++ xen-4.0.1-testing/xen/common/sysctl.c +--- a/xen/common/sysctl.c ++++ b/xen/common/sysctl.c @@ -314,6 +314,14 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc } break; @@ -3054,10 +2961,8 @@ Index: 
xen-4.0.1-testing/xen/common/sysctl.c default: ret = arch_do_sysctl(op, u_sysctl); break; -Index: xen-4.0.1-testing/xen/include/asm-x86/domain.h -=================================================================== ---- xen-4.0.1-testing.orig/xen/include/asm-x86/domain.h -+++ xen-4.0.1-testing/xen/include/asm-x86/domain.h +--- a/xen/include/asm-x86/domain.h ++++ b/xen/include/asm-x86/domain.h @@ -458,7 +458,8 @@ struct arch_vcpu #define hvm_svm hvm_vcpu.u.svm @@ -3068,22 +2973,8 @@ Index: xen-4.0.1-testing/xen/include/asm-x86/domain.h void vcpu_show_execution_state(struct vcpu *); void vcpu_show_registers(const struct vcpu *); -Index: xen-4.0.1-testing/xen/include/asm-x86/smp.h -=================================================================== ---- xen-4.0.1-testing.orig/xen/include/asm-x86/smp.h -+++ xen-4.0.1-testing/xen/include/asm-x86/smp.h -@@ -56,7 +56,6 @@ extern u32 cpu_2_logical_apicid[]; - #define CPU_ONLINE 0x0002 /* CPU is up */ - #define CPU_DEAD 0x0004 /* CPU is dead */ - DECLARE_PER_CPU(int, cpu_state); --extern spinlock_t(cpu_add_remove_lock); - - #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) - extern int cpu_down(unsigned int cpu); -Index: xen-4.0.1-testing/xen/include/public/domctl.h -=================================================================== ---- xen-4.0.1-testing.orig/xen/include/public/domctl.h -+++ xen-4.0.1-testing/xen/include/public/domctl.h +--- a/xen/include/public/domctl.h ++++ b/xen/include/public/domctl.h @@ -60,10 +60,10 @@ struct xen_domctl_createdomain { /* Should domain memory integrity be verifed by tboot during Sx? 
*/ #define _XEN_DOMCTL_CDF_s3_integrity 2 @@ -3112,10 +3003,8 @@ Index: xen-4.0.1-testing/xen/include/public/domctl.h struct xen_domctl { uint32_t cmd; #define XEN_DOMCTL_createdomain 1 -Index: xen-4.0.1-testing/xen/include/public/sysctl.h -=================================================================== ---- xen-4.0.1-testing.orig/xen/include/public/sysctl.h -+++ xen-4.0.1-testing/xen/include/public/sysctl.h +--- a/xen/include/public/sysctl.h ++++ b/xen/include/public/sysctl.h @@ -491,6 +491,28 @@ struct xen_sysctl_lockprof_op { typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t); @@ -3153,11 +3042,9 @@ Index: xen-4.0.1-testing/xen/include/public/sysctl.h uint8_t pad[128]; } u; }; -Index: xen-4.0.1-testing/xen/include/xen/sched-if.h -=================================================================== ---- xen-4.0.1-testing.orig/xen/include/xen/sched-if.h -+++ xen-4.0.1-testing/xen/include/xen/sched-if.h -@@ -10,16 +10,29 @@ +--- a/xen/include/xen/sched-if.h ++++ b/xen/include/xen/sched-if.h +@@ -10,16 +10,26 @@ #include @@ -3167,9 +3054,6 @@ Index: xen-4.0.1-testing/xen/include/xen/sched-if.h + +/* cpus currently in no cpupool */ +extern cpumask_t cpupool_free_cpus; -+ -+/* cpupool lock (used for cpu on/offline, too) */ -+extern spinlock_t cpupool_lock; + struct schedule_data { spinlock_t schedule_lock; /* spinlock protecting curr */ @@ -3187,7 +3071,7 @@ Index: xen-4.0.1-testing/xen/include/xen/sched-if.h static inline void vcpu_schedule_lock(struct vcpu *v) { -@@ -59,28 +72,49 @@ struct scheduler { +@@ -59,28 +69,49 @@ struct scheduler { char *name; /* full name for this scheduler */ char *opt_name; /* option name for this scheduler */ unsigned int sched_id; /* ID for this scheduler */ @@ -3251,10 +3135,8 @@ Index: xen-4.0.1-testing/xen/include/xen/sched-if.h +struct scheduler *scheduler_get_by_id(unsigned int id); + #endif /* __XEN_SCHED_IF_H__ */ -Index: xen-4.0.1-testing/xen/include/xen/sched.h 
-=================================================================== ---- xen-4.0.1-testing.orig/xen/include/xen/sched.h -+++ xen-4.0.1-testing/xen/include/xen/sched.h +--- a/xen/include/xen/sched.h ++++ b/xen/include/xen/sched.h @@ -9,6 +9,7 @@ #include #include @@ -3263,7 +3145,7 @@ Index: xen-4.0.1-testing/xen/include/xen/sched.h #include #include #include -@@ -132,8 +133,6 @@ struct vcpu +@@ -132,8 +133,6 @@ struct vcpu bool_t defer_shutdown; /* VCPU is paused following shutdown request (d->is_shutting_down)? */ bool_t paused_for_shutdown; @@ -3316,7 +3198,7 @@ Index: xen-4.0.1-testing/xen/include/xen/sched.h void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate); uint64_t get_cpu_idle_time(unsigned int cpu); -@@ -604,6 +607,18 @@ extern enum cpufreq_controller { +@@ -604,6 +607,19 @@ extern enum cpufreq_controller { FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen } cpufreq_controller; @@ -3327,6 +3209,7 @@ Index: xen-4.0.1-testing/xen/include/xen/sched.h +int cpupool0_cpu_assign(struct cpupool *c); +int cpupool_assign_ncpu(struct cpupool *c, int ncpu); +void cpupool_cpu_add(unsigned int cpu); ++int cpupool_cpu_remove(unsigned int cpu); +int cpupool_add_domain(struct domain *d, int poolid); +void cpupool_rm_domain(struct domain *d); +int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op); @@ -3335,10 +3218,8 @@ Index: xen-4.0.1-testing/xen/include/xen/sched.h #endif /* __SCHED_H__ */ /* -Index: xen-4.0.1-testing/xen/include/xen/softirq.h -=================================================================== ---- xen-4.0.1-testing.orig/xen/include/xen/softirq.h -+++ xen-4.0.1-testing/xen/include/xen/softirq.h +--- a/xen/include/xen/softirq.h ++++ b/xen/include/xen/softirq.h @@ -58,6 +58,7 @@ struct tasklet struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, func, data } diff --git a/del_usb_xend_entry.patch b/del_usb_xend_entry.patch index 3114318..d62c0e7 100644 --- a/del_usb_xend_entry.patch +++ b/del_usb_xend_entry.patch @@ -2,7 
+2,7 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== --- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py +++ xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -1313,8 +1313,15 @@ class XendDomainInfo: +@@ -1310,8 +1310,15 @@ class XendDomainInfo: frontpath = self.getDeviceController(deviceClass).frontendPath(dev) backpath = xstransact.Read(frontpath, "backend") thread.start_new_thread(self.getDeviceController(deviceClass).finishDeviceCleanup, (backpath, path)) diff --git a/ioemu-vnc-resize.patch b/ioemu-vnc-resize.patch index cd98427..5842337 100644 --- a/ioemu-vnc-resize.patch +++ b/ioemu-vnc-resize.patch @@ -2,7 +2,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/vnc.c =================================================================== --- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/vnc.c +++ xen-4.0.1-testing/tools/ioemu-qemu-xen/vnc.c -@@ -1713,6 +1713,31 @@ static int protocol_client_msg(VncState +@@ -1736,6 +1736,25 @@ static int protocol_client_msg(VncState } set_encodings(vs, (int32_t *)(data + 4), limit); @@ -23,12 +23,6 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/vnc.c + vnc_write_u16(vs, 1); /* number of rects */ + vnc_framebuffer_update(vs, 0, 0, vs->serverds.width, vs->serverds.height, -223); + -+ /* Ensure that the new area is updated */ -+ vnc_write_u8(vs, 0); /* msg id */ -+ vnc_write_u8(vs, 0); -+ vnc_write_u16(vs, 1); /* number of rects */ -+ send_framebuffer_update(vs, 0, 0, vs->serverds.width, vs->serverds.height); -+ + vnc_flush(vs); + } break; diff --git a/multi-xvdp.patch b/multi-xvdp.patch index 0da4e79..78811a5 100644 --- a/multi-xvdp.patch +++ b/multi-xvdp.patch @@ -1,20 +1,26 @@ +Allow multiple bootloader loopback devices + +Starting several domains concurrently can fail due to using a single +bootloader loopback device. 
This patch creates a list of bootloader +loopback devices so more than one instance of bootloader can be run +concurrently. + Index: xen-4.0.1-testing/tools/python/xen/util/blkif.py =================================================================== --- xen-4.0.1-testing.orig/tools/python/xen/util/blkif.py +++ xen-4.0.1-testing/tools/python/xen/util/blkif.py -@@ -19,10 +19,12 @@ def blkdev_name_to_number(name): +@@ -19,11 +19,6 @@ def blkdev_name_to_number(name): devname = 'virtual-device' devnum = None -+ """ - try: - return (devname, os.stat(n).st_rdev) - except Exception, ex: - pass -+ """ - +- try: +- return (devname, os.stat(n).st_rdev) +- except Exception, ex: +- pass +- scsi_major = [ 8, 65, 66, 67, 68, 69, 70, 71, 128, 129, 130, 131, 132, 133, 134, 135 ] if re.match( '/dev/sd[a-z]([1-9]|1[0-5])?$', n): + major = scsi_major[(ord(n[7:8]) - ord('a')) / 16] Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== --- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py @@ -24,11 +30,11 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py MIGRATE_TIMEOUT = 30.0 -BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp' -+BOOTLOADER_LOOPBACK_DEVICES = ['/dev/xvdy', '/dev/xvdx', '/dev/xvdw', '/dev/xvdv', '/dev/xvdu', '/dev/xvdt', '/dev/xvds', '/dev/xvdr', '/dev/xvdq', '/dev/xvdp', '/dev/xvdo', '/dev/xvdn', '/dev/xvdm', '/dev/xvdl', '/dev/xvdk', '/dev/xvdj', '/dev/xvdi', '/dev/xvdh', '/dev/xvdg', '/dev/xvdf', '/dev/xvde', '/dev/xvdd'] ++BOOTLOADER_LOOPBACK_DEVICES = ['/dev/xvd' + chr(x) for x in range(ord('z'), ord('d'), -1)] xc = xen.lowlevel.xc.xc() xoptions = XendOptions.instance() -@@ -3314,20 +3314,27 @@ class XendDomainInfo: +@@ -3311,20 +3311,27 @@ class XendDomainInfo: # This is a file, not a device. pygrub can cope with a # file if it's raw, but if it's QCOW or other such formats # used through blktap, then we need to mount it first. 
@@ -70,7 +76,7 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py try: blcfg = bootloader(blexec, fn, self, False, -@@ -3335,11 +3342,11 @@ class XendDomainInfo: +@@ -3332,11 +3339,11 @@ class XendDomainInfo: finally: if mounted: log.info("Unmounting %s from %s." % diff --git a/x86-cpufreq-report.patch b/x86-cpufreq-report.patch index 359f246..b036e77 100644 --- a/x86-cpufreq-report.patch +++ b/x86-cpufreq-report.patch @@ -1,9 +1,7 @@ -Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/arch/x86/platform_hypercall.c -+++ xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c -@@ -22,7 +22,7 @@ - #include +--- a/xen/arch/x86/platform_hypercall.c ++++ b/xen/arch/x86/platform_hypercall.c +@@ -21,7 +21,7 @@ + #include #include #include -#include @@ -11,7 +9,7 @@ Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c #include #include #include "cpu/mtrr/mtrr.h" -@@ -63,6 +63,7 @@ static long cpu_down_helper(void *hdl, v +@@ -62,6 +62,7 @@ static long cpu_down_helper(void *hdl, v ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op) { ret_t ret = 0; @@ -19,7 +17,7 @@ Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c struct xen_platform_op curop, *op = &curop; if ( !IS_PRIV(current->domain) ) -@@ -487,6 +488,24 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe +@@ -486,6 +487,24 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe op->u.mem_add.epfn, op->u.mem_add.pxm); break; @@ -44,10 +42,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c default: ret = -ENOSYS; break; -Index: xen-4.0.1-testing/xen/include/public/platform.h -=================================================================== ---- xen-4.0.1-testing.orig/xen/include/public/platform.h -+++ xen-4.0.1-testing/xen/include/public/platform.h +--- a/xen/include/public/platform.h ++++ b/xen/include/public/platform.h @@ -355,6 +355,14 @@ struct xenpf_mem_hotadd 
uint32_t flags; }; diff --git a/x86-ioapic-ack-default.patch b/x86-ioapic-ack-default.patch index 9dccd3f..39f3be6 100644 --- a/x86-ioapic-ack-default.patch +++ b/x86-ioapic-ack-default.patch @@ -1,10 +1,8 @@ Change default IO-APIC ack mode for single IO-APIC systems to old-style. -Index: xen-4.0.1-testing/xen/arch/x86/io_apic.c -=================================================================== ---- xen-4.0.1-testing.orig/xen/arch/x86/io_apic.c -+++ xen-4.0.1-testing/xen/arch/x86/io_apic.c -@@ -1562,7 +1562,7 @@ static unsigned int startup_level_ioapic +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -1559,7 +1559,7 @@ static unsigned int startup_level_ioapic return 0; /* don't check for pending */ } @@ -13,7 +11,7 @@ Index: xen-4.0.1-testing/xen/arch/x86/io_apic.c static void setup_ioapic_ack(char *s) { if ( !strcmp(s, "old") ) -@@ -2066,6 +2066,8 @@ void __init setup_IO_APIC(void) +@@ -2062,6 +2062,8 @@ void __init setup_IO_APIC(void) else io_apic_irqs = ~PIC_IRQS; diff --git a/xen-4.0.1-testing-src.tar.bz2 b/xen-4.0.1-testing-src.tar.bz2 index 8b1baff..a170d5d 100644 --- a/xen-4.0.1-testing-src.tar.bz2 +++ b/xen-4.0.1-testing-src.tar.bz2 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f50525ffe664fa03f425d044e7cde87b264c4cae9336d2c866e312e89dcd0b83 -size 23280495 +oid sha256:b66dcbfa61d2aec1da5077a3f58935786c803a0cc1ed5d76174d2d71e3372c76 +size 23223686 diff --git a/xen-disable-libxl.diff b/xen-disable-libxl.diff deleted file mode 100644 index b4efeb2..0000000 --- a/xen-disable-libxl.diff +++ /dev/null @@ -1,13 +0,0 @@ -Index: xen-4.0.1-testing/tools/Makefile -=================================================================== ---- xen-4.0.1-testing.orig/tools/Makefile -+++ xen-4.0.1-testing/tools/Makefile -@@ -33,7 +33,7 @@ SUBDIRS-$(CONFIG_Linux) += fs-back - SUBDIRS-$(CONFIG_NetBSD) += fs-back - SUBDIRS-$(CONFIG_IOEMU) += ioemu-dir - SUBDIRS-y += xenpmd --SUBDIRS-y += libxl -+#SUBDIRS-y += libxl - SUBDIRS-y += remus - 
SUBDIRS-$(CONFIG_X86) += xenpaging - SUBDIRS-$(CONFIG_X86) += debugger/gdbsx diff --git a/xen-disable-xenpaging.diff b/xen-disable-xenpaging.diff deleted file mode 100644 index 2e8615f..0000000 --- a/xen-disable-xenpaging.diff +++ /dev/null @@ -1,13 +0,0 @@ -Index: xen-4.0.1-testing/tools/Makefile -=================================================================== ---- xen-4.0.1-testing.orig/tools/Makefile -+++ xen-4.0.1-testing/tools/Makefile -@@ -35,7 +35,7 @@ SUBDIRS-$(CONFIG_IOEMU) += ioemu-dir - SUBDIRS-y += xenpmd - #SUBDIRS-y += libxl - SUBDIRS-y += remus --SUBDIRS-$(CONFIG_X86) += xenpaging -+#SUBDIRS-$(CONFIG_X86) += xenpaging - SUBDIRS-$(CONFIG_X86) += debugger/gdbsx - - # These don't cross-compile diff --git a/xen.changes b/xen.changes index e43d30e..cc57c95 100644 --- a/xen.changes +++ b/xen.changes @@ -1,3 +1,98 @@ +------------------------------------------------------------------- +Wed Oct 20 15:50:01 CEST 2010 - ohering@suse.de + +- fate#310510 - fix xenpaging + xenpaging.tools_xenpaging_cleanup.patch + +------------------------------------------------------------------- +Wed Oct 20 15:31:47 CEST 2010 - ohering@suse.de + +- fate#310510 - fix xenpaging + xenpaging.mem_event_check_ring-free_requests.patch + +------------------------------------------------------------------- +Wed Oct 20 15:29:40 CEST 2010 - ohering@suse.de + +- install /etc/xen/examples/xentrace_formats.txt to get human readable + tracedata if xenalyze is not used + +------------------------------------------------------------------- +Sun Oct 17 11:14:33 CEST 2010 - ohering@suse.de + +- fate#310510 - fix xenpaging + xenpaging.autostart_delay.patch + xenpaging.blacklist.patch + xenpaging.MRU_SIZE.patch + remove xenpaging.hacks.patch, realmode works + +------------------------------------------------------------------- +Mon Oct 11 08:59:35 MDT 2010 - carnold@novell.com + +- Upstream patches from Jan including fixes for the following bugs + bnc#583568 - Xen kernel is not booting + 
bnc#615206 - Xen kernel fails to boot with IO-APIC problem + bnc#640773 - Xen kernel crashing right after grub + bnc#643477 - issues with PCI hotplug/hotunplug to Xen driver domain + 22223-vtd-igd-workaround.patch + 22222-x86-timer-extint.patch + 22214-x86-msr-misc-enable.patch + 22213-x86-xsave-cpuid-check.patch + 22194-tmem-check-pv-mfn.patch + 22177-i386-irq-safe-map_domain_page.patch + 22175-x86-irq-enter-exit.patch + 22174-x86-pmtimer-accuracy.patch + 22160-Intel-C6-EOI.patch + 22159-notify-evtchn-dying.patch + 22157-x86-debug-key-i.patch + +------------------------------------------------------------------- +Mon Oct 11 16:03:23 CEST 2010 - ohering@suse.de + +- fate#310510 - fix xenpaging + xenpaging.signal_handling.patch + xenpaging.autostart.patch + xenpaging.hacks.patch + +------------------------------------------------------------------- +Mon Oct 11 15:42:02 CEST 2010 - ohering@suse.de + +- rename xenpaging.XENMEM_decrease_reservation.patch + to xenpaging.memory_op.patch + +------------------------------------------------------------------- +Tue Oct 8 14:11:31 CST 2010 - cyliu@novell.com + +- bnc#632956 - fix VNC altgr-insert behavior + 7410-qemu-alt-gr.patch + altgr_2.patch + +------------------------------------------------------------------- +Thu Oct 7 11:18:12 MDT 2010 - jfehlig@novell.com + +- bnc#618087 - VNC view won't stay connected to fully virtualized + Linux Xen VMs + modified ioemu-vnc-resize.patch + +------------------------------------------------------------------- +Tue Oct 5 09:34:20 MDT 2010 - carnold@novell.com + +- bnc#639546 - Dom-U deleted after introduction of the parameter + "change_home_server False" in the VM configuration + change_home_server.patch + +------------------------------------------------------------------- +Mon Oct 4 09:10:03 MDT 2010 - jfehlig@novell.com + +- bnc#641859 - block-dmmd script does not handle the configuration + when only MD is used + modified block-dmmd script + 
+------------------------------------------------------------------- +Thu Sep 30 17:52:55 CEST 2010 - ohering@suse.de + +- fate#310510 - fix xenpaging + xenpaging.populate_only_if_paged.patch + ------------------------------------------------------------------- Mon Sep 27 09:59:37 MDT 2010 - carnold@novell.com @@ -19,15 +114,23 @@ Tue Sep 23 16:43:11 CST 2010 - cyliu@novell.com mutli-xvdp.patch ------------------------------------------------------------------- -Tue Sep 20 14:11:31 CST 2010 - cyliu@novell.com +Wed Sep 22 10:50:20 CEST 2010 - ohering@suse.de -- bnc#632956 - fix VNC altgr-insert behavior +- fate#310510 - fix xenpaging + xenpaging.XENMEM_decrease_reservation.patch + xenpaging.xenpaging_init.patch + xenpaging.policy_linear.patch ------------------------------------------------------------------- -Mon Sep 13 16:24:31 MDT 2010 - carnold@novell.com +Fri Sep 17 15:59:45 CEST 2010 - ohering@suse.de -- bnc#636231 - XEN: Unable to disconnect/remove CDROM drive from VM - xend-devid-or-name.patch +- fate#310510 - fix xenpaging + xenpaging.pageout_policy.patch + xenpaging.xs_daemon_close.patch + xenpaging.pagefile.patch + xenpaging.mem_paging_tool_qemu_flush_cache.patch + xenpaging.get_paged_frame.patch + xenpaging.notify_via_xen_event_channel.patch ------------------------------------------------------------------- Mon Sep 13 10:50:56 MDT 2010 - carnold@novell.com diff --git a/xen.spec b/xen.spec index 3702aa5..8b24946 100644 --- a/xen.spec +++ b/xen.spec @@ -1,5 +1,5 @@ # -# spec file for package xen (Version 4.0.1_21326_01) +# spec file for package xen (Version 4.0.1_01) # # Copyright (c) 2010 SUSE LINUX Products GmbH, Nuernberg, Germany. 
# @@ -25,8 +25,36 @@ ExclusiveArch: %ix86 x86_64 %define changeset 21326 %define xen_build_dir xen-4.0.1-testing %define with_kmp 1 -BuildRequires: LibVNCServer-devel SDL-devel acpica automake bin86 curl-devel dev86 graphviz latex2html libjpeg-devel libxml2-devel ncurses-devel openssl openssl-devel pciutils-devel python-devel texinfo transfig -BuildRequires: texlive texlive-latex +BuildRequires: LibVNCServer-devel +BuildRequires: SDL-devel +BuildRequires: automake +BuildRequires: bin86 +BuildRequires: curl-devel +BuildRequires: dev86 +BuildRequires: graphviz +BuildRequires: latex2html +BuildRequires: libjpeg-devel +BuildRequires: libxml2-devel +BuildRequires: ncurses-devel +BuildRequires: openssl +BuildRequires: openssl-devel +BuildRequires: pciutils-devel +BuildRequires: python-devel +BuildRequires: texinfo +BuildRequires: transfig +%if %suse_version <= 1110 +BuildRequires: pmtools +%else +BuildRequires: acpica +%endif +%if %suse_version >= 1030 +BuildRequires: texlive +BuildRequires: texlive-latex +%else +BuildRequires: te_ams +BuildRequires: te_latex +BuildRequires: tetex +%endif %ifarch x86_64 BuildRequires: glibc-32bit glibc-devel-32bit %define max_cpus 256 @@ -38,7 +66,7 @@ BuildRequires: glibc-32bit glibc-devel-32bit %if %{?with_kmp}0 BuildRequires: kernel-source kernel-syms module-init-tools xorg-x11 %endif -Version: 4.0.1_21326_01 +Version: 4.0.1_01 Release: 1 License: GPLv2+ Group: System/Kernel @@ -101,6 +129,17 @@ Patch23: 22084-x86-xsave-off.patch Patch24: 7410-qemu-alt-gr.patch Patch25: 22135-heap-lock.patch Patch26: 22148-serial-irq-dest.patch +Patch27: 22157-x86-debug-key-i.patch +Patch28: 22159-notify-evtchn-dying.patch +Patch29: 22160-Intel-C6-EOI.patch +Patch30: 22174-x86-pmtimer-accuracy.patch +Patch31: 22175-x86-irq-enter-exit.patch +Patch32: 22177-i386-irq-safe-map_domain_page.patch +Patch33: 22194-tmem-check-pv-mfn.patch +Patch34: 22213-x86-xsave-cpuid-check.patch +Patch35: 22214-x86-msr-misc-enable.patch +Patch36: 
22222-x86-timer-extint.patch +Patch37: 22223-vtd-igd-workaround.patch # Our patches Patch300: xen-config.diff Patch301: xend-config.diff @@ -117,8 +156,6 @@ Patch311: xen-no-dummy-nfs-ip.diff Patch312: serial-split.patch Patch313: xen-xm-top-needs-root.diff Patch314: xen-max-free-mem.diff -Patch315: xen-disable-libxl.diff -Patch316: xen-disable-xenpaging.diff Patch317: xen-extra-fixes.patch Patch322: bridge-opensuse.patch Patch323: bridge-vlan.diff @@ -162,7 +199,6 @@ Patch370: xend-sysconfig.patch Patch371: domu-usb-controller.patch Patch372: popen2-argument-fix.patch Patch373: usb-list.patch -Patch374: xend-devid-or-name.patch # Patches for snapshot support Patch400: snapshot-ioemu-save.patch Patch401: snapshot-ioemu-restore.patch @@ -190,6 +226,8 @@ Patch431: capslock_enable.patch Patch432: enable_more_nic_pxe.patch Patch433: multi-xvdp.patch Patch434: check_device_status.patch +Patch435: change_home_server.patch +Patch436: altgr_2.patch # Jim's domain lock patch Patch450: xend-domain-lock.patch # Hypervisor and PV driver Patches @@ -215,6 +253,24 @@ Patch702: hv_xen_extension.patch Patch703: hv_win7_eoi_bug.patch # Build patch Patch999: tmp_build.patch +# FATE 310510 +Patch10001: xenpaging.tools_xenpaging_cleanup.patch +Patch10002: xenpaging.pageout_policy.patch +Patch10003: xenpaging.xs_daemon_close.patch +Patch10010: xenpaging.policy_linear.patch +Patch10011: xenpaging.pagefile.patch +Patch10012: xenpaging.xenpaging_init.patch +Patch10013: xenpaging.mem_paging_tool_qemu_flush_cache.patch +Patch10014: xenpaging.memory_op.patch +Patch10015: xenpaging.populate_only_if_paged.patch +Patch10017: xenpaging.autostart.patch +Patch10018: xenpaging.signal_handling.patch +Patch10019: xenpaging.MRU_SIZE.patch +Patch10020: xenpaging.get_paged_frame.patch +Patch10021: xenpaging.mem_event_check_ring-free_requests.patch +Patch10022: xenpaging.blacklist.patch +Patch10023: xenpaging.autostart_delay.patch +Patch10024: xenpaging.makefile.patch Url: 
http://www.cl.cam.ac.uk/Research/SRG/netos/xen/ BuildRoot: %{_tmppath}/%{name}-%{version}-build #%define pysite %(python -c "import distutils.sysconfig; print distutils.sysconfig.get_python_lib()") @@ -574,6 +630,17 @@ Authors: %patch24 -p1 %patch25 -p1 %patch26 -p1 +%patch27 -p1 +%patch28 -p1 +%patch29 -p1 +%patch30 -p1 +%patch31 -p1 +%patch32 -p1 +%patch33 -p1 +%patch34 -p1 +%patch35 -p1 +%patch36 -p1 +%patch37 -p1 %patch300 -p1 %patch301 -p1 %patch302 -p1 @@ -589,8 +656,6 @@ Authors: %patch312 -p1 %patch313 -p1 %patch314 -p1 -%patch315 -p1 -%patch316 -p1 %patch317 -p1 %patch322 -p1 %patch323 -p1 @@ -633,7 +698,6 @@ Authors: %patch371 -p1 %patch372 -p1 %patch373 -p1 -%patch374 -p1 %patch400 -p1 %patch401 -p1 %patch402 -p1 @@ -658,6 +722,8 @@ Authors: %patch432 -p1 %patch433 -p1 %patch434 -p1 +%patch435 -p1 +%patch436 -p1 %patch450 -p1 %patch500 -p1 %patch501 -p1 @@ -679,6 +745,24 @@ Authors: %patch702 -p1 %patch703 -p1 %patch999 -p1 +%patch10001 -p1 +%patch10002 -p1 +%patch10003 -p1 +%patch10010 -p1 +%patch10011 -p1 +%patch10012 -p1 +%patch10013 -p1 +%patch10014 -p1 +%patch10015 -p1 +%patch10017 -p1 +%patch10018 -p1 +%patch10019 -p1 +%patch10020 -p1 +%patch10021 -p1 +%patch10022 -p1 +%patch10023 -p1 +%patch10024 -p1 + %build XEN_EXTRAVERSION=%version-%release @@ -688,11 +772,11 @@ sed -i "s/XEN_CHANGESET[\t ]*=.*\$/XEN_CHANGESET = %{changeset}/" xen/Makefi RPM_OPT_FLAGS=${RPM_OPT_FLAGS//-fstack-protector/} export CFLAGS="${RPM_OPT_FLAGS}" export RPM_OPT_FLAGS -make -C tools/include/xen-foreign -make tools docs +make -C tools/include/xen-foreign %{?jobs:-j%{jobs}} +make tools docs %{?jobs:-j%{jobs}} cd tools/debugger/gdb # there are code problems that don't pass the 02-check-gcc-output, hence bitbucket -./gdbbuild 1>/dev/null 2>/dev/null +env MAKE="make %{?jobs:-j%{jobs}}" ./gdbbuild 1>/dev/null 2>/dev/null cd ../../.. 
%if %{?with_kmp}0 # pv driver modules @@ -705,6 +789,7 @@ for flavor in %flavors_to_build; do cd obj/$flavor ./mkbuildtree make -C /usr/src/linux-obj/%_target_cpu/$flavor modules \ + %{?jobs:-j%{jobs}} \ M=$PWD cd ../.. done @@ -733,23 +818,23 @@ install_xen() ln -s xen${ext}-%{version}-%{release}.gz $RPM_BUILD_ROOT/boot/xen${ext}.gz ln -sf xen-syms${ext}-%{version}-%{release} $RPM_BUILD_ROOT/boot/xen-syms${ext} } -make -C xen install max_phys_cpus=%{max_cpus} pae=%{pae_enabled} debug=y crash_debug=y DESTDIR=$RPM_BUILD_ROOT +make -C xen install max_phys_cpus=%{max_cpus} pae=%{pae_enabled} debug=y crash_debug=y DESTDIR=$RPM_BUILD_ROOT %{?jobs:-j%{jobs}} install_xen dbg make -C xen clean -make -C xen install max_phys_cpus=%{max_cpus} pae=%{pae_enabled} debug=n crash_debug=n DESTDIR=$RPM_BUILD_ROOT +make -C xen install max_phys_cpus=%{max_cpus} pae=%{pae_enabled} debug=n crash_debug=n DESTDIR=$RPM_BUILD_ROOT %{?jobs:-j%{jobs}} install_xen make -C xen clean export CFLAGS="$RPM_OPT_FLAGS" export RPM_OPT_FLAGS -make -C tools/include/xen-foreign +make -C tools/include/xen-foreign %{?jobs:-j%{jobs}} # tools export XEN_PYTHON_NATIVE_INSTALL=1 make -C tools install \ - DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} + DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} %{?jobs:-j%{jobs}} cp tools/debugger/gdb/gdb-6.2.1-linux-i386-xen/gdb/gdbserver/gdbserver-xen $RPM_BUILD_ROOT/usr/bin/gdbserver-xen rm -f $RPM_BUILD_ROOT/usr/sbin/{qcow-create,img2qcow,qcow2raw} make -C tools/misc/serial-split install \ - DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} + DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} %{?jobs:-j%{jobs}} %ifarch x86_64 mkdir -p $RPM_BUILD_ROOT/${_libdir}/xen/bin/ ln -s /usr/lib/xen/bin/qemu-dm $RPM_BUILD_ROOT/%{_libdir}/xen/bin/qemu-dm @@ -789,6 +874,7 @@ mkdir -p $RPM_BUILD_ROOT/etc/xen/{vm,examples,scripts} mv $RPM_BUILD_ROOT/etc/xen/xmexample* $RPM_BUILD_ROOT/etc/xen/examples rm -f $RPM_BUILD_ROOT/etc/xen/examples/*nbd install -m644 %SOURCE9 %SOURCE10 $RPM_BUILD_ROOT/etc/xen/examples/ 
+install -m644 tools/xentrace/formats $RPM_BUILD_ROOT/etc/xen/examples/xentrace_formats.txt # scripts rm -f $RPM_BUILD_ROOT/etc/xen/scripts/block-*nbd install -m755 %SOURCE11 %SOURCE12 %SOURCE13 %SOURCE14 %SOURCE15 %SOURCE16 %SOURCE17 %SOURCE21 $RPM_BUILD_ROOT/etc/xen/scripts/ @@ -871,6 +957,7 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug %{_libdir}/libfsimage.so.* %{_libdir}/libxen*.so.* %{_libdir}/libvhd.so.* +%{_libdir}/libxlutil.so.* %files tools %defattr(-,root,root) @@ -906,6 +993,7 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug /usr/sbin/vhd-update /usr/sbin/vhd-util /usr/sbin/gdbsx +/usr/sbin/xl %dir %{_libdir}/xen %dir %{_libdir}/xen/bin %ifarch x86_64 @@ -958,6 +1046,7 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug /etc/sysconfig/network/scripts/xen-updown.sh /etc/sysconfig/network/if-up.d/xen /etc/sysconfig/network/if-down.d/xen +/etc/bash_completion.d/xl.sh %dir %{_defaultdocdir}/xen %{_defaultdocdir}/xen/COPYING %{_defaultdocdir}/xen/README.SuSE @@ -1012,6 +1101,8 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug %{_libdir}/libxen*.so %{_libdir}/libvhd.a %{_libdir}/libvhd.so +%{_libdir}/libxlutil.a +%{_libdir}/libxlutil.so /usr/bin/serial-split /usr/include/blktaplib.h /usr/include/fsimage* @@ -1019,6 +1110,7 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug /usr/include/xen/ /usr/include/xs.h /usr/include/xs_lib.h +/usr/include/libxl.h %files doc-html %defattr(-,root,root) diff --git a/xend-domain-lock.patch b/xend-domain-lock.patch index 01d5912..f44edb7 100644 --- a/xend-domain-lock.patch +++ b/xend-domain-lock.patch @@ -94,7 +94,7 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py XendTask.log_progress(0, 30, self._constructDomain) XendTask.log_progress(31, 60, self._initDomain) -@@ -2990,6 +2992,11 @@ class XendDomainInfo: +@@ -2987,6 +2989,11 @@ class XendDomainInfo: self._stateSet(DOM_STATE_HALTED) self.domid = None # Do not push into _stateSet()! 
@@ -106,7 +106,7 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py finally: self.refresh_shutdown_lock.release() -@@ -4503,6 +4510,74 @@ class XendDomainInfo: +@@ -4505,6 +4512,74 @@ class XendDomainInfo: def has_device(self, dev_class, dev_uuid): return (dev_uuid in self.info['%s_refs' % dev_class.lower()]) diff --git a/xenpaging.MRU_SIZE.patch b/xenpaging.MRU_SIZE.patch new file mode 100644 index 0000000..f1c24d5 --- /dev/null +++ b/xenpaging.MRU_SIZE.patch @@ -0,0 +1,23 @@ +Subject: xenpaging: increase recently used pages from 4MB to 64MB + +Increase recently used pages from 4MB to 64MB. +Keeping more pages in memory allows the guest to make more progress if the +paging file spans the entire guest memory. + +Signed-off-by: Olaf Hering + +--- + tools/xenpaging/policy_default.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- xen-4.0.1-testing.orig/tools/xenpaging/policy_default.c ++++ xen-4.0.1-testing/tools/xenpaging/policy_default.c +@@ -26,7 +26,7 @@ + #include "policy.h" + + +-#define MRU_SIZE 1024 ++#define MRU_SIZE (1024 * 16) + + + static unsigned long mru[MRU_SIZE]; diff --git a/xenpaging.autostart.patch b/xenpaging.autostart.patch new file mode 100644 index 0000000..bbfb233 --- /dev/null +++ b/xenpaging.autostart.patch @@ -0,0 +1,234 @@ +Subject: xenpaging: start xenpaging via config option + +Start xenpaging via config option. 
+ +TODO: add config option for pagefile directory +TODO: add libxl support +TODO: parse config values like 42K, 42M, 42G, 42% + +Signed-off-by: Olaf Hering + +--- + tools/examples/xmexample.hvm | 3 + + tools/python/README.XendConfig | 1 + tools/python/README.sxpcfg | 1 + tools/python/xen/xend/XendConfig.py | 3 + + tools/python/xen/xend/XendDomainInfo.py | 6 ++ + tools/python/xen/xend/image.py | 87 ++++++++++++++++++++++++++++++++ + tools/python/xen/xm/create.py | 5 + + tools/python/xen/xm/xenapi_create.py | 1 + 8 files changed, 107 insertions(+) + +--- xen-4.0.1-testing.orig/tools/examples/xmexample.hvm ++++ xen-4.0.1-testing/tools/examples/xmexample.hvm +@@ -127,6 +127,9 @@ disk = [ 'file:/var/lib/xen/images/disk. + # Device Model to be used + device_model = 'qemu-dm' + ++# xenpaging, number of pages ++xenpaging = 42 ++ + #----------------------------------------------------------------------------- + # boot on floppy (a), hard disk (c), Network (n) or CD-ROM (d) + # default: hard disk, cd-rom, floppy +--- xen-4.0.1-testing.orig/tools/python/README.XendConfig ++++ xen-4.0.1-testing/tools/python/README.XendConfig +@@ -120,6 +120,7 @@ otherConfig + image.vncdisplay + image.vncunused + image.hvm.device_model ++ image.hvm.xenpaging + image.hvm.display + image.hvm.xauthority + image.hvm.vncconsole +--- xen-4.0.1-testing.orig/tools/python/README.sxpcfg ++++ xen-4.0.1-testing/tools/python/README.sxpcfg +@@ -51,6 +51,7 @@ image + - vncunused + (HVM) + - device_model ++ - xenpaging + - display + - xauthority + - vncconsole +--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendConfig.py ++++ xen-4.0.1-testing/tools/python/xen/xend/XendConfig.py +@@ -145,6 +145,7 @@ XENAPI_PLATFORM_CFG_TYPES = { + 'apic': int, + 'boot': str, + 'device_model': str, ++ 'xenpaging': int, + 'loader': str, + 'display' : str, + 'fda': str, +@@ -508,6 +509,8 @@ class XendConfig(dict): + self['platform']['nomigrate'] = 0 + + if self.is_hvm(): ++ if 'xenpaging' not in self['platform']: ++ 
self['platform']['xenpaging'] = None + if 'timer_mode' not in self['platform']: + self['platform']['timer_mode'] = 1 + if 'viridian' not in self['platform']: +--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py ++++ xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py +@@ -2439,6 +2439,7 @@ class XendDomainInfo: + + if self.image: + self.image.createDeviceModel() ++ self.image.createXenPaging() + + #if have pass-through devs, need the virtual pci slots info from qemu + self.pci_device_configure_boot() +@@ -2451,6 +2452,11 @@ class XendDomainInfo: + self.image.destroyDeviceModel() + except Exception, e: + log.exception("Device model destroy failed %s" % str(e)) ++ try: ++ log.debug("stopping xenpaging") ++ self.image.destroyXenPaging() ++ except Exception, e: ++ log.exception("stopping xenpaging failed %s" % str(e)) + else: + log.debug("No device model") + +--- xen-4.0.1-testing.orig/tools/python/xen/xend/image.py ++++ xen-4.0.1-testing/tools/python/xen/xend/image.py +@@ -122,12 +122,14 @@ class ImageHandler: + self.vm.permissionsVm("image/cmdline", { 'dom': self.vm.getDomid(), 'read': True } ) + + self.device_model = vmConfig['platform'].get('device_model') ++ self.xenpaging = vmConfig['platform'].get('xenpaging') + + self.display = vmConfig['platform'].get('display') + self.xauthority = vmConfig['platform'].get('xauthority') + self.vncconsole = int(vmConfig['platform'].get('vncconsole', 0)) + self.dmargs = self.parseDeviceModelArgs(vmConfig) + self.pid = None ++ self.xenpaging_pid = None + rtc_timeoffset = int(vmConfig['platform'].get('rtc_timeoffset', 0)) + if int(vmConfig['platform'].get('localtime', 0)): + if time.localtime(time.time())[8]: +@@ -392,6 +394,91 @@ class ImageHandler: + sentinel_fifos_inuse[sentinel_path_fifo] = 1 + self.sentinel_path_fifo = sentinel_path_fifo + ++ def createXenPaging(self): ++ if self.xenpaging is None: ++ return ++ if self.xenpaging == 0: ++ return ++ if self.xenpaging_pid: ++ return ++ xenpaging_bin = 
auxbin.pathTo("xenpaging") ++ args = [xenpaging_bin] ++ args = args + ([ "%d" % self.vm.getDomid()]) ++ args = args + ([ "%s" % self.xenpaging]) ++ env = dict(os.environ) ++ self.xenpaging_logfile = "/var/log/xen/xenpaging-%s.log" % str(self.vm.info['name_label']) ++ logfile_mode = os.O_WRONLY|os.O_CREAT|os.O_APPEND|os.O_TRUNC ++ null = os.open("/dev/null", os.O_RDONLY) ++ logfd = os.open(self.xenpaging_logfile, logfile_mode, 0644) ++ sys.stderr.flush() ++ contract = osdep.prefork("%s:%d" % (self.vm.getName(), self.vm.getDomid())) ++ xenpaging_pid = os.fork() ++ if xenpaging_pid == 0: #child ++ try: ++ xenpaging_dir = "/var/lib/xen/xenpaging" ++ osdep.postfork(contract) ++ os.dup2(null, 0) ++ os.dup2(logfd, 1) ++ os.dup2(logfd, 2) ++ try: ++ os.mkdir(xenpaging_dir) ++ except: ++ log.info("mkdir %s failed" % xenpaging_dir) ++ pass ++ try: ++ os.chdir(xenpaging_dir) ++ except: ++ log.warn("chdir %s failed" % xenpaging_dir) ++ try: ++ log.info("starting %s" % args) ++ os.execve(xenpaging_bin, args, env) ++ except Exception, e: ++ print >>sys.stderr, ( ++ 'failed to execute xenpaging: %s: %s' % ++ xenpaging_bin, utils.exception_string(e)) ++ os._exit(126) ++ except Exception, e: ++ log.warn("staring xenpaging in %s failed" % xenpaging_dir) ++ os._exit(127) ++ else: ++ osdep.postfork(contract, abandon=True) ++ self.xenpaging_pid = xenpaging_pid ++ os.close(null) ++ os.close(logfd) ++ ++ def destroyXenPaging(self): ++ if self.xenpaging is None: ++ return ++ if self.xenpaging_pid: ++ try: ++ os.kill(self.xenpaging_pid, signal.SIGHUP) ++ except OSError, exn: ++ log.exception(exn) ++ for i in xrange(100): ++ try: ++ (p, rv) = os.waitpid(self.xenpaging_pid, os.WNOHANG) ++ if p == self.xenpaging_pid: ++ break ++ except OSError: ++ # This is expected if Xend has been restarted within ++ # the life of this domain. In this case, we can kill ++ # the process, but we can't wait for it because it's ++ # not our child. 
We continue this loop, and after it is ++ # terminated make really sure the process is going away ++ # (SIGKILL). ++ pass ++ time.sleep(0.1) ++ else: ++ log.warning("xenpaging %d took more than 10s " ++ "to terminate: sending SIGKILL" % self.xenpaging_pid) ++ try: ++ os.kill(self.xenpaging_pid, signal.SIGKILL) ++ os.waitpid(self.xenpaging_pid, 0) ++ except OSError: ++ # This happens if the process doesn't exist. ++ pass ++ self.xenpaging_pid = None ++ + def createDeviceModel(self, restore = False): + if self.device_model is None: + return +--- xen-4.0.1-testing.orig/tools/python/xen/xm/create.py ++++ xen-4.0.1-testing/tools/python/xen/xm/create.py +@@ -495,6 +495,10 @@ gopts.var('nfs_root', val="PATH", + fn=set_value, default=None, + use="Set the path of the root NFS directory.") + ++gopts.var('xenpaging', val='NUM', ++ fn=set_int, default=None, ++ use="Number of pages to swap.") ++ + gopts.var('device_model', val='FILE', + fn=set_value, default=None, + use="Path to device model program.") +@@ -1080,6 +1084,7 @@ def configure_hvm(config_image, vals): + args = [ 'acpi', 'apic', + 'boot', + 'cpuid', 'cpuid_check', ++ 'xenpaging', + 'device_model', 'display', + 'fda', 'fdb', + 'gfx_passthru', 'guest_os_type', +--- xen-4.0.1-testing.orig/tools/python/xen/xm/xenapi_create.py ++++ xen-4.0.1-testing/tools/python/xen/xm/xenapi_create.py +@@ -1086,6 +1086,7 @@ class sxp2xml: + 'acpi', + 'apic', + 'boot', ++ 'xenpaging', + 'device_model', + 'loader', + 'fda', diff --git a/xenpaging.autostart_delay.patch b/xenpaging.autostart_delay.patch new file mode 100644 index 0000000..9ef1074 --- /dev/null +++ b/xenpaging.autostart_delay.patch @@ -0,0 +1,74 @@ +Subject: xenpaging: add dynamic startup delay for xenpaging + +This is a debug helper. Since the xenpaging support is still fragile, run +xenpaging at different stages in the bootprocess. Different delays will trigger +more bugs. 
This implementation starts without delay for 5 reboots, then +increments the delay by 0.1 seconds It uses xenstore for presistant storage of +delay values + +TODO: find the correct place to remove the xenstore directory when the guest is shutdown or crashed + +Signed-off-by: Olaf Hering + +--- + tools/python/xen/xend/image.py | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +--- xen-4.0.1-testing.orig/tools/python/xen/xend/image.py ++++ xen-4.0.1-testing/tools/python/xen/xend/image.py +@@ -123,6 +123,18 @@ class ImageHandler: + + self.device_model = vmConfig['platform'].get('device_model') + self.xenpaging = vmConfig['platform'].get('xenpaging') ++ self.xenpaging_delay = xstransact.Read("/local/domain/0/xenpaging/%s/xenpaging_delay" % self.vm.info['name_label']) ++ if self.xenpaging_delay == None: ++ log.warn("XXX creating /local/domain/0/xenpaging/%s" % self.vm.info['name_label']) ++ xstransact.Mkdir("/local/domain/0/xenpaging/%s" % self.vm.info['name_label']) ++ xstransact.Store("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'], ('xenpaging_delay', '0.0')) ++ xstransact.Store("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'], ('xenpaging_delay_inc', '0.1')) ++ xstransact.Store("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'], ('xenpaging_delay_use', '5')) ++ xstransact.Store("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'], ('xenpaging_delay_used', '0')) ++ self.xenpaging_delay = float(xstransact.Read("/local/domain/0/xenpaging/%s/xenpaging_delay" % self.vm.info['name_label'])) ++ self.xenpaging_delay_inc = float(xstransact.Read("/local/domain/0/xenpaging/%s/xenpaging_delay_inc" % self.vm.info['name_label'])) ++ self.xenpaging_delay_use = int(xstransact.Read("/local/domain/0/xenpaging/%s/xenpaging_delay_use" % self.vm.info['name_label'])) ++ self.xenpaging_delay_used = int(xstransact.Read("/local/domain/0/xenpaging/%s/xenpaging_delay_used" % self.vm.info['name_label'])) + + self.display = 
vmConfig['platform'].get('display') + self.xauthority = vmConfig['platform'].get('xauthority') +@@ -401,6 +413,17 @@ class ImageHandler: + return + if self.xenpaging_pid: + return ++ if self.xenpaging_delay_used < self.xenpaging_delay_use: ++ self.xenpaging_delay_used += 1 ++ else: ++ self.xenpaging_delay_used = 0 ++ self.xenpaging_delay += self.xenpaging_delay_inc ++ log.info("delay_used %s" % self.xenpaging_delay_used) ++ log.info("delay_use %s" % self.xenpaging_delay_use) ++ log.info("delay %s" % self.xenpaging_delay) ++ log.info("delay_inc %s" % self.xenpaging_delay_inc) ++ xstransact.Store("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'], ('xenpaging_delay', self.xenpaging_delay)) ++ xstransact.Store("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'], ('xenpaging_delay_used', self.xenpaging_delay_used)) + xenpaging_bin = auxbin.pathTo("xenpaging") + args = [xenpaging_bin] + args = args + ([ "%d" % self.vm.getDomid()]) +@@ -430,6 +453,9 @@ class ImageHandler: + except: + log.warn("chdir %s failed" % xenpaging_dir) + try: ++ if self.xenpaging_delay != 0.0: ++ log.info("delaying xenpaging startup %s seconds ..." % self.xenpaging_delay) ++ time.sleep(self.xenpaging_delay) + log.info("starting %s" % args) + os.execve(xenpaging_bin, args, env) + except Exception, e: +@@ -449,6 +475,8 @@ class ImageHandler: + def destroyXenPaging(self): + if self.xenpaging is None: + return ++ # FIXME find correct place for guest shutdown or crash ++ #xstransact.Remove("/local/domain/0/xenpaging/%s" % self.vm.info['name_label']) + if self.xenpaging_pid: + try: + os.kill(self.xenpaging_pid, signal.SIGHUP) diff --git a/xenpaging.blacklist.patch b/xenpaging.blacklist.patch new file mode 100644 index 0000000..d854de4 --- /dev/null +++ b/xenpaging.blacklist.patch @@ -0,0 +1,27 @@ +Subject: xenpaging: prevent page-out of first 16MB + +This is more a workaround than a bugfix: +Don't page out first 16MB of memory. 
+When the BIOS does its initialization process and xenpaging removes pages, +crashes will occur due to lack of support of xenpaging. + +Signed-off-by: Olaf Hering + +--- + tools/xenpaging/policy_default.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- xen-4.0.1-testing.orig/tools/xenpaging/policy_default.c ++++ xen-4.0.1-testing/tools/xenpaging/policy_default.c +@@ -60,8 +60,9 @@ int policy_init(xenpaging_t *paging) + for ( i = 0; i < MRU_SIZE; i++ ) + mru[i] = INVALID_MFN; + +- /* Don't page out page 0 */ +- set_bit(0, bitmap); ++ /* Don't page out first 16MB */ ++ for ( i = 0; i < ((16*1024*1024)/4096); i++ ) ++ set_bit(i, bitmap); + + out: + return rc; diff --git a/xenpaging.get_paged_frame.patch b/xenpaging.get_paged_frame.patch new file mode 100644 index 0000000..6d9d6f3 --- /dev/null +++ b/xenpaging.get_paged_frame.patch @@ -0,0 +1,170 @@ +Subject: xenpaging: page-in granttable entries + +When converting a gfn to mfn, check if the page is paged-out. +If it is, request a page-in and return GNTST_eagain to the caller +to indicate a retry of the hypercall is required. +This fixes granttable errors when xenpaging is enabled in the guest.
+ +Signed-off-by: Olaf Hering +Already-Acked-by: Patrick Colp +Already-Acked-by: Keir Fraser + +--- + xen/common/grant_table.c | 94 ++++++++++++++++++++++++++++++----------------- + 1 file changed, 60 insertions(+), 34 deletions(-) + +--- xen-4.0.1-testing.orig/xen/common/grant_table.c ++++ xen-4.0.1-testing/xen/common/grant_table.c +@@ -139,6 +139,37 @@ shared_entry_header(struct grant_table * + #define active_entry(t, e) \ + ((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE]) + ++/* Check if the page has been paged out */ ++static int __get_paged_frame(unsigned long gfn, unsigned long *frame, int readonly, struct domain *rd) ++{ ++ p2m_type_t p2mt; ++ mfn_t mfn; ++ int rc = GNTST_okay; ++ ++ if ( readonly ) ++ mfn = gfn_to_mfn(rd, gfn, &p2mt); ++ else ++ mfn = gfn_to_mfn_unshare(rd, gfn, &p2mt, 1); ++ ++ if ( p2m_is_valid(p2mt) ) ++ { ++ *frame = mfn_x(mfn); ++ if ( p2m_is_paging(p2mt) ) ++ { ++ if ( p2m_is_paged(p2mt) ) ++ p2m_mem_paging_populate(rd, gfn); ++ rc = GNTST_eagain; ++ } ++ } ++ else ++ { ++ *frame = INVALID_MFN; ++ rc = GNTST_bad_page; ++ } ++ ++ return rc; ++} ++ + static inline int + __get_maptrack_handle( + struct grant_table *t) +@@ -527,14 +558,16 @@ __gnttab_map_grant_ref( + + if ( !act->pin ) + { ++ unsigned long gfn; ++ unsigned long frame; ++ ++ gfn = sha1 ? sha1->frame : sha2->full_page.frame; ++ rc = __get_paged_frame(gfn, &frame, !!(op->flags & GNTMAP_readonly), rd); ++ if ( rc != GNTST_okay ) ++ goto unlock_out; ++ act->gfn = gfn; + act->domid = ld->domain_id; +- if ( sha1 ) +- act->gfn = sha1->frame; +- else +- act->gfn = sha2->full_page.frame; +- act->frame = (op->flags & GNTMAP_readonly) ? 
+- gmfn_to_mfn(rd, act->gfn) : +- gfn_to_mfn_private(rd, act->gfn); ++ act->frame = frame; + act->start = 0; + act->length = PAGE_SIZE; + act->is_sub_page = 0; +@@ -1697,6 +1730,7 @@ __acquire_grant_for_copy( + domid_t trans_domid; + grant_ref_t trans_gref; + struct domain *rrd; ++ unsigned long gfn; + unsigned long grant_frame; + unsigned trans_page_off; + unsigned trans_length; +@@ -1814,9 +1848,11 @@ __acquire_grant_for_copy( + } + else if ( sha1 ) + { +- act->gfn = sha1->frame; +- grant_frame = readonly ? gmfn_to_mfn(rd, act->gfn) : +- gfn_to_mfn_private(rd, act->gfn); ++ gfn = sha1->frame; ++ rc = __get_paged_frame(gfn, &grant_frame, readonly, rd); ++ if ( rc != GNTST_okay ) ++ goto unlock_out; ++ act->gfn = gfn; + is_sub_page = 0; + trans_page_off = 0; + trans_length = PAGE_SIZE; +@@ -1824,9 +1860,11 @@ __acquire_grant_for_copy( + } + else if ( !(sha2->hdr.flags & GTF_sub_page) ) + { +- act->gfn = sha2->full_page.frame; +- grant_frame = readonly ? gmfn_to_mfn(rd, act->gfn) : +- gfn_to_mfn_private(rd, act->gfn); ++ gfn = sha2->full_page.frame; ++ rc = __get_paged_frame(gfn, &grant_frame, readonly, rd); ++ if ( rc != GNTST_okay ) ++ goto unlock_out; ++ act->gfn = gfn; + is_sub_page = 0; + trans_page_off = 0; + trans_length = PAGE_SIZE; +@@ -1834,9 +1872,11 @@ __acquire_grant_for_copy( + } + else + { +- act->gfn = sha2->sub_page.frame; +- grant_frame = readonly ? 
gmfn_to_mfn(rd, act->gfn) : +- gfn_to_mfn_private(rd, act->gfn); ++ gfn = sha2->sub_page.frame; ++ rc = __get_paged_frame(gfn, &grant_frame, readonly, rd); ++ if ( rc != GNTST_okay ) ++ goto unlock_out; ++ act->gfn = gfn; + is_sub_page = 1; + trans_page_off = sha2->sub_page.page_off; + trans_length = sha2->sub_page.length; +@@ -1932,16 +1972,9 @@ __gnttab_copy( + else + { + #ifdef CONFIG_X86 +- p2m_type_t p2mt; +- s_frame = mfn_x(gfn_to_mfn(sd, op->source.u.gmfn, &p2mt)); +- if ( !p2m_is_valid(p2mt) ) +- s_frame = INVALID_MFN; +- if ( p2m_is_paging(p2mt) ) +- { +- p2m_mem_paging_populate(sd, op->source.u.gmfn); +- rc = -ENOENT; ++ rc = __get_paged_frame(op->source.u.gmfn, &s_frame, 1, sd); ++ if ( rc != GNTST_okay ) + goto error_out; +- } + #else + s_frame = gmfn_to_mfn(sd, op->source.u.gmfn); + #endif +@@ -1978,16 +2011,9 @@ __gnttab_copy( + else + { + #ifdef CONFIG_X86 +- p2m_type_t p2mt; +- d_frame = mfn_x(gfn_to_mfn_unshare(dd, op->dest.u.gmfn, &p2mt, 1)); +- if ( !p2m_is_valid(p2mt) ) +- d_frame = INVALID_MFN; +- if ( p2m_is_paging(p2mt) ) +- { +- p2m_mem_paging_populate(dd, op->dest.u.gmfn); +- rc = -ENOENT; ++ rc = __get_paged_frame(op->dest.u.gmfn, &d_frame, 0, dd); ++ if ( rc != GNTST_okay ) + goto error_out; +- } + #else + d_frame = gmfn_to_mfn(dd, op->dest.u.gmfn); + #endif diff --git a/xenpaging.makefile.patch b/xenpaging.makefile.patch new file mode 100644 index 0000000..3086f0d --- /dev/null +++ b/xenpaging.makefile.patch @@ -0,0 +1,13 @@ +Index: xen-4.0.1-testing/tools/xenpaging/Makefile +=================================================================== +--- xen-4.0.1-testing.orig/tools/xenpaging/Makefile ++++ xen-4.0.1-testing/tools/xenpaging/Makefile +@@ -27,7 +27,7 @@ IBINS = xenpaging + all: $(IBINS) + + xenpaging: $(OBJS) +- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ ++ $(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS) + + install: all + $(INSTALL_DIR) $(DESTDIR)$(SBINDIR) diff --git a/xenpaging.mem_event_check_ring-free_requests.patch 
b/xenpaging.mem_event_check_ring-free_requests.patch new file mode 100644 index 0000000..727e437 --- /dev/null +++ b/xenpaging.mem_event_check_ring-free_requests.patch @@ -0,0 +1,25 @@ +Subject: xenpaging: print info when free request slots drop below 3 + +Add debugging aid to free request slots in the ring buffer. +It should not happen that the ring gets full, print info anyway if it happens. + +Signed-off-by: Olaf Hering + +--- + xen/arch/x86/mm/mem_event.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- xen-4.0.1-testing.orig/xen/arch/x86/mm/mem_event.c ++++ xen-4.0.1-testing/xen/arch/x86/mm/mem_event.c +@@ -168,6 +168,11 @@ int mem_event_check_ring(struct domain * + mem_event_ring_lock(d); + + free_requests = RING_FREE_REQUESTS(&d->mem_event.front_ring); ++ if ( unlikely(free_requests < 3) ) ++ { ++ gdprintk(XENLOG_INFO, "free request slots: %d\n", free_requests); ++ WARN_ON(free_requests == 0); ++ } + ring_full = free_requests < MEM_EVENT_RING_THRESHOLD; + + if ( (current->domain->domain_id == d->domain_id) && ring_full ) diff --git a/xenpaging.mem_paging_tool_qemu_flush_cache.patch b/xenpaging.mem_paging_tool_qemu_flush_cache.patch new file mode 100644 index 0000000..03a2fa4 --- /dev/null +++ b/xenpaging.mem_paging_tool_qemu_flush_cache.patch @@ -0,0 +1,29 @@ +Subject: xenpaging/qemu-dm: add command to flush buffer cache. + +Add support for a xenstore dm command to flush qemu's buffer cache. + +qemu will just keep mapping pages and not release them, which causes problems +for the memory pager (since the page is mapped, it won't get paged out). When +the pager has trouble finding a page to page out, it asks qemu to flush its +buffer, which releases all the page mappings. This makes it possible to find +pages to swap out again.
+ +Already-Signed-off-by: Patrick Colp +Signed-off-by: Olaf Hering + +--- + tools/ioemu-qemu-xen/xenstore.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/xenstore.c ++++ xen-4.0.1-testing/tools/ioemu-qemu-xen/xenstore.c +@@ -1021,6 +1021,9 @@ static void xenstore_process_dm_command_ + do_pci_add(par); + free(par); + #endif ++ } else if (!strncmp(command, "flush-cache", len)) { ++ fprintf(logfile, "dm-command: flush caches\n"); ++ qemu_invalidate_map_cache(); + } else { + fprintf(logfile, "dm-command: unknown command\"%*s\"\n", len, command); + } diff --git a/xenpaging.memory_op.patch b/xenpaging.memory_op.patch new file mode 100644 index 0000000..bc55800 --- /dev/null +++ b/xenpaging.memory_op.patch @@ -0,0 +1,454 @@ +Subject: xenpaging: handle paged-out pages in XENMEM_* commands + +Fix these two warnings: +(XEN) Assertion '__mfn_valid(mfn_x(omfn))' failed at p2m.c:2200 +(XEN) memory.c:171:d1 Domain 1 page number 37ff0 invalid + +Handle paged-out pages in xc_memory_op, guest_physmap_add_entry and +guest_remove_page. Use new do_xenmem_op_retry helper function. +In addition, export also xen/errno.h to hvmloader to get ENOENT define.
+ + +XENMEM_populate_physmap + populate_physmap + -> guest_physmap_add_entry + +XENMEM_exchange + memory_exchange + -> guest_physmap_add_entry + +XENMEM_add_to_physmap + guest_physmap_add_page + -> guest_physmap_add_entry + +__gnttab_map_grant_ref + create_grant_host_mapping + create_grant_p2m_mapping + -> guest_physmap_add_entry + +XENMEM_decrease_reservation + decrease_reservation + -> guest_remove_page + +XENMEM_add_to_physmap + -> guest_remove_page + +XENMEM_add_to_physmap + -> XENMAPSPACE_gmfn + +Signed-off-by: Olaf Hering + +--- + tools/firmware/hvmloader/hvmloader.c | 9 +++- + tools/firmware/hvmloader/util.c | 26 +++++++++++- + tools/include/Makefile | 1 + tools/ioemu-qemu-xen/hw/vga.c | 5 +- + tools/libxc/xc_domain.c | 71 +++++++++++++++++++++-------------- + xen/arch/x86/mm.c | 26 ++++++++++-- + xen/arch/x86/mm/p2m.c | 7 +++ + xen/common/memory.c | 25 +++++++++++- + 8 files changed, 131 insertions(+), 39 deletions(-) + +--- xen-4.0.1-testing.orig/tools/firmware/hvmloader/hvmloader.c ++++ xen-4.0.1-testing/tools/firmware/hvmloader/hvmloader.c +@@ -29,6 +29,7 @@ + #include "pci_regs.h" + #include "e820.h" + #include "option_rom.h" ++#include + #include + #include + #include +@@ -306,13 +307,19 @@ static void pci_setup(void) + while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend ) + { + struct xen_add_to_physmap xatp; ++ int rc; + if ( hvm_info->high_mem_pgend == 0 ) + hvm_info->high_mem_pgend = 1ull << (32 - PAGE_SHIFT); + xatp.domid = DOMID_SELF; + xatp.space = XENMAPSPACE_gmfn; + xatp.idx = --hvm_info->low_mem_pgend; + xatp.gpfn = hvm_info->high_mem_pgend++; +- if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 ) ++ do { ++ rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp); ++ if ( rc == -ENOENT ) ++ cpu_relax(); ++ } while ( rc == -ENOENT ); ++ if ( rc != 0 ) + BUG(); + } + +--- xen-4.0.1-testing.orig/tools/firmware/hvmloader/util.c ++++ xen-4.0.1-testing/tools/firmware/hvmloader/util.c +@@ -23,6 +23,7 @@ + #include "e820.h" + 
#include "hypercall.h" + #include ++#include + #include + #include + +@@ -323,19 +324,27 @@ void *mem_alloc(uint32_t size, uint32_t + + while ( (reserve >> PAGE_SHIFT) != (e >> PAGE_SHIFT) ) + { ++ int rc; + reserve += PAGE_SIZE; + mfn = reserve >> PAGE_SHIFT; + + /* Try to allocate a brand new page in the reserved area. */ + if ( !over_allocated ) + { ++ uint8_t delay = 0; + xmr.domid = DOMID_SELF; + xmr.mem_flags = 0; + xmr.extent_order = 0; + xmr.nr_extents = 1; + set_xen_guest_handle(xmr.extent_start, &mfn); +- if ( hypercall_memory_op(XENMEM_populate_physmap, &xmr) == 1 ) ++ do { ++ rc = hypercall_memory_op(XENMEM_populate_physmap, &xmr); ++ if ( rc == 0 ) ++ cpu_relax(); ++ } while ( rc == 0 && ++delay ); ++ if ( rc == 1 ) + continue; ++ printf("%s: over_allocated\n", __func__); + over_allocated = 1; + } + +@@ -353,7 +362,12 @@ void *mem_alloc(uint32_t size, uint32_t + xatp.domid = DOMID_SELF; + xatp.space = XENMAPSPACE_gmfn; + xatp.gpfn = mfn; +- if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 ) ++ do { ++ rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp); ++ if ( rc == -ENOENT ) ++ cpu_relax(); ++ } while ( rc == -ENOENT ); ++ if ( rc != 0 ) + BUG(); + } + +@@ -595,6 +609,7 @@ uint16_t get_cpu_mhz(void) + uint64_t cpu_khz; + uint32_t tsc_to_nsec_mul, version; + int8_t tsc_shift; ++ int rc; + + static uint16_t cpu_mhz; + if ( cpu_mhz != 0 ) +@@ -605,7 +620,12 @@ uint16_t get_cpu_mhz(void) + xatp.space = XENMAPSPACE_shared_info; + xatp.idx = 0; + xatp.gpfn = (unsigned long)shared_info >> 12; +- if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 ) ++ do { ++ rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp); ++ if ( rc == -ENOENT ) ++ cpu_relax(); ++ } while ( rc == -ENOENT ); ++ if ( rc != 0 ) + BUG(); + + /* Get a consistent snapshot of scale factor (multiplier and shift). 
*/ +--- xen-4.0.1-testing.orig/tools/include/Makefile ++++ xen-4.0.1-testing/tools/include/Makefile +@@ -12,6 +12,7 @@ xen/.dir: + @rm -rf xen + mkdir -p xen/libelf + ln -sf ../$(XEN_ROOT)/xen/include/public/COPYING xen ++ ln -sf ../$(XEN_ROOT)/xen/include/xen/errno.h xen + ln -sf $(addprefix ../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) xen + ln -sf $(addprefix ../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) xen + ln -sf ../xen-sys/$(XEN_OS) xen/sys +--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/vga.c ++++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/vga.c +@@ -2157,9 +2157,10 @@ void set_vram_mapping(void *opaque, unsi + for (i = 0; i < (end - begin) >> TARGET_PAGE_BITS; i++) { + xatp.idx = (s->vram_gmfn >> TARGET_PAGE_BITS) + i; + xatp.gpfn = (begin >> TARGET_PAGE_BITS) + i; +- rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp); ++ while ((rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp)) && errno == ENOENT) ++ usleep(1000); + if (rc) { +- fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %"PRI_xen_pfn" failed: %d\n", xatp.idx, xatp.gpfn, rc); ++ fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %"PRI_xen_pfn" failed: %d\n", xatp.idx, xatp.gpfn, errno); + return; + } + } +--- xen-4.0.1-testing.orig/tools/libxc/xc_domain.c ++++ xen-4.0.1-testing/tools/libxc/xc_domain.c +@@ -536,6 +536,44 @@ int xc_domain_get_tsc_info(int xc_handle + return rc; + } + ++static int do_xenmem_op_retry(int xc_handle, int cmd, struct xen_memory_reservation *reservation, unsigned long nr_extents, xen_pfn_t *extent_start) ++{ ++ int err = 0; ++ unsigned long count = nr_extents; ++ unsigned long delay = 0; ++ unsigned long start = 0; ++ ++ fprintf(stderr, "%s: cmd %d count %lx\n",__func__,cmd,count); ++ while ( count && start < nr_extents ) ++ { ++ set_xen_guest_handle(reservation->extent_start, extent_start + start); ++ reservation->nr_extents = count; ++ ++ err = xc_memory_op(xc_handle, cmd, reservation); ++ if ( err == count ) 
++ { ++ err = 0; ++ break; ++ } ++ ++ if ( err > count || err < 0 || delay > 1000 * 1000) ++ { ++ fprintf(stderr, "%s: %d err %x count %lx start %lx delay %lu/%lu\n",__func__,cmd,err,count,start,delay,delay/666); ++ err = -1; ++ break; ++ } ++ ++ if ( err ) ++ delay = 0; ++ ++ start += err; ++ count -= err; ++ usleep(delay); ++ delay += 666; /* 1500 iterations, 12 seconds */ ++ } ++ ++ return err; ++} + + int xc_domain_memory_increase_reservation(int xc_handle, + uint32_t domid, +@@ -546,26 +584,18 @@ int xc_domain_memory_increase_reservatio + { + int err; + struct xen_memory_reservation reservation = { +- .nr_extents = nr_extents, + .extent_order = extent_order, + .mem_flags = mem_flags, + .domid = domid + }; + +- /* may be NULL */ +- set_xen_guest_handle(reservation.extent_start, extent_start); +- +- err = xc_memory_op(xc_handle, XENMEM_increase_reservation, &reservation); +- if ( err == nr_extents ) +- return 0; +- +- if ( err >= 0 ) ++ err = do_xenmem_op_retry(xc_handle, XENMEM_increase_reservation, &reservation, nr_extents, extent_start); ++ if ( err < 0 ) + { + DPRINTF("Failed allocation for dom %d: " + "%ld extents of order %d, mem_flags %x\n", + domid, nr_extents, extent_order, mem_flags); + errno = ENOMEM; +- err = -1; + } + + return err; +@@ -579,14 +609,11 @@ int xc_domain_memory_decrease_reservatio + { + int err; + struct xen_memory_reservation reservation = { +- .nr_extents = nr_extents, + .extent_order = extent_order, + .mem_flags = 0, + .domid = domid + }; + +- set_xen_guest_handle(reservation.extent_start, extent_start); +- + if ( extent_start == NULL ) + { + DPRINTF("decrease_reservation extent_start is NULL!\n"); +@@ -594,16 +621,12 @@ int xc_domain_memory_decrease_reservatio + return -1; + } + +- err = xc_memory_op(xc_handle, XENMEM_decrease_reservation, &reservation); +- if ( err == nr_extents ) +- return 0; +- +- if ( err >= 0 ) ++ err = do_xenmem_op_retry(xc_handle, XENMEM_decrease_reservation, &reservation, nr_extents, extent_start); ++ if ( 
err < 0 ) + { + DPRINTF("Failed deallocation for dom %d: %ld extents of order %d\n", + domid, nr_extents, extent_order); + errno = EINVAL; +- err = -1; + } + + return err; +@@ -618,23 +641,17 @@ int xc_domain_memory_populate_physmap(in + { + int err; + struct xen_memory_reservation reservation = { +- .nr_extents = nr_extents, + .extent_order = extent_order, + .mem_flags = mem_flags, + .domid = domid + }; +- set_xen_guest_handle(reservation.extent_start, extent_start); +- +- err = xc_memory_op(xc_handle, XENMEM_populate_physmap, &reservation); +- if ( err == nr_extents ) +- return 0; + +- if ( err >= 0 ) ++ err = do_xenmem_op_retry(xc_handle, XENMEM_populate_physmap, &reservation, nr_extents, extent_start); ++ if ( err < 0 ) + { + DPRINTF("Failed allocation for dom %d: %ld extents of order %d\n", + domid, nr_extents, extent_order); + errno = EBUSY; +- err = -1; + } + + return err; +--- xen-4.0.1-testing.orig/xen/arch/x86/mm.c ++++ xen-4.0.1-testing/xen/arch/x86/mm.c +@@ -3660,6 +3660,8 @@ static int create_grant_p2m_mapping(uint + p2mt = p2m_grant_map_rw; + rc = guest_physmap_add_entry(current->domain, addr >> PAGE_SHIFT, + frame, 0, p2mt); ++ if ( rc == -ENOENT ) ++ return GNTST_eagain; + if ( rc ) + return GNTST_general_error; + else +@@ -4315,17 +4317,25 @@ long arch_memory_op(int op, XEN_GUEST_HA + case XENMAPSPACE_gmfn: + { + p2m_type_t p2mt; ++ unsigned long tmp_mfn; + +- xatp.idx = mfn_x(gfn_to_mfn_unshare(d, xatp.idx, &p2mt, 0)); ++ tmp_mfn = mfn_x(gfn_to_mfn_unshare(d, xatp.idx, &p2mt, 0)); ++ if ( unlikely(p2m_is_paging(p2mt)) ) ++ { ++ if ( p2m_is_paged(p2mt) ) ++ p2m_mem_paging_populate(d, xatp.idx); ++ rcu_unlock_domain(d); ++ return -ENOENT; ++ } + /* If the page is still shared, exit early */ + if ( p2m_is_shared(p2mt) ) + { + rcu_unlock_domain(d); + return -ENOMEM; + } +- if ( !get_page_from_pagenr(xatp.idx, d) ) ++ if ( !get_page_from_pagenr(tmp_mfn, d) ) + break; +- mfn = xatp.idx; ++ mfn = tmp_mfn; + page = mfn_to_page(mfn); + break; + } +@@ 
-4354,8 +4364,16 @@ long arch_memory_op(int op, XEN_GUEST_HA + /* Xen heap frames are simply unhooked from this phys slot. */ + guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0); + else ++ { + /* Normal domain memory is freed, to avoid leaking memory. */ +- guest_remove_page(d, xatp.gpfn); ++ rc = guest_remove_page(d, xatp.gpfn); ++ if ( rc == -ENOENT ) ++ { ++ domain_unlock(d); ++ rcu_unlock_domain(d); ++ return rc; ++ } ++ } + } + + /* Unmap from old location, if any. */ +--- xen-4.0.1-testing.orig/xen/arch/x86/mm/p2m.c ++++ xen-4.0.1-testing/xen/arch/x86/mm/p2m.c +@@ -2186,6 +2186,13 @@ guest_physmap_add_entry(struct domain *d + P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n", + mfn + i, ogfn, gfn + i); + omfn = gfn_to_mfn_query(d, ogfn, &ot); ++ if ( unlikely(p2m_is_paging(ot)) ) ++ { ++ p2m_unlock(d->arch.p2m); ++ if ( p2m_is_paged(ot) ) ++ p2m_mem_paging_populate(d, ogfn); ++ return -ENOENT; ++ } + /* If we get here, we know the local domain owns the page, + so it can't have been grant mapped in. 
*/ + BUG_ON( p2m_is_grant(ot) ); +--- xen-4.0.1-testing.orig/xen/common/memory.c ++++ xen-4.0.1-testing/xen/common/memory.c +@@ -95,6 +95,7 @@ static void populate_physmap(struct memo + unsigned long i, j; + xen_pfn_t gpfn, mfn; + struct domain *d = a->domain; ++ int rc; + + if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done, + a->nr_extents-1) ) +@@ -134,7 +135,12 @@ static void populate_physmap(struct memo + } + + mfn = page_to_mfn(page); +- guest_physmap_add_page(d, gpfn, mfn, a->extent_order); ++ rc = guest_physmap_add_page(d, gpfn, mfn, a->extent_order); ++ if ( rc != 0 ) ++ { ++ free_domheap_pages(page, a->extent_order); ++ goto out; ++ } + + if ( !paging_mode_translate(d) ) + { +@@ -162,6 +168,12 @@ int guest_remove_page(struct domain *d, + + #ifdef CONFIG_X86 + mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt)); ++ if ( unlikely(p2m_is_paging(p2mt)) ) ++ { ++ if ( p2m_is_paged(p2mt) ) ++ p2m_mem_paging_populate(d, gmfn); ++ return -ENOENT; ++ } + #else + mfn = gmfn_to_mfn(d, gmfn); + #endif +@@ -360,6 +372,13 @@ static long memory_exchange(XEN_GUEST_HA + + /* Shared pages cannot be exchanged */ + mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn + k, &p2mt, 0)); ++ if ( p2m_is_paging(p2mt) ) ++ { ++ if ( p2m_is_paged(p2mt) ) ++ p2m_mem_paging_populate(d, gmfn); ++ rc = -ENOENT; ++ goto fail; ++ } + if ( p2m_is_shared(p2mt) ) + { + rc = -ENOMEM; +@@ -456,7 +475,9 @@ static long memory_exchange(XEN_GUEST_HA + &gpfn, exch.out.extent_start, (i< +Already-Acked-by: Patrick Colp +Already-Acked-by: Keir Fraser + +--- + tools/xenpaging/xenpaging.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c ++++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c +@@ -495,15 +495,6 @@ int main(int argc, char *argv[]) + + victims = calloc(num_pages, sizeof(xenpaging_victim_t)); + +- /* Open file */ +- sprintf(filename, "page_cache_%d", domain_id); +- fd = open(filename, open_flags, open_mode); +- if ( fd < 0 ) +- { 
+- perror("failed to open file"); +- return -1; +- } +- + /* Seed random-number generator */ + srand(time(NULL)); + +@@ -515,6 +506,15 @@ int main(int argc, char *argv[]) + goto out; + } + ++ /* Open file */ ++ sprintf(filename, "page_cache_%d", domain_id); ++ fd = open(filename, open_flags, open_mode); ++ if ( fd < 0 ) ++ { ++ perror("failed to open file"); ++ return -1; ++ } ++ + /* Evict pages */ + memset(victims, 0, sizeof(xenpaging_victim_t) * num_pages); + for ( i = 0; i < num_pages; i++ ) diff --git a/xenpaging.pageout_policy.patch b/xenpaging.pageout_policy.patch new file mode 100644 index 0000000..8937ce7 --- /dev/null +++ b/xenpaging.pageout_policy.patch @@ -0,0 +1,27 @@ +Subject: xenpaging: call pageout policy function in xenpaging_evict_page + +Notify policy about a page that was just paged out to disk. +Up to now the code called the opposite function, which clears the +(xenpaging internal) reference bit, instead of setting it and marking +the page as gone. + +Signed-off-by: Olaf Hering +Already-Acked-by: Patrick Colp + +--- + tools/xenpaging/xenpaging.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c ++++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c +@@ -358,8 +358,8 @@ int xenpaging_evict_page(xenpaging_t *pa + goto out; + } + +- /* Notify policy of page being paged in */ +- policy_notify_paged_in(paging->mem_event.domain_id, victim->gfn); ++ /* Notify policy of page being paged out */ ++ policy_notify_paged_out(paging->mem_event.domain_id, victim->gfn); + + out: + return ret; diff --git a/xenpaging.policy_linear.patch b/xenpaging.policy_linear.patch new file mode 100644 index 0000000..49ef2b1 --- /dev/null +++ b/xenpaging.policy_linear.patch @@ -0,0 +1,122 @@ +Subject: xenpaging: break endless loop during inital page-out with large pagefiles + +To allow the starting for xenpaging right after 'xm start XYZ', I +specified a pagefile size equal to the guest memory size in the hope to 
+catch more errors where the paged-out state of a p2mt is not checked. + +While doing that, xenpaging got into an endless loop because some pages +cant be paged out right away. Now the policy reports an error if the gfn +number wraps. + +Signed-off-by: Olaf Hering +Already-Acked-by: Patrick Colp +Already-Acked-by: Keir Fraser + +--- + tools/xenpaging/policy_default.c | 35 ++++++++++++++++++++++++++++------- + tools/xenpaging/xenpaging.c | 7 +++++-- + 2 files changed, 33 insertions(+), 9 deletions(-) + +--- xen-4.0.1-testing.orig/tools/xenpaging/policy_default.c ++++ xen-4.0.1-testing/tools/xenpaging/policy_default.c +@@ -30,8 +30,12 @@ + + + static unsigned long mru[MRU_SIZE]; +-static unsigned int i_mru = 0; ++static unsigned int i_mru; + static unsigned long *bitmap; ++static unsigned long *unconsumed; ++static unsigned long current_gfn; ++static unsigned long bitmap_size; ++static unsigned long max_pages; + + + int policy_init(xenpaging_t *paging) +@@ -43,6 +47,14 @@ int policy_init(xenpaging_t *paging) + rc = alloc_bitmap(&bitmap, paging->bitmap_size); + if ( rc != 0 ) + goto out; ++ /* Allocate bitmap to track unusable pages */ ++ rc = alloc_bitmap(&unconsumed, paging->bitmap_size); ++ if ( rc != 0 ) ++ goto out; ++ ++ /* record bitmap_size */ ++ bitmap_size = paging->bitmap_size; ++ max_pages = paging->domain_info->max_pages; + + /* Initialise MRU list of paged in pages */ + for ( i = 0; i < MRU_SIZE; i++ ) +@@ -51,8 +63,6 @@ int policy_init(xenpaging_t *paging) + /* Don't page out page 0 */ + set_bit(0, bitmap); + +- rc = 0; +- + out: + return rc; + } +@@ -60,17 +70,27 @@ int policy_init(xenpaging_t *paging) + int policy_choose_victim(xenpaging_t *paging, domid_t domain_id, + xenpaging_victim_t *victim) + { ++ unsigned long wrap = current_gfn; + ASSERT(victim != NULL); + + /* Domain to pick on */ + victim->domain_id = domain_id; +- ++ + do + { +- /* Randomly choose a gfn to evict */ +- victim->gfn = rand() % paging->domain_info->max_pages; ++ current_gfn++; 
++ if ( current_gfn >= max_pages ) ++ current_gfn = 0; ++ if ( wrap == current_gfn ) ++ { ++ victim->gfn = INVALID_MFN; ++ return -ENOSPC; ++ } + } +- while ( test_bit(victim->gfn, bitmap) ); ++ while ( test_bit(current_gfn, bitmap) || test_bit(current_gfn, unconsumed) ); ++ ++ set_bit(current_gfn, unconsumed); ++ victim->gfn = current_gfn; + + return 0; + } +@@ -78,6 +98,7 @@ int policy_choose_victim(xenpaging_t *pa + void policy_notify_paged_out(domid_t domain_id, unsigned long gfn) + { + set_bit(gfn, bitmap); ++ clear_bit(gfn, unconsumed); + } + + void policy_notify_paged_in(domid_t domain_id, unsigned long gfn) +--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c ++++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c +@@ -440,7 +440,8 @@ static int evict_victim(xenpaging_t *pag + ret = policy_choose_victim(paging, domain_id, victim); + if ( ret != 0 ) + { +- ERROR("Error choosing victim"); ++ if ( ret != -ENOSPC ) ++ ERROR("Error choosing victim"); + goto out; + } + +@@ -518,7 +519,9 @@ int main(int argc, char *argv[]) + memset(victims, 0, sizeof(xenpaging_victim_t) * num_pages); + for ( i = 0; i < num_pages; i++ ) + { +- evict_victim(paging, domain_id, &victims[i], fd, i); ++ rc = evict_victim(paging, domain_id, &victims[i], fd, i); ++ if ( rc == -ENOSPC ) ++ break; + if ( i % 100 == 0 ) + DPRINTF("%d pages evicted\n", i); + } diff --git a/xenpaging.populate_only_if_paged.patch b/xenpaging.populate_only_if_paged.patch new file mode 100644 index 0000000..008f3e3 --- /dev/null +++ b/xenpaging.populate_only_if_paged.patch @@ -0,0 +1,114 @@ +Subject: xenpaging: populate only paged-out pages + +populdate a paged-out page only once to reduce pressure in the ringbuffer. +Several cpus may still request a page at once. xenpaging can handle this. 
+ +Signed-off-by: Olaf Hering + +--- + xen/arch/x86/hvm/emulate.c | 3 ++- + xen/arch/x86/hvm/hvm.c | 17 ++++++++++------- + xen/arch/x86/mm/guest_walk.c | 3 ++- + xen/arch/x86/mm/hap/guest_walk.c | 6 ++++-- + 4 files changed, 18 insertions(+), 11 deletions(-) + +--- xen-4.0.1-testing.orig/xen/arch/x86/hvm/emulate.c ++++ xen-4.0.1-testing/xen/arch/x86/hvm/emulate.c +@@ -65,7 +65,8 @@ static int hvmemul_do_io( + ram_mfn = gfn_to_mfn_unshare(current->domain, ram_gfn, &p2mt, 0); + if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(curr->domain, ram_gfn); ++ if ( p2m_is_paged(p2mt) ) ++ p2m_mem_paging_populate(curr->domain, ram_gfn); + return X86EMUL_RETRY; + } + if ( p2m_is_shared(p2mt) ) +--- xen-4.0.1-testing.orig/xen/arch/x86/hvm/hvm.c ++++ xen-4.0.1-testing/xen/arch/x86/hvm/hvm.c +@@ -291,7 +291,8 @@ static int hvm_set_ioreq_page( + return -EINVAL; + if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(d, gmfn); ++ if ( p2m_is_paged(p2mt) ) ++ p2m_mem_paging_populate(d, gmfn); + return -ENOENT; + } + if ( p2m_is_shared(p2mt) ) +@@ -1324,7 +1325,8 @@ static void *hvm_map_entry(unsigned long + mfn = mfn_x(gfn_to_mfn_unshare(current->domain, gfn, &p2mt, 0)); + if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(current->domain, gfn); ++ if ( p2m_is_paged(p2mt) ) ++ p2m_mem_paging_populate(current->domain, gfn); + return NULL; + } + if ( p2m_is_shared(p2mt) ) +@@ -1723,7 +1725,8 @@ static enum hvm_copy_result __hvm_copy( + + if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(curr->domain, gfn); ++ if ( p2m_is_paged(p2mt) ) ++ p2m_mem_paging_populate(curr->domain, gfn); + return HVMCOPY_gfn_paged_out; + } + if ( p2m_is_shared(p2mt) ) +@@ -3032,8 +3035,8 @@ long do_hvm_op(unsigned long op, XEN_GUE + mfn_t mfn = gfn_to_mfn(d, pfn, &t); + if ( p2m_is_paging(t) ) + { +- p2m_mem_paging_populate(d, pfn); +- ++ if ( p2m_is_paged(t) ) ++ p2m_mem_paging_populate(d, pfn); + rc = -EINVAL; + goto param_fail3; + } +@@ -3096,8 +3099,8 @@ long 
do_hvm_op(unsigned long op, XEN_GUE + mfn = gfn_to_mfn_unshare(d, pfn, &t, 0); + if ( p2m_is_paging(t) ) + { +- p2m_mem_paging_populate(d, pfn); +- ++ if ( p2m_is_paged(t) ) ++ p2m_mem_paging_populate(d, pfn); + rc = -EINVAL; + goto param_fail4; + } +--- xen-4.0.1-testing.orig/xen/arch/x86/mm/guest_walk.c ++++ xen-4.0.1-testing/xen/arch/x86/mm/guest_walk.c +@@ -96,7 +96,8 @@ static inline void *map_domain_gfn(struc + *mfn = gfn_to_mfn_unshare(d, gfn_x(gfn), p2mt, 0); + if ( p2m_is_paging(*p2mt) ) + { +- p2m_mem_paging_populate(d, gfn_x(gfn)); ++ if ( p2m_is_paged(*p2mt) ) ++ p2m_mem_paging_populate(d, gfn_x(gfn)); + + *rc = _PAGE_PAGED; + return NULL; +--- xen-4.0.1-testing.orig/xen/arch/x86/mm/hap/guest_walk.c ++++ xen-4.0.1-testing/xen/arch/x86/mm/hap/guest_walk.c +@@ -49,7 +49,8 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN + top_mfn = gfn_to_mfn_unshare(v->domain, cr3 >> PAGE_SHIFT, &p2mt, 0); + if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(v->domain, cr3 >> PAGE_SHIFT); ++ if ( p2m_is_paged(p2mt) ) ++ p2m_mem_paging_populate(v->domain, cr3 >> PAGE_SHIFT); + + pfec[0] = PFEC_page_paged; + return INVALID_GFN; +@@ -81,7 +82,8 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN + gfn_to_mfn_unshare(v->domain, gfn_x(gfn), &p2mt, 0); + if ( p2m_is_paging(p2mt) ) + { +- p2m_mem_paging_populate(v->domain, gfn_x(gfn)); ++ if ( p2m_is_paged(p2mt) ) ++ p2m_mem_paging_populate(v->domain, gfn_x(gfn)); + + pfec[0] = PFEC_page_paged; + return INVALID_GFN; diff --git a/xenpaging.signal_handling.patch b/xenpaging.signal_handling.patch new file mode 100644 index 0000000..7bf9475 --- /dev/null +++ b/xenpaging.signal_handling.patch @@ -0,0 +1,161 @@ +Subject: xenpaging: add signal handling + +Leave paging loop if xenpaging gets a signal. +Remove paging file on exit. 
+ +Signed-off-by: Olaf Hering + +--- + tools/xenpaging/xenpaging.c | 39 +++++++++++++++++++++++++++++++-------- + 1 file changed, 31 insertions(+), 8 deletions(-) + +--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c ++++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c +@@ -22,6 +22,7 @@ + + #include + #include ++#include + #include + + #include +@@ -40,6 +41,11 @@ + #define DPRINTF(...) ((void)0) + #endif + ++static int interrupted; ++static void close_handler(int sig) ++{ ++ interrupted = sig; ++} + + static void *init_page(void) + { +@@ -244,7 +250,6 @@ int xenpaging_teardown(xenpaging_t *pagi + if ( rc != 0 ) + { + ERROR("Error tearing down domain paging in xen"); +- goto err; + } + + /* Unbind VIRQ */ +@@ -252,7 +257,6 @@ int xenpaging_teardown(xenpaging_t *pagi + if ( rc != 0 ) + { + ERROR("Error unbinding event port"); +- goto err; + } + paging->mem_event.port = -1; + +@@ -261,7 +265,6 @@ int xenpaging_teardown(xenpaging_t *pagi + if ( rc != 0 ) + { + ERROR("Error closing event channel"); +- goto err; + } + paging->mem_event.xce_handle = -1; + +@@ -270,7 +273,6 @@ int xenpaging_teardown(xenpaging_t *pagi + if ( rc != 0 ) + { + ERROR("Error closing connection to xen"); +- goto err; + } + paging->xc_handle = -1; + +@@ -375,7 +377,7 @@ int xenpaging_evict_page(xenpaging_t *pa + return ret; + } + +-int xenpaging_resume_page(xenpaging_t *paging, mem_event_response_t *rsp) ++static int xenpaging_resume_page(xenpaging_t *paging, mem_event_response_t *rsp) + { + int ret; + +@@ -455,6 +457,11 @@ static int evict_victim(xenpaging_t *pag + goto out; + } + ++ if ( interrupted ) ++ { ++ ret = -EINTR; ++ goto out; ++ } + ret = xc_mem_paging_nominate(paging->xc_handle, + paging->mem_event.domain_id, victim->gfn); + if ( ret == 0 ) +@@ -479,6 +486,7 @@ static int evict_victim(xenpaging_t *pag + + int main(int argc, char *argv[]) + { ++ struct sigaction act; + domid_t domain_id; + int num_pages; + xenpaging_t *paging; +@@ -513,7 +521,7 @@ int main(int argc, char *argv[]) + 
if ( paging == NULL ) + { + ERROR("Error initialising paging"); +- goto out; ++ return 1; + } + + /* Open file */ +@@ -522,9 +530,18 @@ int main(int argc, char *argv[]) + if ( fd < 0 ) + { + perror("failed to open file"); +- return -1; ++ return 2; + } + ++ /* ensure that if we get a signal, we'll do cleanup, then exit */ ++ act.sa_handler = close_handler; ++ act.sa_flags = 0; ++ sigemptyset(&act.sa_mask); ++ sigaction(SIGHUP, &act, NULL); ++ sigaction(SIGTERM, &act, NULL); ++ sigaction(SIGINT, &act, NULL); ++ sigaction(SIGALRM, &act, NULL); ++ + /* Evict pages */ + memset(victims, 0, sizeof(xenpaging_victim_t) * num_pages); + for ( i = 0; i < num_pages; i++ ) +@@ -532,6 +549,8 @@ int main(int argc, char *argv[]) + rc = evict_victim(paging, domain_id, &victims[i], fd, i); + if ( rc == -ENOSPC ) + break; ++ if ( rc == -EINTR ) ++ break; + if ( i % 100 == 0 ) + DPRINTF("%d pages evicted\n", i); + } +@@ -539,7 +558,7 @@ int main(int argc, char *argv[]) + DPRINTF("pages evicted\n"); + + /* Swap pages in and out */ +- while ( 1 ) ++ while ( !interrupted ) + { + /* Wait for Xen to signal that a page needs paged in */ + rc = xc_wait_for_event_or_timeout(paging->mem_event.xce_handle, 100); +@@ -630,8 +649,11 @@ int main(int argc, char *argv[]) + } + } + } ++ DPRINTF("xenpaging got signal %d\n", interrupted); + + out: ++ unlink(filename); ++ close(fd); + free(victims); + + /* Tear down domain paging */ +@@ -642,6 +664,7 @@ int main(int argc, char *argv[]) + if ( rc == 0 ) + rc = rc1; + ++ DPRINTF("xenpaging exit code %d\n", rc); + return rc; + } + diff --git a/xenpaging.tools_xenpaging_cleanup.patch b/xenpaging.tools_xenpaging_cleanup.patch new file mode 100644 index 0000000..49105ef --- /dev/null +++ b/xenpaging.tools_xenpaging_cleanup.patch @@ -0,0 +1,54 @@ +Subject: xenpaging: Fix-up xenpaging tool code. + +This isn't directly related to EPT checking, but does some general fix-ups +to the xenpaging code (adds some extra frees, etc.) 
+ +Already-Signed-off-by: Patrick Colp +Signed-off-by: Olaf Hering + +--- + tools/xenpaging/xenpaging.c | 22 ++++++++++++++++++---- + 1 file changed, 18 insertions(+), 4 deletions(-) + +--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c ++++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c +@@ -100,7 +100,7 @@ xenpaging_t *xenpaging_init(domid_t doma + paging->mem_event.ring_page = init_page(); + if ( paging->mem_event.ring_page == NULL ) + { +- ERROR("Error initialising shared page"); ++ ERROR("Error initialising ring page"); + goto err; + } + +@@ -198,13 +198,27 @@ xenpaging_t *xenpaging_init(domid_t doma + return paging; + + err: +- if ( paging->bitmap ) ++ if ( paging ) ++ { ++ if ( paging->mem_event.shared_page ) ++ { ++ munlock(paging->mem_event.shared_page, PAGE_SIZE); ++ free(paging->mem_event.shared_page); ++ } ++ ++ if ( paging->mem_event.ring_page ) ++ { ++ munlock(paging->mem_event.ring_page, PAGE_SIZE); ++ free(paging->mem_event.ring_page); ++ } ++ + free(paging->bitmap); +- if ( paging->platform_info ) + free(paging->platform_info); +- if ( paging ) ++ free(paging->domain_info); + free(paging); ++ } + ++ err_iface: + return NULL; + } + diff --git a/xenpaging.xenpaging_init.patch b/xenpaging.xenpaging_init.patch new file mode 100644 index 0000000..0283b5c --- /dev/null +++ b/xenpaging.xenpaging_init.patch @@ -0,0 +1,53 @@ +Subject: xenpaging: allow only one xenpaging binary per guest + +Make sure only one xenpaging binary is active per domain. +Print info when the host lacks the required features for xenpaging. 
+ +Signed-off-by: Olaf Hering +Already-Acked-by: Patrick Colp +Already-Acked-by: Keir Fraser + +--- +v2: use perror for default case + + tools/xenpaging/xenpaging.c | 12 +++++++++++- + xen/arch/x86/mm/mem_event.c | 7 +++++++ + 2 files changed, 18 insertions(+), 1 deletion(-) + +--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c ++++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c +@@ -119,7 +119,17 @@ xenpaging_t *xenpaging_init(domid_t doma + paging->mem_event.ring_page); + if ( rc != 0 ) + { +- ERROR("Error initialising shared page"); ++ switch ( errno ) { ++ case EBUSY: ++ ERROR("xenpaging is (or was) active on this domain"); ++ break; ++ case ENODEV: ++ ERROR("EPT not supported for this guest"); ++ break; ++ default: ++ perror("Error initialising shared page"); ++ break; ++ } + goto err; + } + +--- xen-4.0.1-testing.orig/xen/arch/x86/mm/mem_event.c ++++ xen-4.0.1-testing/xen/arch/x86/mm/mem_event.c +@@ -226,6 +226,13 @@ int mem_event_domctl(struct domain *d, x + mfn_t ring_mfn; + mfn_t shared_mfn; + ++ /* Only one xenpaging at a time. If xenpaging crashed, ++ * the cache is in an undefined state and so is the guest ++ */ ++ rc = -EBUSY; ++ if ( d->mem_event.enabled ) ++ break; ++ + /* Currently only EPT is supported */ + rc = -ENODEV; + if ( !(is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled && diff --git a/xenpaging.xs_daemon_close.patch b/xenpaging.xs_daemon_close.patch new file mode 100644 index 0000000..86748d9 --- /dev/null +++ b/xenpaging.xs_daemon_close.patch @@ -0,0 +1,22 @@ +Subject: xenpaging: fix fd leak in xenstore + +Missing from commit 'libxl: Backported stuff from unstable' +Without this change, xs_daemon_open/xs_daemon_close will leak file descriptors.
+ +Signed-off-by: Olaf Hering + +--- + tools/xenstore/xs.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- xen-4.0.1-testing.orig/tools/xenstore/xs.c ++++ xen-4.0.1-testing/tools/xenstore/xs.c +@@ -285,6 +285,8 @@ void xs_daemon_close(struct xs_handle *h + mutex_unlock(&h->request_mutex); + mutex_unlock(&h->reply_mutex); + mutex_unlock(&h->watch_mutex); ++ ++ close_fds_free(h); + } + + static bool read_all(int fd, void *data, unsigned int len)