SHA256
1
0
forked from pool/xen

- fate#310510 - fix xenpaging

xenpaging.tools_xenpaging_cleanup.patch

- fate#310510 - fix xenpaging
  xenpaging.mem_event_check_ring-free_requests.patch

- install /etc/xen/examples/xentrace_formats.txt to get human readable
  tracedata if xenalyze is not used

- fate#310510 - fix xenpaging
  xenpaging.autostart_delay.patch
  xenpaging.blacklist.patch
  xenpaging.MRU_SIZE.patch
  remove xenpaging.hacks.patch, realmode works

- Upstream patches from Jan including fixes for the following bugs
  bnc#583568 - Xen kernel is not booting
  bnc#615206 - Xen kernel fails to boot with IO-APIC problem
  bnc#640773 - Xen kernel crashing right after grub
  bnc#643477 - issues with PCI hotplug/hotunplug to Xen driver domain
  22223-vtd-igd-workaround.patch
  22222-x86-timer-extint.patch
  22214-x86-msr-misc-enable.patch
  22213-x86-xsave-cpuid-check.patch
  22194-tmem-check-pv-mfn.patch
  22177-i386-irq-safe-map_domain_page.patch
  22175-x86-irq-enter-exit.patch
  22174-x86-pmtimer-accuracy.patch
  22160-Intel-C6-EOI.patch

OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=76
This commit is contained in:
Charles Arnold 2010-10-20 21:00:35 +00:00 committed by Git OBS Bridge
parent 163148b426
commit 08a77ed8c4
45 changed files with 3035 additions and 328 deletions

View File

@ -0,0 +1,21 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1284533274 -3600
# Node ID d4976434b8bba469fd1d337dc16249a5abfc4e5a
# Parent 14ce571d157e060fdb390e70fa8d0c95b2fd9b76
x86: fix debug key 'i' handling with no IO-APICs
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -2463,6 +2463,9 @@ void dump_ioapic_irq_info(void)
unsigned int irq, pin, printed = 0;
unsigned long flags;
+ if ( !irq_2_pin )
+ return;
+
for ( irq = 0; irq < nr_irqs_gsi; irq++ )
{
entry = &irq_2_pin[irq];

View File

@ -0,0 +1,27 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1284535133 -3600
# Node ID 62edd2611cbbe4c50574b6f6f73dda2ae1136dde
# Parent 869a0fdf8686c3dada14122df6d22a38705c2401
notify_via_xen_event_channel() should check for dying domain.
Else we can fail on either ASSERTion in that function.
From: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -994,6 +994,12 @@ void notify_via_xen_event_channel(struct
spin_lock(&ld->event_lock);
+ if ( unlikely(ld->is_dying) )
+ {
+ spin_unlock(&ld->event_lock);
+ return;
+ }
+
ASSERT(port_is_valid(ld, lport));
lchn = evtchn_from_port(ld, lport);
ASSERT(lchn->consumer_is_xen);

91
22160-Intel-C6-EOI.patch Normal file
View File

@ -0,0 +1,91 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1284537635 -3600
# Node ID 1087f9a03ab61d3a8bb0a1c65e5b09f82f3a4277
# Parent 62edd2611cbbe4c50574b6f6f73dda2ae1136dde
C6 state with EOI issue fix for some Intel processors
There is an erratum in some Intel processors.
AAJ72. EOI Transaction May Not be Sent if Software Enters Core C6
During an Interrupt Service Routine
If core C6 is entered after the start of an interrupt service routine
but before a write to the APIC EOI register, the core may not send an
EOI transaction (if needed) and further interrupts from the same
priority level or lower may be blocked.
This patch fixes this issue by checking whether an ISR is pending before
entering a deep Cx state. If so, it uses power->safe_state instead of the
deep Cx state to prevent the above issue from happening.
Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -226,6 +226,31 @@ static int sched_has_urgent_vcpu(void)
return atomic_read(&this_cpu(schedule_data).urgent_count);
}
+/*
+ * "AAJ72. EOI Transaction May Not be Sent if Software Enters Core C6 During
+ * an Interrupt Service Routine"
+ *
+ * There was an errata with some Core i7 processors that an EOI transaction
+ * may not be sent if software enters core C6 during an interrupt service
+ * routine. So we don't enter deep Cx state if there is an EOI pending.
+ */
+bool_t errata_c6_eoi_workaround(void)
+{
+ static bool_t fix_needed = -1;
+
+ if ( unlikely(fix_needed == -1) )
+ {
+ int model = boot_cpu_data.x86_model;
+ fix_needed = (cpu_has_apic && !directed_eoi_enabled &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+ (boot_cpu_data.x86 == 6) &&
+ ((model == 0x1a) || (model == 0x1e) || (model == 0x1f) ||
+ (model == 0x25) || (model == 0x2c) || (model == 0x2f)));
+ }
+
+ return (fix_needed && cpu_has_pending_apic_eoi());
+}
+
static void acpi_processor_idle(void)
{
struct acpi_processor_power *power = processor_powers[smp_processor_id()];
@@ -277,6 +302,9 @@ static void acpi_processor_idle(void)
return;
}
+ if ( (cx->type == ACPI_STATE_C3) && errata_c6_eoi_workaround() )
+ cx = power->safe_state;
+
power->last_state = cx;
/*
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -752,6 +752,11 @@ struct pending_eoi {
static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_DYNAMIC_VECTORS]);
#define pending_eoi_sp(p) ((p)[NR_DYNAMIC_VECTORS-1].vector)
+bool_t cpu_has_pending_apic_eoi(void)
+{
+ return (pending_eoi_sp(this_cpu(pending_eoi)) != 0);
+}
+
static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
{
if ( d->arch.pirq_eoi_map )
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -150,4 +150,6 @@ void irq_set_affinity(int irq, cpumask_t
#define domain_pirq_to_irq(d, pirq) ((d)->arch.pirq_irq[pirq])
#define domain_irq_to_pirq(d, irq) ((d)->arch.irq_pirq[irq])
+bool_t cpu_has_pending_apic_eoi(void);
+
#endif /* _ASM_HW_IRQ_H */

View File

@ -0,0 +1,68 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1284739161 -3600
# Node ID 632c02167f97bb2bd25571b2780425b9b75949b4
# Parent 1b05090854ba83576aa8399fa70e481f5b602417
hvm pmtimer: correct pmtimer accuracy
Several seconds of backward time drift per minute can be seen on a
RHEL6 HVM guest by switching the clocksource to 'acpi_pm' and then
running gettimeofday() in a loop. This is due to the accumulation
of small inaccuracies that are caused by shifting out the lower 32
bits when pmt_update_time() computes 'tmr_val'.
The patch makes sure that the lower 32 bits of the computed value
are not lost. They are saved in a new field 'not_accounted' in the
PMTState structure and are accounted the next time pmt_update_time()
is called.
From: Ulrich Obergfell <uobergfe@redhat.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
--- a/xen/arch/x86/hvm/pmtimer.c
+++ b/xen/arch/x86/hvm/pmtimer.c
@@ -83,14 +83,16 @@ void hvm_acpi_sleep_button(struct domain
* since the last time we did that. */
static void pmt_update_time(PMTState *s)
{
- uint64_t curr_gtime;
+ uint64_t curr_gtime, tmp;
uint32_t msb = s->pm.tmr_val & TMR_VAL_MSB;
ASSERT(spin_is_locked(&s->lock));
/* Update the timer */
curr_gtime = hvm_get_guest_time(s->vcpu);
- s->pm.tmr_val += ((curr_gtime - s->last_gtime) * s->scale) >> 32;
+ tmp = ((curr_gtime - s->last_gtime) * s->scale) + s->not_accounted;
+ s->not_accounted = (uint32_t)tmp;
+ s->pm.tmr_val += tmp >> 32;
s->pm.tmr_val &= TMR_VAL_MASK;
s->last_gtime = curr_gtime;
@@ -257,6 +259,7 @@ static int pmtimer_load(struct domain *d
/* Calculate future counter values from now. */
s->last_gtime = hvm_get_guest_time(s->vcpu);
+ s->not_accounted = 0;
/* Set the SCI state from the registers */
pmt_update_sci(s);
@@ -276,6 +279,7 @@ void pmtimer_init(struct vcpu *v)
spin_lock_init(&s->lock);
s->scale = ((uint64_t)FREQUENCE_PMTIMER << 32) / SYSTEM_TIME_HZ;
+ s->not_accounted = 0;
s->vcpu = v;
/* Intercept port I/O (need two handlers because PM1a_CNT is between
--- a/xen/include/asm-x86/hvm/vpt.h
+++ b/xen/include/asm-x86/hvm/vpt.h
@@ -117,6 +117,7 @@ typedef struct PMTState {
struct hvm_hw_pmtimer pm; /* 32bit timer value */
struct vcpu *vcpu; /* Keeps sync with this vcpu's guest-time */
uint64_t last_gtime; /* Last (guest) time we updated the timer */
+ uint32_t not_accounted; /* time not accounted at last update */
uint64_t scale; /* Multiplier to get from tsc to timer ticks */
struct timer timer; /* To make sure we send SCIs */
spinlock_t lock;

View File

@ -0,0 +1,59 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1284795115 -3600
# Node ID ee3c640732311ef6bc5e2de56c3b4b753cb020fa
# Parent 632c02167f97bb2bd25571b2780425b9b75949b4
x86: irq_enter()/irq_exit() covers all of do_IRQ().
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -535,6 +535,8 @@ asmlinkage void do_IRQ(struct cpu_user_r
return;
}
+ irq_enter();
+
desc = irq_to_desc(irq);
spin_lock(&desc->lock);
@@ -568,14 +570,10 @@ asmlinkage void do_IRQ(struct cpu_user_r
desc->rl_quantum_start = now;
}
- irq_enter();
tsc_in = tb_init_done ? get_cycles() : 0;
__do_IRQ_guest(irq);
TRACE_3D(TRC_TRACE_IRQ, irq, tsc_in, get_cycles());
- irq_exit();
- spin_unlock(&desc->lock);
- set_irq_regs(old_regs);
- return;
+ goto out_no_end;
}
desc->status &= ~IRQ_REPLAY;
@@ -594,20 +592,20 @@ asmlinkage void do_IRQ(struct cpu_user_r
while ( desc->status & IRQ_PENDING )
{
desc->status &= ~IRQ_PENDING;
- irq_enter();
spin_unlock_irq(&desc->lock);
tsc_in = tb_init_done ? get_cycles() : 0;
action->handler(irq, action->dev_id, regs);
TRACE_3D(TRC_TRACE_IRQ, irq, tsc_in, get_cycles());
spin_lock_irq(&desc->lock);
- irq_exit();
}
desc->status &= ~IRQ_INPROGRESS;
out:
desc->handler->end(irq);
+ out_no_end:
spin_unlock(&desc->lock);
+ irq_exit();
set_irq_regs(old_regs);
}

View File

@ -0,0 +1,92 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1284796635 -3600
# Node ID 7405e0ddb912a993982e4e4122856965b7c706dd
# Parent 0da4bfd2bc23937d2e1a8bfa6d259be0d9e482ad
x86_32: [un]map_domain_page() is now IRQ safe.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
--- a/xen/arch/x86/x86_32/domain_page.c
+++ b/xen/arch/x86/x86_32/domain_page.c
@@ -42,15 +42,13 @@ static inline struct vcpu *mapcache_curr
void *map_domain_page(unsigned long mfn)
{
- unsigned long va;
- unsigned int idx, i, flags;
+ unsigned long va, flags;
+ unsigned int idx, i;
struct vcpu *v;
struct mapcache_domain *dcache;
struct mapcache_vcpu *vcache;
struct vcpu_maphash_entry *hashent;
- ASSERT(!in_irq());
-
perfc_incr(map_domain_page_count);
v = mapcache_current_vcpu();
@@ -58,6 +56,8 @@ void *map_domain_page(unsigned long mfn)
dcache = &v->domain->arch.mapcache;
vcache = &v->arch.mapcache;
+ local_irq_save(flags);
+
hashent = &vcache->hash[MAPHASH_HASHFN(mfn)];
if ( hashent->mfn == mfn )
{
@@ -69,7 +69,7 @@ void *map_domain_page(unsigned long mfn)
goto out;
}
- spin_lock_irqsave(&dcache->lock, flags);
+ spin_lock(&dcache->lock);
/* Has some other CPU caused a wrap? We must flush if so. */
if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
@@ -105,11 +105,12 @@ void *map_domain_page(unsigned long mfn)
set_bit(idx, dcache->inuse);
dcache->cursor = idx + 1;
- spin_unlock_irqrestore(&dcache->lock, flags);
+ spin_unlock(&dcache->lock);
l1e_write(&dcache->l1tab[idx], l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
out:
+ local_irq_restore(flags);
va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT);
return (void *)va;
}
@@ -119,11 +120,9 @@ void unmap_domain_page(const void *va)
unsigned int idx;
struct vcpu *v;
struct mapcache_domain *dcache;
- unsigned long mfn;
+ unsigned long mfn, flags;
struct vcpu_maphash_entry *hashent;
- ASSERT(!in_irq());
-
ASSERT((void *)MAPCACHE_VIRT_START <= va);
ASSERT(va < (void *)MAPCACHE_VIRT_END);
@@ -135,6 +134,8 @@ void unmap_domain_page(const void *va)
mfn = l1e_get_pfn(dcache->l1tab[idx]);
hashent = &v->arch.mapcache.hash[MAPHASH_HASHFN(mfn)];
+ local_irq_save(flags);
+
if ( hashent->idx == idx )
{
ASSERT(hashent->mfn == mfn);
@@ -163,6 +164,8 @@ void unmap_domain_page(const void *va)
/* /Second/, mark as garbage. */
set_bit(idx, dcache->garbage);
}
+
+ local_irq_restore(flags);
}
void mapcache_domain_init(struct domain *d)

View File

@ -0,0 +1,240 @@
# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1285142048 -3600
# Node ID e8e3aeed3ebacac6faa5795f67b195a434562323
# Parent 35a1a14c408e60eca608a67a79f38ae5fdf3ea19
tmem: disallow bad gmfns from PV domains
Mfns for PV domains were not properly checked, potentially
allowing a buggy or malicious PV guest to crash Xen. Also,
use get_page/put_page to claim a reference to the pages
so they can't disappear out from under tmem's feet.
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
--- a/xen/common/tmem_xen.c
+++ b/xen/common/tmem_xen.c
@@ -87,49 +87,88 @@ void tmh_copy_page(char *to, char*from)
}
#ifdef __ia64__
-static inline void *cli_mfn_to_va(tmem_cli_mfn_t cmfn, unsigned long *pcli_mfn)
+static inline void *cli_get_page(tmem_cli_mfn_t cmfn, unsigned long *pcli_mfn,
+ pfp_t **pcli_pfp, bool_t cli_write)
{
ASSERT(0);
return NULL;
}
-#define paging_mark_dirty(_x,_y) do {} while(0)
+
+static inline void cli_put_page(void *cli_va, struct page_info *cli_pfp,
+ bool_t mark_dirty)
+{
+ ASSERT(0);
+}
#else
-static inline void *cli_mfn_to_va(tmem_cli_mfn_t cmfn, unsigned long *pcli_mfn)
+static inline void *cli_get_page(tmem_cli_mfn_t cmfn, unsigned long *pcli_mfn,
+ pfp_t **pcli_pfp, bool_t cli_write)
{
unsigned long cli_mfn;
p2m_type_t t;
+ struct page_info *page;
+ int ret;
cli_mfn = mfn_x(gfn_to_mfn(current->domain, cmfn, &t));
- if (t != p2m_ram_rw || cli_mfn == INVALID_MFN)
+ if ( t != p2m_ram_rw || !mfn_valid(cli_mfn) )
+ return NULL;
+ page = mfn_to_page(cli_mfn);
+ if ( cli_write )
+ ret = get_page_and_type(page, current->domain, PGT_writable_page);
+ else
+ ret = get_page(page, current->domain);
+ if ( !ret )
return NULL;
- if (pcli_mfn != NULL)
- *pcli_mfn = cli_mfn;
+ *pcli_mfn = cli_mfn;
+ *pcli_pfp = (pfp_t *)page;
return map_domain_page(cli_mfn);
}
+
+static inline void cli_put_page(void *cli_va, pfp_t *cli_pfp,
+ unsigned long cli_mfn, bool_t mark_dirty)
+{
+ if ( mark_dirty )
+ {
+ put_page_and_type((struct page_info *)cli_pfp);
+ paging_mark_dirty(current->domain,cli_mfn);
+ }
+ else
+ put_page((struct page_info *)cli_pfp);
+ unmap_domain_page(cli_va);
+}
#endif
EXPORT int tmh_copy_from_client(pfp_t *pfp,
tmem_cli_mfn_t cmfn, pagesize_t tmem_offset,
pagesize_t pfn_offset, pagesize_t len, void *cli_va)
{
- unsigned long tmem_mfn;
+ unsigned long tmem_mfn, cli_mfn = 0;
void *tmem_va;
+ pfp_t *cli_pfp = NULL;
+ bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */
ASSERT(pfp != NULL);
- if ( tmem_offset || pfn_offset || len )
- if ( (cli_va == NULL) && ((cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL) )
- return -EFAULT;
tmem_mfn = page_to_mfn(pfp);
tmem_va = map_domain_page(tmem_mfn);
- mb();
- if (!len && !tmem_offset && !pfn_offset)
+ if ( tmem_offset == 0 && pfn_offset == 0 && len == 0 )
+ {
memset(tmem_va, 0, PAGE_SIZE);
- else if (len == PAGE_SIZE && !tmem_offset && !pfn_offset)
+ unmap_domain_page(tmem_va);
+ return 1;
+ }
+ if ( !tmemc )
+ {
+ cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 0);
+ if ( cli_va == NULL )
+ return -EFAULT;
+ }
+ mb();
+ if (len == PAGE_SIZE && !tmem_offset && !pfn_offset)
tmh_copy_page(tmem_va, cli_va);
else if ( (tmem_offset+len <= PAGE_SIZE) &&
- (pfn_offset+len <= PAGE_SIZE) )
+ (pfn_offset+len <= PAGE_SIZE) )
memcpy((char *)tmem_va+tmem_offset,(char *)cli_va+pfn_offset,len);
- unmap_domain_page(cli_va);
+ if ( !tmemc )
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 0);
unmap_domain_page(tmem_va);
return 1;
}
@@ -140,15 +179,24 @@ EXPORT int tmh_compress_from_client(tmem
int ret = 0;
unsigned char *dmem = this_cpu(dstmem);
unsigned char *wmem = this_cpu(workmem);
+ pfp_t *cli_pfp = NULL;
+ unsigned long cli_mfn = 0;
+ bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */
- if ( (cli_va == NULL) && (cli_va = cli_mfn_to_va(cmfn,NULL)) == NULL)
- return -EFAULT;
if ( dmem == NULL || wmem == NULL )
return 0; /* no buffer, so can't compress */
+ if ( !tmemc )
+ {
+ cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 0);
+ if ( cli_va == NULL )
+ return -EFAULT;
+ }
mb();
ret = lzo1x_1_compress(cli_va, PAGE_SIZE, dmem, out_len, wmem);
ASSERT(ret == LZO_E_OK);
*out_va = dmem;
+ if ( !tmemc )
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 0);
unmap_domain_page(cli_va);
return 1;
}
@@ -157,14 +205,17 @@ EXPORT int tmh_copy_to_client(tmem_cli_m
pagesize_t tmem_offset, pagesize_t pfn_offset, pagesize_t len, void *cli_va)
{
unsigned long tmem_mfn, cli_mfn = 0;
- int mark_dirty = 1;
void *tmem_va;
+ pfp_t *cli_pfp = NULL;
+ bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */
ASSERT(pfp != NULL);
- if ( cli_va != NULL )
- mark_dirty = 0;
- else if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL)
- return -EFAULT;
+ if ( !tmemc )
+ {
+ cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1);
+ if ( cli_va == NULL )
+ return -EFAULT;
+ }
tmem_mfn = page_to_mfn(pfp);
tmem_va = map_domain_page(tmem_mfn);
if (len == PAGE_SIZE && !tmem_offset && !pfn_offset)
@@ -172,11 +223,8 @@ EXPORT int tmh_copy_to_client(tmem_cli_m
else if ( (tmem_offset+len <= PAGE_SIZE) && (pfn_offset+len <= PAGE_SIZE) )
memcpy((char *)cli_va+pfn_offset,(char *)tmem_va+tmem_offset,len);
unmap_domain_page(tmem_va);
- if ( mark_dirty )
- {
- unmap_domain_page(cli_va);
- paging_mark_dirty(current->domain,cli_mfn);
- }
+ if ( !tmemc )
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 1);
mb();
return 1;
}
@@ -185,22 +233,22 @@ EXPORT int tmh_decompress_to_client(tmem
size_t size, void *cli_va)
{
unsigned long cli_mfn = 0;
- int mark_dirty = 1;
+ pfp_t *cli_pfp = NULL;
size_t out_len = PAGE_SIZE;
+ bool_t tmemc = cli_va != NULL; /* if true, cli_va is control-op buffer */
int ret;
- if ( cli_va != NULL )
- mark_dirty = 0;
- else if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL)
- return -EFAULT;
+ if ( !tmemc )
+ {
+ cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1);
+ if ( cli_va == NULL )
+ return -EFAULT;
+ }
ret = lzo1x_decompress_safe(tmem_va, size, cli_va, &out_len);
ASSERT(ret == LZO_E_OK);
ASSERT(out_len == PAGE_SIZE);
- if ( mark_dirty )
- {
- unmap_domain_page(cli_va);
- paging_mark_dirty(current->domain,cli_mfn);
- }
+ if ( !tmemc )
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 1);
mb();
return 1;
}
@@ -210,18 +258,19 @@ EXPORT int tmh_copy_tze_to_client(tmem_c
{
void *cli_va;
unsigned long cli_mfn;
+ pfp_t *cli_pfp = NULL;
ASSERT(!(len & (sizeof(uint64_t)-1)));
ASSERT(len <= PAGE_SIZE);
ASSERT(len > 0 || tmem_va == NULL);
- if ( (cli_va = cli_mfn_to_va(cmfn,&cli_mfn)) == NULL)
+ cli_va = cli_get_page(cmfn, &cli_mfn, &cli_pfp, 1);
+ if ( cli_va == NULL )
return -EFAULT;
if ( len > 0 )
memcpy((char *)cli_va,(char *)tmem_va,len);
if ( len < PAGE_SIZE )
memset((char *)cli_va+len,0,PAGE_SIZE-len);
- unmap_domain_page(cli_va);
- paging_mark_dirty(current->domain,cli_mfn);
+ cli_put_page(cli_va, cli_pfp, cli_mfn, 1);
mb();
return 1;
}

View File

@ -0,0 +1,50 @@
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1285340011 -3600
# Node ID eb247ea9db8c8b541a7f8c9cdc51c064c4c9e41c
# Parent 105c938eacbbc250447a676bb2088f804033b82b
x86: check CPUID level before enabling xsave
References: bnc#640773
While not as relevant after c/s 21894, it still seems safer to check
the CPUID level here, just like Linux does. This is particularly
relevant for the 4.0 tree (which doesn't have said c/s), but also
possibly for nested environments where writing MSR_IA32_MISC_ENABLE
may not actually take effect (Xen itself ignores such writes).
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/i387.c
+++ b/xen/arch/x86/i387.c
@@ -132,6 +132,8 @@ void restore_fpu(struct vcpu *v)
}
}
+#define XSTATE_CPUID 0xd
+
/*
* Maximum size (in byte) of the XSAVE/XRSTOR save area required by all
* the supported and enabled features on the processor, including the
@@ -148,7 +150,12 @@ void xsave_init(void)
int cpu = smp_processor_id();
u32 min_size;
- cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+ if ( boot_cpu_data.cpuid_level < XSTATE_CPUID ) {
+ printk(XENLOG_ERR "XSTATE_CPUID missing\n");
+ return;
+ }
+
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
printk("%s: cpu%d: cntxt_max_size: 0x%x and states: %08x:%08x\n",
__func__, cpu, ecx, edx, eax);
@@ -169,7 +176,7 @@ void xsave_init(void)
*/
set_in_cr4(X86_CR4_OSXSAVE);
set_xcr0(eax & XCNTXT_MASK);
- cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
clear_in_cr4(X86_CR4_OSXSAVE);
if ( cpu == 0 )

View File

@ -0,0 +1,83 @@
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1285340079 -3600
# Node ID 71f836615ea211ac4e6f3b9793f58c6f6934c030
# Parent eb247ea9db8c8b541a7f8c9cdc51c064c4c9e41c
x86: adjust MSR_IA32_MISC_ENABLE handling
In the warning message issued on writes, the Xen-modified value should
be printed (and used to determine whether anything needs to be printed
at all), as the guest kernel will usually do a read-modify-write
cycle.
A question is whether Dom0 shouldn't be allowed control over some
bits, or whether some bits shouldn't be fully virtualized. I'm
particularly thinking of MSR_IA32_MISC_ENABLE_FAST_STRING, which
recent Linux kernels want to disable for CONFIG_KMEMCHECK.
While putting this together I also noticed that rdmsr_safe() failed to
initialize its output registers in the failure path, thus leading to
printing of uninitialized data in the guest WRMSR warning message.
Further, the default case value-changed check can be simplified.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1661,6 +1661,16 @@ unsigned long guest_to_host_gpr_switch(u
void (*pv_post_outb_hook)(unsigned int port, u8 value);
+static inline uint32_t guest_misc_enable(uint32_t eax)
+{
+ eax &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL |
+ MSR_IA32_MISC_ENABLE_MONITOR_ENABLE);
+ eax |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
+ MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
+ MSR_IA32_MISC_ENABLE_XTPR_DISABLE;
+ return eax;
+}
+
/* Instruction fetch with error handling. */
#define insn_fetch(type, base, eip, limit) \
({ unsigned long _rc, _ptr = (base) + (eip); \
@@ -2258,6 +2268,13 @@ static int emulate_privileged_op(struct
if ( wrmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, eax, edx) != 0 )
goto fail;
break;
+ case MSR_IA32_MISC_ENABLE:
+ if ( rdmsr_safe(regs->ecx, l, h) )
+ goto invalid;
+ l = guest_misc_enable(l);
+ if ( eax != l || edx != h )
+ goto invalid;
+ break;
case MSR_IA32_MPERF:
case MSR_IA32_APERF:
if (( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ) &&
@@ -2364,11 +2381,7 @@ static int emulate_privileged_op(struct
case MSR_IA32_MISC_ENABLE:
if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
goto fail;
- regs->eax &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL |
- MSR_IA32_MISC_ENABLE_MONITOR_ENABLE);
- regs->eax |= MSR_IA32_MISC_ENABLE_BTS_UNAVAIL |
- MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL |
- MSR_IA32_MISC_ENABLE_XTPR_DISABLE;
+ regs->eax = guest_misc_enable(regs->eax);
break;
case MSR_EFER:
case MSR_AMD_PATCHLEVEL:
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -39,7 +39,8 @@ static inline void wrmsrl(unsigned int m
__asm__ __volatile__( \
"1: rdmsr\n2:\n" \
".section .fixup,\"ax\"\n" \
- "3: movl %5,%2\n; jmp 2b\n" \
+ "3: xor %0,%0\n; xor %1,%1\n" \
+ " movl %5,%2\n; jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" "__FIXUP_ALIGN"\n" \

View File

@ -0,0 +1,70 @@
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1286028195 -3600
# Node ID aed9fd361340158daf2d7160d1b367478b6312d6
# Parent 3518149c4d5d0d8ce7402a24f95d3badbecc1c17
x86: fix boot failure (regression from pre-4.0 IRQ handling changes)
References: bnc#583568, bnc#615206
With the change to index irq_desc[] by IRQ rather than by vector, the
prior implicit change of the used flow handler when altering the IRQ
routing path to go through the 8259A didn't work anymore, and hence
on boards needing the ExtINT delivery workaround failed to boot.
Make make_8259A_irq() a real function again, thus allowing the flow
handler to be changed there.
Also eliminate the generally superfluous and (at least theoretically)
dangerous hard coded setting of the flow handler for IRQ0: Earlier
code should have set this already based on information coming from
ACPI/MPS, and non-standard systems may e.g. have this IRQ level
triggered.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Tested-by: Markus Schuster <ml@markus.schuster.name>
--- a/xen/arch/x86/i8259.c
+++ b/xen/arch/x86/i8259.c
@@ -367,6 +367,12 @@ void __devinit init_8259A(int auto_eoi)
spin_unlock_irqrestore(&i8259A_lock, flags);
}
+void __init make_8259A_irq(unsigned int irq)
+{
+ io_apic_irqs &= ~(1 << irq);
+ irq_to_desc(irq)->handler = &i8259A_irq_type;
+}
+
static struct irqaction __read_mostly cascade = { no_action, "cascade", NULL};
void __init init_IRQ(void)
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -38,9 +38,6 @@
#include <io_ports.h>
#include <public/physdev.h>
-/* Different to Linux: our implementation can be simpler. */
-#define make_8259A_irq(irq) (io_apic_irqs &= ~(1<<(irq)))
-
int (*ioapic_renumber_irq)(int ioapic, int irq);
atomic_t irq_mis_count;
@@ -1929,7 +1926,6 @@ static inline void check_timer(void)
irq_desc[0].depth = 0;
irq_desc[0].status &= ~IRQ_DISABLED;
- irq_desc[0].handler = &ioapic_edge_type;
/*
* Subtle, code in do_timer_interrupt() expects an AEOI
--- a/xen/include/asm-x86/irq.h
+++ b/xen/include/asm-x86/irq.h
@@ -94,6 +94,7 @@ int i8259A_irq_pending(unsigned int irq)
void mask_8259A(void);
void unmask_8259A(void);
void init_8259A(int aeoi);
+void make_8259A_irq(unsigned int irq);
int i8259A_suspend(void);
int i8259A_resume(void);

View File

@ -0,0 +1,131 @@
# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1286028261 -3600
# Node ID 4beee577912215c734b79cb84bfe3fb20c1afbfc
# Parent aed9fd361340158daf2d7160d1b367478b6312d6
Vt-d: fix dom0 graphics problem on Lenovo T410.
References: bnc#643477
The patch is derived from a similar quirk in Linux kernel by David
Woodhouse and Adam Jackson. It checks for VT enabling bit in IGD GGC
register. If VT is not enabled correctly in the IGD, Xen does not
enable VT-d translation for IGD VT-d engine. In case where iommu boot
parameter is set to force, Xen calls panic().
Signed-off-by: Allen Kay <allen.m.kay@intel.com>
jb: Simplified and switched operands of && in first if() added to
iommu_enable_translation().
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -46,6 +46,7 @@ LIST_HEAD(acpi_rmrr_units);
LIST_HEAD(acpi_atsr_units);
LIST_HEAD(acpi_rhsa_units);
+static u64 igd_drhd_address;
u8 dmar_host_address_width;
void dmar_scope_add_buses(struct dmar_scope *scope, u16 sec_bus, u16 sub_bus)
@@ -239,6 +240,11 @@ struct acpi_rhsa_unit * drhd_to_rhsa(str
return NULL;
}
+int is_igd_drhd(struct acpi_drhd_unit *drhd)
+{
+ return ( drhd->address == igd_drhd_address ? 1 : 0);
+}
+
/*
* Count number of devices in device scope. Do not include PCI sub
* hierarchies.
@@ -333,6 +339,15 @@ static int __init acpi_parse_dev_scope(v
if ( iommu_verbose )
dprintk(VTDPREFIX, " endpoint: %x:%x.%x\n",
bus, path->dev, path->fn);
+
+ if ( type == DMAR_TYPE )
+ {
+ struct acpi_drhd_unit *drhd = acpi_entry;
+
+ if ( (bus == 0) && (path->dev == 2) && (path->fn == 0) )
+ igd_drhd_address = drhd->address;
+ }
+
break;
case ACPI_DEV_IOAPIC:
--- a/xen/drivers/passthrough/vtd/dmar.h
+++ b/xen/drivers/passthrough/vtd/dmar.h
@@ -114,5 +114,6 @@ void *map_to_nocache_virt(int nr_iommus,
int vtd_hw_check(void);
void disable_pmr(struct iommu *iommu);
int is_usb_device(u8 bus, u8 devfn);
+int is_igd_drhd(struct acpi_drhd_unit *drhd);
#endif /* _DMAR_H_ */
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -688,10 +688,34 @@ static int iommu_set_root_entry(struct i
return 0;
}
-static void iommu_enable_translation(struct iommu *iommu)
+#define GGC 0x52
+#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
+static int is_igd_vt_enabled(void)
+{
+ unsigned short ggc;
+
+ /* integrated graphics on Intel platforms is located at 0:2.0 */
+ ggc = pci_conf_read16(0, 2, 0, GGC);
+ return ( ggc & GGC_MEMORY_VT_ENABLED ? 1 : 0 );
+}
+
+static void iommu_enable_translation(struct acpi_drhd_unit *drhd)
{
u32 sts;
unsigned long flags;
+ struct iommu *iommu = drhd->iommu;
+
+ if ( is_igd_drhd(drhd) && !is_igd_vt_enabled() )
+ {
+ if ( force_iommu )
+ panic("BIOS did not enable IGD for VT properly, crash Xen for security purpose!\n");
+ else
+ {
+ dprintk(XENLOG_WARNING VTDPREFIX,
+ "BIOS did not enable IGD for VT properly. Disabling IGD VT-d engine.\n");
+ return;
+ }
+ }
if ( iommu_verbose )
dprintk(VTDPREFIX,
@@ -1178,7 +1202,6 @@ static int intel_iommu_domain_init(struc
static void intel_iommu_dom0_init(struct domain *d)
{
- struct iommu *iommu;
struct acpi_drhd_unit *drhd;
if ( !iommu_passthrough && !need_iommu(d) )
@@ -1194,8 +1217,7 @@ static void intel_iommu_dom0_init(struct
for_each_drhd_unit ( drhd )
{
- iommu = drhd->iommu;
- iommu_enable_translation(iommu);
+ iommu_enable_translation(drhd);
}
}
@@ -2163,7 +2185,7 @@ static void vtd_resume(void)
(u32) iommu_state[i][DMAR_FEUADDR_REG]);
spin_unlock_irqrestore(&iommu->register_lock, flags);
- iommu_enable_translation(iommu);
+ iommu_enable_translation(drhd);
}
}

View File

@ -2,7 +2,7 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2920,7 +2920,7 @@ class XendDomainInfo:
@@ -2917,7 +2917,7 @@ class XendDomainInfo:
self.guest_bitsize = self.image.getBitSize()
# Make sure there's enough RAM available for the domain

56
altgr_2.patch Normal file
View File

@ -0,0 +1,56 @@
When accessing a domU from a Windows VNC client, the AltGr key of a Spanish
keyboard doesn't work. According to the log info, the keycodes passed
from the VNC client to the qemu VNC server are wrong. When AltGr and "2" are
pressed, the keycodes the VNC server receives are:
ALT_R down,
CTRL_L down,
CTRL_L up,
ALT_R up,
"2" down,
"2" up,
...
Since there is no AltGr modifier active when "2" down is sent, the character
displayed on screen will be "2" rather than "@".
To solve this problem, upstream applied another patch which
sends an additional AltGr modifier before "2" down in the above case.
It works well when the domU is Windows, but on a SLES10 SP3 domU it sometimes
displays "@" and sometimes still displays "2", especially when
AltGr+2 is pressed repeatedly.
The SLES10 SP3 domU problem may occur because there are too many alt_r (same
keycode as altgr on "es") up and down events, which the domU OS cannot handle
well.
To further address this problem, this patch does the following: when the VNC
server layout is "es" and it receives an alt_r keysym (which is already abnormal,
since "es" has no alt_r), the alt_r is treated as alt_l. This avoids too many altgr
keycode up and down events and makes sure the intentionally added altgr keycode can take effect.
Signed-off by Chunyan Liu (cyliu@novell.com)
diff -r a108300bd904 tools/ioemu-qemu-xen/vnc.c
--- a/tools/ioemu-qemu-xen/vnc.c Mon Sep 27 21:20:36 2010 +0800
+++ b/tools/ioemu-qemu-xen/vnc.c Wed Sep 29 01:55:55 2010 +0800
@@ -1279,11 +1279,9 @@
kbd_put_keycode(0xe0);
if (down){
kbd_put_keycode(0xb8 & 0x7f);
- vs->modifiers_state[0xb8] = 1;
}
else {
kbd_put_keycode(0xb8 | 0x80);
- vs->modifiers_state[0xb8] = 0;
}
}
@@ -1310,6 +1308,9 @@
shift_keys = vs->modifiers_state[0x2a] | vs->modifiers_state[0x36];
altgr_keys = vs->modifiers_state[0xb8];
+ if ( !strcmp(keyboard_layout,"es") && sym == 0xffea )
+ sym = 0xffe9;
+
keycode = keysym2scancode(vs->kbd_layout, sym & 0xFFFF);
if (keycode == 0) {
fprintf(stderr, "Key lost : keysym=0x%x(%d)\n", sym, sym);

View File

@ -235,9 +235,11 @@ case "$command" in
release_lock "dmmd"
exit 1
fi
lastparam=${dmmd##*;}
usedevice=${lastparam%(*}
claim_lock "block"
xenstore-write $XENBUS_PATH/node ${dmmd##*;}
write_dev ${dmmd##*;}
xenstore-write $XENBUS_PATH/node "$usedevice"
write_dev "$usedevice"
release_lock "block"
release_lock "dmmd"
exit 0

16
change_home_server.patch Normal file
View File

@ -0,0 +1,16 @@
Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -3133,6 +3133,11 @@ class XendDomainInfo:
self._cleanup_phantom_devs(paths)
self._cleanupVm()
+ if "change_home_server" in self.info:
+ chs = self.info["change_home_server"]
+ if (type(chs) is str and chs == "False") or \
+ (type(chs) is bool and chs is False):
+ self.setChangeHomeServer(None)
if ("transient" in self.info["other_config"] and \
bool(self.info["other_config"]["transient"])) or \
("change_home_server" in self.info and \

View File

@ -1,3 +1,10 @@
Improve check_device_status to handle HA cases
In an HA environment, the xenstore status has sometimes changed but ev.wait()
does not get the signal; it then waits until the timeout, and thus an incorrect
device status is returned. To fix this problem, we do not depend on the
ev.wait() result, but read xenstore directly to get the correct device status.
diff -r ce65e0e03a57 tools/python/xen/xend/server/DevController.py
--- a/tools/python/xen/xend/server/DevController.py Fri Aug 27 16:53:00 2010 +0800
+++ b/tools/python/xen/xend/server/DevController.py Fri Aug 27 17:13:32 2010 +0800

View File

@ -1,9 +1,7 @@
From: Juergen Gross <juergen.gross@ts.fujitsu.com>
Index: xen-4.0.1-testing/xen/arch/x86/acpi/power.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/acpi/power.c
+++ xen-4.0.1-testing/xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -234,7 +234,7 @@ static int enter_state(u32 state)
return error;
}
@ -22,10 +20,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/acpi/power.c
}
static int acpi_get_wake_status(void)
Index: xen-4.0.1-testing/xen/arch/x86/domain.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/domain.c
+++ xen-4.0.1-testing/xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -1522,42 +1522,52 @@ void sync_vcpu_execstate(struct vcpu *v)
}
@ -139,10 +135,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/domain.c
return 0;
}
Index: xen-4.0.1-testing/xen/arch/x86/domain_build.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/domain_build.c
+++ xen-4.0.1-testing/xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c
+++ b/xen/arch/x86/domain_build.c
@@ -9,6 +9,7 @@
#include <xen/lib.h>
#include <xen/ctype.h>
@ -183,10 +177,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/domain_build.c
/* Set up CR3 value for write_ptbase */
if ( paging_mode_enabled(d) )
Index: xen-4.0.1-testing/xen/arch/x86/microcode.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/microcode.c
+++ xen-4.0.1-testing/xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c
+++ b/xen/arch/x86/microcode.c
@@ -114,7 +114,7 @@ static int microcode_update_cpu(const vo
return err;
}
@ -214,10 +206,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/microcode.c
+ return continue_hypercall_on_cpu(info->cpu, NULL,
+ do_microcode_update, info);
}
Index: xen-4.0.1-testing/xen/arch/x86/mm.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/mm.c
+++ xen-4.0.1-testing/xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -243,7 +243,7 @@ void __init arch_init_memory(void)
* Any Xen-heap pages that we will allow to be mapped will have
* their domain field set to dom_xen.
@ -244,19 +234,9 @@ Index: xen-4.0.1-testing/xen/arch/x86/mm.c
BUG_ON(dom_cow == NULL);
/* First 1MB of RAM is historically marked as I/O. */
Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/platform_hypercall.c
+++ xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c
@@ -19,6 +19,7 @@
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/acpi.h>
+#include <xen/sched-if.h>
#include <asm/current.h>
#include <public/platform.h>
#include <acpi/cpufreq/processor_perf.h>
@@ -48,12 +49,12 @@ static DEFINE_PER_CPU(uint64_t, freq);
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -48,12 +48,12 @@ static DEFINE_PER_CPU(uint64_t, freq);
extern int set_px_pminfo(uint32_t cpu, struct xen_processor_performance *perf);
extern long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power);
@ -271,7 +251,7 @@ Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c
{
int cpu = (unsigned long)data;
return cpu_down(cpu);
@@ -314,7 +315,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
@@ -314,7 +314,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
if ( op->u.change_freq.flags || !cpu_online(op->u.change_freq.cpu) )
break;
per_cpu(freq, op->u.change_freq.cpu) = op->u.change_freq.freq;
@ -280,25 +260,7 @@ Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c
cpu_frequency_change_helper,
NULL);
break;
@@ -406,7 +407,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
g_info = &op->u.pcpu_info;
/* spin_trylock() avoids deadlock with stop_machine_run(). */
- if ( !spin_trylock(&cpu_add_remove_lock) )
+ if ( !spin_trylock(&cpupool_lock) )
{
ret = -EBUSY;
break;
@@ -429,7 +430,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
g_info->max_present = last_cpu(cpu_present_map);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
ret = copy_to_guest(u_xenpf_op, op, 1) ? -EFAULT : 0;
}
@@ -470,7 +471,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
@@ -470,7 +470,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
break;
}
ret = continue_hypercall_on_cpu(
@ -307,10 +269,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c
break;
}
break;
Index: xen-4.0.1-testing/xen/arch/x86/setup.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/setup.c
+++ xen-4.0.1-testing/xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -2,6 +2,7 @@
#include <xen/init.h>
#include <xen/lib.h>
@ -343,10 +303,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/setup.c
if ( (dom0 == NULL) || (alloc_dom0_vcpu0() == NULL) )
panic("Error creating domain 0\n");
Index: xen-4.0.1-testing/xen/arch/x86/smpboot.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/smpboot.c
+++ xen-4.0.1-testing/xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -39,6 +39,7 @@
#include <xen/mm.h>
#include <xen/domain.h>
@ -355,24 +313,7 @@ Index: xen-4.0.1-testing/xen/arch/x86/smpboot.c
#include <xen/irq.h>
#include <xen/delay.h>
#include <xen/softirq.h>
@@ -104,7 +105,6 @@ static void map_cpu_to_logical_apicid(vo
DEFINE_PER_CPU(int, cpu_state) = { 0 };
void *stack_base[NR_CPUS];
-DEFINE_SPINLOCK(cpu_add_remove_lock);
/*
* The bootstrap kernel entry code has set these up. Save them for
@@ -821,7 +821,7 @@ wakeup_secondary_cpu(int phys_apicid, un
extern cpumask_t cpu_initialized;
/*
- * Caller should hold cpu_add_remove_lock if not called when booting
+ * Caller should hold cpupool_lock if not called when booting
*/
int alloc_cpu_id(void)
{
@@ -1306,10 +1306,11 @@ int __cpu_disable(void)
@@ -1306,10 +1307,11 @@ int __cpu_disable(void)
__sync_lazy_execstate();
/* It's now safe to remove this processor from the online map */
@ -385,82 +326,46 @@ Index: xen-4.0.1-testing/xen/arch/x86/smpboot.c
return 0;
}
@@ -1343,10 +1344,10 @@ int cpu_down(unsigned int cpu)
@@ -1341,16 +1343,12 @@ static int take_cpu_down(void *unused)
int cpu_down(unsigned int cpu)
{
int err = 0;
+ bool_t pool_rm = 0;
/* spin_trylock() avoids deadlock with stop_machine_run(). */
- if (!spin_trylock(&cpu_add_remove_lock))
+ if (!spin_trylock(&cpupool_lock))
if (!spin_trylock(&cpu_add_remove_lock))
return -EBUSY;
- if (num_online_cpus() == 1) {
+ if ((!cpu_isset(cpu, cpupool0->cpu_valid)) || (cpus_weight(cpupool0->cpu_valid) == 1)) {
err = -EBUSY;
goto out;
}
@@ -1379,7 +1380,7 @@ int cpu_down(unsigned int cpu)
- err = -EBUSY;
- goto out;
- }
-
/* Can not offline BSP */
if (cpu == 0) {
err = -EINVAL;
@@ -1364,6 +1362,11 @@ int cpu_down(unsigned int cpu)
printk("Prepare to bring CPU%d down...\n", cpu);
+ err = cpupool_cpu_remove(cpu);
+ if (err)
+ goto out;
+ pool_rm = 1;
+
cpufreq_del_cpu(cpu);
err = stop_machine_run(take_cpu_down, NULL, cpu);
@@ -1379,6 +1382,8 @@ int cpu_down(unsigned int cpu)
out:
if (!err)
send_guest_global_virq(dom0, VIRQ_PCPU_STATE);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
+ else if (pool_rm)
+ cpupool_cpu_add(cpu);
spin_unlock(&cpu_add_remove_lock);
return err;
}
@@ -1388,7 +1389,7 @@ int cpu_up(unsigned int cpu)
int err = 0;
/* spin_trylock() avoids deadlock with stop_machine_run(). */
- if (!spin_trylock(&cpu_add_remove_lock))
+ if (!spin_trylock(&cpupool_lock))
return -EBUSY;
if (cpu_online(cpu)) {
@@ -1406,7 +1407,7 @@ int cpu_up(unsigned int cpu)
out:
if (!err)
send_guest_global_virq(dom0, VIRQ_PCPU_STATE);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
return err;
}
@@ -1492,14 +1493,14 @@ int cpu_add(uint32_t apic_id, uint32_t a
return -EEXIST;
/* spin_trylock() avoids deadlock with stop_machine_run(). */
- if (!spin_trylock(&cpu_add_remove_lock))
+ if (!spin_trylock(&cpupool_lock))
return -EBUSY;
cpu = mp_register_lapic(apic_id, 1);
if (cpu < 0)
{
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
return cpu;
}
@@ -1516,7 +1517,7 @@ int cpu_add(uint32_t apic_id, uint32_t a
"Setup node failed for pxm %x\n", pxm);
x86_acpiid_to_apicid[acpi_id] = 0xff;
mp_unregister_lapic(apic_id, cpu);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
return node;
}
apicid_to_node[apic_id] = node;
@@ -1524,7 +1525,7 @@ int cpu_add(uint32_t apic_id, uint32_t a
srat_detect_node(cpu);
numa_add_cpu(cpu);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
dprintk(XENLOG_INFO, "Add CPU %x with index %x\n", apic_id, cpu);
return cpu;
}
@@ -1568,6 +1569,7 @@ int __devinit __cpu_up(unsigned int cpu)
@@ -1568,6 +1573,7 @@ int __devinit __cpu_up(unsigned int cpu)
process_pending_softirqs();
}
@ -468,10 +373,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/smpboot.c
cpufreq_add_cpu(cpu);
return 0;
}
Index: xen-4.0.1-testing/xen/arch/x86/sysctl.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/sysctl.c
+++ xen-4.0.1-testing/xen/arch/x86/sysctl.c
--- a/xen/arch/x86/sysctl.c
+++ b/xen/arch/x86/sysctl.c
@@ -29,7 +29,7 @@
#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
@ -490,10 +393,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/sysctl.c
break;
case XEN_SYSCTL_CPU_HOTPLUG_STATUS:
ret = 0;
Index: xen-4.0.1-testing/xen/common/Makefile
===================================================================
--- xen-4.0.1-testing.orig/xen/common/Makefile
+++ xen-4.0.1-testing/xen/common/Makefile
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -1,5 +1,6 @@
obj-y += bitmap.o
obj-y += cpu.o
@ -501,11 +402,9 @@ Index: xen-4.0.1-testing/xen/common/Makefile
obj-y += domctl.o
obj-y += domain.o
obj-y += event_channel.o
Index: xen-4.0.1-testing/xen/common/cpupool.c
===================================================================
--- /dev/null
+++ xen-4.0.1-testing/xen/common/cpupool.c
@@ -0,0 +1,585 @@
+++ b/xen/common/cpupool.c
@@ -0,0 +1,609 @@
+/******************************************************************************
+ * cpupool.c
+ *
@ -539,11 +438,12 @@ Index: xen-4.0.1-testing/xen/common/cpupool.c
+
+static int cpupool_moving_cpu = -1;
+static struct cpupool *cpupool_cpu_moving = NULL;
+static cpumask_t cpupool_locked_cpus = CPU_MASK_NONE;
+
+/* cpupool lock: be careful, this lock is sometimes released on a different
+ * cpu than the one it was obtained on!
+ */
+DEFINE_SPINLOCK(cpupool_lock);
+static DEFINE_SPINLOCK(cpupool_lock);
+
+DEFINE_PER_CPU(struct cpupool *, cpupool);
+
@ -734,8 +634,9 @@ Index: xen-4.0.1-testing/xen/common/cpupool.c
+ * might be zombies.
+ * possible failures:
+ * - last cpu and still active domains in cpupool
+ * - cpu just being unplugged
+ */
+int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
+static int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
+{
+ int work_cpu;
+ int ret;
@ -748,6 +649,8 @@ Index: xen-4.0.1-testing/xen/common/cpupool.c
+ ret = -EBUSY;
+ if ( (cpupool_moving_cpu != -1) && (cpu != cpupool_moving_cpu) )
+ goto out;
+ if ( cpu_isset(cpu, cpupool_locked_cpus) )
+ goto out;
+
+ ret = 0;
+ if ( !cpu_isset(cpu, c->cpu_valid) && (cpu != cpupool_moving_cpu) )
@ -872,6 +775,7 @@ Index: xen-4.0.1-testing/xen/common/cpupool.c
+ if ( cpupool0 == NULL )
+ return;
+ spin_lock(&cpupool_lock);
+ cpu_clear(cpu, cpupool_locked_cpus);
+ cpu_set(cpu, cpupool_free_cpus);
+ (void)cpupool_assign_cpu_locked(cpupool0, cpu);
+ spin_unlock(&cpupool_lock);
@ -879,6 +783,25 @@ Index: xen-4.0.1-testing/xen/common/cpupool.c
+}
+
+/*
+ * called to remove a cpu from pool admin
+ * the cpu to be removed is locked to avoid removing it from dom0
+ * returns failure if not in pool0
+ */
+int cpupool_cpu_remove(unsigned int cpu)
+{
+ int ret = 0;
+
+ spin_lock(&cpupool_lock);
+ if ( !cpu_isset(cpu, cpupool0->cpu_valid))
+ ret = -EBUSY;
+ else
+ cpu_set(cpu, cpupool_locked_cpus);
+ spin_unlock(&cpupool_lock);
+
+ return ret;
+}
+
+/*
+ * do cpupool related sysctl operations
+ */
+int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op)
@ -1091,10 +1014,8 @@ Index: xen-4.0.1-testing/xen/common/cpupool.c
+ * indent-tabs-mode: nil
+ * End:
+ */
Index: xen-4.0.1-testing/xen/common/domain.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/domain.c
+++ xen-4.0.1-testing/xen/common/domain.c
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -209,7 +209,7 @@ static void __init parse_extra_guest_irq
custom_param("extra_guest_irqs", parse_extra_guest_irqs);
@ -1123,10 +1044,8 @@ Index: xen-4.0.1-testing/xen/common/domain.c
sched_destroy_domain(d);
/* Free page used by xen oprofile buffer. */
Index: xen-4.0.1-testing/xen/common/domctl.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/domctl.c
+++ xen-4.0.1-testing/xen/common/domctl.c
--- a/xen/common/domctl.c
+++ b/xen/common/domctl.c
@@ -11,6 +11,7 @@
#include <xen/lib.h>
#include <xen/mm.h>
@ -1202,10 +1121,8 @@ Index: xen-4.0.1-testing/xen/common/domctl.c
if ( alloc_vcpu(d, i, cpu) == NULL )
goto maxvcpu_out;
Index: xen-4.0.1-testing/xen/common/kexec.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/kexec.c
+++ xen-4.0.1-testing/xen/common/kexec.c
--- a/xen/common/kexec.c
+++ b/xen/common/kexec.c
@@ -235,7 +235,7 @@ void kexec_crash(void)
BUG();
}
@ -1224,10 +1141,8 @@ Index: xen-4.0.1-testing/xen/common/kexec.c
break;
case KEXEC_TYPE_CRASH:
kexec_crash(); /* Does not return */
Index: xen-4.0.1-testing/xen/common/sched_credit.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/sched_credit.c
+++ xen-4.0.1-testing/xen/common/sched_credit.c
--- a/xen/common/sched_credit.c
+++ b/xen/common/sched_credit.c
@@ -70,11 +70,15 @@
/*
* Useful macros
@ -2116,7 +2031,7 @@ Index: xen-4.0.1-testing/xen/common/sched_credit.c
.destroy_vcpu = csched_vcpu_destroy,
.sleep = csched_vcpu_sleep,
@@ -1411,6 +1540,13 @@ const struct scheduler sched_credit_def
@@ -1411,6 +1540,13 @@ const struct scheduler sched_credit_def
.dump_cpu_state = csched_dump_pcpu,
.dump_settings = csched_dump,
.init = csched_init,
@ -2130,10 +2045,8 @@ Index: xen-4.0.1-testing/xen/common/sched_credit.c
.tick_suspend = csched_tick_suspend,
.tick_resume = csched_tick_resume,
Index: xen-4.0.1-testing/xen/common/sched_sedf.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/sched_sedf.c
+++ xen-4.0.1-testing/xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c
+++ b/xen/common/sched_sedf.c
@@ -21,6 +21,9 @@
printk(_a ); \
} while ( 0 )
@ -2399,7 +2312,7 @@ Index: xen-4.0.1-testing/xen/common/sched_sedf.c
.name = "Simple EDF Scheduler",
.opt_name = "sedf",
.sched_id = XEN_SCHEDULER_SEDF,
@@ -1464,9 +1509,15 @@ const struct scheduler sched_sedf_def =
@@ -1464,9 +1509,15 @@ const struct scheduler sched_sedf_def =
.init_domain = sedf_init_domain,
.destroy_domain = sedf_destroy_domain,
@ -2416,10 +2329,8 @@ Index: xen-4.0.1-testing/xen/common/sched_sedf.c
.do_schedule = sedf_do_schedule,
.pick_cpu = sedf_pick_cpu,
.dump_cpu_state = sedf_dump_cpu_state,
Index: xen-4.0.1-testing/xen/common/schedule.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/schedule.c
+++ xen-4.0.1-testing/xen/common/schedule.c
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -53,10 +53,11 @@ static void poll_timer_fn(void *data);
/* This is global for now so that private implementations can reach it */
@ -2938,10 +2849,8 @@ Index: xen-4.0.1-testing/xen/common/schedule.c
}
#ifdef CONFIG_COMPAT
Index: xen-4.0.1-testing/xen/common/softirq.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/softirq.c
+++ xen-4.0.1-testing/xen/common/softirq.c
--- a/xen/common/softirq.c
+++ b/xen/common/softirq.c
@@ -88,9 +88,11 @@ void raise_softirq(unsigned int nr)
}
@ -3035,10 +2944,8 @@ Index: xen-4.0.1-testing/xen/common/softirq.c
open_softirq(TASKLET_SOFTIRQ, tasklet_action);
}
Index: xen-4.0.1-testing/xen/common/sysctl.c
===================================================================
--- xen-4.0.1-testing.orig/xen/common/sysctl.c
+++ xen-4.0.1-testing/xen/common/sysctl.c
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -314,6 +314,14 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
}
break;
@ -3054,10 +2961,8 @@ Index: xen-4.0.1-testing/xen/common/sysctl.c
default:
ret = arch_do_sysctl(op, u_sysctl);
break;
Index: xen-4.0.1-testing/xen/include/asm-x86/domain.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/asm-x86/domain.h
+++ xen-4.0.1-testing/xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -458,7 +458,8 @@ struct arch_vcpu
#define hvm_svm hvm_vcpu.u.svm
@ -3068,22 +2973,8 @@ Index: xen-4.0.1-testing/xen/include/asm-x86/domain.h
void vcpu_show_execution_state(struct vcpu *);
void vcpu_show_registers(const struct vcpu *);
Index: xen-4.0.1-testing/xen/include/asm-x86/smp.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/asm-x86/smp.h
+++ xen-4.0.1-testing/xen/include/asm-x86/smp.h
@@ -56,7 +56,6 @@ extern u32 cpu_2_logical_apicid[];
#define CPU_ONLINE 0x0002 /* CPU is up */
#define CPU_DEAD 0x0004 /* CPU is dead */
DECLARE_PER_CPU(int, cpu_state);
-extern spinlock_t(cpu_add_remove_lock);
#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
extern int cpu_down(unsigned int cpu);
Index: xen-4.0.1-testing/xen/include/public/domctl.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/public/domctl.h
+++ xen-4.0.1-testing/xen/include/public/domctl.h
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -60,10 +60,10 @@ struct xen_domctl_createdomain {
/* Should domain memory integrity be verifed by tboot during Sx? */
#define _XEN_DOMCTL_CDF_s3_integrity 2
@ -3112,10 +3003,8 @@ Index: xen-4.0.1-testing/xen/include/public/domctl.h
struct xen_domctl {
uint32_t cmd;
#define XEN_DOMCTL_createdomain 1
Index: xen-4.0.1-testing/xen/include/public/sysctl.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/public/sysctl.h
+++ xen-4.0.1-testing/xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -491,6 +491,28 @@ struct xen_sysctl_lockprof_op {
typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t);
@ -3153,11 +3042,9 @@ Index: xen-4.0.1-testing/xen/include/public/sysctl.h
uint8_t pad[128];
} u;
};
Index: xen-4.0.1-testing/xen/include/xen/sched-if.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/xen/sched-if.h
+++ xen-4.0.1-testing/xen/include/xen/sched-if.h
@@ -10,16 +10,29 @@
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -10,16 +10,26 @@
#include <xen/percpu.h>
@ -3167,9 +3054,6 @@ Index: xen-4.0.1-testing/xen/include/xen/sched-if.h
+
+/* cpus currently in no cpupool */
+extern cpumask_t cpupool_free_cpus;
+
+/* cpupool lock (used for cpu on/offline, too) */
+extern spinlock_t cpupool_lock;
+
struct schedule_data {
spinlock_t schedule_lock; /* spinlock protecting curr */
@ -3187,7 +3071,7 @@ Index: xen-4.0.1-testing/xen/include/xen/sched-if.h
static inline void vcpu_schedule_lock(struct vcpu *v)
{
@@ -59,28 +72,49 @@ struct scheduler {
@@ -59,28 +69,49 @@ struct scheduler {
char *name; /* full name for this scheduler */
char *opt_name; /* option name for this scheduler */
unsigned int sched_id; /* ID for this scheduler */
@ -3251,10 +3135,8 @@ Index: xen-4.0.1-testing/xen/include/xen/sched-if.h
+struct scheduler *scheduler_get_by_id(unsigned int id);
+
#endif /* __XEN_SCHED_IF_H__ */
Index: xen-4.0.1-testing/xen/include/xen/sched.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/xen/sched.h
+++ xen-4.0.1-testing/xen/include/xen/sched.h
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -9,6 +9,7 @@
#include <xen/shared.h>
#include <public/xen.h>
@ -3263,7 +3145,7 @@ Index: xen-4.0.1-testing/xen/include/xen/sched.h
#include <public/vcpu.h>
#include <public/xsm/acm.h>
#include <xen/time.h>
@@ -132,8 +133,6 @@ struct vcpu
@@ -132,8 +133,6 @@ struct vcpu
bool_t defer_shutdown;
/* VCPU is paused following shutdown request (d->is_shutting_down)? */
bool_t paused_for_shutdown;
@ -3316,7 +3198,7 @@ Index: xen-4.0.1-testing/xen/include/xen/sched.h
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);
@@ -604,6 +607,18 @@ extern enum cpufreq_controller {
@@ -604,6 +607,19 @@ extern enum cpufreq_controller {
FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;
@ -3327,6 +3209,7 @@ Index: xen-4.0.1-testing/xen/include/xen/sched.h
+int cpupool0_cpu_assign(struct cpupool *c);
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu);
+void cpupool_cpu_add(unsigned int cpu);
+int cpupool_cpu_remove(unsigned int cpu);
+int cpupool_add_domain(struct domain *d, int poolid);
+void cpupool_rm_domain(struct domain *d);
+int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op);
@ -3335,10 +3218,8 @@ Index: xen-4.0.1-testing/xen/include/xen/sched.h
#endif /* __SCHED_H__ */
/*
Index: xen-4.0.1-testing/xen/include/xen/softirq.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/xen/softirq.h
+++ xen-4.0.1-testing/xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h
+++ b/xen/include/xen/softirq.h
@@ -58,6 +58,7 @@ struct tasklet
struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, func, data }

View File

@ -2,7 +2,7 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -1313,8 +1313,15 @@ class XendDomainInfo:
@@ -1310,8 +1310,15 @@ class XendDomainInfo:
frontpath = self.getDeviceController(deviceClass).frontendPath(dev)
backpath = xstransact.Read(frontpath, "backend")
thread.start_new_thread(self.getDeviceController(deviceClass).finishDeviceCleanup, (backpath, path))

View File

@ -2,7 +2,7 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/vnc.c
===================================================================
--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/vnc.c
+++ xen-4.0.1-testing/tools/ioemu-qemu-xen/vnc.c
@@ -1713,6 +1713,31 @@ static int protocol_client_msg(VncState
@@ -1736,6 +1736,25 @@ static int protocol_client_msg(VncState
}
set_encodings(vs, (int32_t *)(data + 4), limit);
@ -23,12 +23,6 @@ Index: xen-4.0.1-testing/tools/ioemu-qemu-xen/vnc.c
+ vnc_write_u16(vs, 1); /* number of rects */
+ vnc_framebuffer_update(vs, 0, 0, vs->serverds.width, vs->serverds.height, -223);
+
+ /* Ensure that the new area is updated */
+ vnc_write_u8(vs, 0); /* msg id */
+ vnc_write_u8(vs, 0);
+ vnc_write_u16(vs, 1); /* number of rects */
+ send_framebuffer_update(vs, 0, 0, vs->serverds.width, vs->serverds.height);
+
+ vnc_flush(vs);
+ }
break;

View File

@ -1,20 +1,26 @@
Allow multiple bootloader loopback devices
Starting several domains concurrently can fail due to using a single
bootloader loopback device. This patch creates a list of bootloader
loopback devices so more than one instance of bootloader can be run
concurrently.
Index: xen-4.0.1-testing/tools/python/xen/util/blkif.py
===================================================================
--- xen-4.0.1-testing.orig/tools/python/xen/util/blkif.py
+++ xen-4.0.1-testing/tools/python/xen/util/blkif.py
@@ -19,10 +19,12 @@ def blkdev_name_to_number(name):
@@ -19,11 +19,6 @@ def blkdev_name_to_number(name):
devname = 'virtual-device'
devnum = None
+ """
try:
return (devname, os.stat(n).st_rdev)
except Exception, ex:
pass
+ """
- try:
- return (devname, os.stat(n).st_rdev)
- except Exception, ex:
- pass
-
scsi_major = [ 8, 65, 66, 67, 68, 69, 70, 71, 128, 129, 130, 131, 132, 133, 134, 135 ]
if re.match( '/dev/sd[a-z]([1-9]|1[0-5])?$', n):
major = scsi_major[(ord(n[7:8]) - ord('a')) / 16]
Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
===================================================================
--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
@ -24,11 +30,11 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
MIGRATE_TIMEOUT = 30.0
-BOOTLOADER_LOOPBACK_DEVICE = '/dev/xvdp'
+BOOTLOADER_LOOPBACK_DEVICES = ['/dev/xvdy', '/dev/xvdx', '/dev/xvdw', '/dev/xvdv', '/dev/xvdu', '/dev/xvdt', '/dev/xvds', '/dev/xvdr', '/dev/xvdq', '/dev/xvdp', '/dev/xvdo', '/dev/xvdn', '/dev/xvdm', '/dev/xvdl', '/dev/xvdk', '/dev/xvdj', '/dev/xvdi', '/dev/xvdh', '/dev/xvdg', '/dev/xvdf', '/dev/xvde', '/dev/xvdd']
+BOOTLOADER_LOOPBACK_DEVICES = ['/dev/xvd' + chr(x) for x in range(ord('z'), ord('d'), -1)]
xc = xen.lowlevel.xc.xc()
xoptions = XendOptions.instance()
@@ -3314,20 +3314,27 @@ class XendDomainInfo:
@@ -3311,20 +3311,27 @@ class XendDomainInfo:
# This is a file, not a device. pygrub can cope with a
# file if it's raw, but if it's QCOW or other such formats
# used through blktap, then we need to mount it first.
@ -70,7 +76,7 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
try:
blcfg = bootloader(blexec, fn, self, False,
@@ -3335,11 +3342,11 @@ class XendDomainInfo:
@@ -3332,11 +3339,11 @@ class XendDomainInfo:
finally:
if mounted:
log.info("Unmounting %s from %s." %

View File

@ -1,9 +1,7 @@
Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/platform_hypercall.c
+++ xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c
@@ -22,7 +22,7 @@
#include <xen/sched-if.h>
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -21,7 +21,7 @@
#include <xen/acpi.h>
#include <asm/current.h>
#include <public/platform.h>
-#include <acpi/cpufreq/processor_perf.h>
@ -11,7 +9,7 @@ Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c
#include <asm/edd.h>
#include <asm/mtrr.h>
#include "cpu/mtrr/mtrr.h"
@@ -63,6 +63,7 @@ static long cpu_down_helper(void *hdl, v
@@ -62,6 +62,7 @@ static long cpu_down_helper(void *hdl, v
ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
{
ret_t ret = 0;
@ -19,7 +17,7 @@ Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c
struct xen_platform_op curop, *op = &curop;
if ( !IS_PRIV(current->domain) )
@@ -487,6 +488,24 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
@@ -486,6 +487,24 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
op->u.mem_add.epfn,
op->u.mem_add.pxm);
break;
@ -44,10 +42,8 @@ Index: xen-4.0.1-testing/xen/arch/x86/platform_hypercall.c
default:
ret = -ENOSYS;
break;
Index: xen-4.0.1-testing/xen/include/public/platform.h
===================================================================
--- xen-4.0.1-testing.orig/xen/include/public/platform.h
+++ xen-4.0.1-testing/xen/include/public/platform.h
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -355,6 +355,14 @@ struct xenpf_mem_hotadd
uint32_t flags;
};

View File

@ -1,10 +1,8 @@
Change default IO-APIC ack mode for single IO-APIC systems to old-style.
Index: xen-4.0.1-testing/xen/arch/x86/io_apic.c
===================================================================
--- xen-4.0.1-testing.orig/xen/arch/x86/io_apic.c
+++ xen-4.0.1-testing/xen/arch/x86/io_apic.c
@@ -1562,7 +1562,7 @@ static unsigned int startup_level_ioapic
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -1559,7 +1559,7 @@ static unsigned int startup_level_ioapic
return 0; /* don't check for pending */
}
@ -13,7 +11,7 @@ Index: xen-4.0.1-testing/xen/arch/x86/io_apic.c
static void setup_ioapic_ack(char *s)
{
if ( !strcmp(s, "old") )
@@ -2066,6 +2066,8 @@ void __init setup_IO_APIC(void)
@@ -2062,6 +2062,8 @@ void __init setup_IO_APIC(void)
else
io_apic_irqs = ~PIC_IRQS;

View File

@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f50525ffe664fa03f425d044e7cde87b264c4cae9336d2c866e312e89dcd0b83
size 23280495
oid sha256:b66dcbfa61d2aec1da5077a3f58935786c803a0cc1ed5d76174d2d71e3372c76
size 23223686

View File

@ -1,13 +0,0 @@
Index: xen-4.0.1-testing/tools/Makefile
===================================================================
--- xen-4.0.1-testing.orig/tools/Makefile
+++ xen-4.0.1-testing/tools/Makefile
@@ -33,7 +33,7 @@ SUBDIRS-$(CONFIG_Linux) += fs-back
SUBDIRS-$(CONFIG_NetBSD) += fs-back
SUBDIRS-$(CONFIG_IOEMU) += ioemu-dir
SUBDIRS-y += xenpmd
-SUBDIRS-y += libxl
+#SUBDIRS-y += libxl
SUBDIRS-y += remus
SUBDIRS-$(CONFIG_X86) += xenpaging
SUBDIRS-$(CONFIG_X86) += debugger/gdbsx

View File

@ -1,13 +0,0 @@
Index: xen-4.0.1-testing/tools/Makefile
===================================================================
--- xen-4.0.1-testing.orig/tools/Makefile
+++ xen-4.0.1-testing/tools/Makefile
@@ -35,7 +35,7 @@ SUBDIRS-$(CONFIG_IOEMU) += ioemu-dir
SUBDIRS-y += xenpmd
#SUBDIRS-y += libxl
SUBDIRS-y += remus
-SUBDIRS-$(CONFIG_X86) += xenpaging
+#SUBDIRS-$(CONFIG_X86) += xenpaging
SUBDIRS-$(CONFIG_X86) += debugger/gdbsx
# These don't cross-compile

View File

@ -1,3 +1,98 @@
-------------------------------------------------------------------
Wed Oct 20 15:50:01 CEST 2010 - ohering@suse.de
- fate#310510 - fix xenpaging
xenpaging.tools_xenpaging_cleanup.patch
-------------------------------------------------------------------
Wed Oct 20 15:31:47 CEST 2010 - ohering@suse.de
- fate#310510 - fix xenpaging
xenpaging.mem_event_check_ring-free_requests.patch
-------------------------------------------------------------------
Wed Oct 20 15:29:40 CEST 2010 - ohering@suse.de
- install /etc/xen/examples/xentrace_formats.txt to get human readable
tracedata if xenalyze is not used
-------------------------------------------------------------------
Sun Oct 17 11:14:33 CEST 2010 - ohering@suse.de
- fate#310510 - fix xenpaging
xenpaging.autostart_delay.patch
xenpaging.blacklist.patch
xenpaging.MRU_SIZE.patch
remove xenpaging.hacks.patch, realmode works
-------------------------------------------------------------------
Mon Oct 11 08:59:35 MDT 2010 - carnold@novell.com
- Upstream patches from Jan including fixes for the following bugs
bnc#583568 - Xen kernel is not booting
bnc#615206 - Xen kernel fails to boot with IO-APIC problem
bnc#640773 - Xen kernel crashing right after grub
bnc#643477 - issues with PCI hotplug/hotunplug to Xen driver domain
22223-vtd-igd-workaround.patch
22222-x86-timer-extint.patch
22214-x86-msr-misc-enable.patch
22213-x86-xsave-cpuid-check.patch
22194-tmem-check-pv-mfn.patch
22177-i386-irq-safe-map_domain_page.patch
22175-x86-irq-enter-exit.patch
22174-x86-pmtimer-accuracy.patch
22160-Intel-C6-EOI.patch
22159-notify-evtchn-dying.patch
22157-x86-debug-key-i.patch
-------------------------------------------------------------------
Mon Oct 11 16:03:23 CEST 2010 - ohering@suse.de
- fate#310510 - fix xenpaging
xenpaging.signal_handling.patch
xenpaging.autostart.patch
xenpaging.hacks.patch
-------------------------------------------------------------------
Mon Oct 11 15:42:02 CEST 2010 - ohering@suse.de
- rename xenpaging.XENMEM_decrease_reservation.patch
to xenpaging.memory_op.patch
-------------------------------------------------------------------
Tue Oct 8 14:11:31 CST 2010 - cyliu@novell.com
- bnc#632956 - fix VNC altgr-insert behavior
7410-qemu-alt-gr.patch
altgr_2.patch
-------------------------------------------------------------------
Thu Oct 7 11:18:12 MDT 2010 - jfehlig@novell.com
- bnc#618087 - VNC view won't stay connected to fully virtualized
Linux Xen VMs
modified ioemu-vnc-resize.patch
-------------------------------------------------------------------
Tue Oct 5 09:34:20 MDT 2010 - carnold@novell.com
- bnc#639546 - Dom-U deleted after introduction of the parameter
"change_home_server False" in the VM configuration
change_home_server.patch
-------------------------------------------------------------------
Mon Oct 4 09:10:03 MDT 2010 - jfehlig@novell.com
- bnc#641859 - block-dmmd script does not handle the configuration
when only MD is used
modified block-dmmd script
-------------------------------------------------------------------
Thu Sep 30 17:52:55 CEST 2010 - ohering@suse.de
- fate#310510 - fix xenpaging
xenpaging.populate_only_if_paged.patch
-------------------------------------------------------------------
Mon Sep 27 09:59:37 MDT 2010 - carnold@novell.com
@ -19,15 +114,23 @@ Tue Sep 23 16:43:11 CST 2010 - cyliu@novell.com
multi-xvdp.patch
-------------------------------------------------------------------
Tue Sep 20 14:11:31 CST 2010 - cyliu@novell.com
Wed Sep 22 10:50:20 CEST 2010 - ohering@suse.de
- bnc#632956 - fix VNC altgr-insert behavior
- fate#310510 - fix xenpaging
xenpaging.XENMEM_decrease_reservation.patch
xenpaging.xenpaging_init.patch
xenpaging.policy_linear.patch
-------------------------------------------------------------------
Mon Sep 13 16:24:31 MDT 2010 - carnold@novell.com
Fri Sep 17 15:59:45 CEST 2010 - ohering@suse.de
- bnc#636231 - XEN: Unable to disconnect/remove CDROM drive from VM
xend-devid-or-name.patch
- fate#310510 - fix xenpaging
xenpaging.pageout_policy.patch
xenpaging.xs_daemon_close.patch
xenpaging.pagefile.patch
xenpaging.mem_paging_tool_qemu_flush_cache.patch
xenpaging.get_paged_frame.patch
xenpaging.notify_via_xen_event_channel.patch
-------------------------------------------------------------------
Mon Sep 13 10:50:56 MDT 2010 - carnold@novell.com

128
xen.spec
View File

@ -1,5 +1,5 @@
#
# spec file for package xen (Version 4.0.1_21326_01)
# spec file for package xen (Version 4.0.1_01)
#
# Copyright (c) 2010 SUSE LINUX Products GmbH, Nuernberg, Germany.
#
@ -25,8 +25,36 @@ ExclusiveArch: %ix86 x86_64
%define changeset 21326
%define xen_build_dir xen-4.0.1-testing
%define with_kmp 1
BuildRequires: LibVNCServer-devel SDL-devel acpica automake bin86 curl-devel dev86 graphviz latex2html libjpeg-devel libxml2-devel ncurses-devel openssl openssl-devel pciutils-devel python-devel texinfo transfig
BuildRequires: texlive texlive-latex
BuildRequires: LibVNCServer-devel
BuildRequires: SDL-devel
BuildRequires: automake
BuildRequires: bin86
BuildRequires: curl-devel
BuildRequires: dev86
BuildRequires: graphviz
BuildRequires: latex2html
BuildRequires: libjpeg-devel
BuildRequires: libxml2-devel
BuildRequires: ncurses-devel
BuildRequires: openssl
BuildRequires: openssl-devel
BuildRequires: pciutils-devel
BuildRequires: python-devel
BuildRequires: texinfo
BuildRequires: transfig
%if %suse_version <= 1110
BuildRequires: pmtools
%else
BuildRequires: acpica
%endif
%if %suse_version >= 1030
BuildRequires: texlive
BuildRequires: texlive-latex
%else
BuildRequires: te_ams
BuildRequires: te_latex
BuildRequires: tetex
%endif
%ifarch x86_64
BuildRequires: glibc-32bit glibc-devel-32bit
%define max_cpus 256
@ -38,7 +66,7 @@ BuildRequires: glibc-32bit glibc-devel-32bit
%if %{?with_kmp}0
BuildRequires: kernel-source kernel-syms module-init-tools xorg-x11
%endif
Version: 4.0.1_21326_01
Version: 4.0.1_01
Release: 1
License: GPLv2+
Group: System/Kernel
@ -101,6 +129,17 @@ Patch23: 22084-x86-xsave-off.patch
Patch24: 7410-qemu-alt-gr.patch
Patch25: 22135-heap-lock.patch
Patch26: 22148-serial-irq-dest.patch
Patch27: 22157-x86-debug-key-i.patch
Patch28: 22159-notify-evtchn-dying.patch
Patch29: 22160-Intel-C6-EOI.patch
Patch30: 22174-x86-pmtimer-accuracy.patch
Patch31: 22175-x86-irq-enter-exit.patch
Patch32: 22177-i386-irq-safe-map_domain_page.patch
Patch33: 22194-tmem-check-pv-mfn.patch
Patch34: 22213-x86-xsave-cpuid-check.patch
Patch35: 22214-x86-msr-misc-enable.patch
Patch36: 22222-x86-timer-extint.patch
Patch37: 22223-vtd-igd-workaround.patch
# Our patches
Patch300: xen-config.diff
Patch301: xend-config.diff
@ -117,8 +156,6 @@ Patch311: xen-no-dummy-nfs-ip.diff
Patch312: serial-split.patch
Patch313: xen-xm-top-needs-root.diff
Patch314: xen-max-free-mem.diff
Patch315: xen-disable-libxl.diff
Patch316: xen-disable-xenpaging.diff
Patch317: xen-extra-fixes.patch
Patch322: bridge-opensuse.patch
Patch323: bridge-vlan.diff
@ -162,7 +199,6 @@ Patch370: xend-sysconfig.patch
Patch371: domu-usb-controller.patch
Patch372: popen2-argument-fix.patch
Patch373: usb-list.patch
Patch374: xend-devid-or-name.patch
# Patches for snapshot support
Patch400: snapshot-ioemu-save.patch
Patch401: snapshot-ioemu-restore.patch
@ -190,6 +226,8 @@ Patch431: capslock_enable.patch
Patch432: enable_more_nic_pxe.patch
Patch433: multi-xvdp.patch
Patch434: check_device_status.patch
Patch435: change_home_server.patch
Patch436: altgr_2.patch
# Jim's domain lock patch
Patch450: xend-domain-lock.patch
# Hypervisor and PV driver Patches
@ -215,6 +253,24 @@ Patch702: hv_xen_extension.patch
Patch703: hv_win7_eoi_bug.patch
# Build patch
Patch999: tmp_build.patch
# FATE 310510
Patch10001: xenpaging.tools_xenpaging_cleanup.patch
Patch10002: xenpaging.pageout_policy.patch
Patch10003: xenpaging.xs_daemon_close.patch
Patch10010: xenpaging.policy_linear.patch
Patch10011: xenpaging.pagefile.patch
Patch10012: xenpaging.xenpaging_init.patch
Patch10013: xenpaging.mem_paging_tool_qemu_flush_cache.patch
Patch10014: xenpaging.memory_op.patch
Patch10015: xenpaging.populate_only_if_paged.patch
Patch10017: xenpaging.autostart.patch
Patch10018: xenpaging.signal_handling.patch
Patch10019: xenpaging.MRU_SIZE.patch
Patch10020: xenpaging.get_paged_frame.patch
Patch10021: xenpaging.mem_event_check_ring-free_requests.patch
Patch10022: xenpaging.blacklist.patch
Patch10023: xenpaging.autostart_delay.patch
Patch10024: xenpaging.makefile.patch
Url: http://www.cl.cam.ac.uk/Research/SRG/netos/xen/
BuildRoot: %{_tmppath}/%{name}-%{version}-build
#%define pysite %(python -c "import distutils.sysconfig; print distutils.sysconfig.get_python_lib()")
@ -574,6 +630,17 @@ Authors:
%patch24 -p1
%patch25 -p1
%patch26 -p1
%patch27 -p1
%patch28 -p1
%patch29 -p1
%patch30 -p1
%patch31 -p1
%patch32 -p1
%patch33 -p1
%patch34 -p1
%patch35 -p1
%patch36 -p1
%patch37 -p1
%patch300 -p1
%patch301 -p1
%patch302 -p1
@ -589,8 +656,6 @@ Authors:
%patch312 -p1
%patch313 -p1
%patch314 -p1
%patch315 -p1
%patch316 -p1
%patch317 -p1
%patch322 -p1
%patch323 -p1
@ -633,7 +698,6 @@ Authors:
%patch371 -p1
%patch372 -p1
%patch373 -p1
%patch374 -p1
%patch400 -p1
%patch401 -p1
%patch402 -p1
@ -658,6 +722,8 @@ Authors:
%patch432 -p1
%patch433 -p1
%patch434 -p1
%patch435 -p1
%patch436 -p1
%patch450 -p1
%patch500 -p1
%patch501 -p1
@ -679,6 +745,24 @@ Authors:
%patch702 -p1
%patch703 -p1
%patch999 -p1
%patch10001 -p1
%patch10002 -p1
%patch10003 -p1
%patch10010 -p1
%patch10011 -p1
%patch10012 -p1
%patch10013 -p1
%patch10014 -p1
%patch10015 -p1
%patch10017 -p1
%patch10018 -p1
%patch10019 -p1
%patch10020 -p1
%patch10021 -p1
%patch10022 -p1
%patch10023 -p1
%patch10024 -p1
%build
XEN_EXTRAVERSION=%version-%release
@ -688,11 +772,11 @@ sed -i "s/XEN_CHANGESET[\t ]*=.*\$/XEN_CHANGESET = %{changeset}/" xen/Makefi
RPM_OPT_FLAGS=${RPM_OPT_FLAGS//-fstack-protector/}
export CFLAGS="${RPM_OPT_FLAGS}"
export RPM_OPT_FLAGS
make -C tools/include/xen-foreign
make tools docs
make -C tools/include/xen-foreign %{?jobs:-j%{jobs}}
make tools docs %{?jobs:-j%{jobs}}
cd tools/debugger/gdb
# there are code problems that don't pass the 02-check-gcc-output, hence bitbucket
./gdbbuild 1>/dev/null 2>/dev/null
env MAKE="make %{?jobs:-j%{jobs}}" ./gdbbuild 1>/dev/null 2>/dev/null
cd ../../..
%if %{?with_kmp}0
# pv driver modules
@ -705,6 +789,7 @@ for flavor in %flavors_to_build; do
cd obj/$flavor
./mkbuildtree
make -C /usr/src/linux-obj/%_target_cpu/$flavor modules \
%{?jobs:-j%{jobs}} \
M=$PWD
cd ../..
done
@ -733,23 +818,23 @@ install_xen()
ln -s xen${ext}-%{version}-%{release}.gz $RPM_BUILD_ROOT/boot/xen${ext}.gz
ln -sf xen-syms${ext}-%{version}-%{release} $RPM_BUILD_ROOT/boot/xen-syms${ext}
}
make -C xen install max_phys_cpus=%{max_cpus} pae=%{pae_enabled} debug=y crash_debug=y DESTDIR=$RPM_BUILD_ROOT
make -C xen install max_phys_cpus=%{max_cpus} pae=%{pae_enabled} debug=y crash_debug=y DESTDIR=$RPM_BUILD_ROOT %{?jobs:-j%{jobs}}
install_xen dbg
make -C xen clean
make -C xen install max_phys_cpus=%{max_cpus} pae=%{pae_enabled} debug=n crash_debug=n DESTDIR=$RPM_BUILD_ROOT
make -C xen install max_phys_cpus=%{max_cpus} pae=%{pae_enabled} debug=n crash_debug=n DESTDIR=$RPM_BUILD_ROOT %{?jobs:-j%{jobs}}
install_xen
make -C xen clean
export CFLAGS="$RPM_OPT_FLAGS"
export RPM_OPT_FLAGS
make -C tools/include/xen-foreign
make -C tools/include/xen-foreign %{?jobs:-j%{jobs}}
# tools
export XEN_PYTHON_NATIVE_INSTALL=1
make -C tools install \
DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir}
DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} %{?jobs:-j%{jobs}}
cp tools/debugger/gdb/gdb-6.2.1-linux-i386-xen/gdb/gdbserver/gdbserver-xen $RPM_BUILD_ROOT/usr/bin/gdbserver-xen
rm -f $RPM_BUILD_ROOT/usr/sbin/{qcow-create,img2qcow,qcow2raw}
make -C tools/misc/serial-split install \
DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir}
DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} %{?jobs:-j%{jobs}}
%ifarch x86_64
mkdir -p $RPM_BUILD_ROOT/${_libdir}/xen/bin/
ln -s /usr/lib/xen/bin/qemu-dm $RPM_BUILD_ROOT/%{_libdir}/xen/bin/qemu-dm
@ -789,6 +874,7 @@ mkdir -p $RPM_BUILD_ROOT/etc/xen/{vm,examples,scripts}
mv $RPM_BUILD_ROOT/etc/xen/xmexample* $RPM_BUILD_ROOT/etc/xen/examples
rm -f $RPM_BUILD_ROOT/etc/xen/examples/*nbd
install -m644 %SOURCE9 %SOURCE10 $RPM_BUILD_ROOT/etc/xen/examples/
install -m644 tools/xentrace/formats $RPM_BUILD_ROOT/etc/xen/examples/xentrace_formats.txt
# scripts
rm -f $RPM_BUILD_ROOT/etc/xen/scripts/block-*nbd
install -m755 %SOURCE11 %SOURCE12 %SOURCE13 %SOURCE14 %SOURCE15 %SOURCE16 %SOURCE17 %SOURCE21 $RPM_BUILD_ROOT/etc/xen/scripts/
@ -871,6 +957,7 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug
%{_libdir}/libfsimage.so.*
%{_libdir}/libxen*.so.*
%{_libdir}/libvhd.so.*
%{_libdir}/libxlutil.so.*
%files tools
%defattr(-,root,root)
@ -906,6 +993,7 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug
/usr/sbin/vhd-update
/usr/sbin/vhd-util
/usr/sbin/gdbsx
/usr/sbin/xl
%dir %{_libdir}/xen
%dir %{_libdir}/xen/bin
%ifarch x86_64
@ -958,6 +1046,7 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug
/etc/sysconfig/network/scripts/xen-updown.sh
/etc/sysconfig/network/if-up.d/xen
/etc/sysconfig/network/if-down.d/xen
/etc/bash_completion.d/xl.sh
%dir %{_defaultdocdir}/xen
%{_defaultdocdir}/xen/COPYING
%{_defaultdocdir}/xen/README.SuSE
@ -1012,6 +1101,8 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug
%{_libdir}/libxen*.so
%{_libdir}/libvhd.a
%{_libdir}/libvhd.so
%{_libdir}/libxlutil.a
%{_libdir}/libxlutil.so
/usr/bin/serial-split
/usr/include/blktaplib.h
/usr/include/fsimage*
@ -1019,6 +1110,7 @@ rm -rf $RPM_BUILD_ROOT/%{_libdir}/debug
/usr/include/xen/
/usr/include/xs.h
/usr/include/xs_lib.h
/usr/include/libxl.h
%files doc-html
%defattr(-,root,root)

View File

@ -94,7 +94,7 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
XendTask.log_progress(0, 30, self._constructDomain)
XendTask.log_progress(31, 60, self._initDomain)
@@ -2990,6 +2992,11 @@ class XendDomainInfo:
@@ -2987,6 +2989,11 @@ class XendDomainInfo:
self._stateSet(DOM_STATE_HALTED)
self.domid = None # Do not push into _stateSet()!
@ -106,7 +106,7 @@ Index: xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
finally:
self.refresh_shutdown_lock.release()
@@ -4503,6 +4510,74 @@ class XendDomainInfo:
@@ -4505,6 +4512,74 @@ class XendDomainInfo:
def has_device(self, dev_class, dev_uuid):
return (dev_uuid in self.info['%s_refs' % dev_class.lower()])

23
xenpaging.MRU_SIZE.patch Normal file
View File

@ -0,0 +1,23 @@
Subject: xenpaging: increase recently used pages from 4MB to 64MB
Increase recently used pages from 4MB to 64MB.
Keeping more pages in memory allows the guest to make more progress if the
paging file spans the entire guest memory.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/xenpaging/policy_default.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
--- xen-4.0.1-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.0.1-testing/tools/xenpaging/policy_default.c
@@ -26,7 +26,7 @@
#include "policy.h"
-#define MRU_SIZE 1024
+#define MRU_SIZE (1024 * 16)
static unsigned long mru[MRU_SIZE];

234
xenpaging.autostart.patch Normal file
View File

@ -0,0 +1,234 @@
Subject: xenpaging: start xenpaging via config option
Start xenpaging via config option.
TODO: add config option for pagefile directory
TODO: add libxl support
TODO: parse config values like 42K, 42M, 42G, 42%
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/examples/xmexample.hvm | 3 +
tools/python/README.XendConfig | 1
tools/python/README.sxpcfg | 1
tools/python/xen/xend/XendConfig.py | 3 +
tools/python/xen/xend/XendDomainInfo.py | 6 ++
tools/python/xen/xend/image.py | 87 ++++++++++++++++++++++++++++++++
tools/python/xen/xm/create.py | 5 +
tools/python/xen/xm/xenapi_create.py | 1
8 files changed, 107 insertions(+)
--- xen-4.0.1-testing.orig/tools/examples/xmexample.hvm
+++ xen-4.0.1-testing/tools/examples/xmexample.hvm
@@ -127,6 +127,9 @@ disk = [ 'file:/var/lib/xen/images/disk.
# Device Model to be used
device_model = 'qemu-dm'
+# xenpaging, number of pages
+xenpaging = 42
+
#-----------------------------------------------------------------------------
# boot on floppy (a), hard disk (c), Network (n) or CD-ROM (d)
# default: hard disk, cd-rom, floppy
--- xen-4.0.1-testing.orig/tools/python/README.XendConfig
+++ xen-4.0.1-testing/tools/python/README.XendConfig
@@ -120,6 +120,7 @@ otherConfig
image.vncdisplay
image.vncunused
image.hvm.device_model
+ image.hvm.xenpaging
image.hvm.display
image.hvm.xauthority
image.hvm.vncconsole
--- xen-4.0.1-testing.orig/tools/python/README.sxpcfg
+++ xen-4.0.1-testing/tools/python/README.sxpcfg
@@ -51,6 +51,7 @@ image
- vncunused
(HVM)
- device_model
+ - xenpaging
- display
- xauthority
- vncconsole
--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendConfig.py
+++ xen-4.0.1-testing/tools/python/xen/xend/XendConfig.py
@@ -145,6 +145,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
'apic': int,
'boot': str,
'device_model': str,
+ 'xenpaging': int,
'loader': str,
'display' : str,
'fda': str,
@@ -508,6 +509,8 @@ class XendConfig(dict):
self['platform']['nomigrate'] = 0
if self.is_hvm():
+ if 'xenpaging' not in self['platform']:
+ self['platform']['xenpaging'] = None
if 'timer_mode' not in self['platform']:
self['platform']['timer_mode'] = 1
if 'viridian' not in self['platform']:
--- xen-4.0.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
+++ xen-4.0.1-testing/tools/python/xen/xend/XendDomainInfo.py
@@ -2439,6 +2439,7 @@ class XendDomainInfo:
if self.image:
self.image.createDeviceModel()
+ self.image.createXenPaging()
#if have pass-through devs, need the virtual pci slots info from qemu
self.pci_device_configure_boot()
@@ -2451,6 +2452,11 @@ class XendDomainInfo:
self.image.destroyDeviceModel()
except Exception, e:
log.exception("Device model destroy failed %s" % str(e))
+ try:
+ log.debug("stopping xenpaging")
+ self.image.destroyXenPaging()
+ except Exception, e:
+ log.exception("stopping xenpaging failed %s" % str(e))
else:
log.debug("No device model")
--- xen-4.0.1-testing.orig/tools/python/xen/xend/image.py
+++ xen-4.0.1-testing/tools/python/xen/xend/image.py
@@ -122,12 +122,14 @@ class ImageHandler:
self.vm.permissionsVm("image/cmdline", { 'dom': self.vm.getDomid(), 'read': True } )
self.device_model = vmConfig['platform'].get('device_model')
+ self.xenpaging = vmConfig['platform'].get('xenpaging')
self.display = vmConfig['platform'].get('display')
self.xauthority = vmConfig['platform'].get('xauthority')
self.vncconsole = int(vmConfig['platform'].get('vncconsole', 0))
self.dmargs = self.parseDeviceModelArgs(vmConfig)
self.pid = None
+ self.xenpaging_pid = None
rtc_timeoffset = int(vmConfig['platform'].get('rtc_timeoffset', 0))
if int(vmConfig['platform'].get('localtime', 0)):
if time.localtime(time.time())[8]:
@@ -392,6 +394,91 @@ class ImageHandler:
sentinel_fifos_inuse[sentinel_path_fifo] = 1
self.sentinel_path_fifo = sentinel_path_fifo
+ def createXenPaging(self):
+ if self.xenpaging is None:
+ return
+ if self.xenpaging == 0:
+ return
+ if self.xenpaging_pid:
+ return
+ xenpaging_bin = auxbin.pathTo("xenpaging")
+ args = [xenpaging_bin]
+ args = args + ([ "%d" % self.vm.getDomid()])
+ args = args + ([ "%s" % self.xenpaging])
+ env = dict(os.environ)
+ self.xenpaging_logfile = "/var/log/xen/xenpaging-%s.log" % str(self.vm.info['name_label'])
+ logfile_mode = os.O_WRONLY|os.O_CREAT|os.O_APPEND|os.O_TRUNC
+ null = os.open("/dev/null", os.O_RDONLY)
+ logfd = os.open(self.xenpaging_logfile, logfile_mode, 0644)
+ sys.stderr.flush()
+ contract = osdep.prefork("%s:%d" % (self.vm.getName(), self.vm.getDomid()))
+ xenpaging_pid = os.fork()
+ if xenpaging_pid == 0: #child
+ try:
+ xenpaging_dir = "/var/lib/xen/xenpaging"
+ osdep.postfork(contract)
+ os.dup2(null, 0)
+ os.dup2(logfd, 1)
+ os.dup2(logfd, 2)
+ try:
+ os.mkdir(xenpaging_dir)
+ except:
+ log.info("mkdir %s failed" % xenpaging_dir)
+ pass
+ try:
+ os.chdir(xenpaging_dir)
+ except:
+ log.warn("chdir %s failed" % xenpaging_dir)
+ try:
+ log.info("starting %s" % args)
+ os.execve(xenpaging_bin, args, env)
+ except Exception, e:
+ print >>sys.stderr, (
+ 'failed to execute xenpaging: %s: %s' %
+ xenpaging_bin, utils.exception_string(e))
+ os._exit(126)
+ except Exception, e:
+ log.warn("staring xenpaging in %s failed" % xenpaging_dir)
+ os._exit(127)
+ else:
+ osdep.postfork(contract, abandon=True)
+ self.xenpaging_pid = xenpaging_pid
+ os.close(null)
+ os.close(logfd)
+
+ def destroyXenPaging(self):
+ if self.xenpaging is None:
+ return
+ if self.xenpaging_pid:
+ try:
+ os.kill(self.xenpaging_pid, signal.SIGHUP)
+ except OSError, exn:
+ log.exception(exn)
+ for i in xrange(100):
+ try:
+ (p, rv) = os.waitpid(self.xenpaging_pid, os.WNOHANG)
+ if p == self.xenpaging_pid:
+ break
+ except OSError:
+ # This is expected if Xend has been restarted within
+ # the life of this domain. In this case, we can kill
+ # the process, but we can't wait for it because it's
+ # not our child. We continue this loop, and after it is
+ # terminated make really sure the process is going away
+ # (SIGKILL).
+ pass
+ time.sleep(0.1)
+ else:
+ log.warning("xenpaging %d took more than 10s "
+ "to terminate: sending SIGKILL" % self.xenpaging_pid)
+ try:
+ os.kill(self.xenpaging_pid, signal.SIGKILL)
+ os.waitpid(self.xenpaging_pid, 0)
+ except OSError:
+ # This happens if the process doesn't exist.
+ pass
+ self.xenpaging_pid = None
+
def createDeviceModel(self, restore = False):
if self.device_model is None:
return
--- xen-4.0.1-testing.orig/tools/python/xen/xm/create.py
+++ xen-4.0.1-testing/tools/python/xen/xm/create.py
@@ -495,6 +495,10 @@ gopts.var('nfs_root', val="PATH",
fn=set_value, default=None,
use="Set the path of the root NFS directory.")
+gopts.var('xenpaging', val='NUM',
+ fn=set_int, default=None,
+ use="Number of pages to swap.")
+
gopts.var('device_model', val='FILE',
fn=set_value, default=None,
use="Path to device model program.")
@@ -1080,6 +1084,7 @@ def configure_hvm(config_image, vals):
args = [ 'acpi', 'apic',
'boot',
'cpuid', 'cpuid_check',
+ 'xenpaging',
'device_model', 'display',
'fda', 'fdb',
'gfx_passthru', 'guest_os_type',
--- xen-4.0.1-testing.orig/tools/python/xen/xm/xenapi_create.py
+++ xen-4.0.1-testing/tools/python/xen/xm/xenapi_create.py
@@ -1086,6 +1086,7 @@ class sxp2xml:
'acpi',
'apic',
'boot',
+ 'xenpaging',
'device_model',
'loader',
'fda',

View File

@ -0,0 +1,74 @@
Subject: xenpaging: add dynamic startup delay for xenpaging
This is a debug helper. Since the xenpaging support is still fragile, run
xenpaging at different stages in the bootprocess. Different delays will trigger
more bugs. This implementation starts without delay for 5 reboots, then
increments the delay by 0.1 seconds. It uses xenstore for persistent storage of
delay values.
TODO: find the correct place to remove the xenstore directory when the guest is shutdown or crashed
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/python/xen/xend/image.py | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
--- xen-4.0.1-testing.orig/tools/python/xen/xend/image.py
+++ xen-4.0.1-testing/tools/python/xen/xend/image.py
@@ -123,6 +123,18 @@ class ImageHandler:
self.device_model = vmConfig['platform'].get('device_model')
self.xenpaging = vmConfig['platform'].get('xenpaging')
+ self.xenpaging_delay = xstransact.Read("/local/domain/0/xenpaging/%s/xenpaging_delay" % self.vm.info['name_label'])
+ if self.xenpaging_delay == None:
+ log.warn("XXX creating /local/domain/0/xenpaging/%s" % self.vm.info['name_label'])
+ xstransact.Mkdir("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'])
+ xstransact.Store("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'], ('xenpaging_delay', '0.0'))
+ xstransact.Store("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'], ('xenpaging_delay_inc', '0.1'))
+ xstransact.Store("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'], ('xenpaging_delay_use', '5'))
+ xstransact.Store("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'], ('xenpaging_delay_used', '0'))
+ self.xenpaging_delay = float(xstransact.Read("/local/domain/0/xenpaging/%s/xenpaging_delay" % self.vm.info['name_label']))
+ self.xenpaging_delay_inc = float(xstransact.Read("/local/domain/0/xenpaging/%s/xenpaging_delay_inc" % self.vm.info['name_label']))
+ self.xenpaging_delay_use = int(xstransact.Read("/local/domain/0/xenpaging/%s/xenpaging_delay_use" % self.vm.info['name_label']))
+ self.xenpaging_delay_used = int(xstransact.Read("/local/domain/0/xenpaging/%s/xenpaging_delay_used" % self.vm.info['name_label']))
self.display = vmConfig['platform'].get('display')
self.xauthority = vmConfig['platform'].get('xauthority')
@@ -401,6 +413,17 @@ class ImageHandler:
return
if self.xenpaging_pid:
return
+ if self.xenpaging_delay_used < self.xenpaging_delay_use:
+ self.xenpaging_delay_used += 1
+ else:
+ self.xenpaging_delay_used = 0
+ self.xenpaging_delay += self.xenpaging_delay_inc
+ log.info("delay_used %s" % self.xenpaging_delay_used)
+ log.info("delay_use %s" % self.xenpaging_delay_use)
+ log.info("delay %s" % self.xenpaging_delay)
+ log.info("delay_inc %s" % self.xenpaging_delay_inc)
+ xstransact.Store("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'], ('xenpaging_delay', self.xenpaging_delay))
+ xstransact.Store("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'], ('xenpaging_delay_used', self.xenpaging_delay_used))
xenpaging_bin = auxbin.pathTo("xenpaging")
args = [xenpaging_bin]
args = args + ([ "%d" % self.vm.getDomid()])
@@ -430,6 +453,9 @@ class ImageHandler:
except:
log.warn("chdir %s failed" % xenpaging_dir)
try:
+ if self.xenpaging_delay != 0.0:
+ log.info("delaying xenpaging startup %s seconds ..." % self.xenpaging_delay)
+ time.sleep(self.xenpaging_delay)
log.info("starting %s" % args)
os.execve(xenpaging_bin, args, env)
except Exception, e:
@@ -449,6 +475,8 @@ class ImageHandler:
def destroyXenPaging(self):
if self.xenpaging is None:
return
+ # FIXME find correct place for guest shutdown or crash
+ #xstransact.Remove("/local/domain/0/xenpaging/%s" % self.vm.info['name_label'])
if self.xenpaging_pid:
try:
os.kill(self.xenpaging_pid, signal.SIGHUP)

27
xenpaging.blacklist.patch Normal file
View File

@ -0,0 +1,27 @@
Subject: xenpaging: prevent page-out of first 16MB
This is more a workaround than a bugfix:
Don't page out first 16MB of memory.
When the BIOS does its initialization process and xenpaging removes pages,
crashes will occur due to lack of support of xenpaging.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/xenpaging/policy_default.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
--- xen-4.0.1-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.0.1-testing/tools/xenpaging/policy_default.c
@@ -60,8 +60,9 @@ int policy_init(xenpaging_t *paging)
for ( i = 0; i < MRU_SIZE; i++ )
mru[i] = INVALID_MFN;
- /* Don't page out page 0 */
- set_bit(0, bitmap);
+ /* Don't page out first 16MB */
+ for ( i = 0; i < ((16*1024*1024)/4096); i++ )
+ set_bit(i, bitmap);
out:
return rc;

View File

@ -0,0 +1,170 @@
Subject: xenpaging: page-in granttable entries
When converting a gfn to mfn, check if the page is paged-out.
If it is, request a page-in and return GNTST_eagain to the caller
to indicate a retry of the hypercall is required.
This fixes granttable errors when xenpaging is enabled in the guest.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Already-Acked-by: Patrick Colp <pjcolp@cs.ubc.ca>
Already-Acked-by: Keir Fraser <keir.fraser@citrix.com>
---
xen/common/grant_table.c | 94 ++++++++++++++++++++++++++++++-----------------
1 file changed, 60 insertions(+), 34 deletions(-)
--- xen-4.0.1-testing.orig/xen/common/grant_table.c
+++ xen-4.0.1-testing/xen/common/grant_table.c
@@ -139,6 +139,37 @@ shared_entry_header(struct grant_table *
#define active_entry(t, e) \
((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE])
+/* Check if the page has been paged out */
+static int __get_paged_frame(unsigned long gfn, unsigned long *frame, int readonly, struct domain *rd)
+{
+ p2m_type_t p2mt;
+ mfn_t mfn;
+ int rc = GNTST_okay;
+
+ if ( readonly )
+ mfn = gfn_to_mfn(rd, gfn, &p2mt);
+ else
+ mfn = gfn_to_mfn_unshare(rd, gfn, &p2mt, 1);
+
+ if ( p2m_is_valid(p2mt) )
+ {
+ *frame = mfn_x(mfn);
+ if ( p2m_is_paging(p2mt) )
+ {
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(rd, gfn);
+ rc = GNTST_eagain;
+ }
+ }
+ else
+ {
+ *frame = INVALID_MFN;
+ rc = GNTST_bad_page;
+ }
+
+ return rc;
+}
+
static inline int
__get_maptrack_handle(
struct grant_table *t)
@@ -527,14 +558,16 @@ __gnttab_map_grant_ref(
if ( !act->pin )
{
+ unsigned long gfn;
+ unsigned long frame;
+
+ gfn = sha1 ? sha1->frame : sha2->full_page.frame;
+ rc = __get_paged_frame(gfn, &frame, !!(op->flags & GNTMAP_readonly), rd);
+ if ( rc != GNTST_okay )
+ goto unlock_out;
+ act->gfn = gfn;
act->domid = ld->domain_id;
- if ( sha1 )
- act->gfn = sha1->frame;
- else
- act->gfn = sha2->full_page.frame;
- act->frame = (op->flags & GNTMAP_readonly) ?
- gmfn_to_mfn(rd, act->gfn) :
- gfn_to_mfn_private(rd, act->gfn);
+ act->frame = frame;
act->start = 0;
act->length = PAGE_SIZE;
act->is_sub_page = 0;
@@ -1697,6 +1730,7 @@ __acquire_grant_for_copy(
domid_t trans_domid;
grant_ref_t trans_gref;
struct domain *rrd;
+ unsigned long gfn;
unsigned long grant_frame;
unsigned trans_page_off;
unsigned trans_length;
@@ -1814,9 +1848,11 @@ __acquire_grant_for_copy(
}
else if ( sha1 )
{
- act->gfn = sha1->frame;
- grant_frame = readonly ? gmfn_to_mfn(rd, act->gfn) :
- gfn_to_mfn_private(rd, act->gfn);
+ gfn = sha1->frame;
+ rc = __get_paged_frame(gfn, &grant_frame, readonly, rd);
+ if ( rc != GNTST_okay )
+ goto unlock_out;
+ act->gfn = gfn;
is_sub_page = 0;
trans_page_off = 0;
trans_length = PAGE_SIZE;
@@ -1824,9 +1860,11 @@ __acquire_grant_for_copy(
}
else if ( !(sha2->hdr.flags & GTF_sub_page) )
{
- act->gfn = sha2->full_page.frame;
- grant_frame = readonly ? gmfn_to_mfn(rd, act->gfn) :
- gfn_to_mfn_private(rd, act->gfn);
+ gfn = sha2->full_page.frame;
+ rc = __get_paged_frame(gfn, &grant_frame, readonly, rd);
+ if ( rc != GNTST_okay )
+ goto unlock_out;
+ act->gfn = gfn;
is_sub_page = 0;
trans_page_off = 0;
trans_length = PAGE_SIZE;
@@ -1834,9 +1872,11 @@ __acquire_grant_for_copy(
}
else
{
- act->gfn = sha2->sub_page.frame;
- grant_frame = readonly ? gmfn_to_mfn(rd, act->gfn) :
- gfn_to_mfn_private(rd, act->gfn);
+ gfn = sha2->sub_page.frame;
+ rc = __get_paged_frame(gfn, &grant_frame, readonly, rd);
+ if ( rc != GNTST_okay )
+ goto unlock_out;
+ act->gfn = gfn;
is_sub_page = 1;
trans_page_off = sha2->sub_page.page_off;
trans_length = sha2->sub_page.length;
@@ -1932,16 +1972,9 @@ __gnttab_copy(
else
{
#ifdef CONFIG_X86
- p2m_type_t p2mt;
- s_frame = mfn_x(gfn_to_mfn(sd, op->source.u.gmfn, &p2mt));
- if ( !p2m_is_valid(p2mt) )
- s_frame = INVALID_MFN;
- if ( p2m_is_paging(p2mt) )
- {
- p2m_mem_paging_populate(sd, op->source.u.gmfn);
- rc = -ENOENT;
+ rc = __get_paged_frame(op->source.u.gmfn, &s_frame, 1, sd);
+ if ( rc != GNTST_okay )
goto error_out;
- }
#else
s_frame = gmfn_to_mfn(sd, op->source.u.gmfn);
#endif
@@ -1978,16 +2011,9 @@ __gnttab_copy(
else
{
#ifdef CONFIG_X86
- p2m_type_t p2mt;
- d_frame = mfn_x(gfn_to_mfn_unshare(dd, op->dest.u.gmfn, &p2mt, 1));
- if ( !p2m_is_valid(p2mt) )
- d_frame = INVALID_MFN;
- if ( p2m_is_paging(p2mt) )
- {
- p2m_mem_paging_populate(dd, op->dest.u.gmfn);
- rc = -ENOENT;
+ rc = __get_paged_frame(op->dest.u.gmfn, &d_frame, 0, dd);
+ if ( rc != GNTST_okay )
goto error_out;
- }
#else
d_frame = gmfn_to_mfn(dd, op->dest.u.gmfn);
#endif

13
xenpaging.makefile.patch Normal file
View File

@ -0,0 +1,13 @@
Index: xen-4.0.1-testing/tools/xenpaging/Makefile
===================================================================
--- xen-4.0.1-testing.orig/tools/xenpaging/Makefile
+++ xen-4.0.1-testing/tools/xenpaging/Makefile
@@ -27,7 +27,7 @@ IBINS = xenpaging
all: $(IBINS)
xenpaging: $(OBJS)
- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
+ $(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS)
install: all
$(INSTALL_DIR) $(DESTDIR)$(SBINDIR)

View File

@ -0,0 +1,25 @@
Subject: xenpaging: print info when free request slots drop below 3
Add a debugging aid that reports the free request slots in the ring buffer.
The ring should never get full, but print info anyway if it happens.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
xen/arch/x86/mm/mem_event.c | 5 +++++
1 file changed, 5 insertions(+)
--- xen-4.0.1-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.0.1-testing/xen/arch/x86/mm/mem_event.c
@@ -168,6 +168,11 @@ int mem_event_check_ring(struct domain *
mem_event_ring_lock(d);
free_requests = RING_FREE_REQUESTS(&d->mem_event.front_ring);
+ if ( unlikely(free_requests < 3) )
+ {
+ gdprintk(XENLOG_INFO, "free request slots: %d\n", free_requests);
+ WARN_ON(free_requests == 0);
+ }
ring_full = free_requests < MEM_EVENT_RING_THRESHOLD;
if ( (current->domain->domain_id == d->domain_id) && ring_full )

View File

@ -0,0 +1,29 @@
Subject: xenpaging/qemu-dm: add command to flush buffer cache.
Add support for a xenstore dm command to flush qemu's buffer cache.
qemu will just keep mapping pages and not release them, which causes problems
for the memory pager (since the page is mapped, it won't get paged out). When
the pager has trouble finding a page to page out, it asks qemu to flush its
buffer, which releases all the page mappings. This makes it possible to find
pages to swap out again.
Already-Signed-off-by: Patrick Colp <Patrick.Colp@citrix.com>
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/ioemu-qemu-xen/xenstore.c | 3 +++
1 file changed, 3 insertions(+)
--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/xenstore.c
+++ xen-4.0.1-testing/tools/ioemu-qemu-xen/xenstore.c
@@ -1021,6 +1021,9 @@ static void xenstore_process_dm_command_
do_pci_add(par);
free(par);
#endif
+ } else if (!strncmp(command, "flush-cache", len)) {
+ fprintf(logfile, "dm-command: flush caches\n");
+ qemu_invalidate_map_cache();
} else {
fprintf(logfile, "dm-command: unknown command\"%*s\"\n", len, command);
}

454
xenpaging.memory_op.patch Normal file
View File

@ -0,0 +1,454 @@
Subject: xenpaging: handle paged-out pages in XENMEM_* commands
Fix these two warnings:
(XEN) Assertion '__mfn_valid(mfn_x(omfn))' failed at p2m.c:2200
(XEN) memory.c:171:d1 Domain 1 page number 37ff0 invalid
Handle paged-out pages in xc_memory_op, guest_physmap_add_entry and
guest_remove_page. Use new do_xenmem_op_retry helper function.
In addition, also export xen/errno.h to hvmloader to get the ENOENT define.
XENMEM_populate_physmap
populate_physmap
-> guest_physmap_add_entry
XENMEM_exchange
memory_exchange
-> guest_physmap_add_entry
XENMEM_add_to_physmap
guest_physmap_add_page
-> guest_physmap_add_entry
__gnttab_map_grant_ref
create_grant_host_mapping
create_grant_p2m_mapping
-> guest_physmap_add_entry
XENMEM_decrease_reservation
decrease_reservation
-> guest_remove_page
XENMEM_add_to_physmap
-> guest_remove_page
XENMEM_add_to_physmap
-> XENMAPSPACE_gmfn
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/firmware/hvmloader/hvmloader.c | 9 +++-
tools/firmware/hvmloader/util.c | 26 +++++++++++-
tools/include/Makefile | 1
tools/ioemu-qemu-xen/hw/vga.c | 5 +-
tools/libxc/xc_domain.c | 71 +++++++++++++++++++++--------------
xen/arch/x86/mm.c | 26 ++++++++++--
xen/arch/x86/mm/p2m.c | 7 +++
xen/common/memory.c | 25 +++++++++++-
8 files changed, 131 insertions(+), 39 deletions(-)
--- xen-4.0.1-testing.orig/tools/firmware/hvmloader/hvmloader.c
+++ xen-4.0.1-testing/tools/firmware/hvmloader/hvmloader.c
@@ -29,6 +29,7 @@
#include "pci_regs.h"
#include "e820.h"
#include "option_rom.h"
+#include <xen/errno.h>
#include <xen/version.h>
#include <xen/hvm/params.h>
#include <xen/memory.h>
@@ -306,13 +307,19 @@ static void pci_setup(void)
while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend )
{
struct xen_add_to_physmap xatp;
+ int rc;
if ( hvm_info->high_mem_pgend == 0 )
hvm_info->high_mem_pgend = 1ull << (32 - PAGE_SHIFT);
xatp.domid = DOMID_SELF;
xatp.space = XENMAPSPACE_gmfn;
xatp.idx = --hvm_info->low_mem_pgend;
xatp.gpfn = hvm_info->high_mem_pgend++;
- if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+ do {
+ rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp);
+ if ( rc == -ENOENT )
+ cpu_relax();
+ } while ( rc == -ENOENT );
+ if ( rc != 0 )
BUG();
}
--- xen-4.0.1-testing.orig/tools/firmware/hvmloader/util.c
+++ xen-4.0.1-testing/tools/firmware/hvmloader/util.c
@@ -23,6 +23,7 @@
#include "e820.h"
#include "hypercall.h"
#include <stdint.h>
+#include <xen/errno.h>
#include <xen/xen.h>
#include <xen/memory.h>
@@ -323,19 +324,27 @@ void *mem_alloc(uint32_t size, uint32_t
while ( (reserve >> PAGE_SHIFT) != (e >> PAGE_SHIFT) )
{
+ int rc;
reserve += PAGE_SIZE;
mfn = reserve >> PAGE_SHIFT;
/* Try to allocate a brand new page in the reserved area. */
if ( !over_allocated )
{
+ uint8_t delay = 0;
xmr.domid = DOMID_SELF;
xmr.mem_flags = 0;
xmr.extent_order = 0;
xmr.nr_extents = 1;
set_xen_guest_handle(xmr.extent_start, &mfn);
- if ( hypercall_memory_op(XENMEM_populate_physmap, &xmr) == 1 )
+ do {
+ rc = hypercall_memory_op(XENMEM_populate_physmap, &xmr);
+ if ( rc == 0 )
+ cpu_relax();
+ } while ( rc == 0 && ++delay );
+ if ( rc == 1 )
continue;
+ printf("%s: over_allocated\n", __func__);
over_allocated = 1;
}
@@ -353,7 +362,12 @@ void *mem_alloc(uint32_t size, uint32_t
xatp.domid = DOMID_SELF;
xatp.space = XENMAPSPACE_gmfn;
xatp.gpfn = mfn;
- if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+ do {
+ rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp);
+ if ( rc == -ENOENT )
+ cpu_relax();
+ } while ( rc == -ENOENT );
+ if ( rc != 0 )
BUG();
}
@@ -595,6 +609,7 @@ uint16_t get_cpu_mhz(void)
uint64_t cpu_khz;
uint32_t tsc_to_nsec_mul, version;
int8_t tsc_shift;
+ int rc;
static uint16_t cpu_mhz;
if ( cpu_mhz != 0 )
@@ -605,7 +620,12 @@ uint16_t get_cpu_mhz(void)
xatp.space = XENMAPSPACE_shared_info;
xatp.idx = 0;
xatp.gpfn = (unsigned long)shared_info >> 12;
- if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+ do {
+ rc = hypercall_memory_op(XENMEM_add_to_physmap, &xatp);
+ if ( rc == -ENOENT )
+ cpu_relax();
+ } while ( rc == -ENOENT );
+ if ( rc != 0 )
BUG();
/* Get a consistent snapshot of scale factor (multiplier and shift). */
--- xen-4.0.1-testing.orig/tools/include/Makefile
+++ xen-4.0.1-testing/tools/include/Makefile
@@ -12,6 +12,7 @@ xen/.dir:
@rm -rf xen
mkdir -p xen/libelf
ln -sf ../$(XEN_ROOT)/xen/include/public/COPYING xen
+ ln -sf ../$(XEN_ROOT)/xen/include/xen/errno.h xen
ln -sf $(addprefix ../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) xen
ln -sf $(addprefix ../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) xen
ln -sf ../xen-sys/$(XEN_OS) xen/sys
--- xen-4.0.1-testing.orig/tools/ioemu-qemu-xen/hw/vga.c
+++ xen-4.0.1-testing/tools/ioemu-qemu-xen/hw/vga.c
@@ -2157,9 +2157,10 @@ void set_vram_mapping(void *opaque, unsi
for (i = 0; i < (end - begin) >> TARGET_PAGE_BITS; i++) {
xatp.idx = (s->vram_gmfn >> TARGET_PAGE_BITS) + i;
xatp.gpfn = (begin >> TARGET_PAGE_BITS) + i;
- rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp);
+ while ((rc = xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp)) && errno == ENOENT)
+ usleep(1000);
if (rc) {
- fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %"PRI_xen_pfn" failed: %d\n", xatp.idx, xatp.gpfn, rc);
+ fprintf(stderr, "add_to_physmap MFN %"PRI_xen_pfn" to PFN %"PRI_xen_pfn" failed: %d\n", xatp.idx, xatp.gpfn, errno);
return;
}
}
--- xen-4.0.1-testing.orig/tools/libxc/xc_domain.c
+++ xen-4.0.1-testing/tools/libxc/xc_domain.c
@@ -536,6 +536,44 @@ int xc_domain_get_tsc_info(int xc_handle
return rc;
}
+static int do_xenmem_op_retry(int xc_handle, int cmd, struct xen_memory_reservation *reservation, unsigned long nr_extents, xen_pfn_t *extent_start)
+{
+ int err = 0;
+ unsigned long count = nr_extents;
+ unsigned long delay = 0;
+ unsigned long start = 0;
+
+ fprintf(stderr, "%s: cmd %d count %lx\n",__func__,cmd,count);
+ while ( count && start < nr_extents )
+ {
+ set_xen_guest_handle(reservation->extent_start, extent_start + start);
+ reservation->nr_extents = count;
+
+ err = xc_memory_op(xc_handle, cmd, reservation);
+ if ( err == count )
+ {
+ err = 0;
+ break;
+ }
+
+ if ( err > count || err < 0 || delay > 1000 * 1000)
+ {
+ fprintf(stderr, "%s: %d err %x count %lx start %lx delay %lu/%lu\n",__func__,cmd,err,count,start,delay,delay/666);
+ err = -1;
+ break;
+ }
+
+ if ( err )
+ delay = 0;
+
+ start += err;
+ count -= err;
+ usleep(delay);
+ delay += 666; /* 1500 iterations, 12 seconds */
+ }
+
+ return err;
+}
int xc_domain_memory_increase_reservation(int xc_handle,
uint32_t domid,
@@ -546,26 +584,18 @@ int xc_domain_memory_increase_reservatio
{
int err;
struct xen_memory_reservation reservation = {
- .nr_extents = nr_extents,
.extent_order = extent_order,
.mem_flags = mem_flags,
.domid = domid
};
- /* may be NULL */
- set_xen_guest_handle(reservation.extent_start, extent_start);
-
- err = xc_memory_op(xc_handle, XENMEM_increase_reservation, &reservation);
- if ( err == nr_extents )
- return 0;
-
- if ( err >= 0 )
+ err = do_xenmem_op_retry(xc_handle, XENMEM_increase_reservation, &reservation, nr_extents, extent_start);
+ if ( err < 0 )
{
DPRINTF("Failed allocation for dom %d: "
"%ld extents of order %d, mem_flags %x\n",
domid, nr_extents, extent_order, mem_flags);
errno = ENOMEM;
- err = -1;
}
return err;
@@ -579,14 +609,11 @@ int xc_domain_memory_decrease_reservatio
{
int err;
struct xen_memory_reservation reservation = {
- .nr_extents = nr_extents,
.extent_order = extent_order,
.mem_flags = 0,
.domid = domid
};
- set_xen_guest_handle(reservation.extent_start, extent_start);
-
if ( extent_start == NULL )
{
DPRINTF("decrease_reservation extent_start is NULL!\n");
@@ -594,16 +621,12 @@ int xc_domain_memory_decrease_reservatio
return -1;
}
- err = xc_memory_op(xc_handle, XENMEM_decrease_reservation, &reservation);
- if ( err == nr_extents )
- return 0;
-
- if ( err >= 0 )
+ err = do_xenmem_op_retry(xc_handle, XENMEM_decrease_reservation, &reservation, nr_extents, extent_start);
+ if ( err < 0 )
{
DPRINTF("Failed deallocation for dom %d: %ld extents of order %d\n",
domid, nr_extents, extent_order);
errno = EINVAL;
- err = -1;
}
return err;
@@ -618,23 +641,17 @@ int xc_domain_memory_populate_physmap(in
{
int err;
struct xen_memory_reservation reservation = {
- .nr_extents = nr_extents,
.extent_order = extent_order,
.mem_flags = mem_flags,
.domid = domid
};
- set_xen_guest_handle(reservation.extent_start, extent_start);
-
- err = xc_memory_op(xc_handle, XENMEM_populate_physmap, &reservation);
- if ( err == nr_extents )
- return 0;
- if ( err >= 0 )
+ err = do_xenmem_op_retry(xc_handle, XENMEM_populate_physmap, &reservation, nr_extents, extent_start);
+ if ( err < 0 )
{
DPRINTF("Failed allocation for dom %d: %ld extents of order %d\n",
domid, nr_extents, extent_order);
errno = EBUSY;
- err = -1;
}
return err;
--- xen-4.0.1-testing.orig/xen/arch/x86/mm.c
+++ xen-4.0.1-testing/xen/arch/x86/mm.c
@@ -3660,6 +3660,8 @@ static int create_grant_p2m_mapping(uint
p2mt = p2m_grant_map_rw;
rc = guest_physmap_add_entry(current->domain, addr >> PAGE_SHIFT,
frame, 0, p2mt);
+ if ( rc == -ENOENT )
+ return GNTST_eagain;
if ( rc )
return GNTST_general_error;
else
@@ -4315,17 +4317,25 @@ long arch_memory_op(int op, XEN_GUEST_HA
case XENMAPSPACE_gmfn:
{
p2m_type_t p2mt;
+ unsigned long tmp_mfn;
- xatp.idx = mfn_x(gfn_to_mfn_unshare(d, xatp.idx, &p2mt, 0));
+ tmp_mfn = mfn_x(gfn_to_mfn_unshare(d, xatp.idx, &p2mt, 0));
+ if ( unlikely(p2m_is_paging(p2mt)) )
+ {
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(d, xatp.idx);
+ rcu_unlock_domain(d);
+ return -ENOENT;
+ }
/* If the page is still shared, exit early */
if ( p2m_is_shared(p2mt) )
{
rcu_unlock_domain(d);
return -ENOMEM;
}
- if ( !get_page_from_pagenr(xatp.idx, d) )
+ if ( !get_page_from_pagenr(tmp_mfn, d) )
break;
- mfn = xatp.idx;
+ mfn = tmp_mfn;
page = mfn_to_page(mfn);
break;
}
@@ -4354,8 +4364,16 @@ long arch_memory_op(int op, XEN_GUEST_HA
/* Xen heap frames are simply unhooked from this phys slot. */
guest_physmap_remove_page(d, xatp.gpfn, prev_mfn, 0);
else
+ {
/* Normal domain memory is freed, to avoid leaking memory. */
- guest_remove_page(d, xatp.gpfn);
+ rc = guest_remove_page(d, xatp.gpfn);
+ if ( rc == -ENOENT )
+ {
+ domain_unlock(d);
+ rcu_unlock_domain(d);
+ return rc;
+ }
+ }
}
/* Unmap from old location, if any. */
--- xen-4.0.1-testing.orig/xen/arch/x86/mm/p2m.c
+++ xen-4.0.1-testing/xen/arch/x86/mm/p2m.c
@@ -2186,6 +2186,13 @@ guest_physmap_add_entry(struct domain *d
P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
mfn + i, ogfn, gfn + i);
omfn = gfn_to_mfn_query(d, ogfn, &ot);
+ if ( unlikely(p2m_is_paging(ot)) )
+ {
+ p2m_unlock(d->arch.p2m);
+ if ( p2m_is_paged(ot) )
+ p2m_mem_paging_populate(d, ogfn);
+ return -ENOENT;
+ }
/* If we get here, we know the local domain owns the page,
so it can't have been grant mapped in. */
BUG_ON( p2m_is_grant(ot) );
--- xen-4.0.1-testing.orig/xen/common/memory.c
+++ xen-4.0.1-testing/xen/common/memory.c
@@ -95,6 +95,7 @@ static void populate_physmap(struct memo
unsigned long i, j;
xen_pfn_t gpfn, mfn;
struct domain *d = a->domain;
+ int rc;
if ( !guest_handle_subrange_okay(a->extent_list, a->nr_done,
a->nr_extents-1) )
@@ -134,7 +135,12 @@ static void populate_physmap(struct memo
}
mfn = page_to_mfn(page);
- guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
+ rc = guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
+ if ( rc != 0 )
+ {
+ free_domheap_pages(page, a->extent_order);
+ goto out;
+ }
if ( !paging_mode_translate(d) )
{
@@ -162,6 +168,12 @@ int guest_remove_page(struct domain *d,
#ifdef CONFIG_X86
mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+ if ( unlikely(p2m_is_paging(p2mt)) )
+ {
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(d, gmfn);
+ return -ENOENT;
+ }
#else
mfn = gmfn_to_mfn(d, gmfn);
#endif
@@ -360,6 +372,13 @@ static long memory_exchange(XEN_GUEST_HA
/* Shared pages cannot be exchanged */
mfn = mfn_x(gfn_to_mfn_unshare(d, gmfn + k, &p2mt, 0));
+ if ( p2m_is_paging(p2mt) )
+ {
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(d, gmfn);
+ rc = -ENOENT;
+ goto fail;
+ }
if ( p2m_is_shared(p2mt) )
{
rc = -ENOMEM;
@@ -456,7 +475,9 @@ static long memory_exchange(XEN_GUEST_HA
&gpfn, exch.out.extent_start, (i<<out_chunk_order)+j, 1);
mfn = page_to_mfn(page);
- guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
+ rc = guest_physmap_add_page(d, gpfn, mfn, exch.out.extent_order);
+ if ( rc == -ENOENT )
+ goto fail;
if ( !paging_mode_translate(d) )
{

48
xenpaging.pagefile.patch Normal file
View File

@ -0,0 +1,48 @@
Subject: xenpaging: Open paging file only if xenpaging_init() succeeds
Open paging file only if xenpaging_init() succeeds. It can fail if the host
does not support the required virtualization features such as EPT or if
xenpaging was already started for this domain_id.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Already-Acked-by: Patrick Colp <pjcolp@cs.ubc.ca>
Already-Acked-by: Keir Fraser <keir.fraser@citrix.com>
---
tools/xenpaging/xenpaging.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c
@@ -495,15 +495,6 @@ int main(int argc, char *argv[])
victims = calloc(num_pages, sizeof(xenpaging_victim_t));
- /* Open file */
- sprintf(filename, "page_cache_%d", domain_id);
- fd = open(filename, open_flags, open_mode);
- if ( fd < 0 )
- {
- perror("failed to open file");
- return -1;
- }
-
/* Seed random-number generator */
srand(time(NULL));
@@ -515,6 +506,15 @@ int main(int argc, char *argv[])
goto out;
}
+ /* Open file */
+ sprintf(filename, "page_cache_%d", domain_id);
+ fd = open(filename, open_flags, open_mode);
+ if ( fd < 0 )
+ {
+ perror("failed to open file");
+ return -1;
+ }
+
/* Evict pages */
memset(victims, 0, sizeof(xenpaging_victim_t) * num_pages);
for ( i = 0; i < num_pages; i++ )

View File

@ -0,0 +1,27 @@
Subject: xenpaging: call pageout policy function in xenpaging_evict_page
Notify policy about a page that was just paged out to disk.
Up to now the code called the opposite function, which clears the
(xenpaging internal) reference bit, instead of setting it and marking
the page as gone.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Already-Acked-by: Patrick Colp <pjcolp@cs.ubc.ca>
---
tools/xenpaging/xenpaging.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c
@@ -358,8 +358,8 @@ int xenpaging_evict_page(xenpaging_t *pa
goto out;
}
- /* Notify policy of page being paged in */
- policy_notify_paged_in(paging->mem_event.domain_id, victim->gfn);
+ /* Notify policy of page being paged out */
+ policy_notify_paged_out(paging->mem_event.domain_id, victim->gfn);
out:
return ret;

View File

@ -0,0 +1,122 @@
Subject: xenpaging: break endless loop during initial page-out with large pagefiles
To allow starting xenpaging right after 'xm start XYZ', I
specified a pagefile size equal to the guest memory size in the hope to
catch more errors where the paged-out state of a p2mt is not checked.
While doing that, xenpaging got into an endless loop because some pages
can't be paged out right away. Now the policy reports an error if the gfn
number wraps.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Already-Acked-by: Patrick Colp <pjcolp@cs.ubc.ca>
Already-Acked-by: Keir Fraser <keir.fraser@citrix.com>
---
tools/xenpaging/policy_default.c | 35 ++++++++++++++++++++++++++++-------
tools/xenpaging/xenpaging.c | 7 +++++--
2 files changed, 33 insertions(+), 9 deletions(-)
--- xen-4.0.1-testing.orig/tools/xenpaging/policy_default.c
+++ xen-4.0.1-testing/tools/xenpaging/policy_default.c
@@ -30,8 +30,12 @@
static unsigned long mru[MRU_SIZE];
-static unsigned int i_mru = 0;
+static unsigned int i_mru;
static unsigned long *bitmap;
+static unsigned long *unconsumed;
+static unsigned long current_gfn;
+static unsigned long bitmap_size;
+static unsigned long max_pages;
int policy_init(xenpaging_t *paging)
@@ -43,6 +47,14 @@ int policy_init(xenpaging_t *paging)
rc = alloc_bitmap(&bitmap, paging->bitmap_size);
if ( rc != 0 )
goto out;
+ /* Allocate bitmap to track unusable pages */
+ rc = alloc_bitmap(&unconsumed, paging->bitmap_size);
+ if ( rc != 0 )
+ goto out;
+
+ /* record bitmap_size */
+ bitmap_size = paging->bitmap_size;
+ max_pages = paging->domain_info->max_pages;
/* Initialise MRU list of paged in pages */
for ( i = 0; i < MRU_SIZE; i++ )
@@ -51,8 +63,6 @@ int policy_init(xenpaging_t *paging)
/* Don't page out page 0 */
set_bit(0, bitmap);
- rc = 0;
-
out:
return rc;
}
@@ -60,17 +70,27 @@ int policy_init(xenpaging_t *paging)
int policy_choose_victim(xenpaging_t *paging, domid_t domain_id,
xenpaging_victim_t *victim)
{
+ unsigned long wrap = current_gfn;
ASSERT(victim != NULL);
/* Domain to pick on */
victim->domain_id = domain_id;
-
+
do
{
- /* Randomly choose a gfn to evict */
- victim->gfn = rand() % paging->domain_info->max_pages;
+ current_gfn++;
+ if ( current_gfn >= max_pages )
+ current_gfn = 0;
+ if ( wrap == current_gfn )
+ {
+ victim->gfn = INVALID_MFN;
+ return -ENOSPC;
+ }
}
- while ( test_bit(victim->gfn, bitmap) );
+ while ( test_bit(current_gfn, bitmap) || test_bit(current_gfn, unconsumed) );
+
+ set_bit(current_gfn, unconsumed);
+ victim->gfn = current_gfn;
return 0;
}
@@ -78,6 +98,7 @@ int policy_choose_victim(xenpaging_t *pa
void policy_notify_paged_out(domid_t domain_id, unsigned long gfn)
{
set_bit(gfn, bitmap);
+ clear_bit(gfn, unconsumed);
}
void policy_notify_paged_in(domid_t domain_id, unsigned long gfn)
--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c
@@ -440,7 +440,8 @@ static int evict_victim(xenpaging_t *pag
ret = policy_choose_victim(paging, domain_id, victim);
if ( ret != 0 )
{
- ERROR("Error choosing victim");
+ if ( ret != -ENOSPC )
+ ERROR("Error choosing victim");
goto out;
}
@@ -518,7 +519,9 @@ int main(int argc, char *argv[])
memset(victims, 0, sizeof(xenpaging_victim_t) * num_pages);
for ( i = 0; i < num_pages; i++ )
{
- evict_victim(paging, domain_id, &victims[i], fd, i);
+ rc = evict_victim(paging, domain_id, &victims[i], fd, i);
+ if ( rc == -ENOSPC )
+ break;
if ( i % 100 == 0 )
DPRINTF("%d pages evicted\n", i);
}

View File

@ -0,0 +1,114 @@
Subject: xenpaging: populate only paged-out pages
Populate a paged-out page only once to reduce pressure in the ringbuffer.
Several cpus may still request a page at once. xenpaging can handle this.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
xen/arch/x86/hvm/emulate.c | 3 ++-
xen/arch/x86/hvm/hvm.c | 17 ++++++++++-------
xen/arch/x86/mm/guest_walk.c | 3 ++-
xen/arch/x86/mm/hap/guest_walk.c | 6 ++++--
4 files changed, 18 insertions(+), 11 deletions(-)
--- xen-4.0.1-testing.orig/xen/arch/x86/hvm/emulate.c
+++ xen-4.0.1-testing/xen/arch/x86/hvm/emulate.c
@@ -65,7 +65,8 @@ static int hvmemul_do_io(
ram_mfn = gfn_to_mfn_unshare(current->domain, ram_gfn, &p2mt, 0);
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(curr->domain, ram_gfn);
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(curr->domain, ram_gfn);
return X86EMUL_RETRY;
}
if ( p2m_is_shared(p2mt) )
--- xen-4.0.1-testing.orig/xen/arch/x86/hvm/hvm.c
+++ xen-4.0.1-testing/xen/arch/x86/hvm/hvm.c
@@ -291,7 +291,8 @@ static int hvm_set_ioreq_page(
return -EINVAL;
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(d, gmfn);
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(d, gmfn);
return -ENOENT;
}
if ( p2m_is_shared(p2mt) )
@@ -1324,7 +1325,8 @@ static void *hvm_map_entry(unsigned long
mfn = mfn_x(gfn_to_mfn_unshare(current->domain, gfn, &p2mt, 0));
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(current->domain, gfn);
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(current->domain, gfn);
return NULL;
}
if ( p2m_is_shared(p2mt) )
@@ -1723,7 +1725,8 @@ static enum hvm_copy_result __hvm_copy(
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(curr->domain, gfn);
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(curr->domain, gfn);
return HVMCOPY_gfn_paged_out;
}
if ( p2m_is_shared(p2mt) )
@@ -3032,8 +3035,8 @@ long do_hvm_op(unsigned long op, XEN_GUE
mfn_t mfn = gfn_to_mfn(d, pfn, &t);
if ( p2m_is_paging(t) )
{
- p2m_mem_paging_populate(d, pfn);
-
+ if ( p2m_is_paged(t) )
+ p2m_mem_paging_populate(d, pfn);
rc = -EINVAL;
goto param_fail3;
}
@@ -3096,8 +3099,8 @@ long do_hvm_op(unsigned long op, XEN_GUE
mfn = gfn_to_mfn_unshare(d, pfn, &t, 0);
if ( p2m_is_paging(t) )
{
- p2m_mem_paging_populate(d, pfn);
-
+ if ( p2m_is_paged(t) )
+ p2m_mem_paging_populate(d, pfn);
rc = -EINVAL;
goto param_fail4;
}
--- xen-4.0.1-testing.orig/xen/arch/x86/mm/guest_walk.c
+++ xen-4.0.1-testing/xen/arch/x86/mm/guest_walk.c
@@ -96,7 +96,8 @@ static inline void *map_domain_gfn(struc
*mfn = gfn_to_mfn_unshare(d, gfn_x(gfn), p2mt, 0);
if ( p2m_is_paging(*p2mt) )
{
- p2m_mem_paging_populate(d, gfn_x(gfn));
+ if ( p2m_is_paged(*p2mt) )
+ p2m_mem_paging_populate(d, gfn_x(gfn));
*rc = _PAGE_PAGED;
return NULL;
--- xen-4.0.1-testing.orig/xen/arch/x86/mm/hap/guest_walk.c
+++ xen-4.0.1-testing/xen/arch/x86/mm/hap/guest_walk.c
@@ -49,7 +49,8 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
top_mfn = gfn_to_mfn_unshare(v->domain, cr3 >> PAGE_SHIFT, &p2mt, 0);
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(v->domain, cr3 >> PAGE_SHIFT);
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(v->domain, cr3 >> PAGE_SHIFT);
pfec[0] = PFEC_page_paged;
return INVALID_GFN;
@@ -81,7 +82,8 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
gfn_to_mfn_unshare(v->domain, gfn_x(gfn), &p2mt, 0);
if ( p2m_is_paging(p2mt) )
{
- p2m_mem_paging_populate(v->domain, gfn_x(gfn));
+ if ( p2m_is_paged(p2mt) )
+ p2m_mem_paging_populate(v->domain, gfn_x(gfn));
pfec[0] = PFEC_page_paged;
return INVALID_GFN;

View File

@ -0,0 +1,161 @@
Subject: xenpaging: add signal handling
Leave paging loop if xenpaging gets a signal.
Remove paging file on exit.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/xenpaging/xenpaging.c | 39 +++++++++++++++++++++++++++++++--------
1 file changed, 31 insertions(+), 8 deletions(-)
--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c
@@ -22,6 +22,7 @@
#include <inttypes.h>
#include <stdlib.h>
+#include <signal.h>
#include <xc_private.h>
#include <xen/mem_event.h>
@@ -40,6 +41,11 @@
#define DPRINTF(...) ((void)0)
#endif
+static int interrupted;
+static void close_handler(int sig)
+{
+ interrupted = sig;
+}
static void *init_page(void)
{
@@ -244,7 +250,6 @@ int xenpaging_teardown(xenpaging_t *pagi
if ( rc != 0 )
{
ERROR("Error tearing down domain paging in xen");
- goto err;
}
/* Unbind VIRQ */
@@ -252,7 +257,6 @@ int xenpaging_teardown(xenpaging_t *pagi
if ( rc != 0 )
{
ERROR("Error unbinding event port");
- goto err;
}
paging->mem_event.port = -1;
@@ -261,7 +265,6 @@ int xenpaging_teardown(xenpaging_t *pagi
if ( rc != 0 )
{
ERROR("Error closing event channel");
- goto err;
}
paging->mem_event.xce_handle = -1;
@@ -270,7 +273,6 @@ int xenpaging_teardown(xenpaging_t *pagi
if ( rc != 0 )
{
ERROR("Error closing connection to xen");
- goto err;
}
paging->xc_handle = -1;
@@ -375,7 +377,7 @@ int xenpaging_evict_page(xenpaging_t *pa
return ret;
}
-int xenpaging_resume_page(xenpaging_t *paging, mem_event_response_t *rsp)
+static int xenpaging_resume_page(xenpaging_t *paging, mem_event_response_t *rsp)
{
int ret;
@@ -455,6 +457,11 @@ static int evict_victim(xenpaging_t *pag
goto out;
}
+ if ( interrupted )
+ {
+ ret = -EINTR;
+ goto out;
+ }
ret = xc_mem_paging_nominate(paging->xc_handle,
paging->mem_event.domain_id, victim->gfn);
if ( ret == 0 )
@@ -479,6 +486,7 @@ static int evict_victim(xenpaging_t *pag
int main(int argc, char *argv[])
{
+ struct sigaction act;
domid_t domain_id;
int num_pages;
xenpaging_t *paging;
@@ -513,7 +521,7 @@ int main(int argc, char *argv[])
if ( paging == NULL )
{
ERROR("Error initialising paging");
- goto out;
+ return 1;
}
/* Open file */
@@ -522,9 +530,18 @@ int main(int argc, char *argv[])
if ( fd < 0 )
{
perror("failed to open file");
- return -1;
+ return 2;
}
+ /* ensure that if we get a signal, we'll do cleanup, then exit */
+ act.sa_handler = close_handler;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGHUP, &act, NULL);
+ sigaction(SIGTERM, &act, NULL);
+ sigaction(SIGINT, &act, NULL);
+ sigaction(SIGALRM, &act, NULL);
+
/* Evict pages */
memset(victims, 0, sizeof(xenpaging_victim_t) * num_pages);
for ( i = 0; i < num_pages; i++ )
@@ -532,6 +549,8 @@ int main(int argc, char *argv[])
rc = evict_victim(paging, domain_id, &victims[i], fd, i);
if ( rc == -ENOSPC )
break;
+ if ( rc == -EINTR )
+ break;
if ( i % 100 == 0 )
DPRINTF("%d pages evicted\n", i);
}
@@ -539,7 +558,7 @@ int main(int argc, char *argv[])
DPRINTF("pages evicted\n");
/* Swap pages in and out */
- while ( 1 )
+ while ( !interrupted )
{
/* Wait for Xen to signal that a page needs paged in */
rc = xc_wait_for_event_or_timeout(paging->mem_event.xce_handle, 100);
@@ -630,8 +649,11 @@ int main(int argc, char *argv[])
}
}
}
+ DPRINTF("xenpaging got signal %d\n", interrupted);
out:
+ unlink(filename);
+ close(fd);
free(victims);
/* Tear down domain paging */
@@ -642,6 +664,7 @@ int main(int argc, char *argv[])
if ( rc == 0 )
rc = rc1;
+ DPRINTF("xenpaging exit code %d\n", rc);
return rc;
}

View File

@ -0,0 +1,54 @@
Subject: xenpaging: Fix-up xenpaging tool code.
This isn't directly related to EPT checking, but does some general fix-ups
to the xenpaging code (adds some extra frees, etc.)
Already-Signed-off-by: Patrick Colp <pjcolp@cs.ubc.ca>
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/xenpaging/xenpaging.c | 22 ++++++++++++++++++----
1 file changed, 18 insertions(+), 4 deletions(-)
--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c
@@ -100,7 +100,7 @@ xenpaging_t *xenpaging_init(domid_t doma
paging->mem_event.ring_page = init_page();
if ( paging->mem_event.ring_page == NULL )
{
- ERROR("Error initialising shared page");
+ ERROR("Error initialising ring page");
goto err;
}
@@ -198,13 +198,27 @@ xenpaging_t *xenpaging_init(domid_t doma
return paging;
err:
- if ( paging->bitmap )
+ if ( paging )
+ {
+ if ( paging->mem_event.shared_page )
+ {
+ munlock(paging->mem_event.shared_page, PAGE_SIZE);
+ free(paging->mem_event.shared_page);
+ }
+
+ if ( paging->mem_event.ring_page )
+ {
+ munlock(paging->mem_event.ring_page, PAGE_SIZE);
+ free(paging->mem_event.ring_page);
+ }
+
free(paging->bitmap);
- if ( paging->platform_info )
free(paging->platform_info);
- if ( paging )
+ free(paging->domain_info);
free(paging);
+ }
+ err_iface:
return NULL;
}

View File

@ -0,0 +1,53 @@
Subject: xenpaging: allow only one xenpaging binary per guest
Make sure only one xenpaging binary is active per domain.
Print info when the host lacks the required features for xenpaging.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Already-Acked-by: Patrick Colp <pjcolp@cs.ubc.ca>
Already-Acked-by: Keir Fraser <keir.fraser@citrix.com>
---
v2: use perror for default case
tools/xenpaging/xenpaging.c | 12 +++++++++++-
xen/arch/x86/mm/mem_event.c | 7 +++++++
2 files changed, 18 insertions(+), 1 deletion(-)
--- xen-4.0.1-testing.orig/tools/xenpaging/xenpaging.c
+++ xen-4.0.1-testing/tools/xenpaging/xenpaging.c
@@ -119,7 +119,17 @@ xenpaging_t *xenpaging_init(domid_t doma
paging->mem_event.ring_page);
if ( rc != 0 )
{
- ERROR("Error initialising shared page");
+ switch ( errno ) {
+ case EBUSY:
+ ERROR("xenpaging is (or was) active on this domain");
+ break;
+ case ENODEV:
+ ERROR("EPT not supported for this guest");
+ break;
+ default:
+ perror("Error initialising shared page");
+ break;
+ }
goto err;
}
--- xen-4.0.1-testing.orig/xen/arch/x86/mm/mem_event.c
+++ xen-4.0.1-testing/xen/arch/x86/mm/mem_event.c
@@ -226,6 +226,13 @@ int mem_event_domctl(struct domain *d, x
mfn_t ring_mfn;
mfn_t shared_mfn;
+ /* Only one xenpaging at a time. If xenpaging crashed,
+ * the cache is in an undefined state and so is the guest
+ */
+ rc = -EBUSY;
+ if ( d->mem_event.enabled )
+ break;
+
/* Currently only EPT is supported */
rc = -ENODEV;
if ( !(is_hvm_domain(d) && d->arch.hvm_domain.hap_enabled &&

View File

@ -0,0 +1,22 @@
Subject: xenpaging: fix fd leak in xenstore
Missing from commit 'libxl: Backported stuff from unstable'
Without this change, xs_daemon_open/xs_daemon_close will leak file descriptors.
Signed-off-by: Olaf Hering <olaf@aepfle.de>
---
tools/xenstore/xs.c | 2 ++
1 file changed, 2 insertions(+)
--- xen-4.0.1-testing.orig/tools/xenstore/xs.c
+++ xen-4.0.1-testing/tools/xenstore/xs.c
@@ -285,6 +285,8 @@ void xs_daemon_close(struct xs_handle *h
mutex_unlock(&h->request_mutex);
mutex_unlock(&h->reply_mutex);
mutex_unlock(&h->watch_mutex);
+
+ close_fds_free(h);
}
static bool read_all(int fd, void *data, unsigned int len)