xen/x86_64-note-init-p2m.patch

--- 2009-01-08.orig/tools/include/xen-foreign/reference.size 2009-01-08 11:44:11.000000000 +0100
+++ 2009-01-08/tools/include/xen-foreign/reference.size 2009-01-08 10:56:30.000000000 +0100
@@ -1,7 +1,7 @@
 structs       |  x86_32  x86_64    ia64
 
-start_info    |    1104    1152    1152
+start_info    |    1112    1168    1168
 trap_info     |       8      16       -
 pt_fpreg      |       -       -      16
 cpu_user_regs |      68     200       -
--- 2009-01-08.orig/xen/arch/x86/domain_build.c 2009-01-08 10:56:13.000000000 +0100
+++ 2009-01-08/xen/arch/x86/domain_build.c 2009-01-08 11:44:42.000000000 +0100
@@ -341,6 +341,12 @@ int __init construct_dom0(
 #endif
     }
 
+    if ( (parms.p2m_base != UNSET_ADDR) && elf_32bit(&elf) )
+    {
+        printk(XENLOG_WARNING "P2M table base ignored\n");
+        parms.p2m_base = UNSET_ADDR;
+    }
+
     domain_set_alloc_bitsize(d);
 
     /*
@@ -359,6 +365,8 @@ int __init construct_dom0(
     vphysmap_end     = vphysmap_start + (nr_pages * (!is_pv_32on64_domain(d) ?
                                                      sizeof(unsigned long) :
                                                      sizeof(unsigned int)));
+    if ( parms.p2m_base != UNSET_ADDR )
+        vphysmap_end = vphysmap_start;
     vstartinfo_start = round_pgup(vphysmap_end);
     vstartinfo_end   = (vstartinfo_start +
                         sizeof(struct start_info) +
@@ -400,6 +408,11 @@ int __init construct_dom0(
     /* Ensure that our low-memory 1:1 mapping covers the allocation. */
     page = alloc_domheap_pages(d, order, MEMF_bits(30));
 #else
+    if ( parms.p2m_base != UNSET_ADDR )
+    {
+        vphysmap_start = parms.p2m_base;
+        vphysmap_end   = vphysmap_start + nr_pages * sizeof(unsigned long);
+    }
     page = alloc_domheap_pages(d, order, 0);
 #endif
     if ( page == NULL )
@@ -740,8 +753,109 @@ int __init construct_dom0(
     snprintf(si->magic, sizeof(si->magic), "xen-3.0-x86_%d%s",
              elf_64bit(&elf) ? 64 : 32, parms.pae ? "p" : "");
 
+    count = d->tot_pages;
+#ifdef __x86_64__
+    /* Set up the phys->machine table if not part of the initial mapping. */
+    if ( parms.p2m_base != UNSET_ADDR )
+    {
+        unsigned long va = vphysmap_start;
+
+        if ( v_start <= vphysmap_end && vphysmap_start <= v_end )
+            panic("DOM0 P->M table overlaps initial mapping");
+
+        while ( va < vphysmap_end )
+        {
+            if ( d->tot_pages + ((round_pgup(vphysmap_end) - va)
+                                 >> PAGE_SHIFT) + 3 > nr_pages )
+                panic("Dom0 allocation too small for initial P->M table.\n");
+
+            l4tab = l4start + l4_table_offset(va);
+            if ( !l4e_get_intpte(*l4tab) )
+            {
+                page = alloc_domheap_pages(d, 0, 0);
+                if ( !page )
+                    break;
+                /* No mapping, PGC_allocated + page-table page. */
+                page->count_info = PGC_allocated | 2;
+                page->u.inuse.type_info =
+                    PGT_l3_page_table | PGT_validated | 1;
+                clear_page(page_to_virt(page));
+                *l4tab = l4e_from_page(page, L4_PROT);
+            }
+            l3tab = page_to_virt(l4e_get_page(*l4tab));
+            l3tab += l3_table_offset(va);
+            if ( !l3e_get_intpte(*l3tab) )
+            {
+                if ( cpu_has_page1gb &&
+                     !(va & ((1UL << L3_PAGETABLE_SHIFT) - 1)) &&
+                     vphysmap_end >= va + (1UL << L3_PAGETABLE_SHIFT) &&
+                     (page = alloc_domheap_pages(d,
+                                                 L3_PAGETABLE_SHIFT -
+                                                     PAGE_SHIFT,
+                                                 0)) != NULL )
+                {
+                    *l3tab = l3e_from_page(page,
+                                           L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
+                    va += 1UL << L3_PAGETABLE_SHIFT;
+                    continue;
+                }
+                else if ( (page = alloc_domheap_pages(d, 0, 0)) == NULL )
+                    break;
+                else
+                {
+                    /* No mapping, PGC_allocated + page-table page. */
+                    page->count_info = PGC_allocated | 2;
+                    page->u.inuse.type_info =
+                        PGT_l2_page_table | PGT_validated | 1;
+                    clear_page(page_to_virt(page));
+                    *l3tab = l3e_from_page(page, L3_PROT);
+                }
+            }
+            l2tab = page_to_virt(l3e_get_page(*l3tab));
+            l2tab += l2_table_offset(va);
+            if ( !l2e_get_intpte(*l2tab) )
+            {
+                if ( !(va & ((1UL << L2_PAGETABLE_SHIFT) - 1)) &&
+                     vphysmap_end >= va + (1UL << L2_PAGETABLE_SHIFT) &&
+                     (page = alloc_domheap_pages(d,
+                                                 L2_PAGETABLE_SHIFT -
+                                                     PAGE_SHIFT,
+                                                 0)) != NULL )
+                {
+                    *l2tab = l2e_from_page(page,
+                                           L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
+                    va += 1UL << L2_PAGETABLE_SHIFT;
+                    continue;
+                }
+                else if ( (page = alloc_domheap_pages(d, 0, 0)) == NULL )
+                    break;
+                else
+                {
+                    /* No mapping, PGC_allocated + page-table page. */
+                    page->count_info = PGC_allocated | 2;
+                    page->u.inuse.type_info =
+                        PGT_l1_page_table | PGT_validated | 1;
+                    clear_page(page_to_virt(page));
+                    *l2tab = l2e_from_page(page, L2_PROT);
+                }
+            }
+            l1tab = page_to_virt(l2e_get_page(*l2tab));
+            l1tab += l1_table_offset(va);
+            BUG_ON(l1e_get_intpte(*l1tab));
+            page = alloc_domheap_pages(d, 0, 0);
+            if ( !page )
+                break;
+            *l1tab = l1e_from_page(page, L1_PROT|_PAGE_DIRTY);
+            va += PAGE_SIZE;
+            va &= PAGE_MASK;
+        }
+        if ( !page )
+            panic("Not enough RAM for DOM0 P->M table.\n");
+    }
+#endif
+
     /* Write the phys->machine and machine->phys table entries. */
-    for ( pfn = 0; pfn < d->tot_pages; pfn++ )
+    for ( pfn = 0; pfn < count; pfn++ )
     {
         mfn = pfn + alloc_spfn;
 #ifndef NDEBUG
@@ -755,6 +869,26 @@ int __init construct_dom0(
             ((unsigned int *)vphysmap_start)[pfn] = mfn;
         set_gpfn_from_mfn(mfn, pfn);
     }
+    si->first_p2m_pfn = pfn;
+    si->nr_p2m_frames = d->tot_pages - count;
+    list_for_each_entry ( page, &d->page_list, list )
+    {
+        mfn = page_to_mfn(page);
+        if ( get_gpfn_from_mfn(mfn) >= count )
+        {
+            BUG_ON(is_pv_32bit_domain(d));
+            if ( !page->u.inuse.type_info &&
+                 !get_page_and_type(page, d, PGT_writable_page) )
+                BUG();
+            ((unsigned long *)vphysmap_start)[pfn] = mfn;
+            set_gpfn_from_mfn(mfn, pfn);
+            ++pfn;
+#ifndef NDEBUG
+            ++alloc_epfn;
+#endif
+        }
+    }
+    BUG_ON(pfn != d->tot_pages);
     while ( pfn < nr_pages )
     {
         if ( (page = alloc_chunk(d, nr_pages - d->tot_pages)) == NULL )
--- 2009-01-08.orig/xen/arch/x86/mm.c 2009-01-08 11:44:11.000000000 +0100
+++ 2009-01-08/xen/arch/x86/mm.c 2009-01-08 10:56:30.000000000 +0100
@@ -1013,7 +1013,8 @@ static int put_page_from_l2e(l2_pgentry_
     {
         unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
         int writeable = l2e_get_flags(l2e) & _PAGE_RW;
-        ASSERT(opt_allow_hugepage && !(mfn & (L1_PAGETABLE_ENTRIES-1)));
+
+        ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1)));
         do {
             put_data_page(mfn_to_page(m), writeable);
         } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
@@ -1031,14 +1032,28 @@ static int __put_page_type(struct page_i
 static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
                              int partial, int preemptible)
 {
-    if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
-         (l3e_get_pfn(l3e) != pfn) )
+    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
+        return 1;
+
+#ifdef __x86_64__
+    if ( unlikely(l3e_get_flags(l3e) & _PAGE_PSE) )
     {
-        if ( unlikely(partial > 0) )
-            return __put_page_type(l3e_get_page(l3e), preemptible);
-        return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
+        unsigned long mfn = l3e_get_pfn(l3e);
+        int writeable = l3e_get_flags(l3e) & _PAGE_RW;
+
+        ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)));
+        do {
+            put_data_page(mfn_to_page(mfn), writeable);
+        } while ( ++mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1) );
+
+        return 0;
     }
-    return 1;
+#endif
+
+    if ( unlikely(partial > 0) )
+        return __put_page_type(l3e_get_page(l3e), preemptible);
+
+    return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
 }
 
 #if CONFIG_PAGING_LEVELS >= 4
--- 2009-01-08.orig/xen/common/libelf/libelf-dominfo.c 2009-01-08 11:44:11.000000000 +0100
+++ 2009-01-08/xen/common/libelf/libelf-dominfo.c 2009-01-08 10:56:30.000000000 +0100
@@ -90,6 +90,7 @@ int elf_xen_parse_note(struct elf_binary
         [XEN_ELFNOTE_ENTRY] = { "ENTRY", 0},
         [XEN_ELFNOTE_HYPERCALL_PAGE] = { "HYPERCALL_PAGE", 0},
         [XEN_ELFNOTE_VIRT_BASE] = { "VIRT_BASE", 0},
+        [XEN_ELFNOTE_INIT_P2M] = { "INIT_P2M", 0},
         [XEN_ELFNOTE_PADDR_OFFSET] = { "PADDR_OFFSET", 0},
         [XEN_ELFNOTE_HV_START_LOW] = { "HV_START_LOW", 0},
         [XEN_ELFNOTE_XEN_VERSION] = { "XEN_VERSION", 1},
@@ -164,6 +165,9 @@ int elf_xen_parse_note(struct elf_binary
     case XEN_ELFNOTE_ENTRY:
         parms->virt_entry = val;
         break;
+    case XEN_ELFNOTE_INIT_P2M:
+        parms->p2m_base = val;
+        break;
     case XEN_ELFNOTE_PADDR_OFFSET:
         parms->elf_paddr_offset = val;
         break;
@@ -392,6 +396,7 @@ static int elf_xen_addr_calc_check(struc
     elf_msg(elf, "    virt_kstart      = 0x%" PRIx64 "\n", parms->virt_kstart);
     elf_msg(elf, "    virt_kend        = 0x%" PRIx64 "\n", parms->virt_kend);
     elf_msg(elf, "    virt_entry       = 0x%" PRIx64 "\n", parms->virt_entry);
+    elf_msg(elf, "    p2m_base         = 0x%" PRIx64 "\n", parms->p2m_base);
 
     if ( (parms->virt_kstart > parms->virt_kend) ||
          (parms->virt_entry < parms->virt_kstart) ||
@@ -403,6 +408,15 @@ static int elf_xen_addr_calc_check(struc
         return -1;
     }
 
+    if ( (parms->p2m_base != UNSET_ADDR) &&
+         (parms->p2m_base >= parms->virt_kstart) &&
+         (parms->p2m_base < parms->virt_kend) )
+    {
+        elf_err(elf, "%s: ERROR: P->M table base is out of bounds.\n",
+                __FUNCTION__);
+        return -1;
+    }
+
     return 0;
 }
@@ -422,6 +436,7 @@ int elf_xen_parse(struct elf_binary *elf
     parms->virt_entry = UNSET_ADDR;
     parms->virt_hypercall = UNSET_ADDR;
     parms->virt_hv_start_low = UNSET_ADDR;
+    parms->p2m_base = UNSET_ADDR;
     parms->elf_paddr_offset = UNSET_ADDR;
 
     /* Find and parse elf notes. */
--- 2009-01-08.orig/xen/include/public/elfnote.h 2009-01-08 11:44:11.000000000 +0100
+++ 2009-01-08/xen/include/public/elfnote.h 2009-01-08 10:56:30.000000000 +0100
@@ -162,9 +162,20 @@
 #define XEN_ELFNOTE_SUSPEND_CANCEL 14
 
 /*
+ * The (non-default) location the initial phys-to-machine map should be
+ * placed at by the hypervisor (Dom0) or the tools (DomU).
+ * The kernel must be prepared for this mapping to be established using
+ * large pages, despite such otherwise not being available to guests.
+ * The kernel must also be able to handle the page table pages used for
+ * this mapping not being accessible through the initial mapping.
+ * (Only x86-64 supports this at present.)
+ */
+#define XEN_ELFNOTE_INIT_P2M 15
+
+/*
  * The number of the highest elfnote defined.
  */
-#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUSPEND_CANCEL
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_INIT_P2M
 
 /*
  * System information exported through crash notes.
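
For illustration only (not part of the patch): a guest kernel requests a P->M table location by emitting XEN_ELFNOTE_INIT_P2M as an ordinary "Xen" ELF note whose 8-byte descriptor holds the desired virtual base. Below is a minimal C sketch of such a note; real kernels usually emit Xen notes from assembly, and the section name, symbol name, and example address are assumptions made purely for illustration.

#include <stdint.h>

#define XEN_ELFNOTE_INIT_P2M 15                /* value defined by this patch */

/* Generic ELF note layout: namesz, descsz, type, name (4-byte padded), desc. */
struct xen_elfnote_init_p2m {
    uint32_t namesz, descsz, type;
    char     name[4];                          /* "Xen" plus NUL padding */
    uint64_t desc;                             /* requested P->M table base (VA) */
} __attribute__((packed));

static const struct xen_elfnote_init_p2m init_p2m_note
    __attribute__((used, section(".note.Xen"), aligned(4))) = {
    .namesz = 4,
    .descsz = sizeof(uint64_t),
    .type   = XEN_ELFNOTE_INIT_P2M,
    .name   = "Xen",
    .desc   = 0xffffc90000000000ULL,           /* hypothetical example address */
};
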
--- 2009-01-08.orig/xen/include/public/libelf.h 2009-01-08 11:44:11.000000000 +0100
+++ 2009-01-08/xen/include/public/libelf.h 2009-01-08 10:56:30.000000000 +0100
@@ -232,6 +232,7 @@ struct elf_dom_parms {
     uint64_t virt_entry;
     uint64_t virt_hypercall;
     uint64_t virt_hv_start_low;
+    uint64_t p2m_base;
     uint64_t elf_paddr_offset;
     uint32_t f_supported[XENFEAT_NR_SUBMAPS];
     uint32_t f_required[XENFEAT_NR_SUBMAPS];
--- 2009-01-08.orig/xen/include/public/xen.h 2009-01-08 11:44:11.000000000 +0100
+++ 2009-01-08/xen/include/public/xen.h 2009-01-08 10:56:30.000000000 +0100
@@ -513,6 +513,7 @@ typedef struct shared_info shared_info_t
  *      a. relocated kernel image
  *      b. initial ram disk              [mod_start, mod_len]
  *      c. list of allocated page frames [mfn_list, nr_pages]
+ *         (unless relocated due to XEN_ELFNOTE_INIT_P2M)
  *      d. start_info_t structure        [register ESI (x86)]
  *      e. bootstrap page tables         [pt_base, CR3 (x86)]
  *      f. bootstrap stack               [register ESP (x86)]
@@ -554,6 +555,9 @@ struct start_info {
     unsigned long mod_start;    /* VIRTUAL address of pre-loaded module.  */
     unsigned long mod_len;      /* Size (bytes) of pre-loaded module.     */
     int8_t cmd_line[MAX_GUEST_CMDLINE];
+    /* The pfn range here covers both page table and p->m table frames.  */
+    unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table.    */
+    unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table.  */
 };
 typedef struct start_info start_info_t;
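
For illustration only (again, not part of the patch): a 64-bit PV guest that used XEN_ELFNOTE_INIT_P2M must treat the frames named by the two new start_info fields as already in use. A hedged sketch follows, assuming the public xen.h above is included; reserve_p2m_frame() is a hypothetical guest-side helper used only for illustration.

/* Sketch only: withhold the frames backing the pre-built P->M table
 * (and the page-table pages mapping it) from the guest's page allocator. */
static void note_init_p2m_frames(const start_info_t *si)
{
    unsigned long pfn;
    unsigned long end = si->first_p2m_pfn + si->nr_p2m_frames;

    for ( pfn = si->first_p2m_pfn; pfn < end; pfn++ )
        reserve_p2m_frame(pfn);                /* hypothetical helper */
}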