# HG changeset patch
# User Keir Fraser <keir.fraser@citrix.com>
# Date 1220262725 -3600
# Node ID 86b956d8cf046d071c828ca9e461311f68fc0c6e
# Parent 7cb51e8484f67e32c1cc169948d63cd5579fd5bf
x86: make {get,put}_page_type() preemptible

This is only a first step - more call sites need to be hooked up.

Most of this is really Keir's work, I just took what he handed me and
fixed a few remaining issues.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>

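Note on the calling convention (editorial, not part of the original patch text):
the preemptible variants introduced below return 0 on success, -EINTR or -EAGAIN
when a pending preemption requires the operation to be resumed later, and another
negative value such as -EINVAL on real failure; the hypercall paths in the diff
turn -EAGAIN into a hypercall continuation. The standalone sketch below only
models that retry pattern with invented stub names - it is illustrative and is
not code from Xen.

/* Standalone model of the preemptible type-count pattern.
 * All identifiers here are stubs made up for illustration; only the
 * error-code convention (0 / -EAGAIN / negative failure) mirrors the patch. */
#include <errno.h>
#include <stdio.h>

static int work_left = 3;            /* pretend validation takes three passes */

/* Stub standing in for a preemptible validation step: it does a slice of
 * work, then asks to be resumed if more remains. */
static int stub_get_type_preemptible(void)
{
    if ( work_left > 0 )
    {
        work_left--;
        return -EAGAIN;              /* partially validated, come back later */
    }
    return 0;                        /* fully validated */
}

int main(void)
{
    int rc;

    /* A hypercall handler would return to the guest here and re-enter via a
     * continuation; a plain loop is enough to show the control flow. */
    while ( (rc = stub_get_type_preemptible()) == -EAGAIN )
        printf("preempted, creating continuation and retrying\n");

    printf("final rc = %d\n", rc);   /* 0 on success, negative on error */
    return rc ? 1 : 0;
}
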
Index: xen-3.3.1-testing/xen/arch/x86/domain.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/domain.c
+++ xen-3.3.1-testing/xen/arch/x86/domain.c
@@ -1646,23 +1646,26 @@ static int relinquish_memory(
|
|
|
|
/*
|
|
* Forcibly invalidate top-most, still valid page tables at this point
|
|
- * to break circular 'linear page table' references. This is okay
|
|
- * because MMU structures are not shared across domains and this domain
|
|
- * is now dead. Thus top-most valid tables are not in use so a non-zero
|
|
- * count means circular reference.
|
|
+ * to break circular 'linear page table' references as well as clean up
|
|
+ * partially validated pages. This is okay because MMU structures are
|
|
+ * not shared across domains and this domain is now dead. Thus top-most
|
|
+ * valid tables are not in use so a non-zero count means circular
|
|
+ * reference or partially validated.
|
|
*/
|
|
y = page->u.inuse.type_info;
|
|
for ( ; ; )
|
|
{
|
|
x = y;
|
|
- if ( likely((x & (PGT_type_mask|PGT_validated)) !=
|
|
- (type|PGT_validated)) )
|
|
+ if ( likely((x & PGT_type_mask) != type) ||
|
|
+ likely(!(x & (PGT_validated|PGT_partial))) )
|
|
break;
|
|
|
|
- y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
|
|
+ y = cmpxchg(&page->u.inuse.type_info, x,
|
|
+ x & ~(PGT_validated|PGT_partial));
|
|
if ( likely(y == x) )
|
|
{
|
|
- free_page_type(page, type);
|
|
+ if ( free_page_type(page, x, 0) != 0 )
|
|
+ BUG();
|
|
break;
|
|
}
|
|
}
|
|
Index: xen-3.3.1-testing/xen/arch/x86/mm.c
===================================================================
--- xen-3.3.1-testing.orig/xen/arch/x86/mm.c
+++ xen-3.3.1-testing/xen/arch/x86/mm.c
@@ -507,11 +507,11 @@ static int alloc_segdesc_page(struct pag
|
|
goto fail;
|
|
|
|
unmap_domain_page(descs);
|
|
- return 1;
|
|
+ return 0;
|
|
|
|
fail:
|
|
unmap_domain_page(descs);
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
}
|
|
|
|
|
|
@@ -565,20 +565,23 @@ static int get_page_from_pagenr(unsigned
|
|
|
|
static int get_page_and_type_from_pagenr(unsigned long page_nr,
|
|
unsigned long type,
|
|
- struct domain *d)
|
|
+ struct domain *d,
|
|
+ int preemptible)
|
|
{
|
|
struct page_info *page = mfn_to_page(page_nr);
|
|
+ int rc;
|
|
|
|
if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
|
|
- if ( unlikely(!get_page_type(page, type)) )
|
|
- {
|
|
+ rc = (preemptible ?
|
|
+ get_page_type_preemptible(page, type) :
|
|
+ (get_page_type(page, type) ? 0 : -EINVAL));
|
|
+
|
|
+ if ( rc )
|
|
put_page(page);
|
|
- return 0;
|
|
- }
|
|
|
|
- return 1;
|
|
+ return rc;
|
|
}
|
|
|
|
/*
|
|
@@ -754,22 +757,23 @@ get_page_from_l2e(
|
|
if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
|
|
{
|
|
MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
}
|
|
|
|
- rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d);
|
|
- if ( unlikely(!rc) )
|
|
- rc = get_l2_linear_pagetable(l2e, pfn, d);
|
|
+ rc = get_page_and_type_from_pagenr(
|
|
+ l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
|
|
+ if ( unlikely(rc) && rc != -EAGAIN &&
|
|
+ get_l2_linear_pagetable(l2e, pfn, d) )
|
|
+ rc = -EINVAL;
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
-#if CONFIG_PAGING_LEVELS >= 3
|
|
define_get_linear_pagetable(l3);
|
|
static int
|
|
get_page_from_l3e(
|
|
- l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
|
|
+ l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible)
|
|
{
|
|
int rc;
|
|
|
|
@@ -779,22 +783,23 @@ get_page_from_l3e(
|
|
if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
|
|
{
|
|
MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d));
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
}
|
|
|
|
- rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d);
|
|
- if ( unlikely(!rc) )
|
|
- rc = get_l3_linear_pagetable(l3e, pfn, d);
|
|
+ rc = get_page_and_type_from_pagenr(
|
|
+ l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible);
|
|
+ if ( unlikely(rc) && rc != -EAGAIN && rc != -EINTR &&
|
|
+ get_l3_linear_pagetable(l3e, pfn, d) )
|
|
+ rc = -EINVAL;
|
|
|
|
return rc;
|
|
}
|
|
-#endif /* 3 level */
|
|
|
|
#if CONFIG_PAGING_LEVELS >= 4
|
|
define_get_linear_pagetable(l4);
|
|
static int
|
|
get_page_from_l4e(
|
|
- l4_pgentry_t l4e, unsigned long pfn, struct domain *d)
|
|
+ l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible)
|
|
{
|
|
int rc;
|
|
|
|
@@ -804,12 +809,14 @@ get_page_from_l4e(
|
|
if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
|
|
{
|
|
MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
}
|
|
|
|
- rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d);
|
|
- if ( unlikely(!rc) )
|
|
- rc = get_l4_linear_pagetable(l4e, pfn, d);
|
|
+ rc = get_page_and_type_from_pagenr(
|
|
+ l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible);
|
|
+ if ( unlikely(rc) && rc != -EAGAIN && rc != -EINTR &&
|
|
+ get_l4_linear_pagetable(l4e, pfn, d) )
|
|
+ rc = -EINVAL;
|
|
|
|
return rc;
|
|
}
|
|
@@ -946,29 +953,35 @@ void put_page_from_l1e(l1_pgentry_t l1e,
|
|
* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
|
|
* Note also that this automatically deals correctly with linear p.t.'s.
|
|
*/
|
|
-static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
|
|
+static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
|
|
{
|
|
if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
|
|
(l2e_get_pfn(l2e) != pfn) )
|
|
+ {
|
|
put_page_and_type(l2e_get_page(l2e));
|
|
+ return 0;
|
|
+ }
|
|
+ return 1;
|
|
}
|
|
|
|
|
|
-#if CONFIG_PAGING_LEVELS >= 3
|
|
-static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
|
|
+static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
|
|
+ int preemptible)
|
|
{
|
|
if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
|
|
(l3e_get_pfn(l3e) != pfn) )
|
|
- put_page_and_type(l3e_get_page(l3e));
|
|
+ return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
|
|
+ return 1;
|
|
}
|
|
-#endif
|
|
|
|
#if CONFIG_PAGING_LEVELS >= 4
|
|
-static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
|
|
+static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
|
|
+ int preemptible)
|
|
{
|
|
if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
|
|
(l4e_get_pfn(l4e) != pfn) )
|
|
- put_page_and_type(l4e_get_page(l4e));
|
|
+ return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
|
|
+ return 1;
|
|
}
|
|
#endif
|
|
|
|
@@ -977,7 +990,7 @@ static int alloc_l1_table(struct page_in
|
|
struct domain *d = page_get_owner(page);
|
|
unsigned long pfn = page_to_mfn(page);
|
|
l1_pgentry_t *pl1e;
|
|
- int i;
|
|
+ unsigned int i;
|
|
|
|
pl1e = map_domain_page(pfn);
|
|
|
|
@@ -991,7 +1004,7 @@ static int alloc_l1_table(struct page_in
|
|
}
|
|
|
|
unmap_domain_page(pl1e);
|
|
- return 1;
|
|
+ return 0;
|
|
|
|
fail:
|
|
MEM_LOG("Failure in alloc_l1_table: entry %d", i);
|
|
@@ -1000,7 +1013,7 @@ static int alloc_l1_table(struct page_in
|
|
put_page_from_l1e(pl1e[i], d);
|
|
|
|
unmap_domain_page(pl1e);
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
}
|
|
|
|
static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
|
|
@@ -1128,47 +1141,53 @@ static void pae_flush_pgd(
|
|
# define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
|
|
#endif
|
|
|
|
-static int alloc_l2_table(struct page_info *page, unsigned long type)
|
|
+static int alloc_l2_table(struct page_info *page, unsigned long type,
|
|
+ int preemptible)
|
|
{
|
|
struct domain *d = page_get_owner(page);
|
|
unsigned long pfn = page_to_mfn(page);
|
|
l2_pgentry_t *pl2e;
|
|
- int i;
|
|
+ unsigned int i;
|
|
+ int rc = 0;
|
|
|
|
pl2e = map_domain_page(pfn);
|
|
|
|
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
|
|
+ for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ )
|
|
{
|
|
- if ( !is_guest_l2_slot(d, type, i) )
|
|
+ if ( preemptible && i && hypercall_preempt_check() )
|
|
+ {
|
|
+ page->nr_validated_ptes = i;
|
|
+ rc = -EAGAIN;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if ( !is_guest_l2_slot(d, type, i) ||
|
|
+ (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 )
|
|
continue;
|
|
|
|
- if ( unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) )
|
|
- goto fail;
|
|
-
|
|
+ if ( rc < 0 )
|
|
+ {
|
|
+ MEM_LOG("Failure in alloc_l2_table: entry %d", i);
|
|
+ while ( i-- > 0 )
|
|
+ if ( is_guest_l2_slot(d, type, i) )
|
|
+ put_page_from_l2e(pl2e[i], pfn);
|
|
+ break;
|
|
+ }
|
|
+
|
|
adjust_guest_l2e(pl2e[i], d);
|
|
}
|
|
|
|
unmap_domain_page(pl2e);
|
|
- return 1;
|
|
-
|
|
- fail:
|
|
- MEM_LOG("Failure in alloc_l2_table: entry %d", i);
|
|
- while ( i-- > 0 )
|
|
- if ( is_guest_l2_slot(d, type, i) )
|
|
- put_page_from_l2e(pl2e[i], pfn);
|
|
-
|
|
- unmap_domain_page(pl2e);
|
|
- return 0;
|
|
+ return rc > 0 ? 0 : rc;
|
|
}
|
|
|
|
-
|
|
-#if CONFIG_PAGING_LEVELS >= 3
|
|
-static int alloc_l3_table(struct page_info *page)
|
|
+static int alloc_l3_table(struct page_info *page, int preemptible)
|
|
{
|
|
struct domain *d = page_get_owner(page);
|
|
unsigned long pfn = page_to_mfn(page);
|
|
l3_pgentry_t *pl3e;
|
|
- int i;
|
|
+ unsigned int i;
|
|
+ int rc = 0;
|
|
|
|
#if CONFIG_PAGING_LEVELS == 3
|
|
/*
|
|
@@ -1181,7 +1200,7 @@ static int alloc_l3_table(struct page_in
|
|
d->vcpu[0] && d->vcpu[0]->is_initialised )
|
|
{
|
|
MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
}
|
|
#endif
|
|
|
|
@@ -1197,64 +1216,96 @@ static int alloc_l3_table(struct page_in
|
|
if ( is_pv_32on64_domain(d) )
|
|
memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
|
|
|
|
- for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
|
|
+ for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ )
|
|
{
|
|
if ( is_pv_32bit_domain(d) && (i == 3) )
|
|
{
|
|
if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
|
|
- (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ||
|
|
- !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
|
|
- PGT_l2_page_table |
|
|
- PGT_pae_xen_l2,
|
|
- d) )
|
|
- goto fail;
|
|
+ (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) )
|
|
+ rc = -EINVAL;
|
|
+ else
|
|
+ rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
|
|
+ PGT_l2_page_table |
|
|
+ PGT_pae_xen_l2,
|
|
+ d, preemptible);
|
|
}
|
|
- else if ( !is_guest_l3_slot(i) )
|
|
+ else if ( !is_guest_l3_slot(i) ||
|
|
+ (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0 )
|
|
continue;
|
|
- else if ( unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
|
|
- goto fail;
|
|
+
|
|
+ if ( rc == -EAGAIN )
|
|
+ {
|
|
+ page->nr_validated_ptes = i;
|
|
+ page->partial_pte = 1;
|
|
+ }
|
|
+ else if ( rc == -EINTR && i )
|
|
+ {
|
|
+ page->nr_validated_ptes = i;
|
|
+ page->partial_pte = 0;
|
|
+ rc = -EAGAIN;
|
|
+ }
|
|
+ if ( rc < 0 )
|
|
+ break;
|
|
|
|
adjust_guest_l3e(pl3e[i], d);
|
|
}
|
|
|
|
- if ( !create_pae_xen_mappings(d, pl3e) )
|
|
- goto fail;
|
|
-
|
|
- unmap_domain_page(pl3e);
|
|
- return 1;
|
|
-
|
|
- fail:
|
|
- MEM_LOG("Failure in alloc_l3_table: entry %d", i);
|
|
- while ( i-- > 0 )
|
|
+ if ( rc >= 0 && !create_pae_xen_mappings(d, pl3e) )
|
|
+ rc = -EINVAL;
|
|
+ if ( rc < 0 && rc != -EAGAIN && rc != -EINTR )
|
|
{
|
|
- if ( !is_guest_l3_slot(i) )
|
|
- continue;
|
|
- unadjust_guest_l3e(pl3e[i], d);
|
|
- put_page_from_l3e(pl3e[i], pfn);
|
|
+ MEM_LOG("Failure in alloc_l3_table: entry %d", i);
|
|
+ while ( i-- > 0 )
|
|
+ {
|
|
+ if ( !is_guest_l3_slot(i) )
|
|
+ continue;
|
|
+ unadjust_guest_l3e(pl3e[i], d);
|
|
+ put_page_from_l3e(pl3e[i], pfn, 0);
|
|
+ }
|
|
}
|
|
|
|
unmap_domain_page(pl3e);
|
|
- return 0;
|
|
+ return rc > 0 ? 0 : rc;
|
|
}
|
|
-#else
|
|
-#define alloc_l3_table(page) (0)
|
|
-#endif
|
|
|
|
#if CONFIG_PAGING_LEVELS >= 4
|
|
-static int alloc_l4_table(struct page_info *page)
|
|
+static int alloc_l4_table(struct page_info *page, int preemptible)
|
|
{
|
|
struct domain *d = page_get_owner(page);
|
|
unsigned long pfn = page_to_mfn(page);
|
|
l4_pgentry_t *pl4e = page_to_virt(page);
|
|
- int i;
|
|
+ unsigned int i;
|
|
+ int rc = 0;
|
|
|
|
- for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
|
|
+ for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ )
|
|
{
|
|
- if ( !is_guest_l4_slot(d, i) )
|
|
+ if ( !is_guest_l4_slot(d, i) ||
|
|
+ (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 )
|
|
continue;
|
|
|
|
- if ( unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) )
|
|
- goto fail;
|
|
+ if ( rc == -EAGAIN )
|
|
+ {
|
|
+ page->nr_validated_ptes = i;
|
|
+ page->partial_pte = 1;
|
|
+ }
|
|
+ else if ( rc == -EINTR )
|
|
+ {
|
|
+ if ( i )
|
|
+ {
|
|
+ page->nr_validated_ptes = i;
|
|
+ page->partial_pte = 0;
|
|
+ rc = -EAGAIN;
|
|
+ }
|
|
+ }
|
|
+ else if ( rc < 0 )
|
|
+ {
|
|
+ MEM_LOG("Failure in alloc_l4_table: entry %d", i);
|
|
+ while ( i-- > 0 )
|
|
+ if ( is_guest_l4_slot(d, i) )
|
|
+ put_page_from_l4e(pl4e[i], pfn, 0);
|
|
+ }
|
|
+ if ( rc < 0 )
|
|
+ return rc;
|
|
|
|
adjust_guest_l4e(pl4e[i], d);
|
|
}
|
|
@@ -1269,18 +1320,10 @@ static int alloc_l4_table(struct page_in
|
|
l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
|
|
__PAGE_HYPERVISOR);
|
|
|
|
- return 1;
|
|
-
|
|
- fail:
|
|
- MEM_LOG("Failure in alloc_l4_table: entry %d", i);
|
|
- while ( i-- > 0 )
|
|
- if ( is_guest_l4_slot(d, i) )
|
|
- put_page_from_l4e(pl4e[i], pfn);
|
|
-
|
|
- return 0;
|
|
+ return rc > 0 ? 0 : rc;
|
|
}
|
|
#else
|
|
-#define alloc_l4_table(page) (0)
|
|
+#define alloc_l4_table(page, preemptible) (-EINVAL)
|
|
#endif
|
|
|
|
|
|
@@ -1289,7 +1332,7 @@ static void free_l1_table(struct page_in
|
|
struct domain *d = page_get_owner(page);
|
|
unsigned long pfn = page_to_mfn(page);
|
|
l1_pgentry_t *pl1e;
|
|
- int i;
|
|
+ unsigned int i;
|
|
|
|
pl1e = map_domain_page(pfn);
|
|
|
|
@@ -1301,74 +1344,114 @@ static void free_l1_table(struct page_in
|
|
}
|
|
|
|
|
|
-static void free_l2_table(struct page_info *page)
|
|
+static int free_l2_table(struct page_info *page, int preemptible)
|
|
{
|
|
#ifdef CONFIG_COMPAT
|
|
struct domain *d = page_get_owner(page);
|
|
#endif
|
|
unsigned long pfn = page_to_mfn(page);
|
|
l2_pgentry_t *pl2e;
|
|
- int i;
|
|
+ unsigned int i = page->nr_validated_ptes - 1;
|
|
+ int err = 0;
|
|
|
|
pl2e = map_domain_page(pfn);
|
|
|
|
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
|
|
- if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) )
|
|
- put_page_from_l2e(pl2e[i], pfn);
|
|
+ ASSERT(page->nr_validated_ptes);
|
|
+ do {
|
|
+ if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) &&
|
|
+ put_page_from_l2e(pl2e[i], pfn) == 0 &&
|
|
+ preemptible && i && hypercall_preempt_check() )
|
|
+ {
|
|
+ page->nr_validated_ptes = i;
|
|
+ err = -EAGAIN;
|
|
+ }
|
|
+ } while ( !err && i-- );
|
|
|
|
unmap_domain_page(pl2e);
|
|
|
|
- page->u.inuse.type_info &= ~PGT_pae_xen_l2;
|
|
-}
|
|
-
|
|
+ if ( !err )
|
|
+ page->u.inuse.type_info &= ~PGT_pae_xen_l2;
|
|
|
|
-#if CONFIG_PAGING_LEVELS >= 3
|
|
+ return err;
|
|
+}
|
|
|
|
-static void free_l3_table(struct page_info *page)
|
|
+static int free_l3_table(struct page_info *page, int preemptible)
|
|
{
|
|
struct domain *d = page_get_owner(page);
|
|
unsigned long pfn = page_to_mfn(page);
|
|
l3_pgentry_t *pl3e;
|
|
- int i;
|
|
+ unsigned int i = page->nr_validated_ptes - !page->partial_pte;
|
|
+ int rc = 0;
|
|
|
|
#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
|
|
if ( d->arch.relmem == RELMEM_l3 )
|
|
- return;
|
|
+ return 0;
|
|
#endif
|
|
|
|
pl3e = map_domain_page(pfn);
|
|
|
|
- for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
|
|
+ do {
|
|
if ( is_guest_l3_slot(i) )
|
|
{
|
|
- put_page_from_l3e(pl3e[i], pfn);
|
|
+ rc = put_page_from_l3e(pl3e[i], pfn, preemptible);
|
|
+ if ( rc > 0 )
|
|
+ continue;
|
|
+ if ( rc )
|
|
+ break;
|
|
unadjust_guest_l3e(pl3e[i], d);
|
|
}
|
|
+ } while ( i-- );
|
|
|
|
unmap_domain_page(pl3e);
|
|
-}
|
|
|
|
-#endif
|
|
+ if ( rc == -EAGAIN )
|
|
+ {
|
|
+ page->nr_validated_ptes = i;
|
|
+ page->partial_pte = 1;
|
|
+ }
|
|
+ else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
|
|
+ {
|
|
+ page->nr_validated_ptes = i + 1;
|
|
+ page->partial_pte = 0;
|
|
+ rc = -EAGAIN;
|
|
+ }
|
|
+ return rc > 0 ? 0 : rc;
|
|
+}
|
|
|
|
#if CONFIG_PAGING_LEVELS >= 4
|
|
-
|
|
-static void free_l4_table(struct page_info *page)
|
|
+static int free_l4_table(struct page_info *page, int preemptible)
|
|
{
|
|
struct domain *d = page_get_owner(page);
|
|
unsigned long pfn = page_to_mfn(page);
|
|
l4_pgentry_t *pl4e = page_to_virt(page);
|
|
- int i;
|
|
+ unsigned int i = page->nr_validated_ptes - !page->partial_pte;
|
|
+ int rc = 0;
|
|
|
|
#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
|
|
if ( d->arch.relmem == RELMEM_l4 )
|
|
- return;
|
|
+ return 0;
|
|
#endif
|
|
|
|
- for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ )
|
|
+ do {
|
|
if ( is_guest_l4_slot(d, i) )
|
|
- put_page_from_l4e(pl4e[i], pfn);
|
|
-}
|
|
+ rc = put_page_from_l4e(pl4e[i], pfn, preemptible);
|
|
+ } while ( rc >= 0 && i-- );
|
|
|
|
+ if ( rc == -EAGAIN )
|
|
+ {
|
|
+ page->nr_validated_ptes = i;
|
|
+ page->partial_pte = 1;
|
|
+ }
|
|
+ else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
|
|
+ {
|
|
+ page->nr_validated_ptes = i + 1;
|
|
+ page->partial_pte = 0;
|
|
+ rc = -EAGAIN;
|
|
+ }
|
|
+ return rc > 0 ? 0 : rc;
|
|
+}
|
|
+#else
|
|
+#define free_l4_table(page, preemptible) (-EINVAL)
|
|
#endif
|
|
|
|
static void page_lock(struct page_info *page)
|
|
@@ -1560,7 +1643,7 @@ static int mod_l2_entry(l2_pgentry_t *pl
|
|
return rc;
|
|
}
|
|
|
|
- if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) )
|
|
+ if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) )
|
|
return page_unlock(l2pg), 0;
|
|
|
|
adjust_guest_l2e(nl2e, d);
|
|
@@ -1583,24 +1666,23 @@ static int mod_l2_entry(l2_pgentry_t *pl
|
|
return rc;
|
|
}
|
|
|
|
-#if CONFIG_PAGING_LEVELS >= 3
|
|
-
|
|
/* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
|
|
static int mod_l3_entry(l3_pgentry_t *pl3e,
|
|
l3_pgentry_t nl3e,
|
|
unsigned long pfn,
|
|
- int preserve_ad)
|
|
+ int preserve_ad,
|
|
+ int preemptible)
|
|
{
|
|
l3_pgentry_t ol3e;
|
|
struct vcpu *curr = current;
|
|
struct domain *d = curr->domain;
|
|
struct page_info *l3pg = mfn_to_page(pfn);
|
|
- int rc = 1;
|
|
+ int rc = 0;
|
|
|
|
if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
|
|
{
|
|
MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e);
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
}
|
|
|
|
/*
|
|
@@ -1608,12 +1690,12 @@ static int mod_l3_entry(l3_pgentry_t *pl
|
|
* would be a pain to ensure they remain continuously valid throughout.
|
|
*/
|
|
if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
|
|
page_lock(l3pg);
|
|
|
|
if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
|
|
- return page_unlock(l3pg), 0;
|
|
+ return page_unlock(l3pg), -EFAULT;
|
|
|
|
if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
|
|
{
|
|
@@ -1622,7 +1704,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
|
|
page_unlock(l3pg);
|
|
MEM_LOG("Bad L3 flags %x",
|
|
l3e_get_flags(nl3e) & l3_disallow_mask(d));
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
}
|
|
|
|
/* Fast path for identical mapping and presence. */
|
|
@@ -1631,28 +1713,30 @@ static int mod_l3_entry(l3_pgentry_t *pl
|
|
adjust_guest_l3e(nl3e, d);
|
|
rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
|
|
page_unlock(l3pg);
|
|
- return rc;
|
|
+ return rc ? 0 : -EFAULT;
|
|
}
|
|
|
|
- if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) )
|
|
- return page_unlock(l3pg), 0;
|
|
+ rc = get_page_from_l3e(nl3e, pfn, d, preemptible);
|
|
+ if ( unlikely(rc < 0) )
|
|
+ return page_unlock(l3pg), rc;
|
|
+ rc = 0;
|
|
|
|
adjust_guest_l3e(nl3e, d);
|
|
if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
|
|
preserve_ad)) )
|
|
{
|
|
ol3e = nl3e;
|
|
- rc = 0;
|
|
+ rc = -EFAULT;
|
|
}
|
|
}
|
|
else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
|
|
preserve_ad)) )
|
|
{
|
|
page_unlock(l3pg);
|
|
- return 0;
|
|
+ return -EFAULT;
|
|
}
|
|
|
|
- if ( likely(rc) )
|
|
+ if ( likely(rc == 0) )
|
|
{
|
|
if ( !create_pae_xen_mappings(d, pl3e) )
|
|
BUG();
|
|
@@ -1661,36 +1745,35 @@ static int mod_l3_entry(l3_pgentry_t *pl
|
|
}
|
|
|
|
page_unlock(l3pg);
|
|
- put_page_from_l3e(ol3e, pfn);
|
|
+ put_page_from_l3e(ol3e, pfn, 0);
|
|
return rc;
|
|
}
|
|
|
|
-#endif
|
|
-
|
|
#if CONFIG_PAGING_LEVELS >= 4
|
|
|
|
/* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
|
|
static int mod_l4_entry(l4_pgentry_t *pl4e,
|
|
l4_pgentry_t nl4e,
|
|
unsigned long pfn,
|
|
- int preserve_ad)
|
|
+ int preserve_ad,
|
|
+ int preemptible)
|
|
{
|
|
struct vcpu *curr = current;
|
|
struct domain *d = curr->domain;
|
|
l4_pgentry_t ol4e;
|
|
struct page_info *l4pg = mfn_to_page(pfn);
|
|
- int rc = 1;
|
|
+ int rc = 0;
|
|
|
|
if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
|
|
{
|
|
MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e);
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
}
|
|
|
|
page_lock(l4pg);
|
|
|
|
if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
|
|
- return page_unlock(l4pg), 0;
|
|
+ return page_unlock(l4pg), -EFAULT;
|
|
|
|
if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
|
|
{
|
|
@@ -1699,7 +1782,7 @@ static int mod_l4_entry(l4_pgentry_t *pl
|
|
page_unlock(l4pg);
|
|
MEM_LOG("Bad L4 flags %x",
|
|
l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
}
|
|
|
|
/* Fast path for identical mapping and presence. */
|
|
@@ -1708,29 +1791,31 @@ static int mod_l4_entry(l4_pgentry_t *pl
|
|
adjust_guest_l4e(nl4e, d);
|
|
rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
|
|
page_unlock(l4pg);
|
|
- return rc;
|
|
+ return rc ? 0 : -EFAULT;
|
|
}
|
|
|
|
- if ( unlikely(!get_page_from_l4e(nl4e, pfn, d)) )
|
|
- return page_unlock(l4pg), 0;
|
|
+ rc = get_page_from_l4e(nl4e, pfn, d, preemptible);
|
|
+ if ( unlikely(rc < 0) )
|
|
+ return page_unlock(l4pg), rc;
|
|
+ rc = 0;
|
|
|
|
adjust_guest_l4e(nl4e, d);
|
|
if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
|
|
preserve_ad)) )
|
|
{
|
|
ol4e = nl4e;
|
|
- rc = 0;
|
|
+ rc = -EFAULT;
|
|
}
|
|
}
|
|
else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
|
|
preserve_ad)) )
|
|
{
|
|
page_unlock(l4pg);
|
|
- return 0;
|
|
+ return -EFAULT;
|
|
}
|
|
|
|
page_unlock(l4pg);
|
|
- put_page_from_l4e(ol4e, pfn);
|
|
+ put_page_from_l4e(ol4e, pfn, 0);
|
|
return rc;
|
|
}
|
|
|
|
@@ -1788,9 +1873,11 @@ int get_page(struct page_info *page, str
|
|
}
|
|
|
|
|
|
-static int alloc_page_type(struct page_info *page, unsigned long type)
|
|
+static int alloc_page_type(struct page_info *page, unsigned long type,
|
|
+ int preemptible)
|
|
{
|
|
struct domain *owner = page_get_owner(page);
|
|
+ int rc;
|
|
|
|
/* A page table is dirtied when its type count becomes non-zero. */
|
|
if ( likely(owner != NULL) )
|
|
@@ -1799,30 +1886,65 @@ static int alloc_page_type(struct page_i
|
|
switch ( type & PGT_type_mask )
|
|
{
|
|
case PGT_l1_page_table:
|
|
- return alloc_l1_table(page);
|
|
+ alloc_l1_table(page);
|
|
+ rc = 0;
|
|
+ break;
|
|
case PGT_l2_page_table:
|
|
- return alloc_l2_table(page, type);
|
|
+ rc = alloc_l2_table(page, type, preemptible);
|
|
+ break;
|
|
case PGT_l3_page_table:
|
|
- return alloc_l3_table(page);
|
|
+ rc = alloc_l3_table(page, preemptible);
|
|
+ break;
|
|
case PGT_l4_page_table:
|
|
- return alloc_l4_table(page);
|
|
+ rc = alloc_l4_table(page, preemptible);
|
|
+ break;
|
|
case PGT_seg_desc_page:
|
|
- return alloc_segdesc_page(page);
|
|
+ rc = alloc_segdesc_page(page);
|
|
+ break;
|
|
default:
|
|
printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n",
|
|
type, page->u.inuse.type_info,
|
|
page->count_info);
|
|
+ rc = -EINVAL;
|
|
BUG();
|
|
}
|
|
|
|
- return 0;
|
|
+ /* No need for atomic update of type_info here: noone else updates it. */
|
|
+ wmb();
|
|
+ if ( rc == -EAGAIN )
|
|
+ {
|
|
+ page->u.inuse.type_info |= PGT_partial;
|
|
+ }
|
|
+ else if ( rc == -EINTR )
|
|
+ {
|
|
+ ASSERT((page->u.inuse.type_info &
|
|
+ (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
|
|
+ page->u.inuse.type_info &= ~PGT_count_mask;
|
|
+ }
|
|
+ else if ( rc )
|
|
+ {
|
|
+ ASSERT(rc < 0);
|
|
+ MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
|
|
+ PRtype_info ": caf=%08x taf=%" PRtype_info,
|
|
+ page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
|
|
+ type, page->count_info, page->u.inuse.type_info);
|
|
+ page->u.inuse.type_info = 0;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ page->u.inuse.type_info |= PGT_validated;
|
|
+ }
|
|
+
|
|
+ return rc;
|
|
}
|
|
|
|
|
|
-void free_page_type(struct page_info *page, unsigned long type)
|
|
+int free_page_type(struct page_info *page, unsigned long type,
|
|
+ int preemptible)
|
|
{
|
|
struct domain *owner = page_get_owner(page);
|
|
unsigned long gmfn;
|
|
+ int rc;
|
|
|
|
if ( likely(owner != NULL) )
|
|
{
|
|
@@ -1842,7 +1964,7 @@ void free_page_type(struct page_info *pa
|
|
paging_mark_dirty(owner, page_to_mfn(page));
|
|
|
|
if ( shadow_mode_refcounts(owner) )
|
|
- return;
|
|
+ return 0;
|
|
|
|
gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
|
|
ASSERT(VALID_M2P(gmfn));
|
|
@@ -1850,42 +1972,80 @@ void free_page_type(struct page_info *pa
|
|
}
|
|
}
|
|
|
|
+ if ( !(type & PGT_partial) )
|
|
+ {
|
|
+ page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
|
|
+ page->partial_pte = 0;
|
|
+ }
|
|
switch ( type & PGT_type_mask )
|
|
{
|
|
case PGT_l1_page_table:
|
|
free_l1_table(page);
|
|
+ rc = 0;
|
|
break;
|
|
-
|
|
case PGT_l2_page_table:
|
|
- free_l2_table(page);
|
|
+ rc = free_l2_table(page, preemptible);
|
|
break;
|
|
-
|
|
-#if CONFIG_PAGING_LEVELS >= 3
|
|
case PGT_l3_page_table:
|
|
- free_l3_table(page);
|
|
- break;
|
|
+#if CONFIG_PAGING_LEVELS == 3
|
|
+ if ( !(type & PGT_partial) )
|
|
+ page->nr_validated_ptes = L3_PAGETABLE_ENTRIES;
|
|
#endif
|
|
-
|
|
-#if CONFIG_PAGING_LEVELS >= 4
|
|
+ rc = free_l3_table(page, preemptible);
|
|
+ break;
|
|
case PGT_l4_page_table:
|
|
- free_l4_table(page);
|
|
+ rc = free_l4_table(page, preemptible);
|
|
break;
|
|
-#endif
|
|
-
|
|
default:
|
|
- printk("%s: type %lx pfn %lx\n",__FUNCTION__,
|
|
- type, page_to_mfn(page));
|
|
+ MEM_LOG("type %lx pfn %lx\n", type, page_to_mfn(page));
|
|
+ rc = -EINVAL;
|
|
BUG();
|
|
}
|
|
+
|
|
+ /* No need for atomic update of type_info here: noone else updates it. */
|
|
+ if ( rc == 0 )
|
|
+ {
|
|
+ /*
|
|
+ * Record TLB information for flush later. We do not stamp page tables
|
|
+ * when running in shadow mode:
|
|
+ * 1. Pointless, since it's the shadow pt's which must be tracked.
|
|
+ * 2. Shadow mode reuses this field for shadowed page tables to
|
|
+ * store flags info -- we don't want to conflict with that.
|
|
+ */
|
|
+ if ( !(shadow_mode_enabled(page_get_owner(page)) &&
|
|
+ (page->count_info & PGC_page_table)) )
|
|
+ page->tlbflush_timestamp = tlbflush_current_time();
|
|
+ wmb();
|
|
+ page->u.inuse.type_info--;
|
|
+ }
|
|
+ else if ( rc == -EINTR )
|
|
+ {
|
|
+ ASSERT(!(page->u.inuse.type_info &
|
|
+ (PGT_count_mask|PGT_validated|PGT_partial)));
|
|
+ if ( !(shadow_mode_enabled(page_get_owner(page)) &&
|
|
+ (page->count_info & PGC_page_table)) )
|
|
+ page->tlbflush_timestamp = tlbflush_current_time();
|
|
+ wmb();
|
|
+ page->u.inuse.type_info |= PGT_validated;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ BUG_ON(rc != -EAGAIN);
|
|
+ wmb();
|
|
+ page->u.inuse.type_info |= PGT_partial;
|
|
+ }
|
|
+
|
|
+ return rc;
|
|
}
|
|
|
|
|
|
-void put_page_type(struct page_info *page)
|
|
+static int __put_page_type(struct page_info *page,
|
|
+ int preemptible)
|
|
{
|
|
unsigned long nx, x, y = page->u.inuse.type_info;
|
|
|
|
- again:
|
|
- do {
|
|
+ for ( ; ; )
|
|
+ {
|
|
x = y;
|
|
nx = x - 1;
|
|
|
|
@@ -1894,21 +2054,19 @@ void put_page_type(struct page_info *pag
|
|
if ( unlikely((nx & PGT_count_mask) == 0) )
|
|
{
|
|
if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
|
|
- likely(nx & PGT_validated) )
|
|
+ likely(nx & (PGT_validated|PGT_partial)) )
|
|
{
|
|
/*
|
|
* Page-table pages must be unvalidated when count is zero. The
|
|
* 'free' is safe because the refcnt is non-zero and validated
|
|
* bit is clear => other ops will spin or fail.
|
|
*/
|
|
- if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x,
|
|
- x & ~PGT_validated)) != x) )
|
|
- goto again;
|
|
+ nx = x & ~(PGT_validated|PGT_partial);
|
|
+ if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
|
|
+ x, nx)) != x) )
|
|
+ continue;
|
|
/* We cleared the 'valid bit' so we do the clean up. */
|
|
- free_page_type(page, x);
|
|
- /* Carry on, but with the 'valid bit' now clear. */
|
|
- x &= ~PGT_validated;
|
|
- nx &= ~PGT_validated;
|
|
+ return free_page_type(page, x, preemptible);
|
|
}
|
|
|
|
/*
|
|
@@ -1922,25 +2080,33 @@ void put_page_type(struct page_info *pag
|
|
(page->count_info & PGC_page_table)) )
|
|
page->tlbflush_timestamp = tlbflush_current_time();
|
|
}
|
|
+
|
|
+ if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
|
|
+ break;
|
|
+
|
|
+ if ( preemptible && hypercall_preempt_check() )
|
|
+ return -EINTR;
|
|
}
|
|
- while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
|
|
+
|
|
+ return 0;
|
|
}
|
|
|
|
|
|
-int get_page_type(struct page_info *page, unsigned long type)
|
|
+static int __get_page_type(struct page_info *page, unsigned long type,
|
|
+ int preemptible)
|
|
{
|
|
unsigned long nx, x, y = page->u.inuse.type_info;
|
|
|
|
ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
|
|
|
|
- again:
|
|
- do {
|
|
+ for ( ; ; )
|
|
+ {
|
|
x = y;
|
|
nx = x + 1;
|
|
if ( unlikely((nx & PGT_count_mask) == 0) )
|
|
{
|
|
MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
}
|
|
else if ( unlikely((x & PGT_count_mask) == 0) )
|
|
{
|
|
@@ -1993,28 +2159,43 @@ int get_page_type(struct page_info *page
|
|
/* Don't log failure if it could be a recursive-mapping attempt. */
|
|
if ( ((x & PGT_type_mask) == PGT_l2_page_table) &&
|
|
(type == PGT_l1_page_table) )
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
if ( ((x & PGT_type_mask) == PGT_l3_page_table) &&
|
|
(type == PGT_l2_page_table) )
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
if ( ((x & PGT_type_mask) == PGT_l4_page_table) &&
|
|
(type == PGT_l3_page_table) )
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
MEM_LOG("Bad type (saw %" PRtype_info " != exp %" PRtype_info ") "
|
|
"for mfn %lx (pfn %lx)",
|
|
x, type, page_to_mfn(page),
|
|
get_gpfn_from_mfn(page_to_mfn(page)));
|
|
- return 0;
|
|
+ return -EINVAL;
|
|
}
|
|
else if ( unlikely(!(x & PGT_validated)) )
|
|
{
|
|
- /* Someone else is updating validation of this page. Wait... */
|
|
- while ( (y = page->u.inuse.type_info) == x )
|
|
- cpu_relax();
|
|
- goto again;
|
|
+ if ( !(x & PGT_partial) )
|
|
+ {
|
|
+ /* Someone else is updating validation of this page. Wait... */
|
|
+ while ( (y = page->u.inuse.type_info) == x )
|
|
+ {
|
|
+ if ( preemptible && hypercall_preempt_check() )
|
|
+ return -EINTR;
|
|
+ cpu_relax();
|
|
+ }
|
|
+ continue;
|
|
+ }
|
|
+ /* Type ref count was left at 1 when PGT_partial got set. */
|
|
+ ASSERT((x & PGT_count_mask) == 1);
|
|
+ nx = x & ~PGT_partial;
|
|
}
|
|
+
|
|
+ if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
|
|
+ break;
|
|
+
|
|
+ if ( preemptible && hypercall_preempt_check() )
|
|
+ return -EINTR;
|
|
}
|
|
- while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) );
|
|
|
|
if ( unlikely((x & PGT_type_mask) != type) )
|
|
{
|
|
@@ -2032,25 +2213,42 @@ int get_page_type(struct page_info *page
|
|
|
|
if ( unlikely(!(nx & PGT_validated)) )
|
|
{
|
|
- /* Try to validate page type; drop the new reference on failure. */
|
|
- if ( unlikely(!alloc_page_type(page, type)) )
|
|
+ if ( !(x & PGT_partial) )
|
|
{
|
|
- MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
|
|
- PRtype_info ": caf=%08x taf=%" PRtype_info,
|
|
- page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
|
|
- type, page->count_info, page->u.inuse.type_info);
|
|
- /* Noone else can get a reference. We hold the only ref. */
|
|
- page->u.inuse.type_info = 0;
|
|
- return 0;
|
|
+ page->nr_validated_ptes = 0;
|
|
+ page->partial_pte = 0;
|
|
}
|
|
-
|
|
- /* Noone else is updating simultaneously. */
|
|
- __set_bit(_PGT_validated, &page->u.inuse.type_info);
|
|
+ return alloc_page_type(page, type, preemptible);
|
|
}
|
|
|
|
- return 1;
|
|
+ return 0;
|
|
}
|
|
|
|
+void put_page_type(struct page_info *page)
|
|
+{
|
|
+ int rc = __put_page_type(page, 0);
|
|
+ ASSERT(rc == 0);
|
|
+ (void)rc;
|
|
+}
|
|
+
|
|
+int get_page_type(struct page_info *page, unsigned long type)
|
|
+{
|
|
+ int rc = __get_page_type(page, type, 0);
|
|
+ if ( likely(rc == 0) )
|
|
+ return 1;
|
|
+ ASSERT(rc == -EINVAL);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int put_page_type_preemptible(struct page_info *page)
|
|
+{
|
|
+ return __put_page_type(page, 1);
|
|
+}
|
|
+
|
|
+int get_page_type_preemptible(struct page_info *page, unsigned long type)
|
|
+{
|
|
+ return __get_page_type(page, type, 1);
|
|
+}
|
|
|
|
void cleanup_page_cacheattr(struct page_info *page)
|
|
{
|
|
@@ -2087,7 +2285,7 @@ int new_guest_cr3(unsigned long mfn)
|
|
l4e_from_pfn(
|
|
mfn,
|
|
(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
|
|
- pagetable_get_pfn(v->arch.guest_table), 0);
|
|
+ pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0;
|
|
if ( unlikely(!okay) )
|
|
{
|
|
MEM_LOG("Error while installing new compat baseptr %lx", mfn);
|
|
@@ -2102,7 +2300,7 @@ int new_guest_cr3(unsigned long mfn)
|
|
#endif
|
|
okay = paging_mode_refcounts(d)
|
|
? get_page_from_pagenr(mfn, d)
|
|
- : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d);
|
|
+ : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0);
|
|
if ( unlikely(!okay) )
|
|
{
|
|
MEM_LOG("Error while installing new baseptr %lx", mfn);
|
|
@@ -2276,9 +2474,7 @@ int do_mmuext_op(
|
|
{
|
|
if ( hypercall_preempt_check() )
|
|
{
|
|
- rc = hypercall_create_continuation(
|
|
- __HYPERVISOR_mmuext_op, "hihi",
|
|
- uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
|
|
+ rc = -EAGAIN;
|
|
break;
|
|
}
|
|
|
|
@@ -2325,10 +2521,14 @@ int do_mmuext_op(
|
|
if ( paging_mode_refcounts(FOREIGNDOM) )
|
|
break;
|
|
|
|
- okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM);
|
|
+ rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1);
|
|
+ okay = !rc;
|
|
if ( unlikely(!okay) )
|
|
{
|
|
- MEM_LOG("Error while pinning mfn %lx", mfn);
|
|
+ if ( rc == -EINTR )
|
|
+ rc = -EAGAIN;
|
|
+ else if ( rc != -EAGAIN )
|
|
+ MEM_LOG("Error while pinning mfn %lx", mfn);
|
|
break;
|
|
}
|
|
|
|
@@ -2373,8 +2573,11 @@ int do_mmuext_op(
|
|
{
|
|
put_page_and_type(page);
|
|
put_page(page);
|
|
- /* A page is dirtied when its pin status is cleared. */
|
|
- paging_mark_dirty(d, mfn);
|
|
+ if ( !rc )
|
|
+ {
|
|
+ /* A page is dirtied when its pin status is cleared. */
|
|
+ paging_mark_dirty(d, mfn);
|
|
+ }
|
|
}
|
|
else
|
|
{
|
|
@@ -2398,8 +2601,8 @@ int do_mmuext_op(
|
|
if ( paging_mode_refcounts(d) )
|
|
okay = get_page_from_pagenr(mfn, d);
|
|
else
|
|
- okay = get_page_and_type_from_pagenr(
|
|
- mfn, PGT_root_page_table, d);
|
|
+ okay = !get_page_and_type_from_pagenr(
|
|
+ mfn, PGT_root_page_table, d, 0);
|
|
if ( unlikely(!okay) )
|
|
{
|
|
MEM_LOG("Error while installing new mfn %lx", mfn);
|
|
@@ -2517,6 +2720,11 @@ int do_mmuext_op(
|
|
guest_handle_add_offset(uops, 1);
|
|
}
|
|
|
|
+ if ( rc == -EAGAIN )
|
|
+ rc = hypercall_create_continuation(
|
|
+ __HYPERVISOR_mmuext_op, "hihi",
|
|
+ uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
|
|
+
|
|
process_deferred_ops();
|
|
|
|
perfc_add(num_mmuext_ops, i);
|
|
@@ -2576,9 +2784,7 @@ int do_mmu_update(
|
|
{
|
|
if ( hypercall_preempt_check() )
|
|
{
|
|
- rc = hypercall_create_continuation(
|
|
- __HYPERVISOR_mmu_update, "hihi",
|
|
- ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
|
|
+ rc = -EAGAIN;
|
|
break;
|
|
}
|
|
|
|
@@ -2653,27 +2859,29 @@ int do_mmu_update(
|
|
cmd == MMU_PT_UPDATE_PRESERVE_AD);
|
|
}
|
|
break;
|
|
-#if CONFIG_PAGING_LEVELS >= 3
|
|
case PGT_l3_page_table:
|
|
{
|
|
l3_pgentry_t l3e = l3e_from_intpte(req.val);
|
|
- okay = mod_l3_entry(va, l3e, mfn,
|
|
- cmd == MMU_PT_UPDATE_PRESERVE_AD);
|
|
+ rc = mod_l3_entry(va, l3e, mfn,
|
|
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1);
|
|
+ okay = !rc;
|
|
}
|
|
break;
|
|
-#endif
|
|
#if CONFIG_PAGING_LEVELS >= 4
|
|
case PGT_l4_page_table:
|
|
{
|
|
l4_pgentry_t l4e = l4e_from_intpte(req.val);
|
|
- okay = mod_l4_entry(va, l4e, mfn,
|
|
- cmd == MMU_PT_UPDATE_PRESERVE_AD);
|
|
+ rc = mod_l4_entry(va, l4e, mfn,
|
|
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1);
|
|
+ okay = !rc;
|
|
}
|
|
break;
|
|
#endif
|
|
}
|
|
|
|
put_page_type(page);
|
|
+ if ( rc == -EINTR )
|
|
+ rc = -EAGAIN;
|
|
}
|
|
break;
|
|
|
|
@@ -2742,6 +2950,11 @@ int do_mmu_update(
|
|
guest_handle_add_offset(ureqs, 1);
|
|
}
|
|
|
|
+ if ( rc == -EAGAIN )
|
|
+ rc = hypercall_create_continuation(
|
|
+ __HYPERVISOR_mmu_update, "hihi",
|
|
+ ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
|
|
+
|
|
process_deferred_ops();
|
|
|
|
domain_mmap_cache_destroy(&mapcache);
|
|
@@ -3621,9 +3834,8 @@ static int ptwr_emulated_update(
|
|
nl1e = l1e_from_intpte(val);
|
|
if ( unlikely(!get_page_from_l1e(nl1e, d)) )
|
|
{
|
|
- if ( (CONFIG_PAGING_LEVELS >= 3) && is_pv_32bit_domain(d) &&
|
|
- (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg &&
|
|
- (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
|
|
+ if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) &&
|
|
+ !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
|
|
{
|
|
/*
|
|
* If this is an upper-half write to a PAE PTE then we assume that
|
|
Index: xen-3.3.1-testing/xen/include/asm-x86/mm.h
===================================================================
--- xen-3.3.1-testing.orig/xen/include/asm-x86/mm.h
+++ xen-3.3.1-testing/xen/include/asm-x86/mm.h
@@ -59,6 +59,17 @@ struct page_info
|
|
u32 tlbflush_timestamp;
|
|
|
|
/*
|
|
+ * When PGT_partial is true then this field is valid and indicates
|
|
+ * that PTEs in the range [0, @nr_validated_ptes) have been validated.
|
|
+ * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been
|
|
+ * partially validated.
|
|
+ */
|
|
+ struct {
|
|
+ u16 nr_validated_ptes;
|
|
+ bool_t partial_pte;
|
|
+ };
|
|
+
|
|
+ /*
|
|
* Guest pages with a shadow. This does not conflict with
|
|
* tlbflush_timestamp since page table pages are explicitly not
|
|
* tracked for TLB-flush avoidance when a guest runs in shadow mode.
|
|
@@ -86,9 +97,12 @@ struct page_info
|
|
/* PAE only: is this an L2 page directory containing Xen-private mappings? */
|
|
#define _PGT_pae_xen_l2 26
|
|
#define PGT_pae_xen_l2 (1U<<_PGT_pae_xen_l2)
|
|
+/* Has this page been *partially* validated for use as its current type? */
|
|
+#define _PGT_partial 25
|
|
+#define PGT_partial (1U<<_PGT_partial)
|
|
|
|
- /* 26-bit count of uses of this frame as its current type. */
|
|
-#define PGT_count_mask ((1U<<26)-1)
|
|
+ /* 25-bit count of uses of this frame as its current type. */
|
|
+#define PGT_count_mask ((1U<<25)-1)
|
|
|
|
/* Cleared when the owning guest 'frees' this page. */
|
|
#define _PGC_allocated 31
|
|
@@ -154,7 +168,8 @@ extern unsigned long max_page;
|
|
extern unsigned long total_pages;
|
|
void init_frametable(void);
|
|
|
|
-void free_page_type(struct page_info *page, unsigned long type);
|
|
+int free_page_type(struct page_info *page, unsigned long type,
|
|
+ int preemptible);
|
|
int _shadow_mode_refcounts(struct domain *d);
|
|
|
|
void cleanup_page_cacheattr(struct page_info *page);
|
|
@@ -165,6 +180,8 @@ void put_page(struct page_info *page);
|
|
int get_page(struct page_info *page, struct domain *domain);
|
|
void put_page_type(struct page_info *page);
|
|
int get_page_type(struct page_info *page, unsigned long type);
|
|
+int put_page_type_preemptible(struct page_info *page);
|
|
+int get_page_type_preemptible(struct page_info *page, unsigned long type);
|
|
int get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
|
|
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
|
|
|
|
@@ -174,6 +191,19 @@ static inline void put_page_and_type(str
|
|
put_page(page);
|
|
}
|
|
|
|
+static inline int put_page_and_type_preemptible(struct page_info *page,
|
|
+ int preemptible)
|
|
+{
|
|
+ int rc = 0;
|
|
+
|
|
+ if ( preemptible )
|
|
+ rc = put_page_type_preemptible(page);
|
|
+ else
|
|
+ put_page_type(page);
|
|
+ if ( likely(rc == 0) )
|
|
+ put_page(page);
|
|
+ return rc;
|
|
+}
|
|
|
|
static inline int get_page_and_type(struct page_info *page,
|
|
struct domain *domain,
|