xen/22526-ept-access-once.patch

# HG changeset patch
# User Keir Fraser <keir@xen.org>
# Date 1292410025 0
# Node ID 7a5ee380041707177ca9c78e800095d1f5f3d373
# Parent 01f3b350902385627d1fa9e8cd1c231953e7610c
ept: Remove lock in ept_get_entry, replace with access-once semantics.

This mirrors the RVI/shadow situation, where p2m read access is
lockless because it's done in the hardware (linear map of the p2m
table).

This fixes the original bug (call it bug A) without introducing bug B
(a deadlock).

Bug A was caused by a race when updating p2m entries: between testing
if it's valid, and testing if it's populate-on-demand, it may have
been changed from populate-on-demand to valid.
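
To make bug A concrete, here is a minimal stand-alone sketch (toy
field widths and an illustrative PoD type value, not Xen's exact
definitions).  A reader that tests the live entry twice can see "not
present" on the first read and then miss the populate-on-demand type
on the second if a writer flips the entry in between; reading the
entry once into a local copy keeps the two tests consistent:

    #include <stdint.h>

    /* Toy EPT entry: a union over one 64-bit word, as in Xen.
     * Field widths here are illustrative only. */
    typedef union {
        struct {
            uint64_t r      :  1,   /* readable   */
                     w      :  1,   /* writable   */
                     x      :  1,   /* executable */
                     avail1 :  4,   /* p2m type   */
                     mfn    : 57;
        };
        uint64_t epte;
    } ept_entry_t;

    #define p2m_populate_on_demand 6    /* illustrative value */

    static int is_epte_present(const volatile ept_entry_t *e)
    {
        return (e->epte & 0x7) != 0;    /* any of r/w/x set */
    }

    /* Bug A: two separate reads of the live entry can disagree. */
    int classify_racy(volatile ept_entry_t *live)
    {
        if ( !is_epte_present(live) )                     /* read #1 */
        {
            /* A concurrent ept_set_entry() can make the entry valid
             * here, changing avail1 before read #2 happens... */
            if ( live->avail1 == p2m_populate_on_demand ) /* read #2 */
                return 1;   /* PoD page */
            return 0;       /* wrongly treated as plain not-present */
        }
        return 2;           /* present */
    }

    /* The fix: snapshot the entry once, test only the snapshot. */
    int classify_once(volatile ept_entry_t *live)
    {
        ept_entry_t e = *live;  /* single read of the shared entry */

        if ( !is_epte_present(&e) )
            return (e.avail1 == p2m_populate_on_demand) ? 1 : 0;
        return 2;
    }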
My original patch simply introduced a lock into ept_get_entry, but
that caused bug B, a deadlock due to a circular locking order:

p2m_change_type [grabs p2m lock] -> set_p2m_entry -> ept_set_entry ->
 ept_set_middle_level -> p2m_alloc [grabs hap lock]

write cr4 -> hap_update_paging_modes [grabs hap lock] ->
 hap_update_cr3 -> gfn_to_mfn -> ept_get_entry -> [grabs p2m lock]

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
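
The circular order above is the classic AB-BA deadlock.  As a minimal
stand-alone illustration, with POSIX mutexes standing in for Xen's p2m
and hap locks (the pthread scaffolding is ours, not Xen's lock API):

    #include <pthread.h>

    static pthread_mutex_t p2m_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t hap_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Chain 1: p2m_change_type -> ... -> p2m_alloc */
    static void *chain_one(void *arg)
    {
        pthread_mutex_lock(&p2m_lock);  /* grabs p2m lock first */
        pthread_mutex_lock(&hap_lock);  /* then hap lock, in p2m_alloc */
        pthread_mutex_unlock(&hap_lock);
        pthread_mutex_unlock(&p2m_lock);
        return NULL;
    }

    /* Chain 2: write cr4 -> hap_update_paging_modes -> ... ->
     * a hypothetical locked ept_get_entry */
    static void *chain_two(void *arg)
    {
        pthread_mutex_lock(&hap_lock);  /* grabs hap lock first */
        pthread_mutex_lock(&p2m_lock);  /* then p2m lock */
        pthread_mutex_unlock(&p2m_lock);
        pthread_mutex_unlock(&hap_lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t1, t2;

        /* If each thread wins its first lock before the other's
         * second acquisition, both block forever. */
        pthread_create(&t1, NULL, chain_one, NULL);
        pthread_create(&t2, NULL, chain_two, NULL);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        return 0;
    }

Making ept_get_entry lockless removes chain 2's second acquisition,
which dissolves the cycle without reintroducing bug A.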
--- a/xen/arch/x86/mm/hap/p2m-ept.c
+++ b/xen/arch/x86/mm/hap/p2m-ept.c
@@ -137,7 +137,7 @@ static int ept_next_level(struct domain
                           ept_entry_t **table, unsigned long *gfn_remainder,
                           u32 shift)
 {
-    ept_entry_t *ept_entry;
+    ept_entry_t *ept_entry, e;
     ept_entry_t *next;
     u32 index;
 
@@ -145,9 +145,11 @@ static int ept_next_level(struct domain
 
     ept_entry = (*table) + index;
 
-    if ( !is_epte_present(ept_entry) )
+    e=*ept_entry;
+
+    if ( !is_epte_present(&e) )
     {
-        if ( ept_entry->avail1 == p2m_populate_on_demand )
+        if ( e.avail1 == p2m_populate_on_demand )
             return GUEST_TABLE_POD_PAGE;
 
         if ( read_only )
@@ -155,15 +157,17 @@ static int ept_next_level(struct domain
 
         if ( !ept_set_middle_entry(d, ept_entry) )
             return GUEST_TABLE_MAP_FAILED;
+        else
+            e=*ept_entry;
     }
 
     /* The only time sp would be set here is if we had hit a superpage */
-    if ( is_epte_superpage(ept_entry) )
+    if ( is_epte_superpage(&e) )
         return GUEST_TABLE_SUPER_PAGE;
     else
     {
         *gfn_remainder &= (1UL << shift) - 1;
-        next = map_domain_page(ept_entry->mfn);
+        next = map_domain_page(e.mfn);
         unmap_domain_page(*table);
         *table = next;
         return GUEST_TABLE_NORMAL_PAGE;
@@ -235,35 +239,39 @@ ept_set_entry(struct domain *d, unsigned
     if ( mfn_valid(mfn_x(mfn)) || direct_mmio || p2m_is_paged(p2mt) ||
          (p2mt == p2m_ram_paging_in_start) )
     {
-        ept_entry->emt = epte_get_entry_emt(d, gfn, mfn, &ipat,
+        ept_entry_t new_entry;
+
+        new_entry.emt = epte_get_entry_emt(d, gfn, mfn, &ipat,
                                             direct_mmio);
-        ept_entry->ipat = ipat;
-        ept_entry->sp = order ? 1 : 0;
+        new_entry.ipat = ipat;
+        new_entry.sp = order ? 1 : 0;
 
         if ( ret == GUEST_TABLE_SUPER_PAGE )
         {
-            if ( ept_entry->mfn == (mfn_x(mfn) - offset) )
+            if ( new_entry.mfn == (mfn_x(mfn) - offset) )
                 need_modify_vtd_table = 0;
             else
-                ept_entry->mfn = mfn_x(mfn) - offset;
+                new_entry.mfn = mfn_x(mfn) - offset;
 
-            if ( (ept_entry->avail1 == p2m_ram_logdirty)
+            if ( (new_entry.avail1 == p2m_ram_logdirty)
                  && (p2mt == p2m_ram_rw) )
                 for ( i = 0; i < 512; i++ )
                     paging_mark_dirty(d, mfn_x(mfn) - offset + i);
         }
         else
         {
-            if ( ept_entry->mfn == mfn_x(mfn) )
+            if ( new_entry.mfn == mfn_x(mfn) )
                 need_modify_vtd_table = 0;
             else
-                ept_entry->mfn = mfn_x(mfn);
+                new_entry.mfn = mfn_x(mfn);
         }
 
-        ept_entry->avail1 = p2mt;
-        ept_entry->avail2 = 0;
+        new_entry.avail1 = p2mt;
+        new_entry.avail2 = 0;
+
+        ept_p2m_type_to_flags(&new_entry, p2mt);
 
-        ept_p2m_type_to_flags(ept_entry, p2mt);
+        ept_entry->epte = new_entry.epte;
     }
     else
         ept_entry->epte = 0;
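
A note on why the plain copies in this patch are safe without a lock:
ept_entry_t is a union over a single 64-bit word (epte), so building
new_entry on the stack and publishing it with "ept_entry->epte =
new_entry.epte" is one aligned 64-bit store, and the read side's
"e=*ept_entry;" is one aligned load; a concurrent lockless reader sees
either the old entry or the new one, never a torn mix.  A sketch of
the layout this relies on (field widths quoted approximately from the
Xen sources of this era; see the tree's vmx headers for the
authoritative definition):

    typedef union {
        struct {
            u64 r       :  1,   /* read access             */
                w       :  1,   /* write access            */
                x       :  1,   /* execute access          */
                emt     :  3,   /* EPT memory type         */
                ipat    :  1,   /* ignore PAT memory type  */
                sp      :  1,   /* superpage?              */
                avail1  :  4,   /* p2m_type_t of this page */
                mfn     : 40,   /* machine frame number    */
                avail2  : 12;
        };
        u64 epte;               /* whole entry as one word */
    } ept_entry_t;

Strictly, C does not promise that a struct assignment is a single
atomic access; this relies on the compiler emitting one 8-byte move
for the 64-bit union, which is atomic on x86-64 when aligned.  Later
Xen revisions made the intent explicit with dedicated read/write
helpers for EPT entries.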