c608e23838
- Turn off building the KMPs now that we are using the pvops kernel
  xen.spec
- Upstream patches from Jan
  561bbc8b-VT-d-don-t-suppress-invalidation-address-write-when-it-is-zero.patch
  561d20a0-x86-hide-MWAITX-from-PV-domains.patch
  561e3283-x86-NUMA-fix-SRAT-table-processor-entry-parsing-and-consumption.patch
  5632118e-arm-Support-hypercall_create_continuation-for-multicall.patch
  56321222-arm-rate-limit-logging-from-unimplemented-PHYSDEVOP-and-HVMOP.patch
  56321249-arm-handle-races-between-relinquish_memory-and-free_domheap_pages.patch
  5632127b-x86-guard-against-undue-super-page-PTE-creation.patch
  5632129c-free-domain-s-vcpu-array.patch (Replaces CVE-2015-7969-xsa149.patch)
  563212c9-x86-PoD-Eager-sweep-for-zeroed-pages.patch
  563212e4-xenoprof-free-domain-s-vcpu-array.patch
  563212ff-x86-rate-limit-logging-in-do_xen-oprof-pmu-_op.patch
  56323737-libxl-adjust-PoD-target-by-memory-fudge-too.patch
  56377442-x86-PoD-Make-p2m_pod_empty_cache-restartable.patch
  5641ceec-x86-HVM-always-intercept-AC-and-DB.patch (Replaces CVE-2015-5307-xsa156.patch)
  5644b756-x86-HVM-don-t-inject-DB-with-error-code.patch
- Dropped 55b0a2db-x86-MSI-track-guest-masking.patch
- Use upstream variants of block-iscsi and block-nbd
- Remove xenalyze.hg, it's part of xen-4.6
OBS-URL: https://build.opensuse.org/package/show/Virtualization/xen?expand=0&rev=389

# Commit 101ce53266866144e724ed593173bc4098b300b9
# Date 2015-10-29 13:36:25 +0100
# Author Andrew Cooper <andrew.cooper3@citrix.com>
# Committer Jan Beulich <jbeulich@suse.com>
x86/PoD: Eager sweep for zeroed pages

Based on the contents of a guests physical address space,
p2m_pod_emergency_sweep() could degrade into a linear memcmp() from 0 to
max_gfn, which runs non-preemptibly.

As p2m_pod_emergency_sweep() runs behind the scenes in a number of contexts,
making it preemptible is not feasible.

Instead, a different approach is taken.  Recently-populated pages are eagerly
checked for reclaimation, which amortises the p2m_pod_emergency_sweep()
operation across each p2m_pod_demand_populate() operation.

Note that in the case that a 2M superpage can't be reclaimed as a superpage,
it is shattered if 4K pages of zeros can be reclaimed.  This is unfortunate
but matches the previous behaviour, and is required to avoid regressions
(domain crash from PoD exhaustion) with VMs configured close to the limit.

This is CVE-2015-7970 / XSA-150.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: George Dunlap <george.dunlap@citrix.com>

--- a/xen/arch/x86/mm/p2m-pod.c
+++ b/xen/arch/x86/mm/p2m-pod.c
@@ -901,28 +901,6 @@ p2m_pod_zero_check(struct p2m_domain *p2
 }
 
 #define POD_SWEEP_LIMIT 1024
-
-/* When populating a new superpage, look at recently populated superpages
- * hoping that they've been zeroed.  This will snap up zeroed pages as soon as
- * the guest OS is done with them. */
-static void
-p2m_pod_check_last_super(struct p2m_domain *p2m, unsigned long gfn_aligned)
-{
-    unsigned long check_gfn;
-
-    ASSERT(p2m->pod.last_populated_index < POD_HISTORY_MAX);
-
-    check_gfn = p2m->pod.last_populated[p2m->pod.last_populated_index];
-
-    p2m->pod.last_populated[p2m->pod.last_populated_index] = gfn_aligned;
-
-    p2m->pod.last_populated_index =
-        ( p2m->pod.last_populated_index + 1 ) % POD_HISTORY_MAX;
-
-    p2m_pod_zero_check_superpage(p2m, check_gfn);
-}
-
-
 #define POD_SWEEP_STRIDE  16
 static void
 p2m_pod_emergency_sweep(struct p2m_domain *p2m)
@@ -963,7 +941,7 @@ p2m_pod_emergency_sweep(struct p2m_domai
          * NB that this is a zero-sum game; we're increasing our cache size
          * by re-increasing our 'debt'.  Since we hold the pod lock,
          * (entry_count - count) must remain the same. */
-        if ( p2m->pod.count > 0 && i < limit )
+        if ( i < limit && (p2m->pod.count > 0 || hypercall_preempt_check()) )
             break;
     }
 
@@ -975,6 +953,58 @@ p2m_pod_emergency_sweep(struct p2m_domai
 
 }
 
+static void pod_eager_reclaim(struct p2m_domain *p2m)
+{
+    struct pod_mrp_list *mrp = &p2m->pod.mrp;
+    unsigned int i = 0;
+
+    /*
+     * Always check one page for reclaimation.
+     *
+     * If the PoD pool is empty, keep checking some space is found, or all
+     * entries have been exhaused.
+     */
+    do
+    {
+        unsigned int idx = (mrp->idx + i++) % ARRAY_SIZE(mrp->list);
+        unsigned long gfn = mrp->list[idx];
+
+        if ( gfn != INVALID_GFN )
+        {
+            if ( gfn & POD_LAST_SUPERPAGE )
+            {
+                gfn &= ~POD_LAST_SUPERPAGE;
+
+                if ( p2m_pod_zero_check_superpage(p2m, gfn) == 0 )
+                {
+                    unsigned int x;
+
+                    for ( x = 0; x < SUPERPAGE_PAGES; ++x, ++gfn )
+                        p2m_pod_zero_check(p2m, &gfn, 1);
+                }
+            }
+            else
+                p2m_pod_zero_check(p2m, &gfn, 1);
+
+            mrp->list[idx] = INVALID_GFN;
+        }
+
+    } while ( (p2m->pod.count == 0) && (i < ARRAY_SIZE(mrp->list)) );
+}
+
+static void pod_eager_record(struct p2m_domain *p2m,
+                             unsigned long gfn, unsigned int order)
+{
+    struct pod_mrp_list *mrp = &p2m->pod.mrp;
+
+    ASSERT(mrp->list[mrp->idx] == INVALID_GFN);
+    ASSERT(gfn != INVALID_GFN);
+
+    mrp->list[mrp->idx++] =
+        gfn | (order == PAGE_ORDER_2M ? POD_LAST_SUPERPAGE : 0);
+    mrp->idx %= ARRAY_SIZE(mrp->list);
+}
+
 int
 p2m_pod_demand_populate(struct p2m_domain *p2m, unsigned long gfn,
                         unsigned int order,
@@ -1015,6 +1045,8 @@ p2m_pod_demand_populate(struct p2m_domai
         return 0;
     }
 
+    pod_eager_reclaim(p2m);
+
     /* Only sweep if we're actually out of memory.  Doing anything else
      * causes unnecessary time and fragmentation of superpages in the p2m. */
     if ( p2m->pod.count == 0 )
@@ -1051,6 +1083,8 @@ p2m_pod_demand_populate(struct p2m_domai
     p2m->pod.entry_count -= (1 << order);
     BUG_ON(p2m->pod.entry_count < 0);
 
+    pod_eager_record(p2m, gfn_aligned, order);
+
     if ( tb_init_done )
     {
         struct {
@@ -1066,12 +1100,6 @@ p2m_pod_demand_populate(struct p2m_domai
         __trace_var(TRC_MEM_POD_POPULATE, 0, sizeof(t), &t);
     }
 
-    /* Check the last guest demand-populate */
-    if ( p2m->pod.entry_count > p2m->pod.count
-         && (order == PAGE_ORDER_2M)
-         && (q & P2M_ALLOC) )
-        p2m_pod_check_last_super(p2m, gfn_aligned);
-
     pod_unlock(p2m);
     return 0;
 out_of_memory:
--- a/xen/arch/x86/mm/p2m.c
+++ b/xen/arch/x86/mm/p2m.c
@@ -60,6 +60,7 @@ boolean_param("hap_2mb", opt_hap_2mb);
 /* Init the datastructures for later use by the p2m code */
 static int p2m_initialise(struct domain *d, struct p2m_domain *p2m)
 {
+    unsigned int i;
     int ret = 0;
 
     mm_rwlock_init(&p2m->lock);
@@ -75,6 +76,9 @@ static int p2m_initialise(struct domain
 
     p2m->np2m_base = P2M_BASE_EADDR;
 
+    for ( i = 0; i < ARRAY_SIZE(p2m->pod.mrp.list); ++i )
+        p2m->pod.mrp.list[i] = INVALID_GFN;
+
     if ( hap_enabled(d) && cpu_has_vmx )
        ret = ept_p2m_init(p2m);
    else
--- a/xen/include/asm-x86/p2m.h
+++ b/xen/include/asm-x86/p2m.h
@@ -292,10 +292,20 @@ struct p2m_domain {
                           entry_count;  /* # of pages in p2m marked pod      */
         unsigned long     reclaim_single; /* Last gpfn of a scan */
         unsigned long     max_guest;    /* gpfn of max guest demand-populate */
-#define POD_HISTORY_MAX 128
-        /* gpfn of last guest superpage demand-populated */
-        unsigned long     last_populated[POD_HISTORY_MAX];
-        unsigned int      last_populated_index;
+
+        /*
+         * Tracking of the most recently populated PoD pages, for eager
+         * reclamation.
+         */
+        struct pod_mrp_list {
+#define NR_POD_MRP_ENTRIES 32
+
+/* Encode ORDER_2M superpage in top bit of GFN */
+#define POD_LAST_SUPERPAGE (INVALID_GFN & ~(INVALID_GFN >> 1))
+
+            unsigned long list[NR_POD_MRP_ENTRIES];
+            unsigned int idx;
+        } mrp;
         mm_lock_t        lock;         /* Locking of private pod structs,   *
                                        * not relying on the p2m lock.      */
     } pod;
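
In outline, the ring-buffer scheme the patch introduces can be exercised with the
stand-alone sketch below. It is illustrative only: NR_ENTRIES mirrors
NR_POD_MRP_ENTRIES, LAST_SUPERPAGE mirrors POD_LAST_SUPERPAGE, and demo_record()
and demo_reclaim() correspond loosely to pod_eager_record() and
pod_eager_reclaim() above; the zero-page predicate and the pool counter are
simplified stand-ins, not Xen code.

/* Stand-alone sketch (not Xen code) of the most-recently-populated ring buffer. */
#include <stdbool.h>
#include <stdio.h>

#define NR_ENTRIES     32
#define INVALID_GFN    (~0UL)
#define LAST_SUPERPAGE (INVALID_GFN & ~(INVALID_GFN >> 1)) /* top bit of a gfn */

static unsigned long list[NR_ENTRIES];
static unsigned int idx;
static unsigned int pod_pool;          /* stand-in for p2m->pod.count */

/* Stand-in predicate: pretend even gfns are still zeroed and reclaimable. */
static bool page_is_zeroed(unsigned long gfn) { return (gfn & 1) == 0; }

/* Record a freshly demand-populated gfn, tagging 2M allocations in the top bit. */
static void demo_record(unsigned long gfn, bool superpage)
{
    list[idx++] = gfn | (superpage ? LAST_SUPERPAGE : 0);
    idx %= NR_ENTRIES;
}

/* Always check one recent entry; keep scanning only while the pool is empty. */
static void demo_reclaim(void)
{
    unsigned int i = 0;

    do {
        unsigned int slot = (idx + i++) % NR_ENTRIES;
        unsigned long gfn = list[slot];

        if ( gfn != INVALID_GFN )
        {
            gfn &= ~LAST_SUPERPAGE;
            if ( page_is_zeroed(gfn) )
                pod_pool++;            /* page handed back to the PoD pool */
            list[slot] = INVALID_GFN;
        }
    } while ( pod_pool == 0 && i < NR_ENTRIES );
}

int main(void)
{
    unsigned int i;

    for ( i = 0; i < NR_ENTRIES; i++ )
        list[i] = INVALID_GFN;         /* mirrors the p2m_initialise() loop */

    demo_record(0x1000, false);
    demo_record(0x2000, true);
    demo_reclaim();
    printf("PoD pool after eager reclaim: %u page(s)\n", pod_pool);
    return 0;
}

Recording happens on every successful demand-populate and reclaim scans only a
bounded list, so the cost is amortised across populate operations instead of
being concentrated in the non-preemptible emergency sweep.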