diff --git a/18412-x86-page-type-preemptible.patch b/18412-x86-page-type-preemptible.patch new file mode 100644 index 0000000..b8ce785 --- /dev/null +++ b/18412-x86-page-type-preemptible.patch @@ -0,0 +1,1421 @@ +# HG changeset patch +# User Keir Fraser +# Date 1220262725 -3600 +# Node ID 86b956d8cf046d071c828ca9e461311f68fc0c6e +# Parent 7cb51e8484f67e32c1cc169948d63cd5579fd5bf +x86: make {get,put}_page_type() preemptible + +This is only a first step - more call sites need to be hooked up. + +Most of this is really Keir's work, I just took what he handed me and +fixed a few remaining issues. + +Signed-off-by: Jan Beulich +Signed-off-by: Keir Fraser + +Index: xen-3.3.1-testing/xen/arch/x86/domain.c +=================================================================== +--- xen-3.3.1-testing.orig/xen/arch/x86/domain.c ++++ xen-3.3.1-testing/xen/arch/x86/domain.c +@@ -1646,23 +1646,26 @@ static int relinquish_memory( + + /* + * Forcibly invalidate top-most, still valid page tables at this point +- * to break circular 'linear page table' references. This is okay +- * because MMU structures are not shared across domains and this domain +- * is now dead. Thus top-most valid tables are not in use so a non-zero +- * count means circular reference. ++ * to break circular 'linear page table' references as well as clean up ++ * partially validated pages. This is okay because MMU structures are ++ * not shared across domains and this domain is now dead. Thus top-most ++ * valid tables are not in use so a non-zero count means circular ++ * reference or partially validated. + */ + y = page->u.inuse.type_info; + for ( ; ; ) + { + x = y; +- if ( likely((x & (PGT_type_mask|PGT_validated)) != +- (type|PGT_validated)) ) ++ if ( likely((x & PGT_type_mask) != type) || ++ likely(!(x & (PGT_validated|PGT_partial))) ) + break; + +- y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated); ++ y = cmpxchg(&page->u.inuse.type_info, x, ++ x & ~(PGT_validated|PGT_partial)); + if ( likely(y == x) ) + { +- free_page_type(page, type); ++ if ( free_page_type(page, x, 0) != 0 ) ++ BUG(); + break; + } + } +Index: xen-3.3.1-testing/xen/arch/x86/mm.c +=================================================================== +--- xen-3.3.1-testing.orig/xen/arch/x86/mm.c ++++ xen-3.3.1-testing/xen/arch/x86/mm.c +@@ -507,11 +507,11 @@ static int alloc_segdesc_page(struct pag + goto fail; + + unmap_domain_page(descs); +- return 1; ++ return 0; + + fail: + unmap_domain_page(descs); +- return 0; ++ return -EINVAL; + } + + +@@ -565,20 +565,23 @@ static int get_page_from_pagenr(unsigned + + static int get_page_and_type_from_pagenr(unsigned long page_nr, + unsigned long type, +- struct domain *d) ++ struct domain *d, ++ int preemptible) + { + struct page_info *page = mfn_to_page(page_nr); ++ int rc; + + if ( unlikely(!get_page_from_pagenr(page_nr, d)) ) +- return 0; ++ return -EINVAL; + +- if ( unlikely(!get_page_type(page, type)) ) +- { ++ rc = (preemptible ? ++ get_page_type_preemptible(page, type) : ++ (get_page_type(page, type) ? 
0 : -EINVAL)); ++ ++ if ( rc ) + put_page(page); +- return 0; +- } + +- return 1; ++ return rc; + } + + /* +@@ -754,22 +757,23 @@ get_page_from_l2e( + if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) ) + { + MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK); +- return 0; ++ return -EINVAL; + } + +- rc = get_page_and_type_from_pagenr(l2e_get_pfn(l2e), PGT_l1_page_table, d); +- if ( unlikely(!rc) ) +- rc = get_l2_linear_pagetable(l2e, pfn, d); ++ rc = get_page_and_type_from_pagenr( ++ l2e_get_pfn(l2e), PGT_l1_page_table, d, 0); ++ if ( unlikely(rc) && rc != -EAGAIN && ++ get_l2_linear_pagetable(l2e, pfn, d) ) ++ rc = -EINVAL; + + return rc; + } + + +-#if CONFIG_PAGING_LEVELS >= 3 + define_get_linear_pagetable(l3); + static int + get_page_from_l3e( +- l3_pgentry_t l3e, unsigned long pfn, struct domain *d) ++ l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible) + { + int rc; + +@@ -779,22 +783,23 @@ get_page_from_l3e( + if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) ) + { + MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d)); +- return 0; ++ return -EINVAL; + } + +- rc = get_page_and_type_from_pagenr(l3e_get_pfn(l3e), PGT_l2_page_table, d); +- if ( unlikely(!rc) ) +- rc = get_l3_linear_pagetable(l3e, pfn, d); ++ rc = get_page_and_type_from_pagenr( ++ l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible); ++ if ( unlikely(rc) && rc != -EAGAIN && rc != -EINTR && ++ get_l3_linear_pagetable(l3e, pfn, d) ) ++ rc = -EINVAL; + + return rc; + } +-#endif /* 3 level */ + + #if CONFIG_PAGING_LEVELS >= 4 + define_get_linear_pagetable(l4); + static int + get_page_from_l4e( +- l4_pgentry_t l4e, unsigned long pfn, struct domain *d) ++ l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible) + { + int rc; + +@@ -804,12 +809,14 @@ get_page_from_l4e( + if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) ) + { + MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK); +- return 0; ++ return -EINVAL; + } + +- rc = get_page_and_type_from_pagenr(l4e_get_pfn(l4e), PGT_l3_page_table, d); +- if ( unlikely(!rc) ) +- rc = get_l4_linear_pagetable(l4e, pfn, d); ++ rc = get_page_and_type_from_pagenr( ++ l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible); ++ if ( unlikely(rc) && rc != -EAGAIN && rc != -EINTR && ++ get_l4_linear_pagetable(l4e, pfn, d) ) ++ rc = -EINVAL; + + return rc; + } +@@ -946,29 +953,35 @@ void put_page_from_l1e(l1_pgentry_t l1e, + * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. + * Note also that this automatically deals correctly with linear p.t.'s. 
+ */ +-static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) ++static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) + { + if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) && + (l2e_get_pfn(l2e) != pfn) ) ++ { + put_page_and_type(l2e_get_page(l2e)); ++ return 0; ++ } ++ return 1; + } + + +-#if CONFIG_PAGING_LEVELS >= 3 +-static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn) ++static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn, ++ int preemptible) + { + if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) && + (l3e_get_pfn(l3e) != pfn) ) +- put_page_and_type(l3e_get_page(l3e)); ++ return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible); ++ return 1; + } +-#endif + + #if CONFIG_PAGING_LEVELS >= 4 +-static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn) ++static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn, ++ int preemptible) + { + if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) && + (l4e_get_pfn(l4e) != pfn) ) +- put_page_and_type(l4e_get_page(l4e)); ++ return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible); ++ return 1; + } + #endif + +@@ -977,7 +990,7 @@ static int alloc_l1_table(struct page_in + struct domain *d = page_get_owner(page); + unsigned long pfn = page_to_mfn(page); + l1_pgentry_t *pl1e; +- int i; ++ unsigned int i; + + pl1e = map_domain_page(pfn); + +@@ -991,7 +1004,7 @@ static int alloc_l1_table(struct page_in + } + + unmap_domain_page(pl1e); +- return 1; ++ return 0; + + fail: + MEM_LOG("Failure in alloc_l1_table: entry %d", i); +@@ -1000,7 +1013,7 @@ static int alloc_l1_table(struct page_in + put_page_from_l1e(pl1e[i], d); + + unmap_domain_page(pl1e); +- return 0; ++ return -EINVAL; + } + + static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e) +@@ -1128,47 +1141,53 @@ static void pae_flush_pgd( + # define pae_flush_pgd(mfn, idx, nl3e) ((void)0) + #endif + +-static int alloc_l2_table(struct page_info *page, unsigned long type) ++static int alloc_l2_table(struct page_info *page, unsigned long type, ++ int preemptible) + { + struct domain *d = page_get_owner(page); + unsigned long pfn = page_to_mfn(page); + l2_pgentry_t *pl2e; +- int i; ++ unsigned int i; ++ int rc = 0; + + pl2e = map_domain_page(pfn); + +- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) ++ for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ ) + { +- if ( !is_guest_l2_slot(d, type, i) ) ++ if ( preemptible && i && hypercall_preempt_check() ) ++ { ++ page->nr_validated_ptes = i; ++ rc = -EAGAIN; ++ break; ++ } ++ ++ if ( !is_guest_l2_slot(d, type, i) || ++ (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 ) + continue; + +- if ( unlikely(!get_page_from_l2e(pl2e[i], pfn, d)) ) +- goto fail; +- ++ if ( rc < 0 ) ++ { ++ MEM_LOG("Failure in alloc_l2_table: entry %d", i); ++ while ( i-- > 0 ) ++ if ( is_guest_l2_slot(d, type, i) ) ++ put_page_from_l2e(pl2e[i], pfn); ++ break; ++ } ++ + adjust_guest_l2e(pl2e[i], d); + } + + unmap_domain_page(pl2e); +- return 1; +- +- fail: +- MEM_LOG("Failure in alloc_l2_table: entry %d", i); +- while ( i-- > 0 ) +- if ( is_guest_l2_slot(d, type, i) ) +- put_page_from_l2e(pl2e[i], pfn); +- +- unmap_domain_page(pl2e); +- return 0; ++ return rc > 0 ? 
0 : rc; + } + +- +-#if CONFIG_PAGING_LEVELS >= 3 +-static int alloc_l3_table(struct page_info *page) ++static int alloc_l3_table(struct page_info *page, int preemptible) + { + struct domain *d = page_get_owner(page); + unsigned long pfn = page_to_mfn(page); + l3_pgentry_t *pl3e; +- int i; ++ unsigned int i; ++ int rc = 0; + + #if CONFIG_PAGING_LEVELS == 3 + /* +@@ -1181,7 +1200,7 @@ static int alloc_l3_table(struct page_in + d->vcpu[0] && d->vcpu[0]->is_initialised ) + { + MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn); +- return 0; ++ return -EINVAL; + } + #endif + +@@ -1197,64 +1216,96 @@ static int alloc_l3_table(struct page_in + if ( is_pv_32on64_domain(d) ) + memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e)); + +- for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) ++ for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ ) + { + if ( is_pv_32bit_domain(d) && (i == 3) ) + { + if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) || +- (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) || +- !get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]), +- PGT_l2_page_table | +- PGT_pae_xen_l2, +- d) ) +- goto fail; ++ (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) ) ++ rc = -EINVAL; ++ else ++ rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]), ++ PGT_l2_page_table | ++ PGT_pae_xen_l2, ++ d, preemptible); + } +- else if ( !is_guest_l3_slot(i) ) ++ else if ( !is_guest_l3_slot(i) || ++ (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0 ) + continue; +- else if ( unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) ) +- goto fail; ++ ++ if ( rc == -EAGAIN ) ++ { ++ page->nr_validated_ptes = i; ++ page->partial_pte = 1; ++ } ++ else if ( rc == -EINTR && i ) ++ { ++ page->nr_validated_ptes = i; ++ page->partial_pte = 0; ++ rc = -EAGAIN; ++ } ++ if ( rc < 0 ) ++ break; + + adjust_guest_l3e(pl3e[i], d); + } + +- if ( !create_pae_xen_mappings(d, pl3e) ) +- goto fail; +- +- unmap_domain_page(pl3e); +- return 1; +- +- fail: +- MEM_LOG("Failure in alloc_l3_table: entry %d", i); +- while ( i-- > 0 ) ++ if ( rc >= 0 && !create_pae_xen_mappings(d, pl3e) ) ++ rc = -EINVAL; ++ if ( rc < 0 && rc != -EAGAIN && rc != -EINTR ) + { +- if ( !is_guest_l3_slot(i) ) +- continue; +- unadjust_guest_l3e(pl3e[i], d); +- put_page_from_l3e(pl3e[i], pfn); ++ MEM_LOG("Failure in alloc_l3_table: entry %d", i); ++ while ( i-- > 0 ) ++ { ++ if ( !is_guest_l3_slot(i) ) ++ continue; ++ unadjust_guest_l3e(pl3e[i], d); ++ put_page_from_l3e(pl3e[i], pfn, 0); ++ } + } + + unmap_domain_page(pl3e); +- return 0; ++ return rc > 0 ? 
0 : rc; + } +-#else +-#define alloc_l3_table(page) (0) +-#endif + + #if CONFIG_PAGING_LEVELS >= 4 +-static int alloc_l4_table(struct page_info *page) ++static int alloc_l4_table(struct page_info *page, int preemptible) + { + struct domain *d = page_get_owner(page); + unsigned long pfn = page_to_mfn(page); + l4_pgentry_t *pl4e = page_to_virt(page); +- int i; ++ unsigned int i; ++ int rc = 0; + +- for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ ) ++ for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ ) + { +- if ( !is_guest_l4_slot(d, i) ) ++ if ( !is_guest_l4_slot(d, i) || ++ (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 ) + continue; + +- if ( unlikely(!get_page_from_l4e(pl4e[i], pfn, d)) ) +- goto fail; ++ if ( rc == -EAGAIN ) ++ { ++ page->nr_validated_ptes = i; ++ page->partial_pte = 1; ++ } ++ else if ( rc == -EINTR ) ++ { ++ if ( i ) ++ { ++ page->nr_validated_ptes = i; ++ page->partial_pte = 0; ++ rc = -EAGAIN; ++ } ++ } ++ else if ( rc < 0 ) ++ { ++ MEM_LOG("Failure in alloc_l4_table: entry %d", i); ++ while ( i-- > 0 ) ++ if ( is_guest_l4_slot(d, i) ) ++ put_page_from_l4e(pl4e[i], pfn, 0); ++ } ++ if ( rc < 0 ) ++ return rc; + + adjust_guest_l4e(pl4e[i], d); + } +@@ -1269,18 +1320,10 @@ static int alloc_l4_table(struct page_in + l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3), + __PAGE_HYPERVISOR); + +- return 1; +- +- fail: +- MEM_LOG("Failure in alloc_l4_table: entry %d", i); +- while ( i-- > 0 ) +- if ( is_guest_l4_slot(d, i) ) +- put_page_from_l4e(pl4e[i], pfn); +- +- return 0; ++ return rc > 0 ? 0 : rc; + } + #else +-#define alloc_l4_table(page) (0) ++#define alloc_l4_table(page, preemptible) (-EINVAL) + #endif + + +@@ -1289,7 +1332,7 @@ static void free_l1_table(struct page_in + struct domain *d = page_get_owner(page); + unsigned long pfn = page_to_mfn(page); + l1_pgentry_t *pl1e; +- int i; ++ unsigned int i; + + pl1e = map_domain_page(pfn); + +@@ -1301,74 +1344,114 @@ static void free_l1_table(struct page_in + } + + +-static void free_l2_table(struct page_info *page) ++static int free_l2_table(struct page_info *page, int preemptible) + { + #ifdef CONFIG_COMPAT + struct domain *d = page_get_owner(page); + #endif + unsigned long pfn = page_to_mfn(page); + l2_pgentry_t *pl2e; +- int i; ++ unsigned int i = page->nr_validated_ptes - 1; ++ int err = 0; + + pl2e = map_domain_page(pfn); + +- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) +- if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) ) +- put_page_from_l2e(pl2e[i], pfn); ++ ASSERT(page->nr_validated_ptes); ++ do { ++ if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) && ++ put_page_from_l2e(pl2e[i], pfn) == 0 && ++ preemptible && i && hypercall_preempt_check() ) ++ { ++ page->nr_validated_ptes = i; ++ err = -EAGAIN; ++ } ++ } while ( !err && i-- ); + + unmap_domain_page(pl2e); + +- page->u.inuse.type_info &= ~PGT_pae_xen_l2; +-} +- ++ if ( !err ) ++ page->u.inuse.type_info &= ~PGT_pae_xen_l2; + +-#if CONFIG_PAGING_LEVELS >= 3 ++ return err; ++} + +-static void free_l3_table(struct page_info *page) ++static int free_l3_table(struct page_info *page, int preemptible) + { + struct domain *d = page_get_owner(page); + unsigned long pfn = page_to_mfn(page); + l3_pgentry_t *pl3e; +- int i; ++ unsigned int i = page->nr_validated_ptes - !page->partial_pte; ++ int rc = 0; + + #ifdef DOMAIN_DESTRUCT_AVOID_RECURSION + if ( d->arch.relmem == RELMEM_l3 ) +- return; ++ return 0; + #endif + + pl3e = map_domain_page(pfn); + +- for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) ++ do { + if ( is_guest_l3_slot(i) ) + { +- 
put_page_from_l3e(pl3e[i], pfn); ++ rc = put_page_from_l3e(pl3e[i], pfn, preemptible); ++ if ( rc > 0 ) ++ continue; ++ if ( rc ) ++ break; + unadjust_guest_l3e(pl3e[i], d); + } ++ } while ( i-- ); + + unmap_domain_page(pl3e); +-} + +-#endif ++ if ( rc == -EAGAIN ) ++ { ++ page->nr_validated_ptes = i; ++ page->partial_pte = 1; ++ } ++ else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 ) ++ { ++ page->nr_validated_ptes = i + 1; ++ page->partial_pte = 0; ++ rc = -EAGAIN; ++ } ++ return rc > 0 ? 0 : rc; ++} + + #if CONFIG_PAGING_LEVELS >= 4 +- +-static void free_l4_table(struct page_info *page) ++static int free_l4_table(struct page_info *page, int preemptible) + { + struct domain *d = page_get_owner(page); + unsigned long pfn = page_to_mfn(page); + l4_pgentry_t *pl4e = page_to_virt(page); +- int i; ++ unsigned int i = page->nr_validated_ptes - !page->partial_pte; ++ int rc = 0; + + #ifdef DOMAIN_DESTRUCT_AVOID_RECURSION + if ( d->arch.relmem == RELMEM_l4 ) +- return; ++ return 0; + #endif + +- for ( i = 0; i < L4_PAGETABLE_ENTRIES; i++ ) ++ do { + if ( is_guest_l4_slot(d, i) ) +- put_page_from_l4e(pl4e[i], pfn); +-} ++ rc = put_page_from_l4e(pl4e[i], pfn, preemptible); ++ } while ( rc >= 0 && i-- ); + ++ if ( rc == -EAGAIN ) ++ { ++ page->nr_validated_ptes = i; ++ page->partial_pte = 1; ++ } ++ else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 ) ++ { ++ page->nr_validated_ptes = i + 1; ++ page->partial_pte = 0; ++ rc = -EAGAIN; ++ } ++ return rc > 0 ? 0 : rc; ++} ++#else ++#define free_l4_table(page, preemptible) (-EINVAL) + #endif + + static void page_lock(struct page_info *page) +@@ -1560,7 +1643,7 @@ static int mod_l2_entry(l2_pgentry_t *pl + return rc; + } + +- if ( unlikely(!get_page_from_l2e(nl2e, pfn, d)) ) ++ if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) ) + return page_unlock(l2pg), 0; + + adjust_guest_l2e(nl2e, d); +@@ -1583,24 +1666,23 @@ static int mod_l2_entry(l2_pgentry_t *pl + return rc; + } + +-#if CONFIG_PAGING_LEVELS >= 3 +- + /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */ + static int mod_l3_entry(l3_pgentry_t *pl3e, + l3_pgentry_t nl3e, + unsigned long pfn, +- int preserve_ad) ++ int preserve_ad, ++ int preemptible) + { + l3_pgentry_t ol3e; + struct vcpu *curr = current; + struct domain *d = curr->domain; + struct page_info *l3pg = mfn_to_page(pfn); +- int rc = 1; ++ int rc = 0; + + if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) ) + { + MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e); +- return 0; ++ return -EINVAL; + } + + /* +@@ -1608,12 +1690,12 @@ static int mod_l3_entry(l3_pgentry_t *pl + * would be a pain to ensure they remain continuously valid throughout. + */ + if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) ) +- return 0; ++ return -EINVAL; + + page_lock(l3pg); + + if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) ) +- return page_unlock(l3pg), 0; ++ return page_unlock(l3pg), -EFAULT; + + if ( l3e_get_flags(nl3e) & _PAGE_PRESENT ) + { +@@ -1622,7 +1704,7 @@ static int mod_l3_entry(l3_pgentry_t *pl + page_unlock(l3pg); + MEM_LOG("Bad L3 flags %x", + l3e_get_flags(nl3e) & l3_disallow_mask(d)); +- return 0; ++ return -EINVAL; + } + + /* Fast path for identical mapping and presence. */ +@@ -1631,28 +1713,30 @@ static int mod_l3_entry(l3_pgentry_t *pl + adjust_guest_l3e(nl3e, d); + rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad); + page_unlock(l3pg); +- return rc; ++ return rc ? 
0 : -EFAULT; + } + +- if ( unlikely(!get_page_from_l3e(nl3e, pfn, d)) ) +- return page_unlock(l3pg), 0; ++ rc = get_page_from_l3e(nl3e, pfn, d, preemptible); ++ if ( unlikely(rc < 0) ) ++ return page_unlock(l3pg), rc; ++ rc = 0; + + adjust_guest_l3e(nl3e, d); + if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, + preserve_ad)) ) + { + ol3e = nl3e; +- rc = 0; ++ rc = -EFAULT; + } + } + else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, + preserve_ad)) ) + { + page_unlock(l3pg); +- return 0; ++ return -EFAULT; + } + +- if ( likely(rc) ) ++ if ( likely(rc == 0) ) + { + if ( !create_pae_xen_mappings(d, pl3e) ) + BUG(); +@@ -1661,36 +1745,35 @@ static int mod_l3_entry(l3_pgentry_t *pl + } + + page_unlock(l3pg); +- put_page_from_l3e(ol3e, pfn); ++ put_page_from_l3e(ol3e, pfn, 0); + return rc; + } + +-#endif +- + #if CONFIG_PAGING_LEVELS >= 4 + + /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */ + static int mod_l4_entry(l4_pgentry_t *pl4e, + l4_pgentry_t nl4e, + unsigned long pfn, +- int preserve_ad) ++ int preserve_ad, ++ int preemptible) + { + struct vcpu *curr = current; + struct domain *d = curr->domain; + l4_pgentry_t ol4e; + struct page_info *l4pg = mfn_to_page(pfn); +- int rc = 1; ++ int rc = 0; + + if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) ) + { + MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e); +- return 0; ++ return -EINVAL; + } + + page_lock(l4pg); + + if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) ) +- return page_unlock(l4pg), 0; ++ return page_unlock(l4pg), -EFAULT; + + if ( l4e_get_flags(nl4e) & _PAGE_PRESENT ) + { +@@ -1699,7 +1782,7 @@ static int mod_l4_entry(l4_pgentry_t *pl + page_unlock(l4pg); + MEM_LOG("Bad L4 flags %x", + l4e_get_flags(nl4e) & L4_DISALLOW_MASK); +- return 0; ++ return -EINVAL; + } + + /* Fast path for identical mapping and presence. */ +@@ -1708,29 +1791,31 @@ static int mod_l4_entry(l4_pgentry_t *pl + adjust_guest_l4e(nl4e, d); + rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad); + page_unlock(l4pg); +- return rc; ++ return rc ? 0 : -EFAULT; + } + +- if ( unlikely(!get_page_from_l4e(nl4e, pfn, d)) ) +- return page_unlock(l4pg), 0; ++ rc = get_page_from_l4e(nl4e, pfn, d, preemptible); ++ if ( unlikely(rc < 0) ) ++ return page_unlock(l4pg), rc; ++ rc = 0; + + adjust_guest_l4e(nl4e, d); + if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, + preserve_ad)) ) + { + ol4e = nl4e; +- rc = 0; ++ rc = -EFAULT; + } + } + else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, + preserve_ad)) ) + { + page_unlock(l4pg); +- return 0; ++ return -EFAULT; + } + + page_unlock(l4pg); +- put_page_from_l4e(ol4e, pfn); ++ put_page_from_l4e(ol4e, pfn, 0); + return rc; + } + +@@ -1788,9 +1873,11 @@ int get_page(struct page_info *page, str + } + + +-static int alloc_page_type(struct page_info *page, unsigned long type) ++static int alloc_page_type(struct page_info *page, unsigned long type, ++ int preemptible) + { + struct domain *owner = page_get_owner(page); ++ int rc; + + /* A page table is dirtied when its type count becomes non-zero. 
*/ + if ( likely(owner != NULL) ) +@@ -1799,30 +1886,65 @@ static int alloc_page_type(struct page_i + switch ( type & PGT_type_mask ) + { + case PGT_l1_page_table: +- return alloc_l1_table(page); ++ alloc_l1_table(page); ++ rc = 0; ++ break; + case PGT_l2_page_table: +- return alloc_l2_table(page, type); ++ rc = alloc_l2_table(page, type, preemptible); ++ break; + case PGT_l3_page_table: +- return alloc_l3_table(page); ++ rc = alloc_l3_table(page, preemptible); ++ break; + case PGT_l4_page_table: +- return alloc_l4_table(page); ++ rc = alloc_l4_table(page, preemptible); ++ break; + case PGT_seg_desc_page: +- return alloc_segdesc_page(page); ++ rc = alloc_segdesc_page(page); ++ break; + default: + printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n", + type, page->u.inuse.type_info, + page->count_info); ++ rc = -EINVAL; + BUG(); + } + +- return 0; ++ /* No need for atomic update of type_info here: noone else updates it. */ ++ wmb(); ++ if ( rc == -EAGAIN ) ++ { ++ page->u.inuse.type_info |= PGT_partial; ++ } ++ else if ( rc == -EINTR ) ++ { ++ ASSERT((page->u.inuse.type_info & ++ (PGT_count_mask|PGT_validated|PGT_partial)) == 1); ++ page->u.inuse.type_info &= ~PGT_count_mask; ++ } ++ else if ( rc ) ++ { ++ ASSERT(rc < 0); ++ MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %" ++ PRtype_info ": caf=%08x taf=%" PRtype_info, ++ page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), ++ type, page->count_info, page->u.inuse.type_info); ++ page->u.inuse.type_info = 0; ++ } ++ else ++ { ++ page->u.inuse.type_info |= PGT_validated; ++ } ++ ++ return rc; + } + + +-void free_page_type(struct page_info *page, unsigned long type) ++int free_page_type(struct page_info *page, unsigned long type, ++ int preemptible) + { + struct domain *owner = page_get_owner(page); + unsigned long gmfn; ++ int rc; + + if ( likely(owner != NULL) ) + { +@@ -1842,7 +1964,7 @@ void free_page_type(struct page_info *pa + paging_mark_dirty(owner, page_to_mfn(page)); + + if ( shadow_mode_refcounts(owner) ) +- return; ++ return 0; + + gmfn = mfn_to_gmfn(owner, page_to_mfn(page)); + ASSERT(VALID_M2P(gmfn)); +@@ -1850,42 +1972,80 @@ void free_page_type(struct page_info *pa + } + } + ++ if ( !(type & PGT_partial) ) ++ { ++ page->nr_validated_ptes = 1U << PAGETABLE_ORDER; ++ page->partial_pte = 0; ++ } + switch ( type & PGT_type_mask ) + { + case PGT_l1_page_table: + free_l1_table(page); ++ rc = 0; + break; +- + case PGT_l2_page_table: +- free_l2_table(page); ++ rc = free_l2_table(page, preemptible); + break; +- +-#if CONFIG_PAGING_LEVELS >= 3 + case PGT_l3_page_table: +- free_l3_table(page); +- break; ++#if CONFIG_PAGING_LEVELS == 3 ++ if ( !(type & PGT_partial) ) ++ page->nr_validated_ptes = L3_PAGETABLE_ENTRIES; + #endif +- +-#if CONFIG_PAGING_LEVELS >= 4 ++ rc = free_l3_table(page, preemptible); ++ break; + case PGT_l4_page_table: +- free_l4_table(page); ++ rc = free_l4_table(page, preemptible); + break; +-#endif +- + default: +- printk("%s: type %lx pfn %lx\n",__FUNCTION__, +- type, page_to_mfn(page)); ++ MEM_LOG("type %lx pfn %lx\n", type, page_to_mfn(page)); ++ rc = -EINVAL; + BUG(); + } ++ ++ /* No need for atomic update of type_info here: noone else updates it. */ ++ if ( rc == 0 ) ++ { ++ /* ++ * Record TLB information for flush later. We do not stamp page tables ++ * when running in shadow mode: ++ * 1. Pointless, since it's the shadow pt's which must be tracked. ++ * 2. Shadow mode reuses this field for shadowed page tables to ++ * store flags info -- we don't want to conflict with that. 
++ */ ++ if ( !(shadow_mode_enabled(page_get_owner(page)) && ++ (page->count_info & PGC_page_table)) ) ++ page->tlbflush_timestamp = tlbflush_current_time(); ++ wmb(); ++ page->u.inuse.type_info--; ++ } ++ else if ( rc == -EINTR ) ++ { ++ ASSERT(!(page->u.inuse.type_info & ++ (PGT_count_mask|PGT_validated|PGT_partial))); ++ if ( !(shadow_mode_enabled(page_get_owner(page)) && ++ (page->count_info & PGC_page_table)) ) ++ page->tlbflush_timestamp = tlbflush_current_time(); ++ wmb(); ++ page->u.inuse.type_info |= PGT_validated; ++ } ++ else ++ { ++ BUG_ON(rc != -EAGAIN); ++ wmb(); ++ page->u.inuse.type_info |= PGT_partial; ++ } ++ ++ return rc; + } + + +-void put_page_type(struct page_info *page) ++static int __put_page_type(struct page_info *page, ++ int preemptible) + { + unsigned long nx, x, y = page->u.inuse.type_info; + +- again: +- do { ++ for ( ; ; ) ++ { + x = y; + nx = x - 1; + +@@ -1894,21 +2054,19 @@ void put_page_type(struct page_info *pag + if ( unlikely((nx & PGT_count_mask) == 0) ) + { + if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) && +- likely(nx & PGT_validated) ) ++ likely(nx & (PGT_validated|PGT_partial)) ) + { + /* + * Page-table pages must be unvalidated when count is zero. The + * 'free' is safe because the refcnt is non-zero and validated + * bit is clear => other ops will spin or fail. + */ +- if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, +- x & ~PGT_validated)) != x) ) +- goto again; ++ nx = x & ~(PGT_validated|PGT_partial); ++ if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, ++ x, nx)) != x) ) ++ continue; + /* We cleared the 'valid bit' so we do the clean up. */ +- free_page_type(page, x); +- /* Carry on, but with the 'valid bit' now clear. */ +- x &= ~PGT_validated; +- nx &= ~PGT_validated; ++ return free_page_type(page, x, preemptible); + } + + /* +@@ -1922,25 +2080,33 @@ void put_page_type(struct page_info *pag + (page->count_info & PGC_page_table)) ) + page->tlbflush_timestamp = tlbflush_current_time(); + } ++ ++ if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) ++ break; ++ ++ if ( preemptible && hypercall_preempt_check() ) ++ return -EINTR; + } +- while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); ++ ++ return 0; + } + + +-int get_page_type(struct page_info *page, unsigned long type) ++static int __get_page_type(struct page_info *page, unsigned long type, ++ int preemptible) + { + unsigned long nx, x, y = page->u.inuse.type_info; + + ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2))); + +- again: +- do { ++ for ( ; ; ) ++ { + x = y; + nx = x + 1; + if ( unlikely((nx & PGT_count_mask) == 0) ) + { + MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page)); +- return 0; ++ return -EINVAL; + } + else if ( unlikely((x & PGT_count_mask) == 0) ) + { +@@ -1993,28 +2159,43 @@ int get_page_type(struct page_info *page + /* Don't log failure if it could be a recursive-mapping attempt. 
*/ + if ( ((x & PGT_type_mask) == PGT_l2_page_table) && + (type == PGT_l1_page_table) ) +- return 0; ++ return -EINVAL; + if ( ((x & PGT_type_mask) == PGT_l3_page_table) && + (type == PGT_l2_page_table) ) +- return 0; ++ return -EINVAL; + if ( ((x & PGT_type_mask) == PGT_l4_page_table) && + (type == PGT_l3_page_table) ) +- return 0; ++ return -EINVAL; + MEM_LOG("Bad type (saw %" PRtype_info " != exp %" PRtype_info ") " + "for mfn %lx (pfn %lx)", + x, type, page_to_mfn(page), + get_gpfn_from_mfn(page_to_mfn(page))); +- return 0; ++ return -EINVAL; + } + else if ( unlikely(!(x & PGT_validated)) ) + { +- /* Someone else is updating validation of this page. Wait... */ +- while ( (y = page->u.inuse.type_info) == x ) +- cpu_relax(); +- goto again; ++ if ( !(x & PGT_partial) ) ++ { ++ /* Someone else is updating validation of this page. Wait... */ ++ while ( (y = page->u.inuse.type_info) == x ) ++ { ++ if ( preemptible && hypercall_preempt_check() ) ++ return -EINTR; ++ cpu_relax(); ++ } ++ continue; ++ } ++ /* Type ref count was left at 1 when PGT_partial got set. */ ++ ASSERT((x & PGT_count_mask) == 1); ++ nx = x & ~PGT_partial; + } ++ ++ if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) ) ++ break; ++ ++ if ( preemptible && hypercall_preempt_check() ) ++ return -EINTR; + } +- while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); + + if ( unlikely((x & PGT_type_mask) != type) ) + { +@@ -2032,25 +2213,42 @@ int get_page_type(struct page_info *page + + if ( unlikely(!(nx & PGT_validated)) ) + { +- /* Try to validate page type; drop the new reference on failure. */ +- if ( unlikely(!alloc_page_type(page, type)) ) ++ if ( !(x & PGT_partial) ) + { +- MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %" +- PRtype_info ": caf=%08x taf=%" PRtype_info, +- page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)), +- type, page->count_info, page->u.inuse.type_info); +- /* Noone else can get a reference. We hold the only ref. */ +- page->u.inuse.type_info = 0; +- return 0; ++ page->nr_validated_ptes = 0; ++ page->partial_pte = 0; + } +- +- /* Noone else is updating simultaneously. */ +- __set_bit(_PGT_validated, &page->u.inuse.type_info); ++ return alloc_page_type(page, type, preemptible); + } + +- return 1; ++ return 0; + } + ++void put_page_type(struct page_info *page) ++{ ++ int rc = __put_page_type(page, 0); ++ ASSERT(rc == 0); ++ (void)rc; ++} ++ ++int get_page_type(struct page_info *page, unsigned long type) ++{ ++ int rc = __get_page_type(page, type, 0); ++ if ( likely(rc == 0) ) ++ return 1; ++ ASSERT(rc == -EINVAL); ++ return 0; ++} ++ ++int put_page_type_preemptible(struct page_info *page) ++{ ++ return __put_page_type(page, 1); ++} ++ ++int get_page_type_preemptible(struct page_info *page, unsigned long type) ++{ ++ return __get_page_type(page, type, 1); ++} + + void cleanup_page_cacheattr(struct page_info *page) + { +@@ -2087,7 +2285,7 @@ int new_guest_cr3(unsigned long mfn) + l4e_from_pfn( + mfn, + (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)), +- pagetable_get_pfn(v->arch.guest_table), 0); ++ pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0; + if ( unlikely(!okay) ) + { + MEM_LOG("Error while installing new compat baseptr %lx", mfn); +@@ -2102,7 +2300,7 @@ int new_guest_cr3(unsigned long mfn) + #endif + okay = paging_mode_refcounts(d) + ? 
get_page_from_pagenr(mfn, d) +- : get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d); ++ : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0); + if ( unlikely(!okay) ) + { + MEM_LOG("Error while installing new baseptr %lx", mfn); +@@ -2276,9 +2474,7 @@ int do_mmuext_op( + { + if ( hypercall_preempt_check() ) + { +- rc = hypercall_create_continuation( +- __HYPERVISOR_mmuext_op, "hihi", +- uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); ++ rc = -EAGAIN; + break; + } + +@@ -2325,10 +2521,14 @@ int do_mmuext_op( + if ( paging_mode_refcounts(FOREIGNDOM) ) + break; + +- okay = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM); ++ rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1); ++ okay = !rc; + if ( unlikely(!okay) ) + { +- MEM_LOG("Error while pinning mfn %lx", mfn); ++ if ( rc == -EINTR ) ++ rc = -EAGAIN; ++ else if ( rc != -EAGAIN ) ++ MEM_LOG("Error while pinning mfn %lx", mfn); + break; + } + +@@ -2373,8 +2573,11 @@ int do_mmuext_op( + { + put_page_and_type(page); + put_page(page); +- /* A page is dirtied when its pin status is cleared. */ +- paging_mark_dirty(d, mfn); ++ if ( !rc ) ++ { ++ /* A page is dirtied when its pin status is cleared. */ ++ paging_mark_dirty(d, mfn); ++ } + } + else + { +@@ -2398,8 +2601,8 @@ int do_mmuext_op( + if ( paging_mode_refcounts(d) ) + okay = get_page_from_pagenr(mfn, d); + else +- okay = get_page_and_type_from_pagenr( +- mfn, PGT_root_page_table, d); ++ okay = !get_page_and_type_from_pagenr( ++ mfn, PGT_root_page_table, d, 0); + if ( unlikely(!okay) ) + { + MEM_LOG("Error while installing new mfn %lx", mfn); +@@ -2517,6 +2720,11 @@ int do_mmuext_op( + guest_handle_add_offset(uops, 1); + } + ++ if ( rc == -EAGAIN ) ++ rc = hypercall_create_continuation( ++ __HYPERVISOR_mmuext_op, "hihi", ++ uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); ++ + process_deferred_ops(); + + perfc_add(num_mmuext_ops, i); +@@ -2576,9 +2784,7 @@ int do_mmu_update( + { + if ( hypercall_preempt_check() ) + { +- rc = hypercall_create_continuation( +- __HYPERVISOR_mmu_update, "hihi", +- ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); ++ rc = -EAGAIN; + break; + } + +@@ -2653,27 +2859,29 @@ int do_mmu_update( + cmd == MMU_PT_UPDATE_PRESERVE_AD); + } + break; +-#if CONFIG_PAGING_LEVELS >= 3 + case PGT_l3_page_table: + { + l3_pgentry_t l3e = l3e_from_intpte(req.val); +- okay = mod_l3_entry(va, l3e, mfn, +- cmd == MMU_PT_UPDATE_PRESERVE_AD); ++ rc = mod_l3_entry(va, l3e, mfn, ++ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1); ++ okay = !rc; + } + break; +-#endif + #if CONFIG_PAGING_LEVELS >= 4 + case PGT_l4_page_table: + { + l4_pgentry_t l4e = l4e_from_intpte(req.val); +- okay = mod_l4_entry(va, l4e, mfn, +- cmd == MMU_PT_UPDATE_PRESERVE_AD); ++ rc = mod_l4_entry(va, l4e, mfn, ++ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1); ++ okay = !rc; + } + break; + #endif + } + + put_page_type(page); ++ if ( rc == -EINTR ) ++ rc = -EAGAIN; + } + break; + +@@ -2742,6 +2950,11 @@ int do_mmu_update( + guest_handle_add_offset(ureqs, 1); + } + ++ if ( rc == -EAGAIN ) ++ rc = hypercall_create_continuation( ++ __HYPERVISOR_mmu_update, "hihi", ++ ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom); ++ + process_deferred_ops(); + + domain_mmap_cache_destroy(&mapcache); +@@ -3637,9 +3850,8 @@ static int ptwr_emulated_update( + nl1e = l1e_from_intpte(val); + if ( unlikely(!get_page_from_l1e(nl1e, d)) ) + { +- if ( (CONFIG_PAGING_LEVELS >= 3) && is_pv_32bit_domain(d) && +- (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg && +- 
(l1e_get_flags(nl1e) & _PAGE_PRESENT) ) ++ if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) && ++ !do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) ) + { + /* + * If this is an upper-half write to a PAE PTE then we assume that +Index: xen-3.3.1-testing/xen/include/asm-x86/mm.h +=================================================================== +--- xen-3.3.1-testing.orig/xen/include/asm-x86/mm.h ++++ xen-3.3.1-testing/xen/include/asm-x86/mm.h +@@ -59,6 +59,17 @@ struct page_info + u32 tlbflush_timestamp; + + /* ++ * When PGT_partial is true then this field is valid and indicates ++ * that PTEs in the range [0, @nr_validated_ptes) have been validated. ++ * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been ++ * partially validated. ++ */ ++ struct { ++ u16 nr_validated_ptes; ++ bool_t partial_pte; ++ }; ++ ++ /* + * Guest pages with a shadow. This does not conflict with + * tlbflush_timestamp since page table pages are explicitly not + * tracked for TLB-flush avoidance when a guest runs in shadow mode. +@@ -86,9 +97,12 @@ struct page_info + /* PAE only: is this an L2 page directory containing Xen-private mappings? */ + #define _PGT_pae_xen_l2 26 + #define PGT_pae_xen_l2 (1U<<_PGT_pae_xen_l2) ++/* Has this page been *partially* validated for use as its current type? */ ++#define _PGT_partial 25 ++#define PGT_partial (1U<<_PGT_partial) + +- /* 26-bit count of uses of this frame as its current type. */ +-#define PGT_count_mask ((1U<<26)-1) ++ /* 25-bit count of uses of this frame as its current type. */ ++#define PGT_count_mask ((1U<<25)-1) + + /* Cleared when the owning guest 'frees' this page. */ + #define _PGC_allocated 31 +@@ -154,7 +168,8 @@ extern unsigned long max_page; + extern unsigned long total_pages; + void init_frametable(void); + +-void free_page_type(struct page_info *page, unsigned long type); ++int free_page_type(struct page_info *page, unsigned long type, ++ int preemptible); + int _shadow_mode_refcounts(struct domain *d); + + void cleanup_page_cacheattr(struct page_info *page); +@@ -165,6 +180,8 @@ void put_page(struct page_info *page); + int get_page(struct page_info *page, struct domain *domain); + void put_page_type(struct page_info *page); + int get_page_type(struct page_info *page, unsigned long type); ++int put_page_type_preemptible(struct page_info *page); ++int get_page_type_preemptible(struct page_info *page, unsigned long type); + int get_page_from_l1e(l1_pgentry_t l1e, struct domain *d); + void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d); + +@@ -174,6 +191,19 @@ static inline void put_page_and_type(str + put_page(page); + } + ++static inline int put_page_and_type_preemptible(struct page_info *page, ++ int preemptible) ++{ ++ int rc = 0; ++ ++ if ( preemptible ) ++ rc = put_page_type_preemptible(page); ++ else ++ put_page_type(page); ++ if ( likely(rc == 0) ) ++ put_page(page); ++ return rc; ++} + + static inline int get_page_and_type(struct page_info *page, + struct domain *domain, diff --git a/18420-x86-page-type-preemptible-fix.patch b/18420-x86-page-type-preemptible-fix.patch new file mode 100644 index 0000000..55cdbe8 --- /dev/null +++ b/18420-x86-page-type-preemptible-fix.patch @@ -0,0 +1,51 @@ +# HG changeset patch +# User Keir Fraser +# Date 1220450168 -3600 +# Node ID c9db93b0660ae644491c862e47744a2349ba630f +# Parent 1e98ea5c860438a227e135701e6439b22826f52f +x86: Fix interpretation of get_l*e_linear_pagetable(). + +Broken by get_page_type() preemption patch (c/s 18412). 
+ +Signed-off-by: Keir Fraser + +Index: xen-3.3.0-testing/xen/arch/x86/mm.c +=================================================================== +--- xen-3.3.0-testing.orig/xen/arch/x86/mm.c ++++ xen-3.3.0-testing/xen/arch/x86/mm.c +@@ -762,9 +762,8 @@ get_page_from_l2e( + + rc = get_page_and_type_from_pagenr( + l2e_get_pfn(l2e), PGT_l1_page_table, d, 0); +- if ( unlikely(rc) && rc != -EAGAIN && +- get_l2_linear_pagetable(l2e, pfn, d) ) +- rc = -EINVAL; ++ if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) ) ++ rc = 0; + + return rc; + } +@@ -788,9 +787,8 @@ get_page_from_l3e( + + rc = get_page_and_type_from_pagenr( + l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible); +- if ( unlikely(rc) && rc != -EAGAIN && rc != -EINTR && +- get_l3_linear_pagetable(l3e, pfn, d) ) +- rc = -EINVAL; ++ if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) ) ++ rc = 0; + + return rc; + } +@@ -814,9 +812,8 @@ get_page_from_l4e( + + rc = get_page_and_type_from_pagenr( + l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible); +- if ( unlikely(rc) && rc != -EAGAIN && rc != -EINTR && +- get_l4_linear_pagetable(l4e, pfn, d) ) +- rc = -EINVAL; ++ if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) ) ++ rc = 0; + + return rc; + } diff --git a/32on64-extra-mem.patch b/32on64-extra-mem.patch index ffea46b..75ca973 100644 --- a/32on64-extra-mem.patch +++ b/32on64-extra-mem.patch @@ -1,8 +1,8 @@ -Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py +Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -2255,7 +2255,7 @@ class XendDomainInfo: +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py +@@ -2277,7 +2277,7 @@ class XendDomainInfo: vtd_mem = ((vtd_mem + 1023) / 1024) * 1024 # Make sure there's enough RAM available for the domain diff --git a/blktap-pv-cdrom.patch b/blktap-pv-cdrom.patch new file mode 100644 index 0000000..80bec88 --- /dev/null +++ b/blktap-pv-cdrom.patch @@ -0,0 +1,741 @@ +Index: xen-3.3.1-testing/tools/blktap/drivers/block-cdrom.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ xen-3.3.1-testing/tools/blktap/drivers/block-cdrom.c 2008-09-10 14:22:17.000000000 -0600 +@@ -0,0 +1,533 @@ ++/* block-cdrom.c ++ * ++ * simple slow synchronous cdrom disk implementation. Based off ++ * of block-sync.c ++ * ++ * (c) 2006 Andrew Warfield and Julian Chesterfield ++ * (c) 2008 Novell Inc. 
++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License version 2 ++ * as published by the Free Software Foundation; or, when distributed ++ * separately from the Linux kernel or incorporated into other ++ * software packages, subject to the following license: ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this source file (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, copy, modify, ++ * merge, publish, distribute, sublicense, and/or sell copies of the Software, ++ * and to permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "tapdisk.h" ++#include ++ ++struct tdcdrom_state { ++ int fd; ++ int xs_fd; /* for xen event polling */ ++ int media_present; ++ int media_changed; ++ struct xs_handle *xs_handle; ++ char *dev_name; ++ int dev_type; ++ td_flag_t flags; ++}; ++ ++#define BLOCK_DEVICE 0 ++#define FILE_DEVICE 1 ++#define CDROM_DEFAULT_SECTOR_SIZE 2048 ++#define CDROM_DEFAULT_SIZE 2000000000 ++ ++/*Get Image size, secsize*/ ++static void get_image_info(struct disk_driver *dd) ++{ ++ int ret; ++ long size; ++ unsigned long total_size; ++ struct statvfs statBuf; ++ struct stat stat; ++ struct td_state *s = dd->td_state; ++ struct tdcdrom_state *prv = (struct tdcdrom_state *)dd->private; ++ ++ s->size = 0; ++ s->sector_size = CDROM_DEFAULT_SECTOR_SIZE; ++ s->info = (VDISK_CDROM | VDISK_REMOVABLE | VDISK_READONLY); ++ prv->media_present = 0; ++ ++ ret = fstat(prv->fd, &stat); ++ if (ret != 0) { ++ DPRINTF("ERROR: fstat failed, Couldn't stat image"); ++ return; ++ } ++ ++ if (S_ISBLK(stat.st_mode)) { ++ /*Accessing block device directly*/ ++ int status; ++ ++ prv->dev_type = BLOCK_DEVICE; ++ status = ioctl(prv->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT); ++ if (status == CDS_DISC_OK) { ++ prv->media_present = 1; ++ if ((ret =ioctl(prv->fd,BLKGETSIZE,&s->size))!=0) { ++ DPRINTF("ERR: BLKGETSIZE failed, couldn't stat image"); ++ s->size = CDROM_DEFAULT_SIZE; ++ } ++ } ++ else { ++ s->size = CDROM_DEFAULT_SIZE; ++ } ++ /*Get the sector size*/ ++#if defined(BLKSSZGET) ++ { ++ int arg; ++ s->sector_size = CDROM_DEFAULT_SECTOR_SIZE; ++ ioctl(prv->fd, BLKSSZGET, &s->sector_size); ++ ++ if (s->sector_size != CDROM_DEFAULT_SECTOR_SIZE) ++ DPRINTF("Note: sector size is %ld (not %d)\n", ++ s->sector_size, CDROM_DEFAULT_SECTOR_SIZE); ++ } ++#else ++ s->sector_size = CDROM_DEFAULT_SECTOR_SIZE; ++#endif ++ DPRINTF("Block Device: Image size: %llu", ++ (long long unsigned)s->size); ++ DPRINTF("\t media_present: %d sector_size: %lu\n", ++ prv->media_present, s->sector_size); ++ } else { ++ /*Local 
file? try fstat instead*/ ++ prv->dev_type = FILE_DEVICE; ++ prv->media_present = 1; ++ s->size = (stat.st_size >> SECTOR_SHIFT); ++ s->sector_size = DEFAULT_SECTOR_SIZE; ++ DPRINTF("Local File: Image size: %llu\n", ++ (long long unsigned)s->size); ++ } ++ return; ++} ++ ++static inline void init_fds(struct disk_driver *dd) ++{ ++ int i; ++ struct tdcdrom_state *prv = (struct tdcdrom_state *)dd->private; ++ ++ for(i = 0; i < MAX_IOFD; i++) ++ dd->io_fd[i] = 0; ++ ++ prv->xs_handle = xs_daemon_open(); ++ prv->xs_fd = xs_fileno(prv->xs_handle); ++ dd->io_fd[0] = prv->xs_fd; ++} ++ ++void open_device (struct disk_driver *dd) ++{ ++ struct tdcdrom_state *prv = (struct tdcdrom_state *)dd->private; ++ int o_flags; ++ ++ o_flags = O_NONBLOCK | O_DIRECT | O_LARGEFILE | ++ ((prv->flags == TD_RDONLY) ? O_RDONLY : O_RDWR); ++ ++ if (prv->fd < 0) { ++ prv->fd = open(prv->dev_name, o_flags); ++ if ( (prv->fd == -1) && (errno == EINVAL) ) { ++ /* Maybe O_DIRECT isn't supported. */ ++ o_flags &= ~O_DIRECT; ++ prv->fd = open(prv->dev_name, o_flags); ++ if (prv->fd != -1) { ++ DPRINTF("WARNING: Accessing image without O_DIRECT! (%s)\n", prv->dev_name); ++ } ++ } ++ } ++ ++ if (prv->fd != -1) { ++ ++ get_image_info(dd); ++ ++ if (prv->dev_type == BLOCK_DEVICE) { ++ int status; ++ status = ioctl(prv->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT); ++ switch (status) { ++ case CDS_DISC_OK: ++ prv->media_present = 1; ++ break; ++ default: ++ prv->media_present = 0; ++ } ++ } ++ else ++ prv->media_present = 1; ++ } ++} ++ ++/* ++ * Main entry point, called when first loaded ++ */ ++int tdcdrom_open (struct disk_driver *dd, const char *name, td_flag_t flags) ++{ ++ int ret = 0; ++ struct tdcdrom_state *prv = (struct tdcdrom_state *)dd->private; ++ ++ asprintf(&prv->dev_name, "%s", name); ++ prv->fd = -1; ++ prv->media_changed = 0; ++ prv->media_present = 0; ++ prv->flags = flags; ++ init_fds(dd); ++ ++ open_device(dd); ++ ++ return ret; ++} ++ ++int tdcdrom_queue_read(struct disk_driver *dd, uint64_t sector, ++ int nb_sectors, char *buf, td_callback_t cb, ++ int id, void *private) ++{ ++ struct td_state *s = dd->td_state; ++ struct tdcdrom_state *prv = (struct tdcdrom_state *)dd->private; ++ int size = nb_sectors * s->sector_size; ++ uint64_t offset = sector * (uint64_t)s->sector_size; ++ int ret; ++ ++ if (prv->fd == -1 || prv->media_present == 0) { ++ ret = 0 - ENOMEDIUM; ++ return cb(dd, (ret < 0) ? ret: 0, sector, nb_sectors, id, private); ++ } ++ size = nb_sectors * 512; ++ offset = sector * (uint64_t)512; ++ ret = lseek(prv->fd, offset, SEEK_SET); ++ if (ret != (off_t)-1) { ++ ret = read(prv->fd, buf, size); ++ if (ret != size) { ++ ret = 0 - errno; ++ } else { ++ ret = 1; ++ } ++ } else ret = 0 - errno; ++ ++ return cb(dd, (ret < 0) ? ret: 0, sector, nb_sectors, id, private); ++} ++ ++int tdcdrom_queue_write(struct disk_driver *dd, uint64_t sector, ++ int nb_sectors, char *buf, td_callback_t cb, ++ int id, void *private) ++{ ++ struct td_state *s = dd->td_state; ++ struct tdcdrom_state *prv = (struct tdcdrom_state *)dd->private; ++ int size = nb_sectors * s->sector_size; ++ uint64_t offset = sector * (uint64_t)s->sector_size; ++ int ret = 0; ++ ++ if (prv->fd == -1 || prv->media_present == 0) { ++ ret = 0 - ENOMEDIUM; ++ return cb(dd, (ret < 0) ? 
ret: 0, sector, nb_sectors, id, private); ++ } ++ ret = lseek(prv->fd, offset, SEEK_SET); ++ if (ret != (off_t)-1) { ++ ret = write(prv->fd, buf, size); ++ if (ret != size) { ++ ret = 0 - errno; ++ } else { ++ ret = 1; ++ } ++ } else ret = 0 - errno; ++ ++ return cb(dd, (ret < 0) ? ret : 0, sector, nb_sectors, id, private); ++} ++ ++int tdcdrom_queue_packet(struct disk_driver *dd, uint64_t sector, ++ int nb_sectors, char *buf, td_callback_t cb, ++ int id, void *private) ++{ ++ struct td_state *s = dd->td_state; ++ struct tdcdrom_state *prv = (struct tdcdrom_state *)dd->private; ++ int size = nb_sectors * s->sector_size; ++ uint64_t offset = sector * (uint64_t)s->sector_size; ++ int ret = 0; ++ ++ union xen_block_packet *sp; ++ struct xen_cdrom_packet *xcp; ++ struct xen_cdrom_support *xcs; ++ struct xen_cdrom_open *xco; ++ struct xen_cdrom_media_info *xcmi; ++ struct xen_cdrom_media_changed *xcmc; ++ struct cdrom_generic_command cgc; ++ struct vcd_generic_command * vgc; ++ struct request_sense sense; ++ ++ sp = (union xen_block_packet *)buf; ++ sp->err = 0; ++ sp->ret = 0; ++ switch(sp->type) { ++ case XEN_TYPE_CDROM_SUPPORT: ++ xcs = &(sp->xcs); ++ xcs->supported = 1; ++ break; ++ case XEN_TYPE_CDROM_PACKET: ++ xcp = &(sp->xcp); ++ vgc = (struct vcd_generic_command *)(buf + PACKET_PAYLOAD_OFFSET); ++ ++ memset( &cgc, 0, sizeof(struct cdrom_generic_command)); ++ memcpy(cgc.cmd, vgc->cmd, CDROM_PACKET_SIZE); ++ cgc.stat = vgc->stat; ++ cgc.data_direction = vgc->data_direction; ++ cgc.quiet = vgc->quiet; ++ cgc.timeout = vgc->timeout; ++ ++ if (prv->fd == -1) { ++ xcp = &(sp->xcp); ++ xcp->ret = -1; ++ xcp->err = 0 - ENODEV; ++ return cb(dd, (ret < 0) ? ret: 0, sector, nb_sectors, id, private); ++ } ++ if (prv->dev_type == FILE_DEVICE) { ++ DPRINTF("%s() FILE_DEVICE inappropriate packetcmd \n",__func__); ++ return cb(dd, (ret < 0) ? 
ret: 0, sector, nb_sectors, id, private); ++ } ++ switch ( cgc.cmd[0]) { ++ case GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL: ++ { ++ int lock; ++ lock = cgc.cmd[4] & 1; ++ if (ioctl (prv->fd, CDROM_LOCKDOOR, lock) < 0) { ++ xcp->err = -(errno); ++ xcp->ret = -1; ++ } ++ } ++ break; ++ case GPCMD_START_STOP_UNIT: ++ { ++ int start, eject; ++ start = cgc.cmd[4] & 1; ++ eject = (cgc.cmd[4] >> 1) & 1; ++ if (eject && !start) { ++ if (ioctl (prv->fd, CDROMEJECT, NULL) < 0) { ++ xcp->err = -(errno); ++ xcp->ret = -1; ++ } ++ } else if (eject && start) { ++ if (ioctl (prv->fd, CDROMCLOSETRAY, NULL) < 0) { ++ xcp->err = -(errno); ++ xcp->ret = -1; ++ } ++ } ++ } ++ break; ++ default: ++ { ++ if (vgc->sense_offset) { ++ cgc.sense = &sense; ++ } ++ if (vgc->buffer_offset) { ++ cgc.buffer = malloc(vgc->buflen); ++ memcpy(cgc.buffer, (char *)sp + PACKET_BUFFER_OFFSET, vgc->buflen); ++ cgc.buflen = vgc->buflen; ++ } ++ if (ioctl (prv->fd, CDROM_SEND_PACKET, &cgc) < 0 ) { ++ xcp->err = -(errno); ++ xcp->ret = -1; ++ } ++ if (cgc.sense) { ++ memcpy((char *)sp + PACKET_SENSE_OFFSET, cgc.sense, sizeof(struct request_sense)); ++ } ++ if (cgc.buffer) { ++ vgc->buflen = cgc.buflen; ++ memcpy((char *)sp + PACKET_BUFFER_OFFSET, cgc.buffer, cgc.buflen); ++ free(cgc.buffer); ++ } ++ break; ++ } ++ } ++ break; ++ case XEN_TYPE_CDROM_OPEN: ++ { ++ char *buf = NULL; ++ unsigned int len; ++ struct stat statbuf; ++ int major = 0; ++ int minor = 0; ++ char *num; ++ ++ if (stat (prv->dev_name, &statbuf) == 0) { ++ major = major (statbuf.st_rdev); ++ minor = minor (statbuf.st_rdev); ++ } ++ xco = &(sp->xco); ++ if (xco->payload_offset) { ++ char * nodename; ++ char media_present[2]; ++ nodename = (char *)sp + xco->payload_offset; ++ asprintf(&buf, "%s/media-present", nodename); ++ if (!xs_read(prv->xs_handle, XBT_NULL, buf, &len)) { ++ sprintf(media_present, "%d", prv->media_present); ++ xs_write(prv->xs_handle, XBT_NULL, buf, media_present, strlen(media_present)); ++ xs_watch(prv->xs_handle, buf, "media-present"); ++ asprintf(&buf, "%s/params", nodename); ++ xs_watch(prv->xs_handle, buf, "params"); ++ asprintf(&num, "%x:%x", major, minor); ++ asprintf(&buf, "%s/physical-device", nodename); ++ xs_write(prv->xs_handle, XBT_NULL, buf, num, strlen(num)); ++ } ++ free(buf); ++ } ++ ++ xco->media_present = prv->media_present; ++ xco->sectors = 0; ++ xco->sector_size = 2048; ++ if (prv->media_present && prv->fd != -1 ) { ++ get_image_info(dd); ++ xco->sectors = s->size; ++ xco->sector_size = s->sector_size; ++ } ++ } ++ break; ++ case XEN_TYPE_CDROM_MEDIA_CHANGED: ++ xcmc = &(sp->xcmc); ++ xcmc->media_changed = prv->media_changed; ++ prv->media_changed = 0; ++ break; ++ default: ++ xcp = &(sp->xcp); ++ xcp->err = -EINVAL; ++ xcp->ret = -1; ++ break; ++ } ++ ++ return cb(dd, (ret < 0) ? 
ret: 0, sector, nb_sectors, id, private); ++} ++ ++int tdcdrom_submit(struct disk_driver *dd) ++{ ++ return 0; ++} ++ ++int tdcdrom_close(struct disk_driver *dd) ++{ ++ struct tdcdrom_state *prv = (struct tdcdrom_state *)dd->private; ++ ++ if (prv->fd != -1) { ++ close(prv->fd); ++ prv->fd = -1; ++ } ++ prv->xs_fd = -1; ++ xs_daemon_close(prv->xs_handle); ++ free(prv->dev_name); ++ ++ return 0; ++} ++ ++void tdcdrom_process_media_change_event(struct disk_driver *dd, char **vec) ++{ ++ struct tdcdrom_state *prv = (struct tdcdrom_state *)dd->private; ++ char *media_present = NULL; ++ unsigned int len; ++ ++ media_present = xs_read(prv->xs_handle, XBT_NULL, vec[XS_WATCH_PATH], &len); ++ if (strcmp(media_present, "0") == 0) { ++ close(prv->fd); ++ prv->fd = -1; ++ prv->media_present = 0; ++ } ++ else { ++ open_device(dd); ++ prv->media_changed = 1; ++ } ++} ++ ++void tdcrom_process_params_event(struct disk_driver *dd, char **vec) ++{ ++ struct tdcdrom_state *prv = (struct tdcdrom_state *)dd->private; ++ char * params = NULL; ++ unsigned int len; ++ ++ params = xs_read(prv->xs_handle, XBT_NULL, vec[XS_WATCH_PATH], &len); ++ if (params != NULL) { ++ char *cp = strchr(params, ':'); ++ if (cp) { ++ cp++; ++ if (prv->dev_name) ++ free(prv->dev_name); ++ asprintf(&prv->dev_name, "%s", cp); ++ if (prv->fd != -1) { ++ close(prv->fd); ++ prv->fd = -1; ++ } ++ open_device(dd); ++ prv->media_changed = 1; ++ } ++ } ++} ++ ++int tdcdrom_do_callbacks(struct disk_driver *dd, int sid) ++{ ++ struct tdcdrom_state *prv = (struct tdcdrom_state *)dd->private; ++ char **vec; ++ unsigned int num; ++ ++ vec = xs_read_watch(prv->xs_handle, &num); ++ if (!vec) ++ return 1; ++ ++ if (!strcmp(vec[XS_WATCH_TOKEN], "media-present")) { ++ tdcdrom_process_media_change_event(dd, vec); ++ goto out; ++ } ++ ++ if (!strcmp(vec[XS_WATCH_TOKEN], "params")) { ++ tdcrom_process_params_event(dd, vec); ++ goto out; ++ } ++ ++ out: ++ free(vec); ++ return 1; ++} ++ ++int tdcdrom_get_parent_id(struct disk_driver *dd, struct disk_id *id) ++{ ++ return TD_NO_PARENT; ++} ++ ++int tdcdrom_validate_parent(struct disk_driver *dd, ++ struct disk_driver *parent, td_flag_t flags) ++{ ++ return -EINVAL; ++} ++ ++struct tap_disk tapdisk_cdrom = { ++ .disk_type = "tapdisk_cdrom", ++ .private_data_size = sizeof(struct tdcdrom_state), ++ .td_open = tdcdrom_open, ++ .td_queue_read = tdcdrom_queue_read, ++ .td_queue_packet = tdcdrom_queue_packet, ++ .td_queue_write = tdcdrom_queue_write, ++ .td_submit = tdcdrom_submit, ++ .td_close = tdcdrom_close, ++ .td_do_callbacks = tdcdrom_do_callbacks, ++ .td_get_parent_id = tdcdrom_get_parent_id, ++ .td_validate_parent = tdcdrom_validate_parent ++}; +Index: xen-3.3.1-testing/xen/include/public/io/cdromif.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ xen-3.3.1-testing/xen/include/public/io/cdromif.h 2008-09-10 13:19:09.000000000 -0600 +@@ -0,0 +1,122 @@ ++/****************************************************************************** ++ * cdromif.h ++ * ++ * Shared definitions between backend driver and Xen guest Virtual CDROM ++ * block device. 
++ * ++ * Copyright (c) 2008, Pat Campell plc@novell.com ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a copy ++ * of this source file (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, copy, modify, ++ * merge, publish, distribute, sublicense, and/or sell copies of the Software, ++ * and to permit persons to whom the Software is furnished to do so, subject to ++ * the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING ++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS ++ * IN THE SOFTWARE. ++ */ ++ ++#ifndef __XEN_PUBLIC_IO_CDROMIF_H__ ++#define __XEN_PUBLIC_IO_CDROMIF_H__ ++ ++/* ++ * Queries backend for CDROM support ++ */ ++#define XEN_TYPE_CDROM_SUPPORT _IO('c', 1) ++ ++struct xen_cdrom_support ++{ ++ uint32_t type; ++ int8_t ret; /* returned, 0 succeded, -1 error */ ++ int8_t err; /* returned, backend errno */ ++ int8_t supported; /* returned, 1 supported */ ++}; ++ ++/* ++ * Opens backend device, returns drive geometry or ++ * any encountered errors ++ */ ++#define XEN_TYPE_CDROM_OPEN _IO('c', 2) ++ ++struct xen_cdrom_open ++{ ++ uint32_t type; ++ int8_t ret; ++ int8_t err; ++ int8_t pad; ++ int8_t media_present; /* returned */ ++ uint32_t sectors; /* returned */ ++ uint32_t sector_size; /* returned */ ++ int32_t payload_offset; /* offset to backend node name payload */ ++}; ++ ++/* ++ * Queries backend for media changed status ++ */ ++#define XEN_TYPE_CDROM_MEDIA_CHANGED _IO('c', 3) ++ ++struct xen_cdrom_media_changed ++{ ++ uint32_t type; ++ int8_t ret; ++ int8_t err; ++ int8_t media_changed; /* returned */ ++}; ++ ++/* ++ * Sends vcd generic CDROM packet to backend, followed ++ * immediately by the vcd_generic_command payload ++ */ ++#define XEN_TYPE_CDROM_PACKET _IO('c', 4) ++ ++struct xen_cdrom_packet ++{ ++ uint32_t type; ++ int8_t ret; ++ int8_t err; ++ int8_t pad[2]; ++ int32_t payload_offset; /* offset to struct vcd_generic_command payload */ ++}; ++ ++/* CDROM_PACKET_COMMAND, payload for XEN_TYPE_CDROM_PACKET */ ++struct vcd_generic_command ++{ ++ uint8_t cmd[CDROM_PACKET_SIZE]; ++ uint8_t pad[4]; ++ uint32_t buffer_offset; ++ uint32_t buflen; ++ int32_t stat; ++ uint32_t sense_offset; ++ uint8_t data_direction; ++ uint8_t pad1[3]; ++ int32_t quiet; ++ int32_t timeout; ++}; ++ ++union xen_block_packet ++{ ++ uint32_t type; ++ int8_t ret; ++ int8_t err; ++ struct xen_cdrom_support xcs; ++ struct xen_cdrom_open xco; ++ struct xen_cdrom_media_changed xcmc; ++ struct xen_cdrom_packet xcp; ++}; ++ ++#define PACKET_PAYLOAD_OFFSET (sizeof(struct xen_cdrom_packet)) ++#define PACKET_SENSE_OFFSET (PACKET_PAYLOAD_OFFSET + sizeof(struct vcd_generic_command)) ++#define PACKET_BUFFER_OFFSET (PACKET_SENSE_OFFSET + sizeof(struct request_sense)) ++#define MAX_PACKET_DATA (PAGE_SIZE - sizeof(struct xen_cdrom_packet) - \ ++ sizeof(struct vcd_generic_command) - sizeof(struct request_sense)) ++ ++#endif +Index: xen-3.3.1-testing/tools/blktap/drivers/Makefile 
+=================================================================== +--- xen-3.3.1-testing.orig/tools/blktap/drivers/Makefile 2008-09-10 11:28:21.000000000 -0600 ++++ xen-3.3.1-testing/tools/blktap/drivers/Makefile 2008-09-10 13:31:58.000000000 -0600 +@@ -24,8 +24,9 @@ + $(warning *** libgcrypt not installed: falling back to libcrypto ***) + endif + +-LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) -L../lib -lblktap +-LDFLAGS_img := $(LIBAIO_DIR)/libaio.a $(CRYPT_LIB) -lpthread -lz ++LDFLAGS_xen := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) ++LDFLAGS_blktapctrl := $(LDFLAGS_xen) -L../lib -lblktap ++LDFLAGS_img := $(LIBAIO_DIR)/libaio.a $(CRYPT_LIB) -lpthread -lz $(LDFLAGS_xen) + + BLK-OBJS-y := block-aio.o + BLK-OBJS-y += block-sync.o +@@ -33,6 +34,7 @@ + BLK-OBJS-y += block-ram.o + BLK-OBJS-y += block-qcow.o + BLK-OBJS-y += block-qcow2.o ++BLK-OBJS-y += block-cdrom.o + BLK-OBJS-y += aes.o + BLK-OBJS-y += tapaio.o + BLK-OBJS-$(CONFIG_Linux) += blk_linux.o +Index: xen-3.3.1-testing/tools/blktap/drivers/tapdisk.h +=================================================================== +--- xen-3.3.1-testing.orig/tools/blktap/drivers/tapdisk.h 2008-09-10 11:28:24.000000000 -0600 ++++ xen-3.3.1-testing/tools/blktap/drivers/tapdisk.h 2008-09-10 14:09:34.000000000 -0600 +@@ -137,6 +137,9 @@ + int (*td_get_parent_id) (struct disk_driver *dd, struct disk_id *id); + int (*td_validate_parent)(struct disk_driver *dd, + struct disk_driver *p, td_flag_t flags); ++ int (*td_queue_packet) (struct disk_driver *dd, uint64_t sector, ++ int nb_sectors, char *buf, td_callback_t cb, ++ int id, void *prv); + }; + + typedef struct disk_info { +@@ -160,6 +163,7 @@ + extern struct tap_disk tapdisk_ram; + extern struct tap_disk tapdisk_qcow; + extern struct tap_disk tapdisk_qcow2; ++extern struct tap_disk tapdisk_cdrom; + + + /*Define Individual Disk Parameters here */ +@@ -240,6 +244,17 @@ + #endif + }; + ++static disk_info_t cdrom_disk = { ++ DISK_TYPE_CDROM, ++ "raw image (cdrom)", ++ "cdrom", ++ 0, ++ 1, ++#ifdef TAPDISK ++ &tapdisk_cdrom, ++#endif ++}; ++ + /*Main disk info array */ + static disk_info_t *dtypes[] = { + &aio_disk, +Index: xen-3.3.1-testing/tools/blktap/lib/blktaplib.h +=================================================================== +--- xen-3.3.1-testing.orig/tools/blktap/lib/blktaplib.h 2008-09-10 11:28:24.000000000 -0600 ++++ xen-3.3.1-testing/tools/blktap/lib/blktaplib.h 2008-09-10 13:45:24.000000000 -0600 +@@ -221,6 +221,7 @@ + #define DISK_TYPE_QCOW 4 + #define DISK_TYPE_QCOW2 5 + #define DISK_TYPE_IOEMU 6 ++#define DISK_TYPE_CDROM 7 + + /* xenstore/xenbus: */ + #define DOMNAME "Domain-0" diff --git a/blktap.patch b/blktap.patch index f1073df..72f13d1 100644 --- a/blktap.patch +++ b/blktap.patch @@ -1,11 +1,11 @@ bug #239173 bug #242953 -Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py +Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -2606,7 +2606,7 @@ class XendDomainInfo: +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py +@@ -2618,7 +2618,7 @@ class XendDomainInfo: (fn, BOOTLOADER_LOOPBACK_DEVICE)) vbd = { @@ -14,10 +14,10 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py 'device': BOOTLOADER_LOOPBACK_DEVICE, } -Index: 
xen-3.3.0-testing/tools/ioemu-remote/xenstore.c +Index: xen-3.3.1-testing/tools/ioemu-remote/xenstore.c =================================================================== ---- xen-3.3.0-testing.orig/tools/ioemu-remote/xenstore.c -+++ xen-3.3.0-testing/tools/ioemu-remote/xenstore.c +--- xen-3.3.1-testing.orig/tools/ioemu-remote/xenstore.c ++++ xen-3.3.1-testing/tools/ioemu-remote/xenstore.c @@ -151,9 +151,9 @@ void xenstore_parse_domain_config(int hv { char **e = NULL; diff --git a/blktapctrl-default-to-ioemu.patch b/blktapctrl-default-to-ioemu.patch index fd85298..460b972 100644 --- a/blktapctrl-default-to-ioemu.patch +++ b/blktapctrl-default-to-ioemu.patch @@ -2,15 +2,36 @@ Index: xen-3.3.0-testing/tools/blktap/drivers/blktapctrl.c =================================================================== --- xen-3.3.0-testing.orig/tools/blktap/drivers/blktapctrl.c +++ xen-3.3.0-testing/tools/blktap/drivers/blktapctrl.c -@@ -474,6 +474,7 @@ static int launch_tapdisk_provider(char +@@ -65,6 +65,8 @@ + #define MAX_RAND_VAL 0xFFFF + #define MAX_ATTEMPTS 10 + ++#undef ALWAYS_USE_IOEMU ++ + int run = 1; + int max_timeout = MAX_TIMEOUT; + int ctlfd = 0; +@@ -174,7 +176,10 @@ static int test_path(char *path, char ** + } + + if (found) { +- *type = dtypes[i]->idnum; ++ if (dtypes[i]->use_ioemu) ++ *type = DISK_TYPE_IOEMU; ++ else ++ *type = dtypes[i]->idnum; + + if (dtypes[i]->single_handler == 1) { + /* Check whether tapdisk process +@@ -474,6 +479,7 @@ static int launch_tapdisk_provider(char return child; } -+#if 0 ++#ifndef ALWAYS_USE_IOEMU static int launch_tapdisk(char *wrctldev, char *rdctldev) { char *argv[] = { "tapdisk", wrctldev, rdctldev, NULL }; -@@ -483,6 +484,7 @@ static int launch_tapdisk(char *wrctldev +@@ -483,6 +489,7 @@ static int launch_tapdisk(char *wrctldev return 0; } @@ -18,15 +39,15 @@ Index: xen-3.3.0-testing/tools/blktap/drivers/blktapctrl.c static int launch_tapdisk_ioemu(void) { -@@ -554,6 +556,7 @@ static int connect_qemu(blkif_t *blkif, +@@ -554,6 +561,7 @@ static int connect_qemu(blkif_t *blkif, return 0; } -+#if 0 ++#ifndef ALWAYS_USE_IOEMU /* Launch tapdisk instance */ static int connect_tapdisk(blkif_t *blkif, int minor) { -@@ -597,6 +600,7 @@ fail: +@@ -597,6 +605,7 @@ fail: return ret; } @@ -34,22 +55,91 @@ Index: xen-3.3.0-testing/tools/blktap/drivers/blktapctrl.c static int blktapctrl_new_blkif(blkif_t *blkif) { -@@ -621,6 +625,7 @@ static int blktapctrl_new_blkif(blkif_t +@@ -621,6 +630,10 @@ static int blktapctrl_new_blkif(blkif_t blkif->cookie = next_cookie++; if (!exist) { -+#if 0 ++#ifdef ALWAYS_USE_IOEMU ++ if (connect_qemu(blkif, blkif->domid)) ++ goto fail; ++#else if (type == DISK_TYPE_IOEMU) { if (connect_qemu(blkif, blkif->domid)) goto fail; -@@ -628,6 +633,10 @@ static int blktapctrl_new_blkif(blkif_t +@@ -628,6 +641,7 @@ static int blktapctrl_new_blkif(blkif_t if (connect_tapdisk(blkif, minor)) goto fail; } -+#else -+ if (connect_qemu(blkif, blkif->domid)) -+ goto fail; +#endif } else { DPRINTF("Process exists!\n"); +Index: xen-3.3.0-testing/tools/blktap/drivers/tapdisk.h +=================================================================== +--- xen-3.3.0-testing.orig/tools/blktap/drivers/tapdisk.h ++++ xen-3.3.0-testing/tools/blktap/drivers/tapdisk.h +@@ -145,6 +145,8 @@ typedef struct disk_info { + char handle[10]; /* xend handle, e.g. 'ram' */ + int single_handler; /* is there a single controller for all */ + /* instances of disk type? 
*/ ++ int use_ioemu; /* backend provider: 0 = tapdisk; 1 = ioemu */ ++ + #ifdef TAPDISK + struct tap_disk *drv; + #endif +@@ -166,6 +168,7 @@ static disk_info_t aio_disk = { + "raw image (aio)", + "aio", + 0, ++ 1, + #ifdef TAPDISK + &tapdisk_aio, + #endif +@@ -176,6 +179,7 @@ static disk_info_t sync_disk = { + "raw image (sync)", + "sync", + 0, ++ 1, + #ifdef TAPDISK + &tapdisk_sync, + #endif +@@ -186,6 +190,7 @@ static disk_info_t vmdk_disk = { + "vmware image (vmdk)", + "vmdk", + 1, ++ 1, + #ifdef TAPDISK + &tapdisk_vmdk, + #endif +@@ -196,6 +201,7 @@ static disk_info_t ram_disk = { + "ramdisk image (ram)", + "ram", + 1, ++ 0, + #ifdef TAPDISK + &tapdisk_ram, + #endif +@@ -206,6 +212,7 @@ static disk_info_t qcow_disk = { + "qcow disk (qcow)", + "qcow", + 0, ++ 1, + #ifdef TAPDISK + &tapdisk_qcow, + #endif +@@ -216,6 +223,7 @@ static disk_info_t qcow2_disk = { + "qcow2 disk (qcow2)", + "qcow2", + 0, ++ 1, + #ifdef TAPDISK + &tapdisk_qcow2, + #endif +@@ -226,6 +234,7 @@ static disk_info_t ioemu_disk = { + "ioemu disk", + "ioemu", + 1, ++ 1, + #ifdef TAPDISK + NULL + #endif diff --git a/build-tapdisk-ioemu.patch b/build-tapdisk-ioemu.patch index c1f4f5f..09df263 100644 --- a/build-tapdisk-ioemu.patch +++ b/build-tapdisk-ioemu.patch @@ -1,7 +1,7 @@ -Index: xen-3.3.0-testing/tools/ioemu-remote/Makefile +Index: xen-3.3.1-testing/tools/ioemu-remote/Makefile =================================================================== ---- xen-3.3.0-testing.orig/tools/ioemu-remote/Makefile -+++ xen-3.3.0-testing/tools/ioemu-remote/Makefile +--- xen-3.3.1-testing.orig/tools/ioemu-remote/Makefile ++++ xen-3.3.1-testing/tools/ioemu-remote/Makefile @@ -31,13 +31,6 @@ subdir-%: libqemu_common.a recurse-all: $(patsubst %,subdir-%, $(TARGET_DIRS)) @@ -16,28 +16,33 @@ Index: xen-3.3.0-testing/tools/ioemu-remote/Makefile ####################################################################### # BLOCK_OBJS is code used by both qemu system emulation and qemu-img -@@ -46,6 +39,16 @@ BLOCK_OBJS+=block-cow.o block-qcow.o aes +@@ -46,6 +39,21 @@ BLOCK_OBJS+=block-cow.o block-qcow.o aes BLOCK_OBJS+=block-dmg.o block-bochs.o block-vpc.o block-vvfat.o BLOCK_OBJS+=block-qcow2.o block-parallels.o +####################################################################### +# tapdisk-ioemu + ++hw/tapdisk-xen_blktap.o: hw/xen_blktap.c ++ $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -DQEMU_TOOL -c -o $@ $< ++tapdisk-ioemu.o: tapdisk-ioemu.c ++ $(CC) $(CFLAGS) $(CPPFLAGS) -DQEMU_IMG -DQEMU_TOOL -c -o $@ $< ++ +tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/libxc +tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/blktap/lib +tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/xenstore +tapdisk-ioemu: CPPFLAGS += -I$(XEN_ROOT)/tools/include -+tapdisk-ioemu: tapdisk-ioemu.c $(BLOCK_OBJS) qemu-img-block.o qemu-img-block-raw-posix.o hw/xen_blktap.c -+ $(CC) -DQEMU_TOOL -DQEMU_IMG $(CFLAGS) $(CPPFLAGS) $(BASE_CFLAGS) $(LDFLAGS) $(BASE_LDFLAGS) -o $@ $^ -lz $(LIBS) ++tapdisk-ioemu: tapdisk-ioemu.o $(BLOCK_OBJS) qemu-img-block.o qemu-img-block-raw-posix.o hw/tapdisk-xen_blktap.o ++ $(CC) $(LDFLAGS) -o $@ $^ -lz $(LIBS) + ###################################################################### # libqemu_common.a: Target independent part of system emulation. 
The # long term path is to suppress *all* target specific code in case of -Index: xen-3.3.0-testing/tools/ioemu-remote/configure +Index: xen-3.3.1-testing/tools/ioemu-remote/configure =================================================================== ---- xen-3.3.0-testing.orig/tools/ioemu-remote/configure -+++ xen-3.3.0-testing/tools/ioemu-remote/configure -@@ -1150,7 +1150,7 @@ fi +--- xen-3.3.1-testing.orig/tools/ioemu-remote/configure ++++ xen-3.3.1-testing/tools/ioemu-remote/configure +@@ -1144,7 +1144,7 @@ fi echo "#define CONFIG_UNAME_RELEASE \"$uname_release\"" >> $config_h diff --git a/dump-exec-state.patch b/dump-exec-state.patch new file mode 100644 index 0000000..39572a8 --- /dev/null +++ b/dump-exec-state.patch @@ -0,0 +1,98 @@ +Index: xen-3.3.0-testing/xen/arch/ia64/linux-xen/smp.c +=================================================================== +--- xen-3.3.0-testing.orig/xen/arch/ia64/linux-xen/smp.c ++++ xen-3.3.0-testing/xen/arch/ia64/linux-xen/smp.c +@@ -175,7 +175,7 @@ handle_IPI (int irq, void *dev_id, struc + * At this point the structure may be gone unless + * wait is true. + */ +- (*func)(info); ++ (*func)(info ?: regs); + + /* Notify the sending CPU that the task is done. */ + mb(); +Index: xen-3.3.0-testing/xen/arch/x86/smp.c +=================================================================== +--- xen-3.3.0-testing.orig/xen/arch/x86/smp.c ++++ xen-3.3.0-testing/xen/arch/x86/smp.c +@@ -357,7 +357,7 @@ fastcall void smp_call_function_interrup + + if ( call_data->wait ) + { +- (*func)(info); ++ (*func)(info ?: regs); + mb(); + atomic_inc(&call_data->finished); + } +@@ -365,7 +365,7 @@ fastcall void smp_call_function_interrup + { + mb(); + atomic_inc(&call_data->started); +- (*func)(info); ++ (*func)(info ?: regs); + } + + irq_exit(); +Index: xen-3.3.0-testing/xen/common/keyhandler.c +=================================================================== +--- xen-3.3.0-testing.orig/xen/common/keyhandler.c ++++ xen-3.3.0-testing/xen/common/keyhandler.c +@@ -91,14 +91,25 @@ static void show_handlers(unsigned char + key_table[i].desc); + } + +-static void __dump_execstate(void *unused) ++static void __dump_execstate(void *_regs) + { +- dump_execution_state(); +- printk("*** Dumping CPU%d guest state: ***\n", smp_processor_id()); ++ struct cpu_user_regs *regs = _regs; ++ unsigned int cpu = smp_processor_id(); ++ ++ if ( !guest_mode(regs) ) ++ { ++ printk("\n*** Dumping CPU%u host state: ***\n", cpu); ++ show_execution_state(regs); ++ } + if ( is_idle_vcpu(current) ) +- printk("No guest context (CPU is idle).\n"); ++ printk("No guest context (CPU%u is idle).\n", cpu); + else ++ { ++ printk("*** Dumping CPU%u guest state (d%d:v%d): ***\n", ++ smp_processor_id(), current->domain->domain_id, ++ current->vcpu_id); + show_execution_state(guest_cpu_user_regs()); ++ } + } + + static void dump_registers(unsigned char key, struct cpu_user_regs *regs) +@@ -111,14 +122,12 @@ static void dump_registers(unsigned char + printk("'%c' pressed -> dumping registers\n", key); + + /* Get local execution state out immediately, in case we get stuck. 
*/ +- printk("\n*** Dumping CPU%d host state: ***\n", smp_processor_id()); +- __dump_execstate(NULL); ++ __dump_execstate(regs); + + for_each_online_cpu ( cpu ) + { + if ( cpu == smp_processor_id() ) + continue; +- printk("\n*** Dumping CPU%d host state: ***\n", cpu); + on_selected_cpus(cpumask_of_cpu(cpu), __dump_execstate, NULL, 1, 1); + } + +Index: xen-3.3.0-testing/xen/include/asm-ia64/linux-xen/asm/ptrace.h +=================================================================== +--- xen-3.3.0-testing.orig/xen/include/asm-ia64/linux-xen/asm/ptrace.h ++++ xen-3.3.0-testing/xen/include/asm-ia64/linux-xen/asm/ptrace.h +@@ -278,7 +278,7 @@ struct switch_stack { + # define ia64_task_regs(t) (((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1) + # define ia64_psr(regs) ((struct ia64_psr *) &(regs)->cr_ipsr) + #ifdef XEN +-# define guest_mode(regs) (ia64_psr(regs)->cpl != 0) ++# define guest_mode(regs) (ia64_psr(regs)->cpl && !ia64_psr(regs)->vm) + # define guest_kernel_mode(regs) (ia64_psr(regs)->cpl == CONFIG_CPL0_EMUL) + # define vmx_guest_kernel_mode(regs) (ia64_psr(regs)->cpl == 0) + # define regs_increment_iip(regs) \ diff --git a/ns_tools.patch b/hv_tools.patch similarity index 78% rename from ns_tools.patch rename to hv_tools.patch index 284ed32..c48f744 100644 --- a/ns_tools.patch +++ b/hv_tools.patch @@ -1,7 +1,7 @@ -Index: xen-3.3.0-testing/tools/python/xen/lowlevel/xc/xc.c +Index: xen-3.3.1-testing/tools/python/xen/lowlevel/xc/xc.c =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/lowlevel/xc/xc.c -+++ xen-3.3.0-testing/tools/python/xen/lowlevel/xc/xc.c +--- xen-3.3.1-testing.orig/tools/python/xen/lowlevel/xc/xc.c ++++ xen-3.3.1-testing/tools/python/xen/lowlevel/xc/xc.c @@ -872,14 +872,14 @@ static PyObject *pyxc_hvm_build(XcObject int i; #endif @@ -11,7 +11,7 @@ Index: xen-3.3.0-testing/tools/python/xen/lowlevel/xc/xc.c static char *kwd_list[] = { "domid", - "memsize", "image", "vcpus", "acpi", -+ "memsize", "image", "vcpus", "extid", "acpi", ++ "memsize", "image", "vcpus", "extid", "acpi", "apic", NULL }; - if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iii", kwd_list, + if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiii", kwd_list, @@ -29,27 +29,27 @@ Index: xen-3.3.0-testing/tools/python/xen/lowlevel/xc/xc.c return Py_BuildValue("{}"); } -Index: xen-3.3.0-testing/tools/python/xen/xend/XendConfig.py +Index: xen-3.3.1-testing/tools/python/xen/xend/XendConfig.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendConfig.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendConfig.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendConfig.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendConfig.py @@ -141,6 +141,7 @@ XENAPI_PLATFORM_CFG_TYPES = { 'monitor': int, 'nographic': int, 'pae' : int, -+ 'extid' : int, ++ 'extid': int, 'rtc_timeoffset': int, 'serial': str, 'sdl': int, -Index: xen-3.3.0-testing/tools/python/xen/xend/image.py +Index: xen-3.3.1-testing/tools/python/xen/xend/image.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/image.py -+++ xen-3.3.0-testing/tools/python/xen/xend/image.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/image.py ++++ xen-3.3.1-testing/tools/python/xen/xend/image.py @@ -697,6 +697,7 @@ class HVMImageHandler(ImageHandler): self.apic = int(vmConfig['platform'].get('apic', 0)) self.acpi = int(vmConfig['platform'].get('acpi', 
0)) -+ self.extid = int(vmConfig['platform'].get('extid', 0)) ++ self.extid = int(vmConfig['platform'].get('extid', 0)) self.guest_os_type = vmConfig['platform'].get('guest_os_type') @@ -57,7 +57,7 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/image.py log.debug("store_evtchn = %d", store_evtchn) log.debug("memsize = %d", mem_mb) log.debug("vcpus = %d", self.vm.getVCpuCount()) -+ log.debug("extid = %d", self.extid) ++ log.debug("extid = %d", self.extid) log.debug("acpi = %d", self.acpi) log.debug("apic = %d", self.apic) @@ -65,14 +65,14 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/image.py image = self.loader, memsize = mem_mb, vcpus = self.vm.getVCpuCount(), -+ extid = self.extid, ++ extid = self.extid, acpi = self.acpi, apic = self.apic) rc['notes'] = { 'SUSPEND_CANCEL': 1 } -Index: xen-3.3.0-testing/tools/python/xen/xm/create.py +Index: xen-3.3.1-testing/tools/python/xen/xm/create.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xm/create.py -+++ xen-3.3.0-testing/tools/python/xen/xm/create.py +--- xen-3.3.1-testing.orig/tools/python/xen/xm/create.py ++++ xen-3.3.1-testing/tools/python/xen/xm/create.py @@ -218,6 +218,10 @@ gopts.var('timer_mode', val='TIMER_MODE' use="""Timer mode (0=delay virtual time when ticks are missed; 1=virtual time is always wallclock time.""") diff --git a/hv_xen_base.patch b/hv_xen_base.patch new file mode 100644 index 0000000..a673a9a --- /dev/null +++ b/hv_xen_base.patch @@ -0,0 +1,201 @@ +%patch +Index: xen-3.3.1-testing/xen/include/asm-x86/hvm/domain.h +=================================================================== +--- xen-3.3.1-testing.orig/xen/include/asm-x86/hvm/domain.h ++++ xen-3.3.1-testing/xen/include/asm-x86/hvm/domain.h +@@ -82,6 +82,7 @@ struct hvm_domain { + struct vmx_domain vmx; + struct svm_domain svm; + }; ++ void *hyperv_handle; /* will be NULL on creation*/ + }; + + #endif /* __ASM_X86_HVM_DOMAIN_H__ */ +Index: xen-3.3.1-testing/xen/arch/x86/hvm/Makefile +=================================================================== +--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/Makefile ++++ xen-3.3.1-testing/xen/arch/x86/hvm/Makefile +@@ -1,5 +1,6 @@ + subdir-y += svm + subdir-y += vmx ++subdir-y += hyperv + + obj-y += emulate.o + obj-y += hvm.o +Index: xen-3.3.1-testing/xen/arch/x86/hvm/hvm.c +=================================================================== +--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/hvm.c ++++ xen-3.3.1-testing/xen/arch/x86/hvm/hvm.c +@@ -44,6 +44,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -361,6 +362,7 @@ void hvm_domain_relinquish_resources(str + + void hvm_domain_destroy(struct domain *d) + { ++ hyperx_intercept_domain_destroy(d); + hvm_funcs.domain_destroy(d); + rtc_deinit(d); + stdvga_deinit(d); +@@ -645,8 +647,14 @@ int hvm_vcpu_initialise(struct vcpu *v) + { + int rc; + ++ if ((rc = hyperx_intercept_vcpu_initialize(v)) != 0) ++ goto fail1; ++ + if ( (rc = vlapic_init(v)) != 0 ) ++ { ++ hyperx_intercept_vcpu_destroy(v); + goto fail1; ++ } + + if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 ) + goto fail2; +@@ -693,6 +701,7 @@ int hvm_vcpu_initialise(struct vcpu *v) + hvm_funcs.vcpu_destroy(v); + fail2: + vlapic_destroy(v); ++ hyperx_intercept_vcpu_destroy(v); + fail1: + return rc; + } +@@ -1647,7 +1656,7 @@ void hvm_cpuid(unsigned int input, unsig + struct vcpu *v = current; + + if ( cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx) ) +- return; ++ goto hvm_cpuid_done; + + domain_cpuid(v->domain, 
input, *ecx, eax, ebx, ecx, edx);
+
+@@ -1659,6 +1668,8 @@ void hvm_cpuid(unsigned int input, unsig
+ if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
+ __clear_bit(X86_FEATURE_APIC & 31, edx);
+ }
++hvm_cpuid_done:
++ hyperx_intercept_do_cpuid(input, eax, ebx, ecx, edx);
+ }
+
+ void hvm_rdtsc_intercept(struct cpu_user_regs *regs)
+@@ -1749,6 +1760,8 @@ int hvm_msr_read_intercept(struct cpu_us
+ break;
+
+ default:
++ if (hyperx_intercept_do_msr_read(ecx, regs))
++ return X86EMUL_OKAY;
+ return hvm_funcs.msr_read_intercept(regs);
+ }
+
+@@ -1837,6 +1850,8 @@ int hvm_msr_write_intercept(struct cpu_u
+ break;
+
+ default:
++ if (hyperx_intercept_do_msr_write(ecx, regs))
++ return X86EMUL_OKAY;
+ return hvm_funcs.msr_write_intercept(regs);
+ }
+
+@@ -1963,6 +1978,10 @@ int hvm_do_hypercall(struct cpu_user_reg
+ case 0:
+ break;
+ }
++ if (hyperx_intercept_do_hypercall(regs))
++ {
++ return HVM_HCALL_completed;
++ }
+
+ if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
+ {
+@@ -2476,6 +2495,14 @@ long do_hvm_op(unsigned long op, XEN_GUE
+ rc = -EINVAL;
+
+ break;
++ case HVM_PARAM_EXTEND_HYPERVISOR:
++ if ( a.value != 1 )
++ rc = -EINVAL;
++ else if ( hyperv_initialize(d) )
++ rc = -ENOMEM;
++ if ( rc != 0 )
++ goto param_fail;
++ break;
+ }
+
+ if ( rc == 0 )
+Index: xen-3.3.1-testing/xen/include/public/arch-x86/hvm/save.h
+===================================================================
+--- xen-3.3.1-testing.orig/xen/include/public/arch-x86/hvm/save.h
++++ xen-3.3.1-testing/xen/include/public/arch-x86/hvm/save.h
+@@ -38,7 +38,7 @@ struct hvm_save_header {
+ uint32_t version; /* File format version */
+ uint64_t changeset; /* Version of Xen that saved this file */
+ uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */
+- uint32_t pad0;
++ uint32_t pad0;
+ };
+
+ DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
+@@ -421,9 +421,22 @@ struct hvm_hw_mtrr {
+
+ DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr);
+
++struct hvm_hyperv_dom {
++ uint64_t guestid_msr;
++ uint64_t hypercall_msr;
++ uint32_t long_mode;
++ uint32_t ext_id;
++};
++DECLARE_HVM_SAVE_TYPE(HYPERV_DOM, 15, struct hvm_hyperv_dom);
++
++struct hvm_hyperv_cpu {
++ uint64_t control_msr;
++ uint64_t version_msr;
++};
++DECLARE_HVM_SAVE_TYPE(HYPERV_CPU, 16, struct hvm_hyperv_cpu);
+ /*
+ * Largest type-code in use
+ */
+-#define HVM_SAVE_CODE_MAX 14
++#define HVM_SAVE_CODE_MAX 16
+
+ #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
+Index: xen-3.3.1-testing/xen/arch/x86/hvm/vlapic.c
+===================================================================
+--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/vlapic.c
++++ xen-3.3.1-testing/xen/arch/x86/hvm/vlapic.c
+@@ -34,6 +34,7 @@
+ #include 
+ #include 
+ #include 
++#include <asm/hvm/hvm_extensions.h>
+ #include 
+ #include 
+ #include 
+@@ -307,6 +308,7 @@ static int vlapic_accept_sipi(struct vcp
+ hvm_vcpu_reset_state(v, trampoline_vector << 8, 0);
+
+ vcpu_unpause(v);
++ hyperx_intercept_vcpu_up(v);
+
+ return X86EMUL_OKAY;
+ }
+Index: xen-3.3.1-testing/xen/include/public/hvm/params.h
+===================================================================
+--- xen-3.3.1-testing.orig/xen/include/public/hvm/params.h
++++ xen-3.3.1-testing/xen/include/public/hvm/params.h
+@@ -93,6 +93,8 @@
+ /* ACPI S state: currently support S0 and S3 on x86.
*/ + #define HVM_PARAM_ACPI_S_STATE 14 + +-#define HVM_NR_PARAMS 15 ++#define HVM_PARAM_EXTEND_HYPERVISOR 15 ++ ++#define HVM_NR_PARAMS 16 + + #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ diff --git a/hv_xen_extension.patch b/hv_xen_extension.patch new file mode 100644 index 0000000..19f11f1 --- /dev/null +++ b/hv_xen_extension.patch @@ -0,0 +1,1699 @@ +%patch +Index: xen-unstable.hg/xen/include/asm-x86/hvm/hvm_extensions.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ xen-unstable.hg/xen/include/asm-x86/hvm/hvm_extensions.h 2008-05-22 18:11:26.000000000 -0400 +@@ -0,0 +1,165 @@ ++/**************************************************************************** ++ | ++ | Copyright (c) [2007, 2008] Novell, Inc. ++ | All Rights Reserved. ++ | ++ | This program is free software; you can redistribute it and/or ++ | modify it under the terms of version 2 of the GNU General Public License as ++ | published by the Free Software Foundation. ++ | ++ | This program is distributed in the hope that it will be useful, ++ | but WITHOUT ANY WARRANTY; without even the implied warranty of ++ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ | GNU General Public License for more details. ++ | ++ | You should have received a copy of the GNU General Public License ++ | along with this program; if not, contact Novell, Inc. ++ | ++ | To contact Novell about this file by physical or electronic mail, ++ | you may find current contact information at www.novell.com ++ | ++ |*************************************************************************** ++*/ ++ ++/* ++ * hvm_extensions.h ++ * Implement Hyperv extensions. ++ * Engineering Contact: K. Y. Srinivasan ++ */ ++ ++#ifndef HVM_EXTENSION_H ++#define HVM_EXTENSION_H ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int ++hyperv_dom_create(struct domain *d); ++void ++hyperv_dom_destroy(struct domain *d); ++int ++hyperv_vcpu_initialize(struct vcpu *v); ++void ++hyperv_vcpu_up(struct vcpu *v); ++void ++hyperv_vcpu_destroy(struct vcpu *v); ++int ++hyperv_do_cpu_id(uint32_t input, unsigned int *eax, unsigned int *ebx, ++ unsigned int *ecx, unsigned int *edx); ++int ++hyperv_do_rd_msr(uint32_t idx, struct cpu_user_regs *regs); ++int ++hyperv_do_wr_msr(uint32_t idx, struct cpu_user_regs *regs); ++int ++hyperv_do_hypercall(struct cpu_user_regs *pregs); ++int ++hyperv_initialize(struct domain *d); ++ ++ ++ ++ ++static inline int ++hyperx_intercept_domain_create(struct domain *d) ++{ ++ if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) { ++ return(hyperv_dom_create(d)); ++ } ++ return (0); ++} ++ ++static inline void ++hyperx_intercept_domain_destroy(struct domain *d) ++{ ++ if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) ++ { ++ hyperv_dom_destroy(d); ++ } ++} ++ ++static inline int ++hyperx_intercept_vcpu_initialize(struct vcpu *v) ++{ ++ struct domain *d = v->domain; ++ if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) ++ { ++ return(hyperv_vcpu_initialize(v)); ++ } ++ return (0); ++} ++ ++ ++static inline void ++hyperx_intercept_vcpu_up(struct vcpu *v) ++{ ++ struct domain *d = current->domain; ++ if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) ++ { ++ hyperv_vcpu_up(v); ++ } ++} ++ ++static inline void ++hyperx_intercept_vcpu_destroy(struct vcpu *v) ++{ ++ struct domain *d = v->domain; ++ if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) ++ { ++ 
hyperv_vcpu_destroy(v); ++ } ++} ++ ++static inline int ++hyperx_intercept_do_cpuid(uint32_t idx, unsigned int *eax, unsigned int *ebx, ++ unsigned int *ecx, unsigned int *edx) ++{ ++ struct domain *d = current->domain; ++ if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) ++ { ++ return(hyperv_do_cpu_id(idx, eax, ebx, ecx, edx)); ++ } ++ return (0); ++} ++ ++static inline int ++hyperx_intercept_do_msr_read(uint32_t idx, struct cpu_user_regs *regs) ++{ ++ struct domain *d = current->domain; ++ if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) ++ { ++ return(hyperv_do_rd_msr(idx, regs)); ++ } ++ return (0); ++} ++ ++static inline int ++hyperx_intercept_do_msr_write(uint32_t idx, struct cpu_user_regs *regs) ++{ ++ struct domain *d = current->domain; ++ if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) ++ { ++ return(hyperv_do_wr_msr(idx, regs)); ++ } ++ return (0); ++} ++ ++static inline int ++hyperx_intercept_do_hypercall(struct cpu_user_regs *regs) ++{ ++ struct domain *d = current->domain; ++ if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) ++ { ++ return(hyperv_do_hypercall(regs)); ++ } ++ return (0); ++} ++ ++int hyperx_initialize(struct domain *d); ++ ++#endif +Index: xen-unstable.hg/xen/arch/x86/hvm/hyperv/Makefile +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ xen-unstable.hg/xen/arch/x86/hvm/hyperv/Makefile 2008-05-22 18:11:26.000000000 -0400 +@@ -0,0 +1,2 @@ ++obj-y += hv_intercept.o ++obj-y += hv_hypercall.o +Index: xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_errno.h +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_errno.h 2008-05-22 18:11:26.000000000 -0400 +@@ -0,0 +1,62 @@ ++/**************************************************************************** ++ | ++ | Copyright (c) [2007, 2008] Novell, Inc. ++ | All Rights Reserved. ++ | ++ | This program is free software; you can redistribute it and/or ++ | modify it under the terms of version 2 of the GNU General Public License as ++ | published by the Free Software Foundation. ++ | ++ | This program is distributed in the hope that it will be useful, ++ | but WITHOUT ANY WARRANTY; without even the implied warranty of ++ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ | GNU General Public License for more details. ++ | ++ | You should have received a copy of the GNU General Public License ++ | along with this program; if not, contact Novell, Inc. ++ | ++ | To contact Novell about this file by physical or electronic mail, ++ | you may find current contact information at www.novell.com ++ | ++ |*************************************************************************** ++*/ ++ ++/* ++ * hv_errno.h ++ * Error codes for the Novell Shim. ++ * ++ * Engineering Contact: K. Y. 
Srinivasan ++ */ ++ ++#ifndef HV_ERRNO_H ++#define HV_ERRNO_H ++ ++#define HV_STATUS_SUCCESS 0x0000 ++#define HV_STATUS_INVALID_HYPERCALL_CODE 0x0002 ++#define HV_STATUS_INVALID_HYPERCALL_INPUT 0x0003 ++#define HV_STATUS_INVALID_ALIGNMENT 0x0004 ++#define HV_STATUS_INVALID_PARAMETER 0x0005 ++#define HV_STATUS_ACCESS_DENIED 0x0006 ++#define HV_STATUS_INVALID_PARTITION_STATE 0x0007 ++#define HV_STATUS_OPERATION_DENIED 0x0008 ++#define HV_STATUS_UNKNOWN_PROPERTY 0x0009 ++#define HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0x000A ++#define HV_STATUS_INSUFFICIENT_MEMORY 0x000B ++#define HV_STATUS_PARTITION_TOO_DEEP 0x000C ++#define HV_STATUS_INVALID_PARTITION_ID 0x000D ++#define HV_STATUS_INVALID_VP_INDEX 0x000E ++#define HV_STATUS_UNABLE_TO_RESTORE_STATE 0x000F ++#define HV_STATUS_NOT_FOUND 0x0010 ++#define HV_STATUS_INVALID_PORT_ID 0x0011 ++#define HV_STATUS_INVALID_CONNECTION_ID 0x0012 ++#define HV_STATUS_INSUFFICIENT_BUFFERS 0x0013 ++#define HV_STATUS_NOT_ACKNOWLEDGED 0x0014 ++#define HV_STATUS_INVALID_VP_STATE 0x0015 ++#define HV_STATUS_ACKNOWLEDGED 0x0016 ++#define HV_STATUS_INVALID_SAVE_RESTORE_STATE 0x0017 ++#define HV_STATUS_NO_MEMORY_4PAGES 0x0100 ++#define HV_STATUS_NO_MEMORY_16PAGES 0x0101 ++#define HV_STATUS_NO_MEMORY_64PAGES 0x0102 ++#define HV_STATUS_NO_MEMORY_256PAGES 0x0103 ++#define HV_STATUS_NO_MEMORY_1024PAGES 0x0104 ++#endif +Index: xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_hypercall.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_hypercall.c 2008-05-22 18:11:26.000000000 -0400 +@@ -0,0 +1,125 @@ ++/**************************************************************************** ++ | ++ | Copyright (c) [2007, 2008] Novell, Inc. ++ | All Rights Reserved. ++ | ++ | This program is free software; you can redistribute it and/or ++ | modify it under the terms of version 2 of the GNU General Public License as ++ | published by the Free Software Foundation. ++ | ++ | This program is distributed in the hope that it will be useful, ++ | but WITHOUT ANY WARRANTY; without even the implied warranty of ++ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ | GNU General Public License for more details. ++ | ++ | You should have received a copy of the GNU General Public License ++ | along with this program; if not, contact Novell, Inc. ++ | ++ | To contact Novell about this file by physical or electronic mail, ++ | you may find current contact information at www.novell.com ++ | ++ |*************************************************************************** ++*/ ++ ++/* ++ * nshypercall.c. ++ * This file implements the hypercall component of the hyperv Shim. ++ * ++ * Engineering Contact: K. Y. 
Srinivasan
++ */
++
++#include 
++#include 
++#include 
++#include 
++#include 
++#include 
++
++#include 
++#include "hv_shim.h"
++#include "hv_errno.h"
++#include "hv_hypercall.h"
++
++
++void
++hv_print_stats(hv_partition_t *curp, int i)
++{
++ hv_vcpu_t *v;
++ v = &curp->vcpu_state[i];
++ printk("Printing stats for vcpu ID: %d\n", i);
++
++ printk("Number of context switches: %lu\n", v->stats.num_switches);
++ printk("Number of TPR reads: %lu\n", v->stats.num_tpr_reads);
++ printk("Number of ICR reads: %lu\n", v->stats.num_icr_reads);
++ printk("Number of Eoi writes: %lu\n", v->stats.num_eoi_writes);
++ printk("Number of Tpr writes: %lu\n", v->stats.num_tpr_writes);
++ printk("Number of Icr writes: %lu\n", v->stats.num_icr_writes);
++
++}
++
++static int
++hv_switch_va(paddr_t input)
++{
++ hv_partition_t *curp = hv_get_current_partition();
++ hv_vcpu_t *vcpup = &curp->vcpu_state[hv_get_current_vcpu_index()];
++
++ /*
++ * XXXKYS: the spec says the asID is passed via memory at offset 0 of
++ * the page whose GPA is in the input register. However, it appears
++ * the current build of longhorn (longhorn-2007-02-06-x86_64-fv-02)
++ * passes the asID in the input register instead. Need to check if
++ * future builds do this.
++ */
++ hvm_set_cr3(input);
++#ifdef HV_STATS
++ vcpup->stats.num_switches++;
++#endif
++ return (HV_STATUS_SUCCESS);
++}
++
++void
++hv_handle_hypercall(u64 opcode, u64 input, u64 output,
++ u64 *ret_val)
++{
++ unsigned short verb;
++ unsigned short rep_count;
++ unsigned short start_index;
++ hv_partition_t *curp = hv_get_current_partition();
++ u64 partition_id;
++
++
++ verb = (short)(opcode & 0xffff);
++ rep_count = (short)((opcode >>32) & 0xfff);
++ start_index = (short)((opcode >> 48) & 0xfff);
++ switch (verb)
++ {
++ case HV_GET_PARTITION_ID:
++ if (!hv_privilege_check(curp, HV_ACCESS_PARTITION_ID))
++ {
++ *ret_val =
++ hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0);
++ return;
++ }
++ partition_id = (u64)current->domain->domain_id;
++ if (hvm_copy_to_guest_phys(output, &partition_id, 8))
++ {
++ /*
++ * Invalid output area.
++ */
++ *ret_val =
++ hv_build_hcall_retval(HV_STATUS_INVALID_PARAMETER, 0);
++ return;
++ }
++ *ret_val = hv_build_hcall_retval(HV_STATUS_SUCCESS, 0);
++ return;
++ case HV_SWITCH_VA:
++ *ret_val = hv_build_hcall_retval(hv_switch_va(input), 0);
++ return;
++
++ default:
++ printk("Unknown/unsupported hypercall: verb is: %d\n", verb);
++ *ret_val =
++ hv_build_hcall_retval(HV_STATUS_INVALID_HYPERCALL_CODE, 0);
++ return;
++ }
++}
+Index: xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_hypercall.h
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_hypercall.h	2008-05-22 18:11:26.000000000 -0400
+@@ -0,0 +1,45 @@
++/****************************************************************************
++ |
++ | Copyright (c) [2007, 2008] Novell, Inc.
++ | All Rights Reserved.
++ |
++ | This program is free software; you can redistribute it and/or
++ | modify it under the terms of version 2 of the GNU General Public License as
++ | published by the Free Software Foundation.
++ |
++ | This program is distributed in the hope that it will be useful,
++ | but WITHOUT ANY WARRANTY; without even the implied warranty of
++ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ | GNU General Public License for more details.
++ | ++ | You should have received a copy of the GNU General Public License ++ | along with this program; if not, contact Novell, Inc. ++ | ++ | To contact Novell about this file by physical or electronic mail, ++ | you may find current contact information at www.novell.com ++ | ++ |*************************************************************************** ++*/ ++ ++/* ++ * hv_hypercall.h ++ * ++ * Engineering Contact: K. Y. Srinivasan ++ */ ++ ++#ifndef HV_HYPERCALL_H ++#define HV_HYPERCALL_H ++ ++ ++/* ++ * Hypercall verbs. ++ */ ++ ++#define HV_GET_PARTITION_PROPERTY 0x0017 ++#define HV_SET_PARTITION_PROPERTY 0x0018 ++#define HV_GET_PARTITION_ID 0x0015 ++#define HV_SWITCH_VA 0x0001 ++#define HV_FLUSH_VA 0x0002 ++#define HV_FLUSH_VA_LIST 0x0003 ++ ++#endif /* HV_HYPERCALL_H */ +Index: xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_intercept.c +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_intercept.c 2008-05-23 14:37:49.000000000 -0400 +@@ -0,0 +1,983 @@ ++/**************************************************************************** ++ | ++ | Copyright (c) [2007, 2008] Novell, Inc. ++ | All Rights Reserved. ++ | ++ | This program is free software; you can redistribute it and/or ++ | modify it under the terms of version 2 of the GNU General Public License as ++ | published by the Free Software Foundation. ++ | ++ | This program is distributed in the hope that it will be useful, ++ | but WITHOUT ANY WARRANTY; without even the implied warranty of ++ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ | GNU General Public License for more details. ++ | ++ | You should have received a copy of the GNU General Public License ++ | along with this program; if not, contact Novell, Inc. ++ | ++ | To contact Novell about this file by physical or electronic mail, ++ | you may find current contact information at www.novell.com ++ | ++ |*************************************************************************** ++*/ ++ ++/* ++ * nsintercept.c. ++ * This file implements the intercepts to support the Hyperv Shim. ++ * ++ * Engineering Contact: K. Y. Srinivasan ++ */ ++ ++#include ++ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++ ++/* ++ * Local includes; extension specific. ++ */ ++#include "hv_errno.h" ++#include "hv_shim.h" ++ ++ ++/* ++ * Implement the Hyperv Shim. ++ */ ++ ++extern struct hvm_mmio_handler vlapic_mmio_handler; ++ ++ ++static inline void ++hv_hypercall_page_initialize(void *hypercall_page, hv_partition_t *curp); ++ ++static inline void * ++get_virt_from_gmfn(struct domain *d, unsigned long gmfn) ++{ ++ unsigned long mfn = gmfn_to_mfn(d, gmfn); ++ if (mfn == INVALID_MFN) ++ return (NULL); ++ return (map_domain_page(mfn)); ++} ++ ++static inline void ++inject_interrupt(struct vcpu *v, int vector, int type) ++{ ++ struct vlapic *vlapic = vcpu_vlapic(v); ++ ++ /* ++ * XXXKYS: Check the trigger mode. ++ */ ++ if (vlapic_set_irq(vlapic, vector, 1)) ++ vcpu_kick(v); ++} ++ ++ ++static inline void ++hv_write_guestid_msr(hv_partition_t *curp, hv_vcpu_t *cur_vcpu, u64 msr_content) ++{ ++ curp->guest_id_msr = msr_content; ++ if (curp->guest_id_msr == 0) ++ { ++ /* ++ * Guest has cleared the guest ID; ++ * clear the hypercall page. 
++ */ ++ if (curp->hypercall_msr) ++ cur_vcpu->flags &= ~HV_VCPU_UP; ++ } ++} ++ ++ ++static inline void ++hv_write_hypercall_msr(hv_partition_t *curp, ++ hv_vcpu_t *cur_vcpu, ++ u64 msr_content) ++{ ++ unsigned long gmfn; ++ void *hypercall_page; ++ struct domain *d = cur_vcpu->xen_vcpu->domain; ++ ++ spin_lock(&curp->lock); ++ gmfn = (msr_content >> 12); ++ if (curp->guest_id_msr == 0) ++ { ++ /* Nothing to do if the guest is not registered*/ ++ spin_unlock(&curp->lock); ++ return; ++ } ++ /* ++ * Guest is registered; see if we can turn-on the ++ * hypercall page. ++ * XXXKYS: Can the guest write the GPA in one call and ++ * subsequently enable it? Check. For now assume that all the ++ * info is specified in one call. ++ */ ++ if (((u32)msr_content & (0x00000001)) == 0) ++ { ++ /* ++ * The client is not enabling the hypercall; just ++ * ignore everything. ++ */ ++ spin_unlock(&curp->lock); ++ return; ++ } ++ hypercall_page = get_virt_from_gmfn(d,gmfn); ++ if (hypercall_page == NULL) ++ { ++ /* ++ * The guest specified a bogus GPA; inject a GP fault ++ * into the guest. ++ */ ++ hvm_inject_exception(TRAP_gp_fault, 0, 0); ++ spin_unlock(&curp->lock); ++ return; ++ } ++ hv_hypercall_page_initialize(hypercall_page, curp); ++ unmap_domain_page(hypercall_page); ++ curp->hypercall_msr = msr_content; ++ spin_unlock(&curp->lock); ++ cur_vcpu->flags |= HV_VCPU_UP; ++} ++ ++ ++int ++hyperv_initialize(struct domain *d) ++{ ++ int i; ++ printk("Hyperv extensions initialized\n"); ++ if (hyperv_dom_create(d)) ++ { ++ printk("Hyperv dom create failed\n"); ++ return (1); ++ } ++ for (i=0; i < MAX_VIRT_CPUS; i++) ++ { ++ if (d->vcpu[i] != NULL) ++ { ++ if (hyperv_vcpu_initialize(d->vcpu[i])) ++ { ++ int j; ++ for (j= (i-1); j >=0; j--) ++ { ++ hyperv_vcpu_destroy(d->vcpu[j]); ++ } ++ hyperv_dom_destroy(d); ++ return (1); ++ } ++ } ++ } ++ return (0); ++} ++ ++static inline u64 ++hv_get_time_since_boot(hv_partition_t *curp) ++{ ++ u64 curTime = get_s_time(); ++ return ((curTime - curp->domain_boot_time)/100) ; ++} ++ ++static inline int ++hv_call_from_bios(void) ++{ ++ if (hvm_paging_enabled(current)) ++ return (0); ++ else ++ return (1); ++} ++ ++ ++static inline int ++hv_os_registered(void) ++{ ++ hv_partition_t *curp = hv_get_current_partition(); ++ return (curp->guest_id_msr != 0?1:0); ++} ++ ++ ++ ++static inline void ++hv_set_partition_privileges(hv_partition_t *hvpp) ++{ ++ /* ++ * This is based on the hypervisor spec under section 5.2.3. ++ */ ++ hvpp->privileges = HV_SHIM_PRIVILEGES; ++} ++ ++static inline u32 ++hv_get_recommendations(void) ++{ ++ /* ++ *For now we recommend all the features. Need to validate. ++ */ ++ if ( paging_mode_hap(current->domain)) ++ /* ++ * If HAP is enabled; the guest should not use TLB flush ++ * related enlightenments. ++ */ ++ return (USE_CSWITCH_HCALL | USE_APIC_MSRS | USE_RESET_MSR); ++ else ++ /* ++ * For now disable TLB flush enlightenments. 
++ */
++ return (USE_CSWITCH_HCALL | USE_APIC_MSRS | USE_RESET_MSR);
++}
++
++static inline u32
++hv_get_max_vcpus_supported(void)
++{
++ return (MAX_VIRT_CPUS);
++}
++
++
++static inline void
++hv_read_icr(u64 *icr_content)
++{
++ unsigned long icr_low, icr_high;
++
++ vlapic_mmio_handler.read_handler(current,
++ (vlapic_base_address(vcpu_vlapic(current)) + APIC_ICR), 4, &icr_low);
++ vlapic_mmio_handler.read_handler(current,
++ (vlapic_base_address(vcpu_vlapic(current)) + APIC_ICR2), 4, &icr_high);
++ *icr_content = (((u64)icr_high << 32) | icr_low);
++
++}
++
++static inline void
++hv_read_tpr(u64 *tpr_content)
++{
++
++ vlapic_mmio_handler.read_handler(current,
++ (vlapic_base_address(vcpu_vlapic(current)) + APIC_TASKPRI),
++ 4, (unsigned long *)tpr_content);
++}
++
++static inline void
++hv_write_eoi(u64 msr_content)
++{
++ u32 eoi = (u32)msr_content;
++
++ vlapic_mmio_handler.write_handler(current,
++ (vlapic_base_address(vcpu_vlapic(current)) + APIC_EOI), 4, eoi);
++
++}
++
++static inline void
++hv_write_icr(u64 msr_content)
++{
++ u32 icr_low, icr_high;
++ icr_low = (u32)msr_content;
++ icr_high = (u32)(msr_content >> 32);
++
++ if (icr_high != 0)
++ {
++ vlapic_mmio_handler.write_handler(current,
++ (vlapic_base_address(vcpu_vlapic(current)) + APIC_ICR2), 4,
++ icr_high);
++ }
++ if (icr_low != 0)
++ {
++ vlapic_mmio_handler.write_handler(current,
++ (vlapic_base_address(vcpu_vlapic(current)) + APIC_ICR), 4,
++ icr_low);
++ }
++
++}
++
++static inline void
++hv_write_tpr(u64 msr_content)
++{
++ u32 tpr = (u32)msr_content;
++
++
++ vlapic_mmio_handler.write_handler(current,
++ (vlapic_base_address(vcpu_vlapic(current)) + APIC_TASKPRI), 4, tpr);
++
++}
++
++static inline void
++hv_hypercall_page_initialize(void *hypercall_page, hv_partition_t *curp)
++{
++ char *p;
++
++ if (hvm_funcs.guest_x86_mode(current) == 8)
++ curp->long_mode_guest = 1;
++ else
++ curp->long_mode_guest = 0;
++
++
++ memset(hypercall_page, 0, PAGE_SIZE);
++ p = (char *)(hypercall_page) ;
++ /*
++ * We need to differentiate hypercalls that are to be processed by Xen
++ * from those that need to be processed by the hyperV shim. Xen hypercalls
++ * use eax to pass the opcode; set the high order bit in eax for hypercalls
++ * destined for the hyperV shim (thanks Steven)
++ */
++ *(u8 *)(p + 0) = 0x0d; /* eax or imm32 */
++ *(u8 *)(p + 1) = 0x00;
++ *(u8 *)(p + 2) = 0x00;
++ *(u8 *)(p + 3) = 0x00;
++ *(u8 *)(p + 4) = 0x08; /* eax |= HYPERV_HCALL */
++
++ *(u8 *)(p + 5) = 0x0f; /* vmcall/vmmcall */
++ *(u8 *)(p + 6) = 0x01;
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
++ *(u8 *)(p + 7) = 0xc1; /* vmcall */
++ else
++ *(u8 *)(p + 7) = 0xd9; /* vmmcall (AMD) */
++ *(u8 *)(p + 8) = 0xc3; /* ret */
++}
++
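
For reference, the nine bytes assembled above decode as follows; judging from
the test in hyperv_do_hypercall() further down, HYPERV_HCALL is this
0x08000000 tag:

    /*
     * 0d 00 00 00 08    or     $0x08000000,%eax   ; tag the call for the shim
     * 0f 01 c1          vmcall                    ; 0f 01 d9 (vmmcall) on AMD
     * c3                ret
     *
     * Guests invoke this page like a normal hypercall page; the shim then
     * multiplexes on the tagged %eax to tell its own hypercalls from Xen's.
     */
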
++static inline int
++hv_access_time_refcnt(hv_partition_t *curp, u64 *msr_content)
++{
++ if (!hv_privilege_check(curp, HV_ACCESS_TIME_REF_CNT))
++ {
++ /*
++ * The partition does not have the privilege to
++ * read this; return error.
++ */
++ return (0);
++ }
++ *msr_content = hv_get_time_since_boot(curp);
++ return (1);
++}
++
++
++void
++hyperv_vcpu_up(struct vcpu *v)
++{
++ hv_partition_t *curp = hv_get_current_partition();
++ hv_vcpu_t *vcpup;
++ vcpup = &curp->vcpu_state[v->vcpu_id];
++ vcpup->flags |= HV_VCPU_UP;
++}
++
++int
++hyperv_do_hypercall(struct cpu_user_regs *pregs)
++{
++ hv_partition_t *curp = hv_get_current_partition();
++ hv_vcpu_t *vcpup;
++ int long_mode_guest = curp->long_mode_guest;
++
++ if (pregs->_eax & HYPERV_HCALL)
++ {
++ u64 opcode, input, output, ret_val;
++ vcpup = &curp->vcpu_state[hv_get_current_vcpu_index()];
++
++ pregs->_eax &= ~HYPERV_HCALL;
++ /*
++ * This is an extension hypercall; process it; but first make
++ * sure that the CPU is in the right state for invoking
++ * the hypercall - protected mode at CPL 0.
++ */
++ if (hv_invalid_cpu_state())
++ {
++ hvm_inject_exception(TRAP_gp_fault, 0, 0);
++ ret_val = hv_build_hcall_retval(HV_STATUS_INVALID_VP_STATE, 0);
++ hv_set_syscall_retval(pregs, long_mode_guest, ret_val);
++ return (1);
++ }
++ if (long_mode_guest)
++ {
++ opcode = pregs->ecx;
++ input = pregs->edx;
++ output = pregs->r8;
++ } else
++ {
++ opcode =
++ ((((u64)pregs->edx) << 32) | ((u64)pregs->eax));
++ input =
++ ((((u64)pregs->ebx) << 32) | ((u64)pregs->ecx));
++ output =
++ ((((u64)pregs->edi) << 32) | ((u64)pregs->esi));
++ }
++ hv_handle_hypercall(opcode, input, output, &ret_val);
++ hv_set_syscall_retval(pregs, long_mode_guest, ret_val);
++ return (1);
++ }
++ /*
++ * This hypercall page is not the page for the Viridian extension.
++ */
++ return (0);
++}
++
++
++int
++hyperv_dom_create(struct domain *d)
++{
++ hv_partition_t *hvpp;
++ hvpp = xmalloc_bytes(sizeof(hv_partition_t));
++ if (hvpp == NULL)
++ {
++ printk("Hyperv Dom Create: Memory allocation failed\n");
++ return (1);
++ }
++ memset(hvpp, 0, sizeof(*hvpp));
++ spin_lock_init(&hvpp->lock);
++ /*
++ * Set the partition wide privilege; We can start with no privileges
++ * and progressively turn on fancier hypervisor features.
++ */
++ hv_set_partition_privileges(hvpp);
++ /*
++ * Stash away pointer to our state in the hvm domain structure.
++ */
++ d->arch.hvm_domain.hyperv_handle = hvpp;
++ hvpp->domain_boot_time = get_s_time();
++ return (0);
++}
++
++void
++hyperv_dom_destroy(struct domain *d)
++{
++ int i;
++ hv_partition_t *curp = d->arch.hvm_domain.hyperv_handle;
++ printk("Hyper-V Domain Being Destroyed\n");
++ ASSERT(curp != NULL);
++#ifdef HV_STATS
++ printk("DUMP STATS\n");
++ for (i=0; i < MAX_VIRT_CPUS; i++)
++ if (d->vcpu[i] != NULL)
++ hv_print_stats(curp, i);
++#endif
++
++ xfree(d->arch.hvm_domain.hyperv_handle);
++ d->arch.hvm_domain.hyperv_handle = NULL;
++}
++
++int
++hyperv_vcpu_initialize(struct vcpu *v)
++{
++ hv_vcpu_t *vcpup;
++ hv_partition_t *curp = v->domain->arch.hvm_domain.hyperv_handle;
++ vcpup = &curp->vcpu_state[v->vcpu_id];
++ atomic_inc(&curp->vcpus_active);
++ if (v->vcpu_id == 0)
++ vcpup->flags |= HV_VCPU_BOOT_CPU;
++ /*
++ * Initialize all the synthetic MSRs corresponding to this VCPU.
++ * Note that all state is set to 0 to begin
++ */ ++ vcpup->version_msr = 0x00000001; ++ vcpup->xen_vcpu = v; ++ ++ return (0); ++} ++ ++void ++hyperv_vcpu_destroy(struct vcpu *v) ++{ ++ hv_vcpu_t *vcpup; ++ hv_partition_t *curp = v->domain->arch.hvm_domain.hyperv_handle; ++ ++ vcpup = &curp->vcpu_state[v->vcpu_id]; ++ atomic_dec(&curp->vcpus_active); ++ vcpup->flags &= ~HV_VCPU_UP; ++} ++ ++static int ++hyperv_vcpu_save(struct domain *d, hvm_domain_context_t *h) ++{ ++ struct vcpu *v; ++ struct hvm_hyperv_cpu ctxt; ++ ++ hv_vcpu_t *vcpup; ++ hv_partition_t *curp = d->arch.hvm_domain.hyperv_handle; ++ ++ if (curp == NULL) ++ return 0; ++ ++ for_each_vcpu(d, v) ++ { ++ vcpup = &curp->vcpu_state[v->vcpu_id]; ++ ++ /* ++ * We don't need to save state for a ++ * vcpu that is down; the restore ++ * code will leave it down if there is nothing saved. ++ */ ++ if ( test_bit(_VPF_down, &v->pause_flags) ) ++ continue; ++ ctxt.control_msr = vcpup->control_msr; ++ ctxt.version_msr = vcpup->version_msr; ++ if (hvm_save_entry(HYPERV_CPU, v->vcpu_id, h, &ctxt) != 0 ) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int ++hyperv_vcpu_restore(struct domain *d, hvm_domain_context_t *h) ++{ ++ int vcpuid; ++ struct hvm_hyperv_cpu ctxt; ++ ++ hv_vcpu_t *vcpup; ++ hv_partition_t *curp = d->arch.hvm_domain.hyperv_handle; ++ ++ if (curp == NULL) ++ return 0; ++ /* Which vcpu is this? */ ++ vcpuid = hvm_load_instance(h); ++ vcpup = &curp->vcpu_state[vcpuid]; ++ ASSERT(vcpup != NULL); ++ if ( hvm_load_entry(HYPERV_CPU, h, &ctxt) != 0 ) ++ return -EINVAL; ++ ++ vcpup->control_msr = ctxt.control_msr; ++ vcpup->version_msr = ctxt.version_msr; ++ ++ vcpup->flags |= HV_VCPU_UP; ++ return 0; ++} ++ ++static int ++hyperv_dom_save(struct domain *d, hvm_domain_context_t *h) ++{ ++ struct hvm_hyperv_dom ctxt; ++ hv_partition_t *curp = d->arch.hvm_domain.hyperv_handle; ++ ++ if (curp == NULL) { ++ return 0; ++ } ++ ++ ctxt.guestid_msr = curp->guest_id_msr; ++ ctxt.hypercall_msr = curp->hypercall_msr; ++ ctxt.long_mode = curp->long_mode_guest; ++ ctxt.ext_id = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR]; ++ return (hvm_save_entry(HYPERV_DOM, 0, h, &ctxt)); ++} ++ ++static int ++hyperv_dom_restore(struct domain *d, hvm_domain_context_t *h) ++{ ++ struct hvm_hyperv_dom ctxt; ++ hv_partition_t *curp; ++ ++ if ( hvm_load_entry(HYPERV_DOM, h, &ctxt) != 0 ) ++ return -EINVAL; ++ d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] = ctxt.ext_id; ++ if (ctxt.ext_id != 1) ++ return 0; ++ if (hyperv_initialize(d)) ++ return -EINVAL; ++ curp = d->arch.hvm_domain.hyperv_handle; ++ ++ curp->guest_id_msr = ctxt.guestid_msr; ++ curp->hypercall_msr = ctxt.hypercall_msr; ++ curp->long_mode_guest = ctxt.long_mode; ++ return 0; ++} ++ ++HVM_REGISTER_SAVE_RESTORE(HYPERV_DOM, hyperv_dom_save, hyperv_dom_restore, ++ 1, HVMSR_PER_DOM); ++ ++ ++HVM_REGISTER_SAVE_RESTORE(HYPERV_CPU,hyperv_vcpu_save,hyperv_vcpu_restore, ++ 1, HVMSR_PER_VCPU); ++ ++ ++static int ++hv_preprocess_cpuid_leaves(unsigned int input, unsigned int *eax, ++ unsigned int *ebx, unsigned int *ecx, ++ unsigned int *edx ) ++{ ++ uint32_t idx; ++ struct domain *d = current->domain; ++ int extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR]; ++ ++ if (extid == 1) { ++ /* ++ * Enlightened Windows guest; need to remap and handle ++ * leaves used by PV front-end drivers. ++ */ ++ if ((input >= 0x40000000) && (input <= 0x40000005)) ++ return (0); ++ /* ++ * PV drivers use cpuid to query the hypervisor for details. 
On ++ * Windows we will use the following leaves for this: ++ * ++ * 4096: VMM Sinature (corresponds to 0x40000000 on Linux) ++ * 4097: VMM Version (corresponds to 0x40000001 on Linux) ++ * 4098: Hypercall details (corresponds to 0x40000002 on Linux) ++ */ ++ if ((input >= 0x40001000) && (input <= 0x40001002)) ++ { ++ idx = (input - 0x40001000); ++ cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx); ++ return (1); ++ } ++ } ++ return (0); ++} ++ ++int ++hyperv_do_cpu_id(unsigned int input, unsigned int *eax, unsigned int *ebx, ++ unsigned int *ecx, unsigned int *edx) ++{ ++ uint32_t idx; ++ hv_partition_t *curp = hv_get_current_partition(); ++ ++ /* ++ * hvmloader uses cpuid to set up a hypercall page; we don't want to ++ * intercept calls coming from the bootstrap (bios) code in the HVM ++ * guest; we discriminate based on if paging is enabled or not. ++ */ ++ if (hv_call_from_bios()) ++ /* ++ * We don't intercept this. ++ */ ++ return (0); ++ ++ if (input == 0x00000001) ++ { ++ *ecx |= 0x80000000; ++printk("KYS: hypervisor enabled\n"); ++ return (1); ++ } ++ ++ if (hv_preprocess_cpuid_leaves(input, eax, ebx, ecx, edx)) ++ return (0); ++ idx = (input - 0x40000000); ++ ++ switch (idx) ++ { ++ case 0: ++ /* ++ * 0x40000000: Hypervisor identification. ++ */ ++ *eax = 0x40000005; /* For now clamp this */ ++ *ebx = 0x65766f4e; /* "Nove" */ ++ *ecx = 0x68536c6c; /* "llSh" */ ++ *edx = 0x76486d69; /* "imHv" */ ++printk("KYS: hypervisor dentified\n"); ++ break; ++ ++ case 1: ++ /* ++ * 0x40000001: Hypervisor identification. ++ */ ++ *eax = 0x31237648; /* "Hv#1*/ ++ *ebx = 0; /* Reserved */ ++ *ecx = 0; /* Reserved */ ++ *edx = 0; /* Reserved */ ++ break; ++ case 2: ++ /* ++ * 0x40000002: Guest Info ++ */ ++ if (hv_os_registered()) ++ { ++ u64 guest_id = curp->guest_id_msr; ++ u32 guest_major, guest_minor; ++ ++ guest_major = ((guest_id >> 32) & 0xff); ++ guest_minor = ((guest_id >> 24) & 0xff); ++ *eax = (guest_id & 0xffff); /* Build # 15:0 */ ++ *ebx = ++ (guest_major << 16) | (guest_minor); /*Major: 31:16; Minor: 15:0*/ ++ *ecx = ((guest_id >>16) & 0xff); /*Service Pack/Version: 23:16*/ ++ /* ++ * Service branch (31:24)|Service number (23:0) ++ * Not sure what these numbers are: XXXKYS. ++ */ ++ *edx = 0; /*Service branch (31:24)|Service number (23:0) */ ++ } else ++ { ++ *eax = 0; ++ *ebx = 0; ++ *ecx = 0; ++ *edx = 0; ++ } ++ break; ++ case 3: ++ /* ++ * 0x40000003: Feature identification. ++ */ ++ *eax = HV_SHIM_SUPPORTED_MSRS; ++ /* We only support AcessSelfPartitionId bit 1 */ ++ *ebx = (HV_SHIM_PRIVILEGES>>32); ++ *ecx = 0; /* Reserved */ ++ *edx = 0; /*No MWAIT (bit 0), No debugging (bit 1)*/ ++printk("KYS: hypervisor Feature identified\n"); ++ break; ++ case 4: ++ /* ++ * 0x40000004: Imlementation recommendations. ++ */ ++ *eax = hv_get_recommendations(); ++ *ebx = 0; /* Reserved */ ++ *ecx = 0; /* Reserved */ ++ *edx = 0; /* Reserved */ ++printk("KYS: hypervisor recommendation %x\n", *eax); ++ break; ++ case 5: ++ /* ++ * 0x40000005: Implementation limits. ++ * Currently we retrieve maximum number of vcpus and ++ * logical processors (hardware threads) supported. ++ */ ++ *eax = hv_get_max_vcpus_supported(); ++ *ebx = 0; /* Reserved */ ++ *ecx = 0; /* Reserved */ ++ *edx = 0; /* Reserved */ ++ break; ++ ++ default: ++ /* ++ * We don't handle this leaf. 
++
++int
++hyperv_do_rd_msr(uint32_t idx, struct cpu_user_regs *regs)
++{
++ hv_partition_t *curp = hv_get_current_partition();
++ unsigned int vcp_index = hv_get_current_vcpu_index();
++ u64 msr_content = 0;
++ hv_vcpu_t *cur_vcpu = &curp->vcpu_state[vcp_index];
++
++ /*
++ * hvmloader uses rdmsr; we don't want to
++ * intercept calls coming from the bootstrap (bios) code in the HVM
++ * guest; we discriminate based on if paging is enabled or not.
++ */
++ if (hv_call_from_bios())
++ /*
++ * We don't intercept this.
++ */
++ return (0);
++ printk("KYS: msr read idx: %d\n", idx);
++ switch (idx)
++ {
++ case HV_MSR_GUEST_OS_ID:
++ spin_lock(&curp->lock);
++ regs->eax = (u32)(curp->guest_id_msr & 0xFFFFFFFF);
++ regs->edx = (u32)(curp->guest_id_msr >> 32);
++ spin_unlock(&curp->lock);
++ break;
++ case HV_MSR_HYPERCALL:
++ spin_lock(&curp->lock);
++ regs->eax = (u32)(curp->hypercall_msr & 0xFFFFFFFF);
++ regs->edx = (u32)(curp->hypercall_msr >> 32);
++ spin_unlock(&curp->lock);
++ if ((((u32)curp->hypercall_msr) & (0x00000001)) != 0) {
++ cur_vcpu->flags |= HV_VCPU_UP;
++ }
++ break;
++ case HV_MSR_VP_INDEX:
++ regs->eax = (u32)(vcp_index);
++ regs->edx = (u32)(0x0);
++ break;
++ case HV_MSR_ICR:
++ if (!hv_privilege_check(curp, HV_ACCESS_APIC_MSRS)) {
++ goto msr_read_error;
++ }
++ hv_read_icr(&msr_content);
++#ifdef HV_STATS
++ cur_vcpu->stats.num_icr_reads++;
++#endif
++ regs->eax = (u32)(msr_content & 0xFFFFFFFF);
++ regs->edx = (u32)(msr_content >> 32);
++ break;
++ case HV_MSR_TPR:
++ if (!hv_privilege_check(curp, HV_ACCESS_APIC_MSRS)) {
++ goto msr_read_error;
++ }
++ hv_read_tpr(&msr_content);
++#ifdef HV_STATS
++ cur_vcpu->stats.num_tpr_reads++;
++#endif
++ regs->eax = (u32)(msr_content & 0xFFFFFFFF);
++ regs->edx = (u32)(msr_content >> 32);
++ break;
++ /*
++ * The following synthetic MSRs are implemented in the Novell Shim.
++ */
++ case HV_MSR_SCONTROL:
++ if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) {
++ goto msr_read_error;
++ }
++ regs->eax = (u32)(cur_vcpu->control_msr & 0xFFFFFFFF);
++ regs->edx = (u32)(cur_vcpu->control_msr >> 32);
++ break;
++ case HV_MSR_SVERSION:
++ if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) {
++ goto msr_read_error;
++ }
++ regs->eax = (u32)(cur_vcpu->version_msr & 0xFFFFFFFF);
++ regs->edx = (u32)(cur_vcpu->version_msr >> 32);
++ break;
++ case HV_MSR_TIME_REF_COUNT:
++ if (!hv_access_time_refcnt(curp, &msr_content)) {
++ goto msr_read_error;
++ }
++ regs->eax = (u32)(msr_content & 0xFFFFFFFF);
++ regs->edx = (u32)(msr_content >> 32);
++ break;
++ case HV_MSR_PVDRV_HCALL:
++ regs->eax = 0;
++ regs->edx = 0;
++ break;
++ case HV_MSR_SYSTEM_RESET:
++ regs->eax = 0;
++ regs->edx = 0;
++ break;
++ default:
++ /*
++ * We did not handle the MSR address specified;
++ * let the caller figure out
++ * what to do.
++ */
++ return (0);
++ }
++ return (1);
++msr_read_error:
++ /*
++ * Have to inject #GP fault.
++ */
++ hvm_inject_exception(TRAP_gp_fault, 0, 0);
++ return (1);
++}
++
++int
++hyperv_do_wr_msr(uint32_t idx, struct cpu_user_regs *regs)
++{
++ hv_partition_t *curp = hv_get_current_partition();
++ unsigned int vcp_index = hv_get_current_vcpu_index();
++ u64 msr_content = 0;
++ hv_vcpu_t *cur_vcpu = &curp->vcpu_state[vcp_index];
++ struct domain *d = current->domain;
++
++ /*
++ * hvmloader uses wrmsr; we don't want to
++ * intercept calls coming from the bootstrap (bios) code in the HVM
++ * guest; we discriminate based on if paging is enabled or not.
++     */
++    if (hv_call_from_bios())
++        /*
++         * We don't intercept this.
++         */
++        return (0);
++    msr_content =
++        (u32)regs->eax | ((u64)regs->edx << 32);
++
++    printk("KYS: msr write idx: %d\n", idx);
++
++    switch (idx)
++    {
++    case HV_MSR_GUEST_OS_ID:
++        hv_write_guestid_msr(curp, cur_vcpu, msr_content);
++        break;
++    case HV_MSR_HYPERCALL:
++        hv_write_hypercall_msr(curp, cur_vcpu, msr_content);
++        break;
++
++    case HV_MSR_VP_INDEX:
++        goto msr_write_error;
++
++    case HV_MSR_EOI:
++        if (!hv_privilege_check(curp, HV_ACCESS_APIC_MSRS)) {
++            goto msr_write_error;
++        }
++        hv_write_eoi(msr_content);
++#ifdef HV_STATS
++        cur_vcpu->stats.num_eoi_writes++;
++#endif
++        break;
++    case HV_MSR_ICR:
++        if (!hv_privilege_check(curp, HV_ACCESS_APIC_MSRS)) {
++            goto msr_write_error;
++        }
++        hv_write_icr(msr_content);
++#ifdef HV_STATS
++        cur_vcpu->stats.num_icr_writes++;
++#endif
++        break;
++    case HV_MSR_TPR:
++        if (!hv_privilege_check(curp, HV_ACCESS_APIC_MSRS)) {
++            goto msr_write_error;
++        }
++        hv_write_tpr(msr_content);
++#ifdef HV_STATS
++        cur_vcpu->stats.num_tpr_writes++;
++#endif
++        break;
++
++    /*
++     * The following MSRs are synthetic MSRs supported in the Novell Shim.
++     */
++    case HV_MSR_SCONTROL:
++        if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) {
++            goto msr_write_error;
++        }
++        cur_vcpu->control_msr = msr_content;
++        break;
++    case HV_MSR_SVERSION:
++        if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) {
++            goto msr_write_error;
++        }
++        /*
++         * This is a read-only MSR; generate #GP.
++         */
++        hvm_inject_exception(TRAP_gp_fault, 0, 0);
++        break;
++    case HV_MSR_TIME_REF_COUNT:
++        /*
++         * This is a read-only MSR.
++         */
++        goto msr_write_error;
++    case HV_MSR_PVDRV_HCALL:
++        /*
++         * Establish the hypercall page for PV drivers.
++         */
++        wrmsr_hypervisor_regs(0x40000000, regs->eax, regs->edx);
++        break;
++    case HV_MSR_SYSTEM_RESET:
++        /*
++         * Shut down the domain/partition.
++         */
++        if (msr_content & 0x1) {
++            domain_shutdown(d, SHUTDOWN_reboot);
++        }
++        break;
++
++    default:
++        /*
++         * We did not handle the MSR address;
++         * let the caller deal with this.
++         */
++        return (0);
++    }
++    return (1);
++msr_write_error:
++    /*
++     * Have to inject a #GP fault.
++     */
++    hvm_inject_exception(TRAP_gp_fault, 0, 0);
++    return (1);
++}
+Index: xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_shim.h
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_shim.h	2008-05-22 18:11:26.000000000 -0400
+@@ -0,0 +1,281 @@
++/****************************************************************************
++ |
++ | Copyright (c) [2007, 2008] Novell, Inc.
++ | All Rights Reserved.
++ |
++ | This program is free software; you can redistribute it and/or
++ | modify it under the terms of version 2 of the GNU General Public License as
++ | published by the Free Software Foundation.
++ |
++ | This program is distributed in the hope that it will be useful,
++ | but WITHOUT ANY WARRANTY; without even the implied warranty of
++ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ | GNU General Public License for more details.
++ |
++ | You should have received a copy of the GNU General Public License
++ | along with this program; if not, contact Novell, Inc.
++ |
++ | To contact Novell about this file by physical or electronic mail,
++ | you may find current contact information at www.novell.com
++ |
++ |***************************************************************************
++*/
++
++/*
++ * Hyperv Shim Implementation.
++ *
++ * Engineering Contact: K. Y. Srinivasan
++ */
++
++#ifndef HV_SHIM_H
++#define HV_SHIM_H
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++#include "hv_hypercall.h"
++
++/*
++ * Synthetic MSR addresses
++ */
++#define HV_MSR_GUEST_OS_ID      0x40000000
++#define HV_MSR_HYPERCALL        0x40000001
++#define HV_MSR_VP_INDEX         0x40000002
++#define HV_MSR_SYSTEM_RESET     0x40000003
++#define HV_MSR_TIME_REF_COUNT   0x40000020
++#define HV_MSR_EOI              0x40000070
++#define HV_MSR_ICR              0x40000071
++#define HV_MSR_TPR              0x40000072
++
++#define HV_MSR_SCONTROL         0x40000080
++#define HV_MSR_SVERSION         0x40000081
++#define HV_MSR_SIEFP            0x40000082
++#define HV_MSR_SIMP             0x40000083
++#define HV_MSR_SEOM             0x40000084
++#define HV_MSR_SINT0            0x40000090
++#define HV_MSR_SINT1            0x40000091
++#define HV_MSR_SINT2            0x40000092
++#define HV_MSR_SINT3            0x40000093
++#define HV_MSR_SINT4            0x40000094
++#define HV_MSR_SINT5            0x40000095
++#define HV_MSR_SINT6            0x40000096
++#define HV_MSR_SINT7            0x40000097
++#define HV_MSR_SINT8            0x40000098
++#define HV_MSR_SINT9            0x40000099
++#define HV_MSR_SINT10           0x4000009A
++#define HV_MSR_SINT11           0x4000009B
++#define HV_MSR_SINT12           0x4000009C
++#define HV_MSR_SINT13           0x4000009D
++#define HV_MSR_SINT14           0x4000009E
++#define HV_MSR_SINT15           0x4000009F
++
++#define HV_MSR_TIMER0_CONFIG    0x400000B0
++#define HV_MSR_TIMER0_COUNT     0x400000B1
++#define HV_MSR_TIMER1_CONFIG    0x400000B2
++#define HV_MSR_TIMER1_COUNT     0x400000B3
++#define HV_MSR_TIMER2_CONFIG    0x400000B4
++#define HV_MSR_TIMER2_COUNT     0x400000B5
++#define HV_MSR_TIMER3_CONFIG    0x400000B6
++#define HV_MSR_TIMER3_COUNT     0x400000B7
++
++/*
++ * Domain privilege flags
++ */
++
++#define _ACCESS_VP_RUNTIME          0
++#define ACCESS_VP_RUNTIME           (1L<<_ACCESS_VP_RUNTIME)
++#define _ACCESS_TIME_REF_COUNT      1
++#define ACCESS_TIME_REF_COUNT       (1L<<_ACCESS_TIME_REF_COUNT)
++#define _ACCESS_SYNIC_MSRS          2
++#define ACCESS_SYNIC_MSRS           (1L<<_ACCESS_SYNIC_MSRS)
++#define _ACCESS_SYNTHETIC_TIMERS    3
++#define ACCESS_SYNTHETIC_TIMERS     (1L<<_ACCESS_SYNTHETIC_TIMERS)
++#define _ACCESS_APIC_MSRS           4
++#define ACCESS_APIC_MSRS            (1L<<_ACCESS_APIC_MSRS)
++#define _ACCESS_HYPERCALL_MSRS      5
++#define ACCESS_HYPERCALL_MSRS       (1L<<_ACCESS_HYPERCALL_MSRS)
++#define _ACCESS_VP_INDEX            6
++#define ACCESS_VP_INDEX             (1L<<_ACCESS_VP_INDEX)
++#define _ACCESS_SELF_PARTITION_ID   33
++#define ACCESS_SELF_PARTITION_ID    (1L<<_ACCESS_SELF_PARTITION_ID)
++
++#define HV_SHIM_PRIVILEGES \
++    (ACCESS_TIME_REF_COUNT | ACCESS_APIC_MSRS | ACCESS_HYPERCALL_MSRS | \
++     ACCESS_VP_INDEX | ACCESS_SELF_PARTITION_ID)
++
++/*
++ * Guest recommendations
++ */
++#define _USE_CSWITCH_HCALL          0
++#define USE_CSWITCH_HCALL           (1U<<_USE_CSWITCH_HCALL)
++#define _USE_TLBFLUSH_HCALL         1
++#define USE_TLBFLUSH_HCALL          (1U<<_USE_TLBFLUSH_HCALL)
++#define _USE_REMOTE_TLBFLUSH_HCALL  2
++#define USE_REMOTE_TLBFLUSH_HCALL   (1U<<_USE_REMOTE_TLBFLUSH_HCALL)
++#define _USE_APIC_MSRS              3
++#define USE_APIC_MSRS               (1U<<_USE_APIC_MSRS)
++#define _USE_RESET_MSR              4
++#define USE_RESET_MSR               (1U<<_USE_RESET_MSR)
++
++/*
++ * Supported synthetic MSRs and features, per the 0.83 Hyper-V spec,
++ * section 3.4.
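++ *
++ * For reference: with the bit assignments below, HV_SHIM_SUPPORTED_MSRS
++ * works out to 0xf2 -- bits 1, 4, 5, 6 and 7 set (TIME_REF_CNT,
++ * APIC_MSRS, HYPERCALL_MSRS, VP_INDEX and RESET_MSR) -- which is the
++ * value hyperv_do_cpu_id reports in EAX for leaf 0x40000003.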
++ */ ++#define _MSR_VP_RUNTIME 0 ++#define MSR_VP_RUNTIME (1U<<_MSR_VP_RUNTIME) ++#define _MSR_TIME_REF_CNT 1 ++#define MSR_TIME_REF_CNT (1U<<_MSR_TIME_REF_CNT) ++#define _MSR_SYN_IC 2 ++#define MSR_SYN_IC (1U<<_MSR_SYN_IC) ++#define _MSR_SYN_TIMER 3 ++#define MSR_SYN_TIMER (1U<<_MSR_SYN_TIMER) ++#define _APIC_MSRS 4 ++#define APIC_MSRS (1U<<_APIC_MSRS) ++#define _HYPERCALL_MSRS 5 ++#define HYPERCALL_MSRS (1U<<_HYPERCALL_MSRS) ++#define _MSR_VP_INDEX 6 ++#define MSR_VP_INDEX (1U<<_MSR_VP_INDEX) ++#define _RESET_MSR 7 ++#define RESET_MSR (1U<<_RESET_MSR) ++ ++#define HV_SHIM_SUPPORTED_MSRS \ ++ (MSR_TIME_REF_CNT|APIC_MSRS|HYPERCALL_MSRS|MSR_VP_INDEX|RESET_MSR) ++ ++/* ++ * MSR for supporting PV drivers on longhorn. ++ */ ++#define HV_MSR_PVDRV_HCALL 0x40001000 ++ ++/* ++ * Tag for HyperV hcalls. ++ */ ++#define HYPERV_HCALL 0x80000000 ++ ++/* ++ * Hyperv Shim VCPU flags. ++ */ ++#define HV_VCPU_BOOT_CPU 0x00000001 ++#define HV_VCPU_UP 0x00000002 ++ ++ ++/* ++ * Stats structure. ++ */ ++ ++typedef struct { ++ u64 num_switches; ++ u64 num_tpr_reads; ++ u64 num_icr_reads; ++ u64 num_eoi_writes; ++ u64 num_tpr_writes; ++ u64 num_icr_writes; ++} hv_vcpu_stats_t; ++ ++typedef struct hv_vcpu { ++ /* ++ * Per-vcpu state to support the hyperv shim; ++ */ ++ unsigned long flags; ++ /* ++ * Synthetic msrs. ++ */ ++ u64 control_msr; ++ u64 version_msr; ++ struct vcpu *xen_vcpu; /*corresponding xen vcpu*/ ++ hv_vcpu_stats_t stats; ++} hv_vcpu_t; ++ ++ ++#define HV_STATS //KYS: Temporary ++ ++typedef struct hv_partition { ++ /* ++ * State maintained on a per guest basis to implement ++ * the Hyperv shim. ++ */ ++ s_time_t domain_boot_time; ++ spinlock_t lock; ++ atomic_t vcpus_active; ++ u64 guest_id_msr; ++ u64 hypercall_msr; ++ u64 privileges; ++ int long_mode_guest; ++ /* ++ * Each VCPU here corresponds to the vcpu in the underlying hypervisor; ++ * they share the same ID. ++ */ ++ hv_vcpu_t vcpu_state[MAX_VIRT_CPUS]; ++} hv_partition_t; ++ ++ ++/* ++ * Privilege flags. ++ */ ++ ++#define HV_ACCESS_VP_RUNTIME (1ULL << 0) ++#define HV_ACCESS_TIME_REF_CNT (1ULL << 1) ++#define HV_ACCESS_SYNC_MSRS (1ULL << 2) ++#define HV_ACCESS_SYNC_TIMERS (1ULL << 3) ++#define HV_ACCESS_APIC_MSRS (1ULL << 4) ++#define HV_ACCESS_PARTITION_ID (1ULL << 33) ++ ++#define hv_get_current_partition() \ ++((current)->domain->arch.hvm_domain.hyperv_handle) ++ ++#define hv_get_current_vcpu_index() (current)->vcpu_id ++ ++ ++static inline int ++hv_invalid_cpu_state(void) ++{ ++ int state; ++ state = hvm_funcs.guest_x86_mode(current); ++ if ((state == 4) || (state == 8)) ++ { ++ return (0); ++ } ++ return (1); ++} ++ ++static inline u64 ++hv_build_hcall_retval(int code, int reps) ++{ ++ u64 ret_val=0; ++ ret_val |= (code & 0xff); ++ ret_val |= (((long long)(reps & 0xfff)) << 32); ++ return (ret_val); ++} ++ ++static inline void hv_set_syscall_retval(struct cpu_user_regs *pregs, ++ int long_mode, u64 ret_val) ++{ ++ if (long_mode) ++ { ++ pregs->eax = ret_val; ++ } ++ else ++ { ++ pregs->edx = (u32)(ret_val >> 32); ++ pregs->eax = (u32)(ret_val); ++ } ++} ++ ++static inline int ++hv_privilege_check(hv_partition_t *curp, u64 flags) ++{ ++ return ((curp->privileges & flags)? 
1: 0); ++} ++ ++void ++hv_handle_hypercall(u64 opcode, u64 input, u64 output, ++ u64 *ret_val); ++ ++ ++void hv_print_stats(hv_partition_t *curp, int i); ++ ++#endif /*HV_SHIM_H */ diff --git a/init.xendomains b/init.xendomains index 9b4c46b..48e9d79 100644 --- a/init.xendomains +++ b/init.xendomains @@ -8,7 +8,7 @@ ### BEGIN INIT INFO # Provides: xendomains # Required-Start: $syslog $remote_fs xend -# Should-Start: iscsi +# Should-Start: iscsi o2cb ocfs2 # Required-Stop: $syslog $remote_fs xend # Should-Stop: iscsi # Default-Start: 3 5 diff --git a/ns_xen_base.patch b/ns_xen_base.patch deleted file mode 100644 index b2bd5c5..0000000 --- a/ns_xen_base.patch +++ /dev/null @@ -1,447 +0,0 @@ -%patch -Index: xen-3.2.1-testing/xen/arch/x86/hvm/svm/svm.c -=================================================================== ---- xen-3.2.1-testing.orig/xen/arch/x86/hvm/svm/svm.c -+++ xen-3.2.1-testing/xen/arch/x86/hvm/svm/svm.c -@@ -50,6 +50,7 @@ - #include - #include - #include -+#include - - u32 svm_feature_flags; - -@@ -78,6 +79,7 @@ static void *hsa[NR_CPUS] __read_mostly; - /* vmcb used for extended host state */ - static void *root_vmcb[NR_CPUS] __read_mostly; - -+ - static void inline __update_guest_eip( - struct cpu_user_regs *regs, unsigned int inst_len) - { -@@ -888,7 +890,7 @@ static struct hvm_function_table svm_fun - .set_tsc_offset = svm_set_tsc_offset, - .inject_exception = svm_inject_exception, - .init_hypercall_page = svm_init_hypercall_page, -- .event_pending = svm_event_pending -+ .event_pending = svm_event_pending, - }; - - int start_svm(struct cpuinfo_x86 *c) -@@ -1055,6 +1057,7 @@ static void svm_vmexit_do_cpuid(struct v - HVMTRACE_3D(CPUID, v, input, - ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx); - -+ ext_intercept_do_cpuid(input, regs); - inst_len = __get_instruction_length(v, INSTR_CPUID, NULL); - __update_guest_eip(regs, inst_len); - } -@@ -1757,6 +1760,11 @@ static void svm_do_msr_access( - /* is it a read? */ - if (vmcb->exitinfo1 == 0) - { -+ if (ext_intercept_do_msr_read(ecx, regs)) -+ { -+ goto done; -+ } -+ - switch (ecx) { - case MSR_IA32_TSC: - msr_content = hvm_get_guest_time(v); -@@ -1847,6 +1855,11 @@ static void svm_do_msr_access( - } - else - { -+ if (ext_intercept_do_msr_write(ecx, regs)) -+ { -+ goto done_1; -+ } -+ - msr_content = (u32)regs->eax | ((u64)regs->edx << 32); - - hvmtrace_msr_write(v, ecx, msr_content); -@@ -1907,6 +1920,7 @@ static void svm_do_msr_access( - } - break; - } -+done_1: - - inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL); - } -Index: xen-3.2.1-testing/xen/arch/x86/hvm/vmx/vmx.c -=================================================================== ---- xen-3.2.1-testing.orig/xen/arch/x86/hvm/vmx/vmx.c -+++ xen-3.2.1-testing/xen/arch/x86/hvm/vmx/vmx.c -@@ -49,6 +49,7 @@ - #include - #include - #include -+#include - - enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised }; - -@@ -61,6 +62,7 @@ static void vmx_install_vlapic_mapping(s - static void vmx_update_guest_cr(struct vcpu *v, unsigned int cr); - static void vmx_update_guest_efer(struct vcpu *v); - -+ - static int vmx_domain_initialise(struct domain *d) - { - return vmx_alloc_vlapic_mapping(d); -@@ -1248,7 +1250,8 @@ void vmx_cpuid_intercept( - unsigned int count = *ecx; - - #ifdef VMXASSIST -- if ( input == 0x40000003 ) -+ if (( input == 0x40000003 ) && -+ (vmx_guest_x86_mode(current) == 0)) - { - /* - * NB. Unsupported interface for private use of VMXASSIST only. 
-@@ -1319,12 +1322,13 @@ void vmx_cpuid_intercept( - - static void vmx_do_cpuid(struct cpu_user_regs *regs) - { -- unsigned int eax, ebx, ecx, edx; -+ unsigned int eax, ebx, ecx, edx, input; - - eax = regs->eax; - ebx = regs->ebx; - ecx = regs->ecx; - edx = regs->edx; -+ input = eax; - - vmx_cpuid_intercept(&eax, &ebx, &ecx, &edx); - -@@ -1332,6 +1336,7 @@ static void vmx_do_cpuid(struct cpu_user - regs->ebx = ebx; - regs->ecx = ecx; - regs->edx = edx; -+ ext_intercept_do_cpuid(input, regs); - } - - #define CASE_GET_REG_P(REG, reg) \ -@@ -2316,6 +2321,9 @@ int vmx_msr_read_intercept(struct cpu_us - - HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx); - -+ if (ext_intercept_do_msr_read(ecx, regs)) -+ goto done; -+ - switch ( ecx ) - { - case MSR_IA32_TSC: -@@ -2499,6 +2507,9 @@ int vmx_msr_write_intercept(struct cpu_u - HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x", - ecx, (u32)regs->eax, (u32)regs->edx); - -+ if (ext_intercept_do_msr_write(ecx, regs)) -+ return 1; -+ - msr_content = (u32)regs->eax | ((u64)regs->edx << 32); - - hvmtrace_msr_write(v, ecx, msr_content); -Index: xen-3.2.1-testing/xen/include/asm-x86/hvm/domain.h -=================================================================== ---- xen-3.2.1-testing.orig/xen/include/asm-x86/hvm/domain.h -+++ xen-3.2.1-testing/xen/include/asm-x86/hvm/domain.h -@@ -74,6 +74,10 @@ struct hvm_domain { - /* Pass-through */ - struct hvm_iommu hvm_iommu; - -+ /* Hvm extension handle */ -+ void *ext_handle; /* will be NULL on creation (memset)*/ -+ struct extension_intercept_vector *ext_vector; -+ - bool_t hap_enabled; - }; - -Index: xen-3.2.1-testing/xen/include/public/hvm/params.h -=================================================================== ---- xen-3.2.1-testing.orig/xen/include/public/hvm/params.h -+++ xen-3.2.1-testing/xen/include/public/hvm/params.h -@@ -50,10 +50,12 @@ - - #define HVM_PARAM_BUFIOREQ_PFN 6 - -+#define HVM_PARAM_EXTEND_HYPERVISOR 7 -+ - #ifdef __ia64__ --#define HVM_PARAM_NVRAM_FD 7 --#define HVM_PARAM_VHPT_SIZE 8 --#define HVM_PARAM_BUFPIOREQ_PFN 9 -+#define HVM_PARAM_NVRAM_FD 8 -+#define HVM_PARAM_VHPT_SIZE 9 -+#define HVM_PARAM_BUFPIOREQ_PFN 10 - #endif - - /* -@@ -75,15 +77,16 @@ - * Missed interrupts are collapsed together and delivered as one 'late tick'. - * Guest time always tracks wallclock (i.e., real) time. - */ --#define HVM_PARAM_TIMER_MODE 10 -+//KYS Check the modifications done to this file -+#define HVM_PARAM_TIMER_MODE 11 - #define HVMPTM_delay_for_missed_ticks 0 - #define HVMPTM_no_delay_for_missed_ticks 1 - #define HVMPTM_no_missed_ticks_pending 2 - #define HVMPTM_one_missed_tick_pending 3 - - /* Boolean: Enable virtual HPET (high-precision event timer)? 
(x86-only) */ --#define HVM_PARAM_HPET_ENABLED 11 -+#define HVM_PARAM_HPET_ENABLED 12 - --#define HVM_NR_PARAMS 12 -+#define HVM_NR_PARAMS 13 - - #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ -Index: xen-3.2.1-testing/tools/python/xen/xend/XendConstants.py -=================================================================== ---- xen-3.2.1-testing.orig/tools/python/xen/xend/XendConstants.py -+++ xen-3.2.1-testing/tools/python/xen/xend/XendConstants.py -@@ -43,11 +43,12 @@ HVM_PARAM_STORE_EVTCHN = 2 - HVM_PARAM_PAE_ENABLED = 4 - HVM_PARAM_IOREQ_PFN = 5 - HVM_PARAM_BUFIOREQ_PFN = 6 --HVM_PARAM_NVRAM_FD = 7 --HVM_PARAM_VHPT_SIZE = 8 --HVM_PARAM_BUFPIOREQ_PFN = 9 --HVM_PARAM_TIMER_MODE = 10 --HVM_PARAM_HPET_ENABLED = 11 -+HVM_PARAM_EXTEND_HYPERVISOR = 7 -+HVM_PARAM_NVRAM_FD = 8 -+HVM_PARAM_VHPT_SIZE = 9 -+HVM_PARAM_BUFPIOREQ_PFN = 10 -+HVM_PARAM_TIMER_MODE = 11 -+HVM_PARAM_HPET_ENABLED = 12 - - restart_modes = [ - "restart", -Index: xen-3.2.1-testing/xen/arch/x86/hvm/Makefile -=================================================================== ---- xen-3.2.1-testing.orig/xen/arch/x86/hvm/Makefile -+++ xen-3.2.1-testing/xen/arch/x86/hvm/Makefile -@@ -1,5 +1,6 @@ - subdir-y += svm - subdir-y += vmx -+subdir-y += hvm_ext - - obj-y += hvm.o - obj-y += i8254.o -Index: xen-3.2.1-testing/xen/arch/x86/hvm/hvm.c -=================================================================== ---- xen-3.2.1-testing.orig/xen/arch/x86/hvm/hvm.c -+++ xen-3.2.1-testing/xen/arch/x86/hvm/hvm.c -@@ -42,6 +42,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -103,6 +104,7 @@ void hvm_migrate_timers(struct vcpu *v) - rtc_migrate_timers(v); - hpet_migrate_timers(v); - pt_migrate(v); -+ ext_intercept_do_migrate_timers(v); - } - - void hvm_do_resume(struct vcpu *v) -@@ -266,6 +268,7 @@ void hvm_domain_relinquish_resources(str - - void hvm_domain_destroy(struct domain *d) - { -+ ext_intercept_domain_destroy(d); - hvm_funcs.domain_destroy(d); - vioapic_deinit(d); - hvm_destroy_cacheattr_region_list(d); -@@ -434,8 +437,14 @@ int hvm_vcpu_initialise(struct vcpu *v) - { - int rc; - -+ if ((rc = ext_intercept_vcpu_initialize(v)) != 0) -+ goto fail1; -+ - if ( (rc = vlapic_init(v)) != 0 ) -+ { -+ ext_intercept_vcpu_destroy(v); - goto fail1; -+ } - - if ( (rc = hvm_funcs.vcpu_initialise(v)) != 0 ) - goto fail2; -@@ -483,6 +492,7 @@ int hvm_vcpu_initialise(struct vcpu *v) - hvm_funcs.vcpu_destroy(v); - fail2: - vlapic_destroy(v); -+ ext_intercept_vcpu_destroy(v); - fail1: - return rc; - } -@@ -491,6 +501,7 @@ void hvm_vcpu_destroy(struct vcpu *v) - { - xfree(v->arch.hvm_vcpu.mtrr.var_ranges); - -+ ext_intercept_vcpu_destroy(v); - vlapic_destroy(v); - hvm_funcs.vcpu_destroy(v); - -@@ -1601,6 +1612,10 @@ int hvm_do_hypercall(struct cpu_user_reg - case 0: - break; - } -+ if (ext_intercept_do_hypercall(regs)) -+ { -+ return HVM_HCALL_completed; -+ } - - if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] ) - { -@@ -1755,6 +1770,7 @@ int hvm_bringup_ap(int vcpuid, int tramp - vcpu_wake(v); - - gdprintk(XENLOG_INFO, "AP %d bringup succeeded.\n", vcpuid); -+ ext_intercept_vcpu_up(v); - return 0; - } - -@@ -1992,6 +2008,9 @@ long do_hvm_op(unsigned long op, XEN_GUE - if ( a.value > HVMPTM_one_missed_tick_pending ) - goto param_fail; - break; -+ case HVM_PARAM_EXTEND_HYPERVISOR: -+ if (hvm_ext_bind(d, (int)a.value)) -+ goto param_fail; - } - d->arch.hvm_domain.params[a.index] = a.value; - rc = 0; -Index: xen-3.2.1-testing/xen/arch/x86/x86_64/asm-offsets.c 
-=================================================================== ---- xen-3.2.1-testing.orig/xen/arch/x86/x86_64/asm-offsets.c -+++ xen-3.2.1-testing/xen/arch/x86/x86_64/asm-offsets.c -@@ -148,4 +148,7 @@ void __dummy__(void) - BLANK(); - - OFFSET(CPUINFO_ext_features, struct cpuinfo_x86, x86_capability[1]); -+ BLANK(); -+ -+ OFFSET(DOM_ext_vector, struct domain, arch.hvm_domain.ext_vector); - } -Index: xen-3.2.1-testing/xen/arch/x86/hvm/vmx/entry.S -=================================================================== ---- xen-3.2.1-testing.orig/xen/arch/x86/hvm/vmx/entry.S -+++ xen-3.2.1-testing/xen/arch/x86/hvm/vmx/x86_64/entry.S -@@ -116,6 +116,14 @@ vmx_process_softirqs: - ALIGN - ENTRY(vmx_asm_do_vmentry) - GET_CURRENT(%rbx) -+ mov VCPU_domain(%rbx),%rax -+ mov DOM_ext_vector(%rax),%rdx -+ test %rdx,%rdx -+ je vmx_no_ext_vector -+ sti -+ callq *(%rdx) -+vmx_no_ext_vector: -+ - cli # tests must not race interrupts - - movl VCPU_processor(%rbx),%eax -Index: xen-3.2.1-testing/xen/arch/x86/hvm/svm/entry.S -=================================================================== ---- xen-3.2.1-testing.orig/xen/arch/x86/hvm/svm/entry.S -+++ xen-3.2.1-testing/xen/arch/x86/hvm/svm/entry.S -@@ -37,6 +37,14 @@ - - ENTRY(svm_asm_do_resume) - GET_CURRENT(%rbx) -+ mov VCPU_domain(%rbx),%rax -+ mov DOM_ext_vector(%rax),%rdx -+ test %rdx,%rdx -+ je svm_no_ext_vector -+ sti -+ callq *(%rdx) -+svm_no_ext_vector: -+ - CLGI - - movl VCPU_processor(%rbx),%eax -Index: xen-3.2.1-testing/xen/arch/x86/hvm/save.c -=================================================================== ---- xen-3.2.1-testing.orig/xen/arch/x86/hvm/save.c -+++ xen-3.2.1-testing/xen/arch/x86/hvm/save.c -@@ -23,6 +23,8 @@ - - #include - #include -+#include -+#include - - void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr) - { -@@ -31,8 +33,7 @@ void arch_hvm_save(struct domain *d, str - /* Save some CPUID bits */ - cpuid(1, &eax, &ebx, &ecx, &edx); - hdr->cpuid = eax; -- -- hdr->pad0 = 0; -+ hdr->ext_id = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR]; - } - - int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr) -@@ -61,6 +62,9 @@ int arch_hvm_load(struct domain *d, stru - - /* VGA state is not saved/restored, so we nobble the cache. 
*/ - d->arch.hvm_domain.stdvga.cache = 0; -+ d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] = hdr->ext_id; -+ if (hvm_ext_bind(d, hdr->ext_id)) -+ return -1; - - return 0; - } -Index: xen-3.2.1-testing/xen/include/public/arch-x86/hvm/save.h -=================================================================== ---- xen-3.2.1-testing.orig/xen/include/public/arch-x86/hvm/save.h -+++ xen-3.2.1-testing/xen/include/public/arch-x86/hvm/save.h -@@ -38,7 +38,7 @@ struct hvm_save_header { - uint32_t version; /* File format version */ - uint64_t changeset; /* Version of Xen that saved this file */ - uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */ -- uint32_t pad0; -+ uint32_t ext_id; /* extension ID */ - }; - - DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header); -@@ -422,9 +422,30 @@ struct hvm_hw_mtrr { - - DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr); - -+struct hvm_ns_veridian_dom { -+ uint64_t guestid_msr; -+ uint64_t hypercall_msr; -+ uint32_t long_mode; -+ uint32_t pad0; -+}; -+DECLARE_HVM_SAVE_TYPE(NS_VERIDIAN_DOM, 15, struct hvm_ns_veridian_dom); -+ -+struct hvm_ns_veridian_cpu { -+ uint64_t control_msr; -+ uint64_t version_msr; -+ uint64_t sief_msr; -+ uint64_t simp_msr; -+ uint64_t eom_msr; -+ uint64_t int_msr[16]; -+ struct { -+ uint64_t config; -+ uint64_t count; -+ } timers[4]; -+}; -+DECLARE_HVM_SAVE_TYPE(NS_VERIDIAN_CPU, 16, struct hvm_ns_veridian_cpu); - /* - * Largest type-code in use - */ --#define HVM_SAVE_CODE_MAX 14 -+#define HVM_SAVE_CODE_MAX 16 - - #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ diff --git a/ns_xen_extension.patch b/ns_xen_extension.patch deleted file mode 100644 index d710b58..0000000 --- a/ns_xen_extension.patch +++ /dev/null @@ -1,4653 +0,0 @@ -%patch -Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/Makefile -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/Makefile 2008-04-23 10:58:49.000000000 -0400 -@@ -0,0 +1,3 @@ -+subdir-y += novell -+ -+obj-y += hvm_ext.o -Index: xen-3.2-testing/xen/include/asm-x86/hvm/hvm_extensions.h -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ xen-3.2-testing/xen/include/asm-x86/hvm/hvm_extensions.h 2008-04-23 10:58:49.000000000 -0400 -@@ -0,0 +1,252 @@ -+/**************************************************************************** -+ | -+ | Copyright (c) [2007, 2008] Novell, Inc. -+ | All Rights Reserved. -+ | -+ | This program is free software; you can redistribute it and/or -+ | modify it under the terms of version 2 of the GNU General Public License as -+ | published by the Free Software Foundation. -+ | -+ | This program is distributed in the hope that it will be useful, -+ | but WITHOUT ANY WARRANTY; without even the implied warranty of -+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ | GNU General Public License for more details. -+ | -+ | You should have received a copy of the GNU General Public License -+ | along with this program; if not, contact Novell, Inc. -+ | -+ | To contact Novell about this file by physical or electronic mail, -+ | you may find current contact information at www.novell.com -+ | -+ |*************************************************************************** -+*/ -+ -+/* -+ * hvm_extensions.h -+ * This file implements a framework for extending the hypervisor -+ * functionality in a modular fashion. 
The framework is comprised of -+ * two components: A) A set of intercepts that will allow the extension -+ * module to implement its functionality by intercepting the corresponding -+ * code paths in Xen and B) A controlled runtime for the extension module. -+ * Initially the goal was to pacakage the extension module as a boot-time -+ * loadable module. This may not be the way we wend up packaging it. -+ * -+ * Engineering Contact: K. Y. Srinivasan -+ */ -+ -+#ifndef HVM_EXTENSION_H -+#define HVM_EXTENSION_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+ -+/* -+ * Hypervisor extension hooks. -+ */ -+typedef struct extension_intercept_vector { -+ /* Do not move the first field (do_continuation). Offset -+ * hardcoded in assembly files exits.S (VMX and SVM). -+ */ -+ void (*do_continuation)(void); -+ int (*domain_create)(struct domain *d); -+ void (*domain_destroy)(struct domain *d); -+ int (*vcpu_initialize)(struct vcpu *v); -+ void (*vcpu_destroy)(struct vcpu *v); -+ int (*do_cpuid)(uint32_t idx, struct cpu_user_regs *regs); -+ int (*do_msr_read)(uint32_t idx, struct cpu_user_regs *regs); -+ int (*do_msr_write)(uint32_t idx, struct cpu_user_regs *regs); -+ int (*do_hypercall)(struct cpu_user_regs *pregs); -+ void (*do_migrate_timers)(struct vcpu *v); -+ void (*vcpu_up)(struct vcpu *v); -+} extension_intercept_vector_t; -+ -+static inline int -+ext_intercept_domain_create(struct domain *d) -+{ -+ if (d->arch.hvm_domain.ext_vector) { -+ return(d->arch.hvm_domain.ext_vector->domain_create(d)); -+ } -+ return (0); -+} -+ -+static inline void -+ext_intercept_domain_destroy(struct domain *d) -+{ -+ if (d->arch.hvm_domain.ext_vector) { -+ d->arch.hvm_domain.ext_vector->domain_destroy(d); -+ } -+} -+ -+static inline int -+ext_intercept_vcpu_initialize(struct vcpu *v) -+{ -+ struct domain *d = v->domain; -+ if (d->arch.hvm_domain.ext_vector) { -+ return(d->arch.hvm_domain.ext_vector->vcpu_initialize(v)); -+ } -+ return (0); -+} -+ -+ -+static inline void -+ext_intercept_vcpu_up(struct vcpu *v) -+{ -+ struct domain *d = current->domain; -+ if (d->arch.hvm_domain.ext_vector) { -+ return(d->arch.hvm_domain. -+ ext_vector->vcpu_up(v)); -+ } -+} -+ -+static inline void -+ext_intercept_vcpu_destroy(struct vcpu *v) -+{ -+ struct domain *d = v->domain; -+ if (d->arch.hvm_domain.ext_vector) { -+ d->arch.hvm_domain.ext_vector->vcpu_destroy(v); -+ } -+} -+ -+static inline int -+ext_intercept_do_cpuid(uint32_t idx, struct cpu_user_regs *regs) -+{ -+ struct domain *d = current->domain; -+ if (d->arch.hvm_domain.ext_vector) { -+ return(d->arch.hvm_domain.ext_vector->do_cpuid( -+ idx, regs)); -+ } -+ return (0); -+} -+ -+static inline int -+ext_intercept_do_msr_read(uint32_t idx, struct cpu_user_regs *regs) -+{ -+ struct domain *d = current->domain; -+ if (d->arch.hvm_domain.ext_vector) { -+ return(d->arch.hvm_domain. -+ ext_vector->do_msr_read(idx, regs)); -+ } -+ return (0); -+} -+static inline int -+ext_intercept_do_msr_write(uint32_t idx, struct cpu_user_regs *regs) -+{ -+ struct domain *d = current->domain; -+ if (d->arch.hvm_domain.ext_vector) { -+ return(d->arch.hvm_domain. -+ ext_vector->do_msr_write(idx, regs)); -+ } -+ return (0); -+} -+ -+static inline int -+ext_intercept_do_hypercall(struct cpu_user_regs *regs) -+{ -+ struct domain *d = current->domain; -+ if (d->arch.hvm_domain.ext_vector) { -+ return(d->arch.hvm_domain. 
-+ ext_vector->do_hypercall(regs)); -+ } -+ return (0); -+} -+ -+static inline void -+ext_intercept_do_migrate_timers(struct vcpu *v) -+{ -+ struct domain *d = current->domain; -+ if (d->arch.hvm_domain.ext_vector) { -+ return(d->arch.hvm_domain. -+ ext_vector->do_migrate_timers(v)); -+ } -+} -+ -+static inline void -+ext_intercept_do_continuation(void) -+{ -+ struct domain *d = current->domain; -+ if (d->arch.hvm_domain.ext_vector) { -+ d->arch.hvm_domain. -+ ext_vector->do_continuation(); -+ } -+} -+ -+/* -+ * Base hypervisor support available to extension modules. -+ * We may choose to do away with this level of indirection! -+ * It may still be useful to have a controlled environment for the -+ * extension modules. -+ */ -+typedef struct xen_call_vector { -+ /* -+ * We may want to embed version/compiler info here to avoid mismatches -+ */ -+ struct hvm_function_table *hvmFuncTable; -+ struct hvm_mmio_handler *mmIoHandler; -+ void (*extPanic)(const char *s, ...); -+ void (*extPrintk)(const char *format, ...); -+ void (*extPostInterrupt)(struct vcpu *v, int vector, int type); -+ void (*extSetTimer)(struct timer *timer, s_time_t expires); -+ s_time_t (*extGetTimeSinceBoot)(void); -+ void * (*extGetVirtFromGmfn)(struct domain *d, unsigned long gmfn); -+ unsigned long (*extGetMfnFromGmfn)(struct domain *d, unsigned long gmfn); -+ unsigned long (*extGetMfnFromGva)(unsigned long va); -+ void (*extUnmapDomainPage)(void *p); -+ void *(*extAllocMem)(size_t size); -+ void (*extFreeMem)(void *ptr); -+ enum hvm_copy_result (*extCopyToGuestPhysical)(paddr_t paddr, void *buf, int size); -+ enum hvm_copy_result (*extCopyFromGuestPhysical)(void *buf, paddr_t paddr, int size); -+ void *(*extAllocDomHeapPage)(void); -+ void (*extFreeDomHeapPage)(void *); -+ void * (*extGetVirtFromPagePtr)(void *); -+ void (*extVcpuPause)(struct vcpu *v); -+ void (*extVcpuUnPause)(struct vcpu *v); -+ void (*extArchGetDomainInfoCtxt)(struct vcpu *v, -+ struct vcpu_guest_context *); -+ int (*extArchSetDomainInfoCtxt)(struct vcpu *v, -+ struct vcpu_guest_context *); -+ int (*extCpuIsIntel)(void ); -+ int (*extWrmsrHypervisorRegs)(uint32_t idx, uint32_t eax, -+ uint32_t edx); -+ void (*extKillTimer)(struct timer *timer); -+ void (*extMigrateTimer)(struct timer *timer, unsigned int new_cpu); -+} xen_call_vector_t; -+ -+#define MAX_EXTENSION_ID 1 -+ -+/* -+ * int hvm_ext_bind(struct domain *d, int ext_id) -+ * Bind the specified domain to the specified extension module. -+ * -+ * Calling/Exit State: -+ * None. -+ * -+ * Remarks: -+ * The goal is to support per-domain extension modules. Domain -+ * creating tools will have to specify the needed extension -+ * module ID. For now it is hard coded. -+ */ -+int hvm_ext_bind(struct domain *d, int ext_id); -+ -+/* -+ * int hvm_ext_register(int ext_id, -+ * struct extension_intercept_vector *ext_vector, -+ * struct xen_call_vector *xen_vector) -+ * Register the extension module with the hypervisor -+ * Calling/Exit State: -+ * None. 
-+ */ -+ -+int hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector, -+ struct xen_call_vector *xen_vector); -+ -+ -+#endif -Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/hvm_ext.c -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/hvm_ext.c 2008-04-23 10:58:49.000000000 -0400 -@@ -0,0 +1,350 @@ -+/**************************************************************************** -+ | -+ | Copyright (c) [2007, 2008] Novell, Inc. -+ | All Rights Reserved. -+ | -+ | This program is free software; you can redistribute it and/or -+ | modify it under the terms of version 2 of the GNU General Public License as -+ | published by the Free Software Foundation. -+ | -+ | This program is distributed in the hope that it will be useful, -+ | but WITHOUT ANY WARRANTY; without even the implied warranty of -+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ | GNU General Public License for more details. -+ | -+ | You should have received a copy of the GNU General Public License -+ | along with this program; if not, contact Novell, Inc. -+ | -+ | To contact Novell about this file by physical or electronic mail, -+ | you may find current contact information at www.novell.com -+ | -+ |*************************************************************************** -+*/ -+ -+/* -+ * hvm_ext.c -+ * Glue code for implementing the extension module. -+ * -+ * Engineering Contact: K. Y. Srinivasan -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+struct extension_intercept_vector *intercept_vector; -+ -+/* -+ * static void -+ * hvm_ext_inject_interrupt(struct vcpu *v, int vector, int type) -+ * Inject the specified exception to the specified virtual cpu. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static void -+hvm_ext_inject_interrupt(struct vcpu *v, int vector, int type) -+{ -+ struct vlapic *vlapic = vcpu_vlapic(v); -+ -+ /* -+ * XXXKYS: Check the trigger mode. -+ */ -+ if (vlapic_set_irq(vlapic, vector, 1)) { -+ vcpu_kick(v); -+ } -+} -+ -+/* -+ * static void -+ * hvm_ext_set_timer(struct timer *timer, s_time_t expires) -+ * Set a timeout. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static void -+hvm_ext_set_timer(struct timer *timer, s_time_t expires) -+{ -+ set_timer(timer, expires); -+} -+ -+/* -+ * static void -+ * hvm_ext_kill_timer(struct timer *timer) -+ * Kill the specified timer. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static void -+hvm_ext_kill_timer(struct timer *timer) -+{ -+ kill_timer(timer); -+} -+ -+/* -+ * static void -+ * hvm_ext_migrate_timer(struct timer *timer, unsigned int new_cpu) -+ * Migrate the timer to the new cpu. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static void -+hvm_ext_migrate_timer(struct timer *timer, unsigned int new_cpu) -+{ -+ migrate_timer(timer, new_cpu); -+} -+ -+ -+/* -+ * static void * -+ * hvm_ext_get_virt_from_gmfn(struct domain *d, unsigned long gmfn) -+ * Given a guest frame number return a virtual address at which -+ * the specified page can be accessed in the hypervisor. -+ * -+ * Calling/Exit State: -+ * None. 
-+ */ -+static void * -+hvm_ext_get_virt_from_gmfn(struct domain *d, unsigned long gmfn) -+{ -+ unsigned long mfn = gmfn_to_mfn(d, gmfn); -+ if (mfn == INVALID_MFN) { -+ return (NULL); -+ } -+ return (map_domain_page_global(mfn)); -+} -+ -+/* -+ * static unsigned long -+ * hvm_ext_get_mfn_from_gmfn(struct domain *d, unsigned long gmfn) -+ * Get the machine frame number given the guest frame number. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static unsigned long -+hvm_ext_get_mfn_from_gmfn(struct domain *d, unsigned long gmfn) -+{ -+ return (gmfn_to_mfn(d, gmfn)); -+} -+ -+/* -+ * static unsigned long -+ * hvm_ext_get_mfn_from_gva(unsigned long va) -+ * Given the guest virtual address return the machine frame number backing the -+ * address. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static unsigned long -+hvm_ext_get_mfn_from_gva(unsigned long va) -+{ -+ uint32_t pfec = PFEC_page_present; -+ unsigned long gfn; -+ gfn = paging_gva_to_gfn(current, va, &pfec); -+ return (gmfn_to_mfn((current->domain), gfn)); -+} -+ -+/* -+ * static void * -+ * hvm_ext_alloc_mem(size_t size) -+ * Allocate specified bytes of memory. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static void * -+hvm_ext_alloc_mem(size_t size) -+{ -+ return (xmalloc_bytes(size)); -+} -+ -+/* -+ * static void * -+ * hvm_ext_alloc_domheap_page(void) -+ * Allocate a page from the per-domain heap. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static void * -+hvm_ext_alloc_domheap_page(void) -+{ -+ return (alloc_domheap_page(NULL)); -+} -+ -+/* -+ * static void -+ * hvm_ext_free_domheap_page(void *p) -+ * Free a dom heap page. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static void -+hvm_ext_free_domheap_page(void *p) -+{ -+ free_domheap_pages(p, 0); -+} -+ -+/* -+ * static void * -+ * hvm_ext_get_virt_from_page_ptr(void *page) -+ * Map the specified page a return a hypervisor VA. -+ * -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static void * -+hvm_ext_get_virt_from_page_ptr(void *page) -+{ -+ struct page_info *pg = page; -+ unsigned long mfn = page_to_mfn(pg); -+ return (map_domain_page_global(mfn)); -+} -+ -+extern struct cpuinfo_x86 boot_cpu_data; -+ -+/* -+ * static int -+ * hvm_ext_cpu_is_intel(void) -+ * Check if the CPU vendor is Intel. -+ * -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static int -+hvm_ext_cpu_is_intel(void) -+{ -+ if (boot_cpu_data.x86_vendor == 0) { -+ return (1); -+ } -+ return (0); -+} -+ -+/* -+ * int -+ * hvm_ext_bind(struct domain *d, int ext_id) -+ * Bind the specified domain with the specified extension module. -+ * -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+int -+hvm_ext_bind(struct domain *d, int ext_id) -+{ -+ int i; -+ /* -+ * XXXKYS: Assuming that this function will be called before the -+ * new domain begins to run. It is critical that this be the case. -+ */ -+ if (ext_id == 0) { -+ /* -+ * This is the default value for this parameter. -+ */ -+ return (0); -+ } -+ d->arch.hvm_domain.ext_vector = intercept_vector; -+ /* -+ * Let the extension initialize its state. -+ */ -+ if (intercept_vector->domain_create(d)) { -+ return (1); -+ } -+ for (i=0; i < MAX_VIRT_CPUS; i++) { -+ if (d->vcpu[i] != NULL) { -+ if (intercept_vector->vcpu_initialize(d->vcpu[i])) { -+ int j; -+ for (j= (i-1); j >=0; j--) { -+ intercept_vector->vcpu_destroy( -+ d->vcpu[j]); -+ } -+ intercept_vector->domain_destroy(d); -+ return (1); -+ } -+ } -+ } -+ return (0); -+} -+ -+ -+void extPanic(const char *fmt, ...) 
-+{ -+ domain_crash_synchronous(); -+} -+ -+/* -+ * For now we will support only one extension; id==1! -+ */ -+ -+extern struct hvm_function_table hvm_funcs; -+extern struct hvm_mmio_handler vlapic_mmio_handler; -+ -+/* -+ * int -+ * hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector, -+ * -+ * Register the invoking extension module with the hypervisor. -+ * -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+int -+hvm_ext_register(int ext_id, struct extension_intercept_vector *ext_vector, -+ struct xen_call_vector *xen_vector) -+{ -+ ASSERT(ext_id == 1); -+ intercept_vector = ext_vector; -+ /* -+ * Populate the vector of services from the xen side; ultimately -+ * we may decide to get rid of this level of indirection; it may -+ * still be useful to limit the breadth of xen dependency here. -+ */ -+ xen_vector->hvmFuncTable = &hvm_funcs; -+ xen_vector->mmIoHandler = &vlapic_mmio_handler; -+ xen_vector->extPanic = extPanic; -+ xen_vector->extPrintk = printk; -+ xen_vector->extPostInterrupt = hvm_ext_inject_interrupt; -+ xen_vector->extSetTimer = hvm_ext_set_timer; -+ xen_vector->extKillTimer = hvm_ext_kill_timer; -+ xen_vector->extMigrateTimer = hvm_ext_migrate_timer; -+ xen_vector->extGetTimeSinceBoot = get_s_time; -+ xen_vector->extGetVirtFromGmfn = hvm_ext_get_virt_from_gmfn; -+ xen_vector->extGetMfnFromGmfn = hvm_ext_get_mfn_from_gmfn; -+ -+ xen_vector->extGetMfnFromGva = hvm_ext_get_mfn_from_gva; -+#ifdef CONFIG_DOMAIN_PAGE -+ xen_vector->extUnmapDomainPage = unmap_domain_page_global; -+#endif -+ xen_vector->extAllocMem = hvm_ext_alloc_mem; -+ xen_vector->extFreeMem = xfree; -+ xen_vector->extCopyToGuestPhysical = hvm_copy_to_guest_phys; -+ xen_vector->extCopyFromGuestPhysical = hvm_copy_from_guest_phys; -+ xen_vector->extAllocDomHeapPage = hvm_ext_alloc_domheap_page; -+ xen_vector->extFreeDomHeapPage = hvm_ext_free_domheap_page; -+ xen_vector->extGetVirtFromPagePtr = hvm_ext_get_virt_from_page_ptr; -+ xen_vector->extVcpuPause = vcpu_pause; -+ xen_vector->extVcpuUnPause = vcpu_unpause; -+ xen_vector->extArchGetDomainInfoCtxt = arch_get_info_guest; -+ xen_vector->extArchSetDomainInfoCtxt = arch_set_info_guest; -+ xen_vector->extCpuIsIntel = hvm_ext_cpu_is_intel; -+ xen_vector->extWrmsrHypervisorRegs = wrmsr_hypervisor_regs; -+ -+ return 0; -+} -Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/Makefile -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/Makefile 2008-04-23 10:58:49.000000000 -0400 -@@ -0,0 +1,2 @@ -+obj-y += nsintercept.o -+obj-y += nshypercall.o -Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/ns_errno.h -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/ns_errno.h 2008-04-23 10:58:49.000000000 -0400 -@@ -0,0 +1,62 @@ -+/**************************************************************************** -+ | -+ | Copyright (c) [2007, 2008] Novell, Inc. -+ | All Rights Reserved. -+ | -+ | This program is free software; you can redistribute it and/or -+ | modify it under the terms of version 2 of the GNU General Public License as -+ | published by the Free Software Foundation. -+ | -+ | This program is distributed in the hope that it will be useful, -+ | but WITHOUT ANY WARRANTY; without even the implied warranty of -+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -+ | GNU General Public License for more details. -+ | -+ | You should have received a copy of the GNU General Public License -+ | along with this program; if not, contact Novell, Inc. -+ | -+ | To contact Novell about this file by physical or electronic mail, -+ | you may find current contact information at www.novell.com -+ | -+ |*************************************************************************** -+*/ -+ -+/* -+ * ns_errno.h -+ * Error codes for the Novell Shim. -+ * -+ * Engineering Contact: K. Y. Srinivasan -+ */ -+ -+#ifndef NS_ERRNO_H -+#define NS_ERRNO_H -+ -+#define NS_STATUS_SUCCESS 0x0000 -+#define NS_STATUS_INVALID_HYPERCALL_CODE 0x0002 -+#define NS_STATUS_INVALID_HYPERCALL_INPUT 0x0003 -+#define NS_STATUS_INVALID_ALIGNMENT 0x0004 -+#define NS_STATUS_INVALID_PARAMETER 0x0005 -+#define NS_STATUS_ACCESS_DENIED 0x0006 -+#define NS_STATUS_INVALID_PARTITION_STATE 0x0007 -+#define NS_STATUS_OPERATION_DENIED 0x0008 -+#define NS_STATUS_UNKNOWN_PROPERTY 0x0009 -+#define NS_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0x000A -+#define NS_STATUS_INSUFFICIENT_MEMORY 0x000B -+#define NS_STATUS_PARTITION_TOO_DEEP 0x000C -+#define NS_STATUS_INVALID_PARTITION_ID 0x000D -+#define NS_STATUS_INVALID_VP_INDEX 0x000E -+#define NS_STATUS_UNABLE_TO_RESTORE_STATE 0x000F -+#define NS_STATUS_NOT_FOUND 0x0010 -+#define NS_STATUS_INVALID_PORT_ID 0x0011 -+#define NS_STATUS_INVALID_CONNECTION_ID 0x0012 -+#define NS_STATUS_INSUFFICIENT_BUFFERS 0x0013 -+#define NS_STATUS_NOT_ACKNOWLEDGED 0x0014 -+#define NS_STATUS_INVALID_VP_STATE 0x0015 -+#define NS_STATUS_ACKNOWLEDGED 0x0016 -+#define NS_STATUS_INVALID_SAVE_RESTORE_STATE 0x0017 -+#define NS_STATUS_NO_MEMORY_4PAGES 0x0100 -+#define NS_STATUS_NO_MEMORY_16PAGES 0x0101 -+#define NS_STATUS_NO_MEMORY_64PAGES 0x0102 -+#define NS_STATUS_NO_MEMORY_256PAGES 0x0103 -+#define NS_STATUS_NO_MEMORY_1024PAGES 0x0104 -+#endif -Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/ns_shim.h -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/ns_shim.h 2008-04-23 10:58:49.000000000 -0400 -@@ -0,0 +1,481 @@ -+/**************************************************************************** -+ | -+ | Copyright (c) [2007, 2008] Novell, Inc. -+ | All Rights Reserved. -+ | -+ | This program is free software; you can redistribute it and/or -+ | modify it under the terms of version 2 of the GNU General Public License as -+ | published by the Free Software Foundation. -+ | -+ | This program is distributed in the hope that it will be useful, -+ | but WITHOUT ANY WARRANTY; without even the implied warranty of -+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ | GNU General Public License for more details. -+ | -+ | You should have received a copy of the GNU General Public License -+ | along with this program; if not, contact Novell, Inc. -+ | -+ | To contact Novell about this file by physical or electronic mail, -+ | you may find current contact information at www.novell.com -+ | -+ |*************************************************************************** -+*/ -+ -+/* -+ * Novell Shim Implementation. -+ * -+ * Engineering Contact: K. Y. 
Srinivasan -+ */ -+ -+#ifndef NS_SHIM_H -+#define NS_SHIM_H -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include "nshypercall.h" -+ -+/* -+ * Synthetic MSR addresses -+ */ -+#define NS_MSR_GUEST_OS_ID 0x40000000 -+#define NS_MSR_HYPERCALL 0x40000001 -+#define NS_MSR_VP_INDEX 0x40000002 -+#define NS_MSR_SYSTEM_RESET 0x40000003 -+#define NS_MSR_TIME_REF_COUNT 0x40000020 -+#define NS_MSR_EOI 0x40000070 -+#define NS_MSR_ICR 0x40000071 -+#define NS_MSR_TPR 0x40000072 -+ -+#define NS_MSR_SCONTROL 0x40000080 -+#define NS_MSR_SVERSION 0x40000081 -+#define NS_MSR_SIEFP 0x40000082 -+#define NS_MSR_SIMP 0x40000083 -+#define NS_MSR_SEOM 0x40000084 -+#define NS_MSR_SINT0 0x40000090 -+#define NS_MSR_SINT1 0x40000091 -+#define NS_MSR_SINT2 0x40000092 -+#define NS_MSR_SINT3 0x40000093 -+#define NS_MSR_SINT4 0x40000094 -+#define NS_MSR_SINT5 0x40000095 -+#define NS_MSR_SINT6 0x40000096 -+#define NS_MSR_SINT7 0x40000097 -+#define NS_MSR_SINT8 0x40000098 -+#define NS_MSR_SINT9 0x40000099 -+#define NS_MSR_SINT10 0x4000009A -+#define NS_MSR_SINT11 0x4000009B -+#define NS_MSR_SINT12 0x4000009C -+#define NS_MSR_SINT13 0x4000009D -+#define NS_MSR_SINT14 0x4000009E -+#define NS_MSR_SINT15 0x4000009F -+ -+#define NS_MSR_TIMER0_CONFIG 0x400000B0 -+#define NS_MSR_TIMER0_COUNT 0x400000B1 -+#define NS_MSR_TIMER1_CONFIG 0x400000B2 -+#define NS_MSR_TIMER1_COUNT 0x400000B3 -+#define NS_MSR_TIMER2_CONFIG 0x400000B4 -+#define NS_MSR_TIMER2_COUNT 0x400000B5 -+#define NS_MSR_TIMER3_CONFIG 0x400000B6 -+#define NS_MSR_TIMER3_COUNT 0x400000B7 -+ -+/* -+ * MSR for supporting PV drivers on longhorn. -+ */ -+#define NS_MSR_PVDRV_HCALL 0x40001000 -+ -+/* -+ * MSR for supporting other enlightened oses. -+ */ -+#define NS_MSR_NONLH_GUEST_OS_ID 0x40001000 -+ -+/* -+ * Novell Shim VCPU flags. -+ * A VCPU is considered up when it is capable of invoking hypercalls. -+ */ -+#define NS_VCPU_BOOT_CPU 0x00000001 -+#define NS_VCPU_UP 0x00000002 -+ -+/* -+ * Novell shim flush flags. -+ */ -+ -+#define NS_FLUSH_TLB 0X01 -+#define NS_FLUSH_INVLPG 0X02 -+ -+/* -+ * We use the following global state to manage TLB flush requests from the -+ * guest. At most only one flush can be active in the guest; we may have to -+ * revisit this if this is a bottleneck. -+ */ -+typedef struct nsGlobalFlushState { -+ int cpuCount; //0 unused; else #cpus participating -+ cpumask_t waiters; //Cpus waiting for the flush block -+ struct vcpu *currentOwner; -+ u64 retVal; -+ flushVa_t *flushParam; -+ unsigned short repCount; -+} nsGlobalFlushState_t; -+ -+typedef struct nsSpinLock { -+ unsigned long flags; -+ spinlock_t spinLock; -+ struct nsVcpu *owner; -+ void *retAddr; -+} nsSpinLock_t; -+ -+/* -+ * Novell shim message structure. -+ */ -+typedef enum { -+ /* -+ * For now we only support timer messages -+ */ -+ nsMessageTypeNone = 0x00000000, -+ nsMessageTimerExpired = 0x80000010 -+} nsMessageType; -+ -+typedef struct nsTimerMessage { -+ nsMessageType messageType; -+ u8 pad1[3]; -+ u8 messageSize; -+ u32 timerIndex; -+ u32 pad2; -+ u64 expirationTime; -+} nsTimerMessage_t; -+ -+typedef struct nsMessage { -+ nsMessageType messageType; -+ uint8_t messageSize; -+ uint8_t flags; -+ uint8_t reserved[2]; -+ uint32_t reserved1; -+ uint64_t payLoad[30]; -+} nsMessage_t; -+ -+ -+typedef struct nsVcpTimerState { -+ u64 config; -+ u64 count; /*expiration time in 100ns units*/ -+ int timerIndex; -+ struct nsVcpu *thisCpu; -+ struct timer vcpuTimer; -+} nsVcpTimerState_t; -+ -+/* -+ * Stats structure. 
-+ */ -+ -+typedef struct { -+ u64 numSwitches; -+ u64 numFlushes; -+ u64 numFlushesPosted; -+ u64 numFlushRanges; -+ u64 numFlushRangesPosted; -+ -+ u64 numTprReads; -+ u64 numIcrReads; -+ u64 numEoiWrites; -+ u64 numTprWrites; -+ u64 numIcrWrites; -+ -+ u64 numGFSAcquires; -+ u64 numGFSReleases; -+ u64 numTlbFlushes; -+ u64 numInvlPages; -+ u64 numTimeOuts; -+} nsVcpuStats_t; -+ -+typedef struct nsVcpu { -+ /* -+ * Per-vcpu state to support the Novell shim; -+ */ -+ int nsVcplockDepth; -+ unsigned long nsVcpuFlags; -+ unsigned char nsVcpFlushRequest; -+ unsigned char nsVcpWaitingOnGFS; -+ unsigned char nsVcpFlushPending; -+ unsigned char nsVcpWaitingForCleanup; -+ unsigned short nsVcpRepCount; -+ /* -+ * Synthetic msrs. -+ */ -+ u64 nsVcpSControlMsr; -+ u64 nsVcpSVersionMsr; -+ u64 nsVcpSIefpMsr; -+ u64 nsVcpSimpMsr; -+ u64 nsVcpEomMsr; -+ -+ u64 nsVcpSIntMsr[16]; -+ /* -+ * Timer MSRs. -+ */ -+ nsVcpTimerState_t nsVcpTimers[4]; -+ void *nsVcpSiefPage; -+ void *nsVcpSimPage; -+ /* -+ * Hypercall input/output processing. -+ * We keep these pages mapped in the hypervisor space. -+ */ -+ void *nsVcpInputBuffer; /*input buffer virt address*/ -+ void *nsVcpInputBufferPage; /*input buffer struct page */ -+ void *nsVcpOutputBuffer; /*output buffer virt address*/ -+ void *nsVcpOutputBufferPage; /*output buffer struct page */ -+ struct vcpu *nsVcpXenVcpu; /*corresponding xen vcpu*/ -+ nsVcpuStats_t nsVcpStats; -+} nsVcpu_t; -+ -+/* -+ * Events of interest for gathering stats. -+ */ -+#define NS_CSWITCH 1 -+#define NS_FLUSH_VA_STAT 2 -+#define NS_FLUSH_RANGE 3 -+#define NS_FLUSH_VA_POSTED 4 -+#define NS_FLUSH_RANGE_POSTED 5 -+#define NS_TPR_READ 6 -+#define NS_ICR_READ 7 -+#define NS_TPR_WRITE 8 -+#define NS_ICR_WRITE 9 -+#define NS_EOI_WRITE 10 -+ -+#define NS_GFS_ACQUIRE 11 -+#define NS_GFS_RELEASE 12 -+#define NS_TLB_FLUSH 13 -+#define NS_INVL_PG 14 -+#define NS_TIMEOUTS 15 -+ -+void nsCollectStats(int event, nsVcpuStats_t *ststp); -+ -+#define NS_STATS //KYS: Temporary -+ -+#ifdef NS_STATS -+#define NS_STATS_COLLECT(event, statp) nsCollectStats(event, statp) -+#else -+define NS_STATS_COLLECT(event, statp) -+#endif -+ -+typedef struct nsPartition { -+ /* -+ * State maintained on a per guest basis to implement -+ * the Novell shim. -+ */ -+ s_time_t nsDomainBootTime; -+ nsSpinLock_t nsLock; -+ atomic_t nsNumVcpusActive; -+ u64 nsGuestIdMsr; -+ u64 nsHypercallMsr; -+ u64 nsPrivileges; -+ u64 nsSupportedFeatures; -+ unsigned long nsHypercallMfn; -+ int nsLongModeGuest; -+ /* -+ * Each VCPU here corresponds to the vcpu in the underlying hypervisor; -+ * they share the same ID. -+ */ -+ nsVcpu_t nsVcpuState[MAX_VIRT_CPUS]; -+ nsGlobalFlushState_t nsFlushState; -+} nsPartition_t; -+ -+/* -+ * Max CPUID leaves supported. -+ */ -+ -+#define NX_MAX_CPUID_LEAVES 5 -+ -+/* -+ * We don't want to intercept instructions coming from the hvm bootstrap code. -+ * -+ */ -+#define NS_BIOS_HIGH_ADDR -+/* -+ * Privilege flags. 
-+ */ -+ -+#define NS_ACCESS_VP_RUNTIME (1ULL << 0) -+#define NS_ACCESS_TIME_REF_CNT (1ULL << 1) -+#define NS_ACCESS_SYNC_MSRS (1ULL << 2) -+#define NS_ACCESS_SYNC_TIMERS (1ULL << 3) -+#define NS_ACCESS_APIC_MSRS (1ULL << 4) -+#define NS_ACCESS_PARTITION_ID (1ULL << 33) -+ -+#define nsGetCurrentPartition() \ -+((current)->domain->arch.hvm_domain.ext_handle) -+ -+#define nsGetCurrentVcpuIndex() (current)->vcpu_id -+ -+#define NS_PANIC(x) \ -+do {\ -+ nsXenVector.extPrintk("File is: %s\n", __FILE__);\ -+ nsXenVector.extPrintk("Line is: %d\n", __LINE__);\ -+ nsXenVector.extPanic((x));\ -+} while (0); -+ -+#define NS_ASSERT(x) \ -+do {\ -+ if (!(x)) \ -+ NS_PANIC("ASSERTION FAILED\n")\ -+} while (0); -+ -+#define nsDebugPrint(x) \ -+do { \ -+ nsXenVector.extPrintk("File is: %s\n", __FILE__);\ -+ nsXenVector.extPrintk("Line is: %d\n", __LINE__);\ -+ nsXenVector.extPrintk((x));\ -+} while (0); -+ -+/* Hooks into Xen */ -+extern xen_call_vector_t nsXenVector; -+ -+/* -+ * static inline int -+ * nsInvalidCpuState(void) -+ * Check to see if the calling CPU is in the "correct state" to invoke -+ * the functionality implemented in the Novell Shim (Adaptor). -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static inline int -+nsInvalidCpuState(void) -+{ -+ int cpuState; -+ cpuState = nsXenVector.hvmFuncTable->guest_x86_mode(current); -+ if ((cpuState == 4) || (cpuState == 8)) { -+ return (0); -+ } -+ return (1); -+} -+ -+/* -+ * inline u64 -+ * nsBuildHcallRetVal(int code, int reps) -+ * -+ * Given the return code and the number of successfully completed count, -+ * compose a return value compliant with the Viridian specification. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static inline u64 -+nsBuildHcallRetVal(int code, int reps) -+{ -+ u64 retVal=0; -+ retVal |= (code & 0xff); -+ retVal |= (((long long)(reps & 0xfff)) << 32); -+ return (retVal); -+} -+ -+ -+/* -+ * static inline void nsSetSysCallRetVal(struct cpu_user_regs *pregs, -+ * int longModeGuest, u64 retVal) -+ * Set the return value in the saved guest registers -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static inline void nsSetSysCallRetVal(struct cpu_user_regs *pregs, -+ int longModeGuest, u64 retVal) -+{ -+ if (longModeGuest) { -+ pregs->eax = retVal; -+ } else { -+ pregs->edx = (u32)(retVal >> 32); -+ pregs->eax = (u32)(retVal); -+ } -+} -+ -+/* -+ * static inline int -+ * nsPrivilegeCheck(nsPartition_t *curp, u64 flags) -+ * Check if the caller is privileged to perform the operation -+ * specified by the flags argument. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static inline int -+nsPrivilegeCheck(nsPartition_t *curp, u64 flags) -+{ -+ return ((curp->nsPrivileges & flags)? 1: 0); -+} -+ -+/* void -+ * nsHandleHyperCall(u64 opcode, u64 input, u64 output, -+ * u64 *retVal); -+ * Common entry point for handling all the extension hypercalls. -+ * -+ * Calling/Exit State: -+ * Based on the hypercall; the caller may give up the CPU while -+ * processing the hypercall. No locks should be held on entry and -+ * no locks will be held on return. -+ * -+ */ -+void -+nsHandleHyperCall(u64 opcode, u64 input, u64 output, -+ u64 *retVal); -+ -+/* -+ * void nsDoTlbFlush(void); -+ * Perform TLB flush on the invoking virtual CPU. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+void nsDoTlbFlush(void); -+ -+/* -+ * void -+ * nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock) -+ * Acquire the specified lock. -+ * -+ * Calling/Exit State: -+ * None. 
-+ */ -+ -+void nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *lock); -+ -+/* -+ * void -+ * nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock) -+ * Release the specified spin lock. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+void nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *lock); -+ -+/* -+ * void -+ * nsLockInit(nsSpinLock_t *nsLock) -+ * Initialize the specified spin lock. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+void nsLockInit(nsSpinLock_t *lock); -+ -+/* -+ * void nsPrintStats(nsPartition_t *curp, int i) -+ * Print the per-vcpu stats for the specified partition. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+void nsPrintStats(nsPartition_t *curp, int i); -+ -+#define NS_LOCK_OWNED(v, l) \ -+((l)->owner == (v)) -+#endif /*NS_SHIM_H */ -Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.c -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.c 2008-04-23 10:58:49.000000000 -0400 -@@ -0,0 +1,1232 @@ -+/**************************************************************************** -+ | -+ | Copyright (c) [2007, 2008] Novell, Inc. -+ | All Rights Reserved. -+ | -+ | This program is free software; you can redistribute it and/or -+ | modify it under the terms of version 2 of the GNU General Public License as -+ | published by the Free Software Foundation. -+ | -+ | This program is distributed in the hope that it will be useful, -+ | but WITHOUT ANY WARRANTY; without even the implied warranty of -+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ | GNU General Public License for more details. -+ | -+ | You should have received a copy of the GNU General Public License -+ | along with this program; if not, contact Novell, Inc. -+ | -+ | To contact Novell about this file by physical or electronic mail, -+ | you may find current contact information at www.novell.com -+ | -+ |*************************************************************************** -+*/ -+ -+/* -+ * nshypercall.c. -+ * This file implements the hypercall component of the Novell Shim. Hopefully -+ * we can host this component either as a driver in the guest or an extension -+ * to the Xen hypervisor. -+ * -+ * Engineering Contact: K. Y. Srinivasan -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include "ns_shim.h" -+#include "ns_errno.h" -+#include "nshypercall.h" -+ -+ -+ -+void nsDoTlbFlush(void); -+static void -+nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup); -+ -+ -+ -+/* -+ * void nsCollectStats(int event, nsVcpuStats_t *statsp) -+ * Collect stats. -+ * -+ * Calling/Exit State: -+ * None. 
-+ */ -+ -+void nsCollectStats(int event, nsVcpuStats_t *statsp) -+{ -+ switch (event) { -+ case NS_CSWITCH: -+ statsp->numSwitches++; -+ return; -+ case NS_FLUSH_VA: -+ statsp->numFlushes++; -+ return; -+ case NS_FLUSH_RANGE: -+ statsp->numFlushRanges++; -+ return; -+ case NS_FLUSH_VA_POSTED: -+ statsp->numFlushesPosted++; -+ return; -+ case NS_FLUSH_RANGE_POSTED: -+ statsp->numFlushRangesPosted++; -+ return; -+ case NS_TPR_READ: -+ statsp->numTprReads++; -+ return; -+ case NS_ICR_READ: -+ statsp->numIcrReads++; -+ return; -+ case NS_TPR_WRITE: -+ statsp->numTprWrites++; -+ return; -+ case NS_ICR_WRITE: -+ statsp->numIcrWrites++; -+ return; -+ case NS_EOI_WRITE: -+ statsp->numEoiWrites++; -+ return; -+ -+ case NS_GFS_ACQUIRE: -+ statsp->numGFSAcquires++; -+ return; -+ case NS_GFS_RELEASE: -+ statsp->numGFSReleases++; -+ return; -+ case NS_TLB_FLUSH: -+ statsp->numTlbFlushes++; -+ return; -+ case NS_INVL_PG: -+ statsp->numInvlPages++; -+ return; -+ } -+} -+ -+/* -+ * void -+ * nsPrintStats(nsPartition_t *curp, int i) -+ * Print stats. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+void -+nsPrintStats(nsPartition_t *curp, int i) -+{ -+ nsVcpu_t *v; -+ v = &curp->nsVcpuState[i]; -+ printk("Printing stats for vcpu ID: %d\n", i); -+ printk("Flush pending: %d\n", (int)v->nsVcpFlushPending); -+ printk("Flush Request: %d\n", (int)v->nsVcpFlushRequest); -+ printk("Waiting on GFS: %d\n", (int)v->nsVcpWaitingOnGFS); -+ printk("Waiting for cleanup: %d\n", (int)v->nsVcpWaitingForCleanup); -+ -+ printk("Number of context switches: %lu\n", v->nsVcpStats.numSwitches); -+ printk("Number of flushes: %lu\n", v->nsVcpStats.numFlushes); -+ printk("Number of flushes posted: %lu\n", v->nsVcpStats.numFlushesPosted); -+ printk("Number of flush ranges: %lu\n", v->nsVcpStats.numFlushRanges); -+ printk("Number of flush ranges posted: %lu\n", v->nsVcpStats.numFlushRangesPosted); -+ printk("Number of TPR reads: %lu\n", v->nsVcpStats.numTprReads); -+ printk("Number of ICR reads: %lu\n", v->nsVcpStats.numIcrReads); -+ printk("Number of Eoi writes: %lu\n", v->nsVcpStats.numEoiWrites); -+ printk("Number of Tpr writes: %lu\n", v->nsVcpStats.numTprWrites); -+ printk("Number of Icr writes: %lu\n", v->nsVcpStats.numIcrWrites); -+ printk("Number of GFS acuires: %lu\n", v->nsVcpStats.numGFSAcquires); -+ printk("Number of GFS releases: %lu\n", v->nsVcpStats.numGFSReleases); -+ printk("Number of TLB flushes: %lu\n", v->nsVcpStats.numTlbFlushes); -+ printk("Number of INVLPG flushes: %lu\n", v->nsVcpStats.numInvlPages); -+ printk("Number of TIMEOUTS: %lu\n", v->nsVcpStats.numTimeOuts); -+ -+} -+ -+/* -+ * static inline void nsWakeupWaiters(nsPartition_t *curp) -+ * Wakeup all the VCPUs that may be blocked on the Global -+ * flush state waiting to exclusively own the global flush -+ * state. -+ * -+ * Calling/Exit State: -+ * The partition-wide spin lock nsLock is held on entry and -+ * this lock is held on exit. -+ */ -+static inline void nsWakeupWaiters(nsPartition_t *curp) -+{ -+ int i; -+ if (!cpus_empty(curp->nsFlushState.waiters)) { -+ /* -+ * Need to wakeup potential waiters that -+ * are waiting for the -+ * flush block to become available. 
-+ */ -+ for (i=0; i < MAX_VIRT_CPUS; i++) { -+ struct vcpu *curVcpu; -+ if (!cpu_isset(i, curp->nsFlushState.waiters)) -+ continue; -+ curVcpu = -+ curp->nsVcpuState[i].nsVcpXenVcpu; -+ NS_ASSERT(curVcpu != NULL); -+ if ( test_and_clear_bit(_VPF_blocked_in_xen, -+ &curVcpu->pause_flags) ) { -+ vcpu_wake(curVcpu); -+ } -+ } -+ cpus_clear(curp->nsFlushState.waiters); -+ } -+} -+ -+/* -+ * static void nsAcquireGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup) -+ * Acquire the global flush state for exclusive use by the calling -+ * VCPU. -+ * -+ * Calling/Exit State: -+ * On entry nsLock is held and this lock is held on exit. If the calling -+ * VCPU is required to give up the CPU, this lock will be dropped. -+ */ -+static void nsAcquireGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup) -+{ -+acquireGFSAgain: -+ NS_ASSERT(vcpup->nsVcpWaitingOnGFS == 0); -+ NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0); -+ NS_ASSERT(NS_LOCK_OWNED(vcpup, &curp->nsLock)); -+ if (curp->nsFlushState.currentOwner != NULL) { -+ /* -+ * Somebody is in the midst of flushing; deal with this -+ * situation. -+ */ -+ /* -+ * We need to wait for the current flush sequence -+ * to end. -+ */ -+ NS_ASSERT(curp->nsFlushState.currentOwner != current); -+ NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0); -+ if (vcpup->nsVcpFlushPending) { -+ nsLockRelease(vcpup, &curp->nsLock); -+ nsDoTlbFlush(); -+ nsLockAcquire(vcpup, &curp->nsLock); -+ } -+ vcpup->nsVcpWaitingOnGFS = 1; -+ cpu_set(current->vcpu_id, curp->nsFlushState.waiters); -+ nsLockRelease(vcpup, &curp->nsLock); -+ wait_on_xen_event_channel(0, -+ ((curp->nsFlushState.currentOwner == NULL) || -+ (vcpup->nsVcpFlushPending) || -+ (cpus_empty(curp->nsFlushState.waiters)))); -+ nsLockAcquire(vcpup, &curp->nsLock); -+ cpu_clear(current->vcpu_id, curp->nsFlushState.waiters); -+ vcpup->nsVcpWaitingOnGFS = 0; -+ goto acquireGFSAgain; -+ } -+ curp->nsFlushState.repCount = vcpup->nsVcpRepCount; -+ curp->nsFlushState.flushParam = -+ vcpup->nsVcpInputBuffer; -+ NS_STATS_COLLECT(NS_GFS_ACQUIRE, &vcpup->nsVcpStats); -+} -+ -+/* -+ * static void nsReleaseGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup, -+ * int lockOwned) -+ * There can at most be one TLB flush event active in the system. All of the -+ * VCPUs that are part of the flush sequence need to relaese their hold -+ * on the global flush object before the global flush object can be freed. -+ * This function manages the release of the global flush object. -+ * If the "lockOwned" parameter is non-zero; on entry the nsLock is held. -+ * -+ * Calling/Exit State: -+ * The current owner of GFS may be forced to give up the CPU. -+ * On exit nsLock is held. -+ */ -+static void nsReleaseGlobalFlushState(nsPartition_t *curp, nsVcpu_t *vcpup, -+ int lockOwned) -+{ -+ if (!lockOwned) { -+ nsLockAcquire(vcpup, &curp->nsLock); -+ } -+ NS_ASSERT(curp->nsFlushState.cpuCount >= 0); -+ NS_ASSERT(curp->nsFlushState.currentOwner != NULL); -+ -+ if (vcpup->nsVcpFlushPending) { -+ curp->nsFlushState.cpuCount--; -+ NS_ASSERT(curp->nsFlushState.cpuCount >= 0); -+ vcpup->nsVcpFlushPending = 0; -+ mb(); -+ } -+ -+nsReleaseGFS: -+ if (curp->nsFlushState.cpuCount > 0) { -+ if (curp->nsFlushState.currentOwner == current) { -+ /* -+ * We are the initiator; need to wait for -+ * others to complete. 
-+ */ -+ nsWakeupWaiters(curp); -+ vcpup->nsVcpWaitingForCleanup = 1; -+ nsLockRelease(vcpup, &curp->nsLock); -+ wait_on_xen_event_channel(0,(curp->nsFlushState.cpuCount == 0)); -+ nsLockAcquire(vcpup, &curp->nsLock); -+ vcpup->nsVcpWaitingForCleanup = 0; -+ goto nsReleaseGFS; -+ } else { -+ return; -+ } -+ } -+ NS_ASSERT(curp->nsFlushState.cpuCount == 0); -+ if (curp->nsFlushState.currentOwner == current) { -+ /* We are the current owner; do the final cleanup. -+ * But first set the return value. This has been stashed -+ * before we blocked. -+ */ -+ NS_STATS_COLLECT(NS_GFS_RELEASE, &vcpup->nsVcpStats); -+ vcpup->nsVcpFlushRequest = 0; -+ vcpup->nsVcpFlushPending = 0; -+ vcpup->nsVcpWaitingForCleanup = 0; -+ nsSetSysCallRetVal(guest_cpu_user_regs(), -+ curp->nsLongModeGuest, -+ curp->nsFlushState.retVal); -+ curp->nsFlushState.cpuCount = 0; -+ curp->nsFlushState.currentOwner = NULL; -+ mb(); -+ curp->nsFlushState.retVal = 0; -+ curp->nsFlushState.flushParam = NULL; -+ curp->nsFlushState.repCount = 0; -+ nsWakeupWaiters(curp); -+ } else { -+ /* -+ * We are not the owner; wakeup the owner. -+ */ -+ if ( test_and_clear_bit(_VPF_blocked_in_xen, -+ &(curp->nsFlushState.currentOwner->pause_flags))){ -+ vcpu_wake(curp->nsFlushState.currentOwner); -+ } -+ } -+} -+ -+ -+/* -+ * static inline int nsFlushPermitted(nsVcpu_t *vcpup) -+ * Check to see if we can execute a TLB flush on the calling vcpu. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static inline int nsFlushPermitted(nsVcpu_t *vcpup) -+{ -+ if (!hvm_paging_enabled(current)) { -+ return (0); -+ } -+ if (current->arch.hvm_vmx.vmxassist_enabled) { -+ return (0); -+ } -+ if (nsInvalidCpuState()) { -+ return (0); -+ } -+ -+ return (1); -+} -+ -+/* -+ * void -+ * nsDoTlbFlush(void) -+ * Perform flush operations based on the state of GFS. VCPUs may be -+ * forced to relinquish the physical CPU while attempting to flush; in -+ * those events, thi is also the continuation point for execution. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+void -+nsDoTlbFlush(void) -+{ -+ nsPartition_t *curp = nsGetCurrentPartition(); -+ nsVcpu_t *vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()]; -+ flushVa_t *flushArgp; -+ int i,j, numPages; -+ u64 *pgList; -+ long baseVa; -+ unsigned short repCount; -+ -+ NS_ASSERT(local_irq_is_enabled()); -+ -+ NS_ASSERT(vcpup->nsVcplockDepth == 0); -+ -+ nsLockAcquire(vcpup, &curp->nsLock); -+ if (vcpup->nsVcpWaitingForCleanup) { -+ /* -+ * This is the continuation point for us; cleanup -+ * the global flush state. -+ */ -+ vcpup->nsVcpWaitingForCleanup =0; -+ NS_ASSERT(curp->nsFlushState.currentOwner == current); -+ nsReleaseGlobalFlushState(curp, vcpup, 1); -+ } else if (vcpup->nsVcpWaitingOnGFS) { -+ /* -+ * This is the continuation point for us; acquire -+ * GFS and proceed with our flush operation. -+ */ -+ vcpup->nsVcpWaitingOnGFS =0; -+ nsAcquireGlobalFlushState(curp, vcpup); -+ /* -+ * Now do the rest of the syscall processing -+ */ -+ nsFlushPostProcess(curp, vcpup); -+ } -+ if (!vcpup->nsVcpFlushPending) { -+ nsLockRelease(vcpup, &curp->nsLock); -+ return; -+ } -+ flushArgp = curp->nsFlushState.flushParam; -+ repCount = curp->nsFlushState.repCount; -+ /* -+ * At this point a flush has been posted; see if we can perform a -+ * flush given our state. 
-+     */
-+    if (!nsFlushPermitted(vcpup)) {
-+        nsReleaseGlobalFlushState(curp, vcpup, 1);
-+        nsLockRelease(vcpup, &curp->nsLock);
-+        NS_ASSERT(vcpup->nsVcplockDepth == 0);
-+        return;
-+    }
-+    nsLockRelease(vcpup, &curp->nsLock);
-+    if (vcpup->nsVcpFlushPending & NS_FLUSH_TLB) {
-+        NS_STATS_COLLECT(NS_TLB_FLUSH, &vcpup->nsVcpStats);
-+        paging_update_cr3(current);
-+    } else {
-+        pgList = &flushArgp->gva;
-+        NS_ASSERT(vcpup->nsVcpFlushPending == NS_FLUSH_INVLPG);
-+        NS_ASSERT(pgList != NULL);
-+        NS_ASSERT(repCount >= 1);
-+        NS_STATS_COLLECT(NS_INVL_PG, &vcpup->nsVcpStats);
-+        for (i = 0; i < repCount; i++) {
-+            baseVa = (long)(pgList[i] & PAGE_MASK);
-+            numPages = (int)(~baseVa & pgList[i]);
-+            for (j = 0; j <= numPages; j++) {
-+                if (paging_invlpg(current,
-+                    (baseVa + (j << PAGE_SHIFT)))) {
-+                    flush_tlb_one_local((baseVa +
-+                        (j << PAGE_SHIFT)));
-+                }
-+                /* KYS: need to deal with ASIDs */
-+            }
-+        }
-+    }
-+    /*
-+     * Do post-processing on the global flush state.
-+     */
-+    nsReleaseGlobalFlushState(curp, vcpup, 0);
-+    nsLockRelease(vcpup, &curp->nsLock);
-+    NS_ASSERT(vcpup->nsVcplockDepth == 0);
-+}
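Each entry of the gva list walked by the INVLPG path above packs a page-aligned base address in its upper bits and a count of additional pages to flush in its low 12 bits, so a single 64-bit entry can name up to 4096 contiguous pages. A standalone sketch of the decoding, assuming 4K pages; the entry value is invented:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12
    #define PAGE_MASK  (~((uint64_t)(1 << PAGE_SHIFT) - 1))

    int main(void)
    {
        uint64_t entry = 0x7f8000002003ULL;   /* invented guest list entry */
        uint64_t baseVa = entry & PAGE_MASK;  /* 0x7f8000002000 */
        unsigned numPages = (unsigned)(entry & ~PAGE_MASK);  /* 3 extra pages */
        unsigned j;

        /* Flush the base page plus numPages following pages, as the
         * INVLPG loop above does. */
        for (j = 0; j <= numPages; j++)
            printf("invlpg %#llx\n",
                   (unsigned long long)(baseVa + ((uint64_t)j << PAGE_SHIFT)));
        return 0;
    }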
-+ */ -+ NS_PANIC("nsGetVpRegisters:input list not terminated\n"); -+ break; -+ } -+ } -+ if (nsXenVector.extCopyToGuestPhysical(output, outBuf, -+ numOutputBytes)) { -+ /* Some problem copying data out*/ -+ NS_PANIC("nsGetVpRegisters:copyout problem\n"); -+ } -+ nsXenVector.extFreeMem(vcpuCtx); -+ return (NS_STATUS_SUCCESS); -+} -+ -+/* -+ * static int -+ * nsSetVpRegisters(paddr_t input, paddr_t output) -+ * Set the VCPU register state. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static int -+nsSetVpRegisters(paddr_t input, paddr_t output) -+{ -+ nsVcpu_t *vcpup, *targetp; -+ nsPartition_t *curp = nsGetCurrentPartition(); -+ setVpRegistersInput_t *inBuf; -+ struct vcpu_guest_context *vcpuCtx; -+ setVpRegisterSpec_t *regIndexp; -+ int retVal = NS_STATUS_SUCCESS; -+ -+ vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()]; -+ inBuf = vcpup->nsVcpInputBuffer; -+ /* -+ * Copy the input data to the per-cpu input buffer. -+ * This may be an overkill; obviously it is better to only -+ * copy what we need. XXXKYS: Check with Mike. -+ */ -+ if (nsXenVector.extCopyFromGuestPhysical(inBuf, input, PAGE_SIZE)) { -+ return (NS_STATUS_INVALID_ALIGNMENT); -+ } -+ /* -+ * If the partition ID specified does not match with the current -+ * domain return appropriate error. -+ */ -+ if ((u64)current->domain->domain_id != inBuf-> partitionId) { -+ return (NS_STATUS_ACCESS_DENIED); -+ } -+ if (inBuf->vpIndex > MAX_VIRT_CPUS) { -+ return (NS_STATUS_INVALID_VP_INDEX); -+ } -+ targetp = &curp->nsVcpuState[inBuf->vpIndex]; -+ if (!(targetp->nsVcpuFlags & NS_VCPU_UP)) { -+ return (NS_STATUS_INVALID_VP_STATE); -+ } -+ if ((vcpuCtx = -+ nsXenVector.extAllocMem(sizeof(struct vcpu_guest_context))) -+ == NULL) { -+ return (NS_STATUS_INSUFFICIENT_MEMORY); -+ } -+ /* -+ * XXXKYS: Is it sufficient to just pause the target vcpu; on the -+ * xen side domain is paused for this call. CHECK. -+ */ -+ if (current->vcpu_id != inBuf->vpIndex) { -+ nsXenVector.extVcpuPause(targetp->nsVcpXenVcpu); -+ } -+ -+ nsXenVector.extArchGetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx); -+ /* -+ * Now that we have the register state; update the register state -+ * based on what we are given. -+ */ -+ regIndexp = &inBuf->regSpec; -+ /* -+ * XXXKYS: Assuming the list is terminated by a regName that is 0. -+ * Check with Mike. -+ */ -+ while (regIndexp->regName != 0) { -+ switch (regIndexp->regName) { -+ /* -+ * XXXKYS: need mapping code here; populate -+ * vcpuCtx -+ */ -+ NS_PANIC("nsSetVpRegisters not supported\n"); -+ } -+ regIndexp++; -+ if ((char *)regIndexp > ((char *)inBuf + PAGE_SIZE)) { -+ /* -+ *input list not reminated correctly; bail out. -+ */ -+ NS_PANIC("nsSetVpRegisters:input list not terminated\n"); -+ break; -+ } -+ } -+ /* -+ * Now set register state. -+ * -+ * XXXKYS: Is it sufficient to just pause the target vcpu; on the -+ * xen side domain is paused for this call. CHECK. -+ */ -+ -+ if (nsXenVector.extArchSetDomainInfoCtxt(targetp->nsVcpXenVcpu, vcpuCtx)) { -+ retVal = NS_STATUS_INVALID_PARAMETER; -+ } -+ if (current->vcpu_id != inBuf->vpIndex) { -+ nsXenVector.extVcpuUnPause(targetp->nsVcpXenVcpu); -+ } -+ nsXenVector.extFreeMem(vcpuCtx); -+ return (retVal); -+} -+ -+/* -+ * static int -+ * nsSwitchVa(paddr_t input) -+ * -+ * Switch the page table base of the calling vcpu. -+ * -+ * Calling/Exit State: -+ * None. -+ * -+ * Remarks: -+ * The spec specifies that the input register is pointing to a guest -+ * physical that has the new page table base. 
-+ * However, it appears that the page table base is being passed in the
-+ * input register.
-+ */
-+static int
-+nsSwitchVa(paddr_t input)
-+{
-+    nsPartition_t *curp = nsGetCurrentPartition();
-+    nsVcpu_t *vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()];
-+
-+    /*
-+     * XXXKYS: the spec says the asID is passed via memory at offset 0 of
-+     * the page whose GPA is in the input register. However, it appears
-+     * the current build of longhorn (longhorn-2007-02-06-x86_64-fv-02)
-+     * passes the asID in the input register instead. Need to check if
-+     * future builds do this.
-+     */
-+    hvm_set_cr3(input);
-+    NS_STATS_COLLECT(NS_CSWITCH, &vcpup->nsVcpStats);
-+    return (NS_STATUS_SUCCESS);
-+}
-+
-+/*
-+ * static void
-+ * nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup)
-+ *
-+ * Perform the flush operation once GFS is acquired.
-+ *
-+ * Calling/Exit State:
-+ * On entry nsLock is held; on exit this lock continues to be held.
-+ */
-+
-+static void
-+nsFlushPostProcess(nsPartition_t *curp, nsVcpu_t *curVcpup)
-+{
-+    int target;
-+    nsVcpu_t *vcpup;
-+    cpumask_t vcpuMask;
-+    struct flushVa *flushArgp;
-+
-+    flushArgp = curVcpup->nsVcpInputBuffer;
-+    vcpuMask = flushArgp->vMask;
-+    /*
-+     * On entry we must own the global flush state.
-+     */
-+    NS_ASSERT(NS_LOCK_OWNED(curVcpup, &curp->nsLock));
-+    NS_ASSERT(curp->nsFlushState.cpuCount == 0);
-+    NS_ASSERT(curp->nsFlushState.currentOwner == NULL);
-+
-+    curp->nsFlushState.retVal =
-+        nsBuildHcallRetVal(NS_STATUS_SUCCESS, curVcpup->nsVcpRepCount);
-+    curp->nsFlushState.currentOwner = current;
-+    if (cpu_isset(current->vcpu_id, vcpuMask)) {
-+        curp->nsFlushState.cpuCount = 1;
-+        curVcpup->nsVcpFlushPending =
-+            curVcpup->nsVcpFlushRequest;
-+        mb();
-+#ifdef NS_STATS
-+        if (curVcpup->nsVcpFlushRequest == NS_FLUSH_TLB) {
-+            NS_STATS_COLLECT(NS_FLUSH_VA_POSTED, &curVcpup->nsVcpStats);
-+        } else {
-+            NS_STATS_COLLECT(NS_FLUSH_RANGE_POSTED, &curVcpup->nsVcpStats);
-+        }
-+#endif
-+
-+        cpu_clear(current->vcpu_id, vcpuMask);
-+    }
-+    if (cpus_empty(vcpuMask)) {
-+        /*
-+         * We are done.
-+         */
-+        goto flushVaDone;
-+    }
-+    while (!cpus_empty(vcpuMask)) {
-+        target = first_cpu(vcpuMask);
-+        vcpup = &curp->nsVcpuState[target];
-+        cpu_clear(target, vcpuMask);
-+        if (!(vcpup->nsVcpuFlags & NS_VCPU_UP)) {
-+            continue;
-+        }
-+        if (!nsFlushPermitted(vcpup)) {
-+            continue;
-+        }
-+        curp->nsFlushState.cpuCount++;
-+        vcpup->nsVcpFlushPending =
-+            curVcpup->nsVcpFlushRequest;
-+        mb();
-+#ifdef NS_STATS
-+        if (curVcpup->nsVcpFlushRequest == NS_FLUSH_TLB) {
-+            NS_STATS_COLLECT(NS_FLUSH_VA_POSTED, &vcpup->nsVcpStats);
-+        } else {
-+            NS_STATS_COLLECT(NS_FLUSH_RANGE_POSTED, &vcpup->nsVcpStats);
-+        }
-+#endif
-+
-+        /*
-+         * We need to force these VCPUs into the hypervisor for
-+         * them to act on the pending request.
-+         */
-+
-+        vcpu_kick(vcpup->nsVcpXenVcpu);
-+        if ( test_and_clear_bit(_VPF_blocked_in_xen,
-+            &vcpup->nsVcpXenVcpu->pause_flags) ) {
-+            vcpu_wake(vcpup->nsVcpXenVcpu);
-+        }
-+
-+    }
-+    /*
-+     * Now that we have posted the state, wait for other CPUs to perform
-+     * flushes; we need to wait for all the CPUs to complete the flush
-+     * before returning.
-+     */
-+flushVaDone:
-+    /*
-+     * If we are included in this round of tlb flush, we will wait for
-+     * other CPUs in the tlb flush function; else we wait right here.
-+     */
-+    if (!curVcpup->nsVcpFlushPending) {
-+        nsReleaseGlobalFlushState(curp, curVcpup, 1);
-+    }
-+    return;
-+}
-+
-+/*
-+ * static int
-+ * nsFlushVa(paddr_t input)
-+ * Perform a TLB flush on the specified set of VCPUs.
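nsFlushVa below turns the guest's flat 64-bit processor mask into a vcpu mask with a plain bit scan: bit i selects virtual CPU i. A standalone restatement of that loop, with printf standing in for cpu_set():

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t inputMask = 0x15;   /* invented: vcpus 0, 2 and 4 */
        int i = 0;

        while (inputMask) {
            if (inputMask & 0x1)
                printf("cpu_set(%d, vcpuMask)\n", i);
            inputMask >>= 1;
            i++;
        }
        return 0;
    }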
-+ * -+ * Calling/Exit State: -+ * No locks can be held on entry and no locks will be held on return. -+ * The calling VCPU may relinquish the physical CPU. -+ */ -+static int -+nsFlushVa(paddr_t input) -+{ -+ nsPartition_t *curp = nsGetCurrentPartition(); -+ int i; -+ nsVcpu_t *curVcpup; -+ -+ flushVa_t *flushArgp; -+ cpumask_t vcpuMask; -+ u64 asId, inputMask, retVal; -+ int flushGlobal = 1; -+ -+ curVcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()]; -+ flushArgp = curVcpup->nsVcpInputBuffer; -+ -+ NS_ASSERT(curVcpup->nsVcplockDepth == 0); -+ NS_ASSERT(curVcpup->nsVcpFlushRequest == 0); -+ NS_ASSERT(curVcpup->nsVcpWaitingForCleanup == 0); -+ NS_ASSERT(curVcpup->nsVcpWaitingOnGFS == 0); -+ -+ if (nsXenVector.extCopyFromGuestPhysical(flushArgp, input, -+ sizeof(*flushArgp))) { -+ return (NS_STATUS_INVALID_ALIGNMENT); -+ } -+ inputMask = flushArgp->pMask; -+ asId = flushArgp->asHandle; -+ cpus_clear(vcpuMask); -+ /* -+ * Deal with all trivial error conditions. -+ */ -+ if (flushArgp->flags != 0 && (!(flushArgp->flags & -+ (NS_FLUSH_ALL_PROCESSORS | -+ NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | -+ NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY)))) { -+ return (NS_STATUS_INVALID_PARAMETER); -+ } -+ if (((flushArgp->pMask) == 0) && -+ !(flushArgp->flags & NS_FLUSH_ALL_PROCESSORS)) { -+ return (NS_STATUS_INVALID_PARAMETER); -+ } -+ -+ if (flushArgp->flags & NS_FLUSH_ALL_PROCESSORS) { -+ for (i=0; i< MAX_VIRT_CPUS; i++) { -+ if (current->domain->vcpu[i] != NULL) { -+ cpu_set(i, vcpuMask); -+ } -+ } -+ } else { -+ i = 0; -+ while (inputMask) { -+ if (inputMask &0x1) { -+ cpu_set(i, vcpuMask); -+ } -+ inputMask = (inputMask >> 1); -+ i++; -+ } -+ } -+ -+ if (flushArgp->flags & NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES) { -+ asId = NS_ALL_AS; -+ } -+ if (flushArgp->flags & NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY) { -+ flushGlobal = 0; -+ } -+ /* -+ * Now operate on what we are given -+ * XXXKYS: For now we are ignoring asId and fushGlobal flag. -+ * May have to revisit this. But first stash away the processed -+ * parameters for subsequent use. -+ */ -+ flushArgp->asHandle = asId; -+ flushArgp->flags = flushGlobal; -+ flushArgp->vMask = vcpuMask; -+ -+ curVcpup->nsVcpRepCount = 0; -+ curVcpup->nsVcpFlushRequest = NS_FLUSH_TLB; -+ -+ retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, 0); -+ nsSetSysCallRetVal(guest_cpu_user_regs(), -+ curp->nsLongModeGuest, -+ retVal); -+ NS_STATS_COLLECT(NS_FLUSH_VA_STAT, &curVcpup->nsVcpStats); -+ nsLockAcquire(curVcpup, &curp->nsLock); -+ nsAcquireGlobalFlushState(curp, curVcpup); -+ nsFlushPostProcess(curp, curVcpup); -+ nsLockRelease(curVcpup, &curp->nsLock); -+ return (NS_STATUS_SUCCESS); -+} -+ -+/* -+ * static int -+ * nsFlushVaRange(paddr_t input, unsigned short startIndex, -+ * unsigned short repCount, unsigned short *repsDone) -+ * Perform a INVLPG flush on the specified set of VCPUs. -+ * -+ * Calling/Exit State: -+ * No locks can be held on entry and no locks will be held on return. -+ * The calling VCPU may relinquish the physical CPU. 
-+ */ -+static int -+nsFlushVaRange(paddr_t input, unsigned short startIndex, -+unsigned short repCount, unsigned short *repsDone) -+{ -+ nsVcpu_t *curVcpup; -+ nsPartition_t *curp = nsGetCurrentPartition(); -+ flushVa_t *flushArgp; -+ cpumask_t vcpuMask; -+ u64 asId, inputMask, retVal; -+ int flushGlobal = 1; -+ int flushAllProc = 0; -+ int i; -+ -+ curVcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()]; -+ flushArgp = curVcpup->nsVcpInputBuffer; -+ NS_ASSERT(curVcpup->nsVcplockDepth == 0); -+ NS_ASSERT(curVcpup->nsVcpFlushRequest == 0); -+ NS_ASSERT(curVcpup->nsVcpWaitingForCleanup == 0); -+ NS_ASSERT(curVcpup->nsVcpWaitingOnGFS == 0); -+ NS_ASSERT(repCount >=1); -+ NS_ASSERT(((sizeof(*flushArgp)) + 8*(repCount -1)) <= PAGE_SIZE); -+ if (nsXenVector.extCopyFromGuestPhysical(flushArgp, input, -+ ((sizeof(*flushArgp)) + 8*(repCount -1)))) { -+ return (NS_STATUS_INVALID_ALIGNMENT); -+ } -+ *repsDone = repCount; -+ inputMask = flushArgp->pMask; -+ asId = flushArgp->asHandle; -+ cpus_clear(vcpuMask); -+ /* -+ * Deal with all trivial error conditions. -+ */ -+ if (flushArgp->flags != 0 && (!(flushArgp->flags & -+ (NS_FLUSH_ALL_PROCESSORS | -+ NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | -+ NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY)))) { -+ return (NS_STATUS_INVALID_PARAMETER); -+ } -+ if ((flushArgp->pMask == 0) && -+ !(flushArgp->flags & NS_FLUSH_ALL_PROCESSORS)) { -+ return (NS_STATUS_INVALID_PARAMETER); -+ } -+ -+ if (flushArgp->flags & NS_FLUSH_ALL_PROCESSORS) { -+ flushAllProc = 1; -+ for (i=0; i< MAX_VIRT_CPUS; i++) { -+ if (current->domain->vcpu[i] != NULL) { -+ cpu_set(i, vcpuMask); -+ } -+ } -+ } else { -+ i = 0; -+ /* -+ * populate the vcpu mask based on the input. -+ */ -+ while (inputMask) { -+ if (inputMask & 0x1) { -+ cpu_set(i, vcpuMask); -+ } -+ inputMask = (inputMask >> 1); -+ i++; -+ } -+ } -+ if (flushArgp->flags & NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES) { -+ asId = NS_ALL_AS; -+ } -+ if (flushArgp->flags & NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY) { -+ flushGlobal = 0; -+ } -+ /* -+ * Now operate on what we are given -+ * XXXKYS: For now we are ignoring asId and fushGlobal flag. -+ * May have to revisit this. -+ * May have to revisit this. But first stash away the processed -+ * parameters for subsequent use. -+ */ -+ flushArgp->asHandle = asId; -+ flushArgp->flags = flushGlobal; -+ flushArgp->vMask = vcpuMask; -+ -+ curVcpup->nsVcpRepCount = repCount; -+ curVcpup->nsVcpFlushRequest = NS_FLUSH_INVLPG; -+ -+ retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, repCount); -+ nsSetSysCallRetVal(guest_cpu_user_regs(), -+ curp->nsLongModeGuest, -+ retVal); -+ -+ -+ NS_STATS_COLLECT(NS_FLUSH_RANGE, &curVcpup->nsVcpStats); -+ nsLockAcquire(curVcpup, &curp->nsLock); -+ nsAcquireGlobalFlushState(curp, curVcpup); -+ nsFlushPostProcess(curp, curVcpup); -+ nsLockRelease(curVcpup, &curp->nsLock); -+ return (NS_STATUS_SUCCESS); -+} -+ -+/* void -+ * nsHandleHyperCall(u64 opcode, u64 input, u64 output, -+ * u64 *retVal); -+ * Common entry point for handling all the extension hypercalls. -+ * -+ * Calling/Exit State: -+ * Based on the hypercall; the caller may give up the CPU while -+ * processing the hypercall. No locks should be held on entry and -+ * no locks will be held on return. 
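The 64-bit opcode received by nsHandleHyperCall is itself a packed field: the verb in bits 0..15, the rep count in bits 32..43 and the start index in bits 48..59, matching the shifts at the top of the handler. A worked standalone example with an invented NS_FLUSH_VA_LIST opcode:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical NS_FLUSH_VA_LIST (verb 0x0003) call asking for
         * 4 reps starting at index 1. */
        uint64_t opcode = 0x0003ULL | (4ULL << 32) | (1ULL << 48);

        unsigned verb       = (unsigned)(opcode & 0xffff);
        unsigned repCount   = (unsigned)((opcode >> 32) & 0xfff);
        unsigned startIndex = (unsigned)((opcode >> 48) & 0xfff);

        /* Prints: verb=0x3 repCount=4 startIndex=1 */
        printf("verb=%#x repCount=%u startIndex=%u\n",
               verb, repCount, startIndex);
        return 0;
    }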
-+ * -+ */ -+ -+void -+nsHandleHyperCall(u64 opcode, u64 input, u64 output, -+ u64 *retVal) -+{ -+ unsigned short verb; -+ unsigned short repCount; -+ unsigned short repsDone =0; -+ unsigned short startIndex; -+ nsPartition_t *curp = nsGetCurrentPartition(); -+ u64 partitionId; -+ int value; -+ -+ -+ verb = (short)(opcode & 0xffff); -+ repCount = (short)((opcode >>32) & 0xfff); -+ startIndex = (short)((opcode >> 48) & 0xfff); -+ switch (verb) { -+ case NS_CREATE_PARTITION: -+ /* -+ * Xen only allows dom0 to create domains. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_INITIALIZE_PARTITION: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_DELETE_PARTITION: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_GET_PARTITION_PROPERTY: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_SET_PARTITION_PROPERTY: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_GET_PARTITION_ID: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_PARTITION_ID)) { -+ *retVal = -+ nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ } -+ partitionId = (u64)current->domain->domain_id; -+ if (nsXenVector.extCopyToGuestPhysical(output, -+ &partitionId, 8)) { -+ /* -+ * Invalid output area. -+ */ -+ *retVal = -+ nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ } -+ *retVal = nsBuildHcallRetVal(NS_STATUS_SUCCESS, 0); -+ return; -+ case NS_GET_NEXT_CHILD_PARTITION: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_SET_LOGICAL_PROCESSOR_RUN_TIME_GROUP: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_CLEAR_LOGICAL_PROCESSOR_RUN_TIME_GROUP: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_NOTIFY_LOGICAL_PROCESSOR_POWER_STATE: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_GET_LOGICAL_PROCESSOR_RUN_TIME: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_DEPOSIT_MEMORY: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_WITHDRAW_MEMORY: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_GET_MEMORY_BALANCE: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_MAP_GPA_PAGES: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_UNMAP_GPA_PAGES: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_INSTALL_INTERCEPT: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_CREATE_VP: -+ /* -+ * We don't support this. -+ */ -+ *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0); -+ return; -+ case NS_TERMINATE_VP: -+ /* -+ * We don't support this. 
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_DELETE_VP:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_GET_NEXT_VP:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_GET_VP_REGISTERS:
-+        *retVal = nsBuildHcallRetVal(
-+            nsGetVpRegisters(input, output), 0);
-+        return;
-+    case NS_SET_VP_REGISTERS:
-+        *retVal = nsBuildHcallRetVal(
-+            nsSetVpRegisters(input, output), 0);
-+        return;
-+    case NS_SWITCH_VA:
-+        *retVal =
-+            nsBuildHcallRetVal(nsSwitchVa(input), 0);
-+        return;
-+    case NS_FLUSH_VA:
-+        *retVal =
-+            nsBuildHcallRetVal(nsFlushVa(input), 0);
-+        return;
-+    case NS_FLUSH_VA_LIST:
-+        value = nsFlushVaRange(input, startIndex,
-+            repCount, &repsDone);
-+        *retVal = nsBuildHcallRetVal(value, repsDone);
-+        return;
-+
-+    case NS_TRASLATE_VA:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_READ_GPA:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_WRITE_GPA:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_ASSERT_VIRTUAL_INTERRUPT:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_CLEAR_VIRTUAL_INTERRUPT:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_CREATE_PORT:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_DELETE_PORT:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_CONNECT_PORT:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_GET_PORT_PROPERTY:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_DISCONNECT_PORT:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_POST_MESSAGE:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case NS_POST_EVENT:
-+        /*
-+         * We don't support this.
-+         */
-+        *retVal = nsBuildHcallRetVal(NS_STATUS_ACCESS_DENIED, 0);
-+        return;
-+    case 0:
-+        /*
-+         * 32-bit longhorn invokes the hypercall with verb == 0; need to
-+         * check with Mike (XXXKYS). For now ignore it.
-+         */
-+        *retVal =
-+            nsBuildHcallRetVal(NS_STATUS_INVALID_HYPERCALL_CODE, 0);
-+        return;
-+    default:
-+        nsXenVector.extPrintk("Unknown hypercall: verb is: %d\n", verb);
-+        *retVal =
-+            nsBuildHcallRetVal(NS_STATUS_INVALID_HYPERCALL_CODE, 0);
-+        return;
-+    }
-+}
-Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.h
-===================================================================
---- /dev/null 1970-01-01 00:00:00.000000000 +0000
-+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nshypercall.h 2008-04-23 10:58:49.000000000 -0400
-@@ -0,0 +1,125 @@
-+/****************************************************************************
-+ |
-+ | Copyright (c) [2007, 2008] Novell, Inc.
-+ | All Rights Reserved.
-+ | -+ | This program is free software; you can redistribute it and/or -+ | modify it under the terms of version 2 of the GNU General Public License as -+ | published by the Free Software Foundation. -+ | -+ | This program is distributed in the hope that it will be useful, -+ | but WITHOUT ANY WARRANTY; without even the implied warranty of -+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ | GNU General Public License for more details. -+ | -+ | You should have received a copy of the GNU General Public License -+ | along with this program; if not, contact Novell, Inc. -+ | -+ | To contact Novell about this file by physical or electronic mail, -+ | you may find current contact information at www.novell.com -+ | -+ |*************************************************************************** -+*/ -+ -+/* -+ * nshypercall.h -+ * Memory layouts for the various hypercalls supported. -+ * -+ * Engineering Contact: K. Y. Srinivasan -+ */ -+ -+#ifndef NS_HYPERCALL_H -+#define NS_HYPERCALL_H -+ -+#include -+ -+ -+typedef struct getVpRegistersInput { -+ u64 partitionId; -+ u64 vpIndex; -+ u32 regIndex; -+} getVpRegistersInput_t; -+ -+typedef struct getVpRegistersOutput { -+ u64 lowValue; -+ u64 highValue; -+} getVpRegistersOutput_t; -+ -+ -+ -+typedef struct setVpRegisterSpec { -+ u32 regName; -+ u32 pad; -+ u64 pad1; -+ u64 lowValue; -+ u64 highValue; -+} setVpRegisterSpec_t; -+typedef struct setVpRegistersInput { -+ u64 partitionId; -+ u64 vpIndex; -+ setVpRegisterSpec_t regSpec; -+} setVpRegistersInput_t; -+ -+ -+typedef struct flushVa { -+ u64 asHandle; -+ u64 flags; -+ union { -+ u64 processorMask; -+ cpumask_t vcpuMask; -+ } procMask; -+#define pMask procMask.processorMask -+#define vMask procMask.vcpuMask -+ u64 gva; -+} flushVa_t; -+ -+#define NS_FLUSH_ALL_PROCESSORS 0x00000001 -+#define NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES 0x00000002 -+#define NS_FLUSH_NON_GLOBAL_MAPPINGS_ONLY 0x00000004 -+ -+#define NS_ALL_AS (-1) -+ -+/* -+ * Hypercall verbs. 
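The flushVa_t layout above overlays the guest's 64-bit processor mask (pMask) with Xen's cpumask_t (vMask) so the flush handlers can rewrite the mask in place once it has been converted, assuming cpumask_t fits the same storage. A sketch of how a caller might fill the fixed head of the structure; the struct is re-declared locally in simplified form and the values are invented:

    #include <stdint.h>
    #include <string.h>

    /* Simplified local stand-in for the head of flushVa_t; the real
     * structure unions processorMask with a cpumask_t. */
    struct flush_input {
        uint64_t asHandle;
        uint64_t flags;
        uint64_t processorMask;
        uint64_t gva;             /* first entry of the gva list */
    };

    #define NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES 0x00000002

    int main(void)
    {
        struct flush_input in;

        memset(&in, 0, sizeof(in));
        in.flags = NS_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        in.processorMask = 0x3;         /* vcpus 0 and 1 */
        in.gva = 0x7f8000002003ULL;     /* base va plus 3 extra pages */
        return 0;
    }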
-+ */ -+ -+#define NS_CREATE_PARTITION 0x0010 -+#define NS_INITIALIZE_PARTITION 0x0011 -+#define NS_DELETE_PARTITION 0x0014 -+#define NS_GET_PARTITION_PROPERTY 0x0017 -+#define NS_SET_PARTITION_PROPERTY 0x0018 -+#define NS_GET_PARTITION_ID 0x0015 -+#define NS_GET_NEXT_CHILD_PARTITION 0x0016 -+#define NS_SET_LOGICAL_PROCESSOR_RUN_TIME_GROUP 0x0005 -+#define NS_CLEAR_LOGICAL_PROCESSOR_RUN_TIME_GROUP 0x0006 -+#define NS_NOTIFY_LOGICAL_PROCESSOR_POWER_STATE 0x0007 -+#define NS_GET_LOGICAL_PROCESSOR_RUN_TIME 0x0004 -+#define NS_DEPOSIT_MEMORY 0x001C -+#define NS_WITHDRAW_MEMORY 0x001D -+#define NS_GET_MEMORY_BALANCE 0x001E -+#define NS_MAP_GPA_PAGES 0x001A -+#define NS_UNMAP_GPA_PAGES 0x001B -+#define NS_INSTALL_INTERCEPT 0x0019 -+#define NS_CREATE_VP 0x001F -+#define NS_TERMINATE_VP 0x0020 -+#define NS_DELETE_VP 0x0021 -+#define NS_GET_NEXT_VP 0x0027 -+#define NS_GET_VP_REGISTERS 0x0022 -+#define NS_SET_VP_REGISTERS 0x0023 -+#define NS_SWITCH_VA 0x0001 -+#define NS_FLUSH_VA 0x0002 -+#define NS_FLUSH_VA_LIST 0x0003 -+#define NS_TRASLATE_VA 0x0024 -+#define NS_READ_GPA 0x0025 -+#define NS_WRITE_GPA 0x0026 -+#define NS_ASSERT_VIRTUAL_INTERRUPT 0x002A -+#define NS_CLEAR_VIRTUAL_INTERRUPT 0x002C -+#define NS_CREATE_PORT 0x002D -+#define NS_DELETE_PORT 0x002E -+#define NS_CONNECT_PORT 0x002F -+#define NS_GET_PORT_PROPERTY 0x0031 -+#define NS_DISCONNECT_PORT 0x0030 -+#define NS_POST_MESSAGE 0x0032 -+#define NS_POST_EVENT 0x0034 -+ -+#endif /* NS_HYPERCALL_H */ -Index: xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nsintercept.c -=================================================================== ---- /dev/null 1970-01-01 00:00:00.000000000 +0000 -+++ xen-3.2-testing/xen/arch/x86/hvm/hvm_ext/novell/nsintercept.c 2008-04-23 11:29:23.000000000 -0400 -@@ -0,0 +1,2100 @@ -+/**************************************************************************** -+ | -+ | Copyright (c) [2007, 2008] Novell, Inc. -+ | All Rights Reserved. -+ | -+ | This program is free software; you can redistribute it and/or -+ | modify it under the terms of version 2 of the GNU General Public License as -+ | published by the Free Software Foundation. -+ | -+ | This program is distributed in the hope that it will be useful, -+ | but WITHOUT ANY WARRANTY; without even the implied warranty of -+ | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ | GNU General Public License for more details. -+ | -+ | You should have received a copy of the GNU General Public License -+ | along with this program; if not, contact Novell, Inc. -+ | -+ | To contact Novell about this file by physical or electronic mail, -+ | you may find current contact information at www.novell.com -+ | -+ |*************************************************************************** -+*/ -+ -+/* -+ * nsintercept.c. -+ * This file implements the intercepts to support the Novell Shim. -+ * -+ * Engineering Contact: K. Y. Srinivasan -+ */ -+ -+#include -+#include -+#include -+ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+ -+ -+/* -+ * Local includes; extension specific. -+ */ -+#include "ns_errno.h" -+#include "ns_shim.h" -+ -+ -+/* -+ * Implement Novell Shim. -+ */ -+ -+ -+/* -+ * Hypervisor intercept vector. 
-+ */ -+static int -+nsDomainCreate(struct domain *d); -+static void -+nsDomainDestroy(struct domain *d); -+static int -+nsVcpuInitialize(struct vcpu *v); -+static void -+nsVcpuUp(struct vcpu *v); -+static void -+nsVcpuDestroy(struct vcpu *v); -+static int -+nsDoCpuId(uint32_t input, struct cpu_user_regs *regs); -+static int -+nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs); -+static int -+nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs); -+static int -+nsDoHyperCall(struct cpu_user_regs *pregs); -+static void -+nsDoMigrateTimers(struct vcpu *v); -+ -+extension_intercept_vector_t nsExtensionVector = { -+ .domain_create = nsDomainCreate, -+ .domain_destroy = nsDomainDestroy, -+ .vcpu_initialize = nsVcpuInitialize, -+ .vcpu_destroy = nsVcpuDestroy, -+ .do_cpuid = nsDoCpuId, -+ .do_msr_read = nsDoRdMsr, -+ .do_msr_write = nsDoWrMsr, -+ .do_hypercall = nsDoHyperCall, -+ .do_continuation = nsDoTlbFlush, -+ .do_migrate_timers = nsDoMigrateTimers, -+ .vcpu_up = nsVcpuUp -+}; -+ -+/* -+ * Hooks into xen services; to be populated by our proxy in xen. -+ */ -+ -+xen_call_vector_t nsXenVector; -+ -+/* -+ * Does the box support hap? -+ */ -+ -+int nsHapSupported; -+ -+ -+static inline void -+nsInjectException(int trap); -+ -+static inline void -+nsHypercallPageInitialize(void *hypercallPage, nsPartition_t *curp); -+ -+static inline void -+nsInitEventPage(void *siefPage); -+ -+static inline void -+nsInitMessagePage(void *simPage); -+ -+/* -+ * static int __init nsExtensionInit(void) -+ * Initialize the extensiom module. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static int __init nsExtensionInit(void) -+{ -+ int retVal; -+ retVal = hvm_ext_register(1, &nsExtensionVector, &nsXenVector); -+ NS_ASSERT(retVal == 0); -+ nsXenVector.extPrintk("NS Extension Initialized\n"); -+ return 0; -+} -+__initcall(nsExtensionInit); -+ -+/* -+ * Our lock primitives. -+ */ -+/* -+ * void -+ * nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock) -+ * Acquire the specified lock. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+void -+nsLockAcquire(nsVcpu_t *vcpup, nsSpinLock_t *nsLock) -+{ -+ NS_ASSERT(nsLock->owner != vcpup); -+ spin_lock_irqsave(&nsLock->spinLock, nsLock->flags); -+ nsLock->owner = vcpup; -+ nsLock->retAddr = __builtin_return_address(0); -+ vcpup->nsVcplockDepth++; -+} -+ -+/* -+ * void -+ * nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock) -+ * Release the specified spin lock. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+void -+nsLockRelease(nsVcpu_t *vcpup, nsSpinLock_t *nsLock) -+{ -+ NS_ASSERT((nsLock->owner == vcpup)); -+ nsLock->owner = NULL; -+ vcpup->nsVcplockDepth--; -+ NS_ASSERT(vcpup->nsVcplockDepth >= 0); -+ spin_unlock_irqrestore(&nsLock->spinLock, nsLock->flags); -+} -+ -+/* -+ * void -+ * nsLockInit(nsSpinLock_t *nsLock) -+ * Initialize the specified spin lock. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+void -+nsLockInit(nsSpinLock_t *nsLock) -+{ -+ spin_lock_init(&nsLock->spinLock); -+ nsLock->owner = NULL; -+ nsLock->retAddr = NULL; -+} -+ -+/* -+ * static inline void nsWriteGuestIdMsr(nsPartition_t *curp, -+ * nsVcpu_t *curVcpu, -+ * u64 msrContent) -+ * Write the guest ID. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static inline void -+nsWriteGuestIdMsr(nsPartition_t *curp, nsVcpu_t *curVcpu, u64 msrContent) -+{ -+ curp->nsGuestIdMsr = msrContent; -+ if (curp->nsGuestIdMsr == 0) { -+ /* -+ * Guest has cleared the guest ID; -+ * clear the hypercall page. 
-+ */ -+ if (curp->nsHypercallMsr) { -+ curVcpu->nsVcpuFlags &= ~NS_VCPU_UP; -+ } -+ } -+} -+ -+/* -+ * static inline void nsWriteHypercallMsr(nsPartition_t *curp, -+ * nsVcpu_t *curVcpu, -+ * u64 msrContent) -+ * Write hypercall msr. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static inline void -+nsWriteHypercallMsr(nsPartition_t *curp, -+ nsVcpu_t *curVcpu, -+ u64 msrContent) -+{ -+ unsigned long gmfn; -+ void *hypercallPage; -+ struct domain *d = curVcpu->nsVcpXenVcpu->domain; -+ -+ nsLockAcquire(curVcpu, &curp->nsLock); -+ gmfn = (msrContent >> 12); -+ if (curp->nsGuestIdMsr == 0) { -+ /* Nothing to do if the guest is not registered*/ -+ nsLockRelease(curVcpu, &curp->nsLock); -+ return; -+ } -+ /* -+ * Guest is registered; see if we can turn-on the -+ * hypercall page. -+ * XXXKYS: Can the guest write the GPA in one call and -+ * subsequently enable it? Check. For now assume that all the -+ * info is specified in one call. -+ */ -+ if (((u32)msrContent & (0x00000001)) == 0) { -+ /* -+ * The client is not enabling the hypercall; just -+ * ignore everything. -+ */ -+ nsLockRelease(curVcpu, &curp->nsLock); -+ return; -+ } -+ hypercallPage = nsXenVector.extGetVirtFromGmfn(d,gmfn); -+ if (hypercallPage == NULL) { -+ /* -+ * The guest specified a bogus GPA; inject a GP fault -+ * into the guest. -+ */ -+ nsInjectException(TRAP_gp_fault); -+ nsLockRelease(curVcpu, &curp->nsLock); -+ return; -+ } -+ nsHypercallPageInitialize(hypercallPage, curp); -+ curp->nsHypercallMfn = nsXenVector.extGetMfnFromGmfn(d, gmfn); -+#ifdef CONFIG_DOMAIN_PAGE -+ nsXenVector.extUnmapDomainPage(hypercallPage); -+#endif -+ curp->nsHypercallMsr = msrContent; -+ nsLockRelease(curVcpu, &curp->nsLock); -+ curVcpu->nsVcpuFlags |= NS_VCPU_UP; -+} -+ -+/* -+ * static inline void nsWriteSxMsr(uint32_t idx, nsPartition_t *curp, -+ * nsVcpu_t *curVcpu, -+ * u64 msrContent) -+ * Write SIEFP or SIMP msr. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static inline void nsWriteSxMsr(uint32_t idx, nsPartition_t *curp, -+ nsVcpu_t *curVcpu, -+ u64 msrContent) -+{ -+ unsigned long gmfn; -+ void *sxPage; -+ struct domain *d = curVcpu->nsVcpXenVcpu->domain; -+ gmfn = (msrContent >> 12); -+ /* -+ * Can the client enable the siefp and specify -+ * the base address in two -+ * different calls? XXXKYS: For now assume -+ * that it is done in one call. -+ */ -+ if (!((u32)msrContent & (0x00000001))) { -+ /* -+ * The client is not enabling the sx page; just -+ * ignore everything. -+ */ -+ return; -+ } -+ sxPage = nsXenVector.extGetVirtFromGmfn(d, gmfn); -+ if (sxPage == NULL) { -+ /* -+ * The guest specified a bogus GPA; inject a GP fault -+ * into the guest. -+ */ -+ nsInjectException(TRAP_gp_fault); -+ return; -+ } -+ switch (idx) { -+ case NS_MSR_SIEFP: -+ nsInitEventPage(sxPage); -+ curVcpu->nsVcpSIefpMsr = msrContent; -+ curVcpu->nsVcpSiefPage = sxPage; -+ break; -+ case NS_MSR_SIMP: -+ nsInitMessagePage(sxPage); -+ curVcpu->nsVcpSimpMsr = msrContent; -+ curVcpu->nsVcpSimPage = sxPage; -+ break; -+ } -+ -+} -+ -+/* -+ * static inline u64 -+ * nsGetTimeSinceDomainBoot(nsPartition_t *curp) -+ * Retrieve the time since boot in 100ns units. -+ * -+ * Calling/Exit State: -+ * None. 
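The hypercall, SIEFP and SIMP MSR writers above all assume one guest-visible layout: bit 0 enables the facility and bits 12 and up hold the guest frame number of the backing page. A standalone decode with an invented MSR value:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t msrContent = 0x12345001ULL;   /* invented guest write */
        int enabled = (int)(msrContent & 0x1);
        uint64_t gmfn = msrContent >> 12;

        /* Prints: enabled=1 gmfn=0x12345 */
        printf("enabled=%d gmfn=%#llx\n", enabled,
               (unsigned long long)gmfn);
        return 0;
    }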
-+ */
-+
-+static inline u64
-+nsGetTimeSinceDomainBoot(nsPartition_t *curp)
-+{
-+    u64 curTime = nsXenVector.extGetTimeSinceBoot();
-+    return ((curTime - curp->nsDomainBootTime) / 100);
-+}
-+
-+/*
-+ * static inline int
-+ * nsCallFromBios(struct cpu_user_regs *regs)
-+ * Check if the caller is in the right state to consume the services of the
-+ * extension module.
-+ *
-+ * Calling/Exit State:
-+ * None.
-+ */
-+
-+static inline int
-+nsCallFromBios(struct cpu_user_regs *regs)
-+{
-+    if (hvm_paging_enabled(current)) {
-+        return (0);
-+    } else {
-+        return (1);
-+    }
-+}
-+
-+/*
-+ * static inline void
-+ * nsInjectException(int trap)
-+ * Inject the specified exception into the invoking virtual CPU.
-+ *
-+ * Calling/Exit State:
-+ * None.
-+ */
-+
-+static inline void
-+nsInjectException(int trap)
-+{
-+    nsXenVector.hvmFuncTable->inject_exception(trap, 0, 0);
-+}
-+
-+
-+/*
-+ * static inline int
-+ * nsOsRegistered(void)
-+ * Check to see if the guest has registered itself with the Novell Shim.
-+ *
-+ * Calling/Exit State:
-+ * None.
-+ */
-+
-+static inline int
-+nsOsRegistered(void)
-+{
-+    nsPartition_t *curp = nsGetCurrentPartition();
-+    return (curp->nsGuestIdMsr != 0 ? 1 : 0);
-+}
-+
-+
-+/*
-+ * static inline void
-+ * nsSetPartitionPrivileges(nsPartition_t *nspp)
-+ * Set the partition-wide privileges. Currently it is hardcoded.
-+ * We could perhaps make this an attribute of the domain and have the
-+ * configuration tools manage it.
-+ *
-+ * Calling/Exit State:
-+ * None.
-+ */
-+
-+static inline void
-+nsSetPartitionPrivileges(nsPartition_t *nspp)
-+{
-+    /*
-+     * This is based on the hypervisor spec under section 5.2.3.
-+     */
-+    nspp->nsPrivileges = 0x000000020000007f;
-+}
-+
-+/*
-+ * static inline u32
-+ * nsGetRecommendations(void)
-+ * Get the recommendations.
-+ *
-+ * Calling/Exit State:
-+ * None.
-+ */
-+static inline u32
-+nsGetRecommendations(void)
-+{
-+    /*
-+     * For now we recommend all the features. Need to validate.
-+     */
-+    if (nsHapSupported) {
-+        /*
-+         * If the box supports HAP, the guest should not use the TLB
-+         * flush related enlightenments.
-+         */
-+        return (0x19);
-+    } else {
-+        return (0x1f);
-+    }
-+}
-+
-+/*
-+ * static inline void
-+ * nsSetPartitionFeatures(nsPartition_t *nspp)
-+ * Set the partition-wide features. Currently it is hardcoded.
-+ * We could perhaps make this an attribute of the domain and have the
-+ * configuration tools manage it.
-+ *
-+ * Calling/Exit State:
-+ * None.
-+ */
-+
-+static inline void
-+nsSetPartitionFeatures(nsPartition_t *nspp)
-+{
-+    nspp->nsSupportedFeatures = 0x1f;
-+}
-+
-+static inline u16
-+nsGetGuestMajor(void)
-+{
-+    return (0);
-+}
-+static inline u16
-+nsGetGuestMinor(void)
-+{
-+    return (0);
-+}
-+static inline u32
-+nsGetGuestServicePack(void)
-+{
-+    return (0);
-+}
-+
-+static inline u8
-+nsGetGuestServiceBranchInfo(void)
-+{
-+    return (0);
-+}
-+static inline u32
-+nsGetGuestServiceNumber(void)
-+{
-+    return (0);
-+}
-+
-+/*
-+ * static inline u32
-+ * nsGetSupportedSyntheticMsrs(void)
-+ * Get the synthetic MSRs supported by the Novell Shim. Currently
-+ * it is hardcoded.
-+ *
-+ * Calling/Exit State:
-+ * None.
-+ */
-+static inline u32
-+nsGetSupportedSyntheticMsrs(void)
-+{
-+    /*
-+     * All MSRs in spec version 0.83, including the RESET MSR.
-+     */
-+    return (0xff);
-+}
-+
-+
-+/*
-+ * static inline u32
-+ * nsGetMaxVcpusSupported(void)
-+ * Retrieve the maximum vcpus supported.
-+ *
-+ * Calling/Exit State:
-+ * None.
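The hard-coded privilege word 0x000000020000007f installed by nsSetPartitionPrivileges above sets bits 0..6 plus bit 33, so checks against the NS_ACCESS_* flags defined in ns_shim.h succeed for, e.g., the reference counter and the partition ID. A standalone restatement of the test nsPrivilegeCheck() performs:

    #include <stdint.h>
    #include <assert.h>

    #define NS_ACCESS_TIME_REF_CNT (1ULL << 1)
    #define NS_ACCESS_PARTITION_ID (1ULL << 33)

    int main(void)
    {
        uint64_t nsPrivileges = 0x000000020000007fULL;

        /* Same bitwise test nsPrivilegeCheck() applies to the
         * partition state. */
        assert(nsPrivileges & NS_ACCESS_TIME_REF_CNT);
        assert(nsPrivileges & NS_ACCESS_PARTITION_ID);
        return 0;
    }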
-+ */ -+ -+static inline u32 -+nsGetMaxVcpusSupported(void) -+{ -+ return MAX_VIRT_CPUS; -+} -+ -+/* -+ * static inline u32 -+ * nsGetMaxLcpusSupported(void) -+ * Retrieve the maximum physical cpus supported. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static inline u32 -+nsGetMaxLcpusSupported(void) -+{ -+ return NR_CPUS; -+} -+ -+ -+/* -+ * static inline void -+ * nsReadIcr(u64 *icrContent) -+ * Read the ICR of the local APIC of the calling VCPU. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static inline void -+nsReadIcr(u64 *icrContent) -+{ -+ u32 icrLow, icrHigh; -+ u64 retVal; -+ -+ -+ icrLow = nsXenVector.mmIoHandler->read_handler(current, -+ (vlapic_base_address(vcpu_vlapic(current)) + 0x300), 4); -+ icrHigh = nsXenVector.mmIoHandler->read_handler(current, -+ (vlapic_base_address(vcpu_vlapic(current)) + 0x310), 4); -+ retVal = icrHigh; -+ *icrContent = ((retVal << 32) | icrLow); -+ -+} -+ -+/* -+ * static inline void -+ * nsReadTpr(u64 *tprContent) -+ * Read the TPR of the local APIC of the calling VCPU. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static inline void -+nsReadTpr(u64 *tprContent) -+{ -+ u32 tprLow; -+ -+ -+ tprLow = nsXenVector.mmIoHandler->read_handler(current, -+ (vlapic_base_address(vcpu_vlapic(current)) + 0x80), 4); -+ *tprContent = (u64)tprLow; -+ -+} -+ -+/* -+ * static inline void -+ * nsWriteEoi(u64 msrContent) -+ * Write the EOI register of the local APIC of the calling VCPU. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static inline void -+nsWriteEoi(u64 msrContent) -+{ -+ u32 eoi = (u32)msrContent; -+ -+ nsXenVector.mmIoHandler->write_handler(current, -+ (vlapic_base_address(vcpu_vlapic(current)) + 0xb0), 4, eoi); -+ -+} -+ -+/* -+ * static inline void -+ * nsWriteIcr(u64 msrContent) -+ * Write the ICR register of the local APIC of the calling VCPU. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static inline void -+nsWriteIcr(u64 msrContent) -+{ -+ u32 icrLow, icrHigh; -+ icrLow = (u32)msrContent; -+ icrHigh = (u32)(msrContent >> 32); -+ -+ if (icrHigh != 0) { -+ nsXenVector.mmIoHandler->write_handler(current, -+ (vlapic_base_address(vcpu_vlapic(current)) + 0x310), 4, -+ icrHigh); -+ } -+ if (icrLow != 0) { -+ nsXenVector.mmIoHandler->write_handler(current, -+ (vlapic_base_address(vcpu_vlapic(current)) + 0x300), 4, -+ icrLow); -+ } -+ -+} -+ -+/* -+ * static inline void -+ * nsWriteTpr(u64 msrContent) -+ * Write the TPR register of the local APIC of the calling VCPU. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static inline void -+nsWriteTpr(u64 msrContent) -+{ -+ u32 tpr = (u32)msrContent; -+ -+ -+ nsXenVector.mmIoHandler->write_handler(current, -+ (vlapic_base_address(vcpu_vlapic(current)) + 0x80), 4, tpr); -+ -+} -+ -+/* -+ * static inline void -+ * nsHypercallPageInitialize(void *hypercallPage, nsPartition_t *curp) -+ * Initialize the hypercall page to support the Novell Shim Hypercalls. -+ * -+ * Calling/Exit State: -+ * None. 
-+ */ -+static inline void -+nsHypercallPageInitialize(void *hypercallPage, nsPartition_t *curp) -+{ -+ char *p; -+ -+ nsHapSupported = 0; -+ if (nsXenVector.hvmFuncTable->guest_x86_mode(current) == 8) { -+ curp->nsLongModeGuest = 1; -+ } else { -+ curp->nsLongModeGuest = 0; -+ } -+ -+ memset(hypercallPage, 0, PAGE_SIZE); -+ p = (char *)(hypercallPage) ; -+ *(u8 *)(p + 0) = 0x0f; /* vmcall */ -+ *(u8 *)(p + 1) = 0x01; -+ if (nsXenVector.extCpuIsIntel()) { -+ *(u8 *)(p + 2) = 0xc1; -+ nsHapSupported = cpu_has_vmx_ept; -+ } else { -+ *(u8 *)(p + 2) = 0xd9; -+ nsHapSupported = cpu_has_svm_npt; -+ } -+ *(u8 *)(p + 3) = 0xc3; /* ret */ -+} -+ -+/* -+ * static inline void -+ * nsInitEventPage(void *siefPage) -+ * Initialize the per-vcpu event page. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static inline void -+nsInitEventPage(void *siefPage) -+{ -+ memset(siefPage, 0, PAGE_SIZE); -+} -+ -+/* -+ * static inline void -+ * nsInitMessagePage(void *siefPage) -+ * Initialize the per-vcpu message page. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static inline void -+nsInitMessagePage(void *simPage) -+{ -+ memset(simPage, 0, PAGE_SIZE); -+} -+ -+ -+/* -+ * static inline void -+ * nsProcessMessageQ(nsPartition_t *curp, nsVcpu_t *curVcpu) -+ * Process the message queue. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static inline void -+nsProcessMessageQ(nsPartition_t *curp, nsVcpu_t *curVcpu) -+{ -+ /* -+ * XXXKYS: we currently do not support queued messages. -+ */ -+} -+ -+/* -+ * static inline void -+ * nsScheduleTimeOut(nsVcpTimerState_t *timer) -+ * Schedule a timeout based on the specified timer. -+ * -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static inline void -+nsScheduleTimeOut(nsVcpTimerState_t *timer) -+{ -+ /* -+ * We maintain the count in the units of 100ns. Furthermore, -+ * this is not relative to NOW() but rather absolute. -+ */ -+ nsXenVector.extSetTimer(&timer->vcpuTimer, (timer->count * 100)); -+} -+ -+/* -+ * static void -+ * nsTimeOutHandler(void *arg) -+ * The timeout handler for Novell Shim/Adaptor. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static void -+nsTimeOutHandler(void *arg) -+{ -+ nsVcpTimerState_t *timerData = arg; -+ nsVcpu_t *curVcpu = timerData->thisCpu; -+ int sIntNum; -+ int vector; -+ if (!(curVcpu->nsVcpSControlMsr & 0x9)) { -+ goto nsToPostProcess; -+ } -+ /* -+ * SynIC is enabled; do further processing. Timeouts are posted as -+ * messages; verify if the message page is enabled. -+ */ -+ if (!(curVcpu->nsVcpSimpMsr & 0x1)) { -+ goto nsToPostProcess; -+ } -+ sIntNum = (((u32)(timerData->config >> 16)) & 0x0000000f); -+ /* -+ * First post the message and then optionally deal with the -+ * interrupt notification. -+ */ -+ if (curVcpu->nsVcpSimPage == NULL) { -+ NS_PANIC("Novell Shim: Sim page not setup\n"); -+ } -+ if ((((nsMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).messageType != -+ nsMessageTypeNone) { -+ /* -+ * The message slot is not empty just silently return. -+ */ -+ goto nsToPostProcess; -+ } -+ /* -+ * The slot is available; post the message. 
-+ */ -+ (((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).messageType = -+ nsMessageTimerExpired; -+ (((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).messageSize = -+ sizeof(nsTimerMessage_t); -+ (((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).timerIndex = -+ timerData->timerIndex; -+ (((nsTimerMessage_t *)curVcpu->nsVcpSimPage)[sIntNum]).expirationTime = -+ timerData->count; -+ if ((curVcpu->nsVcpSIntMsr[sIntNum] >> 16) &0x1) { -+ /* -+ * The designated sintx register is masked; just return. -+ */ -+ goto nsToPostProcess; -+ } -+ vector = ((u32)curVcpu->nsVcpSIntMsr[sIntNum] &0xff); -+ -+ /* -+ * Now post the interrupt to the VCPU. -+ * XXXKYS: What is the delivery mode for interrupts delivered here. -+ * Check with Mike? -+ */ -+ nsXenVector.extPostInterrupt(current, vector, APIC_DM_FIXED); -+ -+ /* -+ * If auto eoi is set; deal with that. -+ */ -+ if (((u32)(curVcpu->nsVcpSIntMsr[sIntNum] >> 16)) & 0x1) { -+ nsWriteEoi(0); -+ } -+ -+nsToPostProcess: -+ /* -+ * Prior to returning, deal with all the post timeout issues. -+ */ -+ if (((u32)(timerData->config)) & 0x00000002) { -+ NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats); -+ nsScheduleTimeOut(timerData); -+ } -+} -+ -+/* -+ * static inline void -+ * nsTimerInit(nsVcpu_t *vcpup, int timer) -+ * Initialize the specified timer structure. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static inline void -+nsTimerInit(nsVcpu_t *vcpup, int timer) -+{ -+ vcpup->nsVcpTimers[timer].config = 0; -+ vcpup->nsVcpTimers[timer].count = 0; -+ vcpup->nsVcpTimers[timer].thisCpu = vcpup; -+ vcpup->nsVcpTimers[timer].timerIndex = timer; -+ init_timer(&vcpup->nsVcpTimers[timer].vcpuTimer, nsTimeOutHandler, -+ &vcpup->nsVcpTimers[timer], current->processor); -+} -+ -+/* -+ * static inline int -+ * nsAccessTimeRefCnt(nsPartition_t *curp, u64 *msrContent) -+ * Read the per-partition time base. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static inline int -+nsAccessTimeRefCnt(nsPartition_t *curp, u64 *msrContent) -+{ -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_TIME_REF_CNT)) { -+ /* -+ * The partition does not have the privilege to -+ * read this; return error. -+ */ -+ return (0); -+ } -+ *msrContent = nsGetTimeSinceDomainBoot(curp); -+ return (1); -+} -+ -+/* -+ * static void -+ * nsDoMigrateTimers(struct vcpu *v) -+ * The binding between this vcpu and the physical cpu has changed; migrate -+ * the timers for this vcpu. -+ * -+ * Calling/Exit State: -+ * The new binding is already in place. -+ */ -+ -+static void -+nsDoMigrateTimers(struct vcpu *v) -+{ -+ nsPartition_t *curp = nsGetCurrentPartition(); -+ nsVcpu_t *vcpup; -+ int i; -+ vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()]; -+ -+ for (i=0; i<4; i++) { -+ nsXenVector.extMigrateTimer(&vcpup->nsVcpTimers[i].vcpuTimer, -+ v->processor); -+ } -+} -+ -+/* -+ * static void -+ * nsVcpuUp(struct vcpu *v) -+ * A secondary processor has come on line; mark the processor as up. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static void -+nsVcpuUp(struct vcpu *v) -+{ -+ nsPartition_t *curp = nsGetCurrentPartition(); -+ nsVcpu_t *vcpup; -+ vcpup = &curp->nsVcpuState[v->vcpu_id]; -+ vcpup->nsVcpuFlags |= NS_VCPU_UP; -+} -+ -+/* -+ * static int -+ * nsDoHyperCall(struct cpu_user_regs *pregs) -+ * Intercept for implementing Extension hypercalls. -+ * -+ * Calling/Exit State: -+ * Based on the hypercall; the caller may give up the CPU while -+ * processing the hypercall. No locks should be held on entry and -+ * no locks will be held on return. 
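nsDoHyperCall below picks up the hypercall arguments from mode-dependent registers: a long-mode guest passes the opcode, input GPA and output GPA in rcx, rdx and r8, while a 32-bit guest splits each 64-bit value across a register pair (edx:eax, ebx:ecx, edi:esi). A standalone sketch of the 32-bit recombination with invented register contents:

    #include <stdint.h>
    #include <assert.h>

    int main(void)
    {
        /* Invented 32-bit guest register contents. */
        uint32_t edx = 0x00000000, eax = 0x00000002;  /* opcode: NS_FLUSH_VA */
        uint32_t ebx = 0x00000001, ecx = 0x2000;      /* input GPA */

        uint64_t opcode = ((uint64_t)edx << 32) | eax;
        uint64_t input  = ((uint64_t)ebx << 32) | ecx;

        assert(opcode == 0x2);
        assert(input == 0x100002000ULL);
        return 0;
    }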
-+ * -+ * -+ */ -+ -+static int -+nsDoHyperCall(struct cpu_user_regs *pregs) -+{ -+ nsPartition_t *curp = nsGetCurrentPartition(); -+ nsVcpu_t *vcpup; -+ int longModeGuest = curp->nsLongModeGuest; -+ unsigned long hypercallMfn; -+ unsigned long gmfn; -+ gmfn = (curp->nsHypercallMsr >> 12); -+ -+ hypercallMfn = nsXenVector.extGetMfnFromGva(pregs->eip); -+ -+ if (hypercallMfn == curp->nsHypercallMfn) { -+ u64 opcode, input, output, retVal; -+ vcpup = &curp->nsVcpuState[nsGetCurrentVcpuIndex()]; -+ -+ /* -+ * This is an extension hypercall; process it; but first make -+ * sure that the CPU is in the right state for invoking -+ * the hypercall - protected mode at CPL 0. -+ */ -+ if (nsInvalidCpuState()) { -+ nsInjectException(TRAP_gp_fault); -+ retVal = nsBuildHcallRetVal(NS_STATUS_INVALID_VP_STATE, -+ 0); -+ nsSetSysCallRetVal(pregs, longModeGuest, retVal); -+ return (1); -+ } -+ if (longModeGuest) { -+ opcode = pregs->ecx; -+ input = pregs->edx; -+ output = pregs->r8; -+ } else { -+ opcode = -+ ((((u64)pregs->edx) << 32) | ((u64)pregs->eax)); -+ input = -+ ((((u64)pregs->ebx) << 32) | ((u64)pregs->ecx)); -+ output = -+ ((((u64)pregs->edi) << 32) | ((u64)pregs->esi)); -+ } -+ NS_ASSERT(vcpup->nsVcplockDepth == 0); -+ nsHandleHyperCall(opcode, input, output, &retVal); -+ nsSetSysCallRetVal(pregs, longModeGuest, retVal); -+ NS_ASSERT(vcpup->nsVcplockDepth == 0); -+ return (1); -+ } -+ /* -+ * This hypercall page is not the page for extension. -+ */ -+ return (0); -+} -+ -+/* -+ * static int -+ * nsDomainCreate(struct domain *d) -+ * NS intercept for domain creation. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+ -+static int -+nsDomainCreate(struct domain *d) -+{ -+ nsPartition_t *nspp; -+ nspp = nsXenVector.extAllocMem(sizeof(nsPartition_t)); -+ if (nspp == NULL) { -+ nsDebugPrint("Memory allocation failed\n"); -+ return (1); -+ } -+ memset(nspp, 0, sizeof(*nspp)); -+ nsLockInit(&nspp->nsLock); -+ /* -+ * Set the partition wide privilege; We can start with no privileges -+ * and progressively turn on fancier hypervisor features. -+ */ -+ nsSetPartitionPrivileges(nspp); -+ nsSetPartitionFeatures(nspp); -+ /* -+ * Stash away pointer to our state in the hvm domain structure. -+ */ -+ d->arch.hvm_domain.ext_handle = nspp; -+ nspp->nsDomainBootTime = nsXenVector.extGetTimeSinceBoot(); -+ return (0); -+} -+ -+ -+ -+/* -+ * static void -+ * nsDomainDestroy(struct domain *d) -+ * NS intercept for the domain destruction. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static void -+nsDomainDestroy(struct domain *d) -+{ -+ int i; -+ nsPartition_t *curp = d->arch.hvm_domain.ext_handle; -+ nsXenVector.extPrintk("NS Domain Being Destroyed\n"); -+ NS_ASSERT(curp != NULL); -+ nsXenVector.extPrintk("DUMP STATS\n"); -+ nsXenVector.extPrintk("GFS cpucount is %d\n", curp->nsFlushState.cpuCount); -+ if (curp->nsFlushState.currentOwner != NULL) { -+ nsXenVector.extPrintk("GFS owner is %d\n", curp->nsFlushState.currentOwner->vcpu_id); -+ } else { -+ nsXenVector.extPrintk("GFS is free\n"); -+ } -+ if (!cpus_empty(curp->nsFlushState.waiters)) { -+ nsXenVector.extPrintk("GFS: waiters not empty\n"); -+ } else { -+ nsXenVector.extPrintk("GFS: waiters empty\n"); -+ } -+ for (i=0; i < MAX_VIRT_CPUS; i++) { -+ if (d->vcpu[i] != NULL) { -+ nsPrintStats(curp, i); -+ } -+ } -+ -+ nsXenVector.extFreeMem(d->arch.hvm_domain.ext_handle); -+ d->arch.hvm_domain.ext_handle = NULL; -+} -+ -+/* -+ * static int -+ * nsVcpuInitialize(struct vcpu *v) -+ * NS intercept for vcpu creation. -+ * -+ * Calling/Exit State: -+ * None. 
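-+ *
-+ * On success the vcpu has four initialized synthetic timers plus
-+ * one input and one output hypercall buffer page; on failure the
-+ * pages allocated so far are released and 1 is returned.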
-+ */ -+ -+static int -+nsVcpuInitialize(struct vcpu *v) -+{ -+ nsVcpu_t *vcpup; -+ nsPartition_t *curp = v->domain->arch.hvm_domain.ext_handle; -+ int i; -+ vcpup = &curp->nsVcpuState[v->vcpu_id]; -+ atomic_inc(&curp->nsNumVcpusActive); -+ if (v->vcpu_id == 0) { -+ vcpup->nsVcpuFlags |= NS_VCPU_BOOT_CPU; -+ } -+ /* -+ * Initialize all the synthetic MSRs corresponding to this VCPU. -+ * Note that all state is set to 0 to begin -+ * with. -+ */ -+ vcpup->nsVcpSVersionMsr = 0x00000001; -+ /* -+ * Initialize the synthetic timet structures. -+ */ -+ for (i=0; i < 4; i++) { -+ nsTimerInit(vcpup, i); -+ } -+ /* -+ * Setup the input page for handling hypercalls. -+ * -+ */ -+ vcpup->nsVcpInputBufferPage = -+ nsXenVector.extAllocDomHeapPage(); -+ if (vcpup->nsVcpInputBufferPage == NULL) { -+ nsDebugPrint("Memory allocation failed\n"); -+ return (1); -+ } -+ vcpup->nsVcpInputBuffer = -+ nsXenVector.extGetVirtFromPagePtr(vcpup->nsVcpInputBufferPage); -+ if (vcpup->nsVcpInputBuffer == NULL) { -+ nsDebugPrint("Coud not get VA\n"); -+ nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage); -+ return (1); -+ } -+ memset(vcpup->nsVcpInputBuffer, 0, PAGE_SIZE); -+ vcpup->nsVcpOutputBufferPage = -+ nsXenVector.extAllocDomHeapPage(); -+ if (vcpup->nsVcpOutputBufferPage == NULL) { -+ nsDebugPrint("Memory allocation failed\n"); -+#ifdef CONFIG_DOMAIN_PAGE -+ nsXenVector.extUnmapDomainPage(vcpup->nsVcpInputBuffer); -+#endif -+ nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage); -+ return (1); -+ } -+ vcpup->nsVcpOutputBuffer = -+ nsXenVector.extGetVirtFromPagePtr(vcpup->nsVcpOutputBufferPage); -+ if (vcpup->nsVcpOutputBuffer == NULL) { -+ nsDebugPrint("Coud not get VA\n"); -+ nsXenVector.extFreeDomHeapPage(vcpup->nsVcpOutputBufferPage); -+#ifdef CONFIG_DOMAIN_PAGE -+ nsXenVector.extUnmapDomainPage(vcpup->nsVcpInputBuffer); -+#endif -+ nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage); -+ return (1); -+ } -+ vcpup->nsVcpXenVcpu = v; -+ vcpup->nsVcpFlushRequest = 0; -+ -+ return (0); -+} -+ -+/* -+ * static void -+ * nsVcpuDestroy(struct vcpu *v) -+ * NS intercept for domain destruction. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+static void -+nsVcpuDestroy(struct vcpu *v) -+{ -+ nsVcpu_t *vcpup; -+ nsPartition_t *curp = v->domain->arch.hvm_domain.ext_handle; -+ int i; -+ -+ vcpup = &curp->nsVcpuState[v->vcpu_id]; -+ atomic_dec(&curp->nsNumVcpusActive); -+ vcpup->nsVcpuFlags &= ~NS_VCPU_UP; -+ /* -+ * Get rid of the pages we have allocated for this VCPU. -+ */ -+#ifdef CONFIG_DOMAIN_PAGE -+ nsXenVector.extUnmapDomainPage(vcpup->nsVcpSiefPage); -+ nsXenVector.extUnmapDomainPage(vcpup->nsVcpSimPage); -+ nsXenVector.extUnmapDomainPage(vcpup->nsVcpInputBuffer); -+ nsXenVector.extUnmapDomainPage(vcpup->nsVcpOutputBuffer); -+#endif -+ -+ nsXenVector.extFreeDomHeapPage(vcpup->nsVcpInputBufferPage); -+ nsXenVector.extFreeDomHeapPage(vcpup->nsVcpOutputBufferPage); -+ /* -+ * Kill the timers -+ */ -+ for (i=0; i < 4; i++) { -+ nsXenVector.extKillTimer(&vcpup->nsVcpTimers[i].vcpuTimer); -+ } -+ return; -+} -+ -+/* -+ * static int nsVcpuSave(struct domain *d, hvm_domain_context_t *h) -+ * Save per-cpu shim state to support either migration or domain save. -+ * -+ * Calling exit state: -+ * None. 
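-+ *
-+ * Timer counts are saved as 100ns deltas relative to NOW(); for
-+ * example a timer due 5ms from now is stored as 50000. The restore
-+ * path adds NOW()/100 back to rebuild an absolute expiry.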
-+ */ -+static int -+nsVcpuSave(struct domain *d, hvm_domain_context_t *h) -+{ -+ struct vcpu *v; -+ struct hvm_ns_veridian_cpu ctxt; -+ -+ nsVcpu_t *vcpup; -+ nsPartition_t *curp = d->arch.hvm_domain.ext_handle; -+ int i; -+ -+ if (curp == NULL) { -+ return 0; -+ } -+ for_each_vcpu(d, v) { -+ vcpup = &curp->nsVcpuState[v->vcpu_id]; -+ -+ NS_ASSERT(vcpup->nsVcplockDepth == 0); -+ NS_ASSERT(vcpup->nsVcpFlushRequest == 0); -+ NS_ASSERT(vcpup->nsVcpWaitingOnGFS == 0); -+ NS_ASSERT(vcpup->nsVcpFlushPending == 0); -+ NS_ASSERT(vcpup->nsVcpWaitingForCleanup == 0); -+ /* -+ * We don't need to save state for a -+ * vcpu that is down; the restore -+ * code will leave it down if there is nothing saved. -+ */ -+ if ( test_bit(_VPF_down, &v->pause_flags) ) -+ continue; -+ ctxt.control_msr = vcpup->nsVcpSControlMsr; -+ ctxt.version_msr = vcpup->nsVcpSVersionMsr; -+ ctxt.sief_msr = vcpup->nsVcpSIefpMsr; -+ ctxt.simp_msr = vcpup->nsVcpSimpMsr; -+ ctxt.eom_msr = vcpup->nsVcpEomMsr; -+ for (i=0; i < 16; i++) -+ ctxt.int_msr[i] = vcpup->nsVcpSIntMsr[i]; -+ for (i=0; i < 4; i++) { -+ ctxt.timers[i].config = vcpup->nsVcpTimers[i].config; -+ /* -+ * Save the count in units of 100ns relative to NOW() -+ * When we restore we will add NOW() to properly -+ * account for the elapsed time when the timer was -+ * active. -+ */ -+ if (vcpup->nsVcpTimers[i].count > ((NOW())/100)) { -+ ctxt.timers[i].count = -+ (vcpup->nsVcpTimers[i].count - ((NOW())/100)); -+ } else { -+ ctxt.timers[i].count = 0; -+ } -+ } -+ if ( hvm_save_entry(NS_VERIDIAN_CPU, -+ v->vcpu_id, h, &ctxt) != 0 ) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/* -+ * static int nsVcpuRestore(struct domain *d, hvm_domain_context_t *h) -+ * Restore per-cpu shim state to support either migration or domain save. -+ * -+ * Calling exit state: -+ * None. -+ */ -+static int -+nsVcpuRestore(struct domain *d, hvm_domain_context_t *h) -+{ -+ int vcpuid, i; -+ struct hvm_ns_veridian_cpu ctxt; -+ -+ nsVcpu_t *vcpup; -+ nsPartition_t *curp = d->arch.hvm_domain.ext_handle; -+ -+ if (curp == NULL) { -+ return 0; -+ } -+ /* Which vcpu is this? */ -+ vcpuid = hvm_load_instance(h); -+ vcpup = &curp->nsVcpuState[vcpuid]; -+ NS_ASSERT(vcpup != NULL); -+ if ( hvm_load_entry(NS_VERIDIAN_CPU, h, &ctxt) != 0 ) -+ return -22; -+ -+ vcpup->nsVcpSControlMsr = ctxt.control_msr; -+ vcpup->nsVcpSVersionMsr = ctxt.version_msr; -+ -+ nsWriteSxMsr(NS_MSR_SIEFP, curp, vcpup, ctxt.sief_msr); -+ nsWriteSxMsr(NS_MSR_SIMP, curp, vcpup, ctxt.simp_msr); -+ -+ vcpup->nsVcpEomMsr = ctxt.eom_msr; -+ for (i=0; i<16; i++) -+ vcpup->nsVcpSIntMsr[i] = ctxt.int_msr[i]; -+ for (i=0; i < 4; i++) { -+ vcpup->nsVcpTimers[i].config = ctxt.timers[i].config; -+ vcpup->nsVcpTimers[i].count = -+ (ctxt.timers[i].count + ((NOW())/100)); -+ if ((vcpup->nsVcpTimers[i].config | 0x9)) { -+ /* -+ * XXXKYS: Some issues with regards to time -+ * management here: -+ * 1) We will ignore the elapsed wall clock time -+ * when the domain was not running. -+ * 2) Clearly we should account fot the time that -+ * has elapsed when the domain was running with -+ * respect to the timeouts that were scheduled -+ * prior to saving the domain. -+ * We will deal with on the save side. -+ */ -+ nsScheduleTimeOut(&vcpup->nsVcpTimers[i]); -+ NS_STATS_COLLECT(NS_TIMEOUTS, &vcpup->nsVcpStats); -+ } -+ } -+ -+ vcpup->nsVcpuFlags |= NS_VCPU_UP; -+ return 0; -+} -+ -+ -+ -+/* -+ * static int nsDomSave(struct domain *d, hvm_domain_context_t *h) -+ * Save per-domain shim state to support either migration or domain save. 
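-+ * Only three values travel with the domain: the guest-ID and
-+ * hypercall MSRs plus the long-mode flag; all remaining shim state
-+ * is per-vcpu and handled by nsVcpuSave().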
-+ * -+ * Calling exit state: -+ * None. -+ */ -+ -+static int -+nsDomSave(struct domain *d, hvm_domain_context_t *h) -+{ -+ struct hvm_ns_veridian_dom ctxt; -+ nsPartition_t *curp = d->arch.hvm_domain.ext_handle; -+ -+ if (curp == NULL) { -+ return 0; -+ } -+ -+ ctxt.guestid_msr = curp->nsGuestIdMsr; -+ ctxt.hypercall_msr = curp->nsHypercallMsr; -+ ctxt.long_mode = curp->nsLongModeGuest; -+ ctxt.pad0 = 0; -+ return (hvm_save_entry(NS_VERIDIAN_DOM, 0, h, &ctxt)); -+} -+ -+/* -+ * static int nsDomRestore(struct domain *d, hvm_domain_context_t *h) -+ * Restore per-domain shim state to support either migration or domain save. -+ * -+ * Calling exit state: -+ * None. -+ */ -+ -+static int -+nsDomRestore(struct domain *d, hvm_domain_context_t *h) -+{ -+ struct hvm_ns_veridian_dom ctxt; -+ nsPartition_t *curp = d->arch.hvm_domain.ext_handle; -+ -+ if (curp == NULL) { -+ return 0; -+ } -+ -+ if ( hvm_load_entry(NS_VERIDIAN_DOM, h, &ctxt) != 0 ) -+ return -22; -+ curp->nsGuestIdMsr = ctxt.guestid_msr; -+ curp->nsHypercallMsr = ctxt.hypercall_msr; -+ curp->nsLongModeGuest = ctxt.long_mode; -+ curp->nsHypercallMfn = -+ nsXenVector.extGetMfnFromGmfn(d, (ctxt.hypercall_msr >> 12)); -+ -+ return 0; -+} -+ -+HVM_REGISTER_SAVE_RESTORE(NS_VERIDIAN_DOM, nsDomSave, nsDomRestore, -+ 1, HVMSR_PER_DOM); -+ -+ -+HVM_REGISTER_SAVE_RESTORE(NS_VERIDIAN_CPU, nsVcpuSave , nsVcpuRestore, -+ 1, HVMSR_PER_VCPU); -+ -+ -+/* -+ * static int -+ * nsPreProcessCpuIdLeaves(unsigned int input, struct cpu_user_regs *regs) -+ * -+ * Preprocess cpuid leaves. Both xen and Veridian use identical cpuid -+ * leaves for getting info from the hypervisor. -+ * -+ * Calling exit state: -+ * None. -+ */ -+static int -+nsPreProcessCpuIdLeaves(unsigned int input, struct cpu_user_regs *regs) -+{ -+ uint32_t idx; -+ struct domain *d = current->domain; -+ int extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR]; -+ -+ if (extid == 1) { -+ /* -+ * Enlightened Windows guest; need to remap and handle -+ * leaves used by PV front-end drivers. -+ */ -+ if ((input >= 0x40000000) && (input <= 0x40000005)) { -+ return (0); -+ } -+ /* -+ * PV drivers use cpuid to query the hypervisor for details. On -+ * Windows we will use the following leaves for this: -+ * -+ * 4096: VMM Sinature (corresponds to 0x40000000 on Linux) -+ * 4097: VMM Version (corresponds to 0x40000001 on Linux) -+ * 4098: Hypercall details (corresponds to 0x40000002 on Linux) -+ */ -+ if ((input >= 0x40001000) && (input <= 0x40001002)) { -+ idx = (input - 0x40001000); -+ switch (idx) { -+ case 0: -+ regs->eax = 0x40000002; /* Largest leaf */ -+ regs->ebx = 0x566e6558;/*Signature 1: "XenV" */ -+ regs->ecx = 0x65584d4d; /*Signature 2: "MMXe" */ -+ regs->edx = 0x4d4d566e; /*Signature 3: "nVMM"*/ -+ break; -+ case 1: -+ regs->eax = -+ (XEN_VERSION << 16) | -+ XEN_SUBVERSION; -+ regs->ebx = 0; /* Reserved */ -+ regs->ecx = 0; /* Reserved */ -+ regs->edx = 0; /* Reserved */ -+ break; -+ -+ case 2: -+ regs->eax = 1; /*Number of hypercall-transfer pages*/ -+ /*In linux this is 0x40000000 */ -+ regs->ebx = 0x40001000; /* MSR base address */ -+ regs->ecx = 0; /* Features 1 */ -+ regs->edx = 0; /* Features 2 */ -+ break; -+ } -+ } -+ return (1); -+ } else { -+ /* -+ * For now this is all other "enlightened guests" -+ */ -+ if ((input >= 0x40000000) && (input <= 0x40000002)) { -+ /* -+ * These leaves have already been correctly -+ * processed; just return. 
-+ */ -+ return (1); -+ } -+ return (0); -+ } -+} -+ -+/* -+ * static int -+ * nsDoCpuId(unsigned int input, struct cpu_user_regs *regs) -+ * NS intercept for cpuid instruction -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static int -+nsDoCpuId(unsigned int input, struct cpu_user_regs *regs) -+{ -+ uint32_t idx; -+ -+ /* -+ * hvmloader uses cpuid to set up a hypercall page; we don't want to -+ * intercept calls coming from the bootstrap (bios) code in the HVM -+ * guest; we discriminate based on the instruction pointer. -+ */ -+ if (nsCallFromBios(regs)) { -+ /* -+ * We don't intercept this. -+ */ -+ return (0); -+ } -+ -+ if (input == 0x00000001) { -+ regs->ecx = (regs->ecx | 0x80000000); -+ return (1); -+ } -+ -+ if (nsPreProcessCpuIdLeaves(input, regs)) { -+ return (0); -+ } -+ idx = (input - 0x40000000); -+ -+ switch (idx) { -+ case 0: -+ /* -+ * 0x40000000: Hypervisor identification. -+ */ -+ regs->eax = 0x40000005; /* For now clamp this */ -+ regs->ebx = 0x65766f4e; /* "Nove" */ -+ regs->ecx = 0x68536c6c; /* "llSh" */ -+ regs->edx = 0x76486d69; /* "imHv" */ -+ break; -+ -+ case 1: -+ /* -+ * 0x40000001: Hypervisor identification. -+ */ -+ regs->eax = 0x31237648; /* "Hv#1*/ -+ regs->ebx = 0; /* Reserved */ -+ regs->ecx = 0; /* Reserved */ -+ regs->edx = 0; /* Reserved */ -+ break; -+ case 2: -+ /* -+ * 0x40000002: Guest Info -+ */ -+ if (nsOsRegistered()) { -+ regs->eax = nsGetGuestMajor(); -+ regs->ebx = -+ (nsGetGuestMajor() << 16) | nsGetGuestMinor(); -+ regs->ecx = nsGetGuestServicePack(); -+ regs->edx = -+ (nsGetGuestServiceBranchInfo() << 24) | -+ nsGetGuestServiceNumber(); -+ } else { -+ regs->eax = 0; -+ regs->ebx = 0; -+ regs->ecx = 0; -+ regs->edx = 0; -+ } -+ break; -+ case 3: -+ /* -+ * 0x40000003: Feature identification. -+ */ -+ regs->eax = nsGetSupportedSyntheticMsrs(); -+ /* We only support AcessSelfPartitionId bit 1 */ -+ regs->ebx = 0x2; -+ regs->ecx = 0; /* Reserved */ -+ regs->edx = 0; /*No MWAIT (bit 0), No debugging (bit 1)*/ -+ break; -+ case 4: -+ /* -+ * 0x40000004: Imlementation recommendations. -+ */ -+ regs->eax = nsGetRecommendations(); -+ regs->ebx = 0; /* Reserved */ -+ regs->ecx = 0; /* Reserved */ -+ regs->edx = 0; /* Reserved */ -+ break; -+ case 5: -+ /* -+ * 0x40000005: Implementation limits. -+ * Currently we retrieve maximum number of vcpus and -+ * logical processors (hardware threads) supported. -+ */ -+ regs->eax = nsGetMaxVcpusSupported(); -+ regs->ebx = nsGetMaxLcpusSupported(); -+ regs->ecx = 0; /* Reserved */ -+ regs->edx = 0; /* Reserved */ -+ break; -+ -+ default: -+ /* -+ * We don't handle this leaf. -+ */ -+ return (0); -+ -+ } -+ return (1); -+} -+ -+/* -+ * static int -+ * nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs) -+ * NS intercept for reading MSRS. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static int -+nsDoRdMsr(uint32_t idx, struct cpu_user_regs *regs) -+{ -+ nsPartition_t *curp = nsGetCurrentPartition(); -+ unsigned int vcpuIndex = nsGetCurrentVcpuIndex(); -+ u64 msrContent = 0; -+ nsVcpu_t *curVcpu = &curp->nsVcpuState[vcpuIndex]; -+ int synInt, timer; -+ struct domain *d = current->domain; -+ int extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR]; -+ u64 timerCount; -+ -+ /* -+ * hvmloader uses rdmsr; we don't want to -+ * intercept calls coming from the bootstrap (bios) code in the HVM -+ * guest; we descriminate based on the instruction pointer. -+ */ -+ if (nsCallFromBios(regs)) { -+ /* -+ * We don't intercept this. 
-+ */ -+ return (0); -+ } -+ if (extid > 1) { -+ /* -+ * For now this is all other "Enlightened" operating systems -+ * other than Longhorn. -+ */ -+ if (idx == 0x40000000) { -+ /* -+ * PV driver hypercall setup. Let xen handle this. -+ */ -+ return (0); -+ } -+ if (idx == 0x40001000) { -+ idx = 0x40000000; -+ } -+ } -+ switch (idx) { -+ case NS_MSR_GUEST_OS_ID: -+ nsLockAcquire(curVcpu, &curp->nsLock); -+ regs->eax = (u32)(curp->nsGuestIdMsr & 0xFFFFFFFF); -+ regs->edx = (u32)(curp->nsGuestIdMsr >> 32); -+ nsLockRelease(curVcpu, &curp->nsLock); -+ break; -+ case NS_MSR_HYPERCALL: -+ nsLockAcquire(curVcpu, &curp->nsLock); -+ regs->eax = (u32)(curp->nsHypercallMsr & 0xFFFFFFFF); -+ regs->edx = (u32)(curp->nsHypercallMsr >> 32); -+ nsLockRelease(curVcpu, &curp->nsLock); -+ if ((((u32)curp->nsHypercallMsr) & (0x00000001)) != 0) { -+ curVcpu->nsVcpuFlags |= NS_VCPU_UP; -+ } -+ break; -+ case NS_MSR_VP_INDEX: -+ regs->eax = (u32)(vcpuIndex); -+ regs->edx = (u32)(0x0); -+ break; -+ case NS_MSR_ICR: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) { -+ goto msrReadError; -+ } -+ nsReadIcr(&msrContent); -+ NS_STATS_COLLECT(NS_ICR_READ, &curVcpu->nsVcpStats); -+ regs->eax = (u32)(msrContent & 0xFFFFFFFF); -+ regs->edx = (u32)(msrContent >> 32); -+ break; -+ case NS_MSR_TPR: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) { -+ goto msrReadError; -+ } -+ nsReadTpr(&msrContent); -+ NS_STATS_COLLECT(NS_TPR_READ, &curVcpu->nsVcpStats); -+ regs->eax = (u32)(msrContent & 0xFFFFFFFF); -+ regs->edx = (u32)(msrContent >> 32); -+ break; -+ /* -+ * The following synthetic MSRs are implemented in the Novell Shim. -+ */ -+ case NS_MSR_SCONTROL: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) { -+ goto msrReadError; -+ } -+ regs->eax = (u32)(curVcpu->nsVcpSControlMsr & 0xFFFFFFFF); -+ regs->edx = (u32)(curVcpu->nsVcpSControlMsr >> 32); -+ break; -+ case NS_MSR_SVERSION: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) { -+ goto msrReadError; -+ } -+ regs->eax = (u32)(curVcpu->nsVcpSVersionMsr & 0xFFFFFFFF); -+ regs->edx = (u32)(curVcpu->nsVcpSVersionMsr >> 32); -+ break; -+ case NS_MSR_SIEFP: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) { -+ goto msrReadError; -+ } -+ regs->eax = (u32)(curVcpu->nsVcpSIefpMsr & 0xFFFFFFFF); -+ regs->edx = (u32)(curVcpu->nsVcpSIefpMsr >> 32); -+ break; -+ case NS_MSR_SIMP: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) { -+ goto msrReadError; -+ } -+ regs->eax = (u32)(curVcpu->nsVcpSimpMsr & 0xFFFFFFFF); -+ regs->edx = (u32)(curVcpu->nsVcpSimpMsr >> 32); -+ break; -+ case NS_MSR_SINT0: -+ synInt = 0; -+ goto synIntReadProcess; -+ case NS_MSR_SINT1: -+ synInt = 1; -+ goto synIntReadProcess; -+ case NS_MSR_SINT2: -+ synInt = 2; -+ goto synIntReadProcess; -+ case NS_MSR_SINT3: -+ synInt = 3; -+ goto synIntReadProcess; -+ case NS_MSR_SINT4: -+ synInt = 4; -+ goto synIntReadProcess; -+ case NS_MSR_SINT5: -+ synInt = 5; -+ goto synIntReadProcess; -+ case NS_MSR_SINT6: -+ synInt = 6; -+ goto synIntReadProcess; -+ case NS_MSR_SINT7: -+ synInt = 7; -+ goto synIntReadProcess; -+ case NS_MSR_SINT8: -+ synInt = 8; -+ goto synIntReadProcess; -+ case NS_MSR_SINT9: -+ synInt = 9; -+ goto synIntReadProcess; -+ case NS_MSR_SINT10: -+ synInt = 10; -+ goto synIntReadProcess; -+ case NS_MSR_SINT11: -+ synInt = 11; -+ goto synIntReadProcess; -+ case NS_MSR_SINT12: -+ synInt = 12; -+ goto synIntReadProcess; -+ case NS_MSR_SINT13: -+ synInt = 13; -+ goto synIntReadProcess; -+ case NS_MSR_SINT14: -+ synInt = 14; -+ goto synIntReadProcess; -+ case NS_MSR_SINT15: -+ synInt = 
15; -+synIntReadProcess: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) { -+ goto msrReadError; -+ } -+ regs->eax = (u32)(curVcpu->nsVcpSIntMsr[synInt] & 0xFFFFFFFF); -+ regs->edx = (u32)(curVcpu->nsVcpSIntMsr[synInt] >> 32); -+ break; -+ -+ case NS_MSR_SEOM: -+ /* -+ * This is a write only register; reads return 0. -+ */ -+ regs->eax = 0; -+ regs->edx = 0; -+ break; -+ case NS_MSR_TIME_REF_COUNT: -+ if (!nsAccessTimeRefCnt(curp, &msrContent)) { -+ goto msrReadError; -+ } -+ regs->eax = (u32)(msrContent & 0xFFFFFFFF); -+ regs->edx = (u32)(msrContent >> 32); -+ break; -+ /* -+ * Synthetic timer MSRs. -+ */ -+ case NS_MSR_TIMER0_CONFIG: -+ timer = 0; -+ goto processTimerConfigRead; -+ case NS_MSR_TIMER1_CONFIG: -+ timer = 1; -+ goto processTimerConfigRead; -+ case NS_MSR_TIMER2_CONFIG: -+ timer = 2; -+ goto processTimerConfigRead; -+ case NS_MSR_TIMER3_CONFIG: -+ timer = 3; -+processTimerConfigRead: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) { -+ goto msrReadError; -+ } -+ regs->eax = -+ (u32)(curVcpu->nsVcpTimers[timer].config & 0xFFFFFFFF); -+ regs->edx = -+ (u32)(curVcpu->nsVcpTimers[timer].config >> 32); -+ break; -+ case NS_MSR_TIMER0_COUNT: -+ timer = 0; -+ goto processTimerCountRead; -+ case NS_MSR_TIMER1_COUNT: -+ timer = 1; -+ goto processTimerCountRead; -+ case NS_MSR_TIMER2_COUNT: -+ timer = 2; -+ goto processTimerCountRead; -+ case NS_MSR_TIMER3_COUNT: -+ timer = 3; -+processTimerCountRead: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) { -+ goto msrReadError; -+ } -+ timerCount = curVcpu->nsVcpTimers[timer].count; -+ if (timerCount > ((NOW())/100)) { -+ timerCount -= ((NOW())/100); -+ } else { -+ timerCount = 0; -+ } -+ regs->eax = -+ (u32)(timerCount & 0xFFFFFFFF); -+ regs->edx = -+ (u32)(timerCount >> 32); -+ break; -+ case NS_MSR_PVDRV_HCALL: -+ regs->eax = 0; -+ regs->edx = 0; -+ break; -+ case NS_MSR_SYSTEM_RESET: -+ regs->eax = 0; -+ regs->edx = 0; -+ break; -+ default: -+ /* -+ * We did not handle the MSR address specified; -+ * let the caller figure out -+ * What to do. -+ */ -+ return (0); -+ } -+ return (1); -+msrReadError: -+ /* -+ * Have to inject #GP fault. -+ */ -+ nsInjectException(TRAP_gp_fault); -+ return (1); -+} -+ -+/* -+ * static int -+ * nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs) -+ * NS intercept for writing MSRS. -+ * -+ * Calling/Exit State: -+ * None. -+ */ -+ -+static int -+nsDoWrMsr(uint32_t idx, struct cpu_user_regs *regs) -+{ -+ nsPartition_t *curp = nsGetCurrentPartition(); -+ unsigned int vcpuIndex = nsGetCurrentVcpuIndex(); -+ u64 msrContent = 0; -+ nsVcpu_t *curVcpu = &curp->nsVcpuState[vcpuIndex]; -+ int synInt, timer; -+ struct domain *d = current->domain; -+ int extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR]; -+ -+ /* -+ * hvmloader uses wrmsr; we don't want to -+ * intercept calls coming from the bootstrap (bios) code in the HVM -+ * guest; we descriminate based on the instruction pointer. -+ */ -+ if (nsCallFromBios(regs)) { -+ /* -+ * We don't intercept this. -+ */ -+ return (0); -+ } -+ msrContent = -+ (u32)regs->eax | ((u64)regs->edx << 32); -+ if (extid > 1) { -+ /* -+ * For now this is all other "Enlightened" operating systems -+ * other than Longhorn. -+ */ -+ if (idx == 0x40000000) { -+ /* -+ * PV driver hypercall setup. Let xen handle this. 
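-+		 * (As on the read side, returning 0 below hands the
-+		 * access back to the generic Xen MSR path instead of
-+		 * faulting the guest.)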
-+ */ -+ return (0); -+ } -+ if (idx == 0x40001000) { -+ idx = 0x40000000; -+ } -+ } -+ switch (idx) { -+ case NS_MSR_GUEST_OS_ID: -+ nsWriteGuestIdMsr(curp, curVcpu, msrContent); -+ break; -+ case NS_MSR_HYPERCALL: -+ nsWriteHypercallMsr(curp, curVcpu, msrContent); -+ break; -+ -+ case NS_MSR_VP_INDEX: -+ goto msrWriteError; -+ -+ case NS_MSR_EOI: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) { -+ goto msrWriteError; -+ } -+ nsWriteEoi(msrContent); -+ NS_STATS_COLLECT(NS_EOI_WRITE, &curVcpu->nsVcpStats); -+ break; -+ case NS_MSR_ICR: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) { -+ goto msrWriteError; -+ } -+ nsWriteIcr(msrContent); -+ NS_STATS_COLLECT(NS_ICR_WRITE, &curVcpu->nsVcpStats); -+ break; -+ case NS_MSR_TPR: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_APIC_MSRS)) { -+ goto msrWriteError; -+ } -+ nsWriteTpr(msrContent); -+ NS_STATS_COLLECT(NS_TPR_WRITE, &curVcpu->nsVcpStats); -+ break; -+ -+ /* -+ * The following MSRs are synthetic MSRs supported in the Novell Shim. -+ */ -+ case NS_MSR_SCONTROL: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) { -+ goto msrWriteError; -+ } -+ curVcpu->nsVcpSControlMsr = msrContent; -+ break; -+ case NS_MSR_SVERSION: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) { -+ goto msrWriteError; -+ } -+ /* -+ * This is a read-only MSR; generate #GP -+ */ -+ nsInjectException(TRAP_gp_fault); -+ break; -+ case NS_MSR_SIEFP: -+ case NS_MSR_SIMP: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) { -+ goto msrWriteError; -+ } -+ nsWriteSxMsr(idx, curp, curVcpu, msrContent); -+ break; -+ case NS_MSR_SINT0: -+ synInt = 0; -+ goto synIntWrProcess; -+ case NS_MSR_SINT1: -+ synInt = 1; -+ goto synIntWrProcess; -+ case NS_MSR_SINT2: -+ synInt = 2; -+ goto synIntWrProcess; -+ case NS_MSR_SINT3: -+ synInt = 3; -+ goto synIntWrProcess; -+ case NS_MSR_SINT4: -+ synInt = 4; -+ goto synIntWrProcess; -+ case NS_MSR_SINT5: -+ synInt = 5; -+ goto synIntWrProcess; -+ case NS_MSR_SINT6: -+ synInt = 6; -+ goto synIntWrProcess; -+ case NS_MSR_SINT7: -+ synInt = 7; -+ goto synIntWrProcess; -+ case NS_MSR_SINT8: -+ synInt = 8; -+ goto synIntWrProcess; -+ case NS_MSR_SINT9: -+ synInt = 9; -+ goto synIntWrProcess; -+ case NS_MSR_SINT10: -+ synInt = 10; -+ goto synIntWrProcess; -+ case NS_MSR_SINT11: -+ synInt = 11; -+ goto synIntWrProcess; -+ case NS_MSR_SINT12: -+ synInt = 12; -+ goto synIntWrProcess; -+ case NS_MSR_SINT13: -+ synInt = 13; -+ goto synIntWrProcess; -+ case NS_MSR_SINT14: -+ synInt = 14; -+ goto synIntWrProcess; -+ case NS_MSR_SINT15: -+ synInt = 15; -+synIntWrProcess: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) { -+ goto msrWriteError; -+ } -+ /* -+ * XXXKYS: We assume that the synInt registers will be -+ * first written before the interrupt generation can occur. -+ * Specifically if SINT is masked all interrupts that may have -+ * been generated will be lost. Also when SINT is disabled; -+ * its effects will be only felt for subsequent interrupts that -+ * may be posted. XXXKYS: CHECK -+ */ -+ curVcpu->nsVcpSIntMsr[synInt] = msrContent; -+ break; -+ -+ case NS_MSR_SEOM: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_MSRS)) { -+ goto msrWriteError; -+ } -+ curVcpu->nsVcpEomMsr = msrContent; -+ nsProcessMessageQ(curp, curVcpu); -+ break; -+ case NS_MSR_TIME_REF_COUNT: -+ /* -+ * This is a read-only msr. -+ */ -+ goto msrWriteError; -+ -+ /* -+ * Synthetic timer MSRs. 
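-+	 *
-+	 * Config register layout, as decoded below: bit 0 is enable and
-+	 * bit 3 auto-enable (tested together as msrContent & 0x9), while
-+	 * bits 16-19 select the SINTx to signal; a zero SINTx field
-+	 * forces the enable bit back off.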
-+ */ -+ case NS_MSR_TIMER0_CONFIG: -+ timer = 0; -+ goto processTimerConfig; -+ case NS_MSR_TIMER1_CONFIG: -+ timer = 1; -+ goto processTimerConfig; -+ case NS_MSR_TIMER2_CONFIG: -+ timer = 2; -+ goto processTimerConfig; -+ case NS_MSR_TIMER3_CONFIG: -+ timer = 3; -+processTimerConfig: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) { -+ goto msrWriteError; -+ } -+ /* -+ * Assume that the client is going to write the whole msr. -+ */ -+ if (!(msrContent & 0x9)) { -+ /* -+ * We are neither setting Auto Enable or Enable; -+ * silently exit. -+ * Should this be considered to turn off a -+ * timer that may be currently -+ * active; XXXKYS: Check. For now we are -+ * not doing anything here. -+ */ -+ break; -+ } -+ if (!(((u32)(msrContent >> 16)) & 0x0000000f)) { -+ /* -+ * sintx is 0; clear the enable bit(s). -+ */ -+ msrContent &= ~(0x1); -+ } -+ curVcpu->nsVcpTimers[timer].config = msrContent; -+ /* -+ * XXXKYS: Can any order be assumed here; -+ * should we just act on whatever is in the -+ * count register. For now act as if the count -+ * register is valid and act on it. -+ */ -+ if (msrContent & 0x1) { -+ nsScheduleTimeOut(&curVcpu->nsVcpTimers[timer]); -+ NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats); -+ } -+ break; -+ case NS_MSR_TIMER0_COUNT: -+ timer = 0; -+ goto processTimerCount; -+ case NS_MSR_TIMER1_COUNT: -+ timer = 1; -+ goto processTimerCount; -+ case NS_MSR_TIMER2_COUNT: -+ timer = 2; -+ goto processTimerCount; -+ case NS_MSR_TIMER3_COUNT: -+ timer = 3; -+processTimerCount: -+ if (!nsPrivilegeCheck(curp, NS_ACCESS_SYNC_TIMERS)) { -+ goto msrWriteError; -+ } -+ curVcpu->nsVcpTimers[timer].count = -+ (msrContent + ((NOW())/100)); -+ if ((curVcpu->nsVcpTimers[timer].config | 0x9)) { -+ nsScheduleTimeOut(&curVcpu->nsVcpTimers[timer]); -+ NS_STATS_COLLECT(NS_TIMEOUTS, &curVcpu->nsVcpStats); -+ } -+ -+ break; -+ case NS_MSR_PVDRV_HCALL: -+ /* -+ * Establish the hypercall page for PV drivers. -+ */ -+ nsXenVector.extWrmsrHypervisorRegs(0x40000000, regs->eax, -+ regs->edx); -+ break; -+ case NS_MSR_SYSTEM_RESET: -+ /* -+ * Shutdown the domain/partition. -+ */ -+ if (msrContent & 0x1) { -+ domain_shutdown(d, SHUTDOWN_reboot); -+ } -+ break; -+ -+ default: -+ /* -+ * We did not handle the MSR address; -+ * let the caller deal with this. -+ */ -+ return (0); -+ } -+ return (1); -+msrWriteError: -+ /* -+ * Have to inject #GP fault. 
-+ */ -+ nsInjectException(TRAP_gp_fault); -+ return (1); -+} diff --git a/poll-single-port.patch b/poll-single-port.patch new file mode 100644 index 0000000..aa4fcfd --- /dev/null +++ b/poll-single-port.patch @@ -0,0 +1,163 @@ +Index: xen-3.3.1-testing/xen/common/domain.c +=================================================================== +--- xen-3.3.1-testing.orig/xen/common/domain.c ++++ xen-3.3.1-testing/xen/common/domain.c +@@ -209,6 +209,7 @@ struct domain *domain_create( + atomic_set(&d->refcnt, 1); + spin_lock_init(&d->domain_lock); + spin_lock_init(&d->page_alloc_lock); ++ spin_lock_init(&d->poll_lock); + spin_lock_init(&d->shutdown_lock); + spin_lock_init(&d->hypercall_deadlock_mutex); + INIT_LIST_HEAD(&d->page_list); +@@ -653,7 +654,7 @@ void vcpu_reset(struct vcpu *v) + + v->fpu_initialised = 0; + v->fpu_dirtied = 0; +- v->is_polling = 0; ++ v->poll_evtchn = 0; + v->is_initialised = 0; + v->nmi_pending = 0; + v->mce_pending = 0; +Index: xen-3.3.1-testing/xen/common/event_channel.c +=================================================================== +--- xen-3.3.1-testing.orig/xen/common/event_channel.c ++++ xen-3.3.1-testing/xen/common/event_channel.c +@@ -545,6 +545,7 @@ out: + static int evtchn_set_pending(struct vcpu *v, int port) + { + struct domain *d = v->domain; ++ unsigned long flags; + + /* + * The following bit operations must happen in strict order. +@@ -564,19 +565,36 @@ static int evtchn_set_pending(struct vcp + } + + /* Check if some VCPU might be polling for this event. */ +- if ( unlikely(d->is_polling) ) ++ if ( likely(!d->is_polling) ) ++ return 0; ++ ++ spin_lock_irqsave(&d->poll_lock, flags); ++ ++ if ( likely(d->is_polling) ) + { +- d->is_polling = 0; ++ bool_t is_polling = 0; ++ ++ d->is_polling = -1; + smp_mb(); /* check vcpu poll-flags /after/ clearing domain poll-flag */ + for_each_vcpu ( d, v ) + { +- if ( !v->is_polling ) ++ int poll_evtchn = v->poll_evtchn; ++ ++ if ( !poll_evtchn ) ++ continue; ++ if ( poll_evtchn > 0 && poll_evtchn != port ) ++ { ++ is_polling = 1; + continue; +- v->is_polling = 0; ++ } ++ v->poll_evtchn = 0; + vcpu_unblock(v); + } ++ cmpxchg(&d->is_polling, -1, is_polling); + } + ++ spin_unlock_irqrestore(&d->poll_lock, flags); ++ + return 0; + } + +Index: xen-3.3.1-testing/xen/common/schedule.c +=================================================================== +--- xen-3.3.1-testing.orig/xen/common/schedule.c ++++ xen-3.3.1-testing/xen/common/schedule.c +@@ -348,7 +348,7 @@ static long do_poll(struct sched_poll *s + return -EFAULT; + + set_bit(_VPF_blocked, &v->pause_flags); +- v->is_polling = 1; ++ v->poll_evtchn = -1; + d->is_polling = 1; + + /* Check for events /after/ setting flags: avoids wakeup waiting race. 
*/ +@@ -369,6 +369,9 @@ static long do_poll(struct sched_poll *s + goto out; + } + ++ if ( i == 1 ) ++ v->poll_evtchn = port; ++ + if ( sched_poll->timeout != 0 ) + set_timer(&v->poll_timer, sched_poll->timeout); + +@@ -378,7 +381,7 @@ static long do_poll(struct sched_poll *s + return 0; + + out: +- v->is_polling = 0; ++ v->poll_evtchn = 0; + clear_bit(_VPF_blocked, &v->pause_flags); + return rc; + } +@@ -760,10 +763,10 @@ static void poll_timer_fn(void *data) + { + struct vcpu *v = data; + +- if ( !v->is_polling ) ++ if ( !v->poll_evtchn ) + return; + +- v->is_polling = 0; ++ v->poll_evtchn = 0; + vcpu_unblock(v); + } + +Index: xen-3.3.1-testing/xen/include/xen/sched.h +=================================================================== +--- xen-3.3.1-testing.orig/xen/include/xen/sched.h ++++ xen-3.3.1-testing/xen/include/xen/sched.h +@@ -106,8 +106,6 @@ struct vcpu + bool_t fpu_initialised; + /* Has the FPU been used since it was last saved? */ + bool_t fpu_dirtied; +- /* Is this VCPU polling any event channels (SCHEDOP_poll)? */ +- bool_t is_polling; + /* Initialization completed for this VCPU? */ + bool_t is_initialised; + /* Currently running on a CPU? */ +@@ -137,6 +135,11 @@ struct vcpu + unsigned long pause_flags; + atomic_t pause_count; + ++ /* Is this VCPU polling any event channels (SCHEDOP_poll)? ++ * Positive values indicate a single, negative values multiple channels ++ * being polled. */ ++ int poll_evtchn; ++ + /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */ + u16 virq_to_evtchn[NR_VIRQS]; + spinlock_t virq_lock; +@@ -210,7 +213,7 @@ struct domain + /* Is this guest being debugged by dom0? */ + bool_t debugger_attached; + /* Are any VCPUs polling event channels (SCHEDOP_poll)? */ +- bool_t is_polling; ++ signed char is_polling; + /* Is this guest dying (i.e., a zombie)? */ + enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying; + /* Domain is paused by controller software? */ +@@ -218,6 +221,9 @@ struct domain + /* Domain's VCPUs are pinned 1:1 to physical CPUs? */ + bool_t is_pinned; + ++ /* Protects is_polling modification in evtchn_set_pending(). */ ++ spinlock_t poll_lock; ++ + /* Guest has shut down (inc. reason code)? */ + spinlock_t shutdown_lock; + bool_t is_shutting_down; /* in process of shutting down? 
*/ diff --git a/snapshot-xend.patch b/snapshot-xend.patch index 2bee0a3..09a23e9 100644 --- a/snapshot-xend.patch +++ b/snapshot-xend.patch @@ -1,7 +1,7 @@ -Index: xen-3.3.0-testing/tools/python/xen/xend/image.py +Index: xen-3.3.1-testing/tools/python/xen/xend/image.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/image.py -+++ xen-3.3.0-testing/tools/python/xen/xend/image.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/image.py ++++ xen-3.3.1-testing/tools/python/xen/xend/image.py @@ -476,6 +476,10 @@ class ImageHandler: # but this can easily lead to very rapid restart loops against # which we currently have no protection @@ -13,10 +13,10 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/image.py def recreate(self): if self.device_model is None: return -Index: xen-3.3.0-testing/tools/python/xen/xend/server/blkif.py +Index: xen-3.3.1-testing/tools/python/xen/xend/server/blkif.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/server/blkif.py -+++ xen-3.3.0-testing/tools/python/xen/xend/server/blkif.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/server/blkif.py ++++ xen-3.3.1-testing/tools/python/xen/xend/server/blkif.py @@ -78,6 +78,9 @@ class BlkifController(DevController): if uuid: back['uuid'] = uuid @@ -27,11 +27,11 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/server/blkif.py if security.on() == xsconstants.XS_POLICY_ACM: self.do_access_control(config, uname) -Index: xen-3.3.0-testing/tools/python/xen/xend/server/SrvDomain.py +Index: xen-3.3.1-testing/tools/python/xen/xend/server/SrvDomain.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/server/SrvDomain.py -+++ xen-3.3.0-testing/tools/python/xen/xend/server/SrvDomain.py -@@ -95,6 +95,34 @@ class SrvDomain(SrvDir): +--- xen-3.3.1-testing.orig/tools/python/xen/xend/server/SrvDomain.py ++++ xen-3.3.1-testing/tools/python/xen/xend/server/SrvDomain.py +@@ -95,6 +95,31 @@ class SrvDomain(SrvDir): def do_save(self, _, req): return self.xd.domain_save(self.dom.domid, req.args['file'][0]) @@ -40,36 +40,42 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/server/SrvDomain.py + return req.threadRequest(self.do_snapshot_create, op, req) + + def do_snapshot_create(self, _, req): -+ return self.xd.domain_snapshot_create(self.dom.domid, req.args) ++ return self.xd.domain_snapshot_create(self.dom.domid, req.args['name'][0]) + + def op_snapshot_list(self, op, req): + self.acceptCommand(req) -+ return req.threadRequest(self.do_snapshot_list, op, req) -+ -+ def do_snapshot_list(self, _, req): -+ return self.xd.domain_snapshot_list(self.dom.domid) ++ return self.xd.domain_snapshot_list(self.dom.getName()) + + def op_snapshot_apply(self, op, req): + self.acceptCommand(req) + return req.threadRequest(self.do_snapshot_apply, op, req) + + def do_snapshot_apply(self, _, req): -+ return self.xd.domain_snapshot_apply(self.dom.domid, req.args) ++ return self.xd.domain_snapshot_apply(self.dom.getName(), req.args['name'][0]) + + def op_snapshot_delete(self, op, req): + self.acceptCommand(req) + return req.threadRequest(self.do_snapshot_delete, op, req) + + def do_snapshot_delete(self, _, req): -+ return self.xd.domain_snapshot_delete(self.dom.domid, req.args) ++ return self.xd.domain_snapshot_delete(self.dom.getName(), req.args['name'][0]) + def op_dump(self, op, req): self.acceptCommand(req) return req.threadRequest(self.do_dump, op, 
req) -Index: xen-3.3.0-testing/tools/python/xen/xend/XendCheckpoint.py +@@ -230,7 +255,7 @@ class SrvDomain(SrvDir): + def render_GET(self, req): + op = req.args.get('op') + +- if op and op[0] in ['vcpuinfo']: ++ if op and op[0] in ['vcpuinfo', 'snapshot_list']: + return self.perform(req) + + # +Index: xen-3.3.1-testing/tools/python/xen/xend/XendCheckpoint.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendCheckpoint.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendCheckpoint.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendCheckpoint.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendCheckpoint.py @@ -65,10 +65,12 @@ def insert_after(list, pred, value): return @@ -279,10 +285,10 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendCheckpoint.py if not paused: dominfo.unpause() -Index: xen-3.3.0-testing/tools/python/xen/xend/XendConfig.py +Index: xen-3.3.1-testing/tools/python/xen/xend/XendConfig.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendConfig.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendConfig.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendConfig.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendConfig.py @@ -208,6 +208,7 @@ XENAPI_CFG_TYPES = { 'cpuid' : dict, 'cpuid_check' : dict, @@ -291,10 +297,10 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendConfig.py } # List of legacy configuration keys that have no equivalent in the -Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomain.py +Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomain.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendDomain.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendDomain.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomain.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendDomain.py @@ -52,6 +52,7 @@ from xen.xend.xenstore.xstransact import from xen.xend.xenstore.xswatch import xswatch from xen.util import mkdir @@ -303,7 +309,7 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomain.py xc = xen.lowlevel.xc.xc() xoptions = XendOptions.instance() -@@ -1396,6 +1397,164 @@ class XendDomain: +@@ -1400,6 +1401,164 @@ class XendDomain: raise XendError("can't write guest state file %s: %s" % (dst, ex[1])) @@ -468,10 +474,10 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomain.py def domain_pincpu(self, domid, vcpu, cpumap): """Set which cpus vcpu can use -Index: xen-3.3.0-testing/tools/python/xen/xm/main.py +Index: xen-3.3.1-testing/tools/python/xen/xm/main.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xm/main.py -+++ xen-3.3.0-testing/tools/python/xen/xm/main.py +--- xen-3.3.1-testing.orig/tools/python/xen/xm/main.py ++++ xen-3.3.1-testing/tools/python/xen/xm/main.py @@ -122,6 +122,14 @@ SUBCOMMAND_HELP = { 'Restore a domain from a saved state.'), 'save' : ('[-c] ', diff --git a/svm-lmsl.patch b/svm-lmsl.patch index e54027a..45d4703 100644 --- a/svm-lmsl.patch +++ b/svm-lmsl.patch @@ -1,8 +1,22 @@ -Index: xen-3.3.0-testing/xen/arch/x86/hvm/hvm.c +Index: xen-3.3.1-testing/xen/arch/x86/hvm/hvm.c =================================================================== ---- xen-3.3.0-testing.orig/xen/arch/x86/hvm/hvm.c -+++ xen-3.3.0-testing/xen/arch/x86/hvm/hvm.c -@@ -789,10 +789,11 @@ int hvm_set_efer(uint64_t value) +--- 
xen-3.3.1-testing.orig/xen/arch/x86/hvm/hvm.c ++++ xen-3.3.1-testing/xen/arch/x86/hvm/hvm.c +@@ -525,11 +525,12 @@ static int hvm_load_cpu_ctxt(struct doma + return -EINVAL; + } + +- if ( (ctxt.msr_efer & ~(EFER_FFXSE | EFER_LME | EFER_LMA | ++ if ( (ctxt.msr_efer & ~(EFER_FFXSE | EFER_LMSLE | EFER_LME | EFER_LMA | + EFER_NX | EFER_SCE)) || + ((sizeof(long) != 8) && (ctxt.msr_efer & EFER_LME)) || + (!cpu_has_nx && (ctxt.msr_efer & EFER_NX)) || + (!cpu_has_syscall && (ctxt.msr_efer & EFER_SCE)) || ++ (!cpu_has_lmsl && (ctxt.msr_efer & EFER_LMSLE)) || + (!cpu_has_ffxsr && (ctxt.msr_efer & EFER_FFXSE)) || + ((ctxt.msr_efer & (EFER_LME|EFER_LMA)) == EFER_LMA) ) + { +@@ -790,10 +791,11 @@ int hvm_set_efer(uint64_t value) value &= ~EFER_LMA; @@ -15,10 +29,10 @@ Index: xen-3.3.0-testing/xen/arch/x86/hvm/hvm.c (!cpu_has_ffxsr && (value & EFER_FFXSE)) ) { gdprintk(XENLOG_WARNING, "Trying to set reserved bit in " -Index: xen-3.3.0-testing/xen/arch/x86/hvm/svm/svm.c +Index: xen-3.3.1-testing/xen/arch/x86/hvm/svm/svm.c =================================================================== ---- xen-3.3.0-testing.orig/xen/arch/x86/hvm/svm/svm.c -+++ xen-3.3.0-testing/xen/arch/x86/hvm/svm/svm.c +--- xen-3.3.1-testing.orig/xen/arch/x86/hvm/svm/svm.c ++++ xen-3.3.1-testing/xen/arch/x86/hvm/svm/svm.c @@ -53,6 +53,11 @@ u32 svm_feature_flags; @@ -54,10 +68,10 @@ Index: xen-3.3.0-testing/xen/arch/x86/hvm/svm/svm.c if ( cpu != 0 ) return 1; -Index: xen-3.3.0-testing/xen/include/asm-x86/hvm/hvm.h +Index: xen-3.3.1-testing/xen/include/asm-x86/hvm/hvm.h =================================================================== ---- xen-3.3.0-testing.orig/xen/include/asm-x86/hvm/hvm.h -+++ xen-3.3.0-testing/xen/include/asm-x86/hvm/hvm.h +--- xen-3.3.1-testing.orig/xen/include/asm-x86/hvm/hvm.h ++++ xen-3.3.1-testing/xen/include/asm-x86/hvm/hvm.h @@ -133,6 +133,12 @@ struct hvm_function_table { extern struct hvm_function_table hvm_funcs; extern int hvm_enabled; diff --git a/tools-gdbserver-build.diff b/tools-gdbserver-build.diff new file mode 100644 index 0000000..933e110 --- /dev/null +++ b/tools-gdbserver-build.diff @@ -0,0 +1,21 @@ +diff -r 0eab1869ef66 tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in +--- a/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in Thu Sep 04 11:42:38 2008 +0100 ++++ b/tools/debugger/gdb/gdb-6.2.1-xen-sparse/gdb/gdbserver/Makefile.in Thu Sep 11 15:45:31 2008 -0600 +@@ -90,7 +90,7 @@ GLOBAL_CFLAGS = ${MT_CFLAGS} ${MH_CFLAGS + GLOBAL_CFLAGS = ${MT_CFLAGS} ${MH_CFLAGS} + #PROFILE_CFLAGS = -pg + +-WARN_CFLAGS = -Wall ++WARN_CFLAGS = -Wall -Wno-sequence-point + + # CFLAGS is specifically reserved for setting from the command line + # when running make. I.E. "make CFLAGS=-Wmissing-prototypes". 
+@@ -260,7 +260,7 @@ linux-low.o: linux-low.c $(linux_low_h) + $(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) $< @USE_THREAD_DB@ + + linux-xen-low.o: linux-xen-low.c $(linux_low_h) $(server_h) +- $(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) $< @USE_THREAD_DB@ ++ $(CC) -c $(CPPFLAGS) $(INTERNAL_CFLAGS) -I../../../../../include/ $< @USE_THREAD_DB@ + + linux-arm-low.o: linux-arm-low.c $(linux_low_h) $(server_h) + linux-i386-low.o: linux-i386-low.c $(linux_low_h) $(server_h) diff --git a/x86-microcode.patch b/x86-microcode.patch new file mode 100644 index 0000000..ac2b4ca --- /dev/null +++ b/x86-microcode.patch @@ -0,0 +1,141 @@ +Index: xen-3.3.0-testing/xen/arch/x86/microcode.c +=================================================================== +--- xen-3.3.0-testing.orig/xen/arch/x86/microcode.c ++++ xen-3.3.0-testing/xen/arch/x86/microcode.c +@@ -98,7 +98,7 @@ MODULE_LICENSE("GPL"); + static int verbose; + boolean_param("microcode.verbose", verbose); + +-#define MICROCODE_VERSION "1.14a" ++#define MICROCODE_VERSION "1.14b" + + #define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */ + #define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */ +@@ -118,9 +118,6 @@ boolean_param("microcode.verbose", verbo + + #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) + +-/* serialize access to the physical write to MSR 0x79 */ +-static DEFINE_SPINLOCK(microcode_update_lock); +- + /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ + static DEFINE_MUTEX(microcode_mutex); + +@@ -376,25 +373,10 @@ out: + + static void do_update_one (void * unused) + { +- unsigned long flags; + unsigned int val[2]; + int cpu_num = smp_processor_id(); + struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; + +- if (uci->mc == NULL) { +- if (verbose) { +- if (uci->err == MC_SUCCESS) +- printk(KERN_INFO "microcode: CPU%d already at revision 0x%x\n", +- cpu_num, uci->rev); +- else +- printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num); +- } +- return; +- } +- +- /* serialize access to the physical write to MSR 0x79 */ +- spin_lock_irqsave(µcode_update_lock, flags); +- + /* write microcode via MSR 0x79 */ + wrmsr(MSR_IA32_UCODE_WRITE, + (unsigned long) uci->mc->bits, +@@ -409,7 +391,6 @@ static void do_update_one (void * unused + + /* notify the caller of success on this cpu */ + uci->err = MC_SUCCESS; +- spin_unlock_irqrestore(µcode_update_lock, flags); + printk(KERN_INFO "microcode: CPU%d updated from revision " + "0x%x to 0x%x, date = %08x \n", + cpu_num, uci->rev, val[1], uci->mc->hdr.date); +@@ -418,40 +399,65 @@ static void do_update_one (void * unused + + static int do_microcode_update (void) + { +- int i, error; ++ int i, j, error; ++ cpumask_t cpu_mask = cpu_online_map; + +- if (on_each_cpu(collect_cpu_info, NULL, 1, 1) != 0) { +- printk(KERN_ERR "microcode: Error! Could not run on all processors\n"); +- error = -EIO; +- goto out; ++ for_each_cpu_mask(i, cpu_mask) { ++ if (on_selected_cpus(cpumask_of_cpu(i), collect_cpu_info, NULL, 1, 1) != 0) { ++ printk(KERN_ERR "microcode: Error! Could not run on all processors\n"); ++ return -EIO; ++ } + } + + if ((error = find_matching_ucodes())) { + printk(KERN_ERR "microcode: Error in the microcode data\n"); +- goto out_free; ++ cpus_clear(cpu_mask); + } + +- if (on_each_cpu(do_update_one, NULL, 1, 1) != 0) { +- printk(KERN_ERR "microcode: Error! 
Could not run on all processors\n"); +- error = -EIO; ++ for (; (i = any_online_cpu(cpu_mask)) < NR_CPUS; cpu_clear(i, cpu_mask)) { ++ if (ucode_cpu_info[i].mc == NULL) { ++ if (verbose) { ++ switch (ucode_cpu_info[i].err) { ++ case MC_SUCCESS: ++ printk(KERN_INFO "microcode: CPU%d already at revision 0x%x\n", ++ i, ucode_cpu_info[i].rev); ++ break; ++ case MC_IGNORED: ++ printk(KERN_WARNING "microcode: CPU%d not 'upgrading' to earlier revision" ++ " 0x%x (current=0x%x)\n", i, ucode_cpu_info[i].cksum, ucode_cpu_info[i].rev); ++ break; ++ default: ++ printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", i); ++ break; ++ } ++ } ++ } else if (on_selected_cpus(cpumask_of_cpu(i), do_update_one, NULL, 1, 1) != 0) { ++ printk(KERN_ERR "microcode: Error! Could not run on processor %d\n", i); ++ error = -EIO; ++ } else if (ucode_cpu_info[i].err == MC_SUCCESS) { ++ cpus_andnot(cpu_mask, cpu_mask, cpu_sibling_map[i]); ++ for_each_cpu_mask(j, cpu_sibling_map[i]) { ++ if (j != i) { ++ ucode_cpu_info[j].err = MC_SUCCESS; ++ ASSERT(ucode_cpu_info[j].mc == ucode_cpu_info[i].mc); ++ ucode_cpu_info[j].mc = NULL; ++ } ++ } ++ } + } + +-out_free: +- for_each_online_cpu(i) { ++ for (i = 0; i < NR_CPUS; i++) { + if (ucode_cpu_info[i].mc) { +- int j; + void *tmp = ucode_cpu_info[i].mc; +- vfree(tmp); +- for_each_online_cpu(j) { ++ ++ for (j = 0; j < NR_CPUS; j++) { + if (ucode_cpu_info[j].mc == tmp) + ucode_cpu_info[j].mc = NULL; + } ++ vfree(tmp); + } +- if (ucode_cpu_info[i].err == MC_IGNORED && verbose) +- printk(KERN_WARNING "microcode: CPU%d not 'upgrading' to earlier revision" +- " 0x%x (current=0x%x)\n", i, ucode_cpu_info[i].cksum, ucode_cpu_info[i].rev); + } +-out: ++ + return error; + } + diff --git a/x86-show-page-walk-early.patch b/x86-show-page-walk-early.patch index 468f86c..86842ea 100644 --- a/x86-show-page-walk-early.patch +++ b/x86-show-page-walk-early.patch @@ -134,7 +134,7 @@ Index: xen-3.3.0-testing/xen/include/asm-x86/mm.h =================================================================== --- xen-3.3.0-testing.orig/xen/include/asm-x86/mm.h +++ xen-3.3.0-testing/xen/include/asm-x86/mm.h -@@ -277,6 +277,7 @@ TYPE_SAFE(unsigned long,mfn); +@@ -307,6 +307,7 @@ TYPE_SAFE(unsigned long,mfn); #define machine_to_phys_mapping ((unsigned long *)RDWR_MPT_VIRT_START) #define INVALID_M2P_ENTRY (~0UL) #define VALID_M2P(_e) (!((_e) & (1UL<<(BITS_PER_LONG-1)))) diff --git a/xen-3.3.0-testing-src.tar.bz2 b/xen-3.3.0-testing-src.tar.bz2 deleted file mode 100644 index 3f1654b..0000000 --- a/xen-3.3.0-testing-src.tar.bz2 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c53af8d91df0454b584aa771232a715bd068467c2841a3d44812fd9e70cb497e -size 9822192 diff --git a/xen-3.3.1-testing-src.tar.bz2 b/xen-3.3.1-testing-src.tar.bz2 new file mode 100644 index 0000000..ca30afe --- /dev/null +++ b/xen-3.3.1-testing-src.tar.bz2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54669aeece8872bda13bb98fc0afa03d82fd439f4e234935d8174657a2ee0f08 +size 22688672 diff --git a/xen-domUloader.diff b/xen-domUloader.diff index 46f8c4e..082c019 100644 --- a/xen-domUloader.diff +++ b/xen-domUloader.diff @@ -1,7 +1,7 @@ -Index: xen-3.3.0-testing/tools/python/xen/util/blkif.py +Index: xen-3.3.1-testing/tools/python/xen/util/blkif.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/util/blkif.py -+++ xen-3.3.0-testing/tools/python/xen/util/blkif.py +--- xen-3.3.1-testing.orig/tools/python/xen/util/blkif.py ++++ 
xen-3.3.1-testing/tools/python/xen/util/blkif.py @@ -71,23 +71,24 @@ def blkdev_segment(name): 'type' : 'Disk' } return val @@ -32,11 +32,11 @@ Index: xen-3.3.0-testing/tools/python/xen/util/blkif.py def mount_mode(name): mode = None -Index: xen-3.3.0-testing/tools/python/xen/xend/server/DevController.py +Index: xen-3.3.1-testing/tools/python/xen/xend/server/DevController.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/server/DevController.py -+++ xen-3.3.0-testing/tools/python/xen/xend/server/DevController.py -@@ -612,6 +612,31 @@ class DevController: +--- xen-3.3.1-testing.orig/tools/python/xen/xend/server/DevController.py ++++ xen-3.3.1-testing/tools/python/xen/xend/server/DevController.py +@@ -604,6 +604,31 @@ class DevController: return (Missing, None) @@ -68,10 +68,10 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/server/DevController.py def backendPath(self, backdom, devid): """Construct backend path given the backend domain and device id. -Index: xen-3.3.0-testing/tools/python/xen/xend/XendBootloader.py +Index: xen-3.3.1-testing/tools/python/xen/xend/XendBootloader.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendBootloader.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendBootloader.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendBootloader.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendBootloader.py @@ -12,8 +12,9 @@ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # @@ -126,10 +126,10 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendBootloader.py + if m: + return vdisk == m.group(1) or vdisk == m.group(2) + return True -Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py +Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py @@ -35,7 +35,7 @@ from types import StringTypes import xen.lowlevel.xc @@ -148,7 +148,7 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py from xen.xend.XendError import XendError, VmError from xen.xend.XendDevices import XendDevices from xen.xend.XendTask import XendTask -@@ -1875,6 +1875,10 @@ class XendDomainInfo: +@@ -1897,6 +1897,10 @@ class XendDomainInfo: deviceClass, config = self.info['devices'].get(dev_uuid) self._waitForDevice(deviceClass, config['devid']) @@ -159,7 +159,7 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py def _waitForDevice_destroy(self, deviceClass, devid, backpath): return self.getDeviceController(deviceClass).waitForDevice_destroy( devid, backpath) -@@ -2557,8 +2561,11 @@ class XendDomainInfo: +@@ -2569,8 +2573,11 @@ class XendDomainInfo: blexec = osdep.pygrub_path blcfg = None @@ -173,7 +173,7 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py if not disks: msg = "Had a bootloader specified, but no disks are bootable" -@@ -2569,13 +2576,10 @@ class XendDomainInfo: +@@ -2581,13 +2588,10 @@ class XendDomainInfo: devtype = devinfo[0] disk = devinfo[1]['uname'] @@ -190,7 +190,7 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py log.info("Mounting %s on %s." 
% (fn, BOOTLOADER_LOOPBACK_DEVICE)) -@@ -2587,7 +2591,9 @@ class XendDomainInfo: +@@ -2599,7 +2603,9 @@ class XendDomainInfo: from xen.xend import XendDomain dom0 = XendDomain.instance().privilegedDomain() @@ -201,7 +201,7 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py fn = BOOTLOADER_LOOPBACK_DEVICE try: -@@ -2598,7 +2604,7 @@ class XendDomainInfo: +@@ -2610,7 +2616,7 @@ class XendDomainInfo: log.info("Unmounting %s from %s." % (fn, BOOTLOADER_LOOPBACK_DEVICE)) diff --git a/xen-hvm-default-bridge.diff b/xen-hvm-default-bridge.diff index 40c6091..88ab688 100644 --- a/xen-hvm-default-bridge.diff +++ b/xen-hvm-default-bridge.diff @@ -1,21 +1,7 @@ -Index: xen-3.3.0-testing/tools/examples/xend-config.sxp +Index: xen-3.3.1-testing/tools/ioemu-remote/vl.c =================================================================== ---- xen-3.3.0-testing.orig/tools/examples/xend-config.sxp -+++ xen-3.3.0-testing/tools/examples/xend-config.sxp -@@ -141,7 +141,8 @@ - # - # (network-script 'network-bridge netdev=eth1') - # --# The bridge is named xenbr0, by default. To rename the bridge, use -+# The bridge is named to match the outgoing interface, by default. For example, -+# eth1 is on xenbr1. To rename the bridge, use - # - # (network-script 'network-bridge bridge=') - # -Index: xen-3.3.0-testing/tools/ioemu-remote/vl.c -=================================================================== ---- xen-3.3.0-testing.orig/tools/ioemu-remote/vl.c -+++ xen-3.3.0-testing/tools/ioemu-remote/vl.c +--- xen-3.3.1-testing.orig/tools/ioemu-remote/vl.c ++++ xen-3.3.1-testing/tools/ioemu-remote/vl.c @@ -134,8 +134,8 @@ int inet_aton(const char *cp, struct in_ #include "exec-all.h" @@ -42,10 +28,10 @@ Index: xen-3.3.0-testing/tools/ioemu-remote/vl.c } } else #endif -Index: xen-3.3.0-testing/tools/python/xen/xend/image.py +Index: xen-3.3.1-testing/tools/python/xen/xend/image.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/image.py -+++ xen-3.3.0-testing/tools/python/xen/xend/image.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/image.py ++++ xen-3.3.1-testing/tools/python/xen/xend/image.py @@ -764,14 +764,16 @@ class HVMImageHandler(ImageHandler): mac = devinfo.get('mac') if mac is None: @@ -66,10 +52,10 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/image.py if nics == 0: ret.append("-net") -Index: xen-3.3.0-testing/tools/ioemu-remote/i386-dm/qemu-ifup +Index: xen-3.3.1-testing/tools/ioemu-remote/i386-dm/qemu-ifup =================================================================== ---- xen-3.3.0-testing.orig/tools/ioemu-remote/i386-dm/qemu-ifup -+++ xen-3.3.0-testing/tools/ioemu-remote/i386-dm/qemu-ifup +--- xen-3.3.1-testing.orig/tools/ioemu-remote/i386-dm/qemu-ifup ++++ xen-3.3.1-testing/tools/ioemu-remote/i386-dm/qemu-ifup @@ -1,11 +1,11 @@ #!/bin/sh diff --git a/xen-ioemu-hvm-pv-support.diff b/xen-ioemu-hvm-pv-support.diff index 5889ae0..0d91a02 100644 --- a/xen-ioemu-hvm-pv-support.diff +++ b/xen-ioemu-hvm-pv-support.diff @@ -1,7 +1,7 @@ -Index: xen-3.3.0-testing/tools/ioemu-remote/hw/ide.c +Index: xen-3.3.1-testing/tools/ioemu-remote/hw/ide.c =================================================================== ---- xen-3.3.0-testing.orig/tools/ioemu-remote/hw/ide.c -+++ xen-3.3.0-testing/tools/ioemu-remote/hw/ide.c +--- xen-3.3.1-testing.orig/tools/ioemu-remote/hw/ide.c ++++ xen-3.3.1-testing/tools/ioemu-remote/hw/ide.c @@ -485,6 +485,9 @@ typedef struct PCIIDEState { } PCIIDEState; @@ -62,11 +62,11 @@ Index: 
xen-3.3.0-testing/tools/ioemu-remote/hw/ide.c pci_conf = d->dev.config; pci_conf[0x00] = 0x86; // Intel pci_conf[0x01] = 0x80; -Index: xen-3.3.0-testing/tools/ioemu-remote/hw/pci.c +Index: xen-3.3.1-testing/tools/ioemu-remote/hw/pci.c =================================================================== ---- xen-3.3.0-testing.orig/tools/ioemu-remote/hw/pci.c -+++ xen-3.3.0-testing/tools/ioemu-remote/hw/pci.c -@@ -653,6 +653,28 @@ void pci_nic_init(PCIBus *bus, NICInfo * +--- xen-3.3.1-testing.orig/tools/ioemu-remote/hw/pci.c ++++ xen-3.3.1-testing/tools/ioemu-remote/hw/pci.c +@@ -648,6 +648,28 @@ void pci_nic_init(PCIBus *bus, NICInfo * } } @@ -95,10 +95,10 @@ Index: xen-3.3.0-testing/tools/ioemu-remote/hw/pci.c typedef struct { PCIDevice dev; PCIBus *bus; -Index: xen-3.3.0-testing/tools/ioemu-remote/hw/xen_platform.c +Index: xen-3.3.1-testing/tools/ioemu-remote/hw/xen_platform.c =================================================================== ---- xen-3.3.0-testing.orig/tools/ioemu-remote/hw/xen_platform.c -+++ xen-3.3.0-testing/tools/ioemu-remote/hw/xen_platform.c +--- xen-3.3.1-testing.orig/tools/ioemu-remote/hw/xen_platform.c ++++ xen-3.3.1-testing/tools/ioemu-remote/hw/xen_platform.c @@ -26,6 +26,8 @@ #include "hw.h" #include "pci.h" @@ -162,10 +162,10 @@ Index: xen-3.3.0-testing/tools/ioemu-remote/hw/xen_platform.c } static uint32_t platform_mmio_read(void *opaque, target_phys_addr_t addr) -Index: xen-3.3.0-testing/tools/ioemu-remote/vl.c +Index: xen-3.3.1-testing/tools/ioemu-remote/vl.c =================================================================== ---- xen-3.3.0-testing.orig/tools/ioemu-remote/vl.c -+++ xen-3.3.0-testing/tools/ioemu-remote/vl.c +--- xen-3.3.1-testing.orig/tools/ioemu-remote/vl.c ++++ xen-3.3.1-testing/tools/ioemu-remote/vl.c @@ -259,6 +259,20 @@ static int event_pending = 1; #include "xen-vl-extra.c" @@ -262,10 +262,10 @@ Index: xen-3.3.0-testing/tools/ioemu-remote/vl.c /* XXX: fd_read_poll should be suppressed, but an API change is necessary in the character devices to suppress fd_can_read(). 
*/ int qemu_set_fd_handler2(int fd, -Index: xen-3.3.0-testing/tools/ioemu-remote/sysemu.h +Index: xen-3.3.1-testing/tools/ioemu-remote/sysemu.h =================================================================== ---- xen-3.3.0-testing.orig/tools/ioemu-remote/sysemu.h -+++ xen-3.3.0-testing/tools/ioemu-remote/sysemu.h +--- xen-3.3.1-testing.orig/tools/ioemu-remote/sysemu.h ++++ xen-3.3.1-testing/tools/ioemu-remote/sysemu.h @@ -49,6 +49,9 @@ void do_loadvm(const char *name); void do_delvm(const char *name); void do_info_snapshots(void); diff --git a/xen-max-free-mem.diff b/xen-max-free-mem.diff index d85b4d8..b849d86 100644 --- a/xen-max-free-mem.diff +++ b/xen-max-free-mem.diff @@ -1,7 +1,7 @@ -Index: xen-3.3.0-testing/tools/python/xen/xend/XendNode.py +Index: xen-3.3.1-testing/tools/python/xen/xend/XendNode.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendNode.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendNode.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendNode.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendNode.py @@ -650,11 +650,35 @@ class XendNode: info['cpu_mhz'] = info['cpu_khz'] / 1000 @@ -53,10 +53,10 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendNode.py 'node_to_cpu', 'node_to_memory' ] -Index: xen-3.3.0-testing/tools/python/xen/xend/balloon.py +Index: xen-3.3.1-testing/tools/python/xen/xend/balloon.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/balloon.py -+++ xen-3.3.0-testing/tools/python/xen/xend/balloon.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/balloon.py ++++ xen-3.3.1-testing/tools/python/xen/xend/balloon.py @@ -41,6 +41,8 @@ SLEEP_TIME_GROWTH = 0.1 # label actually shown in the PROC_XEN_BALLOON file. #labels = { 'current' : 'Current allocation', @@ -90,11 +90,11 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/balloon.py def free(need_mem): """Balloon out memory from the privileged domain so that there is the specified required amount (in KiB) free. -Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py +Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -1016,6 +1016,27 @@ class XendDomainInfo: +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py +@@ -1038,6 +1038,27 @@ class XendDomainInfo: return None @@ -122,10 +122,10 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py def setMemoryTarget(self, target): """Set the memory target of this domain. @param target: In MiB. 
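
The rebased xen-max-free-mem hunks above surface, through XendNode's physinfo
and the balloon code, an estimate of how much memory could still be handed to
a new guest once dom0 is ballooned down to its configured floor. A rough
stand-alone equivalent of that estimate, as a hedged sketch only (field names
as printed by 'xm info' and 'xm list'; dom0-min-mem parsed from
xend-config.sxp; paths and numbers are examples, not the patch's code):

    #!/bin/bash
    # free memory the hypervisor reports right now (MiB)
    free=$(xm info | awk '/^free_memory/ {print $3}')
    # dom0's current allocation (MiB, column 3 of 'xm list 0')
    dom0_cur=$(xm list 0 | awk 'NR==2 {print $3}')
    # the floor dom0 may be ballooned down to
    dom0_min=$(awk -F'[() ]+' '/^\(dom0-min-mem/ {print $3; exit}' \
               /etc/xen/xend-config.sxp)
    echo "free now:           ${free} MiB"
    echo "reclaimable (dom0): $((dom0_cur - dom0_min)) MiB"
    echo "max free (approx):  $((free + dom0_cur - dom0_min)) MiB"
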
-Index: xen-3.3.0-testing/tools/python/xen/xend/server/SrvDomain.py
+Index: xen-3.3.1-testing/tools/python/xen/xend/server/SrvDomain.py
 ===================================================================
---- xen-3.3.0-testing.orig/tools/python/xen/xend/server/SrvDomain.py
-+++ xen-3.3.0-testing/tools/python/xen/xend/server/SrvDomain.py
+--- xen-3.3.1-testing.orig/tools/python/xen/xend/server/SrvDomain.py
++++ xen-3.3.1-testing/tools/python/xen/xend/server/SrvDomain.py
 @@ -171,7 +171,7 @@ class SrvDomain(SrvDir):

diff --git a/xen-updown.sh b/xen-updown.sh
new file mode 100644
index 0000000..3fed189
--- /dev/null
+++ b/xen-updown.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+#
+usage () {
+	echo $@
+	echo "usage: $0 <config> [<interface>] [-o <options>]"
+	echo ""
+	echo "Options are:"
+	echo "  debug : be verbose"
+	echo "  rc    : indicates that we are called from rcnetwork"
+	echo ""
+	echo "Any other options are ignored"
+	exit $R_USAGE
+}
+
+######################################################################
+# change the working directory and source some common files
+#
+R_INTERNAL=1	# internal error, e.g. no config or missing scripts
+cd /etc/sysconfig/network || exit $R_INTERNAL
+test -f ./config && . ./config
+test -f scripts/functions && . scripts/functions || exit $R_INTERNAL
+
+######################################################################
+# check arguments and how we are called (in case of links)
+#
+SCRIPTNAME=${0}
+debug $*
+case $1 in ""|-h|*help*) usage ;; esac
+CONFIG="$1"
+shift
+if [ "x$1" != x -a "x$1" != "x-o" ] ; then
+	INTERFACE="$1"
+else
+	INTERFACE="$CONFIG"
+fi
+shift
+test "x$1" = "x-o" && shift
+DEBUG=no
+RUN_FROM_RC=no
+while [ $# -gt 0 ]; do
+	case $1 in
+		debug) DEBUG=yes ;;
+		rc)    RUN_FROM_RC=yes ;;
+		*)     debug unknown option $1 ;;
+	esac
+	shift
+done
+
+# usage: ifprint message....
+ifprint() {
+	func=$1 ; shift
+	test "x$func" = x && return 1
+	if [ "$RUN_FROM_RC" = yes -a "$INTERFACE" != all ] ; then
+		$func "`printf " %-9s " "$INTERFACE"`$*"
+	else
+		$func "$*"
+	fi
+}
+
+#
+# xen related code
+#
+
+# check if xen is running
+is_xend_running() {
+	test -x /etc/init.d/xend && \
+		/etc/init.d/xend status &>/dev/null && return 0
+	return 1
+}
+exit_if_xend_not_running() {
+	is_xend_running || {
+		debug "$0: xend is not running - nothing to do"
+		exit 0
+	}
+}
+
+# (modified) functions from /etc/init.d/xendomains
+parseln()
+{
+	name=${1:0:$((${#1}-36))}
+	name=${name%% *}
+	rest="${1: -36}"
+	id=${rest:0:4}
+	id=`echo $id`
+	mem=${rest:4:6}
+	mem=`echo $mem`
+	vcpu=${rest:10:6}
+	vcpu=`echo $vcpu`
+	state=${rest:16:11}
+	state=`echo $state`
+	tm=${rest:27}
+	tm=`echo $tm`
+}
+
+xm_list()
+{
+	TERM=vt100 xm list | grep -v '^Name *ID'
+}
+
+# For the specified vm, return a list of vifs that are connected to $INTERFACE
+list_vifs()
+{
+	id=$1
+	vifs=()
+	for vif in $(ls -1 "/sys/class/net/$INTERFACE/brif/"); do
+		tmp="`echo ${vif} | egrep "^(tap|vif)$id\..*"`"
+		if [ ! -z ${tmp} ]; then
+			vifs=(${vifs[@]} ${tmp})
+		fi
+	done
+	echo "${vifs[@]}"
+}
+
+# Write list of concerned vifs to state file
+save_sysconfig_state()
+{
+	[ -d "${RUN_FILES_BASE}/xen/" ] || \
+		mkdir -p "${RUN_FILES_BASE}/xen/" || return 1
+
+	rm -f "${RUN_FILES_BASE}/xen/$INTERFACE" && {
+		echo "VIFS='${vifs[@]}'"
+	} > "${RUN_FILES_BASE}/xen/$INTERFACE"
+}
+
+case $SCRIPTNAME in
+*if-up.d*)
+	exit_if_xend_not_running
+
+	for IF in $(ls -1 "${RUN_FILES_BASE}/xen/") ; do
+		. "${RUN_FILES_BASE}/xen/$INTERFACE" || continue
+
+		for vif in ${VIFS}; do
+			test -d "/sys/class/net/${vif}" || continue
+			if ! 
is_iface_up ${vif} ; then + ip link set dev ${vif} up + fi + brctl addif ${INTERFACE} ${vif} 2>&1 > /dev/null + done + # remove sysconfig state + rm -f "${RUN_FILES_BASE}/xen/$INTERFACE" + done +;; +*if-down.d*) + exit_if_xend_not_running + + # Remember vifs attached to $INTERFACE + vifs=() + num=0 + while read LN; do + parseln "$LN" + [ "$id" = 0 ] && continue + [ -z "$state" ] && continue + + vifs=(${vifs[@]} $(list_vifs $id)) + done < <(xm_list) + + [ -z "${vifs[*]}" ] || save_sysconfig_state + +;; +*) + usage +;; +esac + diff --git a/xen.changes b/xen.changes index f4c87ba..5456ac3 100644 --- a/xen.changes +++ b/xen.changes @@ -1,3 +1,40 @@ +------------------------------------------------------------------- +Thu Sep 11 12:42:05 MDT 2008 - brogers@novell.com + +- Added gdbserver-xen to the set of tools we build. + fate#302942 + +------------------------------------------------------------------- +Thu Sep 11 10:32:17 MDT 2008 - jfehlig@novell.com + +- Added ocfs2 to Should-Start in xendomains init script + +------------------------------------------------------------------- +Wed Sep 10 20:47:45 MDT 2008 - plc@novell.com + +- Added pv cdrom support to blktap + fate#300964 + +------------------------------------------------------------------- +Wed Sep 10 14:06:51 MDT 2008 - jfehlig@novell.com + +- Removed invocation of network-bridge script from xend-config.sxp. + Networks are now created through yast2-network package. +- Added sysconfig hook script for Xen to cope with ifup/ifdown + events on network devices (e.g. bridges) in use by virtual + machines. + fate#303386 + +------------------------------------------------------------------- +Mon Sep 8 08:53:25 MDT 2008 - carnold@novell.com + +- Updated to xen version 3.3.1 RC changeset 18390. + +------------------------------------------------------------------- +Wed Sep 3 21:07:56 CEST 2008 - kwolf@suse.de + +- Snapshots: Fix xend API functions for libvirt usage + ------------------------------------------------------------------- Mon Sep 1 08:56:32 MDT 2008 - carnold@novell.com diff --git a/xen.spec b/xen.spec index d155877..3313eb6 100644 --- a/xen.spec +++ b/xen.spec @@ -1,5 +1,5 @@ # -# spec file for package xen (Version 3.3.0_18358_02) +# spec file for package xen (Version 3.3.1_18390_01) # # Copyright (c) 2008 SUSE LINUX Products GmbH, Nuernberg, Germany. 
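
The list_vifs helper in the new xen-updown.sh relies on the kernel's naming of
Xen network backends: vif<domid>.<idx> for paravirtual NICs and
tap<domid>.<idx> for emulated ones, both appearing as ports of the bridge. A
stand-alone sketch of the same classification (the bridge name xenbr0 is only
an example):

    #!/bin/bash
    for port in /sys/class/net/xenbr0/brif/*; do
        vif=${port##*/}                    # strip the /sys path
        case $vif in
        vif[0-9]*.*|tap[0-9]*.*)
            domid=${vif#???}               # drop the vif/tap prefix
            domid=${domid%%.*}             # keep the part before the dot
            echo "$vif -> domain $domid"
            ;;
        *)
            echo "$vif -> physical or other port"
            ;;
        esac
    done
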
# @@ -21,8 +21,8 @@ Name: xen %define xvers 3.3 %define xvermaj 3 -%define changeset 18358 -%define xen_build_dir xen-3.3.0-testing +%define changeset 18390 +%define xen_build_dir xen-3.3.1-testing %if %sles_version %define with_kmp 1 %else @@ -40,14 +40,14 @@ BuildRequires: glibc-32bit glibc-devel-32bit %if %{?with_kmp}0 BuildRequires: kernel-source kernel-syms module-init-tools xorg-x11 %endif -Version: 3.3.0_18358_02 -Release: 2 +Version: 3.3.1_18390_01 +Release: 1 License: GPL v2 only Group: System/Kernel AutoReqProv: on PreReq: %insserv_prereq %fillup_prereq Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) -Source0: xen-3.3.0-testing-src.tar.bz2 +Source0: xen-3.3.1-testing-src.tar.bz2 Source2: README.SuSE Source3: boot.xen Source4: boot.local.xenU @@ -66,7 +66,11 @@ Source18: init.xen_loop # Xen API remote authentication sources Source23: etc_pam.d_xen-api Source24: xenapiusers +# sysconfig hook script for Xen +Source25: xen-updown.sh # Upstream patches +Patch0: 18412-x86-page-type-preemptible.patch +Patch1: 18420-x86-page-type-preemptible-fix.patch # Our patches Patch100: xen-config.diff Patch101: xend-config.diff @@ -111,6 +115,7 @@ Patch156: blktap.patch Patch157: xen-api-auth.patch Patch158: xen-qemu-iscsi-fix.patch Patch159: xend-vif-fix.patch +Patch160: tools-gdbserver-build.diff # Patches for snapshot support Patch170: qemu-img-snapshot.patch Patch171: ioemu-blktap-fix-open.patch @@ -126,18 +131,22 @@ Patch184: ioemu-blktap-barriers.patch # Jim's domain lock patch Patch190: xend-domain-lock.patch # Patches from Jan -Patch240: x86-show-page-walk-early.patch -Patch241: svm-lmsl.patch -Patch242: x86-extra-trap-info.patch +Patch240: poll-single-port.patch +Patch241: dump-exec-state.patch +Patch242: x86-show-page-walk-early.patch +Patch243: svm-lmsl.patch +Patch244: x86-extra-trap-info.patch +Patch245: x86-microcode.patch Patch250: 32on64-extra-mem.patch # PV Driver Patches Patch350: pv-driver-build.patch Patch351: xen-ioemu-hvm-pv-support.diff Patch352: pvdrv_emulation_control.patch +Patch353: blktap-pv-cdrom.patch # novell_shim patches -Patch400: ns_tools.patch -Patch401: ns_xen_base.patch -Patch402: ns_xen_extension.patch +Patch400: hv_tools.patch +Patch401: hv_xen_base.patch +Patch402: hv_xen_extension.patch Url: http://www.cl.cam.ac.uk/Research/SRG/netos/xen/ BuildRoot: %{_tmppath}/%{name}-%{version}-build %define pysite %(python -c "import distutils.sysconfig; print distutils.sysconfig.get_python_lib()") @@ -469,6 +478,8 @@ Authors: %prep %setup -q -n %xen_build_dir +%patch0 -p1 +%patch1 -p1 %patch100 -p1 %patch101 -p1 %patch102 -p1 @@ -512,6 +523,7 @@ Authors: %patch157 -p1 %patch158 -p1 %patch159 -p1 +%patch160 -p1 %patch170 -p1 %patch171 -p1 %patch172 -p1 @@ -527,16 +539,20 @@ Authors: %patch240 -p1 %patch241 -p1 %patch242 -p1 +%patch243 -p1 +%patch244 -p1 +%patch245 -p1 %patch250 -p1 %patch350 -p1 %patch351 -p1 %patch352 -p1 +%patch353 -p1 # Don't use shim for now -#%ifarch x86_64 -#%patch400 -p1 -#%patch401 -p1 -#%patch402 -p1 -#%endif +%ifarch x86_64 +%patch400 -p1 +%patch401 -p1 +%patch402 -p1 +%endif %build XEN_EXTRAVERSION=%version-%release @@ -548,6 +564,10 @@ export CFLAGS="${RPM_OPT_FLAGS}" export RPM_OPT_FLAGS make -C tools/include/xen-foreign make tools docs +cd tools/debugger/gdb +# there are code problems that don't pass the 02-check-gcc-output, hence bitbucket +./gdbbuild 1>/dev/null 2>/dev/null +cd ../../.. 
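
The gdbserver-xen binary built above (and installed below) is used like the
regular gdbserver, but attaches to a domain id instead of a pid. A typical
session, with the port, domain id and symbol file all being examples only:

    # in dom0: export guest domain 3 on a local port
    gdbserver-xen 127.0.0.1:9999 --attach 3

    # in another shell: point gdb at it, using the guest kernel's symbols
    gdb /path/to/guest-vmlinux-with-symbols
    (gdb) target remote 127.0.0.1:9999
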
%if %{?with_kmp}0 # pv driver modules export XL=/usr/src/linux @@ -610,6 +630,7 @@ make -C tools/include/xen-foreign export XEN_PYTHON_NATIVE_INSTALL=1 make -C tools install \ DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} +cp tools/debugger/gdb/gdb-6.2.1-linux-i386-xen/gdb/gdbserver/gdbserver-xen $RPM_BUILD_ROOT/usr/bin/gdbserver-xen make -C tools/misc/serial-split install \ DESTDIR=$RPM_BUILD_ROOT MANDIR=%{_mandir} %ifarch x86_64 @@ -656,6 +677,13 @@ install -m755 %SOURCE11 %SOURCE12 %SOURCE13 %SOURCE16 %SOURCE17 $RPM_BUILD_ROOT/ install -d $RPM_BUILD_ROOT/etc/pam.d install -m644 %SOURCE23 $RPM_BUILD_ROOT/etc/pam.d/xen-api install -m644 %SOURCE24 $RPM_BUILD_ROOT/etc/xen/ +# sysconfig hook for Xen +mkdir -p $RPM_BUILD_ROOT/etc/sysconfig/network/scripts +mkdir -p $RPM_BUILD_ROOT/etc/sysconfig/network/if-up.d +mkdir -p $RPM_BUILD_ROOT/etc/sysconfig/network/if-down.d +install -m755 %SOURCE25 $RPM_BUILD_ROOT/etc/sysconfig/network/scripts +ln -s /etc/sysconfig/network/scripts/xen-updown.sh $RPM_BUILD_ROOT/etc/sysconfig/network/if-up.d/xen +ln -s /etc/sysconfig/network/scripts/xen-updown.sh $RPM_BUILD_ROOT/etc/sysconfig/network/if-down.d/xen # logrotate install -m644 -D %SOURCE7 $RPM_BUILD_ROOT/etc/logrotate.d/xen # directories @@ -722,6 +750,7 @@ rm -f $RPM_BUILD_ROOT/%{_libdir}/xen/bin/qemu-dm.debug /usr/bin/pygrub /usr/bin/qemu-img-xen /usr/bin/tapdisk-ioemu +/usr/bin/gdbserver-xen /usr/sbin/blktapctrl /usr/sbin/flask-loadpolicy /usr/sbin/img2qcow @@ -783,6 +812,9 @@ rm -f $RPM_BUILD_ROOT/%{_libdir}/xen/bin/qemu-dm.debug %dir /etc/udev %dir /etc/udev/rules.d /etc/udev/rules.d/40-xen.rules +/etc/sysconfig/network/scripts/xen-updown.sh +/etc/sysconfig/network/if-up.d/xen +/etc/sysconfig/network/if-down.d/xen %dir %{_defaultdocdir}/xen %{_defaultdocdir}/xen/COPYING %{_defaultdocdir}/xen/README.SuSE @@ -865,6 +897,25 @@ rm -f $RPM_BUILD_ROOT/%{_libdir}/xen/bin/qemu-dm.debug /sbin/ldconfig %changelog +* Thu Sep 11 2008 brogers@novell.com +- Added gdbserver-xen to the set of tools we build. + fate#302942 +* Thu Sep 11 2008 jfehlig@novell.com +- Added ocfs2 to Should-Start in xendomains init script +* Wed Sep 10 2008 plc@novell.com +- Added pv cdrom support to blktap + fate#300964 +* Wed Sep 10 2008 jfehlig@novell.com +- Removed invocation of network-bridge script from xend-config.sxp. + Networks are now created through yast2-network package. +- Added sysconfig hook script for Xen to cope with ifup/ifdown + events on network devices (e.g. bridges) in use by virtual + machines. + fate#303386 +* Mon Sep 08 2008 carnold@novell.com +- Updated to xen version 3.3.1 RC changeset 18390. +* Wed Sep 03 2008 kwolf@suse.de +- Snapshots: Fix xend API functions for libvirt usage * Mon Sep 01 2008 carnold@novell.com - Fix problems building KMPs against the 2.6.27 kernel. 
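
For context on the sysconfig hooks packaged above: SUSE's ifup/ifdown run
every executable in the matching if-up.d/if-down.d directory, so installing
one script under two symlinked names lets the same file see both events and
branch on $SCRIPTNAME. A rough illustration of that dispatch (not the literal
sysconfig code; the argument layout follows xen-updown.sh's usage text):

    #!/bin/bash
    INTERFACE=xenbr0    # example: a bridge being brought up
    for hook in /etc/sysconfig/network/if-up.d/*; do
        [ -x "$hook" ] && "$hook" "$INTERFACE" "$INTERFACE" -o rc
    done
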
* Fri Aug 29 2008 plc@novell.com
diff --git a/xenapi-console-protocol.patch b/xenapi-console-protocol.patch
index a9091ac..0894a61 100644
--- a/xenapi-console-protocol.patch
+++ b/xenapi-console-protocol.patch
@@ -1,8 +1,8 @@
-Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py
+Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
 ===================================================================
---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py
-+++ xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py
-@@ -3242,6 +3242,14 @@ class XendDomainInfo:
+--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py
++++ xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py
+@@ -3254,6 +3254,14 @@ class XendDomainInfo:
         if not config.has_key('backend'):
             config['backend'] = "00000000-0000-0000-0000-000000000000"
diff --git a/xend-config.diff b/xend-config.diff
index da3a3ab..54ae9c9 100644
--- a/xend-config.diff
+++ b/xend-config.diff
@@ -46,7 +46,31 @@ Index: xen-3.3.0-testing/tools/examples/xend-config.sxp
 #(xend-relocation-ssl-server no)
 #(xend-unix-path /var/lib/xend/xend-socket)
-@@ -181,7 +182,7 @@
+@@ -140,7 +141,8 @@
+ #
+ # (network-script 'network-bridge netdev=eth1')
+ #
+-# The bridge is named xenbr0, by default. To rename the bridge, use
++# The bridge takes on the ethernet device name by default. To rename the
++# bridge, use
+ #
+ # (network-script 'network-bridge bridge=<name>')
+ #
+@@ -149,7 +151,12 @@
+ # two fake interfaces per guest domain. To do things like this, write
+ # yourself a wrapper script, and call network-bridge from it, as appropriate.
+ #
+-(network-script network-bridge)
++# SuSE users note:
++# On openSUSE >= 11.1 and SLES >= 11, networks should be configured using
++# the native platform tool - YaST. vif-bridge and qemu-ifup can be used to
++# connect vifs to the YaST-managed networks.
++#(network-script network-bridge)
++(network-script )
+ 
+ # The script used to control virtual interfaces. This can be overridden on a
+ # per-vif basis when creating a domain or a configuring a new vif. The
+@@ -181,7 +188,7 @@
 # dom0-min-mem is the lowest permissible memory level (in MB) for dom0.
 # This is a minimum both for auto-ballooning (as enabled by
 # enable-dom0-ballooning below) and for xm mem-set when applied to dom0.
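
The change above leaves bridge setup to YaST, but the upstream path still
works; as the 'wrapper script' comment in the same file suggests, a host with
two NICs could keep using network-bridge through a small wrapper (the file
name and device/bridge names are examples), referenced from xend-config.sxp
as (network-script network-bridge-wrapper):

    #!/bin/sh
    # /etc/xen/scripts/network-bridge-wrapper
    dir=$(dirname "$0")
    "$dir/network-bridge" "$@" vifnum=0 netdev=eth0 bridge=xenbr0
    "$dir/network-bridge" "$@" vifnum=1 netdev=eth1 bridge=xenbr1
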
diff --git a/xend-core-dump-loc.diff b/xend-core-dump-loc.diff index 430532c..3a929bf 100644 --- a/xend-core-dump-loc.diff +++ b/xend-core-dump-loc.diff @@ -1,8 +1,8 @@ -Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py +Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py -@@ -1864,7 +1864,7 @@ class XendDomainInfo: +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py +@@ -1886,7 +1886,7 @@ class XendDomainInfo: try: if not corefile: this_time = time.strftime("%Y-%m%d-%H%M.%S", time.localtime()) diff --git a/xend-domain-lock.patch b/xend-domain-lock.patch index c62c4c7..dd0d114 100644 --- a/xend-domain-lock.patch +++ b/xend-domain-lock.patch @@ -1,7 +1,7 @@ -Index: xen-3.3.0-testing/tools/examples/domain-lock +Index: xen-3.3.1-testing/tools/examples/domain-lock =================================================================== --- /dev/null -+++ xen-3.3.0-testing/tools/examples/domain-lock ++++ xen-3.3.1-testing/tools/examples/domain-lock @@ -0,0 +1,86 @@ +#!/bin/bash + @@ -89,10 +89,10 @@ Index: xen-3.3.0-testing/tools/examples/domain-lock + get_status $vm_path + ;; +esac -Index: xen-3.3.0-testing/tools/examples/xend-config.sxp +Index: xen-3.3.1-testing/tools/examples/xend-config.sxp =================================================================== ---- xen-3.3.0-testing.orig/tools/examples/xend-config.sxp -+++ xen-3.3.0-testing/tools/examples/xend-config.sxp +--- xen-3.3.1-testing.orig/tools/examples/xend-config.sxp ++++ xen-3.3.1-testing/tools/examples/xend-config.sxp @@ -250,4 +250,44 @@ # Path where persistent domain configuration is stored. @@ -138,10 +138,10 @@ Index: xen-3.3.0-testing/tools/examples/xend-config.sxp +# and (if supplied) to the lock file in that order. 
+# +#(xend-domain-lock-utility domain-lock) -Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py +Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendDomainInfo.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomainInfo.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendDomainInfo.py @@ -30,11 +30,13 @@ import threading import re import copy @@ -157,7 +157,7 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py from xen.util.blkif import parse_uname import xen.util.xsm.xsm as security from xen.util import xsconstants -@@ -421,6 +423,7 @@ class XendDomainInfo: +@@ -443,6 +445,7 @@ class XendDomainInfo: if self._stateGet() in (XEN_API_VM_POWER_STATE_HALTED, XEN_API_VM_POWER_STATE_SUSPENDED, XEN_API_VM_POWER_STATE_CRASHED): try: @@ -165,7 +165,7 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py XendTask.log_progress(0, 30, self._constructDomain) XendTask.log_progress(31, 60, self._initDomain) -@@ -453,6 +456,7 @@ class XendDomainInfo: +@@ -475,6 +478,7 @@ class XendDomainInfo: state = self._stateGet() if state in (DOM_STATE_SUSPENDED, DOM_STATE_HALTED): try: @@ -173,7 +173,7 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py self._constructDomain() self._storeVmDetails() self._createDevices() -@@ -2317,6 +2321,11 @@ class XendDomainInfo: +@@ -2339,6 +2343,11 @@ class XendDomainInfo: self._stateSet(DOM_STATE_HALTED) self.domid = None # Do not push into _stateSet()! @@ -185,7 +185,7 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py finally: self.refresh_shutdown_lock.release() -@@ -3555,6 +3564,74 @@ class XendDomainInfo: +@@ -3567,6 +3576,74 @@ class XendDomainInfo: def has_device(self, dev_class, dev_uuid): return (dev_uuid in self.info['%s_refs' % dev_class.lower()]) @@ -260,11 +260,11 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomainInfo.py def __str__(self): return '' % \ (str(self.domid), self.info['name_label'], -Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomain.py +Index: xen-3.3.1-testing/tools/python/xen/xend/XendDomain.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendDomain.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendDomain.py -@@ -1295,6 +1295,7 @@ class XendDomain: +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendDomain.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendDomain.py +@@ -1299,6 +1299,7 @@ class XendDomain: POWER_STATE_NAMES[DOM_STATE_RUNNING], POWER_STATE_NAMES[dominfo._stateGet()]) @@ -272,10 +272,10 @@ Index: xen-3.3.0-testing/tools/python/xen/xend/XendDomain.py """ The following call may raise a XendError exception """ dominfo.testMigrateDevices(True, dst) -Index: xen-3.3.0-testing/tools/python/xen/xend/XendOptions.py +Index: xen-3.3.1-testing/tools/python/xen/xend/XendOptions.py =================================================================== ---- xen-3.3.0-testing.orig/tools/python/xen/xend/XendOptions.py -+++ xen-3.3.0-testing/tools/python/xen/xend/XendOptions.py +--- xen-3.3.1-testing.orig/tools/python/xen/xend/XendOptions.py ++++ xen-3.3.1-testing/tools/python/xen/xend/XendOptions.py @@ -135,6 +135,17 @@ class XendOptions: """Default rotation count of qemu-dm log file.""" qemu_dm_logrotate_count = 10 @@ -314,10 +314,10 @@ Index: 
xen-3.3.0-testing/tools/python/xen/xend/XendOptions.py class XendOptionsFile(XendOptions): -Index: xen-3.3.0-testing/tools/examples/Makefile +Index: xen-3.3.1-testing/tools/examples/Makefile =================================================================== ---- xen-3.3.0-testing.orig/tools/examples/Makefile -+++ xen-3.3.0-testing/tools/examples/Makefile +--- xen-3.3.1-testing.orig/tools/examples/Makefile ++++ xen-3.3.1-testing/tools/examples/Makefile @@ -35,6 +35,7 @@ XEN_SCRIPTS += vtpm vtpm-delete XEN_SCRIPTS += xen-hotplug-cleanup XEN_SCRIPTS += external-device-migrate
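
The xend-domain-lock series above ends with the Makefile hook that ships the
default domain-lock helper; xend-domain-lock-utility can instead name a
site-specific tool. The sketch below is one hypothetical shape for such a
tool -- the action names and the trailing VM path are assumptions, not the
shipped helper's interface -- recording which host holds a VM on shared
storage so a second start elsewhere fails fast:

    #!/bin/bash
    # hypothetical xend-domain-lock-utility replacement
    action=$1
    vm_path=${!#}                 # last argument: per-VM lock directory
    lock="$vm_path/lock"
    case $action in
    lock)   set -o noclobber      # refuse to overwrite an existing lock
            echo "$(hostname) $(date +%s)" > "$lock" 2>/dev/null || exit 1 ;;
    unlock) rm -f "$lock" ;;
    status) [ -f "$lock" ] && cat "$lock" || echo "unlocked" ;;
    *)      echo "usage: $0 {lock|unlock|status} <vm_path>" >&2 ; exit 1 ;;
    esac
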