Update version for v2.1.1 release

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
target-i386: Support migratable=no properly
2014-09-10 14:30:45 -05:00 · 2014-09-10 09:30:58 -05:00 · 2014-09-10 09:30:58 -05:00 · 2014-09-10 09:30:58 -05:00 · 2014-09-10 09:30:58 -05:00 · 2014-09-10 09:30:58 -05:00
61 changed files with 710 additions and 237 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-2.1.0
+2.1.1
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -304,7 +304,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
        /* ensure policy won't be ignored in case memory is preallocated
         * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
         * this doesn't catch hugepage case. */
-        unsigned flags = MPOL_MF_STRICT;
+        unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;

        /* check for invalid host-nodes and policies and give more verbose
         * error messages than mbind(). */
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -449,6 +449,10 @@ static void error_callback_bh(void *opaque)
 static void blkdebug_aio_cancel(BlockDriverAIOCB *blockacb)
 {
    BlkdebugAIOCB *acb = container_of(blockacb, BlkdebugAIOCB, common);
+    if (acb->bh) {
+        qemu_bh_delete(acb->bh);
+        acb->bh = NULL;
+    }
    qemu_aio_release(acb);
 }

--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -1509,7 +1509,8 @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
    if (iscsilun->allocationmap != NULL) {
        g_free(iscsilun->allocationmap);
        iscsilun->allocationmap =
-            bitmap_new(DIV_ROUND_UP(bs->total_sectors,
+            bitmap_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
+                                                    iscsilun),
                                    iscsilun->cluster_sectors));
    }

--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -747,6 +747,15 @@ static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
        }
        if (len == -1 && errno == EINTR) {
            continue;
+        } else if (len == -1 && errno == EINVAL &&
+                   (aiocb->bs->open_flags & BDRV_O_NOCACHE) &&
+                   !(aiocb->aio_type & QEMU_AIO_WRITE) &&
+                   offset > 0) {
+            /* O_DIRECT pread() may fail with EINVAL when offset is unaligned
+             * after a short read.  Assume that O_DIRECT short reads only occur
+             * at EOF.  Therefore this is a short read, not an I/O error.
+             */
+            break;
        } else if (len == -1) {
            offset = -errno;
            break;
--- a/1
+++ b/1
@@ -1723,6 +1723,7 @@ fi

 cat > $TMPC <<EOF
 #include <sys/socket.h>
+#include <linux/ip.h>
 int main(void) { return sizeof(struct mmsghdr); }
 EOF
 if compile_prog "" "" ; then
--- a/exec.c
+++ b/exec.c
@@ -430,15 +430,50 @@ static int cpu_common_post_load(void *opaque, int version_id)
    return 0;
 }

+static int cpu_common_pre_load(void *opaque)
+{
+    CPUState *cpu = opaque;
+
+    cpu->exception_index = 0;
+
+    return 0;
+}
+
+static bool cpu_common_exception_index_needed(void *opaque)
+{
+    CPUState *cpu = opaque;
+
+    return cpu->exception_index != 0;
+}
+
+static const VMStateDescription vmstate_cpu_common_exception_index = {
+    .name = "cpu_common/exception_index",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT32(exception_index, CPUState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
+    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (VMStateSubsection[]) {
+        {
+            .vmsd = &vmstate_cpu_common_exception_index,
+            .needed = cpu_common_exception_index_needed,
+        } , {
+            /* empty */
+        }
    }
 };

--- a/hw/acpi/pcihp.c
+++ b/hw/acpi/pcihp.c
@@ -231,7 +231,7 @@ static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size)
    uint32_t val = 0;
    int bsel = s->hotplug_select;

-    if (bsel < 0 || bsel > ACPI_PCIHP_MAX_HOTPLUG_BUS) {
+    if (bsel < 0 || bsel >= ACPI_PCIHP_MAX_HOTPLUG_BUS) {
        return 0;
    }

--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -194,20 +194,41 @@ static void fdt_add_psci_node(const VirtBoardInfo *vbi)

    /* No PSCI for TCG yet */
    if (kvm_enabled()) {
+        uint32_t cpu_suspend_fn;
+        uint32_t cpu_off_fn;
+        uint32_t cpu_on_fn;
+        uint32_t migrate_fn;
+
        qemu_fdt_add_subnode(fdt, "/psci");
        if (armcpu->psci_version == 2) {
            const char comp[] = "arm,psci-0.2\0arm,psci";
            qemu_fdt_setprop(fdt, "/psci", "compatible", comp, sizeof(comp));
+
+            cpu_off_fn = QEMU_PSCI_0_2_FN_CPU_OFF;
+            if (arm_feature(&armcpu->env, ARM_FEATURE_AARCH64)) {
+                cpu_suspend_fn = QEMU_PSCI_0_2_FN64_CPU_SUSPEND;
+                cpu_on_fn = QEMU_PSCI_0_2_FN64_CPU_ON;
+                migrate_fn = QEMU_PSCI_0_2_FN64_MIGRATE;
+            } else {
+                cpu_suspend_fn = QEMU_PSCI_0_2_FN_CPU_SUSPEND;
+                cpu_on_fn = QEMU_PSCI_0_2_FN_CPU_ON;
+                migrate_fn = QEMU_PSCI_0_2_FN_MIGRATE;
+            }
        } else {
            qemu_fdt_setprop_string(fdt, "/psci", "compatible", "arm,psci");
+
+            cpu_suspend_fn = QEMU_PSCI_0_1_FN_CPU_SUSPEND;
+            cpu_off_fn = QEMU_PSCI_0_1_FN_CPU_OFF;
+            cpu_on_fn = QEMU_PSCI_0_1_FN_CPU_ON;
+            migrate_fn = QEMU_PSCI_0_1_FN_MIGRATE;
        }

        qemu_fdt_setprop_string(fdt, "/psci", "method", "hvc");
-        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend",
-                                  PSCI_FN_CPU_SUSPEND);
-        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", PSCI_FN_CPU_OFF);
-        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", PSCI_FN_CPU_ON);
-        qemu_fdt_setprop_cell(fdt, "/psci", "migrate", PSCI_FN_MIGRATE);
+
+        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_suspend", cpu_suspend_fn);
+        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_off", cpu_off_fn);
+        qemu_fdt_setprop_cell(fdt, "/psci", "cpu_on", cpu_on_fn);
+        qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
    }
 }

--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -469,8 +469,9 @@ static void virtio_blk_dma_restart_bh(void *opaque)
    s->rq = NULL;

    while (req) {
+        VirtIOBlockReq *next = req->next;
        virtio_blk_handle_request(req, &mrb);
-        req = req->next;
+        req = next;
    }

    virtio_submit_multiwrite(s->bs, &mrb);
--- a/hw/display/qxl-render.c
+++ b/hw/display/qxl-render.c
@@ -138,7 +138,9 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl)
        if (qemu_spice_rect_is_empty(qxl->dirty+i)) {
            break;
        }
-        if (qxl->dirty[i].left > qxl->dirty[i].right ||
+        if (qxl->dirty[i].left < 0 ||
+            qxl->dirty[i].top < 0 ||
+            qxl->dirty[i].left > qxl->dirty[i].right ||
            qxl->dirty[i].top > qxl->dirty[i].bottom ||
            qxl->dirty[i].right > qxl->guest_primary.surface.width ||
            qxl->dirty[i].bottom > qxl->guest_primary.surface.height) {
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -2063,6 +2063,7 @@ static int qxl_init_primary(PCIDevice *dev)

    qxl->id = 0;
    qxl_init_ramsize(qxl);
+    vga->vbe_size = qxl->vgamem_size;
    vga->vram_size_mb = qxl->vga.vram_size >> 20;
    vga_common_init(vga, OBJECT(dev), true);
    vga_init(vga, OBJECT(dev),
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -580,6 +580,93 @@ void vga_ioport_write(void *opaque, uint32_t addr, uint32_t val)
    }
 }

+/*
+ * Sanity check vbe register writes.
+ *
+ * As we don't have a way to signal errors to the guest in the bochs
+ * dispi interface we'll go adjust the registers to the closest valid
+ * value.
+ */
+static void vbe_fixup_regs(VGACommonState *s)
+{
+    uint16_t *r = s->vbe_regs;
+    uint32_t bits, linelength, maxy, offset;
+
+    if (!(r[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED)) {
+        /* vbe is turned off -- nothing to do */
+        return;
+    }
+
+    /* check depth */
+    switch (r[VBE_DISPI_INDEX_BPP]) {
+    case 4:
+    case 8:
+    case 16:
+    case 24:
+    case 32:
+        bits = r[VBE_DISPI_INDEX_BPP];
+        break;
+    case 15:
+        bits = 16;
+        break;
+    default:
+        bits = r[VBE_DISPI_INDEX_BPP] = 8;
+        break;
+    }
+
+    /* check width */
+    r[VBE_DISPI_INDEX_XRES] &= ~7u;
+    if (r[VBE_DISPI_INDEX_XRES] == 0) {
+        r[VBE_DISPI_INDEX_XRES] = 8;
+    }
+    if (r[VBE_DISPI_INDEX_XRES] > VBE_DISPI_MAX_XRES) {
+        r[VBE_DISPI_INDEX_XRES] = VBE_DISPI_MAX_XRES;
+    }
+    r[VBE_DISPI_INDEX_VIRT_WIDTH] &= ~7u;
+    if (r[VBE_DISPI_INDEX_VIRT_WIDTH] > VBE_DISPI_MAX_XRES) {
+        r[VBE_DISPI_INDEX_VIRT_WIDTH] = VBE_DISPI_MAX_XRES;
+    }
+    if (r[VBE_DISPI_INDEX_VIRT_WIDTH] < r[VBE_DISPI_INDEX_XRES]) {
+        r[VBE_DISPI_INDEX_VIRT_WIDTH] = r[VBE_DISPI_INDEX_XRES];
+    }
+
+    /* check height */
+    linelength = r[VBE_DISPI_INDEX_VIRT_WIDTH] * bits / 8;
+    maxy = s->vbe_size / linelength;
+    if (r[VBE_DISPI_INDEX_YRES] == 0) {
+        r[VBE_DISPI_INDEX_YRES] = 1;
+    }
+    if (r[VBE_DISPI_INDEX_YRES] > VBE_DISPI_MAX_YRES) {
+        r[VBE_DISPI_INDEX_YRES] = VBE_DISPI_MAX_YRES;
+    }
+    if (r[VBE_DISPI_INDEX_YRES] > maxy) {
+        r[VBE_DISPI_INDEX_YRES] = maxy;
+    }
+
+    /* check offset */
+    if (r[VBE_DISPI_INDEX_X_OFFSET] > VBE_DISPI_MAX_XRES) {
+        r[VBE_DISPI_INDEX_X_OFFSET] = VBE_DISPI_MAX_XRES;
+    }
+    if (r[VBE_DISPI_INDEX_Y_OFFSET] > VBE_DISPI_MAX_YRES) {
+        r[VBE_DISPI_INDEX_Y_OFFSET] = VBE_DISPI_MAX_YRES;
+    }
+    offset = r[VBE_DISPI_INDEX_X_OFFSET] * bits / 8;
+    offset += r[VBE_DISPI_INDEX_Y_OFFSET] * linelength;
+    if (offset + r[VBE_DISPI_INDEX_YRES] * linelength > s->vbe_size) {
+        r[VBE_DISPI_INDEX_Y_OFFSET] = 0;
+        offset = r[VBE_DISPI_INDEX_X_OFFSET] * bits / 8;
+        if (offset + r[VBE_DISPI_INDEX_YRES] * linelength > s->vbe_size) {
+            r[VBE_DISPI_INDEX_X_OFFSET] = 0;
+            offset = 0;
+        }
+    }
+
+    /* update vga state */
+    r[VBE_DISPI_INDEX_VIRT_HEIGHT] = maxy;
+    s->vbe_line_offset = linelength;
+    s->vbe_start_addr  = offset / 4;
+}
+
 static uint32_t vbe_ioport_read_index(void *opaque, uint32_t addr)
 {
    VGACommonState *s = opaque;
@@ -614,7 +701,7 @@ uint32_t vbe_ioport_read_data(void *opaque, uint32_t addr)
            val = s->vbe_regs[s->vbe_index];
        }
    } else if (s->vbe_index == VBE_DISPI_INDEX_VIDEO_MEMORY_64K) {
-        val = s->vram_size / (64 * 1024);
+        val = s->vbe_size / (64 * 1024);
    } else {
        val = 0;
    }
@@ -649,22 +736,13 @@ void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val)
            }
            break;
        case VBE_DISPI_INDEX_XRES:
-            if ((val <= VBE_DISPI_MAX_XRES) && ((val & 7) == 0)) {
-                s->vbe_regs[s->vbe_index] = val;
-            }
-            break;
        case VBE_DISPI_INDEX_YRES:
-            if (val <= VBE_DISPI_MAX_YRES) {
-                s->vbe_regs[s->vbe_index] = val;
-            }
-            break;
        case VBE_DISPI_INDEX_BPP:
-            if (val == 0)
-                val = 8;
-            if (val == 4 || val == 8 || val == 15 ||
-                val == 16 || val == 24 || val == 32) {
-                s->vbe_regs[s->vbe_index] = val;
-            }
+        case VBE_DISPI_INDEX_VIRT_WIDTH:
+        case VBE_DISPI_INDEX_X_OFFSET:
+        case VBE_DISPI_INDEX_Y_OFFSET:
+            s->vbe_regs[s->vbe_index] = val;
+            vbe_fixup_regs(s);
            break;
        case VBE_DISPI_INDEX_BANK:
            if (s->vbe_regs[VBE_DISPI_INDEX_BPP] == 4) {
@@ -681,19 +759,11 @@ void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val)
                !(s->vbe_regs[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED)) {
                int h, shift_control;

-                s->vbe_regs[VBE_DISPI_INDEX_VIRT_WIDTH] =
-                    s->vbe_regs[VBE_DISPI_INDEX_XRES];
-                s->vbe_regs[VBE_DISPI_INDEX_VIRT_HEIGHT] =
-                    s->vbe_regs[VBE_DISPI_INDEX_YRES];
+                s->vbe_regs[VBE_DISPI_INDEX_VIRT_WIDTH] = 0;
                s->vbe_regs[VBE_DISPI_INDEX_X_OFFSET] = 0;
                s->vbe_regs[VBE_DISPI_INDEX_Y_OFFSET] = 0;
-
-                if (s->vbe_regs[VBE_DISPI_INDEX_BPP] == 4)
-                    s->vbe_line_offset = s->vbe_regs[VBE_DISPI_INDEX_XRES] >> 1;
-                else
-                    s->vbe_line_offset = s->vbe_regs[VBE_DISPI_INDEX_XRES] *
-                        ((s->vbe_regs[VBE_DISPI_INDEX_BPP] + 7) >> 3);
-                s->vbe_start_addr = 0;
+                s->vbe_regs[VBE_DISPI_INDEX_ENABLE] |= VBE_DISPI_ENABLED;
+                vbe_fixup_regs(s);

                /* clear the screen (should be done in BIOS) */
                if (!(val & VBE_DISPI_NOCLEARMEM)) {
@@ -742,40 +812,6 @@ void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val)
            s->vbe_regs[s->vbe_index] = val;
            vga_update_memory_access(s);
            break;
-        case VBE_DISPI_INDEX_VIRT_WIDTH:
-            {
-                int w, h, line_offset;
-
-                if (val < s->vbe_regs[VBE_DISPI_INDEX_XRES])
-                    return;
-                w = val;
-                if (s->vbe_regs[VBE_DISPI_INDEX_BPP] == 4)
-                    line_offset = w >> 1;
-                else
-                    line_offset = w * ((s->vbe_regs[VBE_DISPI_INDEX_BPP] + 7) >> 3);
-                h = s->vram_size / line_offset;
-                /* XXX: support weird bochs semantics ? */
-                if (h < s->vbe_regs[VBE_DISPI_INDEX_YRES])
-                    return;
-                s->vbe_regs[VBE_DISPI_INDEX_VIRT_WIDTH] = w;
-                s->vbe_regs[VBE_DISPI_INDEX_VIRT_HEIGHT] = h;
-                s->vbe_line_offset = line_offset;
-            }
-            break;
-        case VBE_DISPI_INDEX_X_OFFSET:
-        case VBE_DISPI_INDEX_Y_OFFSET:
-            {
-                int x;
-                s->vbe_regs[s->vbe_index] = val;
-                s->vbe_start_addr = s->vbe_line_offset * s->vbe_regs[VBE_DISPI_INDEX_Y_OFFSET];
-                x = s->vbe_regs[VBE_DISPI_INDEX_X_OFFSET];
-                if (s->vbe_regs[VBE_DISPI_INDEX_BPP] == 4)
-                    s->vbe_start_addr += x >> 1;
-                else
-                    s->vbe_start_addr += x * ((s->vbe_regs[VBE_DISPI_INDEX_BPP] + 7) >> 3);
-                s->vbe_start_addr >>= 2;
-            }
-            break;
        default:
            break;
        }
@@ -2289,6 +2325,9 @@ void vga_common_init(VGACommonState *s, Object *obj, bool global_vmstate)
        s->vram_size <<= 1;
    }
    s->vram_size_mb = s->vram_size >> 20;
+    if (!s->vbe_size) {
+        s->vbe_size = s->vram_size;
+    }

    s->is_vbe_vmstate = 1;
    memory_region_init_ram(&s->vram, obj, "vga.vram", s->vram_size);
--- a/hw/display/vga_int.h
+++ b/hw/display/vga_int.h
@@ -93,6 +93,7 @@ typedef struct VGACommonState {
    MemoryRegion vram_vbe;
    uint32_t vram_size;
    uint32_t vram_size_mb; /* property */
+    uint32_t vbe_size;
    uint32_t latch;
    MemoryRegion *chain4_alias;
    uint8_t sr_index;
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -546,6 +546,12 @@ static void fadt_setup(AcpiFadtDescriptorRev1 *fadt, AcpiPmInfo *pm)
                              (1 << ACPI_FADT_F_SLP_BUTTON) |
                              (1 << ACPI_FADT_F_RTC_S4));
    fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_USE_PLATFORM_CLOCK);
+    /* APIC destination mode ("Flat Logical") has an upper limit of 8 CPUs.
+     * For more than 8 CPUs, "Clustered Logical" mode has to be used.
+     */
+    if (max_cpus > 8) {
+        fadt->flags |= cpu_to_le32(1 << ACPI_FADT_F_FORCE_APIC_CLUSTER_MODEL);
+    }
 }


@@ -1393,7 +1399,7 @@ build_rsdp(GArray *rsdp_table, GArray *linker, unsigned rsdt)
 {
    AcpiRsdpDescriptor *rsdp = acpi_data_push(rsdp_table, sizeof *rsdp);

-    bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, 1,
+    bios_linker_loader_alloc(linker, ACPI_BUILD_RSDP_FILE, 16,
                             true /* fseg memory */);

    memcpy(&rsdp->signature, "RSD PTR ", 8);
--- a/hw/i386/acpi-dsdt.dsl
+++ b/hw/i386/acpi-dsdt.dsl
@@ -302,7 +302,7 @@ DefinitionBlock (
 /****************************************************************
 * General purpose events
 ****************************************************************/
-    External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)
+    External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)

    Scope(\_GPE) {
        Name(_HID, "ACPI0006")
@@ -321,7 +321,7 @@ DefinitionBlock (
        }
        Method(_E03) {
            // Memory hotplug event
-            \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
+            \_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
        }
        Method(_L04) {
        }
--- a/hw/i386/acpi-dsdt.hex.generated
+++ b/hw/i386/acpi-dsdt.hex.generated
@@ -8,7 +8,7 @@ static unsigned char AcpiDsdtAmlCode[] = {
 0x0,
 0x0,
 0x1,
-0x2e,
+0x1f,
 0x42,
 0x58,
 0x50,
@@ -31,9 +31,9 @@ static unsigned char AcpiDsdtAmlCode[] = {
 0x4e,
 0x54,
 0x4c,
-0x13,
-0x9,
-0x12,
+0x28,
+0x5,
+0x10,
 0x20,
 0x10,
 0x49,
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -73,7 +73,12 @@
 #endif

 /* Leave a chunk of memory at the top of RAM for the BIOS ACPI tables.  */
-#define ACPI_DATA_SIZE       0x10000
+unsigned acpi_data_size = 0x20000;
+void pc_set_legacy_acpi_data_size(void)
+{
+    acpi_data_size = 0x10000;
+}
+
 #define BIOS_CFG_IOPORT 0x510
 #define FW_CFG_ACPI_TABLES (FW_CFG_ARCH_LOCAL + 0)
 #define FW_CFG_SMBIOS_ENTRIES (FW_CFG_ARCH_LOCAL + 1)
@@ -811,8 +816,9 @@ static void load_linux(FWCfgState *fw_cfg,
        initrd_max = 0x37ffffff;
    }

-    if (initrd_max >= max_ram_size-ACPI_DATA_SIZE)
-    	initrd_max = max_ram_size-ACPI_DATA_SIZE-1;
+    if (initrd_max >= max_ram_size - acpi_data_size) {
+        initrd_max = max_ram_size - acpi_data_size - 1;
+    }

    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_ADDR, cmdline_addr);
    fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, strlen(kernel_cmdline)+1);
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -318,6 +318,7 @@ static void pc_compat_2_0(MachineState *machine)
    legacy_acpi_table_size = 6652;
    smbios_legacy_mode = true;
    has_reserved_memory = false;
+    pc_set_legacy_acpi_data_size();
 }

 static void pc_compat_1_7(MachineState *machine)
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -282,6 +282,7 @@ static void pc_compat_2_0(MachineState *machine)
 {
    smbios_legacy_mode = true;
    has_reserved_memory = false;
+    pc_set_legacy_acpi_data_size();
 }

 static void pc_compat_1_7(MachineState *machine)
--- a/hw/i386/q35-acpi-dsdt.dsl
+++ b/hw/i386/q35-acpi-dsdt.dsl
@@ -410,7 +410,7 @@ DefinitionBlock (
 /****************************************************************
 * General purpose events
 ****************************************************************/
-    External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)
+    External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD, MethodObj)

    Scope(\_GPE) {
        Name(_HID, "ACPI0006")
@@ -425,7 +425,7 @@ DefinitionBlock (
        }
        Method(_E03) {
            // Memory hotplug event
-            \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
+            \_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_SCAN_METHOD()
        }
        Method(_L04) {
        }
--- a/hw/i386/ssdt-mem.dsl
+++ b/hw/i386/ssdt-mem.dsl
@@ -39,10 +39,10 @@ ACPI_EXTRACT_ALL_CODE ssdm_mem_aml
 DefinitionBlock ("ssdt-mem.aml", "SSDT", 0x02, "BXPC", "CSSDT", 0x1)
 {

-    External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_CRS_METHOD, MethodObj)
-    External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD, MethodObj)
-    External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_OST_METHOD, MethodObj)
-    External(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD, MethodObj)
+    External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_CRS_METHOD, MethodObj)
+    External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD, MethodObj)
+    External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_OST_METHOD, MethodObj)
+    External(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD, MethodObj)

    Scope(\_SB) {
 /*  v------------------ DO NOT EDIT ------------------v */
@@ -58,19 +58,19 @@ DefinitionBlock ("ssdt-mem.aml", "SSDT", 0x02, "BXPC", "CSSDT", 0x1)
            Name(_HID, EISAID("PNP0C80"))

            Method(_CRS, 0) {
-                Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_CRS_METHOD(_UID))
+                Return(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_CRS_METHOD(_UID))
            }

            Method(_STA, 0) {
-                Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD(_UID))
+                Return(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_STATUS_METHOD(_UID))
            }

            Method(_PXM, 0) {
-                Return(\_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD(_UID))
+                Return(\_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_PROXIMITY_METHOD(_UID))
            }

            Method(_OST, 3) {
-                \_SB.PCI0.MEMORY_HOPTLUG_DEVICE.MEMORY_SLOT_OST_METHOD(_UID, Arg0, Arg1, Arg2)
+                \_SB.PCI0.MEMORY_HOTPLUG_DEVICE.MEMORY_SLOT_OST_METHOD(_UID, Arg0, Arg1, Arg2)
            }
        }
    }
--- a/hw/i386/ssdt-misc.dsl
+++ b/hw/i386/ssdt-misc.dsl
@@ -120,7 +120,7 @@ DefinitionBlock ("ssdt-misc.aml", "SSDT", 0x01, "BXPC", "BXSSDTSUSP", 0x1)

    External(MEMORY_SLOT_NOTIFY_METHOD, MethodObj)
    Scope(\_SB.PCI0) {
-        Device(MEMORY_HOPTLUG_DEVICE) {
+        Device(MEMORY_HOTPLUG_DEVICE) {
            Name(_HID, "PNP0A06")
            Name(_UID, "Memory hotplug resources")

--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -688,7 +688,8 @@ void ide_dma_cb(void *opaque, int ret)
           sector_num, n, s->dma_cmd);
 #endif

-    if (!ide_sect_range_ok(s, sector_num, n)) {
+    if ((s->dma_cmd == IDE_DMA_READ || s->dma_cmd == IDE_DMA_WRITE) &&
+        !ide_sect_range_ok(s, sector_num, n)) {
        dma_buf_commit(s);
        ide_dma_error(s);
        return;
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -252,6 +252,12 @@ static void pc_dimm_realize(DeviceState *dev, Error **errp)
        error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set");
        return;
    }
+    if (dimm->node >= nb_numa_nodes) {
+        error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %"
+                   PRIu32 "' which exceeds the number of numa nodes: %d",
+                   dimm->node, nb_numa_nodes);
+        return;
+    }
 }

 static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm)
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -120,11 +120,20 @@ typedef struct VFIOINTx {
 } VFIOINTx;

 typedef struct VFIOMSIVector {
-    EventNotifier interrupt; /* eventfd triggered on interrupt */
-    EventNotifier kvm_interrupt; /* eventfd triggered for KVM irqfd bypass */
+    /*
+     * Two interrupt paths are configured per vector.  The first is only used
+     * for interrupts injected via QEMU.  This is typically the non-accel path,
+     * but may also be used when we want QEMU to handle masking and pending
+     * bits.  The KVM path bypasses QEMU and is therefore higher performance,
+     * but requires masking at the device.  virq is used to track the MSI route
+     * through KVM, thus kvm_interrupt is only available when virq is set to a
+     * valid (>= 0) value.
+     */
+    EventNotifier interrupt;
+    EventNotifier kvm_interrupt;
    struct VFIODevice *vdev; /* back pointer to device */
    MSIMessage msg; /* cache the MSI message so we know when it changes */
-    int virq; /* KVM irqchip route for QEMU bypass */
+    int virq;
    bool use;
 } VFIOMSIVector;

@@ -681,13 +690,24 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix)
    fds = (int32_t *)&irq_set->data;

    for (i = 0; i < vdev->nr_vectors; i++) {
-        if (!vdev->msi_vectors[i].use) {
-            fds[i] = -1;
-        } else if (vdev->msi_vectors[i].virq >= 0) {
-            fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].kvm_interrupt);
-        } else {
-            fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
+        int fd = -1;
+
+        /*
+         * MSI vs MSI-X - The guest has direct access to MSI mask and pending
+         * bits, therefore we always use the KVM signaling path when setup.
+         * MSI-X mask and pending bits are emulated, so we want to use the
+         * KVM signaling path only when configured and unmasked.
+         */
+        if (vdev->msi_vectors[i].use) {
+            if (vdev->msi_vectors[i].virq < 0 ||
+                (msix && msix_is_masked(&vdev->pdev, i))) {
+                fd = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
+            } else {
+                fd = event_notifier_get_fd(&vdev->msi_vectors[i].kvm_interrupt);
+            }
        }
+
+        fds[i] = fd;
    }

    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -115,6 +115,7 @@ unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)

 void vhost_net_ack_features(struct vhost_net *net, unsigned features)
 {
+    net->dev.acked_features = net->dev.backend_features;
    vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
 }

@@ -188,9 +189,13 @@ bool vhost_net_query(VHostNetState *net, VirtIODevice *dev)
    return vhost_dev_query(&net->dev, dev);
 }

+static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index)
+{
+    net->dev.vq_index = vq_index;
+}
+
 static int vhost_net_start_one(struct vhost_net *net,
-                               VirtIODevice *dev,
-                               int vq_index)
+                               VirtIODevice *dev)
 {
    struct vhost_vring_file file = { };
    int r;
@@ -201,7 +206,6 @@ static int vhost_net_start_one(struct vhost_net *net,

    net->dev.nvqs = 2;
    net->dev.vqs = net->vqs;
-    net->dev.vq_index = vq_index;

    r = vhost_dev_enable_notifiers(&net->dev, dev);
    if (r < 0) {
@@ -294,7 +298,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
-    int r, i = 0;
+    int r, e, i;

    if (!vhost_net_device_endian_ok(dev)) {
        error_report("vhost-net does not support cross-endian");
@@ -309,11 +313,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
    }

    for (i = 0; i < total_queues; i++) {
-        r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev, i * 2);
-
-        if (r < 0) {
-            goto err;
-        }
+        vhost_net_set_vq_index(get_vhost_net(ncs[i].peer), i * 2);
    }

    r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true);
@@ -322,12 +322,26 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
        goto err;
    }

+    for (i = 0; i < total_queues; i++) {
+        r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev);
+
+        if (r < 0) {
+            goto err_start;
+        }
+    }
+
    return 0;

-err:
+err_start:
    while (--i >= 0) {
        vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
    }
+    e = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
+    if (e < 0) {
+        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
+        fflush(stderr);
+    }
+err:
    return r;
 }

@@ -339,16 +353,16 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
    int i, r;

+    for (i = 0; i < total_queues; i++) {
+        vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
+    }
+
    r = k->set_guest_notifiers(qbus->parent, total_queues * 2, false);
    if (r < 0) {
        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
        fflush(stderr);
    }
    assert(r >= 0);
-
-    for (i = 0; i < total_queues; i++) {
-        vhost_net_stop_one(get_vhost_net(ncs[i].peer), dev);
-    }
 }

 void vhost_net_cleanup(struct vhost_net *net)
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -125,10 +125,23 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
        return;
    }
    if (!n->vhost_started) {
-        int r;
+        int r, i;
+
        if (!vhost_net_query(get_vhost_net(nc->peer), vdev)) {
            return;
        }
+
+        /* Any packets outstanding? Purge them to avoid touching rings
+         * when vhost is running.
+         */
+        for (i = 0;  i < queues; i++) {
+            NetClientState *qnc = qemu_get_subqueue(n->nic, i);
+
+            /* Purge both directions: TX and RX. */
+            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
+            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
+        }
+
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
@@ -1224,7 +1237,12 @@ static void virtio_net_tx_timer(void *opaque)
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
-    assert(vdev->vm_running);
+    /* This happens when the device was stopped but the timer wasn't. */
+    if (!vdev->vm_running) {
+        /* Make sure tx waiting is set, so we'll run when restarted. */
+        assert(q->tx_waiting);
+        return;
+    }

    q->tx_waiting = 0;

@@ -1244,7 +1262,12 @@ static void virtio_net_tx_bh(void *opaque)
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int32_t ret;

-    assert(vdev->vm_running);
+    /* This happens when device was stopped but BH wasn't. */
+    if (!vdev->vm_running) {
+        /* Make sure tx waiting is set, so we'll run when restarted. */
+        assert(q->tx_waiting);
+        return;
+    }

    q->tx_waiting = 0;

--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -34,6 +34,7 @@

 #define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
 #define VMXNET3_MSIX_BAR_SIZE 0x2000
+#define MIN_BUF_SIZE 60

 #define VMXNET3_BAR0_IDX      (0)
 #define VMXNET3_BAR1_IDX      (1)
@@ -1871,12 +1872,21 @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
    VMXNET3State *s = qemu_get_nic_opaque(nc);
    size_t bytes_indicated;
+    uint8_t min_buf[MIN_BUF_SIZE];

    if (!vmxnet3_can_receive(nc)) {
        VMW_PKPRN("Cannot receive now");
        return -1;
    }

+    /* Pad to minimum Ethernet frame length */
+    if (size < sizeof(min_buf)) {
+        memcpy(min_buf, buf, size);
+        memset(&min_buf[size], 0, sizeof(min_buf) - size);
+        buf = min_buf;
+        size = sizeof(min_buf);
+    }
+
    if (s->peer_has_vhdr) {
        vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
        buf += sizeof(struct virtio_net_hdr);
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1147,9 +1147,10 @@ uint32_t pci_default_read_config(PCIDevice *d,
    return le32_to_cpu(val);
 }

-void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
+void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l)
 {
    int i, was_irq_disabled = pci_irq_disabled(d);
+    uint32_t val = val_in;

    for (i = 0; i < l; val >>= 8, ++i) {
        uint8_t wmask = d->wmask[addr + i];
@@ -1171,8 +1172,8 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
                                    & PCI_COMMAND_MASTER);
    }

-    msi_write_config(d, addr, val, l);
-    msix_write_config(d, addr, val, l);
+    msi_write_config(d, addr, val_in, l);
+    msix_write_config(d, addr, val_in, l);
 }

 /***********************************************************/
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1377,7 +1377,6 @@ static void ppc_spapr_init(MachineState *machine)
    spapr_create_nvram(spapr);

    /* Set up PCI */
-    spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW);
    spapr_pci_rtas_init();

    phb = spapr_create_phb(spapr, 0);
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -341,7 +341,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
    }

    /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
-    spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
+    spapr_msi_setmsg(pdev, SPAPR_PCI_MSI_WINDOW, ret_intr_type == RTAS_TYPE_MSIX,
                     irq, req_num);

    /* Add MSI device to cache */
@@ -465,34 +465,6 @@ static const MemoryRegionOps spapr_msi_ops = {
    .endianness = DEVICE_LITTLE_ENDIAN
 };

-void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
-{
-    uint64_t window_size = 4096;
-
-    /*
-     * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
-     * we need to allocate some memory to catch those writes coming
-     * from msi_notify()/msix_notify().
-     * As MSIMessage:addr is going to be the same and MSIMessage:data
-     * is going to be a VIRQ number, 4 bytes of the MSI MR will only
-     * be used.
-     *
-     * For KVM we want to ensure that this memory is a full page so that
-     * our memory slot is of page size granularity.
-     */
-#ifdef CONFIG_KVM
-    if (kvm_enabled()) {
-        window_size = getpagesize();
-    }
-#endif
-
-    spapr->msi_win_addr = addr;
-    memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
-                          "msi", window_size);
-    memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
-                                &spapr->msiwindow);
-}
-
 /*
 * PHB PCI device
 */
@@ -512,6 +484,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
    char *namebuf;
    int i;
    PCIBus *bus;
+    uint64_t msi_window_size = 4096;

    if (sphb->index != -1) {
        hwaddr windows_base;
@@ -604,6 +577,28 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
    address_space_init(&sphb->iommu_as, &sphb->iommu_root,
                       sphb->dtbusname);

+    /*
+     * MSI/MSIX interrupts are triggered by writes to the MSI/MSIX
+     * vectors, so we need to allocate some memory to catch the writes
+     * coming from msi_notify()/msix_notify().
+     * As MSIMessage:addr is always the same and MSIMessage:data
+     * is a VIRQ number, only 4 bytes of the MSI MR will actually
+     * be used.
+     * For KVM we want to ensure that this memory is a full page so that
+     * our memory slot is of page size granularity.
+     */
+#ifdef CONFIG_KVM
+    if (kvm_enabled()) {
+        msi_window_size = getpagesize();
+    }
+#endif
+
+    memory_region_init_io(&sphb->msiwindow, NULL, &spapr_msi_ops, spapr,
+                          "msi", msi_window_size);
+    memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
+                                &sphb->msiwindow);
+
    pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);

    pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -238,6 +238,7 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
    s->dev.nvqs = VHOST_SCSI_VQ_NUM_FIXED + vs->conf.num_queues;
    s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs);
    s->dev.vq_index = 0;
+    s->dev.backend_features = 0;

    ret = vhost_dev_init(&s->dev, (void *)(uintptr_t)vhostfd,
                         VHOST_BACKEND_TYPE_KERNEL, true);
@@ -246,7 +247,6 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
                   strerror(-ret));
        return;
    }
-    s->dev.backend_features = 0;

    error_setg(&s->migration_blocker,
            "vhost-scsi does not support migration");
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -976,7 +976,6 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
 bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
 {
    struct vhost_virtqueue *vq = hdev->vqs + n - hdev->vq_index;
-    assert(hdev->started);
    assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);
    return event_notifier_test_and_clear(&vq->masked_notifier);
 }
@@ -988,7 +987,6 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
    struct VirtQueue *vvq = virtio_get_queue(vdev, n);
    int r, index = n - hdev->vq_index;

-    assert(hdev->started);
    assert(n >= hdev->vq_index && n < hdev->vq_index + hdev->nvqs);

    struct vhost_vring_file file = {
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -1108,7 +1108,10 @@ static void virtio_vmstate_change(void *opaque, int running, RunState state)
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
-    vdev->vm_running = running;
+
+    if (running) {
+        vdev->vm_running = running;
+    }

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
@@ -1121,6 +1124,10 @@ static void virtio_vmstate_change(void *opaque, int running, RunState state)
    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
+
+    if (!running) {
+        vdev->vm_running = running;
+    }
 }

 void virtio_init(VirtIODevice *vdev, const char *name,
--- a/include/hw/acpi/pc-hotplug.h
+++ b/include/hw/acpi/pc-hotplug.h
@@ -32,7 +32,7 @@
 #define ACPI_MEMORY_HOTPLUG_IO_LEN 24
 #define ACPI_MEMORY_HOTPLUG_BASE 0x0a00

-#define MEMORY_HOPTLUG_DEVICE        MHPD
+#define MEMORY_HOTPLUG_DEVICE        MHPD
 #define MEMORY_SLOTS_NUMBER          MDNR
 #define MEMORY_HOTPLUG_IO_REGION     HPMR
 #define MEMORY_SLOT_ADDR_LOW         MRBL
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -177,6 +177,8 @@ void pc_acpi_init(const char *default_dsdt);
 PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size,
                                ram_addr_t above_4g_mem_size);

+void pc_set_legacy_acpi_data_size(void);
+
 #define PCI_HOST_PROP_PCI_HOLE_START   "pci-hole-start"
 #define PCI_HOST_PROP_PCI_HOLE_END     "pci-hole-end"
 #define PCI_HOST_PROP_PCI_HOLE64_START "pci-hole64-start"
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -70,7 +70,7 @@ struct sPAPRPHBState {

    MemoryRegion memspace, iospace;
    hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size;
-    MemoryRegion memwindow, iowindow;
+    MemoryRegion memwindow, iowindow, msiwindow;

    uint32_t dma_liobn;
    AddressSpace iommu_as;
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -13,8 +13,6 @@ struct sPAPRNVRAM;
 typedef struct sPAPREnvironment {
    struct VIOsPAPRBus *vio_bus;
    QLIST_HEAD(, sPAPRPHBState) phbs;
-    hwaddr msi_win_addr;
-    MemoryRegion msiwindow;
    struct sPAPRNVRAM *nvram;
    XICSState *icp;

--- a/net/net.c
+++ b/net/net.c
@@ -41,12 +41,14 @@
 #include "qapi-visit.h"
 #include "qapi/opts-visitor.h"
 #include "qapi/dealloc-visitor.h"
+#include "sysemu/sysemu.h"

 /* Net bridge is currently not supported for W32. */
 #if !defined(_WIN32)
 # define CONFIG_NET_BRIDGE
 #endif

+static VMChangeStateEntry *net_change_state_entry;
 static QTAILQ_HEAD(, NetClientState) net_clients;

 const char *host_net_devices[] = {
@@ -452,6 +454,12 @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len)

 int qemu_can_send_packet(NetClientState *sender)
 {
+    int vm_running = runstate_is_running();
+
+    if (!vm_running) {
+        return 0;
+    }
+
    if (!sender->peer) {
        return 1;
    }
@@ -504,7 +512,8 @@ void qemu_purge_queued_packets(NetClientState *nc)
    qemu_net_queue_purge(nc->peer->incoming_queue, nc);
 }

-void qemu_flush_queued_packets(NetClientState *nc)
+static
+void qemu_flush_or_purge_queued_packets(NetClientState *nc, bool purge)
 {
    nc->receive_disabled = 0;

@@ -518,9 +527,17 @@ void qemu_flush_queued_packets(NetClientState *nc)
         * the file descriptor (for tap, for example).
         */
        qemu_notify_event();
+    } else if (purge) {
+        /* Unable to empty the queue, purge remaining packets */
+        qemu_net_queue_purge(nc->incoming_queue, nc);
    }
 }

+void qemu_flush_queued_packets(NetClientState *nc)
+{
+    qemu_flush_or_purge_queued_packets(nc, false);
+}
+
 static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender,
                                                 unsigned flags,
                                                 const uint8_t *buf, int size,
@@ -1168,6 +1185,22 @@ void qmp_set_link(const char *name, bool up, Error **errp)
    }
 }

+static void net_vm_change_state_handler(void *opaque, int running,
+                                        RunState state)
+{
+    /* Complete all queued packets, to guarantee we don't modify
+     * state later when VM is not running.
+     */
+    if (!running) {
+        NetClientState *nc;
+        NetClientState *tmp;
+
+        QTAILQ_FOREACH_SAFE(nc, &net_clients, next, tmp) {
+            qemu_flush_or_purge_queued_packets(nc, true);
+        }
+    }
+}
+
 void net_cleanup(void)
 {
    NetClientState *nc;
@@ -1183,6 +1216,8 @@ void net_cleanup(void)
            qemu_del_net_client(nc);
        }
    }
+
+    qemu_del_vm_change_state_handler(net_change_state_entry);
 }

 void net_check_clients(void)
@@ -1268,6 +1303,9 @@ int net_init_clients(void)
 #endif
    }

+    net_change_state_entry =
+        qemu_add_vm_change_state_handler(net_vm_change_state_handler, NULL);
+
    QTAILQ_INIT(&net_clients);

    if (qemu_opts_foreach(qemu_find_opts("netdev"), net_init_netdev, NULL, 1) == -1)
--- a/net/queue.c
+++ b/net/queue.c
@@ -233,6 +233,9 @@ void qemu_net_queue_purge(NetQueue *queue, NetClientState *from)
        if (packet->sender == from) {
            QTAILQ_REMOVE(&queue->packets, packet, entry);
            queue->nq_count--;
+            if (packet->sent_cb) {
+                packet->sent_cb(packet->sender, 0);
+            }
            g_free(packet);
        }
    }
--- a/numa.c
+++ b/numa.c
@@ -210,8 +210,8 @@ void set_numa_nodes(void)
            numa_total += numa_info[i].node_mem;
        }
        if (numa_total != ram_size) {
-            error_report("total memory for NUMA nodes (%" PRIu64 ")"
-                         " should equal RAM size (" RAM_ADDR_FMT ")",
+            error_report("total memory for NUMA nodes (0x%" PRIx64 ")"
+                         " should equal RAM size (0x" RAM_ADDR_FMT ")",
                         numa_total, ram_size);
            exit(1);
        }
--- a/qdev-monitor.c
+++ b/qdev-monitor.c
@@ -182,9 +182,10 @@ static const char *find_typename_by_alias(const char *alias)

 int qdev_device_help(QemuOpts *opts)
 {
+    Error *local_err = NULL;
    const char *driver;
-    Property *prop;
-    ObjectClass *klass;
+    DevicePropertyInfoList *prop_list;
+    DevicePropertyInfoList *prop;

    driver = qemu_opt_get(opts, "driver");
    if (driver && is_help_option(driver)) {
@@ -196,35 +197,28 @@ int qdev_device_help(QemuOpts *opts)
        return 0;
    }

-    klass = object_class_by_name(driver);
-    if (!klass) {
+    if (!object_class_by_name(driver)) {
        const char *typename = find_typename_by_alias(driver);

        if (typename) {
            driver = typename;
-            klass = object_class_by_name(driver);
        }
    }

-    if (!object_class_dynamic_cast(klass, TYPE_DEVICE)) {
-        return 0;
+    prop_list = qmp_device_list_properties(driver, &local_err);
+    if (!prop_list) {
+        error_printf("%s\n", error_get_pretty(local_err));
+        error_free(local_err);
+        return 1;
    }
-    do {
-        for (prop = DEVICE_CLASS(klass)->props; prop && prop->name; prop++) {
-            /*
-             * TODO Properties without a parser are just for dirty hacks.
-             * qdev_prop_ptr is the only such PropertyInfo.  It's marked
-             * for removal.  This conditional should be removed along with
-             * it.
-             */
-            if (!prop->info->set) {
-                continue;           /* no way to set it, don't show */
-            }
-            error_printf("%s.%s=%s\n", driver, prop->name,
-                         prop->info->legacy_name ?: prop->info->name);
-        }
-        klass = object_class_get_parent(klass);
-    } while (klass != object_class_by_name(TYPE_DEVICE));
+
+    for (prop = prop_list; prop; prop = prop->next) {
+        error_printf("%s.%s=%s\n", driver,
+                     prop->value->name,
+                     prop->value->type);
+    }
+
+    qapi_free_DevicePropertyInfoList(prop_list);
    return 1;
 }

--- a/qemu-char.c
+++ b/qemu-char.c
@@ -1160,7 +1160,9 @@ static int pty_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
    if (!s->connected) {
        /* guest sends data, check for (re-)connect */
        pty_chr_update_read_handler_locked(chr);
-        return 0;
+        if (!s->connected) {
+            return 0;
+        }
    }
    return io_channel_send(s->fd, buf, len);
 }
--- a/qmp.c
+++ b/qmp.c
@@ -509,6 +509,7 @@ DevicePropertyInfoList *qmp_device_list_properties(const char *typename,
        if (strcmp(prop->name, "type") == 0 ||
            strcmp(prop->name, "realized") == 0 ||
            strcmp(prop->name, "hotpluggable") == 0 ||
+            strcmp(prop->name, "hotplugged") == 0 ||
            strcmp(prop->name, "parent_bus") == 0) {
            continue;
        }
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -1170,7 +1170,14 @@ static inline int cpu_mmu_index (CPUARMState *env)
 static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
                                        target_ulong *cs_base, int *flags)
 {
-    int fpen = extract32(env->cp15.c1_coproc, 20, 2);
+    int fpen;
+
+    if (arm_feature(env, ARM_FEATURE_V6)) {
+        fpen = extract32(env->cp15.c1_coproc, 20, 2);
+    } else {
+        /* CPACR doesn't exist before v6, so VFP is always accessible */
+        fpen = 3;
+    }

    if (is_a64(env)) {
        *pc = env->pc;
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -123,9 +123,10 @@ static void aarch64_a57_initfn(Object *obj)
    cpu->id_isar2 = 0x21232042;
    cpu->id_isar3 = 0x01112131;
    cpu->id_isar4 = 0x00011142;
+    cpu->id_isar5 = 0x00011121;
    cpu->id_aa64pfr0 = 0x00002222;
    cpu->id_aa64dfr0 = 0x10305106;
-    cpu->id_aa64isar0 = 0x00010000;
+    cpu->id_aa64isar0 = 0x00011120;
    cpu->id_aa64mmfr0 = 0x00001124;
    cpu->clidr = 0x0a200023;
    cpu->ccsidr[0] = 0x701fe00a; /* 32KB L1 dcache */
--- a/target-arm/kvm-consts.h
+++ b/target-arm/kvm-consts.h
@@ -17,6 +17,7 @@
 #ifdef CONFIG_KVM
 #include "qemu/compiler.h"
 #include <linux/kvm.h>
+#include <linux/psci.h>

 #define MISMATCH_CHECK(X, Y) QEMU_BUILD_BUG_ON(X != Y)

@@ -38,17 +39,43 @@ MISMATCH_CHECK(CP_REG_SIZE_U64, KVM_REG_SIZE_U64)
 MISMATCH_CHECK(CP_REG_ARM, KVM_REG_ARM)
 MISMATCH_CHECK(CP_REG_ARCH_MASK, KVM_REG_ARCH_MASK)

-#define PSCI_FN_BASE 0x95c1ba5e
-#define PSCI_FN(n) (PSCI_FN_BASE + (n))
-#define PSCI_FN_CPU_SUSPEND PSCI_FN(0)
-#define PSCI_FN_CPU_OFF PSCI_FN(1)
-#define PSCI_FN_CPU_ON PSCI_FN(2)
-#define PSCI_FN_MIGRATE PSCI_FN(3)
+#define QEMU_PSCI_0_1_FN_BASE 0x95c1ba5e
+#define QEMU_PSCI_0_1_FN(n) (QEMU_PSCI_0_1_FN_BASE + (n))
+#define QEMU_PSCI_0_1_FN_CPU_SUSPEND QEMU_PSCI_0_1_FN(0)
+#define QEMU_PSCI_0_1_FN_CPU_OFF QEMU_PSCI_0_1_FN(1)
+#define QEMU_PSCI_0_1_FN_CPU_ON QEMU_PSCI_0_1_FN(2)
+#define QEMU_PSCI_0_1_FN_MIGRATE QEMU_PSCI_0_1_FN(3)

-MISMATCH_CHECK(PSCI_FN_CPU_SUSPEND, KVM_PSCI_FN_CPU_SUSPEND)
-MISMATCH_CHECK(PSCI_FN_CPU_OFF, KVM_PSCI_FN_CPU_OFF)
-MISMATCH_CHECK(PSCI_FN_CPU_ON, KVM_PSCI_FN_CPU_ON)
-MISMATCH_CHECK(PSCI_FN_MIGRATE, KVM_PSCI_FN_MIGRATE)
+MISMATCH_CHECK(QEMU_PSCI_0_1_FN_CPU_SUSPEND, KVM_PSCI_FN_CPU_SUSPEND)
+MISMATCH_CHECK(QEMU_PSCI_0_1_FN_CPU_OFF, KVM_PSCI_FN_CPU_OFF)
+MISMATCH_CHECK(QEMU_PSCI_0_1_FN_CPU_ON, KVM_PSCI_FN_CPU_ON)
+MISMATCH_CHECK(QEMU_PSCI_0_1_FN_MIGRATE, KVM_PSCI_FN_MIGRATE)
+
+#define QEMU_PSCI_0_2_FN_BASE 0x84000000
+#define QEMU_PSCI_0_2_FN(n) (QEMU_PSCI_0_2_FN_BASE + (n))
+
+#define QEMU_PSCI_0_2_64BIT 0x40000000
+#define QEMU_PSCI_0_2_FN64_BASE \
+        (QEMU_PSCI_0_2_FN_BASE + QEMU_PSCI_0_2_64BIT)
+#define QEMU_PSCI_0_2_FN64(n) (QEMU_PSCI_0_2_FN64_BASE + (n))
+
+#define QEMU_PSCI_0_2_FN_CPU_SUSPEND QEMU_PSCI_0_2_FN(1)
+#define QEMU_PSCI_0_2_FN_CPU_OFF QEMU_PSCI_0_2_FN(2)
+#define QEMU_PSCI_0_2_FN_CPU_ON QEMU_PSCI_0_2_FN(3)
+#define QEMU_PSCI_0_2_FN_MIGRATE QEMU_PSCI_0_2_FN(5)
+
+#define QEMU_PSCI_0_2_FN64_CPU_SUSPEND QEMU_PSCI_0_2_FN64(1)
+#define QEMU_PSCI_0_2_FN64_CPU_OFF QEMU_PSCI_0_2_FN64(2)
+#define QEMU_PSCI_0_2_FN64_CPU_ON QEMU_PSCI_0_2_FN64(3)
+#define QEMU_PSCI_0_2_FN64_MIGRATE QEMU_PSCI_0_2_FN64(5)
+
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_SUSPEND, PSCI_0_2_FN_CPU_SUSPEND)
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_OFF, PSCI_0_2_FN_CPU_OFF)
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN_CPU_ON, PSCI_0_2_FN_CPU_ON)
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN_MIGRATE, PSCI_0_2_FN_MIGRATE)
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_CPU_SUSPEND, PSCI_0_2_FN64_CPU_SUSPEND)
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_CPU_ON, PSCI_0_2_FN64_CPU_ON)
+MISMATCH_CHECK(QEMU_PSCI_0_2_FN64_MIGRATE, PSCI_0_2_FN64_MIGRATE)

 /* Note that KVM uses overlapping values for AArch32 and AArch64
 * target CPU numbers. AArch32 targets:
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -1454,7 +1454,7 @@ static void disas_exc(DisasContext *s, uint32_t insn)
            break;
        }
        /* BRK */
-        gen_exception_insn(s, 0, EXCP_BKPT, syn_aa64_bkpt(imm16));
+        gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16));
        break;
    case 2:
        if (op2_ll != 0) {
--- a/target-i386/cpu-qom.h
+++ b/target-i386/cpu-qom.h
@@ -92,6 +92,7 @@ typedef struct X86CPU {
    bool enforce_cpuid;
    bool expose_kvm;
    bool migratable;
+    bool host_features;

    /* if true the CPUID code directly forward host cache leaves to the guest */
    bool cache_info_passthrough;
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -1254,6 +1254,9 @@ void x86_cpu_compat_set_features(const char *cpu_model, FeatureWord w,
    }
 }

+static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w,
+                                                   bool migratable_only);
+
 #ifdef CONFIG_KVM

 static int cpu_x86_fill_model_id(char *str)
@@ -1310,26 +1313,23 @@ static void host_x86_cpu_class_init(ObjectClass *oc, void *data)
    dc->props = host_x86_cpu_properties;
 }

-static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w,
-                                                   bool migratable_only);
-
 static void host_x86_cpu_initfn(Object *obj)
 {
    X86CPU *cpu = X86_CPU(obj);
    CPUX86State *env = &cpu->env;
    KVMState *s = kvm_state;
-    FeatureWord w;

    assert(kvm_enabled());

+    /* We can't fill the features array here because we don't know yet if
+     * "migratable" is true or false.
+     */
+    cpu->host_features = true;
+
    env->cpuid_level = kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX);
    env->cpuid_xlevel = kvm_arch_get_supported_cpuid(s, 0x80000000, 0, R_EAX);
    env->cpuid_xlevel2 = kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX);

-    for (w = 0; w < FEATURE_WORDS; w++) {
-        env->features[w] =
-            x86_cpu_get_supported_feature_word(w, cpu->migratable);
-    }
    object_property_set_bool(OBJECT(cpu), true, "pmu", &error_abort);
 }

@@ -1828,6 +1828,13 @@ static void x86_cpu_parse_featurestr(CPUState *cs, char *features,
        featurestr = strtok(NULL, ",");
    }

+    if (cpu->host_features) {
+        for (w = 0; w < FEATURE_WORDS; w++) {
+            env->features[w] =
+                x86_cpu_get_supported_feature_word(w, cpu->migratable);
+        }
+    }
+
    for (w = 0; w < FEATURE_WORDS; w++) {
        env->features[w] |= plus_features[w];
        env->features[w] &= ~minus_features[w];
@@ -2588,6 +2595,16 @@ static void x86_cpu_reset(CPUState *s)

    env->xcr0 = 1;

+    /*
+     * SDM 11.11.5 requires:
+     *  - IA32_MTRR_DEF_TYPE MSR.E = 0
+     *  - IA32_MTRR_PHYSMASKn.V = 0
+     * All other bits are undefined.  For simplification, zero it all.
+     */
+    env->mtrr_deftype = 0;
+    memset(env->mtrr_var, 0, sizeof(env->mtrr_var));
+    memset(env->mtrr_fixed, 0, sizeof(env->mtrr_fixed));
+
 #if !defined(CONFIG_USER_ONLY)
    /* We hard-wire the BSP to the first CPU. */
    if (s->cpu_index == 0) {
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -337,6 +337,8 @@
 #define MSR_MTRRphysBase(reg)           (0x200 + 2 * (reg))
 #define MSR_MTRRphysMask(reg)           (0x200 + 2 * (reg) + 1)

+#define MSR_MTRRphysIndex(addr)         ((((addr) & ~1u) - 0x200) / 2)
+
 #define MSR_MTRRfix64K_00000            0x250
 #define MSR_MTRRfix16K_80000            0x258
 #define MSR_MTRRfix16K_A0000            0x259
@@ -930,7 +932,7 @@ typedef struct CPUX86State {
    /* MTRRs */
    uint64_t mtrr_fixed[11];
    uint64_t mtrr_deftype;
-    MTRRVar mtrr_var[8];
+    MTRRVar mtrr_var[MSR_MTRRcap_VCNT];

    /* For KVM */
    uint32_t mp_state;
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -615,8 +615,8 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr,
            if (!(pdpe & PG_PRESENT_MASK)) {
                goto do_fault;
            }
-            rsvd_mask |= PG_HI_USER_MASK | PG_NX_MASK;
-            if (pdpe & rsvd_mask) {
+            rsvd_mask |= PG_HI_USER_MASK;
+            if (pdpe & (rsvd_mask | PG_NX_MASK)) {
                goto do_fault_rsvd;
            }
            ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -79,6 +79,7 @@ static int lm_capable_kernel;
 static bool has_msr_hv_hypercall;
 static bool has_msr_hv_vapic;
 static bool has_msr_hv_tsc;
+static bool has_msr_mtrr;

 static bool has_msr_architectural_pmu;
 static uint32_t num_architectural_pmu_counters;
@@ -739,6 +740,10 @@ int kvm_arch_init_vcpu(CPUState *cs)
        env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
    }

+    if (env->features[FEAT_1_EDX] & CPUID_MTRR) {
+        has_msr_mtrr = true;
+    }
+
    return 0;
 }

@@ -1183,7 +1188,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
    CPUX86State *env = &cpu->env;
    struct {
        struct kvm_msrs info;
-        struct kvm_msr_entry entries[100];
+        struct kvm_msr_entry entries[150];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int n = 0, i;
@@ -1278,6 +1283,37 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
            kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_REFERENCE_TSC,
                              env->msr_hv_tsc);
        }
+        if (has_msr_mtrr) {
+            kvm_msr_entry_set(&msrs[n++], MSR_MTRRdefType, env->mtrr_deftype);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix64K_00000, env->mtrr_fixed[0]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix16K_80000, env->mtrr_fixed[1]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix16K_A0000, env->mtrr_fixed[2]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_C0000, env->mtrr_fixed[3]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_C8000, env->mtrr_fixed[4]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_D0000, env->mtrr_fixed[5]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_D8000, env->mtrr_fixed[6]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_E0000, env->mtrr_fixed[7]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_E8000, env->mtrr_fixed[8]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_F0000, env->mtrr_fixed[9]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_F8000, env->mtrr_fixed[10]);
+            for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
+                kvm_msr_entry_set(&msrs[n++],
+                                  MSR_MTRRphysBase(i), env->mtrr_var[i].base);
+                kvm_msr_entry_set(&msrs[n++],
+                                  MSR_MTRRphysMask(i), env->mtrr_var[i].mask);
+            }
+        }

        /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
         *       kvm_put_msr_feature_control. */
@@ -1484,7 +1520,7 @@ static int kvm_get_msrs(X86CPU *cpu)
    CPUX86State *env = &cpu->env;
    struct {
        struct kvm_msrs info;
-        struct kvm_msr_entry entries[100];
+        struct kvm_msr_entry entries[150];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int ret, i, n;
@@ -1572,6 +1608,24 @@ static int kvm_get_msrs(X86CPU *cpu)
    if (has_msr_hv_tsc) {
        msrs[n++].index = HV_X64_MSR_REFERENCE_TSC;
    }
+    if (has_msr_mtrr) {
+        msrs[n++].index = MSR_MTRRdefType;
+        msrs[n++].index = MSR_MTRRfix64K_00000;
+        msrs[n++].index = MSR_MTRRfix16K_80000;
+        msrs[n++].index = MSR_MTRRfix16K_A0000;
+        msrs[n++].index = MSR_MTRRfix4K_C0000;
+        msrs[n++].index = MSR_MTRRfix4K_C8000;
+        msrs[n++].index = MSR_MTRRfix4K_D0000;
+        msrs[n++].index = MSR_MTRRfix4K_D8000;
+        msrs[n++].index = MSR_MTRRfix4K_E0000;
+        msrs[n++].index = MSR_MTRRfix4K_E8000;
+        msrs[n++].index = MSR_MTRRfix4K_F0000;
+        msrs[n++].index = MSR_MTRRfix4K_F8000;
+        for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
+            msrs[n++].index = MSR_MTRRphysBase(i);
+            msrs[n++].index = MSR_MTRRphysMask(i);
+        }
+    }

    msr_data.info.nmsrs = n;
    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
@@ -1692,6 +1746,49 @@ static int kvm_get_msrs(X86CPU *cpu)
        case HV_X64_MSR_REFERENCE_TSC:
            env->msr_hv_tsc = msrs[i].data;
            break;
+        case MSR_MTRRdefType:
+            env->mtrr_deftype = msrs[i].data;
+            break;
+        case MSR_MTRRfix64K_00000:
+            env->mtrr_fixed[0] = msrs[i].data;
+            break;
+        case MSR_MTRRfix16K_80000:
+            env->mtrr_fixed[1] = msrs[i].data;
+            break;
+        case MSR_MTRRfix16K_A0000:
+            env->mtrr_fixed[2] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_C0000:
+            env->mtrr_fixed[3] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_C8000:
+            env->mtrr_fixed[4] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_D0000:
+            env->mtrr_fixed[5] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_D8000:
+            env->mtrr_fixed[6] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_E0000:
+            env->mtrr_fixed[7] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_E8000:
+            env->mtrr_fixed[8] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_F0000:
+            env->mtrr_fixed[9] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_F8000:
+            env->mtrr_fixed[10] = msrs[i].data;
+            break;
+        case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1):
+            if (index & 1) {
+                env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data;
+            } else {
+                env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data;
+            }
+            break;
        }
    }

--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -677,7 +677,7 @@ VMStateDescription vmstate_x86_cpu = {
        /* MTRRs */
        VMSTATE_UINT64_ARRAY_V(env.mtrr_fixed, X86CPU, 11, 8),
        VMSTATE_UINT64_V(env.mtrr_deftype, X86CPU, 8),
-        VMSTATE_MTRR_VARS(env.mtrr_var, X86CPU, 8, 8),
+        VMSTATE_MTRR_VARS(env.mtrr_var, X86CPU, MSR_MTRRcap_VCNT, 8),
        /* KVM-related states */
        VMSTATE_INT32_V(env.interrupt_injected, X86CPU, 9),
        VMSTATE_UINT32_V(env.mp_state, X86CPU, 9),
--- a/tests/qemu-iotests/101
+++ b/tests/qemu-iotests/101
@@ -0,0 +1,58 @@
+#!/bin/bash
+#
+# Test short file I/O
+#
+# Copyright (C) 2014 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=stefanha@redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt raw
+_supported_proto file
+_supported_os Linux
+
+
+echo
+echo "== creating short image file =="
+dd if=/dev/zero of="$TEST_IMG" bs=1 count=320
+
+echo
+echo "== reading bytes beyond EOF gives zeroes =="
+$QEMU_IO -c "read -P 0 0 512" "$TEST_IMG" | _filter_qemu_io
+
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
--- a/tests/qemu-iotests/101.out
+++ b/tests/qemu-iotests/101.out
@@ -0,0 +1,10 @@
+QA output created by 101
+
+== creating short image file ==
+320+0 records in
+320+0 records out
+
+== reading bytes beyond EOF gives zeroes ==
+read 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -100,3 +100,4 @@
 091 rw auto quick
 092 rw auto quick
 095 rw auto quick
+101 rw auto quick
--- a/thread-pool.c
+++ b/thread-pool.c
@@ -21,7 +21,6 @@
 #include "block/coroutine.h"
 #include "trace.h"
 #include "block/block_int.h"
-#include "qemu/event_notifier.h"
 #include "block/thread-pool.h"
 #include "qemu/main-loop.h"

@@ -57,8 +56,8 @@ struct ThreadPoolElement {
 };

 struct ThreadPool {
-    EventNotifier notifier;
    AioContext *ctx;
+    QEMUBH *completion_bh;
    QemuMutex lock;
    QemuCond check_cancel;
    QemuCond worker_stopped;
@@ -119,7 +118,7 @@ static void *worker_thread(void *opaque)
            qemu_cond_broadcast(&pool->check_cancel);
        }

-        event_notifier_set(&pool->notifier);
+        qemu_bh_schedule(pool->completion_bh);
    }

    pool->cur_threads--;
@@ -168,12 +167,11 @@ static void spawn_thread(ThreadPool *pool)
    }
 }

-static void event_notifier_ready(EventNotifier *notifier)
+static void thread_pool_completion_bh(void *opaque)
 {
-    ThreadPool *pool = container_of(notifier, ThreadPool, notifier);
+    ThreadPool *pool = opaque;
    ThreadPoolElement *elem, *next;

-    event_notifier_test_and_clear(notifier);
 restart:
    QLIST_FOREACH_SAFE(elem, &pool->head, all, next) {
        if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
@@ -187,6 +185,12 @@ restart:
            QLIST_REMOVE(elem, all);
            /* Read state before ret.  */
            smp_rmb();
+
+            /* Schedule ourselves in case elem->common.cb() calls aio_poll() to
+             * wait for another request that completed at the same time.
+             */
+            qemu_bh_schedule(pool->completion_bh);
+
            elem->common.cb(elem->common.opaque, elem->ret);
            qemu_aio_release(elem);
            goto restart;
@@ -215,7 +219,7 @@ static void thread_pool_cancel(BlockDriverAIOCB *acb)
        qemu_sem_timedwait(&pool->sem, 0) == 0) {
        QTAILQ_REMOVE(&pool->request_list, elem, reqs);
        elem->state = THREAD_CANCELED;
-        event_notifier_set(&pool->notifier);
+        qemu_bh_schedule(pool->completion_bh);
    } else {
        pool->pending_cancellations++;
        while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) {
@@ -224,7 +228,7 @@ static void thread_pool_cancel(BlockDriverAIOCB *acb)
        pool->pending_cancellations--;
    }
    qemu_mutex_unlock(&pool->lock);
-    event_notifier_ready(&pool->notifier);
+    thread_pool_completion_bh(pool);
 }

 static const AIOCBInfo thread_pool_aiocb_info = {
@@ -293,8 +297,8 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
    }

    memset(pool, 0, sizeof(*pool));
-    event_notifier_init(&pool->notifier, false);
    pool->ctx = ctx;
+    pool->completion_bh = aio_bh_new(ctx, thread_pool_completion_bh, pool);
    qemu_mutex_init(&pool->lock);
    qemu_cond_init(&pool->check_cancel);
    qemu_cond_init(&pool->worker_stopped);
@@ -304,8 +308,6 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)

    QLIST_INIT(&pool->head);
    QTAILQ_INIT(&pool->request_list);
-
-    aio_set_event_notifier(ctx, &pool->notifier, event_notifier_ready);
 }

 ThreadPool *thread_pool_new(AioContext *ctx)
@@ -339,11 +341,10 @@ void thread_pool_free(ThreadPool *pool)

    qemu_mutex_unlock(&pool->lock);

-    aio_set_event_notifier(pool->ctx, &pool->notifier, NULL);
+    qemu_bh_delete(pool->completion_bh);
    qemu_sem_destroy(&pool->sem);
    qemu_cond_destroy(&pool->check_cancel);
    qemu_cond_destroy(&pool->worker_stopped);
    qemu_mutex_destroy(&pool->lock);
-    event_notifier_cleanup(&pool->notifier);
    g_free(pool);
 }
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -334,11 +334,23 @@ void qemu_spice_create_host_memslot(SimpleSpiceDisplay *ssd)
 void qemu_spice_create_host_primary(SimpleSpiceDisplay *ssd)
 {
    QXLDevSurfaceCreate surface;
+    uint64_t surface_size;

    memset(&surface, 0, sizeof(surface));

-    dprint(1, "%s/%d: %dx%d\n", __func__, ssd->qxl.id,
-           surface_width(ssd->ds), surface_height(ssd->ds));
+    surface_size = (uint64_t) surface_width(ssd->ds) *
+        surface_height(ssd->ds) * 4;
+    assert(surface_size > 0);
+    assert(surface_size < INT_MAX);
+    if (ssd->bufsize < surface_size) {
+        ssd->bufsize = surface_size;
+        g_free(ssd->buf);
+        ssd->buf = g_malloc(ssd->bufsize);
+    }
+
+    dprint(1, "%s/%d: %ux%u (size %" PRIu64 "/%d)\n", __func__, ssd->qxl.id,
+           surface_width(ssd->ds), surface_height(ssd->ds),
+           surface_size, ssd->bufsize);

    surface.format     = SPICE_SURFACE_FMT_32_xRGB;
    surface.width      = surface_width(ssd->ds);
@@ -369,8 +381,6 @@ void qemu_spice_display_init_common(SimpleSpiceDisplay *ssd)
    if (ssd->num_surfaces == 0) {
        ssd->num_surfaces = 1024;
    }
-    ssd->bufsize = (16 * 1024 * 1024);
-    ssd->buf = g_malloc(ssd->bufsize);
 }

 /* display listener callbacks */
@@ -495,7 +505,7 @@ static void interface_get_init_info(QXLInstance *sin, QXLDevInitInfo *info)
    info->num_memslots = NUM_MEMSLOTS;
    info->num_memslots_groups = NUM_MEMSLOTS_GROUPS;
    info->internal_groupslot_id = 0;
-    info->qxl_ram_size = ssd->bufsize;
+    info->qxl_ram_size = 16 * 1024 * 1024;
    info->n_surfaces = ssd->num_surfaces;
 }

--- a/vl.c
+++ b/vl.c
@@ -4009,11 +4009,6 @@ int main(int argc, char **argv, char **envp)
        qemu_set_version(machine_class->hw_version);
    }

-    if (qemu_opts_foreach(qemu_find_opts("object"),
-                          object_create, NULL, 0) != 0) {
-        exit(1);
-    }
-
    /* Init CPU def lists, based on config
     * - Must be called after all the qemu_read_config_file() calls
     * - Must be called before list_cpus()
@@ -4225,6 +4220,11 @@ int main(int argc, char **argv, char **envp)
        exit(0);
    }

+    if (qemu_opts_foreach(qemu_find_opts("object"),
+                          object_create, NULL, 0) != 0) {
+        exit(1);
+    }
+
    machine_opts = qemu_get_machine_opts();
    if (qemu_opt_foreach(machine_opts, object_set_property, current_machine,
                         1) < 0) {
@@ -1 +1 @@
-2.1.0
+2.1.1