Compare commits


328 Commits

Author SHA1 Message Date
Gerd Hoffmann
060ab76356 gtk: don't exit early in case gtk init fails
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
2015-06-11 11:37:56 +02:00
Peter Maydell
0e12e61ff9 Merge remote-tracking branch 'remotes/kraxel/tags/pull-vga-20150610-1' into staging
stdvga: factor out mmio subregion init
virtio-gpu: add virtio gpu core code, 2d mode

# gpg: Signature made Wed Jun 10 10:03:11 2015 BST using RSA key ID D3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"

* remotes/kraxel/tags/pull-vga-20150610-1:
  virtio-gpu/2d: add virtio gpu core code
  virtio: update headers, add virtio-gpu (2d)
  stdvga: factor out mmio subregion init
  stdvga: pass VGACommonState instead of PCIVGAState
  stdvga: fix offset in pci_vga_ioport_read

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-10 18:13:58 +01:00
Peter Maydell
3974c9d8cc Merge remote-tracking branch 'remotes/kraxel/tags/pull-fw_cfg-20150610-1' into staging
fw_cfg: drop write support, qemu cmdline support, bugfixes.
bios-tables-test: fix smbios test.

# gpg: Signature made Wed Jun 10 07:29:53 2015 BST using RSA key ID D3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"

* remotes/kraxel/tags/pull-fw_cfg-20150610-1:
  bios-tables-test: handle false-positive smbios signature matches
  fw_cfg: insert fw_cfg file blobs via qemu cmdline
  fw_cfg: prohibit insertion of duplicate fw_cfg file names
  fw_cfg: prevent selector key conflict
  fw_cfg: remove support for guest-side data writes
  fw_cfg: fix FW_CFG_BOOT_DEVICE update on ppc and sparc
  fw_cfg: add fw_cfg_modify_i16 (update) method
  QemuOpts: increase number of vm_config_groups

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-10 16:52:35 +01:00
Peter Maydell
eed8a8f572 Merge remote-tracking branch 'remotes/awilliam/tags/vfio-update-20150609.0' into staging
Initial VFIO platform device support, v2 (Eric Auger, et al.)

# gpg: Signature made Tue Jun  9 15:25:40 2015 BST using RSA key ID 3BB08B22
# gpg: Good signature from "Alex Williamson <alex.williamson@redhat.com>"
# gpg:                 aka "Alex Williamson <alex@shazbot.org>"
# gpg:                 aka "Alex Williamson <alwillia@redhat.com>"
# gpg:                 aka "Alex Williamson <alex.l.williamson@gmail.com>"

* remotes/awilliam/tags/vfio-update-20150609.0:
  hw/vfio/platform: calxeda xgmac device
  hw/vfio/platform: add irq assignment
  hw/vfio/platform: vfio-platform skeleton

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-10 15:46:39 +01:00
Peter Maydell
e015fe008a Merge remote-tracking branch 'remotes/stefanha/tags/CVE-2015-3209-pcnet-tx-buffer-fix-pull-request' into staging
# gpg: Signature made Wed Jun 10 15:04:11 2015 BST using RSA key ID 81AB73C8
# gpg: Good signature from "Stefan Hajnoczi <stefanha@redhat.com>"
# gpg:                 aka "Stefan Hajnoczi <stefanha@gmail.com>"

* remotes/stefanha/tags/CVE-2015-3209-pcnet-tx-buffer-fix-pull-request:
  pcnet: force the buffer access to be in bounds during tx

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-10 15:10:14 +01:00
Petr Matousek
9f7c594c00 pcnet: force the buffer access to be in bounds during tx
4096 is the maximum length per TMD, and it is also currently the size of
the relay buffer the pcnet driver uses for sending the packet data to QEMU
for further processing. With a packet spanning multiple TMDs it can
happen that the overall packet size becomes bigger than sizeof(buffer),
which results in memory corruption.

Fix this by only allowing at most sizeof(buffer) bytes to be queued.

This is CVE-2015-3209.

[Fixed 3-space indentation to QEMU's 4-space coding standard.
--Stefan]

Signed-off-by: Petr Matousek <pmatouse@redhat.com>
Reported-by: Matt Tait <matttait@google.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-06-10 15:03:02 +01:00
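
A minimal sketch of the kind of bounds check this fix describes, with illustrative names rather than the actual hw/net/pcnet.c code:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    #define TX_BUFFER_SIZE 4096   /* the 4096-byte relay buffer mentioned above */

    /* Append one TMD fragment to the relay buffer, clamping anything that
     * would run past sizeof(buffer) instead of corrupting memory.
     * Returns the new number of bytes queued. */
    static size_t queue_tx_fragment(uint8_t *buffer, size_t used,
                                    const uint8_t *frag, size_t frag_len)
    {
        if (frag_len > TX_BUFFER_SIZE - used) {
            frag_len = TX_BUFFER_SIZE - used;
        }
        memcpy(buffer + used, frag, frag_len);
        return used + frag_len;
    }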
Gerd Hoffmann
62232bf484 virtio-gpu/2d: add virtio gpu core code
This patch adds the core code for virtio gpu emulation,
covering 2d support.

Written by Dave Airlie and Gerd Hoffmann.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-06-10 11:02:00 +02:00
Gerd Hoffmann
53476e07d2 virtio: update headers, add virtio-gpu (2d)
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-06-10 11:02:00 +02:00
Gerd Hoffmann
220869e12d stdvga: factor out mmio subregion init
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-06-10 11:02:00 +02:00
Gerd Hoffmann
cf45ec6a52 stdvga: pass VGACommonState instead of PCIVGAState
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-06-10 11:01:59 +02:00
Gerd Hoffmann
24cdff7c82 stdvga: fix offset in pci_vga_ioport_read
Similar to pci_vga_ioport_write.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-06-10 11:01:59 +02:00
Gabriel L. Somlo
5efed5a172 bios-tables-test: handle false-positive smbios signature matches
It has been reported that sometimes the .rodata section of SeaBIOS,
containing the constant string against which the SMBIOS signature
ends up being compared, also falls within the guest f-segment. In
that case, the test obviously fails, unless we continue searching
for the *real* SMBIOS entry point.

Rather than stopping at the first match for the SMBIOS signature
("_SM_") in the f-segment (0xF0000-0xFFFFF), continue scanning
until either a valid entry point table is found, or the f-segment
has been exhausted.

Reported-by: Bruce Rogers <brogers@suse.com>
Signed-off-by: Gabriel Somlo <somlo@cmu.edu>
Tested-by: Bruce Rogers <brogers@suse.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-06-10 08:06:47 +02:00
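
A hedged sketch of the scanning strategy described above; the real test lives in tests/bios-tables-test.c, and the validation here is simplified to the SMBIOS 2.x length/checksum fields:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Simplified validity check: length field at offset 5, and the entry
     * point bytes must sum to zero. */
    static bool smbios_entry_point_valid(const uint8_t *p)
    {
        uint8_t len = p[5], sum = 0;
        if (len < 0x1f) {
            return false;
        }
        for (uint8_t i = 0; i < len; i++) {
            sum += p[i];
        }
        return sum == 0;
    }

    /* Scan an image of the f-segment (0xF0000-0xFFFFF) and keep going past
     * false-positive "_SM_" matches until a valid entry point is found. */
    static const uint8_t *find_smbios_entry(const uint8_t *fseg, size_t len)
    {
        for (size_t off = 0; off + 0x1f <= len; off += 16) {
            if (memcmp(fseg + off, "_SM_", 4) == 0 &&
                smbios_entry_point_valid(fseg + off)) {
                return fseg + off;
            }
        }
        return NULL;   /* f-segment exhausted */
    }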
Gabriel L. Somlo
81b2b81062 fw_cfg: insert fw_cfg file blobs via qemu cmdline
Allow user supplied files to be inserted into the fw_cfg
device before starting the guest. Since fw_cfg_add_file()
already disallows duplicate fw_cfg file names, qemu will
exit with an error message if the user supplies multiple
blobs with the same fw_cfg file name, or if a blob name
collides with a fw_cfg name programmatically added from
within the QEMU source code. A warning message will be
printed if the fw_cfg item name does not begin with the
prefix "opt/", which is recommended for external, user
provided blobs.

Signed-off-by: Gabriel Somlo <somlo@cmu.edu>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-06-10 08:06:46 +02:00
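
For reference, a hedged usage example of the command-line form this patch describes; the item name and blob file below are made up, and "opt/" is the recommended prefix for user-provided blobs:

    qemu-system-x86_64 -fw_cfg name=opt/com.example/blob,file=./blob.bin ...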
Gabriel L. Somlo
0eb973f915 fw_cfg: prohibit insertion of duplicate fw_cfg file names
Exit with an error (instead of simply logging a trace event)
whenever the same fw_cfg file name is added multiple times via
one of the fw_cfg_add_file[_callback]() host-side API calls.

Signed-off-by: Gabriel Somlo <somlo@cmu.edu>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-06-10 08:00:37 +02:00
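
A minimal sketch of the host-side duplicate check being described, using an illustrative entry array rather than the real fw_cfg.c data structures:

    #include <stddef.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct fw_cfg_file_entry {
        const char *name;
    };

    /* Exit with an error instead of merely tracing when a fw_cfg file name
     * is added a second time. */
    static void check_duplicate_name(const struct fw_cfg_file_entry *files,
                                     size_t count, const char *new_name)
    {
        for (size_t i = 0; i < count; i++) {
            if (strcmp(files[i].name, new_name) == 0) {
                fprintf(stderr, "duplicate fw_cfg file name: %s\n", new_name);
                exit(1);
            }
        }
    }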
Gabriel L. Somlo
0f9b214139 fw_cfg: prevent selector key conflict
Enforce a single assignment of data for each distinct selector key.

Signed-off-by: Gabriel Somlo <somlo@cmu.edu>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-06-10 08:00:37 +02:00
Gabriel L. Somlo
023e314856 fw_cfg: remove support for guest-side data writes
From this point forward, any guest-side writes to the fw_cfg
data register will be treated as no-ops. This patch also removes
the unused host-side API function fw_cfg_add_callback(), which
allowed the registration of a callback to be executed each time
the guest completed a full overwrite of a given fw_cfg data item.

Signed-off-by: Gabriel Somlo <somlo@cmu.edu>
Reviewed-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-06-10 08:00:37 +02:00
Gabriel L. Somlo
48779e5018 fw_cfg: fix FW_CFG_BOOT_DEVICE update on ppc and sparc
On ppc, sparc, and sparc64, the value of the FW_CFG_BOOT_DEVICE 16bit
fw_cfg entry is repeatedly modified from a series of callbacks, which
currently results in the previous value's dynamically allocated memory
being leaked.

This patch switches updating to the new fw_cfg_modify_i16() call, which
does not cause memory leaks.

Signed-off-by: Gabriel Somlo <somlo@cmu.edu>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-06-10 08:00:37 +02:00
Gabriel L. Somlo
1edd34b638 fw_cfg: add fw_cfg_modify_i16 (update) method
Allow the ability to modify the value of an existing 16-bit integer
fw_cfg item.

Signed-off-by: Gabriel Somlo <somlo@cmu.edu>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-06-10 08:00:37 +02:00
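
A hedged sketch of an update-in-place helper in the spirit of fw_cfg_modify_i16(); the real implementation lives in hw/nvram/fw_cfg.c, so treat the layout below as illustrative:

    #include <stdint.h>
    #include <stdlib.h>

    struct fw_cfg_entry {
        void *data;
        size_t len;
    };

    /* Replace the payload of an existing 16-bit item, freeing the previous
     * buffer instead of leaking it (the leak addressed by the ppc/sparc
     * FW_CFG_BOOT_DEVICE fix above).  QEMU additionally converts the value
     * to little endian before storing it. */
    static void modify_i16(struct fw_cfg_entry *e, uint16_t value)
    {
        uint16_t *copy = malloc(sizeof(*copy));
        *copy = value;
        free(e->data);
        e->data = copy;
        e->len = sizeof(*copy);
    }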
Gerd Hoffmann
1ceaefbd0d QemuOpts: increase number of vm_config_groups
Adding the fw_cfg cmd line support patch by
Gabriel L. Somlo hits the limit.

Fix this by making the array larger.

Cc: Gabriel L. Somlo <somlo@cmu.edu>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
2015-06-10 08:00:37 +02:00
Peter Maydell
b0411142f4 Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20150609' into staging
Collected TCG patches

# gpg: Signature made Tue Jun  9 15:06:18 2015 BST using RSA key ID 4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"

* remotes/rth/tags/pull-tcg-20150609:
  tcg/optimize: rename tcg_constant_folding
  tcg/optimize: fold constant test in tcg_opt_gen_mov
  tcg/optimize: fold temp copies test in tcg_opt_gen_mov
  tcg/optimize: remove opc argument from tcg_opt_gen_mov
  tcg/optimize: remove opc argument from tcg_opt_gen_movi
  tcg: fix dead computation for repeated input arguments
  tcg: fix register allocation with two aliased dead inputs
  tcg: Handle MO_AMASK in tcg_dump_ops
  tcg: Mask TCGMemOp appropriately for indexing

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-09 15:29:34 +01:00
Eric Auger
7a8d15d770 hw/vfio/platform: calxeda xgmac device
The platform device class has become abstract. This patch introduces
a calxeda xgmac device that derives from it.

Signed-off-by: Eric Auger <eric.auger@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2015-06-09 08:17:17 -06:00
Aurelien Jarno
36e60ef6ac tcg/optimize: rename tcg_constant_folding
The tcg_constant_folding function ends up doing all the optimizations
(which is a good thing, as it avoids looping over all ops multiple times),
so make that clear and just rename it tcg_optimize.

Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <1433447607-31184-6-git-send-email-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-06-09 07:00:56 -07:00
Aurelien Jarno
97a79eb70d tcg/optimize: fold constant test in tcg_opt_gen_mov
Most of the calls to tcg_opt_gen_mov are preceded by a test to check if
the source temp is a constant. Fold that into the tcg_opt_gen_mov
function.

Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <1433495958-9508-1-git-send-email-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-06-09 07:00:56 -07:00
Aurelien Jarno
5365718a9a tcg/optimize: fold temp copies test in tcg_opt_gen_mov
Each call to tcg_opt_gen_mov is preceded by a test to check if the
source and destination temps are copies. Fold that into the
tcg_opt_gen_mov function.

Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <1433447607-31184-4-git-send-email-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-06-09 07:00:56 -07:00
Aurelien Jarno
8d6a91602e tcg/optimize: remove opc argument from tcg_opt_gen_mov
We can get the opcode using the TCGOp pointer. It needs to be
dereferenced, but that is done a few lines below anyway when writing
the new value.

Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <1433447607-31184-3-git-send-email-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-06-09 07:00:56 -07:00
Aurelien Jarno
ebd27391b0 tcg/optimize: remove opc argument from tcg_opt_gen_movi
We can get the opcode using the TCGOp pointer. It needs to be
dereferenced, but that is done a few lines below anyway when writing
the new value.

Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <1433447607-31184-2-git-send-email-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-06-09 07:00:56 -07:00
Aurelien Jarno
c19f47bf5e tcg: fix dead computation for repeated input arguments
When the same temp is used twice or more as an input argument to a TCG
instruction, the dead computation code doesn't recognize the second use
as a dead temp. This is because the temp is marked as live in the same
loop where dead inputs are checked.

The fix is to split the loop into two parts. This avoids emitting a move
and using a register for the movcond instruction when it is used as
"move if true" on x86-64. This might bring more improvements on RISC TCG
targets which don't have outputs aliased to inputs.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <1433447228-29425-3-git-send-email-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-06-09 06:42:27 -07:00
Aurelien Jarno
7e1df267a7 tcg: fix register allocation with two aliased dead inputs
For TCG ops with two output registers (add2, sub2, div2, div2u), when
the same input temp is used for the two inputs aliased to the two
outputs, and when these inputs are both dead, the register allocation
code wrongly assigned the same register to both outputs.

This happens for example with sub2 t1, t2, t3, t3, t4, t5, when t3 is
not used anymore after the TCG op.  In that case the same register is
used for t1, t2 and t3.

The fix is to look for an already allocated aliased input when allocating
a dead aliased input and to check that the register is not already
used.

Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Message-Id: <1433447228-29425-2-git-send-email-aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-06-09 06:42:27 -07:00
Richard Henderson
59c4b7e8df tcg: Handle MO_AMASK in tcg_dump_ops
Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>
Tested-by: Yongbok Kim <yongbok.kim@imgtec.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-06-09 06:35:53 -07:00
Richard Henderson
2b7ec66f02 tcg: Mask TCGMemOp appropriately for indexing
The addition of MO_AMASK means that places that used inverted masks
need to be changed to use positive masks, and places that failed to
mask the intended bits need updating.

Reviewed-by: Yongbok Kim <yongbok.kim@imgtec.com>
Tested-by: Yongbok Kim <yongbok.kim@imgtec.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
2015-06-09 06:35:29 -07:00
Peter Maydell
44ee94e486 Merge remote-tracking branch 'remotes/borntraeger/tags/s390x-20150609' into staging
s390x/virtio-ccw: migration and virtio for 2.4

1. Migration fixups
2. virtio 9pfs

# gpg: Signature made Tue Jun  9 09:00:05 2015 BST using RSA key ID B5A61C7C
# gpg: Good signature from "Christian Borntraeger (IBM) <borntraeger@de.ibm.com>"

* remotes/borntraeger/tags/s390x-20150609:
  s390x/migration: add comment about floating point migration
  s390x/kvm: always ignore empty vcpu interrupt state
  virtio-ccw/migration: Migrate config vector for virtio devices
  virtio-ccw: add support for 9pfs

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-09 11:07:41 +01:00
Peter Maydell
b781a60b10 Merge remote-tracking branch 'remotes/armbru/tags/pull-error-2015-06-09' into staging
Error reporting patches

# gpg: Signature made Tue Jun  9 06:42:15 2015 BST using RSA key ID EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"

* remotes/armbru/tags/pull-error-2015-06-09:
  vhost-user: Improve -netdev/netdev_add/-net/... error reporting
  QemuOpts: Convert qemu_opt_foreach() to Error
  QemuOpts: Drop qemu_opt_foreach() parameter abort_on_failure
  blkdebug: Simplify passing of Error through qemu_opts_foreach()
  QemuOpts: Convert qemu_opts_foreach() to Error
  QemuOpts: Drop qemu_opts_foreach() parameter abort_on_failure
  vl: Fail right after first bad -object
  vl: Print -device help at most once
  vl: Report failure to sandbox at most once

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-09 10:05:29 +01:00
Christian Borntraeger
6028ef0757 s390x/migration: add comment about floating point migration
commit 46c804def4 ("s390x: move fpu regs into a subsection
of the vmstate") moved the fprs into a subsection and bumped
the version number. This will allow us to skip transferring the fprs in
the future if necessary. Add a comment to mark the "return true"
as intentional.

CC: Juan Quintela <quintela@redhat.com>
CC: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Message-Id: <1433758884-2997-1-git-send-email-borntraeger@de.ibm.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
2015-06-09 09:54:57 +02:00
Markus Armbruster
8190483196 vhost-user: Improve -netdev/netdev_add/-net/... error reporting
When -netdev vhost-user fails, it first reports a specific error, then
one or more generic ones, like this:

    $ qemu-system-x86_64 -netdev vhost-user,id=foo,chardev=xxx
    qemu-system-x86_64: -netdev vhost-user,id=foo,chardev=xxx: chardev "xxx" not found
    qemu-system-x86_64: -netdev vhost-user,id=foo,chardev=xxx: No suitable chardev found
    qemu-system-x86_64: -netdev vhost-user,id=foo,chardev=xxx: Device 'vhost-user' could not be initialized

With the command line, the messages go to stderr.  In HMP, they go to
the monitor.  In QMP, the last one becomes the error reply, and the
others go to stderr.

Convert net_init_vhost_user() and its helpers to Error.  This
suppresses the unwanted unspecific error messages, and makes the
specific error the QMP error reply.

Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
2015-06-09 07:40:24 +02:00
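
A hedged sketch of the Error-reporting pattern this conversion moves to; error_setg() and the Error type are QEMU's (qapi/error.h), while the surrounding function and chardev lookup are illustrative:

    #include "qapi/error.h"   /* Error, error_setg() — compile within the QEMU tree */
    #include <stdbool.h>

    static bool chardev_exists(const char *id);   /* illustrative stand-in */

    /* Report one specific error through errp and stop, so callers no longer
     * pile generic "No suitable chardev found" / "could not be initialized"
     * messages on top of it. */
    static int net_init_example(const char *chardev_id, Error **errp)
    {
        if (!chardev_exists(chardev_id)) {
            error_setg(errp, "chardev \"%s\" not found", chardev_id);
            return -1;
        }
        return 0;
    }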
Markus Armbruster
71df1d8337 QemuOpts: Convert qemu_opt_foreach() to Error
Retain the function value for now, to permit selective conversion of
its callers.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2015-06-09 07:40:23 +02:00
Markus Armbruster
1640b200d5 QemuOpts: Drop qemu_opt_foreach() parameter abort_on_failure
When the argument is non-zero, qemu_opt_foreach() stops on callback
returning non-zero, and returns that value.

When the argument is zero, it doesn't stop, and returns the callback's
value from the last iteration.

The two callers that pass zero could just as well pass one:

* qemu_spice_init()'s callback add_channel() either returns zero or
  exit()s.

* config_write_opts()'s callback config_write_opt() always returns
  zero.

Drop the parameter, and always stop.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2015-06-09 07:40:23 +02:00
Markus Armbruster
8809cfc38e blkdebug: Simplify passing of Error through qemu_opts_foreach()
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: qemu-block@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
2015-06-09 07:40:23 +02:00
Markus Armbruster
28d0de7a4f QemuOpts: Convert qemu_opts_foreach() to Error
Retain the function value for now, to permit selective conversion of
its callers.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
2015-06-09 07:37:37 +02:00
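
A hedged sketch of the iteration contract after the conversion: stop at the first callback failure and let the callback report details through Error. The types below are illustrative, not the exact QemuOpts signatures:

    #include "qapi/error.h"   /* Error — compile within the QEMU tree */

    typedef struct Opts Opts;
    typedef int (*loop_fn)(void *opaque, Opts *opts, Error **errp);

    struct OptsList {
        Opts **entries;
        int count;
    };

    static int opts_foreach(struct OptsList *list, loop_fn func,
                            void *opaque, Error **errp)
    {
        for (int i = 0; i < list->count; i++) {
            int rc = func(opaque, list->entries[i], errp);
            if (rc) {
                return rc;   /* always stop on failure; no abort_on_failure flag */
            }
        }
        return 0;
    }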
Markus Armbruster
a4c7367f7d QemuOpts: Drop qemu_opts_foreach() parameter abort_on_failure
When the argument is non-zero, qemu_opts_foreach() stops on callback
returning non-zero, and returns that value.

When the argument is zero, it doesn't stop, and returns the bit-wise
inclusive or of all the return values.  Funky :)

The callers that pass zero could just as well pass one, because their
callbacks can't return anything but zero:

* qemu_add_globals()'s callback qdev_add_one_global()

* qemu_config_write()'s callback config_write_opts()

* main()'s callbacks default_driver_check(), drive_enable_snapshot(),
  vnc_init_func()

Drop the parameter, and always stop.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Acked-by: Kevin Wolf <kwolf@redhat.com>
2015-06-08 19:33:20 +02:00
Markus Armbruster
8122928a52 vl: Fail right after first bad -object
Failure to create an object with -object is a fatal error.  However,
we delay the actual exit until all -object are processed.  On the one
hand, this permits detection of genuine additional errors.  On the
other hand, it can muddy the waters with uninteresting additional
errors, e.g. when a later -object tries to reference a prior one that
failed.

We generally stop right on the first bad option, so do that for
-object as well.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2015-06-08 19:33:20 +02:00
Markus Armbruster
8416abb3b0 vl: Print -device help at most once
We print it once for each -device help.  Not helpful.  Stop after the
first one.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2015-06-08 19:33:20 +02:00
Markus Armbruster
092b21aa7e vl: Report failure to sandbox at most once
It's reported once per -sandbox on.  Stop on the first failure, like
we do for other options.

Not fixed: "-sandbox on -sandbox off" should leave the sandbox off.
It doesn't.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
2015-06-08 19:33:20 +02:00
Eric Auger
38559979bf hw/vfio/platform: add irq assignment
This patch adds the code required to assign interrupts to
a guest. The interrupts are mediated through user-handled
eventfds only.

Signed-off-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Vikram Sethi <vikrams@codeaurora.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2015-06-08 09:25:26 -06:00
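
A hedged sketch of wiring one interrupt index to a user-handled eventfd through the VFIO uapi; error handling is trimmed and the surrounding QEMU plumbing is omitted:

    #include <linux/vfio.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/eventfd.h>
    #include <sys/ioctl.h>

    /* Attach an eventfd to IRQ index 'index' of an opened VFIO device fd.
     * The kernel signals the eventfd on each interrupt; userspace reads it
     * and injects the IRQ into the guest. */
    static int set_irq_eventfd(int device_fd, uint32_t index)
    {
        int32_t efd = eventfd(0, EFD_CLOEXEC);
        size_t sz = sizeof(struct vfio_irq_set) + sizeof(int32_t);
        struct vfio_irq_set *set = calloc(1, sz);
        int ret;

        set->argsz = sz;
        set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
        set->index = index;
        set->start = 0;
        set->count = 1;
        memcpy(set->data, &efd, sizeof(efd));

        ret = ioctl(device_fd, VFIO_DEVICE_SET_IRQS, set);
        free(set);
        return ret < 0 ? ret : efd;
    }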
Eric Auger
0ea2730bef hw/vfio/platform: vfio-platform skeleton
Minimal VFIO platform implementation supporting register space
user mapping but not IRQ assignment.

Signed-off-by: Kim Phillips <kim.phillips@linaro.org>
Signed-off-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Vikram Sethi <vikrams@codeaurora.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
2015-06-08 09:25:25 -06:00
Peter Maydell
ee09f84e6b Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging
* KVM error improvement from Laurent
* CONFIG_PARALLEL fix from Mirek
* Atomic/optimized dirty bitmap access from myself and Stefan
* BUILD_DIR convenience/bugfix from Peter C
* Memory leak fix from Shannon
* SMM improvements (though still TCG only) from myself and Gerd, acked by mst

# gpg: Signature made Fri Jun  5 18:45:20 2015 BST using RSA key ID 78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (62 commits)
  update Linux headers from kvm/next
  atomics: add explicit compiler fence in __atomic memory barriers
  ich9: implement SMI_LOCK
  q35: implement TSEG
  q35: add test for SMRAM.D_LCK
  q35: implement SMRAM.D_LCK
  q35: add config space wmask for SMRAM and ESMRAMC
  q35: fix ESMRAMC default
  q35: implement high SMRAM
  hw/i386: remove smram_update
  target-i386: use memory API to implement SMRAM
  hw/i386: add a separate region that tracks the SMRAME bit
  target-i386: create a separate AddressSpace for each CPU
  vl: run "late" notifiers immediately
  qom: add object_property_add_const_link
  vl: allow full-blown QemuOpts syntax for -global
  pflash_cfi01: add secure property
  pflash_cfi01: change to new-style MMIO accessors
  pflash_cfi01: change big-endian property to BIT type
  target-i386: wake up processors that receive an SMI
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-08 15:57:41 +01:00
Peter Maydell
2e29dd7c44 Merge remote-tracking branch 'remotes/jnsnow/tags/ide-pull-request' into staging
# gpg: Signature made Fri Jun  5 20:59:07 2015 BST using RSA key ID AAFC390E
# gpg: Good signature from "John Snow (John Huston) <jsnow@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: FAEB 9711 A12C F475 812F  18F2 88A9 064D 1835 61EB
#      Subkey fingerprint: F9B7 ABDB BCAC DF95 BE76  CBD0 7DEF 8106 AAFC 390E

* remotes/jnsnow/tags/ide-pull-request:
  macio: remove remainder_len DBDMA_io property
  macio: update comment/constants to reflect the new code
  macio: switch pmac_dma_write() over to new offset/len implementation
  macio: switch pmac_dma_read() over to new offset/len implementation
  fdc-test: Test state for existing cases more thoroughly
  fdc: Fix MSR.RQM flag
  fdc: Disentangle phases in fdctrl_read_data()
  fdc: Code cleanup in fdctrl_write_data()
  fdc: Use phase in fdctrl_write_data()
  fdc: Introduce fdctrl->phase
  fdc: Rename fdctrl_set_fifo() to fdctrl_to_result_phase()
  fdc: Rename fdctrl_reset_fifo() to fdctrl_to_command_phase()

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-08 14:07:32 +01:00
Alexander Graf
0daba1f037 machine: Drop use of DEFAULT_RAM_SIZE in help text
As of commit 076b35b5a (machine: add default_ram_size to machine
class) we no longer have a global default ram size, but instead
machine specific defaults.  When invoking qemu --help we don't know
which machine you selected, so we can't tell the user the default RAM
size in the help text anymore.

Thus I don't see an easy way to expose the default ram size to the
user in the help text.  The easiest option IMHO is to just drop this
piece of information.

Reported-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
Acked-by: Laurent Desnogues <laurent.desnogues@gmail.com>
Acked-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Message-id: 1433495103-62084-1-git-send-email-agraf@suse.de
[PMM: rewrapped long commit message lines]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-08 13:31:34 +01:00
Markus Armbruster
779cec4d20 monitor: Fix QMP ABI breakage around "id"
Commit 65207c5 accidentally dropped a line of code we need, along with
a comment that then became wrong.  This made QMP reject "id":

    {"execute": "system_reset", "id": "1"}
    {"error": {"class": "GenericError", "desc": "QMP input object member 'id' is unexpected"}}

Put the lost line right back, so QMP again accepts and returns "id",
as promised by the ABI:

    {"execute": "system_reset", "id": "1"}
    {"return": {}, "id": "1"}

Reported-by: Fabio Fantoni <fabio.fantoni@m2r.biz>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Don Slutz <dslutz@verizon.com>
Tested-by: Fabio Fantoni <fabio.fantoni@m2r.biz>
Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
Tested-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-id: 1433753070-12632-2-git-send-email-armbru@redhat.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-08 12:12:11 +01:00
Paolo Bonzini
24a3142692 update Linux headers from kvm/next
This is kvm.git commit 05ff30bb56c6b3d3000519d6e02ed35678ddae3b.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 19:45:13 +02:00
Paolo Bonzini
3bbf572345 atomics: add explicit compiler fence in __atomic memory barriers
__atomic_thread_fence does not include a compiler barrier; in the
C++11 memory model, fences take effect in combination with other
atomic operations.  GCC implements this by making __atomic_load and
__atomic_store access memory as if the pointer was volatile, and
leaves no trace whatsoever of acquire and release fences in the
compiler's intermediate representation.

In QEMU, we want memory barriers to act on all memory, but at the same
time we would like to use __atomic_thread_fence for portability reasons.
Add compiler barriers manually around the __atomic_thread_fence.

Message-Id: <1433334080-14912-1-git-send-email-pbonzini@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 19:45:13 +02:00
Gerd Hoffmann
11e66a15a0 ich9: implement SMI_LOCK
Add write mask for the smi enable register, so we can disable write
access to certain bits.  Open all bits on reset.  Disable write access
to GBL_SMI_EN when SMI_LOCK (in ich9 lpc pci config space) is set.
Write access to SMI_LOCK itself is disabled too.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 19:45:13 +02:00
Gerd Hoffmann
bafc90bdc5 q35: implement TSEG
TSEG provides larger amounts of SMRAM than the 128 KB available with
legacy SMRAM and high SMRAM.

Route access to tseg into nowhere when enabled, for both cpus and
busmaster dma, and add tseg window to smram region, so cpus can access
it in smm mode.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 19:45:13 +02:00
Gerd Hoffmann
66e2ec2417 q35: add test for SMRAM.D_LCK
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
[Fix compilation of the newly introduced test. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 19:45:09 +02:00
Gerd Hoffmann
68c77acfb1 q35: implement SMRAM.D_LCK
Once the SMRAM.D_LCK bit has been set by the guest, several bits in SMRAM
and ESMRAMC become read-only until the next machine reset.  Implement
this by updating the wmask accordingly when the guest sets the lock bit.
As the lock bit itself is locked down too, we don't need to worry about
the guest clearing it.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:40 +02:00
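
A hedged sketch of the lock-bit handling; the bit positions and register layout below are placeholders, not the real MCH definitions from hw/pci-host/q35.c:

    #include <stdint.h>

    #define SMRAM_D_LCK       0x10   /* illustrative bit position */
    #define SMRAM_LOCKED_BITS 0x4f   /* bits that become read-only once locked */

    struct mch_state {
        uint8_t smram;         /* config-space register value        */
        uint8_t smram_wmask;   /* which bits the guest may still set */
    };

    /* Called after a guest write to SMRAM: once D_LCK is set, strip write
     * access to the locked bits, including D_LCK itself, until reset. */
    static void smram_update_wmask(struct mch_state *s)
    {
        if (s->smram & SMRAM_D_LCK) {
            s->smram_wmask &= (uint8_t)~(SMRAM_LOCKED_BITS | SMRAM_D_LCK);
        }
    }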
Gerd Hoffmann
b66a67d751 q35: add config space wmask for SMRAM and ESMRAMC
Not all bits in SMRAM and ESMRAMC can be changed by the guest.
Add wmask defines accordingly and set them in mch_reset().

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:39 +02:00
Gerd Hoffmann
7744752402 q35: fix ESMRAMC default
The cache bits in ESMRAMC are hardcoded to 1 (=disabled) according to
the q35 mch specs.  Add and use a define with this default.

While at it, also update the SMRAM default to use the name (no code
change; it just makes things a bit more readable).

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:39 +02:00
Paolo Bonzini
64130fa4a1 q35: implement high SMRAM
When H_SMRAME is 1, low memory at 0xa0000 is left alone by
SMM, and instead the chipset maps the 0xa0000-0xbffff window at
0xfeda0000-0xfedbffff.  This affects both the "non-SMM" view controlled
by D_OPEN and the SMM view controlled by G_SMRAME, so add two new
MemoryRegions and toggle the enabled/disabled state of all four
in mch_update_smram.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:39 +02:00
Paolo Bonzini
3de70c0899 hw/i386: remove smram_update
It's easier to inline it now that most of its work is done by the CPU
(rather than the chipset) through /machine/smram and the memory API.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:39 +02:00
Paolo Bonzini
f809c60512 target-i386: use memory API to implement SMRAM
Remove cpu_smm_register and cpu_smm_update.  Instead, each CPU
address space gets an extra region which is an alias of
/machine/smram.  This extra region is enabled or disabled
as the CPU enters/exits SMM.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:39 +02:00
Paolo Bonzini
fe6567d5fd hw/i386: add a separate region that tracks the SMRAME bit
This region is exported at /machine/smram.  It is "empty" if
SMRAME=0 and points to SMRAM if SMRAME=1.  The CPU will
enable/disable it as it enters or exits SMRAM.

While touching nearby code, the existing memory region setup was
slightly inconsistent.  The smram_region is *disabled* in order to open
SMRAM (because the smram_region shows the low VRAM instead of the RAM
at 0xa0000).  Because SMRAM is closed at startup, the smram_region must
be enabled when creating the i440fx or q35 devices.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:39 +02:00
Paolo Bonzini
2001d0cd6d target-i386: create a separate AddressSpace for each CPU
Different CPUs can be in SMM or not at the same time, thus they
will see different things where the chipset places SMRAM.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:39 +02:00
Paolo Bonzini
71cdd1cb91 vl: run "late" notifiers immediately
If a machine_init_done notifier is added late, as part of a hot-plugged
device, run it immediately.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:39 +02:00
Paolo Bonzini
fb9e7e334b qom: add object_property_add_const_link
Suggested-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:39 +02:00
Paolo Bonzini
3751d7c43f vl: allow full-blown QemuOpts syntax for -global
-global does not work for drivers that have a dot in their name, such as
cfi.pflash01.  This is just a parsing limitation, because such globals
can be declared easily inside a -readconfig file.

To allow this usage, support the full QemuOpts key/value syntax for -global
too, for example "-global driver=cfi.pflash01,property=secure,value=on".
The two formats do not conflict, because the key/value syntax does not have
a period before the first equal sign.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:39 +02:00
Paolo Bonzini
f71e42a5c9 pflash_cfi01: add secure property
When this property is set, MMIO accesses are only allowed with the
MEMTXATTRS_SECURE attribute.  This is used for secure access to UEFI
variables stored in flash.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:39 +02:00
Paolo Bonzini
5aa113f0a2 pflash_cfi01: change to new-style MMIO accessors
This is a required step to implement read_with_attrs and write_with_attrs.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:39 +02:00
Paolo Bonzini
e98094221e pflash_cfi01: change big-endian property to BIT type
Make this consistent with the secure property, added in the next patch.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:36:31 +02:00
Paolo Bonzini
a9bad65d2c target-i386: wake up processors that receive an SMI
An SMI should definitely wake up a processor in halted state!
This lets OVMF boot with SMM on multiprocessor systems, although
it halts very soon after that with a "CpuIndex != BspIndex"
assertion failure.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:01 +02:00
Paolo Bonzini
b4854f1384 target-i386: set G=1 in SMM big real mode selectors
Because the limit field's bits 31:20 are 1, G should be 1.
VMX actually enforces this; let's do it for completeness
in QEMU as well.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:01 +02:00
Paolo Bonzini
9982f74bad target-i386: mask NMIs on entry to SMM
QEMU is not blocking NMIs on entry to SMM.  Implementing this has to
cover a few corner cases, because:

- NMIs can then be enabled by an IRET instruction and there
is no mechanism to _set_ the "NMIs masked" flag on exit from SMM:
"A special case can occur if an SMI handler nests inside an NMI handler
and then another NMI occurs. [...] When the processor enters SMM while
executing an NMI handler, the processor saves the SMRAM state save map
but does not save the attribute to keep NMI interrupts disabled."

- However, there is some hidden state, because "If NMIs were blocked
before the SMI occurred [and no IRET is executed while in SMM], they
are blocked after execution of RSM."  This is represented by the new
HF2_SMM_INSIDE_NMI_MASK bit.  If it is zero, NMIs are _unblocked_
on exit from RSM.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:01 +02:00
Paolo Bonzini
3f7d846486 target-i386: Use correct memory attributes for ioport accesses
In order to do this, stop using the cpu_in*/out* helpers, and instead
access address_space_io directly.

cpu_in* and cpu_out* remain for usage in the monitor, in qtest, and
in Xen.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
Paolo Bonzini
b216aa6c0f target-i386: Use correct memory attributes for memory accesses
These include page table walks, SVM accesses and SMM state save accesses.

The bulk of the patch is obtained with

   sed -i 's/\(\<[a-z_]*_phys\(_notdirty\)\?\>(cs\)->as,/x86_\1,/'

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
Paolo Bonzini
f794aa4a2f target-i386: introduce cpu_get_mem_attrs
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
Victor CLEMENT
d7a0f71d9a icount: print a warning if there is no more deadline in sleep=no mode
While qemu is running in sleep=no mode, a warning will be printed
when no timer deadline is set.
As this mode is intended for getting deterministic virtual time, if no
timer is set on the virtual clock this determinism is broken.

Signed-off-by: Victor CLEMENT <victor.clement@openwide.fr>
Message-Id: <1432912446-9811-4-git-send-email-victor.clement@openwide.fr>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
Victor CLEMENT
f1f4b57e88 icount: add sleep parameter to the icount option to set icount_sleep mode
The 'sleep' parameter sets the icount_sleep mode, which is enabled by
default. To disable it, add the 'sleep=no' parameter (or 'nosleep') to the
qemu -icount option.

Signed-off-by: Victor CLEMENT <victor.clement@openwide.fr>
Message-Id: <1432912446-9811-3-git-send-email-victor.clement@openwide.fr>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
Victor CLEMENT
5045e9d912 icount: implement a new icount_sleep mode toggling real-time cpu sleep
When the icount_sleep mode is disabled, the QEMU_VIRTUAL_CLOCK runs at the
maximum possible speed by warping the sleep times of the virtual cpu to the
soonest clock deadline. The virtual clock will be updated only according
to the instruction counter.

Signed-off-by: Victor CLEMENT <victor.clement@openwide.fr>
Message-Id: <1432912446-9811-2-git-send-email-victor.clement@openwide.fr>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
Paolo Bonzini
ec05ec26f9 memory: use mr->ram_addr in "is this RAM?" assertions
mr->terminates alone doesn't guarantee that we are looking at a RAM region.
mr->ram_addr also has to be checked, in order to distinguish RAM and I/O
regions.

So, do the following:

1) add a new define RAM_ADDR_INVALID, and test it in the assertions
instead of mr->terminates

2) IOMMU regions were not setting mr->ram_addr to a bogus value; initialize
it in the instance_init function so that the new assertions also fire
for IOMMU regions.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
Stefan Hajnoczi
5f2cb94688 memory: make cpu_physical_memory_sync_dirty_bitmap() fully atomic
The fast path of cpu_physical_memory_sync_dirty_bitmap() directly
manipulates the dirty bitmap.  Use atomic_xchg() to make the
test-and-clear atomic.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <1417519399-3166-7-git-send-email-stefanha@redhat.com>
[Only do xchg on nonzero words. - Paolo]
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
Stefan Hajnoczi
03eebc9e32 memory: replace cpu_physical_memory_reset_dirty() with test-and-clear
The cpu_physical_memory_reset_dirty() function is sometimes used
together with cpu_physical_memory_get_dirty().  This is not atomic since
two separate accesses to the dirty memory bitmap are made.

Turn cpu_physical_memory_reset_dirty() and
cpu_physical_memory_clear_dirty_range_type() into the atomic
cpu_physical_memory_test_and_clear_dirty().

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <1417519399-3166-6-git-send-email-stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
Stefan Hajnoczi
20015f72bd migration: move dirty bitmap sync to ram_addr.h
The dirty memory bitmap is managed by ram_addr.h and copied to
migration_bitmap[] periodically during live migration.

Move the code to sync the bitmap to ram_addr.h where related code lives.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <1417519399-3166-5-git-send-email-stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
Stefan Hajnoczi
d114875b9a memory: use atomic ops for setting dirty memory bits
Use set_bit_atomic() and bitmap_set_atomic() so that multiple threads
can dirty memory without race conditions.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <1417519399-3166-4-git-send-email-stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
Stefan Hajnoczi
36546e5b80 bitmap: add atomic test and clear
The new bitmap_test_and_clear_atomic() function clears a range and
returns whether or not the bits were set.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <1417519399-3166-3-git-send-email-stefanha@redhat.com>
[Test before xchg; then a full barrier is needed at the end just like
 in the previous patch.  The barrier can be avoided if we did at least
 one xchg.  - Paolo]
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
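
A hedged sketch of the word-by-word test-and-clear, written with C11 atomics instead of QEMU's own atomic_xchg()/smp_mb() wrappers:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    /* Clear 'nwords' whole bitmap words and report whether any bit was set.
     * Only non-zero words are exchanged ("test before xchg"); if nothing
     * was cleared, issue the full barrier manually, per the note above. */
    static bool test_and_clear_words(_Atomic unsigned long *map,
                                     size_t start, size_t nwords)
    {
        bool dirty = false;

        for (size_t i = 0; i < nwords; i++) {
            if (atomic_load(&map[start + i]) != 0 &&
                atomic_exchange(&map[start + i], 0) != 0) {
                dirty = true;
            }
        }
        if (!dirty) {
            atomic_thread_fence(memory_order_seq_cst);
        }
        return dirty;
    }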
Stefan Hajnoczi
9f02cfc84b bitmap: add atomic set functions
Use atomic_or() for atomic bitmaps where several threads may set bits at
the same time.  This avoids the race condition between threads loading
an element, bitwise ORing, and then storing the element.

When setting all bits in a word we can avoid atomic ops and instead just
use an smp_mb() at the end.

Most bitmap users don't need atomicity so introduce new functions.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <1417519399-3166-2-git-send-email-stefanha@redhat.com>
[Avoid barrier in the single word case, use full barrier instead of write.
 - Paolo]
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
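
A matching hedged sketch for the set side, again using C11 atomics in place of QEMU's atomic_or() and smp_mb():

    #include <stdatomic.h>
    #include <stddef.h>

    /* Set 'count' bits starting at 'start'.  Whole words are plain stores;
     * partial words use fetch_or so concurrent setters don't lose bits.
     * A final fence stands in for the smp_mb() mentioned above. */
    static void bitmap_set_atomic_sketch(_Atomic unsigned long *map,
                                         size_t start, size_t count)
    {
        const size_t bpl = 8 * sizeof(unsigned long);
        size_t bit = start;

        while (bit < start + count) {
            size_t word = bit / bpl;
            if (bit % bpl == 0 && start + count - bit >= bpl) {
                atomic_store_explicit(&map[word], ~0UL,
                                      memory_order_relaxed);
                bit += bpl;
            } else {
                atomic_fetch_or(&map[word], 1UL << (bit % bpl));
                bit++;
            }
        }
        atomic_thread_fence(memory_order_seq_cst);
    }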
Paolo Bonzini
9460dee4b2 memory: do not touch code dirty bitmap unless TCG is enabled
cpu_physical_memory_set_dirty_lebitmap unconditionally syncs the
DIRTY_MEMORY_CODE bitmap.  This however is unused unless TCG is
enabled.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
Paolo Bonzini
e87f7778b6 exec: only check relevant bitmaps for cleanliness
Most of the time, not all bitmaps have to be marked as dirty;
do not do anything if the interesting ones are already dirty.
Previously, any clean bitmap would have caused all the bitmaps to be
marked dirty.

In fact, unless running TCG most of the time bitmap operations need
not be done at all, because memory_region_is_logging returns zero.
In this case, skip the call to cpu_physical_memory_range_includes_clean
altogether as well.

With this patch, cpu_physical_memory_set_dirty_range is called
unconditionally, so there need not be anymore a separate call to
xen_modified_memory.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:10:00 +02:00
Paolo Bonzini
72b47e79ce exec: invert return value of cpu_physical_memory_get_clean, rename
While it is obvious that cpu_physical_memory_get_dirty returns true even if
a single page is dirty, the same is not true for cpu_physical_memory_get_clean;
one would expect that it returns true only if all the pages are clean, but
it actually looks for even one clean page.  (By contrast, the caller of that
function, cpu_physical_memory_range_includes_clean, has a good name).

To clarify, rename the function to cpu_physical_memory_all_dirty and return
true if _all_ the pages are dirty.  This is the opposite of the previous
meaning, because "all are 1" is the same as "not (any is 0)", so we have to
modify cpu_physical_memory_range_includes_clean as well.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
58d2707e87 exec: pass client mask to cpu_physical_memory_set_dirty_range
This cuts in half the cost of bitmap operations (which will become more
expensive when made atomic) during migration on non-VRAM regions.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
fc377bcf61 translate-all: make less of tb_invalidate_phys_page_range depend on is_cpu_write_access
is_cpu_write_access is only set if tb_invalidate_phys_page_range is called
from tb_invalidate_phys_page_fast, and hence from notdirty_mem_write.
However:

- the code bitmap can be built directly in tb_invalidate_phys_page_fast
  (unconditionally, since is_cpu_write_access would always be passed as 1);

- the virtual address is not needed to mark the page as "not containing
  code" (dirty code bitmap = 1), so we can also remove that use of
  is_cpu_write_access.  For calls of tb_invalidate_phys_page_range
  that do not come from notdirty_mem_write, the next call to
  notdirty_mem_write will notice that the page does not contain code
  anymore, and will fix up the TLB entry.

The parameter needs to remain in order to guard accesses to cpu->mem_io_pc.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
9564f52da7 cputlb: remove useless arguments to tlb_unprotect_code_phys, rename
These days modification of the TLB is done in notdirty_mem_write,
so the virtual address and env pointer are unnecessary.

The new name of the function, tlb_unprotect_code, is consistent with
tlb_protect_code.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
358653391b translate-all: remove unnecessary argument to tb_invalidate_phys_range
The is_cpu_write_access argument is always 0, remove it.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
1652b97476 exec: move functions to translate-all.h
Remove them from the sundry exec-all.h header, since they are only used by
the TCG runtime in exec.c and user-exec.c.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
845b6214a3 exec: use memory_region_get_dirty_log_mask to optimize dirty tracking
The memory API can now return the exact set of bitmaps that have to
be tracked.  Use it instead of the in_migration variable.

In the next patches, we will also use it to set only DIRTY_MEMORY_VGA
or DIRTY_MEMORY_MIGRATION if necessary.  This can make a difference
for dataplane, especially after the dirty bitmap is changed to use
more expensive atomic operations.

Of some interest is the change to stl_phys_notdirty.  When migration
was introduced, stl_phys_notdirty was changed to effectively behave
as stl_phys during migration.  In fact, if one looks at the function as it
was in the beginning (commit 8df1cd0, physical memory access functions,
2005-01-28), at the time the dirty bitmap was the equivalent of
DIRTY_MEMORY_CODE nowadays; hence, the function simply should not touch
the dirty code bits.  This patch changes it to do the intended thing.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
49dfcec403 ram_addr: tweaks to xen_modified_memory
Invoke xen_modified_memory from cpu_physical_memory_set_dirty_range_nocode;
it is akin to DIRTY_MEMORY_MIGRATION, so set it together with that bitmap.
The remaining call from invalidate_and_set_dirty's "else" branch will go
away soon.

Second, fix the second argument to the function in the
cpu_physical_memory_set_dirty_lebitmap call site.  That function is only used
by KVM, but it is better to be clean anyway.

Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
1bfbac4ee1 kvm: remove special handling of DIRTY_MEMORY_MIGRATION in the dirty log mask
One recent example is commit 4cc856f (kvm-all: Sync dirty-bitmap from
kvm before kvm destroy the corresponding dirty_bitmap, 2015-04-02).
Another performance problem is that KVM keeps tracking dirty pages
after a failed live migration, which causes bad performance due to
disallowing huge page mapping.

Thanks to the previous patch, KVM can now stop hooking into
log_global_start/stop.  This simplifies the KVM code noticeably.

Reported-by: Wanpeng Li <wanpeng.li@linux.intel.com>
Reported-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
6f6a5ef3e4 memory: include DIRTY_MEMORY_MIGRATION in the dirty log mask
The separate handling of DIRTY_MEMORY_MIGRATION, which does not
call log_start/log_stop callbacks when it changes in a region's
dirty logging mask, has caused several bugs.

One recent example is commit 4cc856f (kvm-all: Sync dirty-bitmap from
kvm before kvm destroy the corresponding dirty_bitmap, 2015-04-02).
Another performance problem is that KVM keeps tracking dirty pages
after a failed live migration, which causes bad performance due to
disallowing huge page mapping.

This patch removes the root cause of the problem by reporting
DIRTY_MEMORY_MIGRATION changes via log_start and log_stop.
Note that we now have to rebuild the FlatView when global dirty
logging is enabled or disabled; this ensures that log_start and
log_stop callbacks are invoked.

This will also be used to make the setting of bitmaps conditional.
In general, this patch lets users of the memory API ignore the
global state of dirty logging if they handle dirty logging
generically per region.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
ea8cb1a8d9 kvm: accept non-mapped memory in kvm_dirty_pages_log_change
It is okay if memory is not mapped into the guest but has dirty logging
enabled.  When this happens, KVM will not do anything and only accesses
from the host will be logged.

This can be triggered by iofuzz.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
677e7805cf memory: track DIRTY_MEMORY_CODE in mr->dirty_log_mask
DIRTY_MEMORY_CODE is only needed for TCG.  By adding it directly to
mr->dirty_log_mask, we avoid testing for TCG everywhere a region is
checked for the enabled/disabled state of dirty logging.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
42af3e3a02 ui/console: remove dpy_gfx_update_dirty
dpy_gfx_update_dirty expects DIRTY_MEMORY_VGA logging to be always on,
but that will not be the case soon.  Because it computes the memory
region on the fly for every update (with memory_region_find), it cannot
enable/disable logging by itself.

We could always treat updates as invalidations if dirty logging is
not enabled, assuming that the board will enable logging on the
RAM region that includes the framebuffer.

However, the function is unused, so just drop it.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
d55d42078b framebuffer: check memory_region_is_logging
framebuffer.c expects DIRTY_MEMORY_VGA logging to be always on, but that
will not be the case soon.  Because framebuffer.c computes the memory
region on the fly for every update (with memory_region_find), it cannot
enable/disable logging by itself.

Instead, always treat updates as invalidations if dirty logging is
not enabled, assuming that the board will enable logging on the
RAM region that includes the framebuffer.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
b2dfd71c48 memory: prepare for multiple bits in the dirty log mask
When the dirty log mask will also cover other bits than DIRTY_MEMORY_VGA,
some listeners may be interested in the overall zero/non-zero value of
the dirty log mask; others may be interested in the value of single bits.

For this reason, always call log_start/log_stop if bits have respectively
appeared or disappeared, and pass the old and new values of the dirty log
mask so that listeners can distinguish the kinds of change.

For example, KVM checks if dirty logging used to be completely disabled
(in log_start) or is now completely disabled (in log_stop).  On the
other hand, Xen has to check manually if DIRTY_MEMORY_VGA changed,
since that is the only bit it cares about.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:59 +02:00
Paolo Bonzini
2d1a35bef0 memory: differentiate memory_region_is_logging and memory_region_get_dirty_log_mask
For now memory regions only track DIRTY_MEMORY_VGA individually, but
this will change soon.  To support this, split memory_region_is_logging
in two functions: one that returns a given bit from dirty_log_mask,
and one that returns the entire mask.  memory_region_is_logging gets an
extra parameter so that the compiler flags misuse.

While VGA-specific users (including the Xen listener!) will want to keep
checking that bit, KVM and vhost check for "any bit except migration"
(because migration is handled via the global start/stop listener
callbacks).

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:58 +02:00
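
A hedged sketch of the split described above; QEMU's real declarations live in include/exec/memory.h, so treat these prototypes as approximations:

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct MemoryRegionSketch {
        uint8_t dirty_log_mask;   /* one bit per DIRTY_MEMORY_* client */
    } MemoryRegionSketch;

    /* Whole mask: callers like KVM and vhost check "any bit except
     * migration". */
    static uint8_t mr_get_dirty_log_mask(const MemoryRegionSketch *mr)
    {
        return mr->dirty_log_mask;
    }

    /* Single client: VGA-specific users (and the Xen listener) keep asking
     * about one bit; the extra parameter makes misuse visible to the
     * compiler. */
    static bool mr_is_logging(const MemoryRegionSketch *mr, uint8_t client)
    {
        return mr_get_dirty_log_mask(mr) & (1 << client);
    }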
Paolo Bonzini
5299c0f2cf display: add memory_region_sync_dirty_bitmap calls
These are strictly speaking only needed for KVM and Xen, but it's still
nice to be consistent.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:58 +02:00
Paolo Bonzini
74259ae55b display: enable DIRTY_MEMORY_VGA tracking explicitly
This will be required soon by the memory core.

Tested-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:58 +02:00
Paolo Bonzini
086f90e890 g364fb: remove pointless call to memory_region_set_coalescing
Coalescing works on MMIO, not RAM, so this call has no effect.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:58 +02:00
Paolo Bonzini
dbddac6da0 memory: the only dirty memory flag for users is DIRTY_MEMORY_VGA
DIRTY_MEMORY_MIGRATION is triggered by memory_global_dirty_log_start
and memory_global_dirty_log_stop, so it cannot be used with
memory_region_set_log.

Specify this in the documentation and assert it.

Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:58 +02:00
Peter Crosthwaite
6b4ad3b28d Makefile.target: set master BUILD_DIR
make can be invoked in the individual build dirs to build an individual
target or just a single file of a target. e.g.

touch translate-all.c
make -C microblazeel-softmmu translate-all.o

There is, however, a small bug when using the pixman submodule:
config-host.mak references BUILD_DIR for the pixman -I CFLAGS:

grep BUILD_DIR config-host.mak
QEMU_CFLAGS=-I$(SRC_PATH)/pixman/pixman -I$(BUILD_DIR)/pixman/pixman ...

This causes a build failure as -I/pixman/pixman (BUILD_DIR=="") will
not be found.

BUILD_DIR is usually set by the top level Makefile. Just lazy-set it in
Makefile.target to the parent directory.

Granted, this will not work if the pixman submodule is not prebuilt,
but it at least means you can do incremental partial builds once you
have done your initial full build (or attempt) from the top level.

The next step would be to refactor the make infrastructure to rebuild pixman
on a submake like the one above.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Message-Id: <1432618686-16077-1-git-send-email-crosthwaite.peter@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:58 +02:00
Paolo Bonzini
db94604b20 exec: optimize phys_page_set_level
phys_page_set_level is writing zeroes to a struct that has just been
filled in by phys_map_node_alloc.  Instead, tell phys_map_node_alloc
whether to fill in the page "as a leaf" or "as a non-leaf".

memcpy is faster than struct assignment, which copies each bitfield
individually because of a compiler bug (https://gcc.gnu.org/PR66391); small
memcpys like this one are special-cased anyway and optimized to a register
move, so just use memcpy.

This cuts the cost of phys_page_set_level from 25% to 5% when
booting qboot.
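A small, self-contained sketch of the pattern (the struct layout is
illustrative and not QEMU's PhysPageEntry):

#include <stdbool.h>
#include <string.h>

typedef struct {
    unsigned ptr  : 26;   /* illustrative bitfield layout */
    unsigned skip : 6;
} Entry;

/* Fill each entry once, either "as a leaf" or "as a non-leaf", and copy it
 * with memcpy: for a struct this small the call is folded into a register
 * move, while struct assignment may copy each bitfield individually. */
static void node_init(Entry *node, int n, bool leaf)
{
    Entry e = { .ptr = leaf ? 0u : 1u, .skip = leaf ? 0u : 1u };

    for (int i = 0; i < n; i++) {
        memcpy(&node[i], &e, sizeof(e));
    }
}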

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:58 +02:00
Fam Zheng
e4afbf4fb4 qemu-nbd: Switch to qemu_set_fd_handler
Achieved by:

- Remembering the server fd with a global variable, in order to access
  it from nbd_client_closed.

- Checking nbd_can_accept() and updating the server_fd handler whenever a
  client connects or disconnects (see the sketch below).
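A minimal sketch of the resulting pattern (simplified; the helper shape
mirrors the description above rather than the exact qemu-nbd code):

static int server_fd = -1;   /* remembered globally so nbd_client_closed can reach it */

static void nbd_update_server_fd_handler(int fd)
{
    if (nbd_can_accept()) {
        /* poll the listening socket and accept new clients */
        qemu_set_fd_handler(fd, nbd_accept, NULL, NULL);
    } else {
        /* too many clients: stop polling until one disconnects */
        qemu_set_fd_handler(fd, NULL, NULL, NULL);
    }
}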

Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1432032670-15124-3-git-send-email-famz@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:58 +02:00
Laurent Vivier
dae02ba55a ppc: add helpful message when KVM fails to start VCPU
On POWER8 systems, KVM checks that the VCPU is running on a primary thread
and that the secondary threads are offline. If this is not the case, the
ioctl() fails with errno set to EBUSY.

QEMU aborts with an uninformative error message:
$ ./qemu-system-ppc64 --nographic -machine pseries,accel=kvm
error: kvm run failed Device or resource busy

To help the user diagnose the problem, this patch adds an informative
error message.

There is no easy way to check whether SMT is enabled before starting the
VCPU, and as this case is the only one that sets errno to EBUSY, we simply
check the errno value before displaying the message.
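A sketch of the check described above (placement and message wording are
illustrative, not the exact patch):

/* in the KVM run error path */
if (run_ret == -EBUSY) {
    error_report("This KVM setup refuses to run the VCPU (errno EBUSY); "
                 "on POWER8 this usually means secondary SMT threads "
                 "are still online on the host.");
}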

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <1431976007-20503-1-git-send-email-lvivier@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:58 +02:00
Miroslav Rezanina
9157eee1b1 Move parallel_hds_isa_init to hw/isa/isa-bus.c
Disabling CONFIG_PARALLEL removes parallel_hds_isa_init, which is defined in
parallel.c. This function is called during the initialization of some boards,
so disabling CONFIG_PARALLEL causes a build failure.

This patch moves parallel_hds_isa_init to hw/isa/isa-bus.c so it is built even
when CONFIG_PARALLEL is disabled. The build then succeeds, but QEMU will abort
with an "Unknown device" error when the function is called.

Signed-off-by: Miroslav Rezanina <mrezanin@redhat.com>
Message-Id: <1431509970-32154-1-git-send-email-mrezanin@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-06-05 17:09:58 +02:00
Peter Maydell
00967f4e0b Merge remote-tracking branch 'remotes/agraf/tags/signed-s390-for-upstream' into staging
Patch queue for s390 - 2015-06-05

This time there are a lot of s390x TCG emulation bug fixes - almost all
of them from Aurelien, who returned from nirvana :).

# gpg: Signature made Fri Jun  5 00:39:27 2015 BST using RSA key ID 03FEDC60
# gpg: Good signature from "Alexander Graf <agraf@suse.de>"
# gpg:                 aka "Alexander Graf <alex@csgraf.de>"

* remotes/agraf/tags/signed-s390-for-upstream: (34 commits)
  target-s390x: Only access allocated storage keys
  target-s390x: fix MVC instruction when areas overlap
  target-s390x: use softmmu functions for mvcp/mvcs
  target-s390x: support non current ASC in s390_cpu_handle_mmu_fault
  target-s390x: add a cpu_mmu_idx_to_asc function
  target-s390x: implement high-word facility
  target-s390x: implement load-and-trap facility
  target-s390x: implement miscellaneous-instruction-extensions facility
  target-s390x: implement LPDFR and LNDFR instructions
  target-s390x: implement TRANSLATE EXTENDED instruction
  target-s390x: implement TRANSLATE AND TEST instruction
  target-s390x: implement LOAD FP INTEGER instructions
  target-s390x: move SET DFP ROUNDING MODE to the correct facility
  target-s390x: move STORE CLOCK FAST to the correct facility
  target-s390x: change CHRL and CGHRL format to RIL-b
  target-s390x: fix CLGIT instruction
  target-s390x: fix exception for invalid operation code
  target-s390x: implement LAY and LAEY instructions
  target-s390x: move a few instructions to the correct facility
  target-s390x: detect tininess before rounding for FP operations
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-05 12:04:42 +01:00
Mark Cave-Ayland
0ba98885a0 macio: remove remainder_len DBDMA_io property
Since the block alignment code is now effectively independent of the DMA
implementation, this variable is no longer required and can be removed.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1433455177-21243-5-git-send-email-mark.cave-ayland@ilande.co.uk
Signed-off-by: John Snow <jsnow@redhat.com>
2015-06-04 20:25:39 -04:00
Mark Cave-Ayland
b01d44cd06 macio: update comment/constants to reflect the new code
With the offset/len functions taking care of all of the alignment mapping
in isolation from the DMA transaction, many comments are now unnecessary.
Remove these and tidy up a few constants at the same time.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1433455177-21243-4-git-send-email-mark.cave-ayland@ilande.co.uk
Signed-off-by: John Snow <jsnow@redhat.com>
2015-06-04 20:25:39 -04:00
Mark Cave-Ayland
ac58fe7b2c macio: switch pmac_dma_write() over to new offset/len implementation
In particular, this fixes a bug whereby overlapping head/tail requests in a
chain would incorrectly write over each other's remainder cache. This is the
access pattern used by OS X/Darwin, and the change fixes a corrupted Darwin
installation in my local tests.

While we are here, rename the DBDMA_io struct property remainder to
head_remainder for clarification.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1433455177-21243-3-git-send-email-mark.cave-ayland@ilande.co.uk
Signed-off-by: John Snow <jsnow@redhat.com>
2015-06-04 20:25:39 -04:00
Mark Cave-Ayland
0389b8f8c7 macio: switch pmac_dma_read() over to new offset/len implementation
For better handling of unaligned block device accesses.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1433455177-21243-2-git-send-email-mark.cave-ayland@ilande.co.uk
Signed-off-by: John Snow <jsnow@redhat.com>
2015-06-04 20:25:39 -04:00
Alexander Graf
9814fed0af target-s390x: Only access allocated storage keys
We allocate ram_size / PAGE_SIZE storage keys, so we need to make sure that
we only access that many. Unfortunately the code can overrun this array by
one, potentially overwriting unrelated memory.

Fix it by limiting storage keys to their scope.

Signed-off-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
2015-06-05 01:38:00 +02:00
Aurelien Jarno
068593deea target-s390x: fix MVC instruction when areas overlap
The MVC instruction and the memmove C function do not have the same
semantics when memory areas overlap:

MVC: When the operands overlap, the result is obtained as if the
operands were processed one byte at a time and each result byte were
stored immediately after fetching the necessary operand byte.

memmove: Copying takes place as though the bytes in src are first copied
into a temporary array that does not overlap src or dest, and the bytes
are then copied from the temporary array to dest.

The behaviour is therefore the same when the destination is at a lower
address than the source, but not in the other case. This is actually a
trick for propagating a value through an area. While the current code
detects that and calls memset in that case, it only does so for 1-byte
values. The trick can be, and is, used for propagating patterns of two or
more bytes through an area.

In the softmmu case, the call to mvc_fast_memmove is correct, as the
preceding tests verify that source and destination each fit within a page
and that the two pages differ. The part doing the move 8 bytes at a time is
wrong: we need to check that, if the source and destination overlap, they
do so with a distance of at least 8 bytes before copying 8 bytes at a time.

In the user code, we should check that the destination is at a lower
address than the source, or that the end of the source is at a lower
address than the destination, before calling memmove. In the opposite case
we fall back to the same code as the softmmu one. Note that l represents
(length - 1).
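A small self-contained illustration of the semantic difference (not QEMU
code): byte-at-a-time copying propagates a pattern through the overlap,
while memmove does not.

#include <stdio.h>
#include <string.h>

static void mvc_like(unsigned char *dest, const unsigned char *src, size_t len)
{
    for (size_t i = 0; i < len; i++) {
        dest[i] = src[i];   /* each byte is stored before the next is fetched */
    }
}

int main(void)
{
    unsigned char a[8] = "AB";   /* remaining bytes are zero */
    unsigned char b[8] = "AB";

    mvc_like(a + 2, a, 6);       /* propagates the 2-byte pattern: ABABABAB */
    memmove(b + 2, b, 6);        /* copies the original bytes:     ABAB     */

    printf("mvc-like: %.8s  memmove: %.4s\n", a, b);
    return 0;
}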

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:59 +02:00
Aurelien Jarno
a3084e8055 target-s390x: use softmmu functions for mvcp/mvcs
The mvcp and mvcs helpers get access to physical memory by calling
mmu_translate for the virtual-to-real conversion and then using ldb_phys
and stb_phys to access the data physically. In practice this is quite slow,
because it bypasses the QEMU softmmu TLB and because stb_phys tries to
invalidate the corresponding memory on each access.

Instead, use cpu_ldb_{primary,secondary} for the loads and
cpu_stb_{primary,secondary} for the stores. Ideally this should be
further optimized by a call to memcpy, but this alone already improves the
boot time of a guest by a factor of 1.8.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:59 +02:00
Aurelien Jarno
c255ac6012 target-s390x: support non current ASC in s390_cpu_handle_mmu_fault
s390_cpu_handle_mmu_fault currently looks at the current ASC mode defined
in the PSW mask instead of at the MMU index. This prevents easily emulating
instructions that use a specific ASC mode. Fix that by converting the MMU
index back to an ASC with the just-added cpu_mmu_idx_to_asc function.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:59 +02:00
Aurelien Jarno
4decd76d71 target-s390x: add a cpu_mmu_idx_to_asc function
Use constants to define the MMU indexes, and add a function to do
the reverse conversion of cpu_mmu_index.
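A sketch of the reverse conversion (the constant names follow the commit
description and are assumptions here):

static inline uint64_t cpu_mmu_idx_to_asc(int mmu_idx)
{
    switch (mmu_idx) {
    case MMU_PRIMARY_IDX:
        return PSW_ASC_PRIMARY;
    case MMU_SECONDARY_IDX:
        return PSW_ASC_SECONDARY;
    case MMU_HOME_IDX:
        return PSW_ASC_HOME;
    default:
        abort();
    }
}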

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:59 +02:00
Aurelien Jarno
a1f12d855b target-s390x: implement high-word facility
Apart from RISBHG and RISBLG, none of the high-word instructions are
implemented. Fix that.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:59 +02:00
Aurelien Jarno
782a847952 target-s390x: implement load-and-trap facility
At the same time move the trap code from op_ct into gen_trap and use it
for all the new functions. The value needs to be stored back to the register
before the exception, but also before the brcond (as we don't use temp
locals); that's why we can't use the wout helper.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:59 +02:00
Aurelien Jarno
375ee58bed target-s390x: implement miscellaneous-instruction-extensions facility
RISBGN is the same as RISBG, but without setting the condition code.
CLT and CLGT are the same as CLRT and CLGRT, but using memory for the
second operand.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:59 +02:00
Aurelien Jarno
df46283ce7 target-s390x: implement LPDFR and LNDFR instructions
This completes the floating-point-support-sign-handling facility.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:59 +02:00
Aurelien Jarno
3f4de6756c target-s390x: implement TRANSLATE EXTENDED instruction
It is part of the basic zArchitecture instructions.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:59 +02:00
Aurelien Jarno
54f0077509 target-s390x: implement TRANSLATE AND TEST instruction
It is part of the basic zArchitecture instructions. Allow it to be called
from EXECUTE.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:59 +02:00
Aurelien Jarno
ed0bcecec1 target-s390x: implement LOAD FP INTEGER instructions
This is needed to pass the gcc.c-torture/execute/ieee/20010114-2.c test
in the gcc testsuite.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:59 +02:00
Aurelien Jarno
9182886d79 target-s390x: move SET DFP ROUNDING MODE to the correct facility
It belongs to the DFP rounding facility.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:59 +02:00
Aurelien Jarno
f7c2114067 target-s390x: move STORE CLOCK FAST to the correct facility
STORE CLOCK FAST should be in the SCF facility.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:58 +02:00
Aurelien Jarno
74266b4a58 target-s390x: change CHRL and CGHRL format to RIL-b
Change them to match the PoP. In practice formats RIL-a and RIL-b have the
same fields; they differ only in the way the fields are decoded, which QEMU
already does correctly.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:58 +02:00
Aurelien Jarno
1dedb9b76f target-s390x: fix CLGIT instruction
The COMPARE LOGICAL IMMEDIATE AND TRAP instruction should compare the
numbers as unsigned, as its name implies.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:58 +02:00
Aurelien Jarno
111d7f4a69 target-s390x: fix exception for invalid operation code
When an operation code is not recognized (i.e. an invalid instruction), an
operation exception should be generated instead of a specification
exception. The latter is for valid opcodes used with invalid operands or
modifiers.

This gives very basic GDB support in the guest, as GDB uses the invalid
opcode 0x0001 to generate a trap.
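A sketch of the distinction in the decoder (simplified; whether the helper
is spelled exactly like this is an assumption):

if (insn == NULL) {
    /* opcode not recognized: operation exception */
    gen_program_exception(s, PGM_OPERATION);
} else if (spec_check_failed) {
    /* valid opcode, invalid operands/modifiers: specification exception */
    gen_program_exception(s, PGM_SPECIFICATION);
}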

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:58 +02:00
Aurelien Jarno
a1c7610a68 target-s390x: implement LAY and LAEY instructions
This completes the general-instructions-extension facility, so enable it.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
[agraf: remove facility bit]
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:58 +02:00
Aurelien Jarno
92892330e7 target-s390x: move a few instructions to the correct facility
LY is part of the long-displacement facility.
RISBHG and RISBLG are part of the high-word facility.
STCMH is part of the z/Architecture.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:58 +02:00
Aurelien Jarno
4a33565f9f target-s390x: detect tininess before rounding for FP operations
The s390x floating point unit detects tininess before rounding, so set
the softfloat fp_status up appropriately.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:58 +02:00
Aurelien Jarno
f821135cdd target-s390x: silence NaNs for LOAD LENGTHENED and LOAD ROUNDED
LOAD LENGTHENED and LOAD ROUNDED are considered as FP operations and
thus need to convert input sNaN into corresponding qNaN.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:58 +02:00
Aurelien Jarno
2daea9c16f target-s390x: define default NaN values
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:58 +02:00
Aurelien Jarno
1f65958d9c target-s390x: fix MMU index computation
The cpu_mmu_index function wrongly looks at the PSW P bit to determine the
MMU index, while this bit actually only controls the use of privileged
instructions. The addressing mode should instead be determined by looking
at the PSW ASC bits.

This used to work more or less correctly up to kernel 3.6 as the kernel
was running in primary space and userland in secondary space. Since
kernel 3.7 the default is to run the kernel in home space and userland
in primary space. While the current QEMU code seems to work, it opens some
security issues, such as a userspace process getting R/W access to the
lowcore memory once it has been accessed by the kernel (it is then cached
by the QEMU TLB).

At the same time change the MMU_USER_IDX value so that it matches the
value used in recent kernels.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:58 +02:00
Aurelien Jarno
9bebf9863b target-s390x: fix PSW value on dynamical exception from helpers
runtime_exception computes the psw.addr value using the actual exception
address and the instruction length obtained by calling the get_ilen
function. However, as explained in the comment above the get_ilen code,
that function returns the actual instruction length, not the ILC, so there
is no need to multiply the value by 2.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:58 +02:00
Aurelien Jarno
aa752a4afc target-s390x: fix LOAD MULTIPLE instruction on page boundary
When consecutive memory locations straddle a page boundary, a page fault
might occur while executing the LOAD MULTIPLE instruction. In that case
real hardware doesn't load any register.

This is an important detail when the base register is in the list of
registers to be loaded: if a page fault occurs, this register might be
overwritten, and when the instruction is later restarted the wrong base
register value is used.

Fix this by first loading the first and last value from memory, hence
triggering all possible page faults, and then the remaining registers.

This fixes random segmentation faults seen in the guest.
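A conceptual sketch of the ordering described above (not the actual TCG
sequence emitted by the translator):

/* Touch the first and the last location before writing any register, so
 * that every possible page fault is raised while the architectural state
 * is still intact; only then load the remaining registers. */
uint64_t v_first = cpu_ldq_data(env, addr);
uint64_t v_last  = cpu_ldq_data(env, addr + 8 * (nregs - 1));
/* ... no further faults can occur: fill the register range, reusing
 * v_first and v_last for the two ends ... */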

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:58 +02:00
Aurelien Jarno
b8ae94bd39 target-s390x: implement STPT helper
Save the timer target value in the SPT helper, so that the STPT helper
can compute the remaining time.

This allows the Linux kernel to do time accounting correctly.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:57 +02:00
Aurelien Jarno
aa9e14e684 target-s390x: implement STCKC helper
The STCKC instruction just returns the last written clock comparator
value and KVM already provides the corresponding variable.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:57 +02:00
Aurelien Jarno
d9d55f1108 target-s390x: streamline STCK helper
Now that clock_value is only used in one place, we can inline it in
the STCK helper.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:57 +02:00
Aurelien Jarno
c941f07485 target-s390x: simplify SCKC helper
The clock comparator and the QEMU timer work the same way, triggering at a
given time; they just differ in origin and scale. It is therefore possible
to convert from one to the other without reading the current clock value.
This spares two calls to qemu_clock_get_ns, which would probably return
slightly different values, possibly reducing the accuracy.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:57 +02:00
Aurelien Jarno
9cb32c442e target-s390x: add a tod2time function
Add a tod2time function similar to the time2tod one, instead of open
coding the conversion.
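A sketch of the conversion pair: the TOD clock ticks 4096 times per
microsecond, so one TOD unit is 125/512 ns (the exact rounding used by the
helpers is an assumption):

static inline uint64_t time2tod(uint64_t ns)
{
    return (ns << 9) / 125;      /* nanoseconds -> TOD clock units */
}

static inline uint64_t tod2time(uint64_t t)
{
    return (t * 125) >> 9;       /* TOD clock units -> nanoseconds */
}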

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:57 +02:00
Aurelien Jarno
a91a1b20a2 target-s390x: remove unused helpers
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:57 +02:00
Aurelien Jarno
d30107814c target-s390x: optimize (negative-) abs computation
Now that movcond exists, it's easy to write the (negative-) absolute value
using TCG code instead of a helper.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:57 +02:00
Aurelien Jarno
2aaa194068 target-s390x: fix CC computation for LOAD POSITIVE instructions
LOAD POSITIVE instructions (LPR, LPGR and LPGFR) set the following
condition code:
  0: Result zero; no overflow
  1: --
  2: Result greater than zero; no overflow
  3: Overflow

The current code wrongly returns 1 instead of 2 when the result is greater
than zero. This patch fixes that, which in turn fixes the marshalling of
the value '0L' in Python.
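A sketch of the corrected computation for the 64-bit case (helper name and
shape are illustrative):

static uint32_t cc_calc_abs_64(int64_t dst)
{
    if ((uint64_t)dst == 0x8000000000000000ULL) {
        return 3;   /* overflow: |INT64_MIN| is not representable */
    } else if (dst) {
        return 2;   /* result greater than zero, no overflow */
    } else {
        return 0;   /* result zero, no overflow */
    }
}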

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:57 +02:00
Aurelien Jarno
ee0d0be168 target-s390x: fix CC computation for EX instruction
Commit 7a6c7067f optimized CC computation by only saving cc_op before
calling helpers, as they either don't touch the CC or generate a new static
value. This however doesn't work for the EX instruction, as its helper may
or may not change the CC value depending on the instruction actually
executed (e.g. MVC vs CLC).

This patch forces a CC computation before calling the helper. This fixes
random memory corruption occurring in guests.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
[agraf: remove set_cc_static in op_ex as suggested by rth]
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-05 01:37:57 +02:00
Peter Maydell
d6688ba17b Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc, acpi, virtio, tpm

This includes pxb support by Marcel, as well as multiple enhancements all over
the place.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Thu Jun  4 11:51:02 2015 BST using RSA key ID D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"

* remotes/mst/tags/for_upstream: (28 commits)
  vhost: logs sharing
  hw/acpi: piix4_pm_init(): take fw_cfg object no more
  hw/acpi: move "etc/system-states" fw_cfg file from PIIX4 to core
  hw/acpi: acpi_pm1_cnt_init(): take "disable_s3" and "disable_s4"
  pc-dimm: don't assert if pc-dimm alignment != hotpluggable mem range size
  docs: Add PXB documentation
  apci: fix PXB behaviour if used with unsupported BIOS
  hw/pxb: add numa_node parameter
  hw/pci: add support for NUMA nodes
  hw/pxb: add map_irq func
  hw/pci: inform bios if the system has extra pci root buses
  hw/pci: introduce PCI Expander Bridge (PXB)
  hw/pci: removed 'rootbus nr is 0' assumption from qmp_pci_query
  hw/acpi: remove from root bus 0 the crs resources used by other buses.
  hw/acpi: add _CRS method for extra root busses
  hw/apci: add _PRT method for extra PCI root busses
  hw/acpi: add support for i440fx 'snooping' root busses
  hw/pci: extend PCI config access to support devices behind PXB
  hw/i386: query only for q35/pc when looking for pci host bridge
  hw/pci: made pci_bus_num a PCIBusClass method
  ...

Conflicts:
	hw/i386/pc_piix.c

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-04 18:33:24 +01:00
Peter Maydell
3b730f570c Merge remote-tracking branch 'remotes/agraf/tags/signed-ppc-for-upstream' into staging
Patch queue for ppc - 2015-06-03

Highlights this time around:

  - sPAPR: endian fixes, speedups, bug fixes, hotplug basics
  - add default ram size capability for machines (sPAPR defaults to 512MB now)

# gpg: Signature made Wed Jun  3 22:59:09 2015 BST using RSA key ID 03FEDC60
# gpg: Good signature from "Alexander Graf <agraf@suse.de>"
# gpg:                 aka "Alexander Graf <alex@csgraf.de>"

* remotes/agraf/tags/signed-ppc-for-upstream: (40 commits)
  softmmu: support up to 12 MMU modes
  tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS
  tci: do not use CPUArchState in tcg-target.h
  Add David Gibson for sPAPR in MAINTAINERS file
  pseries: Enable in-kernel H_LOGICAL_CI_{LOAD, STORE} implementations
  spapr: override default ram size to 512MB
  machine: add default_ram_size to machine class
  spapr_pci: emit hotplug add/remove events during hotplug
  spapr_pci: enable basic hotplug operations
  pci: make pci_bar useable outside pci.c
  spapr_pci: create DRConnectors for each PCI slot during PHB realize
  spapr_pci: add dynamic-reconfiguration option for spapr-pci-host-bridge
  spapr_drc: add spapr_drc_populate_dt()
  spapr_events: event-scan RTAS interface
  spapr_events: re-use EPOW event infrastructure for hotplug events
  spapr_rtas: add ibm, configure-connector RTAS interface
  spapr: add rtas_st_buffer_direct() helper
  spapr_rtas: add get-sensor-state RTAS interface
  spapr_rtas: add set-indicator RTAS interface
  spapr_rtas: add get/set-power-level RTAS interfaces
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-04 14:04:14 +01:00
Peter Maydell
2700a976db Merge remote-tracking branch 'remotes/mjt/tags/pull-trivial-patches-2015-06-03' into staging
trivial patches for 2015-06-03

# gpg: Signature made Wed Jun  3 14:07:47 2015 BST using RSA key ID A4C3D7DB
# gpg: Good signature from "Michael Tokarev <mjt@tls.msk.ru>"
# gpg:                 aka "Michael Tokarev <mjt@corpit.ru>"
# gpg:                 aka "Michael Tokarev <mjt@debian.org>"

* remotes/mjt/tags/pull-trivial-patches-2015-06-03: (30 commits)
  configure: postfix --extra-cflags to QEMU_CFLAGS
  cadence_gem: Fix Rx buffer size field mask
  slirp: use less predictable directory name in /tmp for smb config (CVE-2015-4037)
  translate-all: delete prototype for non-existent function
  Add -incoming help text
  hw/display/tc6393xb.c: Fix misusing qemu_allocate_irqs for single irq
  hw/arm/nseries.c: Fix misusing qemu_allocate_irqs for single irq
  hw/alpha/typhoon.c: Fix misusing qemu_allocate_irqs for single irq
  hw/unicore32/puv3.c: Fix misusing qemu_allocate_irqs for single irq
  hw/lm32/milkymist.c: Fix misusing qemu_allocate_irqs for single irq
  hw/lm32/lm32_boards.c: Fix misusing qemu_allocate_irqs for single irq
  hw/ppc/prep.c: Fix misusing qemu_allocate_irqs for single irq
  hw/sparc/sun4m.c: Fix misusing qemu_allocate_irqs for single irq
  hw/timer/arm_timer.c: Fix misusing qemu_allocate_irqs for single irq
  hw/isa/i82378.c: Fix misusing qemu_allocate_irqs for single irq
  hw/isa/lpc_ich9.c: Fix misusing qemu_allocate_irqs for single irq
  hw/i386/pc: Fix misusing qemu_allocate_irqs for single irq
  hw/intc/exynos4210_gic.c: Fix memory leak by adjusting order
  hw/arm/omap_sx1.c: Fix memory leak spotted by valgrind
  hw/ppc/e500.c: Fix memory leak
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-04 12:49:15 +01:00
Jason Wang
309750fad5 vhost: logs sharing
Currently we allocate one vhost log per vhost device. This is suboptimal
when:

- Guest has several devices with vhost as backend
- Guest has multiqueue devices

In the above cases, we can avoid the memory allocation by sharing a
single vhost log among all the vhost devices. This is done through:

- Introducing a new vhost_log structure with a refcnt inside.
- Using a global pointer to the vhost_log structure currently in use, plus
  a helper to get a log of the expected size and a helper to drop the
  refcnt on the old log.
- Each vhost device still keeps track of a pointer to the log it last used.

With the above, if no resize happens, all vhost devices share a single
vhost log. On resize, a new vhost_log structure is allocated and installed
as the global pointer, and each vhost device drops its refcnt on the old
log.

Tested by doing scp during migration for a 2 queues virtio-net-pci.
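A minimal, self-contained sketch of the sharing scheme (types and helper
names are illustrative, not the exact QEMU definitions):

#include <stdint.h>
#include <stdlib.h>

struct vhost_log {
    uint64_t size;      /* log size, in 64-bit chunks */
    int refcnt;
    uint64_t *log;      /* the dirty bitmap itself */
};

static struct vhost_log *vhost_log_current;   /* shared by all vhost devices */

static struct vhost_log *vhost_log_get(uint64_t size)
{
    if (!vhost_log_current || vhost_log_current->size != size) {
        /* first user, or resize: allocate a new shared log */
        struct vhost_log *log = calloc(1, sizeof(*log));
        log->size = size;
        log->log = calloc(size, sizeof(uint64_t));
        log->refcnt = 1;
        vhost_log_current = log;
    } else {
        vhost_log_current->refcnt++;
    }
    return vhost_log_current;
}

static void vhost_log_put(struct vhost_log *log)
{
    if (log && --log->refcnt == 0) {
        if (log == vhost_log_current) {
            vhost_log_current = NULL;
        }
        free(log->log);
        free(log);
    }
}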

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-06-04 12:44:49 +02:00
Peter Maydell
6fa6b31276 Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
X86 queue 2015-06-02

# gpg: Signature made Tue Jun  2 20:21:17 2015 BST using RSA key ID 984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF  D1AA 2807 936F 984D C5A6

* remotes/ehabkost/tags/x86-pull-request:
  arch_init: Drop target-x86_64.conf
  target-i386: Register QOM properties for feature flags
  apic: convert ->busdev.qdev casts to C casts
  target-i386: Fix signedness of MSR_IA32_APICBASE_BASE
  pc: Ensure non-zero CPU ref count after attaching to ICC bus

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-04 11:44:32 +01:00
Laszlo Ersek
6e7d82497d hw/acpi: piix4_pm_init(): take fw_cfg object no more
This PIIX4 init function has no more reason to receive a pointer to the
FwCfg object. Remove the parameter from the prototype, and update callers.

As a result, the pc_init1() function no longer needs to save the return
value of pc_memory_init() and xen_load_linux(), which makes it more
similar to pc_q35_init().

The return type & value of pc_memory_init() and xen_load_linux() are not
changed themselves; maybe we'll need their return values sometime later.

RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1204696
Cc: Amit Shah <amit.shah@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
2015-06-04 11:25:42 +02:00
Laszlo Ersek
e3845e7c47 hw/acpi: move "etc/system-states" fw_cfg file from PIIX4 to core
The acpi_pm1_cnt_init() core function is responsible for setting up the
register block that will ultimately react to S3 and S4 requests (see
acpi_pm1_cnt_write()). It makes sense to advertise this configuration to
the guest firmware via an easy to parse fw_cfg file (ACPI is too complex
for firmware to parse), and indeed PIIX4 does that. However, since
acpi_pm1_cnt_init() is not specific to PIIX4, neither should be the fw_cfg
file.

This patch makes "etc/system-states" appear on all chipsets modified in
the previous patch, not just PIIX4 (assuming they have fw_cfg at all).

RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1204696
Cc: Amit Shah <amit.shah@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
2015-06-04 11:25:42 +02:00
Laszlo Ersek
9a10bbb4e8 hw/acpi: acpi_pm1_cnt_init(): take "disable_s3" and "disable_s4"
This patch only modifies the function prototype and updates all chipset
code that calls acpi_pm1_cnt_init() to pass in their own disable_s3 and
disable_s4 settings. vt82c686 is assumed to be fixed "S3 and S4 enabled".

RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1204696
Cc: Amit Shah <amit.shah@redhat.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Eduardo Habkost <ehabkost@redhat.com>
Cc: Aurelien Jarno <aurelien@aurel32.net>
Cc: Leon Alrae <leon.alrae@imgtec.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
2015-06-04 11:25:42 +02:00
Peter Maydell
d2ceeb1d68 Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20150602' into staging
target-arm queue:
 * more EL2 preparation patches
 * revert a no-longer-necessary workaround for old glib versions
 * add GICv2m support to virt board (MSI support)
 * pl061: fix wrong calculation of GPIOMIS register
 * support MSI via irqfd
 * remove a confusing v8_ prefix from some variable names
 * add dynamic sysbus device support to the virt board

# gpg: Signature made Tue Jun  2 17:30:38 2015 BST using RSA key ID 14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"

* remotes/pmaydell/tags/pull-target-arm-20150602: (22 commits)
  hw/arm/virt: change indentation in a15memmap
  hw/arm/virt: add dynamic sysbus device support
  hw/arm/boot: arm_load_kernel implemented as a machine init done notifier
  hw/arm/sysbus-fdt: helpers for platform bus nodes addition
  target-arm: Remove v8_ prefix from names of non-v8-specific cpreg arrays
  arm_gicv2m: set kvm_gsi_direct_mapping and kvm_msi_via_irqfd_allowed
  kvm: introduce kvm_arch_msi_data_to_gsi
  pl061: fix wrong calculation of GPIOMIS register
  target-arm: Add the GICv2m to the virt board
  target-arm: Extend the gic node properties
  arm_gicv2m: Add GICv2m widget to support MSIs
  target-arm: Add GIC phandle to VirtBoardInfo
  Revert "target-arm: Avoid g_hash_table_get_keys()"
  target-arm: Add TLBI_VAE2{IS}
  target-arm: Add TLBI_ALLE2
  target-arm: Add TLBI_ALLE1{IS}
  target-arm: Add TTBR0_EL2
  target-arm: Add TPIDR_EL2
  target-arm: Add SCTLR_EL2
  target-arm: Add TCR_EL2
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-04 10:21:52 +01:00
Igor Mammedov
b5d3b03922 pc-dimm: don't assert if pc-dimm alignment != hotpluggable mem range size
Drop the superfluous assert on pc-dimm alignment versus the hot-pluggable
memory range size, since it causes a QEMU crash during hotplug when a
pc-dimm with an alignment bigger than the alignment of the hot-pluggable
memory range size is hotplugged.

Instead, let pc_dimm_get_free_addr() find a free address and bail out
gracefully later in that function when checking whether the pc-dimm fits
in the hot-pluggable memory range.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-06-04 11:20:34 +02:00
Paolo Bonzini
1de29aef17 softmmu: support up to 12 MMU modes
At 8k per TLB (for 64-bit host or target), 8 or more modes
make the TLBs bigger than 64k, and some RISC TCG backends do
not like that.  On the affected hosts, cut the TLB size in
half---there is still a measurable speedup on PPC with the
next patch.
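A rough sketch of the arithmetic implied above (assuming the 8 KiB-per-mode
figure quoted here): 8 modes already put the TLBs at 64 KiB per CPU, more
than some RISC backends can reach with their load/store immediate
displacement; halving the per-mode TLB on those hosts keeps even 12 modes
at 12 x 4 KiB = 48 KiB.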

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1424436345-37924-3-git-send-email-pbonzini@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:56 +02:00
Paolo Bonzini
006f8638c6 tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS
This will be used to size the TLB when more than 8 MMU modes are
used by the target.  Limitations come from the limited size of
the immediate fields (which sometimes, as in the case of AArch64,
extend to instructions that shift the immediate).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <1424436345-37924-2-git-send-email-pbonzini@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:56 +02:00
Paolo Bonzini
5a58e884d1 tci: do not use CPUArchState in tcg-target.h
tcg-target.h does not use any QEMU-specific symbols, save for tci's usage
of CPUArchState.  Pull that up to tcg/tcg.h.

This will make it possible to include tcg-target.h in cpu-defs.h.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:55 +02:00
David Gibson
085eb217df Add David Gibson for sPAPR in MAINTAINERS file
At Alex Graf's request I'm now acting as sub-maintainer for the sPAPR
(-machine pseries) code.  This updates MAINTAINERS accordingly.

While we're at it, change the label to mention pseries since that's the
actual name of the machine type, even if most of the C files use the sPAPR
name.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:55 +02:00
David Gibson
026bfd89cb pseries: Enable in-kernel H_LOGICAL_CI_{LOAD, STORE} implementations
qemu currently implements the hypercalls H_LOGICAL_CI_LOAD and
H_LOGICAL_CI_STORE as PAPR extensions.  These are used by the SLOF firmware
for IO, because performing cache inhibited MMIO accesses with the MMU off
(real mode) is very awkward on POWER.

This approach breaks when SLOF needs to access IO devices implemented
within KVM instead of in qemu.  The simplest example would be virtio-blk
using an iothread, because the iothread / dataplane mechanism relies on
an in-kernel implementation of the virtio queue notification MMIO.

To fix this, an in-kernel implementation of these hypercalls has been made
(kernel commit 99342cf, "kvmppc: Implement H_LOGICAL_CI_{LOAD,STORE} in
KVM"); however, the hypercalls still need to be enabled from QEMU. This
patch performs the necessary calls to do so.

It would be nice to provide some warning if we encounter a problematic
device with a kernel which doesn't support the new calls.  Unfortunately,
I can't see a way to detect this case which won't either warn in far too
many cases that will probably work, or which is horribly invasive.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:55 +02:00
Nikunj A Dadhania
a34944fe2e spapr: override default ram size to 512MB
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:55 +02:00
Nikunj A Dadhania
076b35b5a5 machine: add default_ram_size to machine class
Machine types can have different requirements for the default RAM size.
Introduce a member in the machine class and set the current
default_ram_size to 128MB.

For QEMUMachine types, override the value during registration of the
machine; for MachineClass, introduce a generic class init that sets
default_ram_size.

Add helpers [K,M,G,T,P,E]_BYTE for better readability and easier usage.
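A sketch of the helpers and their use (the definitions are assumed to be
plain shifts):

#define K_BYTE (1ULL << 10)
#define M_BYTE (1ULL << 20)
#define G_BYTE (1ULL << 30)
#define T_BYTE (1ULL << 40)
#define P_BYTE (1ULL << 50)
#define E_BYTE (1ULL << 60)

/* e.g. in a machine class init function: */
mc->default_ram_size = 512 * M_BYTE;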

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:55 +02:00
Tyrel Datwyler
c5bc152bc3 spapr_pci: emit hotplug add/remove events during hotplug
This uses an extension of the existing EPOW interrupt/event mechanism
to notify userspace tools like librtas/drmgr to handle
in-guest configuration/cleanup operations in response to
device_add/device_del.

Userspace tools that don't implement this extension will need to be run
manually in response to device_add and in advance of device_del,
respectively.

Signed-off-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:55 +02:00
Michael Roth
7454c7af91 spapr_pci: enable basic hotplug operations
This enables hotplug of PCI devices to a PHB. Upon hotplug we
generate the OF nodes required by the PAPR specification and
IEEE 1275-1994 "PCI Bus Binding to Open Firmware" for the
device.

We associate the corresponding FDT for these nodes with the DRC
corresponding to the slot, which will be fetched via
ibm,configure-connector RTAS calls by the guest, as described by the PAPR
specification.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:54 +02:00
Michael Roth
cf8c704d5a pci: make pci_bar useable outside pci.c
We need to work with PCI BARs to generate OF properties
during PCI hotplug for sPAPR guests.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:54 +02:00
Michael Roth
62083979b0 spapr_pci: create DRConnectors for each PCI slot during PHB realize
These will be used to support hotplug/unplug of PCI devices to the PCI
bus associated with a particular PHB.

We also set up device-tree properties in each PHB's initial FDT to
describe the DRCs associated with it. This advertises to guests that
each PHB is a DR-capable device with physical hotpluggable slots, each
managed by the corresponding DRC. This is necessary to allow
hotplugging of devices to it later via bus rescan or the guest rpaphp
hotplug module.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:54 +02:00
Michael Roth
7619c7b00c spapr_pci: add dynamic-reconfiguration option for spapr-pci-host-bridge
This option enables/disables PCI hotplug for a particular PHB.

Also add machine compatibility code to disable it by default for machine
types prior to pseries-2.4.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
[agraf: move commas for compat fields]
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:54 +02:00
Michael Roth
e4b798bb53 spapr_drc: add spapr_drc_populate_dt()
This function handles generation of ibm,drc-* array device tree
properties to describe DRC topology to guests. This will by used
by the guest to direct RTAS calls to manage any dynamic resources
we associate with a particular DR Connector as part of
hotplug/unplug.

Since general management of boot-time device trees are handled
outside of sPAPRDRConnector, we insert these values blindly given
an FDT and offset. A mask of sPAPRDRConnector types is given to
instruct us on what types of connectors entries should be generated
for, since descriptions for different connectors may live in
different parts of the device tree.

Based on code originally written by Nathan Fontenot.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:54 +02:00
Tyrel Datwyler
79853e18d9 spapr_events: event-scan RTAS interface
We don't actually rely on this interface to surface hotplug events, and
instead rely on the similar-but-interrupt-driven check-exception RTAS
interface used for EPOW events. However, the existence of this interface
is needed to ensure guest kernels initialize the event-reporting
interfaces which will in turn be used by userspace tools to handle these
events, so we implement this interface here.

Since events surfaced by this call are mutually exclusive to those
surfaced via check-exception, we also update the RTAS event queue code
to accept a boolean to mark/filter for events accordingly.

Events of this sort are not currently generated by QEMU, but the interface
has been tested by surfacing hotplug events via event-scan in place
of check-exception.

Signed-off-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:53 +02:00
Nathan Fontenot
31fe14d15d spapr_events: re-use EPOW event infrastructure for hotplug events
This extends the data structures currently used to report EPOW events to
guests via the check-exception RTAS interfaces to also include event types
for hotplug/unplug events.

This is currently undocumented and being finalized for inclusion in the PAPR
specification, but we implement this here as an extension for guest
userspace tools to implement (existing guest kernels simply log these
events via a sysfs interface that's read by rtas_errd, and current
versions of rtas_errd/powerpc-utils already support the use of this
mechanism for initiating hotplug operations).

We also add support for queues of pending RTAS events, since in the
case of hotplug there is a chance of multiple events being in flight
at any point in time.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:53 +02:00
Michael Roth
46503c2bc0 spapr_rtas: add ibm, configure-connector RTAS interface
This interface is used to fetch the OF device-tree node that describes a
newly attached device to the guest. It is called multiple times to walk the
device-tree node and fetch individual properties into a 'work area'/buffer
provided by the guest.

The device tree is generated by QEMU and passed to an sPAPRDRConnector during
the initial hotplug operation, and the state of these RTAS calls is tracked by
the sPAPRDRConnector. When the last of these properties has been successfully
fetched, we report a special return value to the guest and transition
the device to a 'configured' state on the QEMU/DRC side.

See docs/specs/ppc-spapr-hotplug.txt for a complete description of
this interface.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:53 +02:00
Michael Roth
ab316865db spapr: add rtas_st_buffer_direct() helper
This is similar to the existing rtas_st_buffer(), but for cases
where the guest is not expecting a length-encoded byte array.
Namely, for calls where a "work area" buffer is used to pass
around arbitrary fields/data.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:53 +02:00
Mike Day
886445a6ee spapr_rtas: add get-sensor-state RTAS interface
This interface allows a guest to read various platform/device sensors.
Initially, we only implement the support necessary for hotplug:
reading of the dr-entity-sense sensor, which communicates the state of
a hotplugged resource/device to the guest (EMPTY/PRESENT/UNUSABLE).

See docs/specs/ppc-spapr-hotplug.txt for a complete description of
this interface.

Signed-off-by: Mike Day <ncmike@ncultra.org>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:53 +02:00
Mike Day
8c8639df32 spapr_rtas: add set-indicator RTAS interface
This interface allows a guest to control various platform/device
sensors. Initially, we only implement support necessary to control
sensors that are required for hotplug: DR connector indicators/LEDs,
resource allocation state, and resource isolation state.

See docs/specs/ppc-spapr-hotplug.txt for a complete description of
this interface.

Signed-off-by: Mike Day <ncmike@ncultra.org>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:53 +02:00
Nathan Fontenot
094d20585e spapr_rtas: add get/set-power-level RTAS interfaces
These interfaces manage the power domains that guest devices are
assigned to and are used to power on/off devices. Currently we
only utilize 1 power domain, the 'live-insertion' domain, which
automates power management of plugged/unplugged devices, essentially
making these calls no-ops, but the RTAS interfaces are still required
by guest hotplug code and PAPR+.

See docs/specs/ppc-spapr-hotplug.txt for a complete description of
these interfaces.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:52 +02:00
Michael Roth
bbf5c878ab spapr_drc: initial implementation of sPAPRDRConnector device
This device emulates a firmware abstraction used by pSeries guests to
manage hotplug/dynamic-reconfiguration of host-bridges, PCI devices,
memory, and CPUs. It is conceptually similar to an SHPC device,
complete with LED indicators to identify individual slots to physical
users and to indicate when it is safe to remove a device. In
some cases it is also used to manage virtualized resources, such as
memory, CPUs, and physical host bridges, which in the case of pSeries
guests are virtualized resources where the physical components are
managed by the host.

Guests communicate with these DR Connectors using RTAS calls,
generally by addressing the unique DRC index associated with a
particular connector for a particular resource. For introspection
purposes we expose this state initially as QOM properties, and
in subsequent patches will introduce the RTAS calls that make use of
it. This constitutes the 'guest' interface.

On the QEMU side we provide an attach/detach interface to associate
a DeviceState with a particular sPAPRDRConnector, or clean it up, in
response to hotplug/unplug, respectively. This constitutes the
'physical' interface to the DR Connector.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:52 +02:00
Michael Roth
11eec063f2 docs: add sPAPR hotplug/dynamic-reconfiguration documentation
This adds a general overview of hotplug/dynamic-reconfiguration
for sPAPR/pSeries guests.

As specified in PAPR+ v2.7.

Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:52 +02:00
Thomas Huth
730fce593b hw/ppc/spapr: Use error_report() instead of hw_error()
hw_error() is designed for printing CPU-related error messages
(e.g. it also prints a full CPU register dump). For error messages
that are not directly related to CPU problems, a function like
error_report() should be used instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:52 +02:00
Thomas Huth
68fea5a0d7 hw/ppc/spapr: Fix error message when firmware could not be loaded
When specifying a non-existing file with the "-bios" parameter, QEMU
complained that it "could not find LPAR rtas". That's obviously a
copy-n-paste bug from the code which loads the spapr-rtas.bin, it
should complain about a missing firmware file instead.
Additionally the error message was printed with hw_error() - which
also dumps the whole CPU state. However, this does not make much
sense here since the CPU is not running yet and thus the registers
only contain zeroes. So let's use error_report() here instead.
And while we're at it, let's also bail out if the firmware file
had zero length.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:52 +02:00
David Gibson
a1a4561243 pseries: Add pseries-2.4 machine type
Now that 2.4 development has opened, create a new pseries machine type
variant.  For now it is identical to the pseries-2.3 machine type, but
a number of new features are coming that will need to set backwards
compatibility options.

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:52 +02:00
Thomas Huth
f9ce8e0aa3 hw/ppc/spapr_iommu: Fix the check for invalid upper bits in liobn
The check "liobn & 0xFFFFFFFF00000000ULL" in spapr_tce_find_by_liobn()
is completely useless since liobn is only declared as a uint32_t
parameter. Fix this by using target_ulong instead (this is what most
of the callers of this function are using, too).

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:51 +02:00
Alexey Kardashevskiy
dea1b3ce75 spapr_iommu: Give unique QOM name to TCE table
Useful for debugging.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:51 +02:00
Alexey Kardashevskiy
ccf9ff8527 spapr_pci: Rework device-tree rendering
This replaces object_child_foreach() and its callback with the existing
SPAPR_PCI_LIOBN() and spapr_tce_find_by_liobn() to make the code easier
to read.

This is a mechanical patch so no behaviour change is expected.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:51 +02:00
Alexey Kardashevskiy
fae807a2b1 spapr_iommu: Make spapr_tce_find_by_liobn() public
At the moment spapr_tce_find_by_liobn() is used by H_PUT_TCE/...
handlers to find an IOMMU by LIOBN.

We are going to implement Dynamic DMA windows (DDW), new code
will go to a new file and we will use spapr_tce_find_by_liobn()
there too so let's make it public.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:51 +02:00
Alexey Kardashevskiy
46c5874e9c spapr_pci: Make find_phb()/find_dev() public
This makes find_phb()/find_dev() public and changes their names
to spapr_pci_find_phb()/spapr_pci_find_dev() as they are going to
be used from other parts of QEMU such as VFIO DDW (dynamic DMA window)
or VFIO PCI error injection or VFIO EEH handling - in all these
cases there are RTAS calls which are addressed to BUID+config_addr
in IEEE1275 format.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:51 +02:00
Alexey Kardashevskiy
d9d96a3cc7 spapr_iommu: Add separate trace points for PCI DMA operations
This is to reduce VIO noise while debugging PCI DMA.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:51 +02:00
Alexey Kardashevskiy
3e1a01cb55 spapr_pci: Define default DMA window size as a macro
This gets rid of a magic constant describing the default DMA window size
for an emulated PHB.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:50 +02:00
Alexey Kardashevskiy
4290ca49ee spapr_vio: Introduce a liobn number generating macro
This introduces a macro which makes up a LIOBN from a fixed prefix and
the VIO device address (@reg property).

This keeps the LIOBN macro rendering consistent - the same kind of
macro for PCI was added by the previous patch.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:50 +02:00
Alexey Kardashevskiy
c8545818b3 spapr_pci: Introduce a liobn number generating macro
We are going to have multiple DMA windows per PHB and we want them to
migrate so we need a predictable way of assigning LIOBNs.

This introduces a macro which makes up a LIOBN from a fixed prefix,
the PHB index (a unique PHB id) and the window number.

This also introduces SPAPR_PCI_DMA_WINDOW_NUM() to derive the window
number from a LIOBN. It is used to distinguish the default 32-bit
windows from dynamic windows and to avoid picking default DMA window
properties from the wrong TCE table.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:50 +02:00
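
A hypothetical encoding along those lines (the prefix value and bit layout here are illustrative assumptions; the real constants live in the spapr headers):

    #define LIOBN_PCI_PREFIX            0x80000000u
    #define LIOBN_FOR_PHB(phb_index, window_num) \
        (LIOBN_PCI_PREFIX | ((unsigned)(phb_index) << 8) | (unsigned)(window_num))
    #define LIOBN_DMA_WINDOW_NUM(liobn) ((liobn) & 0xffu)

    /* Window 0 is the default 32-bit DMA window; dynamic windows get 1, 2, ... */
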
Alexey Kardashevskiy
f1215ea702 spapr_iommu: Make H_PUT_TCE_INDIRECT endian-safe
PAPR is defined as big endian, so TCEs need a byte-order adjustment;
this patch provides it.

This changes the code to have ldq_be_phys() in one place.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:50 +02:00
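
As a sketch of the idea of centralising the byte-order handling (the helper below is illustrative only; the real code uses QEMU's ldq_be_phys()):

    #include <stdint.h>

    /* Read one 8-byte, big-endian TCE from a buffer copied out of guest
     * memory, converting it to host byte order in a single place. */
    static uint64_t tce_read_be64(const uint8_t *p)
    {
        uint64_t v = 0;

        for (int i = 0; i < 8; i++) {
            v = (v << 8) | p[i];
        }
        return v;
    }

    int main(void)
    {
        const uint8_t raw[8] = { 0, 0, 0, 0, 0, 0, 0x10, 0x00 };
        return tce_read_be64(raw) == 0x1000 ? 0 : 1;
    }
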
Alexey Kardashevskiy
12fd285358 spapr_iommu: Disable in-kernel IOMMU tables for >4GB windows
The existing KVM_CREATE_SPAPR_TCE ioctl only supports windows of up to
4GB, as the window size parameter to the kernel ioctl() is 32-bit, so
there is no way of expressing a TCE window larger than 4GB.

We are going to add support for huge DMA windows, so this path would
create a too-small window and unexpectedly fail later.

This disables KVM_CREATE_SPAPR_TCE for windows bigger than 4GB.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:50 +02:00
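
A minimal example of the truncation problem being avoided here (illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t window_size = 4ULL * 1024 * 1024 * 1024;   /* 4GB window */
        uint32_t ioctl_arg   = (uint32_t)window_size;       /* truncates to 0 */

        printf("requested %llu bytes, 32-bit ioctl argument sees %u\n",
               (unsigned long long)window_size, (unsigned)ioctl_arg);
        return 0;
    }
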
David Gibson
421b1b27f6 spapr_pci: Fix unsafe signed/unsigned comparisons
spapr_pci.c contains a number of expressions of the form (uval == -1) or
(uval != -1), where 'uval' is an unsigned value.

This mostly works in practice, because as long as the width of uval is
greater or equal than that of (int), the -1 will be promoted to the
unsigned type, which is the expected outcome.

However, at least for the cases where uval is uint32_t, this would break
on platforms where sizeof(int) > 4 (and a few such do exist), because then
the uint32_t value would be promoted to the larger int type, and never be
equal to -1.

This patch fixes these errors.  The fixes for the (uint32_t) cases are
necessary as described above.  I've made similar fixes to (uint64_t) and
(hwaddr) cases.  Those are strictly theoretical, since I don't know of any
platforms where sizeof(int) > 8, but hey, it's not that hard so we might
as well be strictly C standard compliant.

Reported-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:50 +02:00
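
A small standalone example of the promotion rules in question, assuming the usual hosts with a 32-bit int for the first case:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t uval = UINT32_MAX;

        /* Works where int is 32 bits: -1 converts to UINT32_MAX.  On a host
         * where int were wider than 32 bits, uval would be promoted to int
         * instead and the comparison would never be true. */
        printf("%d\n", uval == -1);

        /* A portable spelling: compare against the unsigned all-ones value. */
        printf("%d\n", uval == UINT32_MAX);
        return 0;
    }
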
Thomas Huth
31ce0adb79 configure: Check for libfdt version 1.4.0
Some recent patches require a function from libfdt version 1.4.0,
so we should check for this version during the configure step
already. Unfortunately, there does not seem to be a proper #define
for the version number in the libfdt headers. So alternatively,
we check for the availability of the required function
fdt_get_property_by_offset() instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:49 +02:00
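
A sketch of such a configure-time probe: the test program below only needs to compile and link against libfdt, which it can only do if fdt_get_property_by_offset() (new in 1.4.0) is available.

    #include <libfdt.h>

    int main(int argc, char **argv)
    {
        int len;

        /* Never executed; we only care that this compiles and links. */
        if (argc > 1000) {
            (void)fdt_get_property_by_offset(argv[0], 0, &len);
        }
        return 0;
    }
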
Thomas Huth
28f490b24a dtc: Update dtc / libfdt submodule to version 1.4.0
Since some recent patches require libfdt version 1.4.0,
let's update the dtc submodule to this version.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:49 +02:00
Markus Armbruster
62e9cd771c macio: Convert to realize()
Convert device models "macio-oldworld" and "macio-newworld".

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
2015-06-03 23:56:49 +02:00
Marcel Apfelbaum
814550d73a docs: Add PXB documentation
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:19 +02:00
Marcel Apfelbaum
0f6dd8e1d5 acpi: fix PXB behaviour if used with unsupported BIOS
PXB does not work with BIOSes that lack support for it, but it should
not interfere with normal OS operation. We no longer ship such BIOSes,
but it is reasonable to keep the work-around until the BIOS bundled
with QEMU is updated.

Fix this by not adding PXB mem/IO chunks to _CRS
if they weren't configured by BIOS.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:18 +02:00
Marcel Apfelbaum
0e79e51a7d hw/pxb: add numa_node parameter
The pxb can be attached to an existing NUMA node by specifying a
numa_node option that equals the desired NUMA nodeid.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:18 +02:00
Marcel Apfelbaum
6a3042b23b hw/pci: add support for NUMA nodes
PCI root buses can be attached to a specific NUMA node.
PCI buses are not attached by default to a NUMA node.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:18 +02:00
Marcel Apfelbaum
0639b00d05 hw/pxb: add map_irq func
The BIOS does not factor the pxb slot number into the IRQ computation,
because the pxb resides on bus 0 and not on the current bus.
However, QEMU routes the irq through bus 0 and adds the pxb slot
to the IRQ computation of the PXB device.

Synchronize the BIOS and QEMU by cancelling out the pxb slot's effect.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:18 +02:00
Marcel Apfelbaum
2118196bb3 hw/pci: inform bios if the system has extra pci root buses
The BIOS looks for 'etc/extra-pci-roots' to decide whether it is
going to scan further buses after the bus 0 tree.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:18 +02:00
Marcel Apfelbaum
40d14bef80 hw/pci: introduce PCI Expander Bridge (PXB)
PXB is a "light-weight" host bridge whose purpose is to enable
the main host bridge to support multiple PCI root buses
for pc machines.

As opposed to a PCI-to-PCI bridge's secondary bus, PXB's bus
is a primary bus and can be associated with a NUMA node
(different from the main host bridge's), allowing the guest OS
to recognize the proximity of a pass-through device to
other resources such as RAM and CPUs.

The PXB is composed of:
 - A primary PCI bus (can be associated with a NUMA node)
   Acts like a normal pci bus and from the functionality point
   of view is an "expansion" of the bus behind the
   main host bridge.
 - A pci-2-pci bridge behind the primary PCI bus where the actual
   devices will be attached.
 - A host-bridge PCI device
   Situated on the bus behind the main host bridge, allows
   the BIOS to configure the bus number and IO/mem resources.
   It does not have its own config/data register for configuration
   cycles, this being handled by the main host bridge.
 - A host-bridge sysbus device, to comply with QEMU's current design.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:18 +02:00
Marcel Apfelbaum
cb2ed8b3c6 hw/pci: removed 'rootbus nr is 0' assumption from qmp_pci_query
Use the newer pci_bus_num to correctly get the root bus number.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:17 +02:00
Marcel Apfelbaum
dcdca29655 hw/acpi: remove from root bus 0 the crs resources used by other buses.
If multiple root buses are used, root bus 0 cannot use all the
pci holes ranges. Remove the IO/mem ranges used by the other
primary buses.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:17 +02:00
Marcel Apfelbaum
a43c6e2762 hw/acpi: add _CRS method for extra root busses
Save the IO/mem/bus numbers ranges assigned to the extra root busses
to be removed from the root bus 0 range.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:17 +02:00
Marcel Apfelbaum
0d8935e337 hw/acpi: add _PRT method for extra PCI root busses
Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:17 +02:00
Marcel Apfelbaum
a4894206e3 hw/acpi: add support for i440fx 'snooping' root busses
If the machine has extra root busses that snoop on the i440fx
host bridge, we need to add them to the ACPI tables so that
guests detect them properly.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:17 +02:00
Marcel Apfelbaum
09e5b81922 hw/pci: extend PCI config access to support devices behind PXB
PXB buses are assumed to be children of bus 0. Look for them
while scanning the buses.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:17 +02:00
Marcel Apfelbaum
ca6c18556c hw/i386: query only for q35/pc when looking for pci host bridge
Because of the PXB hosts we cannot simply query TYPE_PCI_HOST_BRIDGE anymore.
On the i386 arch we only have two PCI hosts (q35 and pc), so we can look
for just those.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:16 +02:00
Marcel Apfelbaum
602141d997 hw/pci: made pci_bus_num a PCIBusClass method
Refactoring it as a method of PCIBusClass will allow
different implementations for subclasses.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:16 +02:00
Marcel Apfelbaum
ce6a28ee05 hw/pci: made pci_bus_is_root a PCIBusClass method
Refactoring it as a method of PCIBusClass will allow
different implementations for subclasses.

Removed the assumption that the root bus does not have a parent
device, because that assumption is specific only to the default
class implementation.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:16 +02:00
Marcel Apfelbaum
32d9ca15ba acpi: add implementation of aml_while() term
Commit 68e6b0af7 (acpi: add aml_while() term) added
the definition of aml_while without the actual implementation.
Implement the term.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Laszlo Ersek <lersek@redhat.com>
2015-06-03 18:19:16 +02:00
Zhu Guihua
ca9b46bcec acpi: add acpi_send_gpe_event() to raise SCI for hotplug
Add a new API named acpi_send_gpe_event() to send hotplug SCI.
This API can be used by pci, cpu and memory hotplug.

This patch is rebased on master.

Signed-off-by: Zhu Guihua <zhugh.fnst@cn.fujitsu.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
2015-06-03 18:19:16 +02:00
Gerd Hoffmann
d5aaa1b045 virtio: 64bit features fixups.
Commit "019a3ed virtio: make features 64bit wide" missed a few changes,
as I've noticed while trying to rebase the virtio-1 branch to latest
master.  This patch adds them.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-06-03 18:19:16 +02:00
Juan Quintela
977ad992f1 TPM: fix build with tpm disabled
The failure was introduced by a previous commit.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-06-03 18:19:15 +02:00
Jason Wang
6652d0811c virtio-pci: don't try to mask or unmask vqs without notifiers
We should validate the vq index against nvqs_with_notifiers. Otherwise we may
try to mask or unmask the vector for vqs without notifiers (e.g. the control
vq). This leads qemu to abort in kvm_irqchip_commit_routes() when trying to
boot a win8.1 guest.

Fixes 851c2a75a6 ("virtio-pci: speedup MSI-X
masking and unmasking")

Reported-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-06-03 18:19:15 +02:00
Marcel Apfelbaum
557772f26b hw/q35: fix floppy controller definition in ich9
In DSDT FDC0 declares the IO region as IO(Decode16, 0x03F2, 0x03F2, 0x00, 0x04).
Use the same in lpc_ich9 initialization code.
Now the floppy drive is detected correctly on Windows.

Signed-off-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-06-03 18:19:15 +02:00
Michael S. Tsirkin
cb3d37a93c acpi: add missing ssdt
commit 5cb18b3d7b
    TPM2 ACPI table support

was missing a file, so the build with iasl fails
(a build without iasl works, since it uses the generated
hex files).

Reported-by: "Daniel P. Berrange" <berrange@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-06-03 18:19:15 +02:00
Sascha Silbe
b853d4cbf2 s390x/kvm: always ignore empty vcpu interrupt state
kvm_s390_vcpu_interrupt_pre_save() and
kvm_s390_vcpu_interrupt_post_load() are essentially no-ops on hosts
without KVM_CAP_S390_IRQ_STATE. Move the capability check after the
check for saved IRQ state in kvm_s390_vcpu_interrupt_post_load() so that
migration between hosts without KVM_CAP_S390_IRQ_STATE (including save /
restore on the same host) continues to work.

Fixes: 3cda44f7ba ("s390x/kvm: migrate vcpu interrupt state")
Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Reviewed-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2015-06-03 18:07:09 +02:00
Jason J. Herne
2a72ea5f66 virtio-ccw/migration: Migrate config vector for virtio devices
virtio_ccw_{save|load}_config are missing code to save and restore a vdev's
config_vector value. This causes some virtio devices to become disabled
following a migration.

This patch fixes a bug whereby the qmp/hmp balloon command (virsh setmem)
silently fails to update the guest's available memory because the device was not
properly migrated.

This will break compatibility, but vmstate_s390_cpu was bumped from
version 2 to version 4 between v2.3.0 and v2.4.0 without a compat
handler. Furthermore, there is no production environment yet so
migration is fenced anyway between any relevant version of 2.3 and 2.4.

Signed-off-by: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Message-Id: <1433343843-803-1-git-send-email-jjherne@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2015-06-03 18:07:05 +02:00
Pierre Morel
de6a92185e virtio-ccw: add support for 9pfs
This patch adds 9pfs support for virtio-ccw
by registering the virtio_ccw_9p_info type
and adding associated callbacks.

Signed-off-by: Pierre Morel <pmorel@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2015-06-03 18:06:45 +02:00
Alex Bennée
de3852877f configure: postfix --extra-cflags to QEMU_CFLAGS
It makes sense that extra-cflags should be appended after the normal
CFLAGS so they don't get overridden by default behaviour. This way if
you specify something like:

  ./configure --extra-cflags="-O0"

You will see the requested behaviour.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 16:04:39 +03:00
Sai Pavan Boddu
2801339f2f cadence_gem: Fix Rx buffer size field mask
This patch corrects the Rx buffer size field mask to mask bits 23 to 16
to match Xilinx UG585 documentation.

Signed-off-by: Sai Pavan Boddu <saipava@xilinx.com>
Reviewed-by: Alistair Francis <alistair.francis@xilinx.com>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 16:03:03 +03:00
Michael Tokarev
8b8f1c7e9d slirp: use less predictable directory name in /tmp for smb config (CVE-2015-4037)
In this version I used mkdtemp(3) which is:

        _BSD_SOURCE
        || /* Since glibc 2.10: */
            (_POSIX_C_SOURCE >= 200809L || _XOPEN_SOURCE >= 700)

(POSIX.1-2008), so it should be available on systems we care about.

While at it, reset the resulting directory name within the smb structure
on error so the cleanup function won't try to remove a directory which
we failed to create.

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
2015-06-03 14:21:45 +03:00
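
A minimal standalone sketch of the mkdtemp(3) pattern (the template path below is an example, not the exact one used by the smb code):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void)
    {
        char template[] = "/tmp/qemu-smb.XXXXXX";
        char *dir = mkdtemp(template);   /* creates a mode-0700 directory
                                            with an unpredictable name */

        if (!dir) {
            fprintf(stderr, "mkdtemp: %s\n", strerror(errno));
            return 1;   /* nothing was created, so nothing to clean up */
        }
        printf("smb config dir: %s\n", dir);
        return 0;
    }
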
Paolo Bonzini
b6b099541d translate-all: delete prototype for non-existent function
Missed in commit 3a808cc40

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Dr. David Alan Gilbert
1597051b84 Add -incoming help text
The help/man text for

-incoming defer

didn't make it through the merge of the code that implemented it.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
26c8acb3f3 hw/display/tc6393xb.c: Fix misusing qemu_allocate_irqs for single irq
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
294972ce54 hw/arm/nseries.c: Fix misusing qemu_allocate_irqs for single irq
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
5429273615 hw/alpha/typhoon.c: Fix misusing qemu_allocate_irqs for single irq
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
2c85fad022 hw/unicore32/puv3.c: Fix misusing qemu_allocate_irqs for single irq
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
a9c8a0d8d4 hw/lm32/milkymist.c: Fix misusing qemu_allocate_irqs for single irq
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
d4ef00af25 hw/lm32/lm32_boards.c: Fix misusing qemu_allocate_irqs for single irq
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
aaaee0b273 hw/ppc/prep.c: Fix misusing qemu_allocate_irqs for single irq
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
ca43b97b5f hw/sparc/sun4m.c: Fix misusing qemu_allocate_irqs for single irq
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
b64127244d hw/timer/arm_timer.c: Fix misusing qemu_allocate_irqs for single irq
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
5105505e65 hw/isa/i82378.c: Fix misusing qemu_allocate_irqs for single irq
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
aff0d5e57a hw/isa/lpc_ich9.c: Fix misusing qemu_allocate_irqs for single irq
Since ich9_lpc_pm_init() only requests one irq, let it just call
qemu_allocate_irq().

Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
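
A standalone illustration of the misuse pattern addressed by this series of fixes, using mock allocators in place of QEMU's qemu_allocate_irqs()/qemu_allocate_irq():

    #include <stdlib.h>

    typedef struct IRQ { int n; } IRQ;

    /* Mock of the array-returning helper: the caller owns the array
     * as well as every element. */
    static IRQ **mock_allocate_irqs(int count)
    {
        IRQ **a = calloc(count, sizeof(*a));
        for (int i = 0; i < count; i++) {
            a[i] = calloc(1, sizeof(IRQ));
            a[i]->n = i;
        }
        return a;
    }

    /* Mock of the single-irq helper: one object, nothing extra to track. */
    static IRQ *mock_allocate_irq(int n)
    {
        IRQ *irq = calloc(1, sizeof(IRQ));
        irq->n = n;
        return irq;
    }

    int main(void)
    {
        /* Old pattern: keep element 0 and drop the array -> the array leaks. */
        IRQ **array = mock_allocate_irqs(1);
        IRQ *sci_old = array[0];

        /* New pattern: request exactly the one irq that is needed. */
        IRQ *sci_new = mock_allocate_irq(0);

        free(sci_old);
        free(array);          /* the step the old callers typically forgot */
        free(sci_new);
        return 0;
    }
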
Shannon Zhao
0b0cc076b7 hw/i386/pc: Fix misusing qemu_allocate_irqs for single irq
Since pc_allocate_cpu_irq() only requests one irq, let it just call
qemu_allocate_irq().

Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
9ff7f5bddb hw/intc/exynos4210_gic.c: Fix memory leak by adjusting order
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
9f9b026dc6 hw/arm/omap_sx1.c: Fix memory leak spotted by valgrind
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
f19377bf23 hw/ppc/e500.c: Fix memory leak
Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
c18f855697 hw/alpha/dp264.c: Fix memory leak spotted by valgrind
valgrind complains about:
==7055== 58 bytes in 1 blocks are definitely lost in loss record 1,471 of 2,192
==7055==    at 0x4C2845D: malloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
==7055==    by 0x24410F: malloc_and_trace (vl.c:2556)
==7055==    by 0x64C770E: g_malloc (in /usr/lib64/libglib-2.0.so.0.3600.3)
==7055==    by 0x64DEFD7: g_strndup (in /usr/lib64/libglib-2.0.so.0.3600.3)
==7055==    by 0x650181A: g_vasprintf (in /usr/lib64/libglib-2.0.so.0.3600.3)
==7055==    by 0x64DF0CC: g_strdup_vprintf (in /usr/lib64/libglib-2.0.so.0.3600.3)
==7055==    by 0x64DF188: g_strdup_printf (in /usr/lib64/libglib-2.0.so.0.3600.3)
==7055==    by 0x242F81: qemu_find_file (vl.c:2121)
==7055==    by 0x217A32: clipper_init (dp264.c:105)
==7055==    by 0x2484DA: main (vl.c:4249)

Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
bd4baf6eeb vl: fix memory leak spotted by valgrind
valgrind complains about:
==9276== 13 bytes in 1 blocks are definitely lost in loss record 1,046 of 3,673
==9276==    at 0x4C2845D: malloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
==9276==    by 0x2EAFBB: malloc_and_trace (vl.c:2556)
==9276==    by 0x64C770E: g_malloc (in /usr/lib64/libglib-2.0.so.0.3600.3)
==9276==    by 0x4A28BD: addr_to_string (vnc.c:123)
==9276==    by 0x4A29AD: vnc_socket_local_addr (vnc.c:139)
==9276==    by 0x4A9AFE: vnc_display_local_addr (vnc.c:3240)
==9276==    by 0x2EF4FE: main (vl.c:4321)

Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Peter Crosthwaite
7df057bac3 device-tree: Make a common-obj
There is no reason for the device tree API to be built per-target,
so build it as a common-obj. There is also an extraneous inclusion
of config.h that needs to be removed.

Cc: Alexander Graf <agraf@suse.de>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
d370dfa9f3 hw/i386/acpi-build: decref after use
valgrind complains about:
==16447== 48 bytes in 2 blocks are definitely lost in loss record 2,033 of 3,310
==16447==    at 0x4C2845D: malloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
==16447==    by 0x2E4FD7: malloc_and_trace (vl.c:2546)
==16447==    by 0x64C770E: g_malloc (in /usr/lib64/libglib-2.0.so.0.3600.3)
==16447==    by 0x53EC3F: qint_from_int (qint.c:33)
==16447==    by 0x53B426: qmp_output_type_int (qmp-output-visitor.c:162)
==16447==    by 0x539257: visit_type_uint32 (qapi-visit-core.c:147)
==16447==    by 0x471D07: property_get_uint32_ptr (object.c:1651)
==16447==    by 0x47000C: object_property_get (object.c:822)
==16447==    by 0x472428: object_property_get_qobject (qom-qobject.c:37)
==16447==    by 0x25701A: build_append_pci_bus_devices (acpi-build.c:520)
==16447==    by 0x25902E: build_ssdt (acpi-build.c:1004)
==16447==    by 0x25A0A8: acpi_build (acpi-build.c:1420)

Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
6e38a4ba78 hw/ide/pci: Fix memory leak
valgrind complains about:
==16447== 16 bytes in 2 blocks are definitely lost in loss record 1,304 of 3,310
==16447==    at 0x4C2845D: malloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
==16447==    by 0x2E4FD7: malloc_and_trace (vl.c:2546)
==16447==    by 0x64C770E: g_malloc (in /usr/lib64/libglib-2.0.so.0.3600.3)
==16447==    by 0x36FB47: qemu_extend_irqs (irq.c:55)
==16447==    by 0x36FBD3: qemu_allocate_irqs (irq.c:64)
==16447==    by 0x3B4B44: bmdma_init (pci.c:464)
==16447==    by 0x3B547B: pci_piix_init_ports (piix.c:144)
==16447==    by 0x3B55D2: pci_piix_ide_realize (piix.c:164)
==16447==    by 0x3EAEC6: pci_qdev_realize (pci.c:1790)
==16447==    by 0x36C685: device_set_realized (qdev.c:1058)
==16447==    by 0x47179E: property_set_bool (object.c:1514)
==16447==    by 0x470098: object_property_set (object.c:837)

Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:24 +03:00
Shannon Zhao
2ba154cf4e hw/i386/pc_piix: Fix memory leak
valgrind complains about:
==16447== 8 bytes in 1 blocks are definitely lost in loss record 552 of 3,310
==16447==    at 0x4C2845D: malloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
==16447==    by 0x2E4FD7: malloc_and_trace (vl.c:2546)
==16447==    by 0x64C770E: g_malloc (in /usr/lib64/libglib-2.0.so.0.3600.3)
==16447==    by 0x36FB47: qemu_extend_irqs (irq.c:55)
==16447==    by 0x36FBD3: qemu_allocate_irqs (irq.c:64)
==16447==    by 0x24E622: pc_init1 (pc_piix.c:287)
==16447==    by 0x24E76A: pc_init_pci (pc_piix.c:310)
==16447==    by 0x2E9360: main (vl.c:4226)

==16447== 128 bytes in 1 blocks are definitely lost in loss record 2,569 of 3,310
==16447==    at 0x4C2845D: malloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so)
==16447==    by 0x2E4FD7: malloc_and_trace (vl.c:2546)
==16447==    by 0x64C770E: g_malloc (in /usr/lib64/libglib-2.0.so.0.3600.3)
==16447==    by 0x36FB47: qemu_extend_irqs (irq.c:55)
==16447==    by 0x36FBD3: qemu_allocate_irqs (irq.c:64)
==16447==    by 0x25BEB2: kvm_i8259_init (i8259.c:133)
==16447==    by 0x24E1F1: pc_init1 (pc_piix.c:219)
==16447==    by 0x24E76A: pc_init_pci (pc_piix.c:310)
==16447==    by 0x2E9360: main (vl.c:4226)

Signed-off-by: Shannon Zhao <zhaoshenglong@huawei.com>
Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:23 +03:00
Chen Hanxiao
5708b2b736 docs/writing-qmp-commands: fix a typo
s/interation/iteration

Signed-off-by: Chen Hanxiao <chenhanxiao@cn.fujitsu.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:23 +03:00
Peter Krempa
b8981dc9aa util: socket: Add missing localaddr and localport option for DGRAM socket
The 'socket_optslist' structure does not contain the 'localaddr' and
'localport' options that are parsed in case you are creating a
'connect' type UDP character device.

I've noticed it happening after commit f43e47dbf6
made qemu abort() after seeing the invalid option.

A minimal reproducer for the case is:
$ qemu-system-x86_64 -chardev udp,id=charrng0,host=127.0.0.1,port=1234,localaddr=,localport=1234
qemu-system-x86_64: -chardev udp,id=charrng0,host=127.0.0.1,port=1234,localaddr=,localport=1234: Invalid parameter 'localaddr'
Aborted (core dumped)

Prior to the commit mentioned above the error would be printed but the
value for localaddr and localport was simply ignored. I did not go
through the code to find out when it was broken.

Add the two fields so that the options can again be parsed correctly and
qemu doesn't abort().

Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=1220252

Signed-off-by: Peter Krempa <pkrempa@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:23 +03:00
Peter Crosthwaite
a2f533da00 microblaze: cpu: Delete MMAP_SHIFT definition
Just fall back on the default of 12 like other architectures. This
allows changing the system-mode-affecting definition of
TARGET_PAGE_BITS without affecting microblaze linux-user.

Signed-off-by: Peter Crosthwaite <crosthwaite.peter@gmail.com>
Reviewed-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:23 +03:00
Fam Zheng
44f192f364 iscsi: Remove pointless runtime check of macro value
raw_bsd already has QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512), so iscsi
should relax.

Signed-off-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2015-06-03 14:21:23 +03:00
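
A standalone sketch of that kind of compile-time check, using a simplified stand-in for QEMU_BUILD_BUG_ON based on C11 _Static_assert:

    #define BDRV_SECTOR_SIZE 512

    /* Fails the build, rather than the running program, if the condition
     * is true - so no runtime check is needed at all. */
    #define BUILD_BUG_ON(cond) _Static_assert(!(cond), #cond)

    BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);

    int main(void)
    {
        return 0;
    }
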
Ikey Doherty
1b93c9a104 arch_init: Drop target-x86_64.conf
The target-x86_64.conf sysconfig file has been empty and essentially ignored
now for several years. This change removes the unused file to enable moving
towards a stateless configuration.

Signed-off-by: Ikey Doherty <michael.i.doherty@intel.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-06-02 15:15:52 -03:00
Eduardo Habkost
38e5c119c2 target-i386: Register QOM properties for feature flags
This uses the feature name arrays to register QOM properties for feature
flags. This simply adds properties that can be configured using -global,
but doesn't change x86_cpu_parse_featurestr() to use them yet.

Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-06-02 15:15:52 -03:00
Zhu Guihua
be9f8a0872 apic: convert ->busdev.qdev casts to C casts
Use C casts to avoid accessing ICCDevice's qdev field
directly.

Signed-off-by: Zhu Guihua <zhugh.fnst@cn.fujitsu.com>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Acked-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-06-02 15:15:52 -03:00
Eduardo Habkost
458cf469f4 target-i386: Fix signedness of MSR_IA32_APICBASE_BASE
Existing definition triggers the following when using clang
-fsanitize=undefined:

    hw/intc/apic_common.c:314:55: runtime error: left shift of 1048575 by 12
        places cannot be represented in type 'int'

Fix it so we won't try to shift a 1 to the sign bit of a signed integer.

Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-06-02 15:15:52 -03:00
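
A small standalone example of the shift in question (the macro name here is illustrative, not the real MSR_IA32_APICBASE_BASE definition):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* 0xfffff << 12 would set bit 31 of a signed int: undefined behaviour.
     * Using an unsigned 64-bit constant keeps the shift well defined. */
    #define APICBASE_BASE_FIXED (0xfffffULL << 12)

    int main(void)
    {
        printf("mask = 0x%" PRIx64 "\n", (uint64_t)APICBASE_BASE_FIXED);
        return 0;
    }
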
Andreas Färber
0e3bd56294 pc: Ensure non-zero CPU ref count after attaching to ICC bus
Setting the parent bus of a device increases its ref count, which we
ultimately want to level out. However it is only safe to do so after the
last reference to the device in local code, as qom-set or similar operations
might decrease the ref count.

Therefore move the object_unref() from pc_new_cpu() into its callers.

The APIC operations on the last CPU in pc_cpus_init() are still potentially
insecure, but that is beyond the scope of this code movement.

Signed-off-by: Andreas Färber <afaerber@suse.de>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2015-06-02 15:15:52 -03:00
Kevin Wolf
4964e18e49 fdc-test: Test state for existing cases more thoroughly
This just adds a few additional checks of the MSR and interrupt pin to
the already existing test cases.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1432214378-31891-9-git-send-email-kwolf@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
2015-06-02 13:34:45 -04:00
Kevin Wolf
6cc8a11c84 fdc: Fix MSR.RQM flag
The RQM bit in MSR should be set whenever the guest is supposed to
access the FIFO, and it should be cleared in all other cases. This is
important so the guest can't continue writing/reading the FIFO beyond
the length that it's supposed to access (see CVE-2015-3456).

Commit e9077462 fixed the CVE by adding code that avoids the buffer
overflow; however it doesn't correct the wrong behaviour of the floppy
controller which should already have cleared RQM.

Currently, RQM stays set all the time and during all phases while a
command is being processed. This is error-prone because the command has
to explicitly clear the flag if it doesn't need data (and indeed, the
two buggy commands that are the culprits for the CVE just forgot to do
that).

This patch clears RQM as soon as all expected bytes have been
received. If the FIFO is used in the next phase, the flag has to be
set explicitly there.

It also clears RQM after receiving all bytes, even if the phase
transition immediately sets it again. While this is technically not
necessary at the moment, because the state between clearing and setting
RQM is not observable by the guest, it is more explicit and matches how
real hardware works. It will actually become necessary in qemu once
asynchronous code paths are introduced.

This alone should have been enough to fix the CVE, but now we have two
lines of defense - even better.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1432214378-31891-8-git-send-email-kwolf@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
2015-06-02 13:34:44 -04:00
Kevin Wolf
f6c2d1d842 fdc: Disentangle phases in fdctrl_read_data()
This commit makes similar improvements as have already been made to the
write function: Instead of relying on a flag in the MSR to distinguish
controller phases, use the explicit phase that we store now. Assertions
of the right MSR flags are added.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1432214378-31891-7-git-send-email-kwolf@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
2015-06-02 13:34:44 -04:00
Kevin Wolf
d275b33d76 fdc: Code cleanup in fdctrl_write_data()
Factor out a few common lines of code, reformat, improve comments.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1432214378-31891-6-git-send-email-kwolf@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
2015-06-02 13:34:44 -04:00
Kevin Wolf
5b0a25e8d2 fdc: Use phase in fdctrl_write_data()
Instead of relying on a flag in the MSR to distinguish controller phases,
use the explicit phase that we store now. Assertions of the right MSR
flags are added.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1432214378-31891-5-git-send-email-kwolf@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
2015-06-02 13:34:44 -04:00
Kevin Wolf
85d291a08c fdc: Introduce fdctrl->phase
The floppy controller spec describes three different controller phases,
which are currently not explicitly modelled in our emulation. Instead,
each phase is represented by a combination of flags in registers.

This patch makes explicit in which phase the controller currently is.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Acked-by: John Snow <jsnow@redhat.com>
Message-id: 1432214378-31891-4-git-send-email-kwolf@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
2015-06-02 13:34:44 -04:00
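
An illustrative sketch of the idea (the enum and struct names below are made up, not the identifiers introduced by the patch):

    /* Track the controller phase explicitly instead of inferring it from
     * combinations of MSR flag bits. */
    typedef enum FloppyPhase {
        PHASE_COMMAND,      /* guest writes the bytes of a new command   */
        PHASE_EXECUTION,    /* command runs, data transfer in progress   */
        PHASE_RESULT,       /* guest reads status bytes from the FIFO    */
    } FloppyPhase;

    typedef struct FloppyCtrlSketch {
        FloppyPhase phase;
        /* ... registers, FIFO and drive state would live here ... */
    } FloppyCtrlSketch;

    int main(void)
    {
        FloppyCtrlSketch ctrl = { .phase = PHASE_COMMAND };

        ctrl.phase = PHASE_EXECUTION;   /* command fully received        */
        ctrl.phase = PHASE_RESULT;      /* execution done, expose status */
        return ctrl.phase == PHASE_RESULT ? 0 : 1;
    }
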
Kevin Wolf
83a260135f fdc: Rename fdctrl_set_fifo() to fdctrl_to_result_phase()
What callers really do with this function is to switch from execution
phase (including data transfers) to result phase where the guest can
read out one or more status bytes from the FIFO (the number depends on
the command).

Rename the function accordingly.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1432214378-31891-3-git-send-email-kwolf@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
2015-06-02 13:34:44 -04:00
Kevin Wolf
07e415f239 fdc: Rename fdctrl_reset_fifo() to fdctrl_to_command_phase()
What all callers of fdctrl_reset_fifo() really want to do is to start
the command phase, where writes to the data port initiate a new command.

The function doesn't only clear the FIFO, but also sets up the state so
that a new command can be received. Rename it to reflect this.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: John Snow <jsnow@redhat.com>
Message-id: 1432214378-31891-2-git-send-email-kwolf@redhat.com
Signed-off-by: John Snow <jsnow@redhat.com>
2015-06-02 13:34:44 -04:00
Peter Maydell
a67bfbb9e4 Merge remote-tracking branch 'remotes/armbru/tags/pull-monitor-2015-06-02' into staging
Monitor patches

# gpg: Signature made Tue Jun  2 09:16:07 2015 BST using RSA key ID EB918653
# gpg: Good signature from "Markus Armbruster <armbru@redhat.com>"
# gpg:                 aka "Markus Armbruster <armbru@pond.sub.org>"

* remotes/armbru/tags/pull-monitor-2015-06-02: (21 commits)
  monitor: Change return type of monitor_cur_is_qmp() to bool
  monitor: Rename monitor_ctrl_mode() to monitor_is_qmp()
  monitor: Turn int command_mode into bool in_command_mode
  monitor: Drop do_qmp_capabilities()'s superfluous QMP check
  monitor: Unbox Monitor member mc and rename to qmp
  monitor: Rename monitor_control_read(), monitor_control_event()
  monitor: Rename handle_user_command() to handle_hmp_command()
  monitor: Limit QError use to command handlers
  monitor: Inline monitor_has_error() into its only caller
  monitor: Wean monitor_protocol_emitter() off mon->error
  monitor: Propagate errors through invalid_qmp_mode()
  monitor: Propagate errors through qmp_check_input_obj()
  monitor: Propagate errors through qmp_check_client_args()
  monitor: Drop unused "new" HMP command interface
  monitor: Use trad. command interface for HMP pcie_aer_inject_error
  monitor: Use traditional command interface for HMP device_add
  monitor: Use traditional command interface for HMP drive_del
  monitor: Convert client_migrate_info to QAPI
  monitor: Improve and document client_migrate_info protocol error
  monitor: Clean up after previous commit
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 18:23:28 +01:00
Peter Maydell
42d58e7c67 Merge remote-tracking branch 'remotes/sstabellini/tags/xen-15-06-02-tag' into staging
XSA 128 129 130 131

# gpg: Signature made Tue Jun  2 16:46:38 2015 BST using RSA key ID 70E1AE90
# gpg: Good signature from "Stefano Stabellini <stefano.stabellini@eu.citrix.com>"

* remotes/sstabellini/tags/xen-15-06-02-tag:
  xen/pt: unknown PCI config space fields should be read-only
  xen/pt: add a few PCI config space field descriptions
  xen/pt: mark reserved bits in PCI config space fields
  xen/pt: mark all PCIe capability bits read-only
  xen/pt: split out calculation of throughable mask in PCI config space handling
  xen/pt: correctly handle PM status bit
  xen/pt: consolidate PM capability emu_mask
  xen/MSI: don't open-code pass-through of enable bit modifications
  xen/MSI-X: limit error messages
  xen: don't allow guest to control MSI mask register
  xen: properly gate host writes of modified PCI CFG contents

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 16:47:31 +01:00
Eric Auger
94edf02c4c hw/arm/virt: change indentation in a15memmap
Re-indent the a15memmap entries after the VIRT_PLATFORM_BUS introduction

Signed-off-by: Eric Auger <eric.auger@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Shannon Zhao <shannon.zhao@linaro.org>
Message-id: 1433244554-12898-5-git-send-email-eric.auger@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 16:31:18 +01:00
Eric Auger
5f7a5a0edc hw/arm/virt: add dynamic sysbus device support
Allows sysbus devices to be instantiated from command line by
using -device option. Machvirt creates a platform bus at init.
The dynamic sysbus devices are attached to this platform bus device.

The platform bus device registers a machine init done notifier
whose role will be to bind the dynamic sysbus devices. Indeed
dynamic sysbus devices are created after machine init.

machvirt also registers a notifier that will build the device
tree nodes for the platform bus and its children dynamic sysbus
devices.

Signed-off-by: Eric Auger <eric.auger@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1433244554-12898-4-git-send-email-eric.auger@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 16:31:18 +01:00
Eric Auger
ac9d32e396 hw/arm/boot: arm_load_kernel implemented as a machine init done notifier
Device tree nodes for the platform bus and its children dynamic sysbus
devices are added in a machine init done notifier. To load the dtb once,
after those nodes are built and before ROM freeze, the existing
arm_load_kernel code is moved into a notifier notify function,
arm_load_kernel_notify. arm_load_kernel now only registers the
corresponding notifier.

Machine files that do not support platform bus stay unchanged. Machine
files willing to support dynamic sysbus devices must call arm_load_kernel
before sysbus-fdt's arm_register_platform_bus_fdt_creator to make sure
dynamic sysbus device nodes are integrated in the dtb.

Signed-off-by: Eric Auger <eric.auger@linaro.org>
Reviewed-by: Shannon Zhao <zhaoshenglong@huawei.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1433244554-12898-3-git-send-email-eric.auger@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 16:31:17 +01:00
Jan Beulich
c25bbf1545 xen/pt: unknown PCI config space fields should be read-only
... by default. Add a per-device "permissive" mode similar to pciback's
to allow restoring previous behavior (and hence break security again,
i.e. should be used only for trusted guests).

This is part of XSA-131.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>)
2015-06-02 15:07:01 +00:00
Jan Beulich
a88a3f8871 xen/pt: add a few PCI config space field descriptions
Since the next patch will turn all fields not explicitly described
read-only by default, those fields that have guest writable bits need
to be given explicit descriptors.

This is a preparatory patch for XSA-131.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
2015-06-02 15:07:01 +00:00
Jan Beulich
0ad3393ad0 xen/pt: mark reserved bits in PCI config space fields
The adjustments are solely to make the subsequent patches work right
(and hence make the patch set consistent), namely if permissive mode
(introduced by the last patch) gets used (as both reserved registers
and reserved fields must be similarly protected from guest access in
default mode, but the guest should be allowed access to them in
permissive mode).

This is a preparatory patch for XSA-131.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
2015-06-02 15:07:01 +00:00
Jan Beulich
45ebe3916a xen/pt: mark all PCIe capability bits read-only
xen_pt_emu_reg_pcie[]'s PCI_EXP_DEVCAP needs to cover all bits as read-
only to avoid unintended write-back (just a precaution, the field ought
to be read-only in hardware).

This is a preparatory patch for XSA-131.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
2015-06-02 15:07:01 +00:00
Jan Beulich
0e7ef22136 xen/pt: split out calculation of throughable mask in PCI config space handling
This is just to avoid having to adjust that calculation later in
multiple places.

Note that including ->ro_mask in get_throughable_mask()'s calculation
is only an apparent (i.e. benign) behavioral change: For r/o fields it
doesn't matter whether they get passed through - either the same flag
is also set in emu_mask (then there's no change at all) or the field is
r/o in hardware (and hence a write won't change it anyway).

This is a preparatory patch for XSA-131.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Reviewed-by: Anthony PERARD <anthony.perard@citrix.com>
2015-06-02 15:07:01 +00:00
Jan Beulich
c4ff1e68c6 xen/pt: correctly handle PM status bit
xen_pt_pmcsr_reg_write() needs an adjustment to deal with the RW1C
nature of the not passed through bit 15 (PCI_PM_CTRL_PME_STATUS).

This is a preparatory patch for XSA-131.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
2015-06-02 15:07:01 +00:00
Jan Beulich
d61bb2482d xen/pt: consolidate PM capability emu_mask
There's no point in xen_pt_pmcsr_reg_{read,write}() each ORing
PCI_PM_CTRL_STATE_MASK and PCI_PM_CTRL_NO_SOFT_RESET into a local
emu_mask variable - we can have the same effect by setting the field
descriptor's emu_mask member suitably right away. Note that
xen_pt_pmcsr_reg_write() is being retained in order to allow later
patches to be less intrusive.

This is a preparatory patch for XSA-131.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
2015-06-02 15:07:01 +00:00
Jan Beulich
d1d35cf4ff xen/MSI: don't open-code pass-through of enable bit modifications
Without this the actual XSA-131 fix would cause the enable bit to not
get set anymore (due to the write back getting suppressed there based
on the OR of emu_mask, ro_mask, and res_mask).

Note that the fiddling with the enable bit shouldn't really be done by
qemu, but making this work right (via libxc and the hypervisor) will
require more extensive changes, which can be postponed until after the
security issue got addressed.

This is a preparatory patch for XSA-131.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
2015-06-02 15:07:01 +00:00
Jan Beulich
b38ec5ee7a xen/MSI-X: limit error messages
Limit error messages resulting from bad guest behavior to avoid allowing
the guest to cause the control domain's disk to fill.

The first message in pci_msix_write() can simply be deleted, as this
is indeed bad guest behavior, but such out of bounds writes don't
really need to be logged.

The second one is more problematic, as there guest behavior may only
appear to be wrong: For one, the old logic didn't take the mask-all bit
into account. And then this shouldn't depend on host device state (i.e.
the host may have masked the entry without the guest having done so).
Plus these writes shouldn't be dropped even when an entry is unmasked.
Instead, if they can't be made take effect right away, they should take
effect on the next unmasking or enabling operation - the specification
explicitly describes such caching behavior. Until we can validly drop
the message (implementing such caching/latching behavior), issue the
message just once per MSI-X table entry.

Note that the log message in pci_msix_read() similar to the one being
removed here is not an issue: "addr" being of unsigned type, and the
maximum size of the MSI-X table being 32k, entry_nr simply can't be
negative and hence the conditional guarding the issuing of the message will
never be true.

This is XSA-130.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
2015-06-02 15:07:00 +00:00
Jan Beulich
7611dae8a6 xen: don't allow guest to control MSI mask register
It's being used by the hypervisor. For now simply mimic a device not
capable of masking, and fully emulate any accesses a guest may issue
nevertheless as simple reads/writes without side effects.

This is XSA-129.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
2015-06-02 15:07:00 +00:00
Jan Beulich
5c83b2f5b4 xen: properly gate host writes of modified PCI CFG contents
The old logic didn't work as intended when an access spanned multiple
fields (for example a 32-bit access to the location of the MSI Message
Data field with the high 16 bits not being covered by any known field).
Remove it and derive which fields not to write to from the accessed
fields' emulation masks: When they're all ones, there's no point in
doing any host write.

This fixes a secondary issue at once: We obviously shouldn't make any
host write attempt when already the host read failed.

This is XSA-128.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
2015-06-02 15:07:00 +00:00
Eric Auger
11d306b9df hw/arm/sysbus-fdt: helpers for platform bus nodes addition
This new C module will be used by ARM machine files to generate
platform bus node and their dynamic sysbus device tree nodes.

Dynamic sysbus device node addition is done in a machine init
done notifier. arm_register_platform_bus_fdt_creator registers this
notifier and is supposed to be called by ARM machine files that
support the platform bus and its dynamic sysbus devices. Dynamic
sysbus nodes are added only if the user did not provide a dtb.

Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Eric Auger <eric.auger@linaro.org>
Reviewed-by: Shannon Zhao <zhaoshenglong@huawei.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 1433244554-12898-2-git-send-email-eric.auger@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 15:44:28 +01:00
Peter Maydell
4771cd01da target-arm: Remove v8_ prefix from names of non-v8-specific cpreg arrays
The ARMCPRegInfo arrays v8_el3_no_el2_cp_reginfo and v8_el2_cp_reginfo
are actually used on non-v8 CPUs as well. Remove the incorrect v8_
prefix from their names.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Message-id: 1433182716-6400-1-git-send-email-peter.maydell@linaro.org
2015-06-02 15:32:43 +01:00
Eric Auger
9718e4ae36 arm_gicv2m: set kvm_gsi_direct_mapping and kvm_msi_via_irqfd_allowed
After introduction of kvm_arch_msi_data_to_gsi, kvm_gsi_direct_mapping
now can be set on ARM. Also kvm_msi_via_irqfd_allowed can be set,
depending on kernel irqfd support, hence enabling VIRTIO-PCI with
vhost back-end.

Signed-off-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Eric Auger
1850b6b7d0 kvm: introduce kvm_arch_msi_data_to_gsi
On ARM the MSI data corresponds to the shared peripheral interrupt (SPI)
ID. The latter equals the SPI index + 32. To retrieve the SPI index
matching the GSI, an architecture-specific function is introduced.

Signed-off-by: Eric Auger <eric.auger@linaro.org>
Acked-by: Christoffer Dall <christoffer.dall@linaro.org>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
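
A sketch of the arithmetic on ARM as described above (the function name is illustrative; the real hook is kvm_arch_msi_data_to_gsi()):

    #include <stdint.h>
    #include <stdio.h>

    /* MSI data carries the SPI ID; SPI ID = SPI index + 32, and the GSI is
     * the SPI index, so recover it by subtracting 32. */
    static uint32_t msi_data_to_gsi(uint32_t data)
    {
        return data - 32;
    }

    int main(void)
    {
        printf("MSI data 96 -> GSI %u\n", msi_data_to_gsi(96));  /* 64 */
        return 0;
    }
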
Victor CLEMENT
0b2ff2ceb8 pl061: fix wrong calculation of GPIOMIS register
The masked interrupt status register should be the state of the interrupt
after masking.
There should be a logical AND instead of a logical OR between the
interrupt status and the interrupt mask.

Signed-off-by: Victor CLEMENT <victor.clement@openwide.fr>
Reviewed-by: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
Message-id: 1433154824-6927-1-git-send-email-victor.clement@openwide.fr
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
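
In other words, assuming raw-status and mask values like the PL061's GPIORIS
and GPIOIE registers, the fix amounts to the following sketch (names are
illustrative, not QEMU's struct fields):

    #include <stdint.h>

    /* GPIOMIS is the raw interrupt status ANDed with the interrupt mask. */
    static uint8_t pl061_masked_status_sketch(uint8_t raw_status, uint8_t mask)
    {
        return raw_status & mask;   /* previously computed with '|' by mistake */
    }
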
Christoffer Dall
bd204e63a7 target-arm: Add the GICv2m to the virt board
Add a GICv2m device to the virt board to enable MSIs on the generic PCI
host controller.  We allocate 64 SPIs in the IRQ space for now (this can
be increased/decreased later) and map the GICv2m right after the GIC in
the memory map.

Reviewed-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Message-id: 1432897270-7780-5-git-send-email-christoffer.dall@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Christoffer Dall
dfd90a8715 target-arm: Extend the gic node properties
In preparation for adding the GICv2m, which requires address specifiers
and is a subnode of the gic, we extend the gic DT definition to specify
the #address-cells and #size-cells properties and add an empty ranges
property to the DT node, since this is required to add the v2m node as
a child of the gic node.

Note that we must also expand the irq-map to reference the gic with the
right address-cells as a consequence of this change.

Reviewed-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Message-id: 1432897270-7780-4-git-send-email-christoffer.dall@linaro.org
Suggested-by: Shanker Donthineni <shankerd@codeaurora.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Christoffer Dall
770c58f8d1 arm_gicv2m: Add GICv2m widget to support MSIs
The ARM GICv2m widget is a little device that handles MSI interrupt
writes to a trigger register and ties them to a range of interrupt lines
wired to the GIC.  It has a few status/id registers and the interrupt
wires, and that's about it.

A board instantiates the device by setting the base SPI number and the
number of SPIs for the frame.  The base-spi parameter is indexed in the
SPI number space only, so base-spi == 0 means IRQ number 32.  When a
device (the PCI host controller) writes to the trigger register, the
payload is the GIC IRQ number, so we have to subtract 32 from that and
then index into our frame of SPIs.

When instantiating a GICv2m device, tell PCI that we have instantiated
something that can deal with MSIs.  We rely on the board actually wiring
up the GICv2m to the PCI host controller.

Reviewed-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Message-id: 1432897270-7780-3-git-send-email-christoffer.dall@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Christoffer Dall
747d009dca target-arm: Add GIC phandle to VirtBoardInfo
Instead of passing the GIC phandle around between functions, add it to
the VirtBoardInfo just like we do for the clock_phandle.  We are about
to add the v2m phandle as well, and it's easier not having to pass
around a bunch of phandles, return multiple values from functions, etc.

Reviewed-by: Eric Auger <eric.auger@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Message-id: 1432897270-7780-2-git-send-email-christoffer.dall@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Markus Armbruster
57b6d95eb4 Revert "target-arm: Avoid g_hash_table_get_keys()"
Since we now require GLib 2.22+ (commit f40685c), we don't have to
work around lack of g_hash_table_get_keys() anymore.

This reverts commit 82a3a11897.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-id: 1432749090-4698-1-git-send-email-armbru@redhat.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Edgar E. Iglesias
8742d49d6f target-arm: Add TLBI_VAE2{IS}
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1432881807-18164-11-git-send-email-edgar.iglesias@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Edgar E. Iglesias
51da90140b target-arm: Add TLBI_ALLE2
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1432881807-18164-10-git-send-email-edgar.iglesias@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Edgar E. Iglesias
bdb9e2d66a target-arm: Add TLBI_ALLE1{IS}
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1432881807-18164-9-git-send-email-edgar.iglesias@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Edgar E. Iglesias
a57633c08f target-arm: Add TTBR0_EL2
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1432881807-18164-8-git-send-email-edgar.iglesias@gmail.com
[PMM: Switch to preferred opc1/crm order for 64-bit AArch32 cpregs;
 drop unneeded use of vmsa_ttbr_writefn]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Edgar E. Iglesias
ff05f37bab target-arm: Add TPIDR_EL2
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1432881807-18164-7-git-send-email-edgar.iglesias@gmail.com
[PMM: reordered fields into preferred opc0/opc1/crn/crm/opc2 order]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Edgar E. Iglesias
b9cb5323bb target-arm: Add SCTLR_EL2
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1432881807-18164-6-git-send-email-edgar.iglesias@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Edgar E. Iglesias
06ec4c8c9f target-arm: Add TCR_EL2
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1432881807-18164-5-git-send-email-edgar.iglesias@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Edgar E. Iglesias
95f949ac3d target-arm: Add MAIR_EL2
Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1432881807-18164-4-git-send-email-edgar.iglesias@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Edgar E. Iglesias
a903c449b4 target-arm: Break down TLB_LOCKDOWN
Break the overly broad wildcard definition of TLB_LOCKDOWN down to the
v7 level.

Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1432881807-18164-3-git-send-email-edgar.iglesias@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 14:56:25 +01:00
Edgar E. Iglesias
3fc827d591 target-arm: Correct check for non-EL3
This fixes a compile warning from clang 3.5 (the assertion
could never fire).

Signed-off-by: Edgar E. Iglesias <edgar.iglesias@xilinx.com>
Message-id: 1432881807-18164-2-git-send-email-edgar.iglesias@gmail.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: John Snow <jsnow@redhat.com>
[PMM: added note in commit message that this is fixing a build warning]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 13:22:29 +01:00
Peter Maydell
25611aa12b Merge remote-tracking branch 'remotes/kraxel/tags/pull-input-20150602-1' into staging
virtio-input: two small fixups

# gpg: Signature made Tue Jun  2 09:32:51 2015 BST using RSA key ID D3E87138
# gpg: Good signature from "Gerd Hoffmann (work) <kraxel@redhat.com>"
# gpg:                 aka "Gerd Hoffmann <gerd@kraxel.org>"
# gpg:                 aka "Gerd Hoffmann (private) <kraxel@gmail.com>"

* remotes/kraxel/tags/pull-input-20150602-1:
  virtio-input: make virtio devices follow usual naming convention
  virtio-input: const_le16 and const_le32 not build time constant

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 11:25:12 +01:00
Peter Maydell
ef99b3ee06 Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
pc build fix

My last pull breaks build on systems with iasl.
Fix this up.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Mon Jun  1 20:41:08 2015 BST using RSA key ID D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"

* remotes/mst/tags/for_upstream:
  acpi: add missing ssdt

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
2015-06-02 10:20:03 +01:00
Markus Armbruster
489653b5db monitor: Change return type of monitor_cur_is_qmp() to bool
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 10:07:16 +02:00
Markus Armbruster
9f3982f2dc monitor: Rename monitor_ctrl_mode() to monitor_is_qmp()
... and change return type to bool.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 10:07:16 +02:00
Markus Armbruster
f994b2587f monitor: Turn int command_mode into bool in_command_mode
While there, inline the pointless qmp_cmd_mode() wrapper.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 10:07:16 +02:00
Markus Armbruster
6a50636f35 monitor: Drop do_qmp_capabilities()'s superfluous QMP check
Superfluous since commit 30f5041 removed it from HMP.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 10:07:15 +02:00
Markus Armbruster
74358f2a16 monitor: Unbox Monitor member mc and rename to qmp
While there, rename its type as well, from MonitorControl to
MonitorQMP.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 10:07:15 +02:00
Markus Armbruster
c83fe23b58 monitor: Rename monitor_control_read(), monitor_control_event()
... to monitor_qmp_read(), monitor_qmp_event().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 10:07:15 +02:00
Markus Armbruster
7ef6cf6341 monitor: Rename handle_user_command() to handle_hmp_command()
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 10:07:15 +02:00
Markus Armbruster
710aec915d monitor: Limit QError use to command handlers
The previous commits narrowed use of QError to handle_qmp_command()
and its helpers monitor_protocol_emitter(), build_qmp_error_dict().
Narrow it further to just the command handler call: instead of
converting Error to QError throughout handle_qmp_command(), convert
the QError gotten from the command handler to Error, and switch the
helpers from QError to Error.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 10:07:15 +02:00
Markus Armbruster
452e0300a3 monitor: Inline monitor_has_error() into its only caller
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 09:59:14 +02:00
Markus Armbruster
70ea0c5899 monitor: Wean monitor_protocol_emitter() off mon->error
Move mon->error handling to its caller handle_qmp_command().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 09:59:14 +02:00
Markus Armbruster
4086182fcd monitor: Propagate errors through invalid_qmp_mode()
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 09:59:14 +02:00
Markus Armbruster
ba0510aad4 monitor: Propagate errors through qmp_check_input_obj()
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 09:59:14 +02:00
Markus Armbruster
326283aa5d monitor: Propagate errors through qmp_check_client_args()
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 09:59:14 +02:00
Markus Armbruster
8a4f501c09 monitor: Drop unused "new" HMP command interface
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 09:59:13 +02:00
Markus Armbruster
04e00c92ef monitor: Use trad. command interface for HMP pcie_aer_inject_error
All QMP commands use the "new" handler interface (mhandler.cmd_new).
Most HMP commands still use the traditional interface (mhandler.cmd),
but a few use the "new" one.  Complicates handle_user_command() for no
gain, so I'm converting these to the traditional interface.

pcie_aer_inject_error's implementation is split into the
hmp_pcie_aer_inject_error() and pcie_aer_inject_error_print().  The
former is a peculiar crossbreed between HMP and QMP handler.  On
success, it works like a QMP handler: store QDict through ret_data
parameter, return 0.  Printing the QDict is left to
pcie_aer_inject_error_print().  On failure, it works more like an HMP
handler: print error to monitor, return negative number.

To convert to the traditional interface, turn
pcie_aer_inject_error_print() into a command handler wrapping around
hmp_pcie_aer_inject_error().  By convention, this command handler
should be called hmp_pcie_aer_inject_error(), so rename the existing
hmp_pcie_aer_inject_error() to do_pcie_aer_inject_error().

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 09:59:13 +02:00
Markus Armbruster
318660f84a monitor: Use traditional command interface for HMP device_add
All QMP commands use the "new" handler interface (mhandler.cmd_new).
Most HMP commands still use the traditional interface (mhandler.cmd),
but a few use the "new" one.  Complicates handle_user_command() for no
gain, so I'm converting these to the traditional interface.

For device_add, that's easy: just wrap the obvious hmp_device_add()
around do_device_add().

monitor_user_noop() is now unused, drop it.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 09:59:13 +02:00
Markus Armbruster
072ebe6b03 monitor: Use traditional command interface for HMP drive_del
All QMP commands use the "new" handler interface (mhandler.cmd_new).
Most HMP commands still use the traditional interface (mhandler.cmd),
but a few use the "new" one.  Complicates handle_user_command() for no
gain, so I'm converting these to the traditional interface.

For drive_del, that's easy: hmp_drive_del() sheds its unused last
parameter, and its return value, which the caller ignored anyway.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 09:59:13 +02:00
Markus Armbruster
b8a185bc9a monitor: Convert client_migrate_info to QAPI
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 09:59:13 +02:00
Markus Armbruster
13cadefbda monitor: Improve and document client_migrate_info protocol error
The protocol must be spice; vnc isn't implemented.  Fix up the documentation.

Attempts to use vnc or any other unknown protocol yield the misleading
error message "Invalid parameter 'protocol'".  Improve it to
"Parameter 'protocol' expects spice".

Cc: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Gerd Hoffmann <kraxel@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 09:59:13 +02:00
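
The improved check boils down to something like the sketch below
(simplified; the real handler reports the error through QEMU's Error API
rather than a caller-provided buffer):

    #include <stdio.h>
    #include <string.h>

    static int check_protocol_sketch(const char *protocol, char *err, size_t errlen)
    {
        if (strcmp(protocol, "spice") != 0) {
            snprintf(err, errlen, "Parameter 'protocol' expects spice");
            return -1;
        }
        return 0;
    }
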
Markus Armbruster
84add864eb monitor: Clean up after previous commit
Inline qmp_call_cmd() along with its helper handler_audit() into its
only caller handle_qmp_command(), and simplify the result.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 09:59:13 +02:00
Markus Armbruster
65207c59d9 monitor: Drop broken, unused asynchronous command interface
The asynchronous monitor command interface goes back to commit 940cc30
(Jan 2010), which added a third case to command execution.  The hope
back then, according to the commit message, was that all commands would
get converted to the asynchronous interface, killing off the other two
cases.  That didn't happen.

The initial asynchronous commands balloon and info balloon were
converted back to synchronous long ago (commits 96637bc and d72f32),
with commit messages calling the asynchronous interface "not fully
working" and "deprecated".  The only other user went away in commit
3b5704b.

New code generally uses synchronous commands and asynchronous events.

What exactly is still "not fully working" with asynchronous commands?
Well, here's a bug that defeats actual asynchronous use pretty
reliably: the reply's ID is wrong (and has always been wrong) unless
you use the command synchronously!  To reproduce, we need an
asynchronous command, so we have to go back before commit 3b5704b.
Run QEMU with spice:

    $ qemu-system-x86_64 -nodefaults -S -spice port=5900,disable-ticketing -qmp stdio
    {"QMP": {"version": {"qemu": {"micro": 94, "minor": 2, "major": 2}, "package": ""}, "capabilities": []}}

Connect a spice client in another terminal:

    $ remote-viewer spice://localhost:5900

Set up a migration destination dummy in a third terminal:

    $ socat TCP-LISTEN:12345 STDIO

Now paste the following into the QMP monitor:

    { "execute": "qmp_capabilities", "id": "i0" }
    { "execute": "client_migrate_info", "id": "i1", "arguments": { "protocol": "spice", "hostname": "localhost", "port": 12345 } }
    { "execute": "query-kvm", "id": "i2" }

Produces two replies immediately, one to qmp_capabilities, and one to
query-kvm:

    {"return": {}, "id": "i0"}
    {"return": {"enabled": false, "present": true}, "id": "i2"}

Both are correct.  Two lines of debug output from libspice-server not
shown.

Now EOF socat's standard input to make it close the connection.  This
makes the asynchronous client_migrate_info complete.  It replies:

    {"return": {}}

Bug: "id": "i1" is missing.  Two lines of debug output from
libspice-server not shown.  Cherry on top: storage for the missing ID
is leaked.

Get rid of this stuff before somebody hurts himself with it.

Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Luiz Capitulino <lcapitulino@redhat.com>
2015-06-02 09:59:13 +02:00
Michael S. Tsirkin
9e472263b0 acpi: add missing ssdt
commit 5cb18b3d7b
    TPM2 ACPI table support

was missing a file, so the build with iasl fails
(a build without iasl works since it uses the generated
 hex files).

Reported-by: "Daniel P. Berrange" <berrange@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
2015-06-01 21:40:22 +02:00
259 changed files with 10227 additions and 2745 deletions

View File

@@ -486,7 +486,8 @@ F: hw/ppc/prep.c
F: hw/pci-host/prep.[hc]
F: hw/isa/pc87312.[hc]
sPAPR
sPAPR (pseries)
M: David Gibson <david@gibson.dropbear.id.au>
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Supported

View File

@@ -389,13 +389,8 @@ ifneq (,$(findstring qemu-ga,$(TOOLS)))
endif
endif
install-confdir:
$(INSTALL_DIR) "$(DESTDIR)$(qemu_confdir)"
install-sysconfig: install-datadir install-confdir
$(INSTALL_DATA) $(SRC_PATH)/sysconfigs/target/target-x86_64.conf "$(DESTDIR)$(qemu_confdir)"
install: all $(if $(BUILD_DOCS),install-doc) install-sysconfig \
install: all $(if $(BUILD_DOCS),install-doc) \
install-datadir install-localstatedir
ifneq ($(TOOLS),)
$(call install-prog,$(TOOLS),$(DESTDIR)$(bindir))

View File

@@ -76,6 +76,8 @@ common-obj-$(CONFIG_SECCOMP) += qemu-seccomp.o
common-obj-$(CONFIG_SMARTCARD_NSS) += $(libcacard-y)
common-obj-$(CONFIG_FDT) += device_tree.o
######################################################################
# qapi

View File

@@ -1,5 +1,7 @@
# -*- Mode: makefile -*-
BUILD_DIR?=$(CURDIR)/..
include ../config-host.mak
include config-target.mak
include config-devices.mak
@@ -129,7 +131,6 @@ ifdef CONFIG_SOFTMMU
obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
obj-y += qtest.o bootdevice.o
obj-y += hw/
obj-$(CONFIG_FDT) += device_tree.o
obj-$(CONFIG_KVM) += kvm-all.o
obj-y += memory.o savevm.o cputlb.o
obj-y += memory_mapping.o

View File

@@ -136,7 +136,6 @@ static struct defconfig_file {
bool userconfig;
} default_config_files[] = {
{ CONFIG_QEMU_CONFDIR "/qemu.conf", true },
{ CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true },
{ NULL }, /* end of list */
};
@@ -610,52 +609,10 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
return (next - base) << TARGET_PAGE_BITS;
}
static inline bool migration_bitmap_set_dirty(ram_addr_t addr)
{
bool ret;
int nr = addr >> TARGET_PAGE_BITS;
ret = test_and_set_bit(nr, migration_bitmap);
if (!ret) {
migration_dirty_pages++;
}
return ret;
}
static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
ram_addr_t addr;
unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
/* start address is aligned at the start of a word? */
if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) {
int k;
int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION];
for (k = page; k < page + nr; k++) {
if (src[k]) {
unsigned long new_dirty;
new_dirty = ~migration_bitmap[k];
migration_bitmap[k] |= src[k];
new_dirty &= src[k];
migration_dirty_pages += ctpopl(new_dirty);
src[k] = 0;
}
}
} else {
for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
if (cpu_physical_memory_get_dirty(start + addr,
TARGET_PAGE_SIZE,
DIRTY_MEMORY_MIGRATION)) {
cpu_physical_memory_reset_dirty(start + addr,
TARGET_PAGE_SIZE,
DIRTY_MEMORY_MIGRATION);
migration_bitmap_set_dirty(start + addr);
}
}
}
migration_dirty_pages +=
cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length);
}

View File

@@ -216,10 +216,9 @@ static int get_event_by_name(const char *name, BlkDebugEvent *event)
struct add_rule_data {
BDRVBlkdebugState *s;
int action;
Error **errp;
};
static int add_rule(QemuOpts *opts, void *opaque)
static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
{
struct add_rule_data *d = opaque;
BDRVBlkdebugState *s = d->s;
@@ -230,10 +229,10 @@ static int add_rule(QemuOpts *opts, void *opaque)
/* Find the right event for the rule */
event_name = qemu_opt_get(opts, "event");
if (!event_name) {
error_setg(d->errp, "Missing event name for rule");
error_setg(errp, "Missing event name for rule");
return -1;
} else if (get_event_by_name(event_name, &event) < 0) {
error_setg(d->errp, "Invalid event name \"%s\"", event_name);
error_setg(errp, "Invalid event name \"%s\"", event_name);
return -1;
}
@@ -319,8 +318,7 @@ static int read_config(BDRVBlkdebugState *s, const char *filename,
d.s = s;
d.action = ACTION_INJECT_ERROR;
d.errp = &local_err;
qemu_opts_foreach(&inject_error_opts, add_rule, &d, 1);
qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -328,7 +326,7 @@ static int read_config(BDRVBlkdebugState *s, const char *filename,
}
d.action = ACTION_SET_STATE;
qemu_opts_foreach(&set_state_opts, add_rule, &d, 1);
qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;

View File

@@ -1323,13 +1323,6 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
const char *filename;
int i, ret = 0;
if ((BDRV_SECTOR_SIZE % 512) != 0) {
error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
"BDRV_SECTOR_SIZE(%lld) is not a multiple "
"of 512", BDRV_SECTOR_SIZE);
return -EINVAL;
}
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {

View File

@@ -2113,7 +2113,7 @@ void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
aio_context_release(aio_context);
}
int hmp_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
void hmp_drive_del(Monitor *mon, const QDict *qdict)
{
const char *id = qdict_get_str(qdict, "id");
BlockBackend *blk;
@@ -2124,14 +2124,14 @@ int hmp_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
blk = blk_by_name(id);
if (!blk) {
error_report("Device '%s' not found", id);
return -1;
return;
}
bs = blk_bs(blk);
if (!blk_legacy_dinfo(blk)) {
error_report("Deleting device added with blockdev-add"
" is not supported");
return -1;
return;
}
aio_context = bdrv_get_aio_context(bs);
@@ -2140,7 +2140,7 @@ int hmp_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
error_report_err(local_err);
aio_context_release(aio_context);
return -1;
return;
}
/* quiesce block driver; prevent further io */
@@ -2163,7 +2163,6 @@ int hmp_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
}
aio_context_release(aio_context);
return 0;
}
void qmp_block_resize(bool has_device, const char *device,

View File

@@ -108,10 +108,6 @@ void cpu_list_unlock(void)
/***********************************************************/
/* CPUX86 core interface */
void cpu_smm_update(CPUX86State *env)
{
}
uint64_t cpu_get_tsc(CPUX86State *env)
{
return cpu_get_real_ticks();

configure
View File

@@ -353,7 +353,7 @@ for opt do
;;
--cpu=*) cpu="$optarg"
;;
--extra-cflags=*) QEMU_CFLAGS="$optarg $QEMU_CFLAGS"
--extra-cflags=*) QEMU_CFLAGS="$QEMU_CFLAGS $optarg"
EXTRA_CFLAGS="$optarg"
;;
--extra-ldflags=*) LDFLAGS="$optarg $LDFLAGS"
@@ -3115,9 +3115,11 @@ fi
if test "$fdt" != "no" ; then
fdt_libs="-lfdt"
# explicitly check for libfdt_env.h as it is missing in some stable installs
# and test for required functions to make sure we are on a version >= 1.4.0
cat > $TMPC << EOF
#include <libfdt.h>
#include <libfdt_env.h>
int main(void) { return 0; }
int main(void) { fdt_get_property_by_offset(0, 0, 0); return 0; }
EOF
if compile_prog "" "$fdt_libs" ; then
# system DTC is good - use it
@@ -3135,7 +3137,7 @@ EOF
fdt_libs="-L\$(BUILD_DIR)/dtc/libfdt $fdt_libs"
elif test "$fdt" = "yes" ; then
# have neither and want - prompt for system/submodule install
error_exit "DTC (libfdt) not present. Your options:" \
error_exit "DTC (libfdt) version >= 1.4.0 not present. Your options:" \
" (1) Preferred: Install the DTC (libfdt) devel package" \
" (2) Fetch the DTC submodule, using:" \
" git submodule update --init dtc"

cpus.c
View File

@@ -105,6 +105,7 @@ static bool all_cpu_threads_idle(void)
/* Protected by TimersState seqlock */
static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
@@ -393,15 +394,18 @@ void qemu_clock_warp(QEMUClockType type)
return;
}
/*
* If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
* This ensures that the deadline for the timer is computed correctly below.
* This also makes sure that the insn counter is synchronized before the
* CPU starts running, in case the CPU is woken by an event other than
* the earliest QEMU_CLOCK_VIRTUAL timer.
*/
icount_warp_rt(NULL);
timer_del(icount_warp_timer);
if (icount_sleep) {
/*
* If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
* This ensures that the deadline for the timer is computed correctly
* below.
* This also makes sure that the insn counter is synchronized before
* the CPU starts running, in case the CPU is woken by an event other
* than the earliest QEMU_CLOCK_VIRTUAL timer.
*/
icount_warp_rt(NULL);
timer_del(icount_warp_timer);
}
if (!all_cpu_threads_idle()) {
return;
}
@@ -415,6 +419,11 @@ void qemu_clock_warp(QEMUClockType type)
clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
if (deadline < 0) {
static bool notified;
if (!icount_sleep && !notified) {
error_report("WARNING: icount sleep disabled and no active timers");
notified = true;
}
return;
}
@@ -425,23 +434,35 @@ void qemu_clock_warp(QEMUClockType type)
* interrupt to wake it up, but the interrupt never comes because
* the vCPU isn't running any insns and thus doesn't advance the
* QEMU_CLOCK_VIRTUAL.
*
* An extreme solution for this problem would be to never let VCPUs
* sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
* timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
* event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
* after some "real" time, (related to the time left until the next
* event) has passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
* This avoids that the warps are visible externally; for example,
* you will not be sending network packets continuously instead of
* every 100ms.
*/
seqlock_write_lock(&timers_state.vm_clock_seqlock);
if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
vm_clock_warp_start = clock;
if (!icount_sleep) {
/*
* We never let VCPUs sleep in no sleep icount mode.
* If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
* to the next QEMU_CLOCK_VIRTUAL event and notify it.
* It is useful when we want a deterministic execution time,
* isolated from host latencies.
*/
seqlock_write_lock(&timers_state.vm_clock_seqlock);
timers_state.qemu_icount_bias += deadline;
seqlock_write_unlock(&timers_state.vm_clock_seqlock);
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
} else {
/*
* We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
* "real" time, (related to the time left until the next event) has
* passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
* This avoids that the warps are visible externally; for example,
* you will not be sending network packets continuously instead of
* every 100ms.
*/
seqlock_write_lock(&timers_state.vm_clock_seqlock);
if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
vm_clock_warp_start = clock;
}
seqlock_write_unlock(&timers_state.vm_clock_seqlock);
timer_mod_anticipate(icount_warp_timer, clock + deadline);
}
seqlock_write_unlock(&timers_state.vm_clock_seqlock);
timer_mod_anticipate(icount_warp_timer, clock + deadline);
} else if (deadline == 0) {
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
@@ -504,9 +525,18 @@ void configure_icount(QemuOpts *opts, Error **errp)
}
return;
}
icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
if (icount_sleep) {
icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
icount_warp_rt, NULL);
}
icount_align_option = qemu_opt_get_bool(opts, "align", false);
icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
icount_warp_rt, NULL);
if (icount_align_option && !icount_sleep) {
error_setg(errp, "align=on and sleep=no are incompatible");
}
if (strcmp(option, "auto") != 0) {
errno = 0;
icount_time_shift = strtol(option, &rem_str, 0);
@@ -517,6 +547,8 @@ void configure_icount(QemuOpts *opts, Error **errp)
return;
} else if (icount_align_option) {
error_setg(errp, "shift=auto and align=on are incompatible");
} else if (!icount_sleep) {
error_setg(errp, "shift=auto and sleep=no are incompatible");
}
use_icount = 2;

View File

@@ -125,14 +125,13 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
cpu_physical_memory_reset_dirty(ram_addr, TARGET_PAGE_SIZE,
DIRTY_MEMORY_CODE);
cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
DIRTY_MEMORY_CODE);
}
/* update the TLB so that writes in physical page 'phys_addr' are no longer
tested for self modifying code */
void tlb_unprotect_code_phys(CPUState *cpu, ram_addr_t ram_addr,
target_ulong vaddr)
void tlb_unprotect_code(ram_addr_t ram_addr)
{
cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}

View File

@@ -18,7 +18,6 @@
#include <unistd.h>
#include <stdlib.h>
#include "config.h"
#include "qemu-common.h"
#include "qemu/error-report.h"
#include "sysemu/device_tree.h"

View File

@@ -0,0 +1,58 @@
PCI EXPANDER BRIDGE (PXB)
=========================
Description
===========
PXB is a "light-weight" host bridge in the same PCI domain
as the main host bridge whose purpose is to enable
the main host bridge to support multiple PCI root buses.
It is implemented only for i440fx and can be placed only
on bus 0 (pci.0).
As opposed to a PCI-2-PCI bridge's secondary bus, a PXB's bus
is a primary bus and can be associated with a NUMA node
(different from the main host bridge's), allowing the guest OS
to recognize the proximity of a pass-through device to
other resources such as RAM and CPUs.
Usage
=====
A detailed command line would be:
[qemu-bin + storage options]
-m 2G
-object memory-backend-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 -numa node,nodeid=0,cpus=0,memdev=ram-node0
-object memory-backend-ram,size=1024M,policy=bind,host-nodes=1,id=ram-node1 -numa node,nodeid=1,cpus=1,memdev=ram-node1
-device pxb,id=bridge1,bus=pci.0,numa_node=1,bus_nr=4 -netdev user,id=nd -device e1000,bus=bridge1,addr=0x4,netdev=nd
-device pxb,id=bridge2,bus=pci.0,numa_node=0,bus_nr=8,bus=pci.0 -device e1000,bus=bridge2,addr=0x3
-device pxb,id=bridge3,bus=pci.0,bus_nr=40,bus=pci.0 -drive if=none,id=drive0,file=[img] -device virtio-blk-pci,drive=drive0,scsi=off,bus=bridge3,addr=1
Here you have:
- 2 NUMA nodes for the guest, 0 and 1 (both mapped to the same NUMA node on the host, but you can and should put them in different host NUMA nodes)
- a pxb host bridge attached to NUMA 1 with an e1000 behind it
- a pxb host bridge attached to NUMA 0 with an e1000 behind it
- a pxb host bridge not attached to any NUMA with a hard drive behind it.
Limitations
===========
Please observe that we specified the bus "pci.0" for the second and third pxb.
This is because when no bus is given, another pxb can be selected by QEMU as
the default bus; however, PXBs can be placed only under the root bus.
Implementation
==============
The PXB is composed of:
- HostBridge (TYPE_PXB_HOST)
The host bridge allows registering and querying the PXB's PCI root bus in QEMU.
- PXBDev(TYPE_PXB_DEVICE)
It is a regular PCI Device that resides on the piix host-bridge bus and its bus uses the same PCI domain.
However, the bus behind is exposed through ACPI as a primary PCI bus and starts a new PCI hierarchy.
The interrupts from devices behind the PXB are routed through this device the same as if it were a
PCI-2-PCI bridge. The _PRT follows the i440fx model.
- PCIBridgeDev(TYPE_PCI_BRIDGE_DEV)
Created automatically as part of the init sequence.
When adding a device to the PXB, it is attached to the bridge for two reasons:
- Using the bridge will enable hotplug support
- All the devices behind the bridge will use bridge's IO/MEM windows compacting
the PCI address space.

View File

@@ -203,3 +203,24 @@ completes fully overwriting the item's data.
NOTE: This function is deprecated, and will be completely removed
starting with QEMU v2.4.
== Externally Provided Items ==
As of v2.4, "file" fw_cfg items (i.e., items with selector keys above
FW_CFG_FILE_FIRST, and with a corresponding entry in the fw_cfg file
directory structure) may be inserted via the QEMU command line, using
the following syntax:
-fw_cfg [name=]<item_name>,file=<path>
where <item_name> is the fw_cfg item name, and <path> is the location
on the host file system of a file containing the data to be inserted.
NOTE: Users *SHOULD* choose item names beginning with the prefix "opt/"
when using the "-fw_cfg" command line option, to avoid conflicting with
item names used internally by QEMU. For instance:
-fw_cfg name=opt/my_item_name,file=./my_blob.bin
Similarly, QEMU developers *SHOULD NOT* use item names prefixed with
"opt/" when inserting items programmatically, e.g. via fw_cfg_add_file().

View File

@@ -0,0 +1,287 @@
= sPAPR Dynamic Reconfiguration =
sPAPR/"pseries" guests make use of a facility called dynamic-reconfiguration
to handle hotplugging of dynamic "physical" resources like PCI cards, or
"logical"/paravirtual resources like memory, CPUs, and "physical"
host-bridges, which are generally managed by the host/hypervisor and provided
to guests as virtualized resources. The specifics of dynamic-reconfiguration
are documented extensively in PAPR+ v2.7, Section 13.1. This document
provides a summary of that information as it applies to the implementation
within QEMU.
== Dynamic-reconfiguration Connectors ==
To manage hotplug/unplug of these resources, a firmware abstraction known as
a Dynamic Resource Connector (DRC) is used to assign a particular dynamic
resource to the guest, and provide an interface for the guest to manage
configuration/removal of the resource associated with it.
== Device-tree description of DRCs ==
A set of 4 Open Firmware device tree array properties are used to describe
the name/index/power-domain/type of each DRC allocated to a guest at
boot-time. There may be multiple sets of these arrays, rooted at different
paths in the device tree depending on the type of resource the DRCs manage.
In some cases, the DRCs themselves may be provided by a dynamic resource,
such as the DRCs managing PCI slots on a hotplugged PHB. In this case the
arrays would be fetched as part of the device tree retrieval interfaces
for hotplugged resources described under "Guest->Host interface".
The array properties are described below. Each entry/element in an array
describes the DRC identified by the element in the corresponding position
of ibm,drc-indexes:
ibm,drc-names:
first 4-bytes: BE-encoded integer denoting the number of entries
each entry: a NULL-terminated <name> string encoded as a byte array
<name> values for logical/virtual resources are defined in PAPR+ v2.7,
Section 13.5.2.4, and basically consist of the type of the resource
followed by a space and a numerical value that's unique across resources
of that type.
<name> values for "physical" resources such as PCI or VIO devices are
defined as being "location codes", which are the "location labels" of
each encapsulating device, starting from the chassis down to the
individual slot for the device, concatenated by a hyphen. This provides
a mapping of resources to a physical location in a chassis for debugging
purposes. For QEMU, this mapping is less important, so we assign a
location code that conforms to naming specifications, but is simply a
location label for the slot by itself to simplify the implementation.
The naming convention for location labels is documented in detail in
PAPR+ v2.7, Section 12.3.1.5, and in our case amounts to using "C<n>"
for PCI/VIO device slots, where <n> is unique across all PCI/VIO
device slots.
ibm,drc-indexes:
first 4-bytes: BE-encoded integer denoting the number of entries
each 4-byte entry: BE-encoded <index> integer that is unique across all DRCs
in the machine
<index> is arbitrary, but in the case of QEMU we try to maintain the
convention used to assign them to pSeries guests on pHyp:
bit[31:28]: integer encoding of <type>, where <type> is:
1 for CPU resource
2 for PHB resource
3 for VIO resource
4 for PCI resource
8 for Memory resource
bit[27:0]: integer encoding of <id>, where <id> is unique across
all resources of specified type
ibm,drc-power-domains:
first 4-bytes: BE-encoded integer denoting the number of entries
each 4-byte entry: 32-bit, BE-encoded <index> integer that specifies the
power domain the resource will be assigned to. In the case of QEMU
we associated all resources with a "live insertion" domain, where the
power is assumed to be managed automatically. The integer value for
this domain is a special value of -1.
ibm,drc-types:
first 4-bytes: BE-encoded integer denoting the number of entries
each entry: a NULL-terminated <type> string encoded as a byte array
<type> is assigned as follows:
"CPU" for a CPU
"PHB" for a physical host-bridge
"SLOT" for a VIO slot
"28" for a PCI slot
"MEM" for memory resource
== Guest->Host interface to manage dynamic resources ==
Each DRC is given a globally unique DRC Index, and resources associated with
a particular DRC are configured/managed by the guest via a number of RTAS
calls which reference individual DRCs based on the DRC index. This can be
considered the guest->host interface.
rtas-set-power-level:
arg[0]: integer identifying power domain
arg[1]: new power level for the domain, 0-100
output[0]: status, 0 on success
output[1]: power level after command
Set the power level for a specified power domain
rtas-get-power-level:
arg[0]: integer identifying power domain
output[0]: status, 0 on success
output[1]: current power level
Get the power level for a specified power domain
rtas-set-indicator:
arg[0]: integer identifying sensor/indicator type
arg[1]: index of sensor, for DR-related sensors this is generally the
DRC index
arg[2]: desired sensor value
output[0]: status, 0 on success
Set the state of an indicator or sensor. For the purpose of this document we
focus on the indicator/sensor types associated with a DRC. The types are:
9001: isolation-state, controls/indicates whether a device has been made
accessible to a guest
supported sensor values:
0: isolate, device is made unaccessible by guest OS
1: unisolate, device is made available to guest OS
9002: dr-indicator, controls "visual" indicator associated with device
supported sensor values:
0: inactive, resource may be safely removed
1: active, resource is in use and cannot be safely removed
2: identify, used to visually identify slot for interactive hotplug
3: action, in most cases, used in the same manner as identify
9003: allocation-state, generally only used for "logical" DR resources to
request the allocation/deallocation of a resource prior to acquiring
it via isolation-state->unisolate, or after releasing it via
isolation-state->isolate, respectively. for "physical" DR (like PCI
hotplug/unplug) the pre-allocation of the resource is implied and
this sensor is unused.
supported sensor values:
0: unusable, tell firmware/system the resource can be
unallocated/reclaimed and added back to the system resource pool
1: usable, request the resource be allocated/reserved for use by
guest OS
2: exchange, used to allocate a spare resource to use for fail-over
in certain situations. unused in QEMU
3: recover, used to reclaim a previously allocated resource that's
not currently allocated to the guest OS. unused in QEMU
rtas-get-sensor-state:
arg[0]: integer identifying sensor/indicator type
arg[1]: index of sensor, for DR-related sensors this is generally the
DRC index
output[0]: status, 0 on success
Used to read an indicator or sensor value.
For DR-related operations, the only noteworthy sensor is dr-entity-sense,
which has a type value of 9003, as allocation-state does in the case of
rtas-set-indicator. The semantics/encodings of the sensor values are distinct
however:
supported sensor values for dr-entity-sense (9003) sensor:
0: empty,
for physical resources: DRC/slot is empty
for logical resources: unused
1: present,
for physical resources: DRC/slot is populated with a device/resource
for logical resources: resource has been allocated to the DRC
2: unusable,
for physical resources: unused
for logical resources: DRC has no resource allocated to it
3: exchange,
for physical resources: unused
for logical resources: resource available for exchange (see
allocation-state sensor semantics above)
4: recovery,
for physical resources: unused
for logical resources: resource available for recovery (see
allocation-state sensor semantics above)
rtas-ibm-configure-connector:
arg[0]: guest physical address of 4096-byte work area buffer
arg[1]: 0, or address of additional 4096-byte work area buffer. only non-zero
if a prior RTAS response indicated a need for additional memory
output[0]: status:
0: completed transmittal of device-tree node
1: instruct guest to prepare for next DT sibling node
2: instruct guest to prepare for next DT child node
3: instruct guest to prepare for next DT property
4: instruct guest to ascend to parent DT node
5: instruct guest to provide additional work-area buffer
via arg[1]
990x: instruct guest that operation took too long and to try
again later
Used to fetch an OF device-tree description of the resource associated with
a particular DRC. The DRC index is encoded in the first 4-bytes of the first
work area buffer.
Work area layout, using 4-byte offsets:
wa[0]: DRC index of the DRC to fetch device-tree nodes from
wa[1]: 0 (hard-coded)
wa[2]: for next-sibling/next-child response:
wa offset of null-terminated string denoting the new node's name
for next-property response:
wa offset of null-terminated string denoting new property's name
wa[3]: for next-property response (unused otherwise):
byte-length of new property's value
wa[4]: for next-property response (unused otherwise):
new property's value, encoded as an OFDT-compatible byte array
== hotplug/unplug events ==
For most DR operations, the hypervisor will issue host->guest add/remove events
using the EPOW/check-exception notification framework, where the host issues a
check-exception interrupt, then provides an RTAS event log via an
rtas-check-exception call issued by the guest in response. This framework is
documented by PAPR+ v2.7, and is already used by QEMU for generating
powerdown requests via EPOW events.
For DR, this framework has been extended to include hotplug events, which were
previously unneeded due to direct manipulation of DR-related guest userspace
tools by host-level management such as an HMC. This level of management is not
applicable to PowerKVM, hence the reason for extending the notification
framework to support hotplug events.
Note that these events are not yet formally part of the PAPR+ specification,
but support for this format has already been implemented in DR-related
guest tools such as powerpc-utils/librtas, as well as kernel patches that have
been submitted to handle in-kernel processing of memory/cpu-related hotplug
events[1], and is planned for formal inclusion in the PAPR+ specification.
The hotplug-specific payload is implemented by QEMU as follows (with all
values encoded in big-endian format):
struct rtas_event_log_v6_hp {
#define SECTION_ID_HOTPLUG 0x4850 /* HP */
struct section_header {
uint16_t section_id; /* set to SECTION_ID_HOTPLUG */
uint16_t section_length; /* sizeof(rtas_event_log_v6_hp),
* plus the length of the DRC name
* if a DRC name identifier is
* specified for hotplug_identifier
*/
uint8_t section_version; /* version 1 */
uint8_t section_subtype; /* unused */
uint16_t creator_component_id; /* unused */
} hdr;
#define RTAS_LOG_V6_HP_TYPE_CPU 1
#define RTAS_LOG_V6_HP_TYPE_MEMORY 2
#define RTAS_LOG_V6_HP_TYPE_SLOT 3
#define RTAS_LOG_V6_HP_TYPE_PHB 4
#define RTAS_LOG_V6_HP_TYPE_PCI 5
uint8_t hotplug_type; /* type of resource/device */
#define RTAS_LOG_V6_HP_ACTION_ADD 1
#define RTAS_LOG_V6_HP_ACTION_REMOVE 2
uint8_t hotplug_action; /* action (add/remove) */
#define RTAS_LOG_V6_HP_ID_DRC_NAME 1
#define RTAS_LOG_V6_HP_ID_DRC_INDEX 2
#define RTAS_LOG_V6_HP_ID_DRC_COUNT 3
uint8_t hotplug_identifier; /* type of the resource identifier,
* which serves as the discriminator
* for the 'drc' union field below
*/
uint8_t reserved;
union {
uint32_t index; /* DRC index of resource to take action
* on
*/
uint32_t count; /* number of DR resources to take
* action on (guest chooses which)
*/
char name[1]; /* string representing the name of the
* DRC to take action on
*/
} drc;
} QEMU_PACKED;
[1] http://thread.gmane.org/gmane.linux.ports.ppc.embedded/75350/focus=106867
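
For illustration only, the DRC index encoding described earlier in this
document (resource type in bits 31:28, per-type id in bits 27:0) could be
computed with a helper like the sketch below; the enum values mirror the
<type> list above and are not taken from QEMU headers:

    #include <stdint.h>

    enum drc_type_sketch {
        DRC_TYPE_CPU = 1,
        DRC_TYPE_PHB = 2,
        DRC_TYPE_VIO = 3,
        DRC_TYPE_PCI = 4,
        DRC_TYPE_MEM = 8,
    };

    /* Pack a DRC type and a per-type id into a 32-bit DRC index. */
    static uint32_t drc_index_sketch(enum drc_type_sketch type, uint32_t id)
    {
        return ((uint32_t)type << 28) | (id & 0x0fffffff);
    }

    /* Example: the PCI resource with id 2 gets index 0x40000002. */
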

View File

@@ -598,7 +598,7 @@ stored in its "value" member. In our example, the "value" member is a pointer
to an TimerAlarmMethod instance.
Notice that the "current" variable is used as "true" only in the first
interation of the loop. That's because the alarm timer method in use is the
iteration of the loop. That's because the alarm timer method in use is the
first element of the alarm_timers array. Also notice that QAPI lists are handled
by hand and we return the head of the list.

dtc

Submodule dtc updated: bc895d6d09...65cc4d2748

exec.c
View File

@@ -59,8 +59,6 @@
//#define DEBUG_SUBPAGE
#if !defined(CONFIG_USER_ONLY)
static bool in_migration;
/* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
* are protected by the ramlist lock.
*/
@@ -173,17 +171,22 @@ static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
}
}
static uint32_t phys_map_node_alloc(PhysPageMap *map)
static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
unsigned i;
uint32_t ret;
PhysPageEntry e;
PhysPageEntry *p;
ret = map->nodes_nb++;
p = map->nodes[ret];
assert(ret != PHYS_MAP_NODE_NIL);
assert(ret != map->nodes_nb_alloc);
e.skip = leaf ? 0 : 1;
e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
for (i = 0; i < P_L2_SIZE; ++i) {
map->nodes[ret][i].skip = 1;
map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
memcpy(&p[i], &e, sizeof(e));
}
return ret;
}
@@ -193,21 +196,12 @@ static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
int level)
{
PhysPageEntry *p;
int i;
hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
lp->ptr = phys_map_node_alloc(map);
p = map->nodes[lp->ptr];
if (level == 0) {
for (i = 0; i < P_L2_SIZE; i++) {
p[i].skip = 0;
p[i].ptr = PHYS_SECTION_UNASSIGNED;
}
}
} else {
p = map->nodes[lp->ptr];
lp->ptr = phys_map_node_alloc(map, level == 0);
}
p = map->nodes[lp->ptr];
lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
while (*nb && lp < &p[P_L2_SIZE]) {
@@ -858,21 +852,27 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
}
/* Note: start and end must be within the same ram block. */
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
unsigned client)
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
ram_addr_t length,
unsigned client)
{
if (length == 0)
return;
cpu_physical_memory_clear_dirty_range_type(start, length, client);
unsigned long end, page;
bool dirty;
if (tcg_enabled()) {
if (length == 0) {
return false;
}
end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
page = start >> TARGET_PAGE_BITS;
dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
page, end - page);
if (dirty && tcg_enabled()) {
tlb_reset_dirty_range_all(start, length);
}
}
static void cpu_physical_memory_set_dirty_tracking(bool enable)
{
in_migration = enable;
return dirty;
}
/* Called from RCU critical section */
@@ -1362,7 +1362,8 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
block->used_length = newsize;
cpu_physical_memory_set_dirty_range(block->offset, block->used_length);
cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
DIRTY_CLIENTS_ALL);
memory_region_set_size(block->mr, newsize);
if (block->resized) {
block->resized(block->idstr, newsize, block->host);
@@ -1436,7 +1437,8 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
}
}
cpu_physical_memory_set_dirty_range(new_block->offset,
new_block->used_length);
new_block->used_length,
DIRTY_CLIENTS_ALL);
if (new_block->host) {
qemu_ram_setup_dump(new_block->host, new_block->max_length);
@@ -1824,7 +1826,11 @@ static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
default:
abort();
}
cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
/* Set both VGA and migration bits for simplicity and to remove
* the notdirty callback faster.
*/
cpu_physical_memory_set_dirty_range(ram_addr, size,
DIRTY_CLIENTS_NOCODE);
/* we remove the notdirty callback only if the code has been
flushed */
if (!cpu_physical_memory_is_clean(ram_addr)) {
@@ -2165,22 +2171,6 @@ static void tcg_commit(MemoryListener *listener)
}
}
static void core_log_global_start(MemoryListener *listener)
{
cpu_physical_memory_set_dirty_tracking(true);
}
static void core_log_global_stop(MemoryListener *listener)
{
cpu_physical_memory_set_dirty_tracking(false);
}
static MemoryListener core_memory_listener = {
.log_global_start = core_log_global_start,
.log_global_stop = core_log_global_stop,
.priority = 1,
};
void address_space_init_dispatch(AddressSpace *as)
{
as->dispatch = NULL;
@@ -2220,8 +2210,6 @@ static void memory_map_init(void)
memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
65536);
address_space_init(&address_space_io, system_io, "I/O");
memory_listener_register(&core_memory_listener, &address_space_memory);
}
MemoryRegion *get_system_memory(void)
@@ -2279,14 +2267,23 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
#else
static void invalidate_and_set_dirty(hwaddr addr,
static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
hwaddr length)
{
if (cpu_physical_memory_range_includes_clean(addr, length)) {
tb_invalidate_phys_range(addr, addr + length, 0);
cpu_physical_memory_set_dirty_range_nocode(addr, length);
uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
/* No early return if dirty_log_mask is or becomes 0, because
* cpu_physical_memory_set_dirty_range will still call
* xen_modified_memory.
*/
if (dirty_log_mask) {
dirty_log_mask =
cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
}
xen_modified_memory(addr, length);
if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
tb_invalidate_phys_range(addr, addr + length);
dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
}
cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
}
static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
@@ -2371,7 +2368,7 @@ MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
/* RAM case */
ptr = qemu_get_ram_ptr(addr1);
memcpy(ptr, buf, l);
invalidate_and_set_dirty(addr1, l);
invalidate_and_set_dirty(mr, addr1, l);
}
} else {
if (!memory_access_is_direct(mr, is_write)) {
@@ -2468,7 +2465,7 @@ static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
switch (type) {
case WRITE_DATA:
memcpy(ptr, buf, l);
invalidate_and_set_dirty(addr1, l);
invalidate_and_set_dirty(mr, addr1, l);
break;
case FLUSH_CACHE:
flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
@@ -2693,7 +2690,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
mr = qemu_ram_addr_from_host(buffer, &addr1);
assert(mr != NULL);
if (is_write) {
invalidate_and_set_dirty(addr1, access_len);
invalidate_and_set_dirty(mr, addr1, access_len);
}
if (xen_enabled()) {
xen_invalidate_map_cache_entry(buffer);
@@ -3022,6 +3019,7 @@ void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
hwaddr l = 4;
hwaddr addr1;
MemTxResult r;
uint8_t dirty_log_mask;
rcu_read_lock();
mr = address_space_translate(as, addr, &addr1, &l,
@@ -3033,14 +3031,9 @@ void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
ptr = qemu_get_ram_ptr(addr1);
stl_p(ptr, val);
if (unlikely(in_migration)) {
if (cpu_physical_memory_is_clean(addr1)) {
/* invalidate code */
tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
/* set dirty bit */
cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
}
}
dirty_log_mask = memory_region_get_dirty_log_mask(mr);
dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
r = MEMTX_OK;
}
if (result) {
@@ -3096,7 +3089,7 @@ static inline void address_space_stl_internal(AddressSpace *as,
stl_p(ptr, val);
break;
}
invalidate_and_set_dirty(addr1, 4);
invalidate_and_set_dirty(mr, addr1, 4);
r = MEMTX_OK;
}
if (result) {
@@ -3200,7 +3193,7 @@ static inline void address_space_stw_internal(AddressSpace *as,
stw_p(ptr, val);
break;
}
invalidate_and_set_dirty(addr1, 2);
invalidate_and_set_dirty(mr, addr1, 2);
r = MEMTX_OK;
}
if (result) {

View File

@@ -113,7 +113,7 @@ const float16 float16_default_nan = const_float16(0xFE00);
#if defined(TARGET_SPARC)
const float32 float32_default_nan = const_float32(0x7FFFFFFF);
#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \
defined(TARGET_XTENSA)
defined(TARGET_XTENSA) || defined(TARGET_S390X)
const float32 float32_default_nan = const_float32(0x7FC00000);
#elif SNAN_BIT_IS_ONE
const float32 float32_default_nan = const_float32(0x7FBFFFFF);
@@ -126,7 +126,8 @@ const float32 float32_default_nan = const_float32(0xFFC00000);
*----------------------------------------------------------------------------*/
#if defined(TARGET_SPARC)
const float64 float64_default_nan = const_float64(LIT64( 0x7FFFFFFFFFFFFFFF ));
#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \
defined(TARGET_S390X)
const float64 float64_default_nan = const_float64(LIT64( 0x7FF8000000000000 ));
#elif SNAN_BIT_IS_ONE
const float64 float64_default_nan = const_float64(LIT64(0x7FF7FFFFFFFFFFFF));
@@ -155,6 +156,9 @@ const floatx80 floatx80_default_nan
#if SNAN_BIT_IS_ONE
#define float128_default_nan_high LIT64(0x7FFF7FFFFFFFFFFF)
#define float128_default_nan_low LIT64(0xFFFFFFFFFFFFFFFF)
#elif defined(TARGET_S390X)
#define float128_default_nan_high LIT64( 0x7FFF800000000000 )
#define float128_default_nan_low LIT64( 0x0000000000000000 )
#else
#define float128_default_nan_high LIT64( 0xFFFF800000000000 )
#define float128_default_nan_low LIT64( 0x0000000000000000 )


@@ -178,8 +178,7 @@ ETEXI
.args_type = "id:B",
.params = "device",
.help = "remove host block device",
.user_print = monitor_user_noop,
.mhandler.cmd_new = hmp_drive_del,
.mhandler.cmd = hmp_drive_del,
},
STEXI
@@ -654,8 +653,7 @@ ETEXI
.args_type = "device:O",
.params = "driver[,prop=value][,...]",
.help = "add device, like -device on the command line",
.user_print = monitor_user_noop,
.mhandler.cmd_new = do_device_add,
.mhandler.cmd = hmp_device_add,
.command_completion = device_add_completion,
},
@@ -1011,17 +1009,16 @@ ETEXI
.name = "client_migrate_info",
.args_type = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
.params = "protocol hostname port tls-port cert-subject",
.help = "send migration info to spice/vnc client",
.user_print = monitor_user_noop,
.mhandler.cmd_new = client_migrate_info,
.help = "set migration information for remote display",
.mhandler.cmd = hmp_client_migrate_info,
},
STEXI
@item client_migrate_info @var{protocol} @var{hostname} @var{port} @var{tls-port} @var{cert-subject}
@findex client_migrate_info
Set the spice/vnc connection info for the migration target. The spice/vnc
server will ask the spice/vnc client to automatically reconnect using the
new parameters (if specified) once the vm migration finished successfully.
Set migration information for remote display. This makes the server
ask the client to automatically reconnect using the new parameters
once migration finished successfully. Only implemented for SPICE.
ETEXI
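For instance, pointing a connected SPICE client at the migration target could look like the following (hostname and port numbers are placeholders):

(qemu) client_migrate_info spice target.example.org 5900 5901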
{
@@ -1186,8 +1183,7 @@ ETEXI
"<error_status> = error string or 32bit\n\t\t\t"
"<tlb header> = 32bit x 4\n\t\t\t"
"<tlb header prefix> = 32bit x 4",
.user_print = pcie_aer_inject_error_print,
.mhandler.cmd_new = hmp_pcie_aer_inject_error,
.mhandler.cmd = hmp_pcie_aer_inject_error,
},
STEXI

hmp.c

@@ -22,6 +22,7 @@
#include "qmp-commands.h"
#include "qemu/sockets.h"
#include "monitor/monitor.h"
#include "monitor/qdev.h"
#include "qapi/opts-visitor.h"
#include "qapi/string-output-visitor.h"
#include "qapi-visit.h"
@@ -1250,6 +1251,23 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
}
}
void hmp_client_migrate_info(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
const char *protocol = qdict_get_str(qdict, "protocol");
const char *hostname = qdict_get_str(qdict, "hostname");
bool has_port = qdict_haskey(qdict, "port");
int port = qdict_get_try_int(qdict, "port", -1);
bool has_tls_port = qdict_haskey(qdict, "tls-port");
int tls_port = qdict_get_try_int(qdict, "tls-port", -1);
const char *cert_subject = qdict_get_try_str(qdict, "cert-subject");
qmp_client_migrate_info(protocol, hostname,
has_port, port, has_tls_port, tls_port,
!!cert_subject, cert_subject, &err);
hmp_handle_error(mon, &err);
}
void hmp_set_password(Monitor *mon, const QDict *qdict)
{
const char *protocol = qdict_get_str(qdict, "protocol");
@@ -1482,6 +1500,11 @@ void hmp_migrate(Monitor *mon, const QDict *qdict)
}
}
void hmp_device_add(Monitor *mon, const QDict *qdict)
{
do_device_add(mon, qdict, NULL);
}
void hmp_device_del(Monitor *mon, const QDict *qdict)
{
const char *id = qdict_get_str(qdict, "id");

hmp.h

@@ -67,6 +67,7 @@ void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict);
void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict);
void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict);
void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
void hmp_client_migrate_info(Monitor *mon, const QDict *qdict);
void hmp_set_password(Monitor *mon, const QDict *qdict);
void hmp_expire_password(Monitor *mon, const QDict *qdict);
void hmp_eject(Monitor *mon, const QDict *qdict);
@@ -79,6 +80,7 @@ void hmp_block_job_pause(Monitor *mon, const QDict *qdict);
void hmp_block_job_resume(Monitor *mon, const QDict *qdict);
void hmp_block_job_complete(Monitor *mon, const QDict *qdict);
void hmp_migrate(Monitor *mon, const QDict *qdict);
void hmp_device_add(Monitor *mon, const QDict *qdict);
void hmp_device_del(Monitor *mon, const QDict *qdict);
void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict);
void hmp_netdev_add(Monitor *mon, const QDict *qdict);


@@ -687,6 +687,14 @@ Aml *aml_else(void)
return var;
}
/* ACPI 1.0b: 16.2.5.3 Type 1 Opcodes Encoding: DefWhile */
Aml *aml_while(Aml *predicate)
{
Aml *var = aml_bundle(0xA2 /* WhileOp */, AML_PACKAGE);
aml_append(var, predicate);
return var;
}
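As a hypothetical usage sketch of the new opcode (relying on aml_local(), aml_int() and aml_store() from the same builder, and on a previously built 'method' container; none of that is shown in this hunk, so treat the names as assumptions):

/* Emits roughly:  While (Local0) { Store (Zero, Local0) }  -- illustrative only */
static void add_example_while(Aml *method)
{
    Aml *loop = aml_while(aml_local(0));

    aml_append(loop, aml_store(aml_int(0), aml_local(0)));
    aml_append(method, loop);
}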
/* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefMethod */
Aml *aml_method(const char *name, int arg_count)
{


@@ -22,6 +22,7 @@
#include "hw/hw.h"
#include "hw/i386/pc.h"
#include "hw/acpi/acpi.h"
#include "hw/nvram/fw_cfg.h"
#include "qemu/config-file.h"
#include "qapi/opts-visitor.h"
#include "qapi/dealloc-visitor.h"
@@ -592,14 +593,26 @@ static const MemoryRegionOps acpi_pm_cnt_ops = {
.endianness = DEVICE_LITTLE_ENDIAN,
};
void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent, uint8_t s4_val)
void acpi_pm1_cnt_init(ACPIREGS *ar, MemoryRegion *parent,
bool disable_s3, bool disable_s4, uint8_t s4_val)
{
FWCfgState *fw_cfg;
ar->pm1.cnt.s4_val = s4_val;
ar->wakeup.notify = acpi_notify_wakeup;
qemu_register_wakeup_notifier(&ar->wakeup);
memory_region_init_io(&ar->pm1.cnt.io, memory_region_owner(parent),
&acpi_pm_cnt_ops, ar, "acpi-cnt", 2);
memory_region_add_subregion(parent, 4, &ar->pm1.cnt.io);
fw_cfg = fw_cfg_find();
if (fw_cfg) {
uint8_t suspend[6] = {128, 0, 0, 129, 128, 128};
suspend[3] = 1 | ((!disable_s3) << 7);
suspend[4] = s4_val | ((!disable_s4) << 7);
fw_cfg_add_file(fw_cfg, "etc/system-states", g_memdup(suspend, 6), 6);
}
}
void acpi_pm1_cnt_reset(ACPIREGS *ar)
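The six-byte blob registered above appears to describe one sleep state per byte, with bit 7 flagging the state as enabled and the low bits carrying the value the firmware should program for it; the exact index-to-S-state mapping is left to the firmware side. A small decoder written under that assumption:

#include <stdint.h>
#include <stdio.h>

static void dump_system_states(const uint8_t *blob)
{
    for (int i = 0; i < 6; i++) {
        printf("state[%d]: %s, value %u\n", i,
               (blob[i] & 0x80) ? "enabled" : "disabled",
               blob[i] & 0x7f);
    }
}

int main(void)
{
    /* Example blob shaped like the one built above (values illustrative). */
    uint8_t suspend[6] = {128, 0, 0, 129, 130, 128};
    dump_system_states(suspend);
    return 0;
}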
@@ -666,6 +679,13 @@ uint32_t acpi_gpe_ioport_readb(ACPIREGS *ar, uint32_t addr)
return val;
}
void acpi_send_gpe_event(ACPIREGS *ar, qemu_irq irq,
AcpiGPEStatusBits status)
{
ar->gpe.sts[0] |= status;
acpi_update_sci(ar, irq);
}
void acpi_update_sci(ACPIREGS *regs, qemu_irq irq)
{
int sci_level, pm1a_sts;


@@ -59,8 +59,7 @@ void acpi_cpu_plug_cb(ACPIREGS *ar, qemu_irq irq,
return;
}
ar->gpe.sts[0] |= ACPI_CPU_HOTPLUG_STATUS;
acpi_update_sci(ar, irq);
acpi_send_gpe_event(ar, irq, ACPI_CPU_HOTPLUG_STATUS);
}
void acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner,


@@ -94,7 +94,8 @@ static void ich9_smi_writel(void *opaque, hwaddr addr, uint64_t val,
ICH9LPCPMRegs *pm = opaque;
switch (addr) {
case 0:
pm->smi_en = val;
pm->smi_en &= ~pm->smi_en_wmask;
pm->smi_en |= (val & pm->smi_en_wmask);
break;
}
}
@@ -198,6 +199,7 @@ static void pm_reset(void *opaque)
* support SMM mode. */
pm->smi_en |= ICH9_PMIO_SMI_EN_APMC_EN;
}
pm->smi_en_wmask = ~0;
acpi_update_sci(&pm->acpi_regs, pm->irq);
}
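The two-step update above is the usual read-modify-write pattern for a register with a per-bit writability mask: bits outside smi_en_wmask keep their current value, bits inside it take the guest-written value. A tiny standalone illustration:

#include <stdint.h>
#include <stdio.h>

static uint32_t masked_write(uint32_t reg, uint32_t val, uint32_t wmask)
{
    reg &= ~wmask;          /* keep only the read-only bits...        */
    reg |= (val & wmask);   /* ...and overlay the guest-written ones  */
    return reg;
}

int main(void)
{
    uint32_t wmask = ~1u;            /* pretend bit 0 is read-only     */
    uint32_t reg   = 0x00000021;     /* bit 0 and bit 5 currently set  */
    printf("0x%08x\n", masked_write(reg, 0x00000002, wmask)); /* 0x00000003 */
    return 0;
}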
@@ -219,7 +221,8 @@ void ich9_pm_init(PCIDevice *lpc_pci, ICH9LPCPMRegs *pm,
acpi_pm_tmr_init(&pm->acpi_regs, ich9_pm_update_sci_fn, &pm->io);
acpi_pm1_evt_init(&pm->acpi_regs, ich9_pm_update_sci_fn, &pm->io);
acpi_pm1_cnt_init(&pm->acpi_regs, &pm->io, pm->s4_val);
acpi_pm1_cnt_init(&pm->acpi_regs, &pm->io, pm->disable_s3, pm->disable_s4,
pm->s4_val);
acpi_gpe_init(&pm->acpi_regs, ICH9_PMIO_GPE0_LEN);
memory_region_init_io(&pm->io_gpe, OBJECT(lpc_pci), &ich9_gpe_ops, pm,


@@ -241,8 +241,7 @@ void acpi_memory_plug_cb(ACPIREGS *ar, qemu_irq irq, MemHotplugState *mem_st,
mdev->is_inserting = true;
/* do ACPI magic */
ar->gpe.sts[0] |= ACPI_MEMORY_HOTPLUG_STATUS;
acpi_update_sci(ar, irq);
acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS);
return;
}
@@ -260,8 +259,7 @@ void acpi_memory_unplug_request_cb(ACPIREGS *ar, qemu_irq irq,
mdev->is_removing = true;
/* Do ACPI magic */
ar->gpe.sts[0] |= ACPI_MEMORY_HOTPLUG_STATUS;
acpi_update_sci(ar, irq);
acpi_send_gpe_event(ar, irq, ACPI_MEMORY_HOTPLUG_STATUS);
}
void acpi_memory_unplug_cb(MemHotplugState *mem_st,


@@ -45,7 +45,6 @@
# define ACPI_PCIHP_DPRINTF(format, ...) do { } while (0)
#endif
#define ACPI_PCI_HOTPLUG_STATUS 2
#define ACPI_PCIHP_ADDR 0xae00
#define ACPI_PCIHP_SIZE 0x0014
#define ACPI_PCIHP_LEGACY_SIZE 0x000f
@@ -202,8 +201,7 @@ void acpi_pcihp_device_plug_cb(ACPIREGS *ar, qemu_irq irq, AcpiPciHpState *s,
s->acpi_pcihp_pci_status[bsel].up |= (1U << slot);
ar->gpe.sts[0] |= ACPI_PCI_HOTPLUG_STATUS;
acpi_update_sci(ar, irq);
acpi_send_gpe_event(ar, irq, ACPI_PCI_HOTPLUG_STATUS);
}
void acpi_pcihp_device_unplug_cb(ACPIREGS *ar, qemu_irq irq, AcpiPciHpState *s,
@@ -220,8 +218,7 @@ void acpi_pcihp_device_unplug_cb(ACPIREGS *ar, qemu_irq irq, AcpiPciHpState *s,
s->acpi_pcihp_pci_status[bsel].down |= (1U << slot);
ar->gpe.sts[0] |= ACPI_PCI_HOTPLUG_STATUS;
acpi_update_sci(ar, irq);
acpi_send_gpe_event(ar, irq, ACPI_PCI_HOTPLUG_STATUS);
}
static uint64_t pci_read(void *opaque, hwaddr addr, unsigned int size)


@@ -475,7 +475,7 @@ static void piix4_pm_realize(PCIDevice *dev, Error **errp)
acpi_pm_tmr_init(&s->ar, pm_tmr_timer, &s->io);
acpi_pm1_evt_init(&s->ar, pm_tmr_timer, &s->io);
acpi_pm1_cnt_init(&s->ar, &s->io, s->s4_val);
acpi_pm1_cnt_init(&s->ar, &s->io, s->disable_s3, s->disable_s4, s->s4_val);
acpi_gpe_init(&s->ar, GPE_LEN);
s->powerdown_notifier.notify = piix4_pm_powerdown_req;
@@ -503,8 +503,7 @@ Object *piix4_pm_find(void)
I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
qemu_irq sci_irq, qemu_irq smi_irq,
int kvm_enabled, FWCfgState *fw_cfg,
DeviceState **piix4_pm)
int kvm_enabled, DeviceState **piix4_pm)
{
DeviceState *dev;
PIIX4PMState *s;
@@ -525,14 +524,6 @@ I2CBus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
qdev_init_nofail(dev);
if (fw_cfg) {
uint8_t suspend[6] = {128, 0, 0, 129, 128, 128};
suspend[3] = 1 | ((!s->disable_s3) << 7);
suspend[4] = s->s4_val | ((!s->disable_s4) << 7);
fw_cfg_add_file(fw_cfg, "etc/system-states", g_memdup(suspend, 6), 6);
}
return s->smb.smbus;
}


@@ -55,7 +55,7 @@ static void clipper_init(MachineState *machine)
ISABus *isa_bus;
qemu_irq rtc_irq;
long size, i;
const char *palcode_filename;
char *palcode_filename;
uint64_t palcode_entry, palcode_low, palcode_high;
uint64_t kernel_entry, kernel_low, kernel_high;
@@ -101,8 +101,8 @@ static void clipper_init(MachineState *machine)
/* Load PALcode. Given that this is not "real" cpu palcode,
but one explicitly written for the emulation, we might as
well load it directly from an ELF image. */
palcode_filename = (bios_name ? bios_name : "palcode-clipper");
palcode_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, palcode_filename);
palcode_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS,
bios_name ? bios_name : "palcode-clipper");
if (palcode_filename == NULL) {
hw_error("no palcode provided\n");
exit(1);
@@ -114,6 +114,7 @@ static void clipper_init(MachineState *machine)
hw_error("could not load palcode '%s'\n", palcode_filename);
exit(1);
}
g_free(palcode_filename);
/* Start all cpus at the PALcode RESET entry point. */
for (i = 0; i < smp_cpus; ++i) {


@@ -841,7 +841,7 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
}
}
*p_rtc_irq = *qemu_allocate_irqs(typhoon_set_timer_irq, s, 1);
*p_rtc_irq = qemu_allocate_irq(typhoon_set_timer_irq, s, 0);
/* Main memory region, 0x00.0000.0000. Real hardware supports 32GB,
but the address space hole reserved at this point is 8TB. */
@@ -918,11 +918,11 @@ PCIBus *typhoon_init(ram_addr_t ram_size, ISABus **isa_bus,
/* Init the ISA bus. */
/* ??? Technically there should be a cy82c693ub pci-isa bridge. */
{
qemu_irq isa_pci_irq, *isa_irqs;
qemu_irq *isa_irqs;
*isa_bus = isa_bus_new(NULL, get_system_memory(), &s->pchip.reg_io);
isa_pci_irq = *qemu_allocate_irqs(typhoon_set_isa_irq, s, 1);
isa_irqs = i8259_init(*isa_bus, isa_pci_irq);
isa_irqs = i8259_init(*isa_bus,
qemu_allocate_irq(typhoon_set_isa_irq, s, 0));
isa_bus_irqs(*isa_bus, isa_irqs);
}


@@ -5,6 +5,7 @@ obj-y += omap_sx1.o palm.o realview.o spitz.o stellaris.o
obj-y += tosa.o versatilepb.o vexpress.o virt.o xilinx_zynq.o z2.o
obj-$(CONFIG_ACPI) += virt-acpi-build.o
obj-y += netduino2.o
obj-y += sysbus-fdt.o
obj-y += armv7m.o exynos4210.o pxa2xx.o pxa2xx_gpio.o pxa2xx_pic.o
obj-$(CONFIG_DIGIC) += digic.o


@@ -557,7 +557,7 @@ static void load_image_to_fw_cfg(FWCfgState *fw_cfg, uint16_t size_key,
fw_cfg_add_bytes(fw_cfg, data_key, data, size);
}
void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
static void arm_load_kernel_notify(Notifier *notifier, void *data)
{
CPUState *cs;
int kernel_size;
@@ -568,6 +568,11 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
hwaddr entry, kernel_load_offset;
int big_endian;
static const ARMInsnFixup *primary_loader;
ArmLoadKernelNotifier *n = DO_UPCAST(ArmLoadKernelNotifier,
notifier, notifier);
ARMCPU *cpu = n->cpu;
struct arm_boot_info *info =
container_of(n, struct arm_boot_info, load_kernel_notifier);
/* CPU objects (unlike devices) are not automatically reset on system
* reset, so we must always register a handler to do so. If we're
@@ -775,3 +780,10 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
ARM_CPU(cs)->env.boot_info = info;
}
}
void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
{
info->load_kernel_notifier.cpu = cpu;
info->load_kernel_notifier.notifier.notify = arm_load_kernel_notify;
qemu_add_machine_init_done_notifier(&info->load_kernel_notifier.notifier);
}


@@ -133,9 +133,8 @@ static void n800_mmc_cs_cb(void *opaque, int line, int level)
static void n8x0_gpio_setup(struct n800_s *s)
{
qemu_irq *mmc_cs = qemu_allocate_irqs(n800_mmc_cs_cb, s->mpu->mmc, 1);
qdev_connect_gpio_out(s->mpu->gpio, N8X0_MMC_CS_GPIO, mmc_cs[0]);
qdev_connect_gpio_out(s->mpu->gpio, N8X0_MMC_CS_GPIO,
qemu_allocate_irq(n800_mmc_cs_cb, s->mpu->mmc, 0));
qemu_irq_lower(qdev_get_gpio_in(s->mpu->gpio, N800_BAT_COVER_GPIO));
}


@@ -103,7 +103,6 @@ static void sx1_init(MachineState *machine, const int version)
struct omap_mpu_state_s *mpu;
MemoryRegion *address_space = get_system_memory();
MemoryRegion *flash = g_new(MemoryRegion, 1);
MemoryRegion *flash_1 = g_new(MemoryRegion, 1);
MemoryRegion *cs = g_new(MemoryRegion, 4);
static uint32_t cs0val = 0x00213090;
static uint32_t cs1val = 0x00215070;
@@ -165,6 +164,7 @@ static void sx1_init(MachineState *machine, const int version)
if ((version == 1) &&
(dinfo = drive_get(IF_PFLASH, 0, fl_idx)) != NULL) {
MemoryRegion *flash_1 = g_new(MemoryRegion, 1);
memory_region_init_ram(flash_1, NULL, "omap_sx1.flash1-0", flash1_size,
&error_abort);
vmstate_register_ram_global(flash_1);

hw/arm/sysbus-fdt.c (new file)

@@ -0,0 +1,174 @@
/*
* ARM Platform Bus device tree generation helpers
*
* Copyright (c) 2014 Linaro Limited
*
* Authors:
* Alex Graf <agraf@suse.de>
* Eric Auger <eric.auger@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2 or later, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
#include "hw/arm/sysbus-fdt.h"
#include "qemu/error-report.h"
#include "sysemu/device_tree.h"
#include "hw/platform-bus.h"
#include "sysemu/sysemu.h"
/*
* internal struct that contains the information to create dynamic
* sysbus device node
*/
typedef struct PlatformBusFDTData {
void *fdt; /* device tree handle */
int irq_start; /* index of the first IRQ usable by platform bus devices */
const char *pbus_node_name; /* name of the platform bus node */
PlatformBusDevice *pbus;
} PlatformBusFDTData;
/*
* struct used when calling the machine init done notifier
* that constructs the fdt nodes of platform bus devices
*/
typedef struct PlatformBusFDTNotifierParams {
Notifier notifier;
ARMPlatformBusFDTParams *fdt_params;
} PlatformBusFDTNotifierParams;
/* struct that associates a device type name and a node creation function */
typedef struct NodeCreationPair {
const char *typename;
int (*add_fdt_node_fn)(SysBusDevice *sbdev, void *opaque);
} NodeCreationPair;
/* list of supported dynamic sysbus devices */
static const NodeCreationPair add_fdt_node_functions[] = {
{"", NULL}, /* last element */
};
/**
* add_fdt_node - add the device tree node of a dynamic sysbus device
*
* @sbdev: handle to the sysbus device
* @opaque: handle to the PlatformBusFDTData
*
* Checks that the sysbus type belongs to the list of device types that
* are dynamically instantiable and, if so, calls the node creation
* function.
*/
static int add_fdt_node(SysBusDevice *sbdev, void *opaque)
{
int i, ret;
for (i = 0; i < ARRAY_SIZE(add_fdt_node_functions); i++) {
if (!strcmp(object_get_typename(OBJECT(sbdev)),
add_fdt_node_functions[i].typename)) {
ret = add_fdt_node_functions[i].add_fdt_node_fn(sbdev, opaque);
assert(!ret);
return 0;
}
}
error_report("Device %s can not be dynamically instantiated",
qdev_fw_name(DEVICE(sbdev)));
exit(1);
}
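With that table in place, supporting a new dynamic sysbus device means adding a {typename, callback} pair; with a purely hypothetical entry (the "acme-widget" type and its helper are invented for illustration and are not part of this series), the list above would read:

/* Hypothetical node-creation callback for an imaginary sysbus device. */
static int add_acme_widget_fdt_node(SysBusDevice *sbdev, void *opaque)
{
    PlatformBusFDTData *data = opaque;

    /* ...derive reg/interrupts from sbdev and emit them with qemu_fdt_*... */
    (void)sbdev;
    (void)data;
    return 0;
}

static const NodeCreationPair add_fdt_node_functions[] = {
    {"acme-widget", add_acme_widget_fdt_node},
    {"", NULL}, /* last element */
};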
/**
* add_all_platform_bus_fdt_nodes - create all the platform bus nodes
*
* builds the parent platform bus node and all the nodes of dynamic
* sysbus devices attached to it.
*/
static void add_all_platform_bus_fdt_nodes(ARMPlatformBusFDTParams *fdt_params)
{
const char platcomp[] = "qemu,platform\0simple-bus";
PlatformBusDevice *pbus;
DeviceState *dev;
gchar *node;
uint64_t addr, size;
int irq_start, dtb_size;
struct arm_boot_info *info = fdt_params->binfo;
const ARMPlatformBusSystemParams *params = fdt_params->system_params;
const char *intc = fdt_params->intc;
void *fdt = info->get_dtb(info, &dtb_size);
/*
* If the user provided a dtb, we assume the dynamic sysbus nodes
* already are integrated there. This corresponds to a use case where
* the dynamic sysbus nodes are complex and their generation is not yet
* supported. In that case the user can take charge of the guest dt
* while qemu takes charge of the qom stuff.
*/
if (info->dtb_filename) {
return;
}
assert(fdt);
node = g_strdup_printf("/platform@%"PRIx64, params->platform_bus_base);
addr = params->platform_bus_base;
size = params->platform_bus_size;
irq_start = params->platform_bus_first_irq;
/* Create a /platform node that we can put all devices into */
qemu_fdt_add_subnode(fdt, node);
qemu_fdt_setprop(fdt, node, "compatible", platcomp, sizeof(platcomp));
/* Our platform bus region is less than 32bits, so 1 cell is enough for
* address and size
*/
qemu_fdt_setprop_cells(fdt, node, "#size-cells", 1);
qemu_fdt_setprop_cells(fdt, node, "#address-cells", 1);
qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr, size);
qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", intc);
dev = qdev_find_recursive(sysbus_get_default(), TYPE_PLATFORM_BUS_DEVICE);
pbus = PLATFORM_BUS_DEVICE(dev);
/* We can only create dt nodes for dynamic devices when they're ready */
assert(pbus->done_gathering);
PlatformBusFDTData data = {
.fdt = fdt,
.irq_start = irq_start,
.pbus_node_name = node,
.pbus = pbus,
};
/* Loop through all dynamic sysbus devices and create their node */
foreach_dynamic_sysbus_device(add_fdt_node, &data);
g_free(node);
}
static void platform_bus_fdt_notify(Notifier *notifier, void *data)
{
PlatformBusFDTNotifierParams *p = DO_UPCAST(PlatformBusFDTNotifierParams,
notifier, notifier);
add_all_platform_bus_fdt_nodes(p->fdt_params);
g_free(p->fdt_params);
g_free(p);
}
void arm_register_platform_bus_fdt_creator(ARMPlatformBusFDTParams *fdt_params)
{
PlatformBusFDTNotifierParams *p = g_new(PlatformBusFDTNotifierParams, 1);
p->fdt_params = fdt_params;
p->notifier.notify = platform_bus_fdt_notify;
qemu_add_machine_init_done_notifier(&p->notifier);
}


@@ -525,7 +525,7 @@ static pflash_t *ve_pflash_cfi01_register(hwaddr base, const char *name,
qdev_prop_set_uint64(dev, "sector-length", VEXPRESS_FLASH_SECT_SIZE);
qdev_prop_set_uint8(dev, "width", 4);
qdev_prop_set_uint8(dev, "device-width", 2);
qdev_prop_set_uint8(dev, "big-endian", 0);
qdev_prop_set_bit(dev, "big-endian", false);
qdev_prop_set_uint16(dev, "id0", 0x89);
qdev_prop_set_uint16(dev, "id1", 0x18);
qdev_prop_set_uint16(dev, "id2", 0x00);


@@ -45,9 +45,11 @@
#include "qemu/error-report.h"
#include "hw/pci-host/gpex.h"
#include "hw/arm/virt-acpi-build.h"
#include "hw/arm/sysbus-fdt.h"
#include "hw/platform-bus.h"
/* Number of external interrupt lines to configure the GIC with */
#define NUM_IRQS 128
#define NUM_IRQS 256
#define GIC_FDT_IRQ_TYPE_SPI 0
#define GIC_FDT_IRQ_TYPE_PPI 1
@@ -60,6 +62,10 @@
#define GIC_FDT_IRQ_PPI_CPU_START 8
#define GIC_FDT_IRQ_PPI_CPU_WIDTH 8
#define PLATFORM_BUS_NUM_IRQS 64
static ARMPlatformBusSystemParams platform_bus_params;
typedef struct VirtBoardInfo {
struct arm_boot_info bootinfo;
const char *cpu_model;
@@ -69,6 +75,8 @@ typedef struct VirtBoardInfo {
void *fdt;
int fdt_size;
uint32_t clock_phandle;
uint32_t gic_phandle;
uint32_t v2m_phandle;
} VirtBoardInfo;
typedef struct {
@@ -103,20 +111,22 @@ typedef struct {
*/
static const MemMapEntry a15memmap[] = {
/* Space up to 0x8000000 is reserved for a boot ROM */
[VIRT_FLASH] = { 0, 0x08000000 },
[VIRT_CPUPERIPHS] = { 0x08000000, 0x00020000 },
[VIRT_FLASH] = { 0, 0x08000000 },
[VIRT_CPUPERIPHS] = { 0x08000000, 0x00020000 },
/* GIC distributor and CPU interfaces sit inside the CPU peripheral space */
[VIRT_GIC_DIST] = { 0x08000000, 0x00010000 },
[VIRT_GIC_CPU] = { 0x08010000, 0x00010000 },
[VIRT_UART] = { 0x09000000, 0x00001000 },
[VIRT_RTC] = { 0x09010000, 0x00001000 },
[VIRT_FW_CFG] = { 0x09020000, 0x0000000a },
[VIRT_MMIO] = { 0x0a000000, 0x00000200 },
[VIRT_GIC_DIST] = { 0x08000000, 0x00010000 },
[VIRT_GIC_CPU] = { 0x08010000, 0x00010000 },
[VIRT_GIC_V2M] = { 0x08020000, 0x00001000 },
[VIRT_UART] = { 0x09000000, 0x00001000 },
[VIRT_RTC] = { 0x09010000, 0x00001000 },
[VIRT_FW_CFG] = { 0x09020000, 0x0000000a },
[VIRT_MMIO] = { 0x0a000000, 0x00000200 },
/* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
[VIRT_PCIE_MMIO] = { 0x10000000, 0x2eff0000 },
[VIRT_PCIE_PIO] = { 0x3eff0000, 0x00010000 },
[VIRT_PCIE_ECAM] = { 0x3f000000, 0x01000000 },
[VIRT_MEM] = { 0x40000000, 30ULL * 1024 * 1024 * 1024 },
[VIRT_PLATFORM_BUS] = { 0x0c000000, 0x02000000 },
[VIRT_PCIE_MMIO] = { 0x10000000, 0x2eff0000 },
[VIRT_PCIE_PIO] = { 0x3eff0000, 0x00010000 },
[VIRT_PCIE_ECAM] = { 0x3f000000, 0x01000000 },
[VIRT_MEM] = { 0x40000000, 30ULL * 1024 * 1024 * 1024 },
};
static const int a15irqmap[] = {
@@ -124,6 +134,8 @@ static const int a15irqmap[] = {
[VIRT_RTC] = 2,
[VIRT_PCIE] = 3, /* ... to 6 */
[VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
[VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
[VIRT_PLATFORM_BUS] = 112, /* ...to 112 + PLATFORM_BUS_NUM_IRQS -1 */
};
static VirtBoardInfo machines[] = {
@@ -299,12 +311,23 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
}
}
static uint32_t fdt_add_gic_node(const VirtBoardInfo *vbi)
static void fdt_add_v2m_gic_node(VirtBoardInfo *vbi)
{
uint32_t gic_phandle;
vbi->v2m_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
qemu_fdt_add_subnode(vbi->fdt, "/intc/v2m");
qemu_fdt_setprop_string(vbi->fdt, "/intc/v2m", "compatible",
"arm,gic-v2m-frame");
qemu_fdt_setprop(vbi->fdt, "/intc/v2m", "msi-controller", NULL, 0);
qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc/v2m", "reg",
2, vbi->memmap[VIRT_GIC_V2M].base,
2, vbi->memmap[VIRT_GIC_V2M].size);
qemu_fdt_setprop_cell(vbi->fdt, "/intc/v2m", "phandle", vbi->v2m_phandle);
}
gic_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", gic_phandle);
static void fdt_add_gic_node(VirtBoardInfo *vbi)
{
vbi->gic_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", vbi->gic_phandle);
qemu_fdt_add_subnode(vbi->fdt, "/intc");
/* 'cortex-a15-gic' means 'GIC v2' */
@@ -317,12 +340,32 @@ static uint32_t fdt_add_gic_node(const VirtBoardInfo *vbi)
2, vbi->memmap[VIRT_GIC_DIST].size,
2, vbi->memmap[VIRT_GIC_CPU].base,
2, vbi->memmap[VIRT_GIC_CPU].size);
qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", gic_phandle);
return gic_phandle;
qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#address-cells", 0x2);
qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#size-cells", 0x2);
qemu_fdt_setprop(vbi->fdt, "/intc", "ranges", NULL, 0);
qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
}
static uint32_t create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
{
int i;
int irq = vbi->irqmap[VIRT_GIC_V2M];
DeviceState *dev;
dev = qdev_create(NULL, "arm-gicv2m");
sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vbi->memmap[VIRT_GIC_V2M].base);
qdev_prop_set_uint32(dev, "base-spi", irq);
qdev_prop_set_uint32(dev, "num-spi", NUM_GICV2M_SPIS);
qdev_init_nofail(dev);
for (i = 0; i < NUM_GICV2M_SPIS; i++) {
sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]);
}
fdt_add_v2m_gic_node(vbi);
}
static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic)
{
/* We create a standalone GIC v2 */
DeviceState *gicdev;
@@ -371,7 +414,9 @@ static uint32_t create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
pic[i] = qdev_get_gpio_in(gicdev, i);
}
return fdt_add_gic_node(vbi);
fdt_add_gic_node(vbi);
create_v2m(vbi, pic);
}
static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
@@ -510,7 +555,7 @@ static void create_one_flash(const char *name, hwaddr flashbase,
qdev_prop_set_uint64(dev, "sector-length", sectorlength);
qdev_prop_set_uint8(dev, "width", 4);
qdev_prop_set_uint8(dev, "device-width", 2);
qdev_prop_set_uint8(dev, "big-endian", 0);
qdev_prop_set_bit(dev, "big-endian", false);
qdev_prop_set_uint16(dev, "id0", 0x89);
qdev_prop_set_uint16(dev, "id1", 0x18);
qdev_prop_set_uint16(dev, "id2", 0x00);
@@ -587,7 +632,7 @@ static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
int first_irq, const char *nodename)
{
int devfn, pin;
uint32_t full_irq_map[4 * 4 * 8] = { 0 };
uint32_t full_irq_map[4 * 4 * 10] = { 0 };
uint32_t *irq_map = full_irq_map;
for (devfn = 0; devfn <= 0x18; devfn += 0x8) {
@@ -600,13 +645,13 @@ static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
uint32_t map[] = {
devfn << 8, 0, 0, /* devfn */
pin + 1, /* PCI pin */
gic_phandle, irq_type, irq_nr, irq_level }; /* GIC irq */
gic_phandle, 0, 0, irq_type, irq_nr, irq_level }; /* GIC irq */
/* Convert map to big endian */
for (i = 0; i < 8; i++) {
for (i = 0; i < 10; i++) {
irq_map[i] = cpu_to_be32(map[i]);
}
irq_map += 8;
irq_map += 10;
}
}
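The entries grow from 8 to 10 cells because the GIC node now advertises #address-cells = 2, so two unused (zero) parent unit-address cells sit between the phandle and the GIC interrupt specifier. One annotated entry, with illustrative values (the phandle and interrupt numbers are examples, not taken from this hunk):

#include <stdint.h>

static const uint32_t example_irq_map_entry[10] = {
    0x0800, 0, 0,   /* child unit address: devfn 0x08 << 8, three cells     */
    1,              /* child interrupt specifier: PCI pin INTA              */
    0x8001,         /* interrupt parent: GIC phandle (example value)        */
    0, 0,           /* parent unit address: #address-cells = 2, unused      */
    0, 3, 4         /* GIC specifier: SPI (0), intid 3, level-high flags    */
};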
@@ -618,8 +663,7 @@ static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
0x7 /* PCI irq */);
}
static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
uint32_t gic_phandle)
static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic)
{
hwaddr base_mmio = vbi->memmap[VIRT_PCIE_MMIO].base;
hwaddr size_mmio = vbi->memmap[VIRT_PCIE_MMIO].size;
@@ -676,6 +720,8 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
qemu_fdt_setprop_cells(vbi->fdt, nodename, "bus-range", 0,
nr_pcie_buses - 1);
qemu_fdt_setprop_cells(vbi->fdt, nodename, "msi-parent", vbi->v2m_phandle);
qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
2, base_ecam, 2, size_ecam);
qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
@@ -685,11 +731,52 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
2, base_mmio, 2, size_mmio);
qemu_fdt_setprop_cell(vbi->fdt, nodename, "#interrupt-cells", 1);
create_pcie_irq_map(vbi, gic_phandle, irq, nodename);
create_pcie_irq_map(vbi, vbi->gic_phandle, irq, nodename);
g_free(nodename);
}
static void create_platform_bus(VirtBoardInfo *vbi, qemu_irq *pic)
{
DeviceState *dev;
SysBusDevice *s;
int i;
ARMPlatformBusFDTParams *fdt_params = g_new(ARMPlatformBusFDTParams, 1);
MemoryRegion *sysmem = get_system_memory();
platform_bus_params.platform_bus_base = vbi->memmap[VIRT_PLATFORM_BUS].base;
platform_bus_params.platform_bus_size = vbi->memmap[VIRT_PLATFORM_BUS].size;
platform_bus_params.platform_bus_first_irq = vbi->irqmap[VIRT_PLATFORM_BUS];
platform_bus_params.platform_bus_num_irqs = PLATFORM_BUS_NUM_IRQS;
fdt_params->system_params = &platform_bus_params;
fdt_params->binfo = &vbi->bootinfo;
fdt_params->intc = "/intc";
/*
* register a machine init done notifier that creates the device tree
* nodes of the platform bus and its children dynamic sysbus devices
*/
arm_register_platform_bus_fdt_creator(fdt_params);
dev = qdev_create(NULL, TYPE_PLATFORM_BUS_DEVICE);
dev->id = TYPE_PLATFORM_BUS_DEVICE;
qdev_prop_set_uint32(dev, "num_irqs",
platform_bus_params.platform_bus_num_irqs);
qdev_prop_set_uint32(dev, "mmio_size",
platform_bus_params.platform_bus_size);
qdev_init_nofail(dev);
s = SYS_BUS_DEVICE(dev);
for (i = 0; i < platform_bus_params.platform_bus_num_irqs; i++) {
int irqn = platform_bus_params.platform_bus_first_irq + i;
sysbus_connect_irq(s, i, pic[irqn]);
}
memory_region_add_subregion(sysmem,
platform_bus_params.platform_bus_base,
sysbus_mmio_get_region(s, 0));
}
static void *machvirt_dtb(const struct arm_boot_info *binfo, int *fdt_size)
{
const VirtBoardInfo *board = (const VirtBoardInfo *)binfo;
@@ -717,7 +804,6 @@ static void machvirt_init(MachineState *machine)
VirtBoardInfo *vbi;
VirtGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state);
VirtGuestInfo *guest_info = &guest_info_state->info;
uint32_t gic_phandle;
char **cpustr;
if (!cpu_model) {
@@ -794,13 +880,13 @@ static void machvirt_init(MachineState *machine)
create_flash(vbi);
gic_phandle = create_gic(vbi, pic);
create_gic(vbi, pic);
create_uart(vbi, pic);
create_rtc(vbi, pic);
create_pcie(vbi, pic, gic_phandle);
create_pcie(vbi, pic);
/* Create mmio transports, so the user can create virtio backends
* (which will be automatically plugged in to the transports). If
@@ -828,6 +914,14 @@ static void machvirt_init(MachineState *machine)
vbi->bootinfo.get_dtb = machvirt_dtb;
vbi->bootinfo.firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0);
arm_load_kernel(ARM_CPU(first_cpu), &vbi->bootinfo);
/*
* arm_load_kernel machine init done notifier registration must
* happen before the platform_bus_create call. In this latter,
* another notifier is registered which adds platform bus nodes.
* Notifiers are executed in registration reverse order.
*/
create_platform_bus(vbi, pic);
}
static bool virt_get_secure(Object *obj, Error **errp)
@@ -866,6 +960,7 @@ static void virt_class_init(ObjectClass *oc, void *data)
mc->desc = "ARM Virtual Machine",
mc->init = machvirt_init;
mc->max_cpus = 8;
mc->has_dynamic_sysbus = true;
}
static const TypeInfo machvirt_info = {


@@ -324,7 +324,7 @@ static void fd_revalidate(FDrive *drv)
/* Intel 82078 floppy disk controller emulation */
static void fdctrl_reset(FDCtrl *fdctrl, int do_irq);
static void fdctrl_reset_fifo(FDCtrl *fdctrl);
static void fdctrl_to_command_phase(FDCtrl *fdctrl);
static int fdctrl_transfer_handler (void *opaque, int nchan,
int dma_pos, int dma_len);
static void fdctrl_raise_irq(FDCtrl *fdctrl);
@@ -495,6 +495,33 @@ enum {
FD_DIR_DSKCHG = 0x80,
};
/*
* See chapter 5.0 "Controller phases" of the spec:
*
* Command phase:
* The host writes a command and its parameters into the FIFO. The command
* phase is completed when all parameters for the command have been supplied,
* and execution phase is entered.
*
* Execution phase:
* Data transfers, either DMA or non-DMA. For non-DMA transfers, the FIFO
* contains the payload now, otherwise it's unused. When all bytes of the
* required data have been transferred, the state is switched to either result
* phase (if the command produces status bytes) or directly back into the
* command phase for the next command.
*
* Result phase:
* The host reads out the FIFO, which contains one or more result bytes now.
*/
enum {
/* Only for migration: reconstruct phase from registers like qemu 2.3 */
FD_PHASE_RECONSTRUCT = 0,
FD_PHASE_COMMAND = 1,
FD_PHASE_EXECUTION = 2,
FD_PHASE_RESULT = 3,
};
#define FD_MULTI_TRACK(state) ((state) & FD_STATE_MULTI)
#define FD_FORMAT_CMD(state) ((state) & FD_STATE_FORMAT)
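Read as a state machine, the phases described in the comment above chain together like this (a hedged sketch, not the controller code itself):

#include <stdio.h>

enum phase { COMMAND, EXECUTION, RESULT };

/* Which phase follows once the current one completes; 'has_result' says
 * whether the command produces status bytes for the host to read. */
static enum phase next_phase(enum phase cur, int has_result)
{
    switch (cur) {
    case COMMAND:   return EXECUTION;                      /* params complete  */
    case EXECUTION: return has_result ? RESULT : COMMAND;  /* data transferred */
    case RESULT:    return COMMAND;                        /* status read out  */
    }
    return COMMAND;
}

int main(void)
{
    /* e.g. a READ command: command -> execution -> result -> command */
    enum phase p = COMMAND;
    p = next_phase(p, 1);
    p = next_phase(p, 1);
    p = next_phase(p, 1);
    printf("back in command phase: %d\n", p == COMMAND);
    return 0;
}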
@@ -504,6 +531,7 @@ struct FDCtrl {
/* Controller state */
QEMUTimer *result_timer;
int dma_chann;
uint8_t phase;
/* Controller's identification */
uint8_t version;
/* HW */
@@ -744,6 +772,28 @@ static const VMStateDescription vmstate_fdrive = {
}
};
/*
* Reconstructs the phase from register values according to the logic that was
* implemented in qemu 2.3. This is the default value that is used if the phase
* subsection is not present on migration.
*
* Don't change this function to reflect newer qemu versions, it is part of
* the migration ABI.
*/
static int reconstruct_phase(FDCtrl *fdctrl)
{
if (fdctrl->msr & FD_MSR_NONDMA) {
return FD_PHASE_EXECUTION;
} else if ((fdctrl->msr & FD_MSR_RQM) == 0) {
/* qemu 2.3 disabled RQM only during DMA transfers */
return FD_PHASE_EXECUTION;
} else if (fdctrl->msr & FD_MSR_DIO) {
return FD_PHASE_RESULT;
} else {
return FD_PHASE_COMMAND;
}
}
static void fdc_pre_save(void *opaque)
{
FDCtrl *s = opaque;
@@ -751,12 +801,24 @@ static void fdc_pre_save(void *opaque)
s->dor_vmstate = s->dor | GET_CUR_DRV(s);
}
static int fdc_pre_load(void *opaque)
{
FDCtrl *s = opaque;
s->phase = FD_PHASE_RECONSTRUCT;
return 0;
}
static int fdc_post_load(void *opaque, int version_id)
{
FDCtrl *s = opaque;
SET_CUR_DRV(s, s->dor_vmstate & FD_DOR_SELMASK);
s->dor = s->dor_vmstate & ~FD_DOR_SELMASK;
if (s->phase == FD_PHASE_RECONSTRUCT) {
s->phase = reconstruct_phase(s);
}
return 0;
}
@@ -794,11 +856,29 @@ static const VMStateDescription vmstate_fdc_result_timer = {
}
};
static bool fdc_phase_needed(void *opaque)
{
FDCtrl *fdctrl = opaque;
return reconstruct_phase(fdctrl) != fdctrl->phase;
}
static const VMStateDescription vmstate_fdc_phase = {
.name = "fdc/phase",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
VMSTATE_UINT8(phase, FDCtrl),
VMSTATE_END_OF_LIST()
}
};
static const VMStateDescription vmstate_fdc = {
.name = "fdc",
.version_id = 2,
.minimum_version_id = 2,
.pre_save = fdc_pre_save,
.pre_load = fdc_pre_load,
.post_load = fdc_post_load,
.fields = (VMStateField[]) {
/* Controller State */
@@ -838,6 +918,9 @@ static const VMStateDescription vmstate_fdc = {
} , {
.vmsd = &vmstate_fdc_result_timer,
.needed = fdc_result_timer_needed,
} , {
.vmsd = &vmstate_fdc_phase,
.needed = fdc_phase_needed,
} , {
/* empty */
}
@@ -918,7 +1001,7 @@ static void fdctrl_reset(FDCtrl *fdctrl, int do_irq)
fdctrl->data_dir = FD_DIR_WRITE;
for (i = 0; i < MAX_FD; i++)
fd_recalibrate(&fdctrl->drives[i]);
fdctrl_reset_fifo(fdctrl);
fdctrl_to_command_phase(fdctrl);
if (do_irq) {
fdctrl->status0 |= FD_SR0_RDYCHG;
fdctrl_raise_irq(fdctrl);
@@ -1134,17 +1217,22 @@ static uint32_t fdctrl_read_dir(FDCtrl *fdctrl)
return retval;
}
/* FIFO state control */
static void fdctrl_reset_fifo(FDCtrl *fdctrl)
/* Clear the FIFO and update the state for receiving the next command */
static void fdctrl_to_command_phase(FDCtrl *fdctrl)
{
fdctrl->phase = FD_PHASE_COMMAND;
fdctrl->data_dir = FD_DIR_WRITE;
fdctrl->data_pos = 0;
fdctrl->data_len = 1; /* Accept command byte, adjust for params later */
fdctrl->msr &= ~(FD_MSR_CMDBUSY | FD_MSR_DIO);
fdctrl->msr |= FD_MSR_RQM;
}
/* Set FIFO status for the host to read */
static void fdctrl_set_fifo(FDCtrl *fdctrl, int fifo_len)
/* Update the state to allow the guest to read out the command status.
* @fifo_len is the number of result bytes to be read out. */
static void fdctrl_to_result_phase(FDCtrl *fdctrl, int fifo_len)
{
fdctrl->phase = FD_PHASE_RESULT;
fdctrl->data_dir = FD_DIR_READ;
fdctrl->data_len = fifo_len;
fdctrl->data_pos = 0;
@@ -1157,7 +1245,7 @@ static void fdctrl_unimplemented(FDCtrl *fdctrl, int direction)
qemu_log_mask(LOG_UNIMP, "fdc: unimplemented command 0x%02x\n",
fdctrl->fifo[0]);
fdctrl->fifo[0] = FD_SR0_INVCMD;
fdctrl_set_fifo(fdctrl, 1);
fdctrl_to_result_phase(fdctrl, 1);
}
/* Seek to next sector
@@ -1238,7 +1326,7 @@ static void fdctrl_stop_transfer(FDCtrl *fdctrl, uint8_t status0,
fdctrl->msr |= FD_MSR_RQM | FD_MSR_DIO;
fdctrl->msr &= ~FD_MSR_NONDMA;
fdctrl_set_fifo(fdctrl, 7);
fdctrl_to_result_phase(fdctrl, 7);
fdctrl_raise_irq(fdctrl);
}
@@ -1352,7 +1440,7 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction)
}
}
FLOPPY_DPRINTF("start non-DMA transfer\n");
fdctrl->msr |= FD_MSR_NONDMA;
fdctrl->msr |= FD_MSR_NONDMA | FD_MSR_RQM;
if (direction != FD_DIR_WRITE)
fdctrl->msr |= FD_MSR_DIO;
/* IO based transfer: calculate len */
@@ -1505,9 +1593,16 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl)
FLOPPY_DPRINTF("error: controller not ready for reading\n");
return 0;
}
/* If data_len spans multiple sectors, the current position in the FIFO
* wraps around while fdctrl->data_pos is the real position in the whole
* request. */
pos = fdctrl->data_pos;
pos %= FD_SECTOR_LEN;
if (fdctrl->msr & FD_MSR_NONDMA) {
switch (fdctrl->phase) {
case FD_PHASE_EXECUTION:
assert(fdctrl->msr & FD_MSR_NONDMA);
if (pos == 0) {
if (fdctrl->data_pos != 0)
if (!fdctrl_seek_to_next_sect(fdctrl, cur_drv)) {
@@ -1523,20 +1618,28 @@ static uint32_t fdctrl_read_data(FDCtrl *fdctrl)
memset(fdctrl->fifo, 0, FD_SECTOR_LEN);
}
}
}
retval = fdctrl->fifo[pos];
if (++fdctrl->data_pos == fdctrl->data_len) {
fdctrl->data_pos = 0;
/* Switch from transfer mode to status mode
* then from status mode to command mode
*/
if (fdctrl->msr & FD_MSR_NONDMA) {
if (++fdctrl->data_pos == fdctrl->data_len) {
fdctrl->msr &= ~FD_MSR_RQM;
fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
} else {
fdctrl_reset_fifo(fdctrl);
}
break;
case FD_PHASE_RESULT:
assert(!(fdctrl->msr & FD_MSR_NONDMA));
if (++fdctrl->data_pos == fdctrl->data_len) {
fdctrl->msr &= ~FD_MSR_RQM;
fdctrl_to_command_phase(fdctrl);
fdctrl_reset_irq(fdctrl);
}
break;
case FD_PHASE_COMMAND:
default:
abort();
}
retval = fdctrl->fifo[pos];
FLOPPY_DPRINTF("data register: 0x%02x\n", retval);
return retval;
@@ -1606,7 +1709,7 @@ static void fdctrl_handle_lock(FDCtrl *fdctrl, int direction)
{
fdctrl->lock = (fdctrl->fifo[0] & 0x80) ? 1 : 0;
fdctrl->fifo[0] = fdctrl->lock << 4;
fdctrl_set_fifo(fdctrl, 1);
fdctrl_to_result_phase(fdctrl, 1);
}
static void fdctrl_handle_dumpreg(FDCtrl *fdctrl, int direction)
@@ -1631,20 +1734,20 @@ static void fdctrl_handle_dumpreg(FDCtrl *fdctrl, int direction)
(cur_drv->perpendicular << 2);
fdctrl->fifo[8] = fdctrl->config;
fdctrl->fifo[9] = fdctrl->precomp_trk;
fdctrl_set_fifo(fdctrl, 10);
fdctrl_to_result_phase(fdctrl, 10);
}
static void fdctrl_handle_version(FDCtrl *fdctrl, int direction)
{
/* Controller's version */
fdctrl->fifo[0] = fdctrl->version;
fdctrl_set_fifo(fdctrl, 1);
fdctrl_to_result_phase(fdctrl, 1);
}
static void fdctrl_handle_partid(FDCtrl *fdctrl, int direction)
{
fdctrl->fifo[0] = 0x41; /* Stepping 1 */
fdctrl_set_fifo(fdctrl, 1);
fdctrl_to_result_phase(fdctrl, 1);
}
static void fdctrl_handle_restore(FDCtrl *fdctrl, int direction)
@@ -1667,7 +1770,7 @@ static void fdctrl_handle_restore(FDCtrl *fdctrl, int direction)
fdctrl->config = fdctrl->fifo[11];
fdctrl->precomp_trk = fdctrl->fifo[12];
fdctrl->pwrd = fdctrl->fifo[13];
fdctrl_reset_fifo(fdctrl);
fdctrl_to_command_phase(fdctrl);
}
static void fdctrl_handle_save(FDCtrl *fdctrl, int direction)
@@ -1697,7 +1800,7 @@ static void fdctrl_handle_save(FDCtrl *fdctrl, int direction)
fdctrl->fifo[12] = fdctrl->pwrd;
fdctrl->fifo[13] = 0;
fdctrl->fifo[14] = 0;
fdctrl_set_fifo(fdctrl, 15);
fdctrl_to_result_phase(fdctrl, 15);
}
static void fdctrl_handle_readid(FDCtrl *fdctrl, int direction)
@@ -1746,7 +1849,7 @@ static void fdctrl_handle_specify(FDCtrl *fdctrl, int direction)
else
fdctrl->dor |= FD_DOR_DMAEN;
/* No result back */
fdctrl_reset_fifo(fdctrl);
fdctrl_to_command_phase(fdctrl);
}
static void fdctrl_handle_sense_drive_status(FDCtrl *fdctrl, int direction)
@@ -1762,7 +1865,7 @@ static void fdctrl_handle_sense_drive_status(FDCtrl *fdctrl, int direction)
(cur_drv->head << 2) |
GET_CUR_DRV(fdctrl) |
0x28;
fdctrl_set_fifo(fdctrl, 1);
fdctrl_to_result_phase(fdctrl, 1);
}
static void fdctrl_handle_recalibrate(FDCtrl *fdctrl, int direction)
@@ -1772,7 +1875,7 @@ static void fdctrl_handle_recalibrate(FDCtrl *fdctrl, int direction)
SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK);
cur_drv = get_cur_drv(fdctrl);
fd_recalibrate(cur_drv);
fdctrl_reset_fifo(fdctrl);
fdctrl_to_command_phase(fdctrl);
/* Raise Interrupt */
fdctrl->status0 |= FD_SR0_SEEK;
fdctrl_raise_irq(fdctrl);
@@ -1788,7 +1891,7 @@ static void fdctrl_handle_sense_interrupt_status(FDCtrl *fdctrl, int direction)
fdctrl->reset_sensei--;
} else if (!(fdctrl->sra & FD_SRA_INTPEND)) {
fdctrl->fifo[0] = FD_SR0_INVCMD;
fdctrl_set_fifo(fdctrl, 1);
fdctrl_to_result_phase(fdctrl, 1);
return;
} else {
fdctrl->fifo[0] =
@@ -1797,7 +1900,7 @@ static void fdctrl_handle_sense_interrupt_status(FDCtrl *fdctrl, int direction)
}
fdctrl->fifo[1] = cur_drv->track;
fdctrl_set_fifo(fdctrl, 2);
fdctrl_to_result_phase(fdctrl, 2);
fdctrl_reset_irq(fdctrl);
fdctrl->status0 = FD_SR0_RDYCHG;
}
@@ -1808,7 +1911,7 @@ static void fdctrl_handle_seek(FDCtrl *fdctrl, int direction)
SET_CUR_DRV(fdctrl, fdctrl->fifo[1] & FD_DOR_SELMASK);
cur_drv = get_cur_drv(fdctrl);
fdctrl_reset_fifo(fdctrl);
fdctrl_to_command_phase(fdctrl);
/* The seek command just sends step pulses to the drive and doesn't care if
* there is a medium inserted or if it's banging the head against the drive.
*/
@@ -1825,7 +1928,7 @@ static void fdctrl_handle_perpendicular_mode(FDCtrl *fdctrl, int direction)
if (fdctrl->fifo[1] & 0x80)
cur_drv->perpendicular = fdctrl->fifo[1] & 0x7;
/* No result back */
fdctrl_reset_fifo(fdctrl);
fdctrl_to_command_phase(fdctrl);
}
static void fdctrl_handle_configure(FDCtrl *fdctrl, int direction)
@@ -1833,20 +1936,20 @@ static void fdctrl_handle_configure(FDCtrl *fdctrl, int direction)
fdctrl->config = fdctrl->fifo[2];
fdctrl->precomp_trk = fdctrl->fifo[3];
/* No result back */
fdctrl_reset_fifo(fdctrl);
fdctrl_to_command_phase(fdctrl);
}
static void fdctrl_handle_powerdown_mode(FDCtrl *fdctrl, int direction)
{
fdctrl->pwrd = fdctrl->fifo[1];
fdctrl->fifo[0] = fdctrl->fifo[1];
fdctrl_set_fifo(fdctrl, 1);
fdctrl_to_result_phase(fdctrl, 1);
}
static void fdctrl_handle_option(FDCtrl *fdctrl, int direction)
{
/* No result back */
fdctrl_reset_fifo(fdctrl);
fdctrl_to_command_phase(fdctrl);
}
static void fdctrl_handle_drive_specification_command(FDCtrl *fdctrl, int direction)
@@ -1862,15 +1965,15 @@ static void fdctrl_handle_drive_specification_command(FDCtrl *fdctrl, int direct
fdctrl->fifo[0] = fdctrl->fifo[1];
fdctrl->fifo[2] = 0;
fdctrl->fifo[3] = 0;
fdctrl_set_fifo(fdctrl, 4);
fdctrl_to_result_phase(fdctrl, 4);
} else {
fdctrl_reset_fifo(fdctrl);
fdctrl_to_command_phase(fdctrl);
}
} else if (fdctrl->data_len > 7) {
/* ERROR */
fdctrl->fifo[0] = 0x80 |
(cur_drv->head << 2) | GET_CUR_DRV(fdctrl);
fdctrl_set_fifo(fdctrl, 1);
fdctrl_to_result_phase(fdctrl, 1);
}
}
@@ -1887,7 +1990,7 @@ static void fdctrl_handle_relative_seek_in(FDCtrl *fdctrl, int direction)
fd_seek(cur_drv, cur_drv->head,
cur_drv->track + fdctrl->fifo[2], cur_drv->sect, 1);
}
fdctrl_reset_fifo(fdctrl);
fdctrl_to_command_phase(fdctrl);
/* Raise Interrupt */
fdctrl->status0 |= FD_SR0_SEEK;
fdctrl_raise_irq(fdctrl);
@@ -1905,20 +2008,25 @@ static void fdctrl_handle_relative_seek_out(FDCtrl *fdctrl, int direction)
fd_seek(cur_drv, cur_drv->head,
cur_drv->track - fdctrl->fifo[2], cur_drv->sect, 1);
}
fdctrl_reset_fifo(fdctrl);
fdctrl_to_command_phase(fdctrl);
/* Raise Interrupt */
fdctrl->status0 |= FD_SR0_SEEK;
fdctrl_raise_irq(fdctrl);
}
static const struct {
/*
* Handlers for the execution phase of each command
*/
typedef struct FDCtrlCommand {
uint8_t value;
uint8_t mask;
const char* name;
int parameters;
void (*handler)(FDCtrl *fdctrl, int direction);
int direction;
} handlers[] = {
} FDCtrlCommand;
static const FDCtrlCommand handlers[] = {
{ FD_CMD_READ, 0x1f, "READ", 8, fdctrl_start_transfer, FD_DIR_READ },
{ FD_CMD_WRITE, 0x3f, "WRITE", 8, fdctrl_start_transfer, FD_DIR_WRITE },
{ FD_CMD_SEEK, 0xff, "SEEK", 2, fdctrl_handle_seek },
@@ -1955,9 +2063,19 @@ static const struct {
/* Associate command to an index in the 'handlers' array */
static uint8_t command_to_handler[256];
static const FDCtrlCommand *get_command(uint8_t cmd)
{
int idx;
idx = command_to_handler[cmd];
FLOPPY_DPRINTF("%s command\n", handlers[idx].name);
return &handlers[idx];
}
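A dispatch table of this value/mask form is usually filled once at init time by walking the handlers array and marking every command byte that matches an entry's mask. A sketch of how such a table could be built (an illustration of the idea, not necessarily how this file does it):

/* Walking the array in reverse lets entries nearer the front of handlers[]
 * overwrite later, less specific ones for overlapping encodings. */
static void build_command_to_handler(void)
{
    int i, cmd;

    for (i = ARRAY_SIZE(handlers) - 1; i >= 0; i--) {
        for (cmd = 0; cmd < 256; cmd++) {
            if ((cmd & handlers[i].mask) == handlers[i].value) {
                command_to_handler[cmd] = i;
            }
        }
    }
}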
static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
{
FDrive *cur_drv;
const FDCtrlCommand *cmd;
uint32_t pos;
/* Reset mode */
@@ -1970,12 +2088,27 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
return;
}
fdctrl->dsr &= ~FD_DSR_PWRDOWN;
/* Is it write command time ? */
if (fdctrl->msr & FD_MSR_NONDMA) {
FLOPPY_DPRINTF("%s: %02x\n", __func__, value);
/* If data_len spans multiple sectors, the current position in the FIFO
* wraps around while fdctrl->data_pos is the real position in the whole
* request. */
pos = fdctrl->data_pos++;
pos %= FD_SECTOR_LEN;
fdctrl->fifo[pos] = value;
if (fdctrl->data_pos == fdctrl->data_len) {
fdctrl->msr &= ~FD_MSR_RQM;
}
switch (fdctrl->phase) {
case FD_PHASE_EXECUTION:
/* For DMA requests, RQM should be cleared during execution phase, so
* we would have errored out above. */
assert(fdctrl->msr & FD_MSR_NONDMA);
/* FIFO data write */
pos = fdctrl->data_pos++;
pos %= FD_SECTOR_LEN;
fdctrl->fifo[pos] = value;
if (pos == FD_SECTOR_LEN - 1 ||
fdctrl->data_pos == fdctrl->data_len) {
cur_drv = get_cur_drv(fdctrl);
@@ -1983,45 +2116,54 @@ static void fdctrl_write_data(FDCtrl *fdctrl, uint32_t value)
< 0) {
FLOPPY_DPRINTF("error writing sector %d\n",
fd_sector(cur_drv));
return;
break;
}
if (!fdctrl_seek_to_next_sect(fdctrl, cur_drv)) {
FLOPPY_DPRINTF("error seeking to next sector %d\n",
fd_sector(cur_drv));
return;
break;
}
}
/* Switch from transfer mode to status mode
* then from status mode to command mode
*/
if (fdctrl->data_pos == fdctrl->data_len)
fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
return;
}
if (fdctrl->data_pos == 0) {
/* Command */
pos = command_to_handler[value & 0xff];
FLOPPY_DPRINTF("%s command\n", handlers[pos].name);
fdctrl->data_len = handlers[pos].parameters + 1;
fdctrl->msr |= FD_MSR_CMDBUSY;
}
FLOPPY_DPRINTF("%s: %02x\n", __func__, value);
pos = fdctrl->data_pos++;
pos %= FD_SECTOR_LEN;
fdctrl->fifo[pos] = value;
if (fdctrl->data_pos == fdctrl->data_len) {
/* We now have all parameters
* and will be able to treat the command
*/
if (fdctrl->data_state & FD_STATE_FORMAT) {
fdctrl_format_sector(fdctrl);
return;
/* Switch to result phase when done with the transfer */
if (fdctrl->data_pos == fdctrl->data_len) {
fdctrl_stop_transfer(fdctrl, 0x00, 0x00, 0x00);
}
break;
case FD_PHASE_COMMAND:
assert(!(fdctrl->msr & FD_MSR_NONDMA));
assert(fdctrl->data_pos < FD_SECTOR_LEN);
if (pos == 0) {
/* The first byte specifies the command. Now we start reading
* as many parameters as this command requires. */
cmd = get_command(value);
fdctrl->data_len = cmd->parameters + 1;
if (cmd->parameters) {
fdctrl->msr |= FD_MSR_RQM;
}
fdctrl->msr |= FD_MSR_CMDBUSY;
}
pos = command_to_handler[fdctrl->fifo[0] & 0xff];
FLOPPY_DPRINTF("treat %s command\n", handlers[pos].name);
(*handlers[pos].handler)(fdctrl, handlers[pos].direction);
if (fdctrl->data_pos == fdctrl->data_len) {
/* We have all parameters now, execute the command */
fdctrl->phase = FD_PHASE_EXECUTION;
if (fdctrl->data_state & FD_STATE_FORMAT) {
fdctrl_format_sector(fdctrl);
break;
}
cmd = get_command(fdctrl->fifo[0]);
FLOPPY_DPRINTF("Calling handler for '%s'\n", cmd->name);
cmd->handler(fdctrl, cmd->direction);
}
break;
case FD_PHASE_RESULT:
default:
abort();
}
}


@@ -64,6 +64,9 @@ do { \
#define TYPE_CFI_PFLASH01 "cfi.pflash01"
#define CFI_PFLASH01(obj) OBJECT_CHECK(pflash_t, (obj), TYPE_CFI_PFLASH01)
#define PFLASH_BE 0
#define PFLASH_SECURE 1
struct pflash_t {
/*< private >*/
SysBusDevice parent_obj;
@@ -75,7 +78,7 @@ struct pflash_t {
uint8_t bank_width;
uint8_t device_width; /* If 0, device width not specified. */
uint8_t max_device_width; /* max device width in bytes */
uint8_t be;
uint32_t features;
uint8_t wcycle; /* if 0, the flash is read normally */
int ro;
uint8_t cmd;
@@ -235,12 +238,57 @@ static uint32_t pflash_devid_query(pflash_t *pfl, hwaddr offset)
return resp;
}
static uint32_t pflash_data_read(pflash_t *pfl, hwaddr offset,
int width, int be)
{
uint8_t *p;
uint32_t ret;
p = pfl->storage;
switch (width) {
case 1:
ret = p[offset];
DPRINTF("%s: data offset " TARGET_FMT_plx " %02x\n",
__func__, offset, ret);
break;
case 2:
if (be) {
ret = p[offset] << 8;
ret |= p[offset + 1];
} else {
ret = p[offset];
ret |= p[offset + 1] << 8;
}
DPRINTF("%s: data offset " TARGET_FMT_plx " %04x\n",
__func__, offset, ret);
break;
case 4:
if (be) {
ret = p[offset] << 24;
ret |= p[offset + 1] << 16;
ret |= p[offset + 2] << 8;
ret |= p[offset + 3];
} else {
ret = p[offset];
ret |= p[offset + 1] << 8;
ret |= p[offset + 2] << 16;
ret |= p[offset + 3] << 24;
}
DPRINTF("%s: data offset " TARGET_FMT_plx " %08x\n",
__func__, offset, ret);
break;
default:
DPRINTF("BUG in %s\n", __func__);
abort();
}
return ret;
}
static uint32_t pflash_read (pflash_t *pfl, hwaddr offset,
int width, int be)
{
hwaddr boff;
uint32_t ret;
uint8_t *p;
ret = -1;
@@ -257,43 +305,7 @@ static uint32_t pflash_read (pflash_t *pfl, hwaddr offset,
/* fall through to read code */
case 0x00:
/* Flash area read */
p = pfl->storage;
switch (width) {
case 1:
ret = p[offset];
DPRINTF("%s: data offset " TARGET_FMT_plx " %02x\n",
__func__, offset, ret);
break;
case 2:
if (be) {
ret = p[offset] << 8;
ret |= p[offset + 1];
} else {
ret = p[offset];
ret |= p[offset + 1] << 8;
}
DPRINTF("%s: data offset " TARGET_FMT_plx " %04x\n",
__func__, offset, ret);
break;
case 4:
if (be) {
ret = p[offset] << 24;
ret |= p[offset + 1] << 16;
ret |= p[offset + 2] << 8;
ret |= p[offset + 3];
} else {
ret = p[offset];
ret |= p[offset + 1] << 8;
ret |= p[offset + 2] << 16;
ret |= p[offset + 3] << 24;
}
DPRINTF("%s: data offset " TARGET_FMT_plx " %08x\n",
__func__, offset, ret);
break;
default:
DPRINTF("BUG in %s\n", __func__);
}
ret = pflash_data_read(pfl, offset, width, be);
break;
case 0x10: /* Single byte program */
case 0x20: /* Block erase */
@@ -648,101 +660,37 @@ static void pflash_write(pflash_t *pfl, hwaddr offset,
}
static uint32_t pflash_readb_be(void *opaque, hwaddr addr)
{
return pflash_read(opaque, addr, 1, 1);
}
static uint32_t pflash_readb_le(void *opaque, hwaddr addr)
{
return pflash_read(opaque, addr, 1, 0);
}
static uint32_t pflash_readw_be(void *opaque, hwaddr addr)
static MemTxResult pflash_mem_read_with_attrs(void *opaque, hwaddr addr, uint64_t *value,
unsigned len, MemTxAttrs attrs)
{
pflash_t *pfl = opaque;
bool be = !!(pfl->features & (1 << PFLASH_BE));
return pflash_read(pfl, addr, 2, 1);
if ((pfl->features & (1 << PFLASH_SECURE)) && !attrs.secure) {
*value = pflash_data_read(opaque, addr, len, be);
} else {
*value = pflash_read(opaque, addr, len, be);
}
return MEMTX_OK;
}
static uint32_t pflash_readw_le(void *opaque, hwaddr addr)
static MemTxResult pflash_mem_write_with_attrs(void *opaque, hwaddr addr, uint64_t value,
unsigned len, MemTxAttrs attrs)
{
pflash_t *pfl = opaque;
bool be = !!(pfl->features & (1 << PFLASH_BE));
return pflash_read(pfl, addr, 2, 0);
if ((pfl->features & (1 << PFLASH_SECURE)) && !attrs.secure) {
return MEMTX_ERROR;
} else {
pflash_write(opaque, addr, value, len, be);
return MEMTX_OK;
}
}
static uint32_t pflash_readl_be(void *opaque, hwaddr addr)
{
pflash_t *pfl = opaque;
return pflash_read(pfl, addr, 4, 1);
}
static uint32_t pflash_readl_le(void *opaque, hwaddr addr)
{
pflash_t *pfl = opaque;
return pflash_read(pfl, addr, 4, 0);
}
static void pflash_writeb_be(void *opaque, hwaddr addr,
uint32_t value)
{
pflash_write(opaque, addr, value, 1, 1);
}
static void pflash_writeb_le(void *opaque, hwaddr addr,
uint32_t value)
{
pflash_write(opaque, addr, value, 1, 0);
}
static void pflash_writew_be(void *opaque, hwaddr addr,
uint32_t value)
{
pflash_t *pfl = opaque;
pflash_write(pfl, addr, value, 2, 1);
}
static void pflash_writew_le(void *opaque, hwaddr addr,
uint32_t value)
{
pflash_t *pfl = opaque;
pflash_write(pfl, addr, value, 2, 0);
}
static void pflash_writel_be(void *opaque, hwaddr addr,
uint32_t value)
{
pflash_t *pfl = opaque;
pflash_write(pfl, addr, value, 4, 1);
}
static void pflash_writel_le(void *opaque, hwaddr addr,
uint32_t value)
{
pflash_t *pfl = opaque;
pflash_write(pfl, addr, value, 4, 0);
}
static const MemoryRegionOps pflash_cfi01_ops_be = {
.old_mmio = {
.read = { pflash_readb_be, pflash_readw_be, pflash_readl_be, },
.write = { pflash_writeb_be, pflash_writew_be, pflash_writel_be, },
},
.endianness = DEVICE_NATIVE_ENDIAN,
};
static const MemoryRegionOps pflash_cfi01_ops_le = {
.old_mmio = {
.read = { pflash_readb_le, pflash_readw_le, pflash_readl_le, },
.write = { pflash_writeb_le, pflash_writew_le, pflash_writel_le, },
},
static const MemoryRegionOps pflash_cfi01_ops = {
.read_with_attrs = pflash_mem_read_with_attrs,
.write_with_attrs = pflash_mem_write_with_attrs,
.endianness = DEVICE_NATIVE_ENDIAN,
};
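Because endianness and the new secure gating are both feature bits now, a board that wanted a Secure-world-only flash would (hypothetically; nothing in this hunk does so) set the property the same way the big-endian one is set elsewhere in this series. A sketch, with illustrative geometry values:

/* Hypothetical board code: a CFI flash that only Secure accesses may
 * program; non-secure reads still see the array contents. */
static void create_secure_flash(void)
{
    DeviceState *dev = qdev_create(NULL, "cfi.pflash01");

    qdev_prop_set_uint32(dev, "num-blocks", 256);
    qdev_prop_set_uint64(dev, "sector-length", 256 * 1024);
    qdev_prop_set_uint8(dev, "width", 4);
    qdev_prop_set_bit(dev, "big-endian", false);
    qdev_prop_set_bit(dev, "secure", true);
    qdev_init_nofail(dev);
}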
@@ -773,7 +721,8 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp)
memory_region_init_rom_device(
&pfl->mem, OBJECT(dev),
pfl->be ? &pflash_cfi01_ops_be : &pflash_cfi01_ops_le, pfl,
&pflash_cfi01_ops,
pfl,
pfl->name, total_len, &local_err);
if (local_err) {
error_propagate(errp, local_err);
@@ -925,7 +874,8 @@ static Property pflash_cfi01_properties[] = {
DEFINE_PROP_UINT8("width", struct pflash_t, bank_width, 0),
DEFINE_PROP_UINT8("device-width", struct pflash_t, device_width, 0),
DEFINE_PROP_UINT8("max-device-width", struct pflash_t, max_device_width, 0),
DEFINE_PROP_UINT8("big-endian", struct pflash_t, be, 0),
DEFINE_PROP_BIT("big-endian", struct pflash_t, features, PFLASH_BE, 0),
DEFINE_PROP_BIT("secure", struct pflash_t, features, PFLASH_SECURE, 0),
DEFINE_PROP_UINT16("id0", struct pflash_t, ident0, 0),
DEFINE_PROP_UINT16("id1", struct pflash_t, ident1, 0),
DEFINE_PROP_UINT16("id2", struct pflash_t, ident2, 0),
@@ -975,7 +925,7 @@ pflash_t *pflash_cfi01_register(hwaddr base,
qdev_prop_set_uint32(dev, "num-blocks", nb_blocs);
qdev_prop_set_uint64(dev, "sector-length", sector_len);
qdev_prop_set_uint8(dev, "width", bank_width);
qdev_prop_set_uint8(dev, "big-endian", !!be);
qdev_prop_set_bit(dev, "big-endian", !!be);
qdev_prop_set_uint16(dev, "id0", id0);
qdev_prop_set_uint16(dev, "id1", id1);
qdev_prop_set_uint16(dev, "id2", id2);


@@ -641,28 +641,3 @@ static void parallel_register_types(void)
}
type_init(parallel_register_types)
static void parallel_init(ISABus *bus, int index, CharDriverState *chr)
{
DeviceState *dev;
ISADevice *isadev;
isadev = isa_create(bus, "isa-parallel");
dev = DEVICE(isadev);
qdev_prop_set_uint32(dev, "index", index);
qdev_prop_set_chr(dev, "chardev", chr);
qdev_init_nofail(dev);
}
void parallel_hds_isa_init(ISABus *bus, int n)
{
int i;
assert(n <= MAX_PARALLEL_PORTS);
for (i = 0; i < n; i++) {
if (parallel_hds[i]) {
parallel_init(bus, i, parallel_hds[i]);
}
}
}


@@ -294,6 +294,14 @@ static void machine_init_notify(Notifier *notifier, void *data)
foreach_dynamic_sysbus_device(error_on_sysbus_device, NULL);
}
static void machine_class_init(ObjectClass *oc, void *data)
{
MachineClass *mc = MACHINE_CLASS(oc);
/* Default 128 MB as guest ram size */
mc->default_ram_size = 128 * M_BYTE;
}
static void machine_initfn(Object *obj)
{
MachineState *ms = MACHINE(obj);
@@ -463,6 +471,7 @@ static const TypeInfo machine_info = {
.parent = TYPE_OBJECT,
.abstract = true,
.class_size = sizeof(MachineClass),
.class_init = machine_class_init,
.instance_size = sizeof(MachineState),
.instance_init = machine_initfn,
.instance_finalize = machine_finalize,

View File

@@ -389,7 +389,7 @@ void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd)
nd->instantiated = 1;
}
static int qdev_add_one_global(QemuOpts *opts, void *opaque)
static int qdev_add_one_global(void *opaque, QemuOpts *opts, Error **errp)
{
GlobalProperty *g;
@@ -404,5 +404,6 @@ static int qdev_add_one_global(QemuOpts *opts, void *opaque)
void qemu_add_globals(void)
{
qemu_opts_foreach(qemu_find_opts("global"), qdev_add_one_global, NULL, 0);
qemu_opts_foreach(qemu_find_opts("global"),
qdev_add_one_global, NULL, NULL);
}

View File

@@ -34,3 +34,5 @@ obj-$(CONFIG_CG3) += cg3.o
obj-$(CONFIG_VGA) += vga.o
common-obj-$(CONFIG_QXL) += qxl.o qxl-logger.o qxl-render.o
obj-$(CONFIG_VIRTIO) += virtio-gpu.o

View File

@@ -106,6 +106,7 @@ static void cg3_update_display(void *opaque)
pix = memory_region_get_ram_ptr(&s->vram_mem);
data = (uint32_t *)surface_data(surface);
memory_region_sync_dirty_bitmap(&s->vram_mem);
for (y = 0; y < height; y++) {
int update = s->full_update;
@@ -309,6 +310,7 @@ static void cg3_realizefn(DeviceState *dev, Error **errp)
memory_region_init_ram(&s->vram_mem, NULL, "cg3.vram", s->vram_size,
&error_abort);
memory_region_set_log(&s->vram_mem, true, DIRTY_MEMORY_VGA);
vmstate_register_ram_global(&s->vram_mem);
sysbus_init_mmio(sbd, &s->vram_mem);

View File

@@ -1109,6 +1109,12 @@ static inline int fimd_get_buffer_id(Exynos4210fimdWindow *w)
}
}
static void exynos4210_fimd_invalidate(void *opaque)
{
Exynos4210fimdState *s = (Exynos4210fimdState *)opaque;
s->invalidate = true;
}
/* Updates specified window's MemorySection based on values of WINCON,
* VIDOSDA, VIDOSDB, VIDWADDx and SHADOWCON registers */
static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win)
@@ -1136,7 +1142,11 @@ static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win)
/* TODO: add .exit and unref the region there. Not needed yet since sysbus
* does not support hot-unplug.
*/
memory_region_unref(w->mem_section.mr);
if (w->mem_section.mr) {
memory_region_set_log(w->mem_section.mr, false, DIRTY_MEMORY_VGA);
memory_region_unref(w->mem_section.mr);
}
w->mem_section = memory_region_find(sysbus_address_space(sbd),
fb_start_addr, w->fb_len);
assert(w->mem_section.mr);
@@ -1162,6 +1172,8 @@ static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win)
cpu_physical_memory_unmap(w->host_fb_addr, fb_mapped_len, 0, 0);
goto error_return;
}
memory_region_set_log(w->mem_section.mr, true, DIRTY_MEMORY_VGA);
exynos4210_fimd_invalidate(s);
return;
error_return:
@@ -1224,12 +1236,6 @@ static void exynos4210_fimd_update_irq(Exynos4210fimdState *s)
}
}
static void exynos4210_fimd_invalidate(void *opaque)
{
Exynos4210fimdState *s = (Exynos4210fimdState *)opaque;
s->invalidate = true;
}
static void exynos4210_update_resolution(Exynos4210fimdState *s)
{
DisplaySurface *surface = qemu_console_surface(s->console);

View File

@@ -63,6 +63,10 @@ void framebuffer_update_display(
assert(mem_section.offset_within_address_space == base);
memory_region_sync_dirty_bitmap(mem);
if (!memory_region_is_logging(mem, DIRTY_MEMORY_VGA)) {
invalidate = true;
}
src_base = cpu_physical_memory_map(base, &src_len, 0);
/* If we can't map the framebuffer then bail. We could try harder,
but it's not really worth it as dirty flag tracking will probably

View File

@@ -260,6 +260,7 @@ static void g364fb_update_display(void *opaque)
qemu_console_resize(s->con, s->width, s->height);
}
memory_region_sync_dirty_bitmap(&s->mem_vram);
if (s->ctla & CTLA_FORCE_BLANK) {
g364fb_draw_blank(s);
} else if (s->depth == 8) {
@@ -489,7 +490,7 @@ static void g364fb_init(DeviceState *dev, G364State *s)
memory_region_init_ram_ptr(&s->mem_vram, NULL, "vram",
s->vram_size, s->vram);
vmstate_register_ram(&s->mem_vram, dev);
memory_region_set_coalescing(&s->mem_vram);
memory_region_set_log(&s->mem_vram, true, DIRTY_MEMORY_VGA);
}
#define TYPE_G364 "sysbus-g364"

View File

@@ -1322,6 +1322,7 @@ static void sm501_draw_crt(SM501State * s)
}
/* draw each line according to conditions */
memory_region_sync_dirty_bitmap(&s->local_mem_region);
for (y = 0; y < height; y++) {
int update_hwc = draw_hwc_line ? within_hwc_y_range(s, y, 1) : 0;
int update = full_update || update_hwc;
@@ -1412,6 +1413,7 @@ void sm501_init(MemoryRegion *address_space_mem, uint32_t base,
memory_region_init_ram(&s->local_mem_region, NULL, "sm501.local",
local_mem_bytes, &error_abort);
vmstate_register_ram_global(&s->local_mem_region);
memory_region_set_log(&s->local_mem_region, true, DIRTY_MEMORY_VGA);
s->local_mem = memory_region_get_ram_ptr(&s->local_mem_region);
memory_region_add_subregion(address_space_mem, base, &s->local_mem_region);

View File

@@ -571,7 +571,7 @@ TC6393xbState *tc6393xb_init(MemoryRegion *sysmem, uint32_t base, qemu_irq irq)
s->irq = irq;
s->gpio_in = qemu_allocate_irqs(tc6393xb_gpio_set, s, TC6393XB_GPIOS);
s->l3v = *qemu_allocate_irqs(tc6393xb_l3v, s, 1);
s->l3v = qemu_allocate_irq(tc6393xb_l3v, s, 0);
s->blanked = 1;
s->sub_irqs = qemu_allocate_irqs(tc6393xb_sub_irq, s, TC6393XB_NR_IRQS);

View File

@@ -353,6 +353,7 @@ static void tcx_update_display(void *opaque)
return;
}
memory_region_sync_dirty_bitmap(&ts->vram_mem);
for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE) {
if (memory_region_get_dirty(&ts->vram_mem, page, TARGET_PAGE_SIZE,
DIRTY_MEMORY_VGA)) {
@@ -446,6 +447,7 @@ static void tcx24_update_display(void *opaque)
dd = surface_stride(surface);
ds = 1024;
memory_region_sync_dirty_bitmap(&ts->vram_mem);
for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE,
page24 += TARGET_PAGE_SIZE, cpage += TARGET_PAGE_SIZE) {
if (tcx24_check_dirty(ts, page, page24, cpage)) {
@@ -1006,6 +1008,7 @@ static void tcx_realizefn(DeviceState *dev, Error **errp)
memory_region_init_ram(&s->vram_mem, OBJECT(s), "tcx.vram",
s->vram_size * (1 + 4 + 4), &error_abort);
vmstate_register_ram_global(&s->vram_mem);
memory_region_set_log(&s->vram_mem, true, DIRTY_MEMORY_VGA);
vram_base = memory_region_get_ram_ptr(&s->vram_mem);
/* 10/ROM : FCode ROM */

View File

@@ -54,9 +54,7 @@ typedef struct PCIVGAState {
VGACommonState vga;
uint32_t flags;
MemoryRegion mmio;
MemoryRegion ioport;
MemoryRegion bochs;
MemoryRegion qext;
MemoryRegion mrs[3];
} PCIVGAState;
#define TYPE_PCI_VGA "pci-vga"
@@ -76,16 +74,16 @@ static const VMStateDescription vmstate_vga_pci = {
static uint64_t pci_vga_ioport_read(void *ptr, hwaddr addr,
unsigned size)
{
PCIVGAState *d = ptr;
VGACommonState *s = ptr;
uint64_t ret = 0;
switch (size) {
case 1:
ret = vga_ioport_read(&d->vga, addr);
ret = vga_ioport_read(s, addr + 0x3c0);
break;
case 2:
ret = vga_ioport_read(&d->vga, addr);
ret |= vga_ioport_read(&d->vga, addr+1) << 8;
ret = vga_ioport_read(s, addr + 0x3c0);
ret |= vga_ioport_read(s, addr + 0x3c1) << 8;
break;
}
return ret;
@@ -94,11 +92,11 @@ static uint64_t pci_vga_ioport_read(void *ptr, hwaddr addr,
static void pci_vga_ioport_write(void *ptr, hwaddr addr,
uint64_t val, unsigned size)
{
PCIVGAState *d = ptr;
VGACommonState *s = ptr;
switch (size) {
case 1:
vga_ioport_write(&d->vga, addr + 0x3c0, val);
vga_ioport_write(s, addr + 0x3c0, val);
break;
case 2:
/*
@@ -106,8 +104,8 @@ static void pci_vga_ioport_write(void *ptr, hwaddr addr,
* indexed registers with a single word write because the
* index byte is updated first.
*/
vga_ioport_write(&d->vga, addr + 0x3c0, val & 0xff);
vga_ioport_write(&d->vga, addr + 0x3c1, (val >> 8) & 0xff);
vga_ioport_write(s, addr + 0x3c0, val & 0xff);
vga_ioport_write(s, addr + 0x3c1, (val >> 8) & 0xff);
break;
}
}
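/*
 * Minimal standalone sketch (illustrative only, not part of this file) of the
 * word-write split above: a 16-bit store to mmio offset 0x14 reaches VGA port
 * 0x3d4 (the CRTC index register), and the index byte must land before the
 * data byte, which is exactly what the two single-byte vga_ioport_write()
 * calls guarantee.
 */
#include <stdio.h>
static void demo_split_word_write(unsigned mmio_offset, unsigned val)
{
    unsigned port = mmio_offset + 0x3c0;   /* same remapping as pci_vga_ioport_write() */
    printf("byte write: port 0x%03x <- 0x%02x (index)\n", port, val & 0xff);
    printf("byte write: port 0x%03x <- 0x%02x (data)\n", port + 1, (val >> 8) & 0xff);
}
int main(void)
{
    demo_split_word_write(0x14, 0x410a);   /* CRTC index 0x0a, data byte 0x41 */
    return 0;
}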
@@ -125,21 +123,21 @@ static const MemoryRegionOps pci_vga_ioport_ops = {
static uint64_t pci_vga_bochs_read(void *ptr, hwaddr addr,
unsigned size)
{
PCIVGAState *d = ptr;
VGACommonState *s = ptr;
int index = addr >> 1;
vbe_ioport_write_index(&d->vga, 0, index);
return vbe_ioport_read_data(&d->vga, 0);
vbe_ioport_write_index(s, 0, index);
return vbe_ioport_read_data(s, 0);
}
static void pci_vga_bochs_write(void *ptr, hwaddr addr,
uint64_t val, unsigned size)
{
PCIVGAState *d = ptr;
VGACommonState *s = ptr;
int index = addr >> 1;
vbe_ioport_write_index(&d->vga, 0, index);
vbe_ioport_write_data(&d->vga, 0, val);
vbe_ioport_write_index(s, 0, index);
vbe_ioport_write_data(s, 0, val);
}
static const MemoryRegionOps pci_vga_bochs_ops = {
@@ -154,13 +152,13 @@ static const MemoryRegionOps pci_vga_bochs_ops = {
static uint64_t pci_vga_qext_read(void *ptr, hwaddr addr, unsigned size)
{
PCIVGAState *d = ptr;
VGACommonState *s = ptr;
switch (addr) {
case PCI_VGA_QEXT_REG_SIZE:
return PCI_VGA_QEXT_SIZE;
case PCI_VGA_QEXT_REG_BYTEORDER:
return d->vga.big_endian_fb ?
return s->big_endian_fb ?
PCI_VGA_QEXT_BIG_ENDIAN : PCI_VGA_QEXT_LITTLE_ENDIAN;
default:
return 0;
@@ -170,15 +168,15 @@ static uint64_t pci_vga_qext_read(void *ptr, hwaddr addr, unsigned size)
static void pci_vga_qext_write(void *ptr, hwaddr addr,
uint64_t val, unsigned size)
{
PCIVGAState *d = ptr;
VGACommonState *s = ptr;
switch (addr) {
case PCI_VGA_QEXT_REG_BYTEORDER:
if (val == PCI_VGA_QEXT_BIG_ENDIAN) {
d->vga.big_endian_fb = true;
s->big_endian_fb = true;
}
if (val == PCI_VGA_QEXT_LITTLE_ENDIAN) {
d->vga.big_endian_fb = false;
s->big_endian_fb = false;
}
break;
}
@@ -206,10 +204,34 @@ static const MemoryRegionOps pci_vga_qext_ops = {
.endianness = DEVICE_LITTLE_ENDIAN,
};
static void pci_std_vga_mmio_region_init(VGACommonState *s,
MemoryRegion *parent,
MemoryRegion *subs,
bool qext)
{
memory_region_init_io(&subs[0], NULL, &pci_vga_ioport_ops, s,
"vga ioports remapped", PCI_VGA_IOPORT_SIZE);
memory_region_add_subregion(parent, PCI_VGA_IOPORT_OFFSET,
&subs[0]);
memory_region_init_io(&subs[1], NULL, &pci_vga_bochs_ops, s,
"bochs dispi interface", PCI_VGA_BOCHS_SIZE);
memory_region_add_subregion(parent, PCI_VGA_BOCHS_OFFSET,
&subs[1]);
if (qext) {
memory_region_init_io(&subs[2], NULL, &pci_vga_qext_ops, s,
"qemu extended regs", PCI_VGA_QEXT_SIZE);
memory_region_add_subregion(parent, PCI_VGA_QEXT_OFFSET,
&subs[2]);
}
}
static void pci_std_vga_realize(PCIDevice *dev, Error **errp)
{
PCIVGAState *d = PCI_VGA(dev);
VGACommonState *s = &d->vga;
bool qext = false;
/* vga + console init */
vga_common_init(s, OBJECT(dev), true);
@@ -224,23 +246,12 @@ static void pci_std_vga_realize(PCIDevice *dev, Error **errp)
/* mmio bar for vga register access */
if (d->flags & (1 << PCI_VGA_FLAG_ENABLE_MMIO)) {
memory_region_init(&d->mmio, NULL, "vga.mmio", 4096);
memory_region_init_io(&d->ioport, NULL, &pci_vga_ioport_ops, d,
"vga ioports remapped", PCI_VGA_IOPORT_SIZE);
memory_region_init_io(&d->bochs, NULL, &pci_vga_bochs_ops, d,
"bochs dispi interface", PCI_VGA_BOCHS_SIZE);
memory_region_add_subregion(&d->mmio, PCI_VGA_IOPORT_OFFSET,
&d->ioport);
memory_region_add_subregion(&d->mmio, PCI_VGA_BOCHS_OFFSET,
&d->bochs);
if (d->flags & (1 << PCI_VGA_FLAG_ENABLE_QEXT)) {
memory_region_init_io(&d->qext, NULL, &pci_vga_qext_ops, d,
"qemu extended regs", PCI_VGA_QEXT_SIZE);
memory_region_add_subregion(&d->mmio, PCI_VGA_QEXT_OFFSET,
&d->qext);
qext = true;
pci_set_byte(&d->dev.config[PCI_REVISION_ID], 2);
}
pci_std_vga_mmio_region_init(s, &d->mmio, d->mrs, qext);
pci_register_bar(&d->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
}
@@ -262,6 +273,7 @@ static void pci_secondary_vga_realize(PCIDevice *dev, Error **errp)
{
PCIVGAState *d = PCI_VGA(dev);
VGACommonState *s = &d->vga;
bool qext = false;
/* vga + console init */
vga_common_init(s, OBJECT(dev), false);
@@ -269,23 +281,12 @@ static void pci_secondary_vga_realize(PCIDevice *dev, Error **errp)
/* mmio bar */
memory_region_init(&d->mmio, OBJECT(dev), "vga.mmio", 4096);
memory_region_init_io(&d->ioport, OBJECT(dev), &pci_vga_ioport_ops, d,
"vga ioports remapped", PCI_VGA_IOPORT_SIZE);
memory_region_init_io(&d->bochs, OBJECT(dev), &pci_vga_bochs_ops, d,
"bochs dispi interface", PCI_VGA_BOCHS_SIZE);
memory_region_add_subregion(&d->mmio, PCI_VGA_IOPORT_OFFSET,
&d->ioport);
memory_region_add_subregion(&d->mmio, PCI_VGA_BOCHS_OFFSET,
&d->bochs);
if (d->flags & (1 << PCI_VGA_FLAG_ENABLE_QEXT)) {
memory_region_init_io(&d->qext, NULL, &pci_vga_qext_ops, d,
"qemu extended regs", PCI_VGA_QEXT_SIZE);
memory_region_add_subregion(&d->mmio, PCI_VGA_QEXT_OFFSET,
&d->qext);
qext = true;
pci_set_byte(&d->dev.config[PCI_REVISION_ID], 2);
}
pci_std_vga_mmio_region_init(s, &d->mmio, d->mrs, qext);
pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_MEM_PREFETCH, &s->vram);
pci_register_bar(&d->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);

hw/display/virtio-gpu.c (new file, 918 lines)
View File

@@ -0,0 +1,918 @@
/*
* Virtio GPU Device
*
* Copyright Red Hat, Inc. 2013-2014
*
* Authors:
* Dave Airlie <airlied@redhat.com>
* Gerd Hoffmann <kraxel@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2.
* See the COPYING file in the top-level directory.
*/
#include "qemu-common.h"
#include "qemu/iov.h"
#include "ui/console.h"
#include "trace.h"
#include "hw/virtio/virtio.h"
#include "hw/virtio/virtio-gpu.h"
#include "hw/virtio/virtio-bus.h"
static struct virtio_gpu_simple_resource*
virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id);
static void update_cursor_data_simple(VirtIOGPU *g,
struct virtio_gpu_scanout *s,
uint32_t resource_id)
{
struct virtio_gpu_simple_resource *res;
uint32_t pixels;
res = virtio_gpu_find_resource(g, resource_id);
if (!res) {
return;
}
if (pixman_image_get_width(res->image) != s->current_cursor->width ||
pixman_image_get_height(res->image) != s->current_cursor->height) {
return;
}
pixels = s->current_cursor->width * s->current_cursor->height;
memcpy(s->current_cursor->data,
pixman_image_get_data(res->image),
pixels * sizeof(uint32_t));
}
static void update_cursor(VirtIOGPU *g, struct virtio_gpu_update_cursor *cursor)
{
struct virtio_gpu_scanout *s;
if (cursor->pos.scanout_id >= g->conf.max_outputs) {
return;
}
s = &g->scanout[cursor->pos.scanout_id];
if (cursor->hdr.type != VIRTIO_GPU_CMD_MOVE_CURSOR) {
if (!s->current_cursor) {
s->current_cursor = cursor_alloc(64, 64);
}
s->current_cursor->hot_x = cursor->hot_x;
s->current_cursor->hot_y = cursor->hot_y;
if (cursor->resource_id > 0) {
update_cursor_data_simple(g, s, cursor->resource_id);
}
dpy_cursor_define(s->con, s->current_cursor);
}
dpy_mouse_set(s->con, cursor->pos.x, cursor->pos.y,
cursor->resource_id ? 1 : 0);
}
static void virtio_gpu_get_config(VirtIODevice *vdev, uint8_t *config)
{
VirtIOGPU *g = VIRTIO_GPU(vdev);
memcpy(config, &g->virtio_config, sizeof(g->virtio_config));
}
static void virtio_gpu_set_config(VirtIODevice *vdev, const uint8_t *config)
{
VirtIOGPU *g = VIRTIO_GPU(vdev);
struct virtio_gpu_config vgconfig;
memcpy(&vgconfig, config, sizeof(g->virtio_config));
if (vgconfig.events_clear) {
g->virtio_config.events_read &= ~vgconfig.events_clear;
}
}
static uint64_t virtio_gpu_get_features(VirtIODevice *vdev, uint64_t features)
{
return features;
}
static void virtio_gpu_notify_event(VirtIOGPU *g, uint32_t event_type)
{
g->virtio_config.events_read |= event_type;
virtio_notify_config(&g->parent_obj);
}
static struct virtio_gpu_simple_resource *
virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id)
{
struct virtio_gpu_simple_resource *res;
QTAILQ_FOREACH(res, &g->reslist, next) {
if (res->resource_id == resource_id) {
return res;
}
}
return NULL;
}
void virtio_gpu_ctrl_response(VirtIOGPU *g,
struct virtio_gpu_ctrl_command *cmd,
struct virtio_gpu_ctrl_hdr *resp,
size_t resp_len)
{
size_t s;
if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) {
resp->flags |= VIRTIO_GPU_FLAG_FENCE;
resp->fence_id = cmd->cmd_hdr.fence_id;
resp->ctx_id = cmd->cmd_hdr.ctx_id;
}
s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len);
if (s != resp_len) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: response size incorrect %zu vs %zu\n",
__func__, s, resp_len);
}
virtqueue_push(cmd->vq, &cmd->elem, s);
virtio_notify(VIRTIO_DEVICE(g), cmd->vq);
cmd->finished = true;
}
void virtio_gpu_ctrl_response_nodata(VirtIOGPU *g,
struct virtio_gpu_ctrl_command *cmd,
enum virtio_gpu_ctrl_type type)
{
struct virtio_gpu_ctrl_hdr resp;
memset(&resp, 0, sizeof(resp));
resp.type = type;
virtio_gpu_ctrl_response(g, cmd, &resp, sizeof(resp));
}
static void
virtio_gpu_fill_display_info(VirtIOGPU *g,
struct virtio_gpu_resp_display_info *dpy_info)
{
int i;
for (i = 0; i < g->conf.max_outputs; i++) {
if (g->enabled_output_bitmask & (1 << i)) {
dpy_info->pmodes[i].enabled = 1;
dpy_info->pmodes[i].r.width = g->req_state[i].width;
dpy_info->pmodes[i].r.height = g->req_state[i].height;
}
}
}
void virtio_gpu_get_display_info(VirtIOGPU *g,
struct virtio_gpu_ctrl_command *cmd)
{
struct virtio_gpu_resp_display_info display_info;
trace_virtio_gpu_cmd_get_display_info();
memset(&display_info, 0, sizeof(display_info));
display_info.hdr.type = VIRTIO_GPU_RESP_OK_DISPLAY_INFO;
virtio_gpu_fill_display_info(g, &display_info);
virtio_gpu_ctrl_response(g, cmd, &display_info.hdr,
sizeof(display_info));
}
static pixman_format_code_t get_pixman_format(uint32_t virtio_gpu_format)
{
switch (virtio_gpu_format) {
#ifdef HOST_WORDS_BIGENDIAN
case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM:
return PIXMAN_b8g8r8x8;
case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM:
return PIXMAN_b8g8r8a8;
case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM:
return PIXMAN_x8r8g8b8;
case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM:
return PIXMAN_a8r8g8b8;
case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM:
return PIXMAN_r8g8b8x8;
case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM:
return PIXMAN_r8g8b8a8;
case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM:
return PIXMAN_x8b8g8r8;
case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM:
return PIXMAN_a8b8g8r8;
#else
case VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM:
return PIXMAN_x8r8g8b8;
case VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM:
return PIXMAN_a8r8g8b8;
case VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM:
return PIXMAN_b8g8r8x8;
case VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM:
return PIXMAN_b8g8r8a8;
case VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM:
return PIXMAN_x8b8g8r8;
case VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM:
return PIXMAN_a8b8g8r8;
case VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM:
return PIXMAN_r8g8b8x8;
case VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM:
return PIXMAN_r8g8b8a8;
#endif
default:
return 0;
}
}
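/*
 * A likely way to read the table above: VIRTIO_GPU_FORMAT_* names appear to
 * list the bytes in memory order, while PIXMAN_* names describe the channels
 * inside a host-endian 32-bit word.  On a little-endian host the byte
 * sequence B,G,R,A is the word 0xAARRGGBB, i.e. PIXMAN_a8r8g8b8, which is why
 * the two halves of the switch are mirror images of each other.
 */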
static void virtio_gpu_resource_create_2d(VirtIOGPU *g,
struct virtio_gpu_ctrl_command *cmd)
{
pixman_format_code_t pformat;
struct virtio_gpu_simple_resource *res;
struct virtio_gpu_resource_create_2d c2d;
VIRTIO_GPU_FILL_CMD(c2d);
trace_virtio_gpu_cmd_res_create_2d(c2d.resource_id, c2d.format,
c2d.width, c2d.height);
if (c2d.resource_id == 0) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: resource id 0 is not allowed\n",
__func__);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
return;
}
res = virtio_gpu_find_resource(g, c2d.resource_id);
if (res) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: resource already exists %d\n",
__func__, c2d.resource_id);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
return;
}
res = g_new0(struct virtio_gpu_simple_resource, 1);
res->width = c2d.width;
res->height = c2d.height;
res->format = c2d.format;
res->resource_id = c2d.resource_id;
pformat = get_pixman_format(c2d.format);
if (!pformat) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: host couldn't handle guest format %d\n",
__func__, c2d.format);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
return;
}
res->image = pixman_image_create_bits(pformat,
c2d.width,
c2d.height,
NULL, 0);
if (!res->image) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: resource creation failed %d %d %d\n",
__func__, c2d.resource_id, c2d.width, c2d.height);
g_free(res);
cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY;
return;
}
QTAILQ_INSERT_HEAD(&g->reslist, res, next);
}
static void virtio_gpu_resource_destroy(VirtIOGPU *g,
struct virtio_gpu_simple_resource *res)
{
pixman_image_unref(res->image);
QTAILQ_REMOVE(&g->reslist, res, next);
g_free(res);
}
static void virtio_gpu_resource_unref(VirtIOGPU *g,
struct virtio_gpu_ctrl_command *cmd)
{
struct virtio_gpu_simple_resource *res;
struct virtio_gpu_resource_unref unref;
VIRTIO_GPU_FILL_CMD(unref);
trace_virtio_gpu_cmd_res_unref(unref.resource_id);
res = virtio_gpu_find_resource(g, unref.resource_id);
if (!res) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n",
__func__, unref.resource_id);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
return;
}
virtio_gpu_resource_destroy(g, res);
}
static void virtio_gpu_transfer_to_host_2d(VirtIOGPU *g,
struct virtio_gpu_ctrl_command *cmd)
{
struct virtio_gpu_simple_resource *res;
int h;
uint32_t src_offset, dst_offset, stride;
int bpp;
pixman_format_code_t format;
struct virtio_gpu_transfer_to_host_2d t2d;
VIRTIO_GPU_FILL_CMD(t2d);
trace_virtio_gpu_cmd_res_xfer_toh_2d(t2d.resource_id);
res = virtio_gpu_find_resource(g, t2d.resource_id);
if (!res || !res->iov) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n",
__func__, t2d.resource_id);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
return;
}
if (t2d.r.x > res->width ||
t2d.r.y > res->height ||
t2d.r.width > res->width ||
t2d.r.height > res->height ||
t2d.r.x + t2d.r.width > res->width ||
t2d.r.y + t2d.r.height > res->height) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: transfer bounds outside resource"
" bounds for resource %d: %d %d %d %d vs %d %d\n",
__func__, t2d.resource_id, t2d.r.x, t2d.r.y,
t2d.r.width, t2d.r.height, res->width, res->height);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
return;
}
format = pixman_image_get_format(res->image);
bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8;
stride = pixman_image_get_stride(res->image);
if (t2d.offset || t2d.r.x || t2d.r.y ||
t2d.r.width != pixman_image_get_width(res->image)) {
void *img_data = pixman_image_get_data(res->image);
for (h = 0; h < t2d.r.height; h++) {
src_offset = t2d.offset + stride * h;
dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp);
iov_to_buf(res->iov, res->iov_cnt, src_offset,
(uint8_t *)img_data
+ dst_offset, t2d.r.width * bpp);
}
} else {
iov_to_buf(res->iov, res->iov_cnt, 0,
pixman_image_get_data(res->image),
pixman_image_get_stride(res->image)
* pixman_image_get_height(res->image));
}
}
static void virtio_gpu_resource_flush(VirtIOGPU *g,
struct virtio_gpu_ctrl_command *cmd)
{
struct virtio_gpu_simple_resource *res;
struct virtio_gpu_resource_flush rf;
pixman_region16_t flush_region;
int i;
VIRTIO_GPU_FILL_CMD(rf);
trace_virtio_gpu_cmd_res_flush(rf.resource_id,
rf.r.width, rf.r.height, rf.r.x, rf.r.y);
res = virtio_gpu_find_resource(g, rf.resource_id);
if (!res) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n",
__func__, rf.resource_id);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
return;
}
if (rf.r.x > res->width ||
rf.r.y > res->height ||
rf.r.width > res->width ||
rf.r.height > res->height ||
rf.r.x + rf.r.width > res->width ||
rf.r.y + rf.r.height > res->height) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: flush bounds outside resource"
" bounds for resource %d: %d %d %d %d vs %d %d\n",
__func__, rf.resource_id, rf.r.x, rf.r.y,
rf.r.width, rf.r.height, res->width, res->height);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
return;
}
pixman_region_init_rect(&flush_region,
rf.r.x, rf.r.y, rf.r.width, rf.r.height);
for (i = 0; i < VIRTIO_GPU_MAX_SCANOUT; i++) {
struct virtio_gpu_scanout *scanout;
pixman_region16_t region, finalregion;
pixman_box16_t *extents;
if (!(res->scanout_bitmask & (1 << i))) {
continue;
}
scanout = &g->scanout[i];
pixman_region_init(&finalregion);
pixman_region_init_rect(&region, scanout->x, scanout->y,
scanout->width, scanout->height);
pixman_region_intersect(&finalregion, &flush_region, &region);
pixman_region_translate(&finalregion, -scanout->x, -scanout->y);
extents = pixman_region_extents(&finalregion);
/* work out the area we need to update for each console */
dpy_gfx_update(g->scanout[i].con,
extents->x1, extents->y1,
extents->x2 - extents->x1,
extents->y2 - extents->y1);
pixman_region_fini(&region);
pixman_region_fini(&finalregion);
}
pixman_region_fini(&flush_region);
}
static void virtio_gpu_set_scanout(VirtIOGPU *g,
struct virtio_gpu_ctrl_command *cmd)
{
struct virtio_gpu_simple_resource *res;
struct virtio_gpu_scanout *scanout;
pixman_format_code_t format;
uint32_t offset;
int bpp;
struct virtio_gpu_set_scanout ss;
VIRTIO_GPU_FILL_CMD(ss);
trace_virtio_gpu_cmd_set_scanout(ss.scanout_id, ss.resource_id,
ss.r.width, ss.r.height, ss.r.x, ss.r.y);
g->enable = 1;
if (ss.resource_id == 0) {
scanout = &g->scanout[ss.scanout_id];
if (scanout->resource_id) {
res = virtio_gpu_find_resource(g, scanout->resource_id);
if (res) {
res->scanout_bitmask &= ~(1 << ss.scanout_id);
}
}
if (ss.scanout_id == 0 ||
ss.scanout_id >= g->conf.max_outputs) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: illegal scanout id specified %d",
__func__, ss.scanout_id);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
return;
}
dpy_gfx_replace_surface(g->scanout[ss.scanout_id].con, NULL);
scanout->ds = NULL;
scanout->width = 0;
scanout->height = 0;
return;
}
/* create a surface for this scanout */
if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUT ||
ss.scanout_id >= g->conf.max_outputs) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout id specified %d",
__func__, ss.scanout_id);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
return;
}
res = virtio_gpu_find_resource(g, ss.resource_id);
if (!res) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n",
__func__, ss.resource_id);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
return;
}
if (ss.r.x > res->width ||
ss.r.y > res->height ||
ss.r.width > res->width ||
ss.r.height > res->height ||
ss.r.x + ss.r.width > res->width ||
ss.r.y + ss.r.height > res->height) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal scanout %d bounds for"
" resource %d, (%d,%d)+%d,%d vs %d %d\n",
__func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y,
ss.r.width, ss.r.height, res->width, res->height);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
return;
}
scanout = &g->scanout[ss.scanout_id];
format = pixman_image_get_format(res->image);
bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8;
offset = (ss.r.x * bpp) + ss.r.y * pixman_image_get_stride(res->image);
if (!scanout->ds || surface_data(scanout->ds)
!= ((uint8_t *)pixman_image_get_data(res->image) + offset) ||
scanout->width != ss.r.width ||
scanout->height != ss.r.height) {
/* realloc the surface ptr */
scanout->ds = qemu_create_displaysurface_from
(ss.r.width, ss.r.height, format,
pixman_image_get_stride(res->image),
(uint8_t *)pixman_image_get_data(res->image) + offset);
if (!scanout->ds) {
cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
return;
}
dpy_gfx_replace_surface(g->scanout[ss.scanout_id].con, scanout->ds);
}
res->scanout_bitmask |= (1 << ss.scanout_id);
scanout->resource_id = ss.resource_id;
scanout->x = ss.r.x;
scanout->y = ss.r.y;
scanout->width = ss.r.width;
scanout->height = ss.r.height;
}
int virtio_gpu_create_mapping_iov(struct virtio_gpu_resource_attach_backing *ab,
struct virtio_gpu_ctrl_command *cmd,
struct iovec **iov)
{
struct virtio_gpu_mem_entry *ents;
size_t esize, s;
int i;
if (ab->nr_entries > 16384) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: nr_entries is too big (%d > 1024)\n",
__func__, ab->nr_entries);
return -1;
}
esize = sizeof(*ents) * ab->nr_entries;
ents = g_malloc(esize);
s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num,
sizeof(*ab), ents, esize);
if (s != esize) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: command data size incorrect %zu vs %zu\n",
__func__, s, esize);
g_free(ents);
return -1;
}
*iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries);
for (i = 0; i < ab->nr_entries; i++) {
hwaddr len = ents[i].length;
(*iov)[i].iov_len = ents[i].length;
(*iov)[i].iov_base = cpu_physical_memory_map(ents[i].addr, &len, 1);
if (!(*iov)[i].iov_base || len != ents[i].length) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: failed to map MMIO memory for"
" resource %d element %d\n",
__func__, ab->resource_id, i);
virtio_gpu_cleanup_mapping_iov(*iov, i);
g_free(ents);
g_free(*iov);
*iov = NULL;
return -1;
}
}
g_free(ents);
return 0;
}
void virtio_gpu_cleanup_mapping_iov(struct iovec *iov, uint32_t count)
{
int i;
for (i = 0; i < count; i++) {
cpu_physical_memory_unmap(iov[i].iov_base, iov[i].iov_len, 1,
iov[i].iov_len);
}
}
static void virtio_gpu_cleanup_mapping(struct virtio_gpu_simple_resource *res)
{
virtio_gpu_cleanup_mapping_iov(res->iov, res->iov_cnt);
g_free(res->iov);
res->iov = NULL;
res->iov_cnt = 0;
}
static void
virtio_gpu_resource_attach_backing(VirtIOGPU *g,
struct virtio_gpu_ctrl_command *cmd)
{
struct virtio_gpu_simple_resource *res;
struct virtio_gpu_resource_attach_backing ab;
int ret;
VIRTIO_GPU_FILL_CMD(ab);
trace_virtio_gpu_cmd_res_back_attach(ab.resource_id);
res = virtio_gpu_find_resource(g, ab.resource_id);
if (!res) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n",
__func__, ab.resource_id);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
return;
}
ret = virtio_gpu_create_mapping_iov(&ab, cmd, &res->iov);
if (ret != 0) {
cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
return;
}
res->iov_cnt = ab.nr_entries;
}
static void
virtio_gpu_resource_detach_backing(VirtIOGPU *g,
struct virtio_gpu_ctrl_command *cmd)
{
struct virtio_gpu_simple_resource *res;
struct virtio_gpu_resource_detach_backing detach;
VIRTIO_GPU_FILL_CMD(detach);
trace_virtio_gpu_cmd_res_back_detach(detach.resource_id);
res = virtio_gpu_find_resource(g, detach.resource_id);
if (!res || !res->iov) {
qemu_log_mask(LOG_GUEST_ERROR, "%s: illegal resource specified %d\n",
__func__, detach.resource_id);
cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
return;
}
virtio_gpu_cleanup_mapping(res);
}
static void virtio_gpu_simple_process_cmd(VirtIOGPU *g,
struct virtio_gpu_ctrl_command *cmd)
{
VIRTIO_GPU_FILL_CMD(cmd->cmd_hdr);
switch (cmd->cmd_hdr.type) {
case VIRTIO_GPU_CMD_GET_DISPLAY_INFO:
virtio_gpu_get_display_info(g, cmd);
break;
case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D:
virtio_gpu_resource_create_2d(g, cmd);
break;
case VIRTIO_GPU_CMD_RESOURCE_UNREF:
virtio_gpu_resource_unref(g, cmd);
break;
case VIRTIO_GPU_CMD_RESOURCE_FLUSH:
virtio_gpu_resource_flush(g, cmd);
break;
case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D:
virtio_gpu_transfer_to_host_2d(g, cmd);
break;
case VIRTIO_GPU_CMD_SET_SCANOUT:
virtio_gpu_set_scanout(g, cmd);
break;
case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING:
virtio_gpu_resource_attach_backing(g, cmd);
break;
case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
virtio_gpu_resource_detach_backing(g, cmd);
break;
default:
cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
break;
}
if (!cmd->finished) {
virtio_gpu_ctrl_response_nodata(g, cmd, cmd->error ? cmd->error :
VIRTIO_GPU_RESP_OK_NODATA);
}
}
static void virtio_gpu_handle_ctrl_cb(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIOGPU *g = VIRTIO_GPU(vdev);
qemu_bh_schedule(g->ctrl_bh);
}
static void virtio_gpu_handle_cursor_cb(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIOGPU *g = VIRTIO_GPU(vdev);
qemu_bh_schedule(g->cursor_bh);
}
static void virtio_gpu_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIOGPU *g = VIRTIO_GPU(vdev);
struct virtio_gpu_ctrl_command *cmd;
if (!virtio_queue_ready(vq)) {
return;
}
cmd = g_new(struct virtio_gpu_ctrl_command, 1);
while (virtqueue_pop(vq, &cmd->elem)) {
cmd->vq = vq;
cmd->error = 0;
cmd->finished = false;
g->stats.requests++;
virtio_gpu_simple_process_cmd(g, cmd);
if (!cmd->finished) {
QTAILQ_INSERT_TAIL(&g->fenceq, cmd, next);
g->stats.inflight++;
if (g->stats.max_inflight < g->stats.inflight) {
g->stats.max_inflight = g->stats.inflight;
}
fprintf(stderr, "inflight: %3d (+)\r", g->stats.inflight);
cmd = g_new(struct virtio_gpu_ctrl_command, 1);
}
}
g_free(cmd);
}
static void virtio_gpu_ctrl_bh(void *opaque)
{
VirtIOGPU *g = opaque;
virtio_gpu_handle_ctrl(&g->parent_obj, g->ctrl_vq);
}
static void virtio_gpu_handle_cursor(VirtIODevice *vdev, VirtQueue *vq)
{
VirtIOGPU *g = VIRTIO_GPU(vdev);
VirtQueueElement elem;
size_t s;
struct virtio_gpu_update_cursor cursor_info;
if (!virtio_queue_ready(vq)) {
return;
}
while (virtqueue_pop(vq, &elem)) {
s = iov_to_buf(elem.out_sg, elem.out_num, 0,
&cursor_info, sizeof(cursor_info));
if (s != sizeof(cursor_info)) {
qemu_log_mask(LOG_GUEST_ERROR,
"%s: cursor size incorrect %zu vs %zu\n",
__func__, s, sizeof(cursor_info));
} else {
update_cursor(g, &cursor_info);
}
virtqueue_push(vq, &elem, 0);
virtio_notify(vdev, vq);
}
}
static void virtio_gpu_cursor_bh(void *opaque)
{
VirtIOGPU *g = opaque;
virtio_gpu_handle_cursor(&g->parent_obj, g->cursor_vq);
}
static void virtio_gpu_invalidate_display(void *opaque)
{
}
static void virtio_gpu_update_display(void *opaque)
{
}
static void virtio_gpu_text_update(void *opaque, console_ch_t *chardata)
{
}
static int virtio_gpu_ui_info(void *opaque, uint32_t idx, QemuUIInfo *info)
{
VirtIOGPU *g = opaque;
if (idx > g->conf.max_outputs) {
return -1;
}
g->req_state[idx].x = info->xoff;
g->req_state[idx].y = info->yoff;
g->req_state[idx].width = info->width;
g->req_state[idx].height = info->height;
if (info->width && info->height) {
g->enabled_output_bitmask |= (1 << idx);
} else {
g->enabled_output_bitmask &= ~(1 << idx);
}
/* send event to guest */
virtio_gpu_notify_event(g, VIRTIO_GPU_EVENT_DISPLAY);
return 0;
}
const GraphicHwOps virtio_gpu_ops = {
.invalidate = virtio_gpu_invalidate_display,
.gfx_update = virtio_gpu_update_display,
.text_update = virtio_gpu_text_update,
.ui_info = virtio_gpu_ui_info,
};
static void virtio_gpu_device_realize(DeviceState *qdev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
VirtIOGPU *g = VIRTIO_GPU(qdev);
int i;
g->config_size = sizeof(struct virtio_gpu_config);
g->virtio_config.num_scanouts = g->conf.max_outputs;
virtio_init(VIRTIO_DEVICE(g), "virtio-gpu", VIRTIO_ID_GPU,
g->config_size);
g->req_state[0].width = 1024;
g->req_state[0].height = 768;
g->ctrl_vq = virtio_add_queue(vdev, 64, virtio_gpu_handle_ctrl_cb);
g->cursor_vq = virtio_add_queue(vdev, 16, virtio_gpu_handle_cursor_cb);
g->ctrl_bh = qemu_bh_new(virtio_gpu_ctrl_bh, g);
g->cursor_bh = qemu_bh_new(virtio_gpu_cursor_bh, g);
QTAILQ_INIT(&g->reslist);
QTAILQ_INIT(&g->fenceq);
g->enabled_output_bitmask = 1;
g->qdev = qdev;
for (i = 0; i < g->conf.max_outputs; i++) {
g->scanout[i].con =
graphic_console_init(DEVICE(g), i, &virtio_gpu_ops, g);
if (i > 0) {
dpy_gfx_replace_surface(g->scanout[i].con, NULL);
}
}
}
static void virtio_gpu_instance_init(Object *obj)
{
}
static void virtio_gpu_reset(VirtIODevice *vdev)
{
VirtIOGPU *g = VIRTIO_GPU(vdev);
struct virtio_gpu_simple_resource *res, *tmp;
int i;
g->enable = 0;
QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
virtio_gpu_resource_destroy(g, res);
}
for (i = 0; i < g->conf.max_outputs; i++) {
#if 0
g->req_state[i].x = 0;
g->req_state[i].y = 0;
if (i == 0) {
g->req_state[0].width = 1024;
g->req_state[0].height = 768;
} else {
g->req_state[i].width = 0;
g->req_state[i].height = 0;
}
#endif
g->scanout[i].resource_id = 0;
g->scanout[i].width = 0;
g->scanout[i].height = 0;
g->scanout[i].x = 0;
g->scanout[i].y = 0;
g->scanout[i].ds = NULL;
}
g->enabled_output_bitmask = 1;
}
static Property virtio_gpu_properties[] = {
DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOGPU, conf),
DEFINE_PROP_END_OF_LIST(),
};
static void virtio_gpu_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
vdc->realize = virtio_gpu_device_realize;
vdc->get_config = virtio_gpu_get_config;
vdc->set_config = virtio_gpu_set_config;
vdc->get_features = virtio_gpu_get_features;
vdc->reset = virtio_gpu_reset;
dc->props = virtio_gpu_properties;
}
static const TypeInfo virtio_gpu_info = {
.name = TYPE_VIRTIO_GPU,
.parent = TYPE_VIRTIO_DEVICE,
.instance_size = sizeof(VirtIOGPU),
.instance_init = virtio_gpu_instance_init,
.class_init = virtio_gpu_class_init,
};
static void virtio_register_types(void)
{
type_register_static(&virtio_gpu_info);
}
type_init(virtio_register_types)
QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_ctrl_hdr) != 24);
QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_update_cursor) != 56);
QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_unref) != 32);
QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_create_2d) != 40);
QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_set_scanout) != 48);
QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_flush) != 48);
QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_transfer_to_host_2d) != 56);
QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_mem_entry) != 16);
QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_attach_backing) != 32);
QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resource_detach_backing) != 32);
QEMU_BUILD_BUG_ON(sizeof(struct virtio_gpu_resp_display_info) != 408);

View File

@@ -1124,7 +1124,7 @@ static void vmsvga_update_display(void *opaque)
* Is it more efficient to look at vram VGA-dirty bits or wait
* for the driver to issue SVGA_CMD_UPDATE?
*/
if (memory_region_is_logging(&s->vga.vram)) {
if (memory_region_is_logging(&s->vga.vram, DIRTY_MEMORY_VGA)) {
vga_sync_dirty_bitmap(&s->vga);
dirty = memory_region_get_dirty(&s->vga.vram, 0,
surface_stride(surface) * surface_height(surface),

View File

@@ -173,7 +173,7 @@ static uint64_t pl061_read(void *opaque, hwaddr offset,
case 0x414: /* Raw interrupt status */
return s->istate;
case 0x418: /* Masked interrupt status */
return s->istate | s->im;
return s->istate & s->im;
case 0x420: /* Alternate function select */
return s->afsel;
case 0x500: /* 2mA drive */

View File

@@ -240,13 +240,32 @@ static void acpi_get_misc_info(AcpiMiscInfo *info)
info->applesmc_io_base = applesmc_port();
}
/*
* Because of the PXB hosts we cannot simply query TYPE_PCI_HOST_BRIDGE.
* On i386 arch we only have two pci hosts, so we can look only for them.
*/
static Object *acpi_get_i386_pci_host(void)
{
PCIHostState *host;
host = OBJECT_CHECK(PCIHostState,
object_resolve_path("/machine/i440fx", NULL),
TYPE_PCI_HOST_BRIDGE);
if (!host) {
host = OBJECT_CHECK(PCIHostState,
object_resolve_path("/machine/q35", NULL),
TYPE_PCI_HOST_BRIDGE);
}
return OBJECT(host);
}
static void acpi_get_pci_info(PcPciInfo *info)
{
Object *pci_host;
bool ambiguous;
pci_host = object_resolve_path_type("", TYPE_PCI_HOST_BRIDGE, &ambiguous);
g_assert(!ambiguous);
pci_host = acpi_get_i386_pci_host();
g_assert(pci_host);
info->w32.begin = object_property_get_int(pci_host,
@@ -596,6 +615,291 @@ static void build_append_pci_bus_devices(Aml *parent_scope, PCIBus *bus,
}
}
aml_append(parent_scope, method);
qobject_decref(bsel);
}
/*
* initialize_route - Initialize the interrupt routing rule
* through a specific LINK:
* if (lnk_idx == idx)
* route using link 'link_name'
*/
static Aml *initialize_route(Aml *route, const char *link_name,
Aml *lnk_idx, int idx)
{
Aml *if_ctx = aml_if(aml_equal(lnk_idx, aml_int(idx)));
Aml *pkg = aml_package(4);
aml_append(pkg, aml_int(0));
aml_append(pkg, aml_int(0));
aml_append(pkg, aml_name("%s", link_name));
aml_append(pkg, aml_int(0));
aml_append(if_ctx, aml_store(pkg, route));
return if_ctx;
}
/*
* build_prt - Define interrupt routing rules
*
* Returns an array of 128 routes, one for each device,
* based on device location.
* The main goal is to equally distribute the interrupts
* over the 4 existing ACPI links (works only for i440fx).
* The hash function is (slot + pin) & 3 -> "LNK[D|A|B|C]".
*
*/
static Aml *build_prt(void)
{
Aml *method, *while_ctx, *pin, *res;
method = aml_method("_PRT", 0);
res = aml_local(0);
pin = aml_local(1);
aml_append(method, aml_store(aml_package(128), res));
aml_append(method, aml_store(aml_int(0), pin));
/* while (pin < 128) */
while_ctx = aml_while(aml_lless(pin, aml_int(128)));
{
Aml *slot = aml_local(2);
Aml *lnk_idx = aml_local(3);
Aml *route = aml_local(4);
/* slot = pin >> 2 */
aml_append(while_ctx,
aml_store(aml_shiftright(pin, aml_int(2)), slot));
/* lnk_idx = (slot + pin) & 3 */
aml_append(while_ctx,
aml_store(aml_and(aml_add(pin, slot), aml_int(3)), lnk_idx));
/* route[2] = "LNK[D|A|B|C]", selected by lnk_idx = (slot + pin) & 3 */
aml_append(while_ctx, initialize_route(route, "LNKD", lnk_idx, 0));
aml_append(while_ctx, initialize_route(route, "LNKA", lnk_idx, 1));
aml_append(while_ctx, initialize_route(route, "LNKB", lnk_idx, 2));
aml_append(while_ctx, initialize_route(route, "LNKC", lnk_idx, 3));
/* route[0] = 0x[slot]FFFF */
aml_append(while_ctx,
aml_store(aml_or(aml_shiftleft(slot, aml_int(16)), aml_int(0xFFFF)),
aml_index(route, aml_int(0))));
/* route[1] = pin & 3 */
aml_append(while_ctx,
aml_store(aml_and(pin, aml_int(3)), aml_index(route, aml_int(1))));
/* res[pin] = route */
aml_append(while_ctx, aml_store(route, aml_index(res, pin)));
/* pin++ */
aml_append(while_ctx, aml_increment(pin));
}
aml_append(method, while_ctx);
/* return res*/
aml_append(method, aml_return(res));
return method;
}
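/*
 * Worked example of the hash above (illustrative sketch only): with
 * slot = pin >> 2 and lnk_idx = (slot + pin) & 3, consecutive interrupt pins
 * within a slot, and the same pin across neighbouring slots, land on
 * different links, so the 128 routes spread evenly over LNKD/LNKA/LNKB/LNKC.
 */
#include <stdio.h>
int main(void)
{
    static const char *const links[] = { "LNKD", "LNKA", "LNKB", "LNKC" };
    int pin;
    for (pin = 0; pin < 16; pin++) {       /* first 16 of the 128 routes */
        int slot = pin >> 2;               /* same derivation as the AML loop */
        int lnk_idx = (slot + pin) & 3;
        printf("slot %d INT%c# -> %s\n", slot, 'A' + (pin & 3), links[lnk_idx]);
    }
    return 0;
}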
typedef struct CrsRangeEntry {
uint64_t base;
uint64_t limit;
} CrsRangeEntry;
static void crs_range_insert(GPtrArray *ranges, uint64_t base, uint64_t limit)
{
CrsRangeEntry *entry;
entry = g_malloc(sizeof(*entry));
entry->base = base;
entry->limit = limit;
g_ptr_array_add(ranges, entry);
}
static void crs_range_free(gpointer data)
{
CrsRangeEntry *entry = (CrsRangeEntry *)data;
g_free(entry);
}
static gint crs_range_compare(gconstpointer a, gconstpointer b)
{
CrsRangeEntry *entry_a = *(CrsRangeEntry **)a;
CrsRangeEntry *entry_b = *(CrsRangeEntry **)b;
return (int64_t)entry_a->base - (int64_t)entry_b->base;
}
/*
* crs_replace_with_free_ranges - given the 'used' ranges within [start - end]
* interval, computes the 'free' ranges from the same interval.
* Example: If the input array is { [a1 - a2], [b1 - b2] }, the function
* will return { [start - a1], [a2 - b1], [b2 - end] }.
*/
static void crs_replace_with_free_ranges(GPtrArray *ranges,
uint64_t start, uint64_t end)
{
GPtrArray *free_ranges = g_ptr_array_new_with_free_func(crs_range_free);
uint64_t free_base = start;
int i;
g_ptr_array_sort(ranges, crs_range_compare);
for (i = 0; i < ranges->len; i++) {
CrsRangeEntry *used = g_ptr_array_index(ranges, i);
if (free_base < used->base) {
crs_range_insert(free_ranges, free_base, used->base - 1);
}
free_base = used->limit + 1;
}
if (free_base < end) {
crs_range_insert(free_ranges, free_base, end);
}
g_ptr_array_set_size(ranges, 0);
for (i = 0; i < free_ranges->len; i++) {
g_ptr_array_add(ranges, g_ptr_array_index(free_ranges, i));
}
g_ptr_array_free(free_ranges, false);
}
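/*
 * Worked example of what the function above computes (minimal sketch using
 * plain arrays instead of GPtrArray, illustrative only): for the used I/O
 * ranges [0x1000-0x1fff] and [0x4000-0x43ff] inside the window
 * [0x0d00-0xffff], the remaining free ranges handed back for _CRS are
 * [0x0d00-0x0fff], [0x2000-0x3fff] and [0x4400-0xffff].
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
typedef struct { uint64_t base, limit; } UsedRange;
static void print_free_ranges(const UsedRange *used, int n,
                              uint64_t start, uint64_t end)
{
    uint64_t free_base = start;            /* assumes 'used' is sorted by base */
    int i;
    for (i = 0; i < n; i++) {
        if (free_base < used[i].base) {
            printf("free: 0x%" PRIx64 " - 0x%" PRIx64 "\n",
                   free_base, used[i].base - 1);
        }
        free_base = used[i].limit + 1;
    }
    if (free_base < end) {
        printf("free: 0x%" PRIx64 " - 0x%" PRIx64 "\n", free_base, end);
    }
}
int main(void)
{
    const UsedRange used[] = { { 0x1000, 0x1fff }, { 0x4000, 0x43ff } };
    print_free_ranges(used, 2, 0x0d00, 0xffff);
    return 0;
}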
static Aml *build_crs(PCIHostState *host,
GPtrArray *io_ranges, GPtrArray *mem_ranges)
{
Aml *crs = aml_resource_template();
uint8_t max_bus = pci_bus_num(host->bus);
uint8_t type;
int devfn;
for (devfn = 0; devfn < ARRAY_SIZE(host->bus->devices); devfn++) {
int i;
uint64_t range_base, range_limit;
PCIDevice *dev = host->bus->devices[devfn];
if (!dev) {
continue;
}
for (i = 0; i < PCI_NUM_REGIONS; i++) {
PCIIORegion *r = &dev->io_regions[i];
range_base = r->addr;
range_limit = r->addr + r->size - 1;
/*
* Work-around for old bioses
* that do not support multiple root buses
*/
if (!range_base || range_base > range_limit) {
continue;
}
if (r->type & PCI_BASE_ADDRESS_SPACE_IO) {
aml_append(crs,
aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED,
AML_POS_DECODE, AML_ENTIRE_RANGE,
0,
range_base,
range_limit,
0,
range_limit - range_base + 1));
crs_range_insert(io_ranges, range_base, range_limit);
} else { /* "memory" */
aml_append(crs,
aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED,
AML_MAX_FIXED, AML_NON_CACHEABLE,
AML_READ_WRITE,
0,
range_base,
range_limit,
0,
range_limit - range_base + 1));
crs_range_insert(mem_ranges, range_base, range_limit);
}
}
type = dev->config[PCI_HEADER_TYPE] & ~PCI_HEADER_TYPE_MULTI_FUNCTION;
if (type == PCI_HEADER_TYPE_BRIDGE) {
uint8_t subordinate = dev->config[PCI_SUBORDINATE_BUS];
if (subordinate > max_bus) {
max_bus = subordinate;
}
range_base = pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_IO);
range_limit = pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_IO);
/*
* Work-around for old bioses
* that do not support multiple root buses
*/
if (range_base || range_base > range_limit) {
aml_append(crs,
aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED,
AML_POS_DECODE, AML_ENTIRE_RANGE,
0,
range_base,
range_limit,
0,
range_limit - range_base + 1));
crs_range_insert(io_ranges, range_base, range_limit);
}
range_base =
pci_bridge_get_base(dev, PCI_BASE_ADDRESS_SPACE_MEMORY);
range_limit =
pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_SPACE_MEMORY);
/*
* Work-around for old bioses
* that do not support multiple root buses
*/
if (range_base || range_base > range_limit) {
aml_append(crs,
aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED,
AML_MAX_FIXED, AML_NON_CACHEABLE,
AML_READ_WRITE,
0,
range_base,
range_limit,
0,
range_limit - range_base + 1));
crs_range_insert(mem_ranges, range_base, range_limit);
}
range_base =
pci_bridge_get_base(dev, PCI_BASE_ADDRESS_MEM_PREFETCH);
range_limit =
pci_bridge_get_limit(dev, PCI_BASE_ADDRESS_MEM_PREFETCH);
/*
* Work-around for old bioses
* that do not support multiple root buses
*/
if (range_base || range_base > range_limit) {
aml_append(crs,
aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED,
AML_MAX_FIXED, AML_NON_CACHEABLE,
AML_READ_WRITE,
0,
range_base,
range_limit,
0,
range_limit - range_base + 1));
crs_range_insert(mem_ranges, range_base, range_limit);
}
}
}
aml_append(crs,
aml_word_bus_number(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE,
0,
pci_bus_num(host->bus),
max_bus,
0,
max_bus - pci_bus_num(host->bus) + 1));
return crs;
}
static void
@@ -607,6 +911,11 @@ build_ssdt(GArray *table_data, GArray *linker,
uint32_t nr_mem = machine->ram_slots;
unsigned acpi_cpus = guest_info->apic_id_limit;
Aml *ssdt, *sb_scope, *scope, *pkg, *dev, *method, *crs, *field, *ifctx;
PCIBus *bus = NULL;
GPtrArray *io_ranges = g_ptr_array_new_with_free_func(crs_range_free);
GPtrArray *mem_ranges = g_ptr_array_new_with_free_func(crs_range_free);
CrsRangeEntry *entry;
int root_bus_limit = 0xFF;
int i;
ssdt = init_aml_allocator();
@@ -618,31 +927,81 @@ build_ssdt(GArray *table_data, GArray *linker,
/* Reserve space for header */
acpi_data_push(ssdt->buf, sizeof(AcpiTableHeader));
/* Extra PCI root buses are implemented only for i440fx */
bus = find_i440fx();
if (bus) {
QLIST_FOREACH(bus, &bus->child, sibling) {
uint8_t bus_num = pci_bus_num(bus);
uint8_t numa_node = pci_bus_numa_node(bus);
/* look only for expander root buses */
if (!pci_bus_is_root(bus)) {
continue;
}
if (bus_num < root_bus_limit) {
root_bus_limit = bus_num - 1;
}
scope = aml_scope("\\_SB");
dev = aml_device("PC%.02X", bus_num);
aml_append(dev,
aml_name_decl("_UID", aml_string("PC%.02X", bus_num)));
aml_append(dev, aml_name_decl("_HID", aml_string("PNP0A03")));
aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num)));
if (numa_node != NUMA_NODE_UNASSIGNED) {
aml_append(dev, aml_name_decl("_PXM", aml_int(numa_node)));
}
aml_append(dev, build_prt());
crs = build_crs(PCI_HOST_BRIDGE(BUS(bus)->parent),
io_ranges, mem_ranges);
aml_append(dev, aml_name_decl("_CRS", crs));
aml_append(scope, dev);
aml_append(ssdt, scope);
}
}
scope = aml_scope("\\_SB.PCI0");
/* build PCI0._CRS */
crs = aml_resource_template();
aml_append(crs,
aml_word_bus_number(AML_MIN_FIXED, AML_MAX_FIXED, AML_POS_DECODE,
0x0000, 0x0000, 0x00FF, 0x0000, 0x0100));
0x0000, 0x0, root_bus_limit,
0x0000, root_bus_limit + 1));
aml_append(crs, aml_io(AML_DECODE16, 0x0CF8, 0x0CF8, 0x01, 0x08));
aml_append(crs,
aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED,
AML_POS_DECODE, AML_ENTIRE_RANGE,
0x0000, 0x0000, 0x0CF7, 0x0000, 0x0CF8));
aml_append(crs,
aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED,
AML_POS_DECODE, AML_ENTIRE_RANGE,
0x0000, 0x0D00, 0xFFFF, 0x0000, 0xF300));
crs_replace_with_free_ranges(io_ranges, 0x0D00, 0xFFFF);
for (i = 0; i < io_ranges->len; i++) {
entry = g_ptr_array_index(io_ranges, i);
aml_append(crs,
aml_word_io(AML_MIN_FIXED, AML_MAX_FIXED,
AML_POS_DECODE, AML_ENTIRE_RANGE,
0x0000, entry->base, entry->limit,
0x0000, entry->limit - entry->base + 1));
}
aml_append(crs,
aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED,
AML_CACHEABLE, AML_READ_WRITE,
0, 0x000A0000, 0x000BFFFF, 0, 0x00020000));
aml_append(crs,
aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED,
AML_NON_CACHEABLE, AML_READ_WRITE,
0, pci->w32.begin, pci->w32.end - 1, 0,
pci->w32.end - pci->w32.begin));
crs_replace_with_free_ranges(mem_ranges, pci->w32.begin, pci->w32.end - 1);
for (i = 0; i < mem_ranges->len; i++) {
entry = g_ptr_array_index(mem_ranges, i);
aml_append(crs,
aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED,
AML_NON_CACHEABLE, AML_READ_WRITE,
0, entry->base, entry->limit,
0, entry->limit - entry->base + 1));
}
if (pci->w64.begin) {
aml_append(crs,
aml_qword_memory(AML_POS_DECODE, AML_MIN_FIXED, AML_MAX_FIXED,
@@ -665,6 +1024,9 @@ build_ssdt(GArray *table_data, GArray *linker,
aml_append(dev, aml_name_decl("_CRS", crs));
aml_append(scope, dev);
g_ptr_array_free(io_ranges, true);
g_ptr_array_free(mem_ranges, true);
/* reserve PCIHP resources */
if (pm->pcihp_io_len) {
dev = aml_device("PHPR");
@@ -957,10 +1319,9 @@ build_ssdt(GArray *table_data, GArray *linker,
{
Object *pci_host;
PCIBus *bus = NULL;
bool ambiguous;
pci_host = object_resolve_path_type("", TYPE_PCI_HOST_BRIDGE, &ambiguous);
if (!ambiguous && pci_host) {
pci_host = acpi_get_i386_pci_host();
if (pci_host) {
bus = PCI_HOST_BRIDGE(pci_host)->bus;
}
@@ -1272,10 +1633,8 @@ static bool acpi_get_mcfg(AcpiMcfgInfo *mcfg)
{
Object *pci_host;
QObject *o;
bool ambiguous;
pci_host = object_resolve_path_type("", TYPE_PCI_HOST_BRIDGE, &ambiguous);
g_assert(!ambiguous);
pci_host = acpi_get_i386_pci_host();
g_assert(pci_host);
o = object_property_get_qobject(pci_host, PCIE_HOST_MCFG_BASE, NULL);

View File

@@ -30,6 +30,7 @@
#include "hw/block/fdc.h"
#include "hw/ide.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "monitor/monitor.h"
#include "hw/nvram/fw_cfg.h"
#include "hw/timer/hpet.h"
@@ -163,27 +164,6 @@ uint64_t cpu_get_tsc(CPUX86State *env)
return cpu_get_ticks();
}
/* SMM support */
static cpu_set_smm_t smm_set;
static void *smm_arg;
void cpu_smm_register(cpu_set_smm_t callback, void *arg)
{
assert(smm_set == NULL);
assert(smm_arg == NULL);
smm_set = callback;
smm_arg = arg;
}
void cpu_smm_update(CPUX86State *env)
{
if (smm_set && smm_arg && CPU(x86_env_get_cpu(env)) == first_cpu) {
smm_set(!!(env->hflags & HF_SMM_MASK), smm_arg);
}
}
/* IRQ handling */
int cpu_get_pic_interrupt(CPUX86State *env)
{
@@ -1006,7 +986,6 @@ static X86CPU *pc_new_cpu(const char *cpu_model, int64_t apic_id,
}
qdev_set_parent_bus(DEVICE(cpu), qdev_get_child_bus(icc_bridge, "icc"));
object_unref(OBJECT(cpu));
object_property_set_int(OBJECT(cpu), apic_id, "apic-id", &local_err);
object_property_set_bool(OBJECT(cpu), true, "realized", &local_err);
@@ -1025,7 +1004,9 @@ static const char *current_cpu_model;
void pc_hot_add_cpu(const int64_t id, Error **errp)
{
DeviceState *icc_bridge;
X86CPU *cpu;
int64_t apic_id = x86_cpu_apic_id_from_index(id);
Error *local_err = NULL;
if (id < 0) {
error_setg(errp, "Invalid CPU id: %" PRIi64, id);
@@ -1053,7 +1034,12 @@ void pc_hot_add_cpu(const int64_t id, Error **errp)
icc_bridge = DEVICE(object_resolve_path_type("icc-bridge",
TYPE_ICC_BRIDGE, NULL));
pc_new_cpu(current_cpu_model, apic_id, icc_bridge, errp);
cpu = pc_new_cpu(current_cpu_model, apic_id, icc_bridge, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
object_unref(OBJECT(cpu));
}
void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
@@ -1087,6 +1073,7 @@ void pc_cpus_init(const char *cpu_model, DeviceState *icc_bridge)
error_report_err(error);
exit(1);
}
object_unref(OBJECT(cpu));
}
/* map APIC MMIO area if CPU has APIC */
@@ -1119,6 +1106,25 @@ void pc_guest_info_machine_done(Notifier *notifier, void *data)
PcGuestInfoState *guest_info_state = container_of(notifier,
PcGuestInfoState,
machine_done);
PCIBus *bus = find_i440fx();
if (bus) {
int extra_hosts = 0;
QLIST_FOREACH(bus, &bus->child, sibling) {
/* look for expander root buses */
if (pci_bus_is_root(bus)) {
extra_hosts++;
}
}
if (extra_hosts && guest_info_state->info.fw_cfg) {
uint64_t *val = g_malloc(sizeof(*val));
*val = cpu_to_le64(extra_hosts);
fw_cfg_add_file(guest_info_state->info.fw_cfg,
"etc/extra-pci-roots", val, sizeof(*val));
}
}
acpi_setup(&guest_info_state->info);
}
@@ -1345,9 +1351,9 @@ FWCfgState *pc_memory_init(MachineState *machine,
return fw_cfg;
}
qemu_irq *pc_allocate_cpu_irq(void)
qemu_irq pc_allocate_cpu_irq(void)
{
return qemu_allocate_irqs(pic_irq_request, NULL, 1);
return qemu_allocate_irq(pic_irq_request, NULL, 0);
}
DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus)

View File

@@ -86,10 +86,9 @@ static void pc_init1(MachineState *machine)
ISABus *isa_bus;
PCII440FXState *i440fx_state;
int piix3_devfn = -1;
qemu_irq *cpu_irq;
qemu_irq *gsi;
qemu_irq *i8259;
qemu_irq *smi_irq;
qemu_irq smi_irq;
GSIState *gsi_state;
DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
BusState *idebus[MAX_IDE_BUS];
@@ -99,7 +98,6 @@ static void pc_init1(MachineState *machine)
MemoryRegion *pci_memory;
MemoryRegion *rom_memory;
DeviceState *icc_bridge;
FWCfgState *fw_cfg = NULL;
PcGuestInfo *guest_info;
ram_addr_t lowmem;
@@ -180,16 +178,16 @@ static void pc_init1(MachineState *machine)
/* allocate ram and load rom/bios */
if (!xen_enabled()) {
fw_cfg = pc_memory_init(machine, system_memory,
below_4g_mem_size, above_4g_mem_size,
rom_memory, &ram_memory, guest_info);
pc_memory_init(machine, system_memory,
below_4g_mem_size, above_4g_mem_size,
rom_memory, &ram_memory, guest_info);
} else if (machine->kernel_filename != NULL) {
/* For xen HVM direct kernel boot, load linux here */
fw_cfg = xen_load_linux(machine->kernel_filename,
machine->kernel_cmdline,
machine->initrd_filename,
below_4g_mem_size,
guest_info);
xen_load_linux(machine->kernel_filename,
machine->kernel_cmdline,
machine->initrd_filename,
below_4g_mem_size,
guest_info);
}
gsi_state = g_malloc0(sizeof(*gsi_state));
@@ -220,13 +218,13 @@ static void pc_init1(MachineState *machine)
} else if (xen_enabled()) {
i8259 = xen_interrupt_controller_init();
} else {
cpu_irq = pc_allocate_cpu_irq();
i8259 = i8259_init(isa_bus, cpu_irq[0]);
i8259 = i8259_init(isa_bus, pc_allocate_cpu_irq());
}
for (i = 0; i < ISA_NUM_IRQS; i++) {
gsi_state->i8259_irq[i] = i8259[i];
}
g_free(i8259);
if (pci_enabled) {
ioapic_init_gsi(gsi_state, "i440fx");
}
@@ -284,11 +282,11 @@ static void pc_init1(MachineState *machine)
DeviceState *piix4_pm;
I2CBus *smbus;
smi_irq = qemu_allocate_irqs(pc_acpi_smi_interrupt, first_cpu, 1);
smi_irq = qemu_allocate_irq(pc_acpi_smi_interrupt, first_cpu, 0);
/* TODO: Populate SPD eeprom data. */
smbus = piix4_pm_init(pci_bus, piix3_devfn + 3, 0xb100,
gsi[9], *smi_irq,
kvm_enabled(), fw_cfg, &piix4_pm);
gsi[9], smi_irq,
kvm_enabled(), &piix4_pm);
smbus_eeprom_init(smbus, 8, NULL, 0);
object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP,

View File

@@ -79,7 +79,6 @@ static void pc_q35_init(MachineState *machine)
GSIState *gsi_state;
ISABus *isa_bus;
int pci_enabled = 1;
qemu_irq *cpu_irq;
qemu_irq *gsi;
qemu_irq *i8259;
int i;
@@ -230,8 +229,7 @@ static void pc_q35_init(MachineState *machine)
} else if (xen_enabled()) {
i8259 = xen_interrupt_controller_init();
} else {
cpu_irq = pc_allocate_cpu_irq();
i8259 = i8259_init(isa_bus, cpu_irq[0]);
i8259 = i8259_init(isa_bus, pc_allocate_cpu_irq());
}
for (i = 0; i < ISA_NUM_IRQS; i++) {

View File

@@ -0,0 +1,36 @@
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along
* with this program; if not, see <http://www.gnu.org/licenses/>.
*/
/*
* Common parts for TPM 1.2 and TPM 2 (with slight differences for PPI)
* to be #included
*/
External(\_SB.PCI0.ISA, DeviceObj)
Scope(\_SB.PCI0.ISA) {
/* TPM with emulated TPM TIS interface */
Device (TPM) {
Name (_HID, EisaID ("PNP0C31"))
Name (_CRS, ResourceTemplate ()
{
Memory32Fixed (ReadWrite, TPM_TIS_ADDR_BASE, TPM_TIS_ADDR_SIZE)
IRQNoFlags () {TPM_TIS_IRQ}
})
Method (_STA, 0, NotSerialized) {
Return (0x0F)
}
}
}

View File

@@ -51,8 +51,15 @@ static const int debug_macio = 0;
#define MACIO_PAGE_SIZE 4096
/*
* Unaligned DMA read/write access functions required for OS X/Darwin which
* don't perform DMA transactions on sector boundaries. These functions are
* modelled on bdrv_co_do_preadv()/bdrv_co_do_pwritev() and so should be
* easy to remove if the unaligned block APIs are ever exposed.
*/
static void pmac_dma_read(BlockBackend *blk,
int64_t sector_num, int nb_sectors,
int64_t offset, unsigned int bytes,
void (*cb)(void *opaque, int ret), void *opaque)
{
DBDMA_io *io = opaque;
@@ -60,76 +67,48 @@ static void pmac_dma_read(BlockBackend *blk,
IDEState *s = idebus_active_if(&m->bus);
dma_addr_t dma_addr, dma_len;
void *mem;
int nsector, remainder;
int64_t sector_num;
int nsector;
uint64_t align = BDRV_SECTOR_SIZE;
size_t head_bytes, tail_bytes;
qemu_iovec_destroy(&io->iov);
qemu_iovec_init(&io->iov, io->len / MACIO_PAGE_SIZE + 1);
if (io->remainder_len > 0) {
/* Return remainder of request */
int transfer = MIN(io->remainder_len, io->len);
sector_num = (offset >> 9);
nsector = (io->len >> 9);
MACIO_DPRINTF("--- DMA read pop - bounce addr: %p addr: %"
HWADDR_PRIx " remainder_len: %x\n",
&io->remainder + (0x200 - transfer), io->addr,
io->remainder_len);
cpu_physical_memory_write(io->addr,
&io->remainder + (0x200 - transfer),
transfer);
io->remainder_len -= transfer;
io->len -= transfer;
io->addr += transfer;
s->io_buffer_index += transfer;
s->io_buffer_size -= transfer;
if (io->remainder_len != 0) {
/* Still waiting for remainder */
return;
}
if (io->len == 0) {
MACIO_DPRINTF("--- finished all read processing; go and finish\n");
cb(opaque, 0);
return;
}
}
if (s->drive_kind == IDE_CD) {
sector_num = (int64_t)(s->lba << 2) + (s->io_buffer_index >> 9);
} else {
sector_num = ide_get_sector(s) + (s->io_buffer_index >> 9);
}
nsector = ((io->len + 0x1ff) >> 9);
remainder = (nsector << 9) - io->len;
MACIO_DPRINTF("--- DMA read transfer - addr: %" HWADDR_PRIx " len: %x\n",
io->addr, io->len);
MACIO_DPRINTF("--- DMA read transfer (0x%" HWADDR_PRIx ",0x%x): "
"sector_num: %" PRId64 ", nsector: %d\n", io->addr, io->len,
sector_num, nsector);
dma_addr = io->addr;
dma_len = io->len;
mem = dma_memory_map(&address_space_memory, dma_addr, &dma_len,
DMA_DIRECTION_FROM_DEVICE);
if (!remainder) {
MACIO_DPRINTF("--- DMA read aligned - addr: %" HWADDR_PRIx
" len: %x\n", io->addr, io->len);
qemu_iovec_add(&io->iov, mem, io->len);
} else {
MACIO_DPRINTF("--- DMA read unaligned - addr: %" HWADDR_PRIx
" len: %x\n", io->addr, io->len);
qemu_iovec_add(&io->iov, mem, io->len);
if (offset & (align - 1)) {
head_bytes = offset & (align - 1);
MACIO_DPRINTF("--- DMA read push - bounce addr: %p "
"remainder_len: %x\n",
&io->remainder + 0x200 - remainder, remainder);
qemu_iovec_add(&io->iov, &io->remainder + 0x200 - remainder,
remainder);
MACIO_DPRINTF("--- DMA unaligned head: sector %" PRId64 ", "
"discarding %zu bytes\n", sector_num, head_bytes);
io->remainder_len = remainder;
qemu_iovec_add(&io->iov, &io->head_remainder, head_bytes);
bytes += offset & (align - 1);
offset = offset & ~(align - 1);
}
qemu_iovec_add(&io->iov, mem, io->len);
if ((offset + bytes) & (align - 1)) {
tail_bytes = (offset + bytes) & (align - 1);
MACIO_DPRINTF("--- DMA unaligned tail: sector %" PRId64 ", "
"discarding bytes %zu\n", sector_num, tail_bytes);
qemu_iovec_add(&io->iov, &io->tail_remainder, align - tail_bytes);
bytes = ROUND_UP(bytes, align);
}
s->io_buffer_size -= io->len;
@@ -137,15 +116,15 @@ static void pmac_dma_read(BlockBackend *blk,
io->len = 0;
MACIO_DPRINTF("--- Block read transfer - sector_num: %"PRIx64" "
"nsector: %x\n",
sector_num, nsector);
MACIO_DPRINTF("--- Block read transfer - sector_num: %" PRIx64 " "
"nsector: %x\n", (offset >> 9), (bytes >> 9));
m->aiocb = blk_aio_readv(blk, sector_num, &io->iov, nsector, cb, io);
m->aiocb = blk_aio_readv(blk, (offset >> 9), &io->iov, (bytes >> 9),
cb, io);
}
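
The rewritten read path above boils down to a small piece of alignment arithmetic: drop the offset down to a sector boundary, grow the request by the discarded head bytes, and round the total up to whole sectors before calling blk_aio_readv(). A minimal standalone sketch of that arithmetic (SECTOR_SIZE stands in for BDRV_SECTOR_SIZE, and the request values are hypothetical):

#include <inttypes.h>
#include <stdio.h>

#define SECTOR_SIZE 512ULL                /* stand-in for BDRV_SECTOR_SIZE */

/* Widen an (offset, bytes) request to sector boundaries, mirroring the
 * head/tail handling in the unaligned read path above. */
static void align_request(uint64_t *offset, uint64_t *bytes)
{
    uint64_t head = *offset & (SECTOR_SIZE - 1);          /* unaligned head */
    *offset -= head;
    *bytes  += head;
    *bytes   = (*bytes + SECTOR_SIZE - 1) & ~(SECTOR_SIZE - 1);  /* ROUND_UP */
}

int main(void)
{
    uint64_t off = 0x10203, len = 0x300;   /* hypothetical unaligned request */
    align_request(&off, &len);
    printf("aligned offset 0x%" PRIx64 ", length 0x%" PRIx64 " (%" PRIu64 " sectors)\n",
           off, len, len / SECTOR_SIZE);
    return 0;
}
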
static void pmac_dma_write(BlockBackend *blk,
int64_t sector_num, int nb_sectors,
int64_t offset, int bytes,
void (*cb)(void *opaque, int ret), void *opaque)
{
DBDMA_io *io = opaque;
@@ -153,53 +132,20 @@ static void pmac_dma_write(BlockBackend *blk,
IDEState *s = idebus_active_if(&m->bus);
dma_addr_t dma_addr, dma_len;
void *mem;
int nsector, remainder;
int extra = 0;
int64_t sector_num;
int nsector;
uint64_t align = BDRV_SECTOR_SIZE;
size_t head_bytes, tail_bytes;
bool unaligned_head = false, unaligned_tail = false;
qemu_iovec_destroy(&io->iov);
qemu_iovec_init(&io->iov, io->len / MACIO_PAGE_SIZE + 1);
if (io->remainder_len > 0) {
/* Return remainder of request */
int transfer = MIN(io->remainder_len, io->len);
MACIO_DPRINTF("--- processing write remainder %x\n", transfer);
cpu_physical_memory_read(io->addr,
&io->remainder + (0x200 - transfer),
transfer);
io->remainder_len -= transfer;
io->len -= transfer;
io->addr += transfer;
s->io_buffer_index += transfer;
s->io_buffer_size -= transfer;
if (io->remainder_len != 0) {
/* Still waiting for remainder */
return;
}
MACIO_DPRINTF("--> prepending bounce buffer with size 0x200\n");
/* Sector transfer complete - prepend to request */
qemu_iovec_add(&io->iov, &io->remainder, 0x200);
extra = 1;
}
if (s->drive_kind == IDE_CD) {
sector_num = (int64_t)(s->lba << 2) + (s->io_buffer_index >> 9);
} else {
sector_num = ide_get_sector(s) + (s->io_buffer_index >> 9);
}
sector_num = (offset >> 9);
nsector = (io->len >> 9);
remainder = io->len - (nsector << 9);
MACIO_DPRINTF("--- DMA write transfer - addr: %" HWADDR_PRIx " len: %x\n",
io->addr, io->len);
MACIO_DPRINTF("xxx remainder: %x\n", remainder);
MACIO_DPRINTF("xxx sector_num: %"PRIx64" nsector: %x\n",
MACIO_DPRINTF("--- DMA write transfer (0x%" HWADDR_PRIx ",0x%x): "
"sector_num: %" PRId64 ", nsector: %d\n", io->addr, io->len,
sector_num, nsector);
dma_addr = io->addr;
@@ -207,36 +153,59 @@ static void pmac_dma_write(BlockBackend *blk,
mem = dma_memory_map(&address_space_memory, dma_addr, &dma_len,
DMA_DIRECTION_TO_DEVICE);
if (!remainder) {
MACIO_DPRINTF("--- DMA write aligned - addr: %" HWADDR_PRIx
" len: %x\n", io->addr, io->len);
if (offset & (align - 1)) {
head_bytes = offset & (align - 1);
sector_num = ((offset & ~(align - 1)) >> 9);
MACIO_DPRINTF("--- DMA unaligned head: pre-reading head sector %"
PRId64 "\n", sector_num);
blk_pread(s->blk, (sector_num << 9), &io->head_remainder, align);
qemu_iovec_add(&io->iov, &io->head_remainder, head_bytes);
qemu_iovec_add(&io->iov, mem, io->len);
} else {
/* Write up to last complete sector */
MACIO_DPRINTF("--- DMA write unaligned - addr: %" HWADDR_PRIx
" len: %x\n", io->addr, (nsector << 9));
qemu_iovec_add(&io->iov, mem, (nsector << 9));
MACIO_DPRINTF("--- DMA write read - bounce addr: %p "
"remainder_len: %x\n", &io->remainder, remainder);
cpu_physical_memory_read(io->addr + (nsector << 9), &io->remainder,
remainder);
bytes += offset & (align - 1);
offset = offset & ~(align - 1);
io->remainder_len = 0x200 - remainder;
MACIO_DPRINTF("xxx remainder_len: %x\n", io->remainder_len);
unaligned_head = true;
}
s->io_buffer_size -= ((nsector + extra) << 9);
s->io_buffer_index += ((nsector + extra) << 9);
if ((offset + bytes) & (align - 1)) {
tail_bytes = (offset + bytes) & (align - 1);
sector_num = (((offset + bytes) & ~(align - 1)) >> 9);
MACIO_DPRINTF("--- DMA unaligned tail: pre-reading tail sector %"
PRId64 "\n", sector_num);
blk_pread(s->blk, (sector_num << 9), &io->tail_remainder, align);
if (!unaligned_head) {
qemu_iovec_add(&io->iov, mem, io->len);
}
qemu_iovec_add(&io->iov, &io->tail_remainder + tail_bytes,
align - tail_bytes);
bytes = ROUND_UP(bytes, align);
unaligned_tail = true;
}
if (!unaligned_head && !unaligned_tail) {
qemu_iovec_add(&io->iov, mem, io->len);
}
s->io_buffer_size -= io->len;
s->io_buffer_index += io->len;
io->len = 0;
MACIO_DPRINTF("--- Block write transfer - sector_num: %"PRIx64" "
"nsector: %x\n", sector_num, nsector + extra);
MACIO_DPRINTF("--- Block write transfer - sector_num: %" PRIx64 " "
"nsector: %x\n", (offset >> 9), (bytes >> 9));
m->aiocb = blk_aio_writev(blk, sector_num, &io->iov, nsector + extra, cb,
io);
m->aiocb = blk_aio_writev(blk, (offset >> 9), &io->iov, (bytes >> 9),
cb, io);
}
static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
@@ -244,19 +213,12 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
DBDMA_io *io = opaque;
MACIOIDEState *m = io->opaque;
IDEState *s = idebus_active_if(&m->bus);
int64_t sector_num;
int nsector, remainder;
int64_t offset;
MACIO_DPRINTF("\ns is %p\n", s);
MACIO_DPRINTF("io_buffer_index: %x\n", s->io_buffer_index);
MACIO_DPRINTF("io_buffer_size: %x packet_transfer_size: %x\n",
s->io_buffer_size, s->packet_transfer_size);
MACIO_DPRINTF("lba: %x\n", s->lba);
MACIO_DPRINTF("io_addr: %" HWADDR_PRIx " io_len: %x\n", io->addr,
io->len);
MACIO_DPRINTF("pmac_ide_atapi_transfer_cb\n");
if (ret < 0) {
MACIO_DPRINTF("THERE WAS AN ERROR! %d\n", ret);
MACIO_DPRINTF("DMA error: %d\n", ret);
ide_atapi_io_error(s, ret);
goto done;
}
@@ -270,6 +232,7 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
}
if (s->io_buffer_size <= 0) {
MACIO_DPRINTF("End of IDE transfer\n");
ide_atapi_cmd_ok(s);
m->dma_active = false;
goto done;
@@ -289,19 +252,13 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
goto done;
}
/* Calculate number of sectors */
sector_num = (int64_t)(s->lba << 2) + (s->io_buffer_index >> 9);
nsector = (io->len + 0x1ff) >> 9;
remainder = io->len & 0x1ff;
/* Calculate current offset */
offset = (int64_t)(s->lba << 11) + s->io_buffer_index;
MACIO_DPRINTF("nsector: %d remainder: %x\n", nsector, remainder);
MACIO_DPRINTF("sector: %"PRIx64" %zx\n", sector_num, io->iov.size / 512);
pmac_dma_read(s->blk, sector_num, nsector, pmac_ide_atapi_transfer_cb, io);
pmac_dma_read(s->blk, offset, io->len, pmac_ide_atapi_transfer_cb, io);
return;
done:
MACIO_DPRINTF("done DMA\n\n");
block_acct_done(blk_get_stats(s->blk), &s->acct);
io->dma_end(opaque);
@@ -313,16 +270,14 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
DBDMA_io *io = opaque;
MACIOIDEState *m = io->opaque;
IDEState *s = idebus_active_if(&m->bus);
int64_t sector_num;
int nsector, remainder;
int64_t offset;
MACIO_DPRINTF("pmac_ide_transfer_cb\n");
if (ret < 0) {
MACIO_DPRINTF("DMA error\n");
MACIO_DPRINTF("DMA error: %d\n", ret);
m->aiocb = NULL;
ide_dma_error(s);
io->remainder_len = 0;
goto done;
}
@@ -335,7 +290,7 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
}
if (s->io_buffer_size <= 0) {
MACIO_DPRINTF("end of transfer\n");
MACIO_DPRINTF("End of IDE transfer\n");
s->status = READY_STAT | SEEK_STAT;
ide_set_irq(s->bus);
m->dma_active = false;
@@ -348,24 +303,16 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
}
/* Calculate number of sectors */
sector_num = ide_get_sector(s) + (s->io_buffer_index >> 9);
nsector = (io->len + 0x1ff) >> 9;
remainder = io->len & 0x1ff;
s->nsector -= nsector;
MACIO_DPRINTF("nsector: %d remainder: %x\n", nsector, remainder);
MACIO_DPRINTF("sector: %"PRIx64" %x\n", sector_num, nsector);
offset = (ide_get_sector(s) << 9) + s->io_buffer_index;
switch (s->dma_cmd) {
case IDE_DMA_READ:
pmac_dma_read(s->blk, sector_num, nsector, pmac_ide_transfer_cb, io);
pmac_dma_read(s->blk, offset, io->len, pmac_ide_transfer_cb, io);
break;
case IDE_DMA_WRITE:
pmac_dma_write(s->blk, sector_num, nsector, pmac_ide_transfer_cb, io);
pmac_dma_write(s->blk, offset, io->len, pmac_ide_transfer_cb, io);
break;
case IDE_DMA_TRIM:
MACIO_DPRINTF("TRIM command issued!");
break;
}
@@ -561,15 +508,12 @@ static void ide_dbdma_start(IDEDMA *dma, IDEState *s,
BlockCompletionFunc *cb)
{
MACIOIDEState *m = container_of(dma, MACIOIDEState, dma);
DBDMAState *dbdma = m->dbdma;
DBDMA_io *io;
int i;
s->io_buffer_index = 0;
if (s->drive_kind == IDE_CD) {
s->io_buffer_size = s->packet_transfer_size;
} else {
s->io_buffer_size = s->nsector * 0x200;
s->io_buffer_size = s->nsector * BDRV_SECTOR_SIZE;
}
MACIO_DPRINTF("\n\n------------ IDE transfer\n");
@@ -578,15 +522,6 @@ static void ide_dbdma_start(IDEDMA *dma, IDEState *s,
MACIO_DPRINTF("lba: %x size: %x\n", s->lba, s->io_buffer_size);
MACIO_DPRINTF("-------------------------\n");
for (i = 0; i < DBDMA_CHANNELS; i++) {
io = &dbdma->channels[i].io;
if (io->opaque == m) {
io->remainder_len = 0;
}
}
MACIO_DPRINTF("\n");
m->dma_active = true;
DBDMA_kick(m->dbdma);
}
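
With pmac_dma_read()/pmac_dma_write() now taking byte offsets, the callers above derive the current position from the drive geometry: ATAPI LBAs address 2048-byte blocks (shift by 11), plain IDE sectors address 512-byte blocks (shift by 9), and the partial progress in io_buffer_index is added on top. A small standalone sketch of the two conversions, using hypothetical values:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
    uint32_t lba = 16;            /* hypothetical ATAPI block (2048-byte units) */
    uint64_t ide_sector = 2048;   /* hypothetical IDE sector (512-byte units)  */
    uint32_t io_buffer_index = 512;

    uint64_t atapi_offset = ((uint64_t)lba << 11) + io_buffer_index;  /* 2048-byte blocks */
    uint64_t ide_offset   = (ide_sector << 9) + io_buffer_index;      /* 512-byte sectors */

    printf("ATAPI byte offset: %" PRIu64 "\n", atapi_offset);
    printf("IDE   byte offset: %" PRIu64 "\n", ide_offset);
    return 0;
}
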


@@ -452,8 +452,6 @@ static const struct IDEDMAOps bmdma_ops = {
void bmdma_init(IDEBus *bus, BMDMAState *bm, PCIIDEState *d)
{
qemu_irq *irq;
if (bus->dma == &bm->dma) {
return;
}
@@ -461,8 +459,7 @@ void bmdma_init(IDEBus *bus, BMDMAState *bm, PCIIDEState *d)
bm->dma.ops = &bmdma_ops;
bus->dma = &bm->dma;
bm->irq = bus->irq;
irq = qemu_allocate_irqs(bmdma_irq, bm, 1);
bus->irq = *irq;
bus->irq = qemu_allocate_irq(bmdma_irq, bm, 0);
bm->pci_dev = d;
}
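
This hunk shows a pattern that repeats throughout the series: a qemu_allocate_irqs(handler, opaque, 1) call whose single element was immediately dereferenced becomes a direct qemu_allocate_irq(handler, opaque, 0), so no one-element array is left behind. Both helpers are part of QEMU's IRQ API (declared in hw/irq.h). A non-compilable before/after sketch, where handler, opaque, bus and some_device_init are placeholders rather than QEMU names:

/* Before: allocate a one-element array, use element 0, keep (or leak) it. */
qemu_irq *irqs = qemu_allocate_irqs(handler, opaque, 1);
some_device_init(bus, irqs[0]);

/* After: allocate exactly one IRQ line, index 0, and pass it straight on. */
some_device_init(bus, qemu_allocate_irq(handler, opaque, 0));
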


@@ -11,6 +11,7 @@ common-obj-$(CONFIG_SLAVIO) += slavio_intctl.o
common-obj-$(CONFIG_IOAPIC) += ioapic_common.o
common-obj-$(CONFIG_ARM_GIC) += arm_gic_common.o
common-obj-$(CONFIG_ARM_GIC) += arm_gic.o
common-obj-$(CONFIG_ARM_GIC) += arm_gicv2m.o
common-obj-$(CONFIG_OPENPIC) += openpic.o
obj-$(CONFIG_APIC) += apic.o apic_common.o


@@ -370,13 +370,14 @@ static int apic_irq_pending(APICCommonState *s)
static void apic_update_irq(APICCommonState *s)
{
CPUState *cpu;
DeviceState *dev = (DeviceState *)s;
cpu = CPU(s->cpu);
if (!qemu_cpu_is_self(cpu)) {
cpu_interrupt(cpu, CPU_INTERRUPT_POLL);
} else if (apic_irq_pending(s) > 0) {
cpu_interrupt(cpu, CPU_INTERRUPT_HARD);
} else if (!apic_accept_pic_intr(&s->busdev.qdev) || !pic_get_output(isa_pic)) {
} else if (!apic_accept_pic_intr(dev) || !pic_get_output(isa_pic)) {
cpu_reset_interrupt(cpu, CPU_INTERRUPT_HARD);
}
}
@@ -549,10 +550,12 @@ static void apic_deliver(DeviceState *dev, uint8_t dest, uint8_t dest_mode,
static bool apic_check_pic(APICCommonState *s)
{
if (!apic_accept_pic_intr(&s->busdev.qdev) || !pic_get_output(isa_pic)) {
DeviceState *dev = (DeviceState *)s;
if (!apic_accept_pic_intr(dev) || !pic_get_output(isa_pic)) {
return false;
}
apic_deliver_pic_intr(&s->busdev.qdev, 1);
apic_deliver_pic_intr(dev, 1);
return true;
}

hw/intc/arm_gicv2m.c (new file, 192 lines)

@@ -0,0 +1,192 @@
/*
* GICv2m extension for MSI/MSI-x support with a GICv2-based system
*
* Copyright (C) 2015 Linaro, All rights reserved.
*
* Author: Christoffer Dall <christoffer.dall@linaro.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
/* This file implements an emulated GICv2m widget as described in the ARM
* Server Base System Architecture (SBSA) specification Version 2.2
* (ARM-DEN-0029 v2.2) pages 35-39 without any optional implementation defined
* identification registers and with a single non-secure MSI register frame.
*/
#include "hw/sysbus.h"
#include "hw/pci/msi.h"
#define TYPE_ARM_GICV2M "arm-gicv2m"
#define ARM_GICV2M(obj) OBJECT_CHECK(ARMGICv2mState, (obj), TYPE_ARM_GICV2M)
#define GICV2M_NUM_SPI_MAX 128
#define V2M_MSI_TYPER 0x008
#define V2M_MSI_SETSPI_NS 0x040
#define V2M_MSI_IIDR 0xFCC
#define V2M_IIDR0 0xFD0
#define V2M_IIDR11 0xFFC
#define PRODUCT_ID_QEMU 0x51 /* ASCII code Q */
typedef struct ARMGICv2mState {
SysBusDevice parent_obj;
MemoryRegion iomem;
qemu_irq spi[GICV2M_NUM_SPI_MAX];
uint32_t base_spi;
uint32_t num_spi;
} ARMGICv2mState;
static void gicv2m_set_irq(void *opaque, int irq)
{
ARMGICv2mState *s = (ARMGICv2mState *)opaque;
qemu_irq_pulse(s->spi[irq]);
}
static uint64_t gicv2m_read(void *opaque, hwaddr offset,
unsigned size)
{
ARMGICv2mState *s = (ARMGICv2mState *)opaque;
uint32_t val;
if (size != 4) {
qemu_log_mask(LOG_GUEST_ERROR, "gicv2m_read: bad size %u\n", size);
return 0;
}
switch (offset) {
case V2M_MSI_TYPER:
val = (s->base_spi + 32) << 16;
val |= s->num_spi;
return val;
case V2M_MSI_IIDR:
/* We don't have any valid implementor so we leave that field as zero
* and we return 0 in the arch revision as per the spec.
*/
return (PRODUCT_ID_QEMU << 20);
case V2M_IIDR0 ... V2M_IIDR11:
/* We do not implement any optional identification registers and the
* mandatory MSI_PIDR2 register reads as 0x0, so we capture all
* implementation defined registers here.
*/
return 0;
default:
qemu_log_mask(LOG_GUEST_ERROR,
"gicv2m_read: Bad offset %x\n", (int)offset);
return 0;
}
}
static void gicv2m_write(void *opaque, hwaddr offset,
uint64_t value, unsigned size)
{
ARMGICv2mState *s = (ARMGICv2mState *)opaque;
if (size != 2 && size != 4) {
qemu_log_mask(LOG_GUEST_ERROR, "gicv2m_write: bad size %u\n", size);
return;
}
switch (offset) {
case V2M_MSI_SETSPI_NS: {
int spi;
spi = (value & 0x3ff) - (s->base_spi + 32);
if (spi >= 0 && spi < s->num_spi) {
gicv2m_set_irq(s, spi);
}
return;
}
default:
qemu_log_mask(LOG_GUEST_ERROR,
"gicv2m_write: Bad offset %x\n", (int)offset);
}
}
static const MemoryRegionOps gicv2m_ops = {
.read = gicv2m_read,
.write = gicv2m_write,
.endianness = DEVICE_LITTLE_ENDIAN,
};
static void gicv2m_realize(DeviceState *dev, Error **errp)
{
ARMGICv2mState *s = ARM_GICV2M(dev);
int i;
if (s->num_spi > GICV2M_NUM_SPI_MAX) {
error_setg(errp,
"requested %u SPIs exceeds GICv2m frame maximum %d",
s->num_spi, GICV2M_NUM_SPI_MAX);
return;
}
if (s->base_spi + 32 > 1020 - s->num_spi) {
error_setg(errp,
"requested base SPI %u+%u exceeds max. number 1020",
s->base_spi + 32, s->num_spi);
return;
}
for (i = 0; i < s->num_spi; i++) {
sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->spi[i]);
}
msi_supported = true;
kvm_gsi_direct_mapping = true;
kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled();
}
static void gicv2m_init(Object *obj)
{
SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
ARMGICv2mState *s = ARM_GICV2M(obj);
memory_region_init_io(&s->iomem, OBJECT(s), &gicv2m_ops, s,
"gicv2m", 0x1000);
sysbus_init_mmio(sbd, &s->iomem);
}
static Property gicv2m_properties[] = {
DEFINE_PROP_UINT32("base-spi", ARMGICv2mState, base_spi, 0),
DEFINE_PROP_UINT32("num-spi", ARMGICv2mState, num_spi, 64),
DEFINE_PROP_END_OF_LIST(),
};
static void gicv2m_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
dc->props = gicv2m_properties;
dc->realize = gicv2m_realize;
}
static const TypeInfo gicv2m_info = {
.name = TYPE_ARM_GICV2M,
.parent = TYPE_SYS_BUS_DEVICE,
.instance_size = sizeof(ARMGICv2mState),
.instance_init = gicv2m_init,
.class_init = gicv2m_class_init,
};
static void gicv2m_register_types(void)
{
type_register_static(&gicv2m_info);
}
type_init(gicv2m_register_types)
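
To make the register layout concrete, here is a standalone sketch that reproduces the two interesting computations from gicv2m_read() and gicv2m_write() with the device's default properties (base-spi 0, num-spi 64, so the frame's first interrupt ID is 32):

#include <inttypes.h>
#include <stdio.h>

#define BASE_SPI 0u    /* default "base-spi" property */
#define NUM_SPI  64u   /* default "num-spi" property  */

int main(void)
{
    /* MSI_TYPER as gicv2m_read() composes it: first interrupt ID in the
     * upper half-word, number of SPIs in the lower one. */
    uint32_t typer = ((BASE_SPI + 32u) << 16) | NUM_SPI;
    printf("MSI_TYPER = 0x%08" PRIx32 "\n", typer);

    /* A 32-bit write of interrupt ID 40 to MSI_SETSPI_NS picks
     * frame-relative SPI 8, exactly as gicv2m_write() computes it. */
    uint32_t data = 40;
    int spi = (int)(data & 0x3ff) - (int)(BASE_SPI + 32u);
    if (spi >= 0 && spi < (int)NUM_SPI) {
        printf("pulse SPI %d\n", spi);
    }
    return 0;
}
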


@@ -213,9 +213,6 @@ void exynos4210_init_board_irqs(Exynos4210Irq *s)
uint32_t grp, bit, irq_id, n;
for (n = 0; n < EXYNOS4210_MAX_EXT_COMBINER_IN_IRQ; n++) {
s->board_irqs[n] = qemu_irq_split(s->int_combiner_irq[n],
s->ext_combiner_irq[n]);
irq_id = 0;
if (n == EXYNOS4210_COMBINER_GET_IRQ_NUM(1, 4) ||
n == EXYNOS4210_COMBINER_GET_IRQ_NUM(12, 4)) {
@@ -230,8 +227,10 @@ void exynos4210_init_board_irqs(Exynos4210Irq *s)
if (irq_id) {
s->board_irqs[n] = qemu_irq_split(s->int_combiner_irq[n],
s->ext_gic_irq[irq_id-32]);
} else {
s->board_irqs[n] = qemu_irq_split(s->int_combiner_irq[n],
s->ext_combiner_irq[n]);
}
}
for (; n < EXYNOS4210_MAX_INT_COMBINER_IN_IRQ; n++) {
/* these IDs are passed to Internal Combiner and External GIC */


@@ -65,7 +65,6 @@ static void i82378_realize(PCIDevice *pci, Error **errp)
uint8_t *pci_conf;
ISABus *isabus;
ISADevice *isa;
qemu_irq *out0_irq;
pci_conf = pci->config;
pci_set_word(pci_conf + PCI_COMMAND,
@@ -88,11 +87,9 @@ static void i82378_realize(PCIDevice *pci, Error **errp)
All devices accept byte access only, except timer
*/
/* Workaround the fact that i8259 is not qdev'ified... */
out0_irq = qemu_allocate_irqs(i82378_request_out0_irq, s, 1);
/* 2 82C59 (irq) */
s->i8259 = i8259_init(isabus, *out0_irq);
s->i8259 = i8259_init(isabus,
qemu_allocate_irq(i82378_request_out0_irq, s, 0));
isa_bus_irqs(isabus, s->i8259);
/* 1 82C54 (pit) */


@@ -21,6 +21,7 @@
#include "hw/sysbus.h"
#include "sysemu/sysemu.h"
#include "hw/isa/isa.h"
#include "hw/i386/pc.h"
static ISABus *isabus;
@@ -267,3 +268,28 @@ MemoryRegion *isa_address_space_io(ISADevice *dev)
}
type_init(isabus_register_types)
static void parallel_init(ISABus *bus, int index, CharDriverState *chr)
{
DeviceState *dev;
ISADevice *isadev;
isadev = isa_create(bus, "isa-parallel");
dev = DEVICE(isadev);
qdev_prop_set_uint32(dev, "index", index);
qdev_prop_set_chr(dev, "chardev", chr);
qdev_init_nofail(dev);
}
void parallel_hds_isa_init(ISABus *bus, int n)
{
int i;
assert(n <= MAX_PARALLEL_PORTS);
for (i = 0; i < n; i++) {
if (parallel_hds[i]) {
parallel_init(bus, i, parallel_hds[i]);
}
}
}


@@ -360,11 +360,8 @@ static void ich9_set_sci(void *opaque, int irq_num, int level)
void ich9_lpc_pm_init(PCIDevice *lpc_pci)
{
ICH9LPCState *lpc = ICH9_LPC_DEVICE(lpc_pci);
qemu_irq *sci_irq;
sci_irq = qemu_allocate_irqs(ich9_set_sci, lpc, 1);
ich9_pm_init(lpc_pci, &lpc->pm, sci_irq[0]);
ich9_pm_init(lpc_pci, &lpc->pm, qemu_allocate_irq(ich9_set_sci, lpc, 0));
ich9_lpc_reset(&lpc->d.qdev);
}
@@ -410,12 +407,28 @@ static void ich9_lpc_rcba_update(ICH9LPCState *lpc, uint32_t rbca_old)
}
}
/* config:GEN_PMCON* */
static void
ich9_lpc_pmcon_update(ICH9LPCState *lpc)
{
uint16_t gen_pmcon_1 = pci_get_word(lpc->d.config + ICH9_LPC_GEN_PMCON_1);
uint16_t wmask;
if (gen_pmcon_1 & ICH9_LPC_GEN_PMCON_1_SMI_LOCK) {
wmask = pci_get_word(lpc->d.wmask + ICH9_LPC_GEN_PMCON_1);
wmask &= ~ICH9_LPC_GEN_PMCON_1_SMI_LOCK;
pci_set_word(lpc->d.wmask + ICH9_LPC_GEN_PMCON_1, wmask);
lpc->pm.smi_en_wmask &= ~1;
}
}
static int ich9_lpc_post_load(void *opaque, int version_id)
{
ICH9LPCState *lpc = opaque;
ich9_lpc_pmbase_update(lpc);
ich9_lpc_rcba_update(lpc, 0 /* disabled ICH9_LPC_RBCA_EN */);
ich9_lpc_pmcon_update(lpc);
return 0;
}
@@ -438,6 +451,9 @@ static void ich9_lpc_config_write(PCIDevice *d,
if (ranges_overlap(addr, len, ICH9_LPC_PIRQE_ROUT, 4)) {
pci_bus_fire_intx_routing_notifier(lpc->d.bus);
}
if (ranges_overlap(addr, len, ICH9_LPC_GEN_PMCON_1, 8)) {
ich9_lpc_pmcon_update(lpc);
}
}
static void ich9_lpc_reset(DeviceState *qdev)
@@ -494,7 +510,7 @@ static void ich9_lpc_machine_ready(Notifier *n, void *opaque)
/* lpt */
pci_conf[0x82] |= 0x04;
}
if (memory_region_present(io_as, 0x3f0)) {
if (memory_region_present(io_as, 0x3f2)) {
/* floppy */
pci_conf[0x82] |= 0x08;
}


@@ -356,7 +356,7 @@ static void vt82c686b_pm_realize(PCIDevice *dev, Error **errp)
acpi_pm_tmr_init(&s->ar, pm_tmr_timer, &s->io);
acpi_pm1_evt_init(&s->ar, pm_tmr_timer, &s->io);
acpi_pm1_cnt_init(&s->ar, &s->io, 2);
acpi_pm1_cnt_init(&s->ar, &s->io, false, false, 2);
}
I2CBus *vt82c686b_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,


@@ -78,7 +78,7 @@ static void lm32_evr_init(MachineState *machine)
DriveInfo *dinfo;
MemoryRegion *address_space_mem = get_system_memory();
MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
qemu_irq *cpu_irq, irq[32];
qemu_irq irq[32];
ResetInfo *reset_info;
int i;
@@ -123,8 +123,7 @@ static void lm32_evr_init(MachineState *machine)
1, 2, 0x01, 0x7e, 0x43, 0x00, 0x555, 0x2aa, 1);
/* create irq lines */
cpu_irq = qemu_allocate_irqs(cpu_irq_handler, cpu, 1);
env->pic_state = lm32_pic_init(*cpu_irq);
env->pic_state = lm32_pic_init(qemu_allocate_irq(cpu_irq_handler, cpu, 0));
for (i = 0; i < 32; i++) {
irq[i] = qdev_get_gpio_in(env->pic_state, i);
}
@@ -173,7 +172,7 @@ static void lm32_uclinux_init(MachineState *machine)
DriveInfo *dinfo;
MemoryRegion *address_space_mem = get_system_memory();
MemoryRegion *phys_ram = g_new(MemoryRegion, 1);
qemu_irq *cpu_irq, irq[32];
qemu_irq irq[32];
HWSetup *hw;
ResetInfo *reset_info;
int i;
@@ -225,8 +224,7 @@ static void lm32_uclinux_init(MachineState *machine)
1, 2, 0x01, 0x7e, 0x43, 0x00, 0x555, 0x2aa, 1);
/* create irq lines */
cpu_irq = qemu_allocate_irqs(cpu_irq_handler, env, 1);
env->pic_state = lm32_pic_init(*cpu_irq);
env->pic_state = lm32_pic_init(qemu_allocate_irq(cpu_irq_handler, env, 0));
for (i = 0; i < 32; i++) {
irq[i] = qdev_get_gpio_in(env->pic_state, i);
}


@@ -86,7 +86,7 @@ milkymist_init(MachineState *machine)
DriveInfo *dinfo;
MemoryRegion *address_space_mem = get_system_memory();
MemoryRegion *phys_sdram = g_new(MemoryRegion, 1);
qemu_irq irq[32], *cpu_irq;
qemu_irq irq[32];
int i;
char *bios_filename;
ResetInfo *reset_info;
@@ -130,8 +130,7 @@ milkymist_init(MachineState *machine)
2, 0x00, 0x89, 0x00, 0x1d, 1);
/* create irq lines */
cpu_irq = qemu_allocate_irqs(cpu_irq_handler, cpu, 1);
env->pic_state = lm32_pic_init(*cpu_irq);
env->pic_state = lm32_pic_init(qemu_allocate_irq(cpu_irq_handler, cpu, 0));
for (i = 0; i < 32; i++) {
irq[i] = qdev_get_gpio_in(env->pic_state, i);
}


@@ -211,7 +211,6 @@ uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
uint64_t address_space_end = address_space_start + address_space_size;
g_assert(QEMU_ALIGN_UP(address_space_start, align) == address_space_start);
g_assert(QEMU_ALIGN_UP(address_space_size, align) == address_space_size);
if (!address_space_size) {
error_setg(errp, "memory hotplug is not enabled, "


@@ -1161,7 +1161,7 @@ void mips_malta_init(MachineState *machine)
pci_piix4_ide_init(pci_bus, hd, piix4_devfn + 1);
pci_create_simple(pci_bus, piix4_devfn + 2, "piix4-usb-uhci");
smbus = piix4_pm_init(pci_bus, piix4_devfn + 3, 0x1100,
isa_get_irq(NULL, 9), NULL, 0, NULL, NULL);
isa_get_irq(NULL, 9), NULL, 0, NULL);
smbus_eeprom_init(smbus, 8, smbus_eeprom_buf, smbus_eeprom_size);
g_free(smbus_eeprom_buf);
pit = pit_init(isa_bus, 0x40, 0, NULL);


@@ -126,17 +126,18 @@ static void macio_bar_setup(MacIOState *macio_state)
}
}
static int macio_common_initfn(PCIDevice *d)
static void macio_common_realize(PCIDevice *d, Error **errp)
{
MacIOState *s = MACIO(d);
SysBusDevice *sysbus_dev;
int ret;
Error *err = NULL;
d->config[0x3d] = 0x01; // interrupt on pin 1
ret = qdev_init(DEVICE(&s->cuda));
if (ret < 0) {
return ret;
object_property_set_bool(OBJECT(&s->cuda), true, "realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
sysbus_dev = SYS_BUS_DEVICE(&s->cuda);
memory_region_add_subregion(&s->bar, 0x16000,
@@ -144,12 +145,11 @@ static int macio_common_initfn(PCIDevice *d)
macio_bar_setup(s);
pci_register_bar(d, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar);
return 0;
}
static int macio_initfn_ide(MacIOState *s, MACIOIDEState *ide, qemu_irq irq0,
qemu_irq irq1, int dmaid)
static void macio_realize_ide(MacIOState *s, MACIOIDEState *ide,
qemu_irq irq0, qemu_irq irq1, int dmaid,
Error **errp)
{
SysBusDevice *sysbus_dev;
@@ -157,27 +157,31 @@ static int macio_initfn_ide(MacIOState *s, MACIOIDEState *ide, qemu_irq irq0,
sysbus_connect_irq(sysbus_dev, 0, irq0);
sysbus_connect_irq(sysbus_dev, 1, irq1);
macio_ide_register_dma(ide, s->dbdma, dmaid);
return qdev_init(DEVICE(ide));
object_property_set_bool(OBJECT(ide), true, "realized", errp);
}
static int macio_oldworld_initfn(PCIDevice *d)
static void macio_oldworld_realize(PCIDevice *d, Error **errp)
{
MacIOState *s = MACIO(d);
OldWorldMacIOState *os = OLDWORLD_MACIO(d);
Error *err = NULL;
SysBusDevice *sysbus_dev;
int i;
int cur_irq = 0;
int ret = macio_common_initfn(d);
if (ret < 0) {
return ret;
macio_common_realize(d, &err);
if (err) {
error_propagate(errp, err);
return;
}
sysbus_dev = SYS_BUS_DEVICE(&s->cuda);
sysbus_connect_irq(sysbus_dev, 0, os->irqs[cur_irq++]);
ret = qdev_init(DEVICE(&os->nvram));
if (ret < 0) {
return ret;
object_property_set_bool(OBJECT(&os->nvram), true, "realized", &err);
if (err) {
error_propagate(errp, err);
return;
}
sysbus_dev = SYS_BUS_DEVICE(&os->nvram);
memory_region_add_subregion(&s->bar, 0x60000,
@@ -194,13 +198,12 @@ static int macio_oldworld_initfn(PCIDevice *d)
qemu_irq irq0 = os->irqs[cur_irq++];
qemu_irq irq1 = os->irqs[cur_irq++];
ret = macio_initfn_ide(s, &os->ide[i], irq0, irq1, 0x16 + (i * 4));
if (ret < 0) {
return ret;
macio_realize_ide(s, &os->ide[i], irq0, irq1, 0x16 + (i * 4), &err);
if (err) {
error_propagate(errp, err);
return;
}
}
return 0;
}
static void macio_init_ide(MacIOState *s, MACIOIDEState *ide, size_t ide_size,
@@ -268,17 +271,20 @@ static const MemoryRegionOps timer_ops = {
.endianness = DEVICE_LITTLE_ENDIAN,
};
static int macio_newworld_initfn(PCIDevice *d)
static void macio_newworld_realize(PCIDevice *d, Error **errp)
{
MacIOState *s = MACIO(d);
NewWorldMacIOState *ns = NEWWORLD_MACIO(d);
Error *err = NULL;
SysBusDevice *sysbus_dev;
MemoryRegion *timer_memory = NULL;
int i;
int cur_irq = 0;
int ret = macio_common_initfn(d);
if (ret < 0) {
return ret;
macio_common_realize(d, &err);
if (err) {
error_propagate(errp, err);
return;
}
sysbus_dev = SYS_BUS_DEVICE(&s->cuda);
@@ -294,9 +300,10 @@ static int macio_newworld_initfn(PCIDevice *d)
qemu_irq irq0 = ns->irqs[cur_irq++];
qemu_irq irq1 = ns->irqs[cur_irq++];
ret = macio_initfn_ide(s, &ns->ide[i], irq0, irq1, 0x16 + (i * 4));
if (ret < 0) {
return ret;
macio_realize_ide(s, &ns->ide[i], irq0, irq1, 0x16 + (i * 4), &err);
if (err) {
error_propagate(errp, err);
return;
}
}
@@ -305,8 +312,6 @@ static int macio_newworld_initfn(PCIDevice *d)
memory_region_init_io(timer_memory, OBJECT(s), &timer_ops, NULL, "timer",
0x1000);
memory_region_add_subregion(&s->bar, 0x15000, timer_memory);
return 0;
}
static void macio_newworld_init(Object *obj)
@@ -352,7 +357,7 @@ static void macio_oldworld_class_init(ObjectClass *oc, void *data)
PCIDeviceClass *pdc = PCI_DEVICE_CLASS(oc);
DeviceClass *dc = DEVICE_CLASS(oc);
pdc->init = macio_oldworld_initfn;
pdc->realize = macio_oldworld_realize;
pdc->device_id = PCI_DEVICE_ID_APPLE_343S1201;
dc->vmsd = &vmstate_macio_oldworld;
}
@@ -372,7 +377,7 @@ static void macio_newworld_class_init(ObjectClass *oc, void *data)
PCIDeviceClass *pdc = PCI_DEVICE_CLASS(oc);
DeviceClass *dc = DEVICE_CLASS(oc);
pdc->init = macio_newworld_initfn;
pdc->realize = macio_newworld_realize;
pdc->device_id = PCI_DEVICE_ID_APPLE_UNI_N_KEYL;
dc->vmsd = &vmstate_macio_newworld;
}
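
The macio conversion above swaps the int-returning .init hooks for .realize callbacks, and child devices are brought up by setting their "realized" property rather than calling qdev_init(). A minimal sketch of that error-propagation pattern as used by macio_common_realize() and friends; 'child' is a placeholder for any embedded sub-device such as cuda, nvram or ide[i], so this is not a compilable unit on its own:

static void example_realize(PCIDevice *d, Error **errp)
{
    Error *err = NULL;

    /* Realize the embedded child; any failure lands in err. */
    object_property_set_bool(OBJECT(child), true, "realized", &err);
    if (err) {
        error_propagate(errp, err);   /* hand the error to our caller */
        return;
    }
    /* ... continue wiring up the child only once it is realized ... */
}
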


@@ -155,7 +155,7 @@
#define GEM_NWCFG_BCAST_REJ 0x00000020 /* Reject broadcast packets */
#define GEM_NWCFG_PROMISC 0x00000010 /* Accept all packets */
#define GEM_DMACFG_RBUFSZ_M 0x007F0000 /* DMA RX Buffer Size mask */
#define GEM_DMACFG_RBUFSZ_M 0x00FF0000 /* DMA RX Buffer Size mask */
#define GEM_DMACFG_RBUFSZ_S 16 /* DMA RX Buffer Size shift */
#define GEM_DMACFG_RBUFSZ_MUL 64 /* DMA RX Buffer Size multiplier */
#define GEM_DMACFG_TXCSUM_OFFL 0x00000800 /* Transmit checksum offload */


@@ -1241,6 +1241,14 @@ static void pcnet_transmit(PCNetState *s)
}
bcnt = 4096 - GET_FIELD(tmd.length, TMDL, BCNT);
/* if multi-tmd packet outsizes s->buffer then skip it silently.
Note: this is not what real hw does */
if (s->xmit_pos + bcnt > sizeof(s->buffer)) {
s->xmit_pos = -1;
goto txdone;
}
s->phys_mem_read(s->dma_opaque, PHYSADDR(s, tmd.tbadr),
s->buffer + s->xmit_pos, bcnt, CSR_BSWP(s));
s->xmit_pos += bcnt;


@@ -511,7 +511,7 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
VirtIONet *n = VIRTIO_NET(vdev);
int i;


@@ -46,7 +46,6 @@ typedef struct FWCfgEntry {
uint32_t len;
uint8_t *data;
void *callback_opaque;
FWCfgCallback callback;
FWCfgReadCallback read_callback;
} FWCfgEntry;
@@ -232,19 +231,7 @@ static void fw_cfg_reboot(FWCfgState *s)
static void fw_cfg_write(FWCfgState *s, uint8_t value)
{
int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
FWCfgEntry *e = &s->entries[arch][s->cur_entry & FW_CFG_ENTRY_MASK];
trace_fw_cfg_write(s, value);
if (s->cur_entry & FW_CFG_WRITE_CHANNEL && e->callback &&
s->cur_offset < e->len) {
e->data[s->cur_offset++] = value;
if (s->cur_offset == e->len) {
e->callback(e->callback_opaque, e->data);
s->cur_offset = 0;
}
}
/* nothing, write support removed in QEMU v2.4+ */
}
static int fw_cfg_select(FWCfgState *s, uint16_t key)
@@ -436,6 +423,7 @@ static void fw_cfg_add_bytes_read_callback(FWCfgState *s, uint16_t key,
key &= FW_CFG_ENTRY_MASK;
assert(key < FW_CFG_MAX_ENTRY && len < UINT32_MAX);
assert(s->entries[arch][key].data == NULL); /* avoid key conflict */
s->entries[arch][key].data = data;
s->entries[arch][key].len = (uint32_t)len;
@@ -458,7 +446,6 @@ static void *fw_cfg_modify_bytes_read(FWCfgState *s, uint16_t key,
s->entries[arch][key].data = data;
s->entries[arch][key].len = len;
s->entries[arch][key].callback_opaque = NULL;
s->entries[arch][key].callback = NULL;
return ptr;
}
@@ -484,6 +471,16 @@ void fw_cfg_add_i16(FWCfgState *s, uint16_t key, uint16_t value)
fw_cfg_add_bytes(s, key, copy, sizeof(value));
}
void fw_cfg_modify_i16(FWCfgState *s, uint16_t key, uint16_t value)
{
uint16_t *copy, *old;
copy = g_malloc(sizeof(value));
*copy = cpu_to_le16(value);
old = fw_cfg_modify_bytes_read(s, key, copy, sizeof(value));
g_free(old);
}
void fw_cfg_add_i32(FWCfgState *s, uint16_t key, uint32_t value)
{
uint32_t *copy;
@@ -502,23 +499,6 @@ void fw_cfg_add_i64(FWCfgState *s, uint16_t key, uint64_t value)
fw_cfg_add_bytes(s, key, copy, sizeof(value));
}
void fw_cfg_add_callback(FWCfgState *s, uint16_t key, FWCfgCallback callback,
void *callback_opaque, void *data, size_t len)
{
int arch = !!(key & FW_CFG_ARCH_LOCAL);
assert(key & FW_CFG_WRITE_CHANNEL);
key &= FW_CFG_ENTRY_MASK;
assert(key < FW_CFG_MAX_ENTRY && len <= UINT32_MAX);
s->entries[arch][key].data = data;
s->entries[arch][key].len = (uint32_t)len;
s->entries[arch][key].callback_opaque = callback_opaque;
s->entries[arch][key].callback = callback;
}
void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
FWCfgReadCallback callback, void *callback_opaque,
void *data, size_t len)
@@ -535,18 +515,19 @@ void fw_cfg_add_file_callback(FWCfgState *s, const char *filename,
index = be32_to_cpu(s->files->count);
assert(index < FW_CFG_FILE_SLOTS);
fw_cfg_add_bytes_read_callback(s, FW_CFG_FILE_FIRST + index,
callback, callback_opaque, data, len);
pstrcpy(s->files->f[index].name, sizeof(s->files->f[index].name),
filename);
for (i = 0; i < index; i++) {
if (strcmp(s->files->f[index].name, s->files->f[i].name) == 0) {
trace_fw_cfg_add_file_dupe(s, s->files->f[index].name);
return;
error_report("duplicate fw_cfg file name: %s",
s->files->f[index].name);
exit(1);
}
}
fw_cfg_add_bytes_read_callback(s, FW_CFG_FILE_FIRST + index,
callback, callback_opaque, data, len);
s->files->f[index].size = cpu_to_be32(len);
s->files->f[index].select = cpu_to_be16(FW_CFG_FILE_FIRST + index);
trace_fw_cfg_add_file(s, index, s->files->f[index].name, len);
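
With the guest write channel gone, runtime updates go through the new modify helpers instead. The boot-device hook, for example, creates the entry once at machine setup and then replaces it in place on every boot-order change; fw_cfg_modify_i16() frees the buffer it displaces. A short sketch of that calling pattern (fw_cfg and boot_device stand for the machine's FWCfgState and boot string):

/* at machine setup */
fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, boot_device[0]);

/* later, whenever the boot order changes */
fw_cfg_modify_i16(fw_cfg, FW_CFG_BOOT_DEVICE, boot_device[0]);
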


@@ -1,4 +1,5 @@
common-obj-y += pci_bridge_dev.o
common-obj-y += pci_expander_bridge.o
common-obj-$(CONFIG_XIO3130) += xio3130_upstream.o xio3130_downstream.o
common-obj-$(CONFIG_IOH3420) += ioh3420.o
common-obj-$(CONFIG_I82801B11) += i82801b11.o


@@ -0,0 +1,231 @@
/*
* PCI Expander Bridge Device Emulation
*
* Copyright (C) 2015 Red Hat Inc
*
* Authors:
* Marcel Apfelbaum <marcel@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/pci/pci_host.h"
#include "hw/pci/pci_bus.h"
#include "hw/i386/pc.h"
#include "qemu/range.h"
#include "qemu/error-report.h"
#include "sysemu/numa.h"
#define TYPE_PXB_BUS "pxb-bus"
#define PXB_BUS(obj) OBJECT_CHECK(PXBBus, (obj), TYPE_PXB_BUS)
typedef struct PXBBus {
/*< private >*/
PCIBus parent_obj;
/*< public >*/
char bus_path[8];
} PXBBus;
#define TYPE_PXB_DEVICE "pxb"
#define PXB_DEV(obj) OBJECT_CHECK(PXBDev, (obj), TYPE_PXB_DEVICE)
typedef struct PXBDev {
/*< private >*/
PCIDevice parent_obj;
/*< public >*/
uint8_t bus_nr;
uint16_t numa_node;
} PXBDev;
#define TYPE_PXB_HOST "pxb-host"
static int pxb_bus_num(PCIBus *bus)
{
PXBDev *pxb = PXB_DEV(bus->parent_dev);
return pxb->bus_nr;
}
static bool pxb_is_root(PCIBus *bus)
{
return true; /* by definition */
}
static uint16_t pxb_bus_numa_node(PCIBus *bus)
{
PXBDev *pxb = PXB_DEV(bus->parent_dev);
return pxb->numa_node;
}
static void pxb_bus_class_init(ObjectClass *class, void *data)
{
PCIBusClass *pbc = PCI_BUS_CLASS(class);
pbc->bus_num = pxb_bus_num;
pbc->is_root = pxb_is_root;
pbc->numa_node = pxb_bus_numa_node;
}
static const TypeInfo pxb_bus_info = {
.name = TYPE_PXB_BUS,
.parent = TYPE_PCI_BUS,
.instance_size = sizeof(PXBBus),
.class_init = pxb_bus_class_init,
};
static const char *pxb_host_root_bus_path(PCIHostState *host_bridge,
PCIBus *rootbus)
{
PXBBus *bus = PXB_BUS(rootbus);
snprintf(bus->bus_path, 8, "0000:%02x", pxb_bus_num(rootbus));
return bus->bus_path;
}
static void pxb_host_class_init(ObjectClass *class, void *data)
{
DeviceClass *dc = DEVICE_CLASS(class);
PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(class);
dc->fw_name = "pci";
hc->root_bus_path = pxb_host_root_bus_path;
}
static const TypeInfo pxb_host_info = {
.name = TYPE_PXB_HOST,
.parent = TYPE_PCI_HOST_BRIDGE,
.class_init = pxb_host_class_init,
};
/*
* Registers the PXB bus as a child of the i440fx root bus.
*
* Returns 0 on success, -1 if the i440fx host was not
* found or the bus number is already in use.
*/
static int pxb_register_bus(PCIDevice *dev, PCIBus *pxb_bus)
{
PCIBus *bus = dev->bus;
int pxb_bus_num = pci_bus_num(pxb_bus);
if (bus->parent_dev) {
error_report("PXB devices can be attached only to root bus.");
return -1;
}
QLIST_FOREACH(bus, &bus->child, sibling) {
if (pci_bus_num(bus) == pxb_bus_num) {
error_report("Bus %d is already in use.", pxb_bus_num);
return -1;
}
}
QLIST_INSERT_HEAD(&dev->bus->child, pxb_bus, sibling);
return 0;
}
static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin)
{
PCIDevice *pxb = pci_dev->bus->parent_dev;
/*
* The bios does not index the pxb slot number when
* it computes the IRQ because it resides on bus 0
* and not on the current bus.
* However QEMU routes the irq through bus 0 and adds
* the pxb slot to the IRQ computation of the PXB
* device.
*
* Synchronize between bios and QEMU by canceling
* pxb's effect.
*/
return pin - PCI_SLOT(pxb->devfn);
}
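
A worked example of the cancellation described in the comment, with hypothetical slot numbers: if the pxb occupies slot 3 on bus 0 and a device behind it asserts INTA (pin 0), pxb_map_irq_fn() first subtracts 3; the host bridge's routing on bus 0 then adds the pxb's slot back in, so firmware and QEMU end up agreeing on the pin.

#include <stdio.h>

#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)   /* as in hw/pci/pci.h */

int main(void)
{
    int pxb_devfn = 3 << 3;   /* hypothetical: pxb in slot 3, function 0 */
    int pin = 0;              /* INTA from a device behind the pxb      */

    int mapped = pin - PCI_SLOT(pxb_devfn);   /* what pxb_map_irq_fn() returns */
    printf("pin %d is pre-adjusted to %d; bus 0 routing adds slot %d back\n",
           pin, mapped, PCI_SLOT(pxb_devfn));
    return 0;
}
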
static int pxb_dev_initfn(PCIDevice *dev)
{
PXBDev *pxb = PXB_DEV(dev);
DeviceState *ds, *bds;
PCIBus *bus;
const char *dev_name = NULL;
if (pxb->numa_node != NUMA_NODE_UNASSIGNED &&
pxb->numa_node >= nb_numa_nodes) {
error_report("Illegal numa node %d.", pxb->numa_node);
return -EINVAL;
}
if (dev->qdev.id && *dev->qdev.id) {
dev_name = dev->qdev.id;
}
ds = qdev_create(NULL, TYPE_PXB_HOST);
bus = pci_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS);
bus->parent_dev = dev;
bus->address_space_mem = dev->bus->address_space_mem;
bus->address_space_io = dev->bus->address_space_io;
bus->map_irq = pxb_map_irq_fn;
bds = qdev_create(BUS(bus), "pci-bridge");
bds->id = dev_name;
qdev_prop_set_uint8(bds, "chassis_nr", pxb->bus_nr);
PCI_HOST_BRIDGE(ds)->bus = bus;
if (pxb_register_bus(dev, bus)) {
return -EINVAL;
}
qdev_init_nofail(ds);
qdev_init_nofail(bds);
pci_word_test_and_set_mask(dev->config + PCI_STATUS,
PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK);
pci_config_set_class(dev->config, PCI_CLASS_BRIDGE_HOST);
return 0;
}
static Property pxb_dev_properties[] = {
/* Note: 0 is not a legal PXB bus number. */
DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0),
DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED),
DEFINE_PROP_END_OF_LIST(),
};
static void pxb_dev_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
k->init = pxb_dev_initfn;
k->vendor_id = PCI_VENDOR_ID_REDHAT;
k->device_id = PCI_DEVICE_ID_REDHAT_PXB;
k->class_id = PCI_CLASS_BRIDGE_HOST;
dc->desc = "PCI Expander Bridge";
dc->props = pxb_dev_properties;
}
static const TypeInfo pxb_dev_info = {
.name = TYPE_PXB_DEVICE,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(PXBDev),
.class_init = pxb_dev_class_init,
};
static void pxb_register_types(void)
{
type_register_static(&pxb_bus_info);
type_register_static(&pxb_host_info);
type_register_static(&pxb_dev_info);
}
type_init(pxb_register_types)
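
Given the two properties defined above, instantiating such a bridge from the command line would presumably look something like -device pxb,id=bridge1,bus_nr=4,numa_node=1, with further devices then placed on the new root bus via bus=bridge1 (illustrative values, using QEMU's generic -device syntax).
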


@@ -31,26 +31,6 @@
#include "sysemu/sysemu.h"
#include "hw/pci-host/pam.h"
void smram_update(MemoryRegion *smram_region, uint8_t smram,
uint8_t smm_enabled)
{
bool smram_enabled;
smram_enabled = ((smm_enabled && (smram & SMRAM_G_SMRAME)) ||
(smram & SMRAM_D_OPEN));
memory_region_set_enabled(smram_region, !smram_enabled);
}
void smram_set_smm(uint8_t *host_smm_enabled, int smm, uint8_t smram,
MemoryRegion *smram_region)
{
uint8_t smm_enabled = (smm != 0);
if (*host_smm_enabled != smm_enabled) {
*host_smm_enabled = smm_enabled;
smram_update(smram_region, smram, *host_smm_enabled);
}
}
void init_pam(DeviceState *dev, MemoryRegion *ram_memory,
MemoryRegion *system_memory, MemoryRegion *pci_address_space,
PAMMemoryRegion *mem, uint32_t start, uint32_t size)


@@ -105,7 +105,7 @@ struct PCII440FXState {
MemoryRegion *ram_memory;
PAMMemoryRegion pam_regions[13];
MemoryRegion smram_region;
uint8_t smm_enabled;
MemoryRegion smram, low_smram;
};
@@ -138,18 +138,10 @@ static void i440fx_update_memory_mappings(PCII440FXState *d)
pam_update(&d->pam_regions[i], i,
pd->config[I440FX_PAM + ((i + 1) / 2)]);
}
smram_update(&d->smram_region, pd->config[I440FX_SMRAM], d->smm_enabled);
memory_region_transaction_commit();
}
static void i440fx_set_smm(int val, void *arg)
{
PCII440FXState *d = arg;
PCIDevice *pd = PCI_DEVICE(d);
memory_region_transaction_begin();
smram_set_smm(&d->smm_enabled, val, pd->config[I440FX_SMRAM],
&d->smram_region);
memory_region_set_enabled(&d->smram_region,
!(pd->config[I440FX_SMRAM] & SMRAM_D_OPEN));
memory_region_set_enabled(&d->smram,
pd->config[I440FX_SMRAM] & SMRAM_G_SMRAME);
memory_region_transaction_commit();
}
@@ -172,12 +164,13 @@ static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id)
PCII440FXState *d = opaque;
PCIDevice *pd = PCI_DEVICE(d);
int ret, i;
uint8_t smm_enabled;
ret = pci_device_load(pd, f);
if (ret < 0)
return ret;
i440fx_update_memory_mappings(d);
qemu_get_8s(f, &d->smm_enabled);
qemu_get_8s(f, &smm_enabled);
if (version_id == 2) {
for (i = 0; i < PIIX_NUM_PIRQS; i++) {
@@ -205,7 +198,10 @@ static const VMStateDescription vmstate_i440fx = {
.post_load = i440fx_post_load,
.fields = (VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, PCII440FXState),
VMSTATE_UINT8(smm_enabled, PCII440FXState),
/* Used to be smm_enabled, which was basically always zero because
* SeaBIOS hardly uses SMM. SMRAM is now handled by CPU code.
*/
VMSTATE_UNUSED(1),
VMSTATE_END_OF_LIST()
}
};
@@ -297,11 +293,7 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp)
static void i440fx_realize(PCIDevice *dev, Error **errp)
{
PCII440FXState *d = I440FX_PCI_DEVICE(dev);
dev->config[I440FX_SMRAM] = 0x02;
cpu_smm_register(&i440fx_set_smm, d);
}
PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
@@ -346,11 +338,23 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state,
pc_pci_as_mapping_init(OBJECT(f), f->system_memory,
f->pci_address_space);
/* if *disabled* show SMRAM to all CPUs */
memory_region_init_alias(&f->smram_region, OBJECT(d), "smram-region",
f->pci_address_space, 0xa0000, 0x20000);
memory_region_add_subregion_overlap(f->system_memory, 0xa0000,
&f->smram_region, 1);
memory_region_set_enabled(&f->smram_region, false);
memory_region_set_enabled(&f->smram_region, true);
/* smram, as seen by SMM CPUs */
memory_region_init(&f->smram, OBJECT(d), "smram", 1ull << 32);
memory_region_set_enabled(&f->smram, true);
memory_region_init_alias(&f->low_smram, OBJECT(d), "smram-low",
f->ram_memory, 0xa0000, 0x20000);
memory_region_set_enabled(&f->low_smram, true);
memory_region_add_subregion(&f->smram, 0xa0000, &f->low_smram);
object_property_add_const_link(qdev_get_machine(), "smram",
OBJECT(&f->smram), &error_abort);
init_pam(dev, f->ram_memory, f->system_memory, f->pci_address_space,
&f->pam_regions[0], PAM_BIOS_BASE, PAM_BIOS_SIZE);
for (i = 0; i < 12; ++i) {


@@ -198,6 +198,28 @@ static const TypeInfo q35_host_info = {
* MCH D0:F0
*/
static uint64_t tseg_blackhole_read(void *ptr, hwaddr reg, unsigned size)
{
return 0xffffffff;
}
static void tseg_blackhole_write(void *opaque, hwaddr addr, uint64_t val,
unsigned width)
{
/* nothing */
}
static const MemoryRegionOps tseg_blackhole_ops = {
.read = tseg_blackhole_read,
.write = tseg_blackhole_write,
.endianness = DEVICE_NATIVE_ENDIAN,
.valid.min_access_size = 1,
.valid.max_access_size = 4,
.impl.min_access_size = 4,
.impl.max_access_size = 4,
.endianness = DEVICE_LITTLE_ENDIAN,
};
/* PCIe MMCFG */
static void mch_update_pciexbar(MCHPCIState *mch)
{
@@ -266,21 +288,70 @@ static void mch_update_pam(MCHPCIState *mch)
static void mch_update_smram(MCHPCIState *mch)
{
PCIDevice *pd = PCI_DEVICE(mch);
bool h_smrame = (pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME);
uint32_t tseg_size;
/* implement SMRAM.D_LCK */
if (pd->config[MCH_HOST_BRIDGE_SMRAM] & MCH_HOST_BRIDGE_SMRAM_D_LCK) {
pd->config[MCH_HOST_BRIDGE_SMRAM] &= ~MCH_HOST_BRIDGE_SMRAM_D_OPEN;
pd->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK_LCK;
pd->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK_LCK;
}
memory_region_transaction_begin();
smram_update(&mch->smram_region, pd->config[MCH_HOST_BRIDGE_SMRAM],
mch->smm_enabled);
memory_region_transaction_commit();
}
static void mch_set_smm(int smm, void *arg)
{
MCHPCIState *mch = arg;
PCIDevice *pd = PCI_DEVICE(mch);
if (pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_D_OPEN) {
/* Hide (!) low SMRAM if H_SMRAME = 1 */
memory_region_set_enabled(&mch->smram_region, h_smrame);
/* Show high SMRAM if H_SMRAME = 1 */
memory_region_set_enabled(&mch->open_high_smram, h_smrame);
} else {
/* Hide high SMRAM and low SMRAM */
memory_region_set_enabled(&mch->smram_region, true);
memory_region_set_enabled(&mch->open_high_smram, false);
}
if (pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_G_SMRAME) {
memory_region_set_enabled(&mch->low_smram, !h_smrame);
memory_region_set_enabled(&mch->high_smram, h_smrame);
} else {
memory_region_set_enabled(&mch->low_smram, false);
memory_region_set_enabled(&mch->high_smram, false);
}
if (pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_T_EN) {
switch (pd->config[MCH_HOST_BRIDGE_ESMRAMC] &
MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK) {
case MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_1MB:
tseg_size = 1024 * 1024;
break;
case MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_2MB:
tseg_size = 1024 * 1024 * 2;
break;
case MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_8MB:
tseg_size = 1024 * 1024 * 8;
break;
default:
tseg_size = 0;
break;
}
} else {
tseg_size = 0;
}
memory_region_del_subregion(mch->system_memory, &mch->tseg_blackhole);
memory_region_set_enabled(&mch->tseg_blackhole, tseg_size);
memory_region_set_size(&mch->tseg_blackhole, tseg_size);
memory_region_add_subregion_overlap(mch->system_memory,
mch->below_4g_mem_size - tseg_size,
&mch->tseg_blackhole, 1);
memory_region_set_enabled(&mch->tseg_window, tseg_size);
memory_region_set_size(&mch->tseg_window, tseg_size);
memory_region_set_address(&mch->tseg_window,
mch->below_4g_mem_size - tseg_size);
memory_region_set_alias_offset(&mch->tseg_window,
mch->below_4g_mem_size - tseg_size);
memory_region_transaction_begin();
smram_set_smm(&mch->smm_enabled, smm, pd->config[MCH_HOST_BRIDGE_SMRAM],
&mch->smram_region);
memory_region_transaction_commit();
}
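
The TSEG handling above hinges on two regions that track the same size: a read-as-ones blackhole stacked over system memory (hiding TSEG from non-SMM code) and an alias inside the SMM-only smram region (exposing it to SMM code), both sized from the ESMRAMC field and placed at the top of low RAM. A standalone sketch of the size decode and placement; the enum values stand in for the MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_* constants and the memory size is hypothetical:

#include <inttypes.h>
#include <stdio.h>

enum { TSEG_1MB, TSEG_2MB, TSEG_8MB };   /* stand-ins for the field values */

static uint32_t tseg_size_bytes(int field, int t_en)
{
    if (!t_en) {
        return 0;                 /* T_EN clear: no blackhole, no window */
    }
    switch (field) {
    case TSEG_1MB: return 1u << 20;
    case TSEG_2MB: return 2u << 20;
    case TSEG_8MB: return 8u << 20;
    default:       return 0;
    }
}

int main(void)
{
    uint32_t below_4g = 0x80000000u;          /* hypothetical 2 GB of low RAM */
    uint32_t size = tseg_size_bytes(TSEG_8MB, 1);

    /* Both the blackhole (non-SMM view) and the window (SMM view) sit at
     * below_4g_mem_size - tseg_size, as mch_update_smram() arranges above. */
    printf("TSEG: %" PRIu32 " MB at 0x%08" PRIx32 "\n", size >> 20, below_4g - size);
    return 0;
}
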
@@ -289,7 +360,6 @@ static void mch_write_config(PCIDevice *d,
{
MCHPCIState *mch = MCH_PCI_DEVICE(d);
/* XXX: implement SMRAM.D_LOCK */
pci_default_write_config(d, address, val, len);
if (ranges_overlap(address, len, MCH_HOST_BRIDGE_PAM0,
@@ -329,7 +399,10 @@ static const VMStateDescription vmstate_mch = {
.post_load = mch_post_load,
.fields = (VMStateField[]) {
VMSTATE_PCI_DEVICE(parent_obj, MCHPCIState),
VMSTATE_UINT8(smm_enabled, MCHPCIState),
/* Used to be smm_enabled, which was basically always zero because
* SeaBIOS hardly uses SMM. SMRAM is now handled by CPU code.
*/
VMSTATE_UNUSED(1),
VMSTATE_END_OF_LIST()
}
};
@@ -343,6 +416,9 @@ static void mch_reset(DeviceState *qdev)
MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT);
d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT;
d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT;
d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK;
d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK;
mch_update(mch);
}
@@ -399,13 +475,47 @@ static void mch_realize(PCIDevice *d, Error **errp)
pc_pci_as_mapping_init(OBJECT(mch), mch->system_memory,
mch->pci_address_space);
/* smram */
cpu_smm_register(&mch_set_smm, mch);
/* if *disabled* show SMRAM to all CPUs */
memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region",
mch->pci_address_space, 0xa0000, 0x20000);
memory_region_add_subregion_overlap(mch->system_memory, 0xa0000,
&mch->smram_region, 1);
memory_region_set_enabled(&mch->smram_region, false);
memory_region_set_enabled(&mch->smram_region, true);
memory_region_init_alias(&mch->open_high_smram, OBJECT(mch), "smram-open-high",
mch->ram_memory, 0xa0000, 0x20000);
memory_region_add_subregion_overlap(mch->system_memory, 0xfeda0000,
&mch->open_high_smram, 1);
memory_region_set_enabled(&mch->open_high_smram, false);
/* smram, as seen by SMM CPUs */
memory_region_init(&mch->smram, OBJECT(mch), "smram", 1ull << 32);
memory_region_set_enabled(&mch->smram, true);
memory_region_init_alias(&mch->low_smram, OBJECT(mch), "smram-low",
mch->ram_memory, 0xa0000, 0x20000);
memory_region_set_enabled(&mch->low_smram, true);
memory_region_add_subregion(&mch->smram, 0xa0000, &mch->low_smram);
memory_region_init_alias(&mch->high_smram, OBJECT(mch), "smram-high",
mch->ram_memory, 0xa0000, 0x20000);
memory_region_set_enabled(&mch->high_smram, true);
memory_region_add_subregion(&mch->smram, 0xfeda0000, &mch->high_smram);
memory_region_init_io(&mch->tseg_blackhole, OBJECT(mch),
&tseg_blackhole_ops, NULL,
"tseg-blackhole", 0);
memory_region_set_enabled(&mch->tseg_blackhole, false);
memory_region_add_subregion_overlap(mch->system_memory,
mch->below_4g_mem_size,
&mch->tseg_blackhole, 1);
memory_region_init_alias(&mch->tseg_window, OBJECT(mch), "tseg-window",
mch->ram_memory, mch->below_4g_mem_size, 0);
memory_region_set_enabled(&mch->tseg_window, false);
memory_region_add_subregion(&mch->smram, mch->below_4g_mem_size,
&mch->tseg_window);
object_property_add_const_link(qdev_get_machine(), "smram",
OBJECT(&mch->smram), &error_abort);
init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory,
mch->pci_address_space, &mch->pam_regions[0],
PAM_BIOS_BASE, PAM_BIOS_SIZE);


@@ -21,10 +21,6 @@
#include "hw/pci/msi.h"
#include "qemu/range.h"
/* Eventually those constants should go to Linux pci_regs.h */
#define PCI_MSI_PENDING_32 0x10
#define PCI_MSI_PENDING_64 0x14
/* PCI_MSI_ADDRESS_LO */
#define PCI_MSI_ADDRESS_LO_MASK (~0x3)


@@ -29,19 +29,7 @@ PciInfoList *qmp_query_pci(Error **errp)
return NULL;
}
static void pci_error_message(Monitor *mon)
void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
{
monitor_printf(mon, "PCI devices not supported\n");
}
int hmp_pcie_aer_inject_error(Monitor *mon,
const QDict *qdict, QObject **ret_data)
{
pci_error_message(mon);
return -ENOSYS;
}
void pcie_aer_inject_error_print(Monitor *mon, const QObject *data)
{
pci_error_message(mon);
}


@@ -88,9 +88,28 @@ static void pci_bus_unrealize(BusState *qbus, Error **errp)
vmstate_unregister(NULL, &vmstate_pcibus, bus);
}
static bool pcibus_is_root(PCIBus *bus)
{
return !bus->parent_dev;
}
static int pcibus_num(PCIBus *bus)
{
if (pcibus_is_root(bus)) {
return 0; /* pci host bridge */
}
return bus->parent_dev->config[PCI_SECONDARY_BUS];
}
static uint16_t pcibus_numa_node(PCIBus *bus)
{
return NUMA_NODE_UNASSIGNED;
}
static void pci_bus_class_init(ObjectClass *klass, void *data)
{
BusClass *k = BUS_CLASS(klass);
PCIBusClass *pbc = PCI_BUS_CLASS(klass);
k->print_dev = pcibus_dev_print;
k->get_dev_path = pcibus_get_dev_path;
@@ -98,12 +117,17 @@ static void pci_bus_class_init(ObjectClass *klass, void *data)
k->realize = pci_bus_realize;
k->unrealize = pci_bus_unrealize;
k->reset = pcibus_reset;
pbc->is_root = pcibus_is_root;
pbc->bus_num = pcibus_num;
pbc->numa_node = pcibus_numa_node;
}
static const TypeInfo pci_bus_info = {
.name = TYPE_PCI_BUS,
.parent = TYPE_BUS,
.instance_size = sizeof(PCIBus),
.class_size = sizeof(PCIBusClass),
.class_init = pci_bus_class_init,
};
@@ -123,7 +147,7 @@ static uint16_t pci_default_sub_device_id = PCI_SUBDEVICE_ID_QEMU;
static QLIST_HEAD(, PCIHostState) pci_host_bridges;
static int pci_bar(PCIDevice *d, int reg)
int pci_bar(PCIDevice *d, int reg)
{
uint8_t type;
@@ -278,7 +302,10 @@ PCIBus *pci_device_root_bus(const PCIDevice *d)
{
PCIBus *bus = d->bus;
while ((d = bus->parent_dev) != NULL) {
while (!pci_bus_is_root(bus)) {
d = bus->parent_dev;
assert(d != NULL);
bus = d->bus;
}
@@ -291,7 +318,6 @@ const char *pci_root_bus_path(PCIDevice *dev)
PCIHostState *host_bridge = PCI_HOST_BRIDGE(rootbus->qbus.parent);
PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_GET_CLASS(host_bridge);
assert(!rootbus->parent_dev);
assert(host_bridge->bus == rootbus);
if (hc->root_bus_path) {
@@ -325,7 +351,7 @@ bool pci_bus_is_express(PCIBus *bus)
bool pci_bus_is_root(PCIBus *bus)
{
return !bus->parent_dev;
return PCI_BUS_GET_CLASS(bus)->is_root(bus);
}
void pci_bus_new_inplace(PCIBus *bus, size_t bus_size, DeviceState *parent,
@@ -379,9 +405,12 @@ PCIBus *pci_register_bus(DeviceState *parent, const char *name,
int pci_bus_num(PCIBus *s)
{
if (pci_bus_is_root(s))
return 0; /* pci host bridge */
return s->parent_dev->config[PCI_SECONDARY_BUS];
return PCI_BUS_GET_CLASS(s)->bus_num(s);
}
int pci_bus_numa_node(PCIBus *bus)
{
return PCI_BUS_GET_CLASS(bus)->numa_node(bus);
}
static int get_pci_config_device(QEMUFile *f, void *pv, size_t size)
@@ -1576,7 +1605,8 @@ PciInfoList *qmp_query_pci(Error **errp)
QLIST_FOREACH(host_bridge, &pci_host_bridges, next) {
info = g_malloc0(sizeof(*info));
info->value = qmp_query_pci_bus(host_bridge->bus, 0);
info->value = qmp_query_pci_bus(host_bridge->bus,
pci_bus_num(host_bridge->bus));
/* XXX: waiting for the qapi to support GSList */
if (!cur_item) {
@@ -1681,10 +1711,28 @@ static bool pci_secondary_bus_in_range(PCIDevice *dev, int bus_num)
{
return !(pci_get_word(dev->config + PCI_BRIDGE_CONTROL) &
PCI_BRIDGE_CTL_BUS_RESET) /* Don't walk the bus if it's reset. */ &&
dev->config[PCI_SECONDARY_BUS] < bus_num &&
dev->config[PCI_SECONDARY_BUS] <= bus_num &&
bus_num <= dev->config[PCI_SUBORDINATE_BUS];
}
/* Whether a given bus number is in a range of a root bus */
static bool pci_root_bus_in_range(PCIBus *bus, int bus_num)
{
int i;
for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
PCIDevice *dev = bus->devices[i];
if (dev && PCI_DEVICE_GET_CLASS(dev)->is_bridge) {
if (pci_secondary_bus_in_range(dev, bus_num)) {
return true;
}
}
}
return false;
}
static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num)
{
PCIBus *sec;
@@ -1706,12 +1754,18 @@ static PCIBus *pci_find_bus_nr(PCIBus *bus, int bus_num)
/* try child bus */
for (; bus; bus = sec) {
QLIST_FOREACH(sec, &bus->child, sibling) {
assert(!pci_bus_is_root(sec));
if (sec->parent_dev->config[PCI_SECONDARY_BUS] == bus_num) {
if (pci_bus_num(sec) == bus_num) {
return sec;
}
if (pci_secondary_bus_in_range(sec->parent_dev, bus_num)) {
break;
/* PXB buses assumed to be children of bus 0 */
if (pci_bus_is_root(sec)) {
if (pci_root_bus_in_range(sec, bus_num)) {
break;
}
} else {
if (pci_secondary_bus_in_range(sec->parent_dev, bus_num)) {
break;
}
}
}
}
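
The extra branch above exists because a pxb bus is itself a root bus hanging off bus 0, so its number cannot be read from a parent bridge's PCI_SECONDARY_BUS; instead pci_root_bus_in_range() asks the bridges plugged into it whether the wanted number falls inside their secondary/subordinate range. A standalone sketch of that decision with hypothetical bus numbers:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical topology: a pxb root bus numbered 4 carries a pci-bridge
 * whose secondary/subordinate range is [5, 7]; we look for bus 6. */
static bool bridge_range_contains(int secondary, int subordinate, int bus_num)
{
    return secondary <= bus_num && bus_num <= subordinate;
}

int main(void)
{
    int pxb_bus_num = 4, secondary = 5, subordinate = 7, wanted = 6;

    if (pxb_bus_num == wanted) {
        printf("the pxb root bus itself is bus %d\n", wanted);
    } else if (bridge_range_contains(secondary, subordinate, wanted)) {
        printf("bus %d lies behind a bridge on the pxb root bus; descend\n", wanted);
    } else {
        printf("bus %d is not reachable through this root bus\n", wanted);
    }
    return 0;
}
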


@@ -815,21 +815,6 @@ const VMStateDescription vmstate_pcie_aer_log = {
}
};
void pcie_aer_inject_error_print(Monitor *mon, const QObject *data)
{
QDict *qdict;
int devfn;
assert(qobject_type(data) == QTYPE_QDICT);
qdict = qobject_to_qdict(data);
devfn = (int)qdict_get_int(qdict, "devfn");
monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
qdict_get_str(qdict, "id"),
qdict_get_str(qdict, "root_bus"),
(int) qdict_get_int(qdict, "bus"),
PCI_SLOT(devfn), PCI_FUNC(devfn));
}
typedef struct PCIEAERErrorName {
const char *name;
uint32_t val;
@@ -962,8 +947,8 @@ static int pcie_aer_parse_error_string(const char *error_name,
return -EINVAL;
}
int hmp_pcie_aer_inject_error(Monitor *mon,
const QDict *qdict, QObject **ret_data)
static int do_pcie_aer_inject_error(Monitor *mon,
const QDict *qdict, QObject **ret_data)
{
const char *id = qdict_get_str(qdict, "id");
const char *error_name;
@@ -1035,3 +1020,23 @@ int hmp_pcie_aer_inject_error(Monitor *mon,
return 0;
}
void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
{
QObject *data;
int devfn;
if (do_pcie_aer_inject_error(mon, qdict, &data) < 0) {
return;
}
assert(qobject_type(data) == QTYPE_QDICT);
qdict = qobject_to_qdict(data);
devfn = (int)qdict_get_int(qdict, "devfn");
monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
qdict_get_str(qdict, "id"),
qdict_get_str(qdict, "root_bus"),
(int) qdict_get_int(qdict, "bus"),
PCI_SLOT(devfn), PCI_FUNC(devfn));
}


@@ -3,7 +3,7 @@ obj-y += ppc.o ppc_booke.o
# IBM pSeries (sPAPR)
obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o
obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o
ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
obj-y += spapr_pci_vfio.o
endif


@@ -1030,6 +1030,7 @@ void ppce500_init(MachineState *machine, PPCE500Params *params)
exit(1);
}
}
g_free(filename);
/* Reserve space for dtb */
dt_base = (loadaddr + bios_size + DTC_LOAD_PAD) & ~DTC_PAD_MASK;


@@ -119,7 +119,7 @@ static const MemoryRegionOps unin_ops = {
static void fw_cfg_boot_set(void *opaque, const char *boot_device,
Error **errp)
{
fw_cfg_add_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]);
fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]);
}
static uint64_t translate_kernel_address(void *opaque, uint64_t addr)


@@ -52,7 +52,7 @@
static void fw_cfg_boot_set(void *opaque, const char *boot_device,
Error **errp)
{
fw_cfg_add_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]);
fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]);
}
static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
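
Both mac_* hunks above switch the boot-device callback from adding a FW_CFG_BOOT_DEVICE entry to updating the one that already exists, which matches the removal of guest-side fw_cfg writes elsewhere in this series. A hedged sketch of the call, assuming the fw_cfg_modify_i16() signature (FWCfgState pointer, 16-bit key, 16-bit value) added by the fw_cfg patches; it would live inside the QEMU tree:

    #include "hw/nvram/fw_cfg.h"

    /* sketch only: update the existing boot-device entry in place */
    static void example_boot_set(FWCfgState *fw_cfg, char boot_letter)
    {
        fw_cfg_modify_i16(fw_cfg, FW_CFG_BOOT_DEVICE, boot_letter);
    }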


@@ -528,7 +528,6 @@ static void ppc_prep_init(MachineState *machine)
PCIDevice *pci;
ISABus *isa_bus;
ISADevice *isa;
qemu_irq *cpu_exit_irq;
int ppc_boot_device;
DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
@@ -625,11 +624,11 @@ static void ppc_prep_init(MachineState *machine)
/* PCI -> ISA bridge */
pci = pci_create_simple(pci_bus, PCI_DEVFN(1, 0), "i82378");
cpu_exit_irq = qemu_allocate_irqs(cpu_request_exit, NULL, 1);
cpu = POWERPC_CPU(first_cpu);
qdev_connect_gpio_out(&pci->qdev, 0,
cpu->env.irq_inputs[PPC6xx_INPUT_INT]);
qdev_connect_gpio_out(&pci->qdev, 1, *cpu_exit_irq);
qdev_connect_gpio_out(&pci->qdev, 1,
qemu_allocate_irq(cpu_request_exit, NULL, 0));
sysbus_connect_irq(&pcihost->busdev, 0, qdev_get_gpio_in(&pci->qdev, 9));
sysbus_connect_irq(&pcihost->busdev, 1, qdev_get_gpio_in(&pci->qdev, 11));
sysbus_connect_irq(&pcihost->busdev, 2, qdev_get_gpio_in(&pci->qdev, 9));
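
The prep hunk above replaces a one-element qemu_allocate_irqs() array (which was never freed) with a single qemu_allocate_irq() call wired straight into the GPIO. A small sketch of the singular helper, inside the QEMU tree, with a dummy handler standing in for cpu_request_exit:

    #include "hw/irq.h"

    /* illustration only: prep.c uses cpu_request_exit as the handler */
    static void example_handler(void *opaque, int n, int level)
    {
    }

    static qemu_irq example_single_irq(void)
    {
        /* one qemu_irq, no intermediate array to keep track of or leak */
        return qemu_allocate_irq(example_handler, NULL, 0);
    }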


@@ -533,6 +533,8 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
refpoints, sizeof(refpoints))));
_FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
_FDT((fdt_property_cell(fdt, "rtas-event-scan-rate",
RTAS_EVENT_SCAN_RATE)));
/*
* According to PAPR, rtas ibm,os-term does not guarantee a return
@@ -794,8 +796,8 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
_FDT((fdt_pack(fdt)));
if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
hw_error("FDT too big ! 0x%x bytes (max is 0x%x)\n",
fdt_totalsize(fdt), FDT_MAX_SIZE);
error_report("FDT too big ! 0x%x bytes (max is 0x%x)",
fdt_totalsize(fdt), FDT_MAX_SIZE);
exit(1);
}
@@ -899,7 +901,7 @@ static int spapr_check_htab_fd(sPAPREnvironment *spapr)
spapr->htab_fd = kvmppc_get_htab_fd(false);
if (spapr->htab_fd < 0) {
error_report("Unable to open fd for reading hash table from KVM: "
"%s", strerror(errno));
"%s", strerror(errno));
rc = -1;
}
spapr->htab_fd_stale = false;
@@ -1419,7 +1421,7 @@ static void ppc_spapr_init(MachineState *machine)
rma_alloc_size = kvmppc_alloc_rma(&rma);
if (rma_alloc_size == -1) {
hw_error("qemu: Unable to create RMA\n");
error_report("Unable to create RMA");
exit(1);
}
@@ -1504,6 +1506,11 @@ static void ppc_spapr_init(MachineState *machine)
qemu_register_reset(spapr_cpu_reset, cpu);
}
if (kvm_enabled()) {
/* Enable H_LOGICAL_CI_* so SLOF can talk to in-kernel devices */
kvmppc_enable_logical_ci_hcalls();
}
/* allocate RAM */
spapr->ram_limit = ram_size;
memory_region_allocate_system_memory(ram, NULL, "ppc_spapr.ram",
@@ -1520,18 +1527,18 @@ static void ppc_spapr_init(MachineState *machine)
filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
if (!filename) {
hw_error("Could not find LPAR rtas '%s'\n", "spapr-rtas.bin");
error_report("Could not find LPAR rtas '%s'", "spapr-rtas.bin");
exit(1);
}
spapr->rtas_size = get_image_size(filename);
spapr->rtas_blob = g_malloc(spapr->rtas_size);
if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
error_report("Could not load LPAR rtas '%s'", filename);
exit(1);
}
if (spapr->rtas_size > RTAS_MAX_SIZE) {
hw_error("RTAS too big ! 0x%zx bytes (max is 0x%x)\n",
(size_t)spapr->rtas_size, RTAS_MAX_SIZE);
error_report("RTAS too big ! 0x%zx bytes (max is 0x%x)",
(size_t)spapr->rtas_size, RTAS_MAX_SIZE);
exit(1);
}
g_free(filename);
@@ -1641,12 +1648,12 @@ static void ppc_spapr_init(MachineState *machine)
}
filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
if (!filename) {
hw_error("Could not find LPAR rtas '%s'\n", bios_name);
error_report("Could not find LPAR firmware '%s'", bios_name);
exit(1);
}
fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
if (fw_size < 0) {
hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
if (fw_size <= 0) {
error_report("Could not load LPAR firmware '%s'", filename);
exit(1);
}
g_free(filename);
@@ -1660,9 +1667,14 @@ static void ppc_spapr_init(MachineState *machine)
/* Prepare the device tree */
spapr->fdt_skel = spapr_create_fdt_skel(initrd_base, initrd_size,
kernel_size, kernel_le,
kernel_cmdline, spapr->epow_irq);
kernel_cmdline,
spapr->check_exception_irq);
assert(spapr->fdt_skel != NULL);
/* used by RTAS */
QTAILQ_INIT(&spapr->ccs_list);
qemu_register_reset(spapr_ccs_reset_hook, spapr);
qemu_register_boot_set(spapr_boot_set, spapr);
}
@@ -1794,6 +1806,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
mc->max_cpus = MAX_CPUS;
mc->no_parallel = 1;
mc->default_boot_order = "";
mc->default_ram_size = 512 * M_BYTE;
mc->kvm_type = spapr_kvm_type;
mc->has_dynamic_sysbus = true;
@@ -1816,7 +1829,12 @@ static const TypeInfo spapr_machine_info = {
};
#define SPAPR_COMPAT_2_3 \
HW_COMPAT_2_3
HW_COMPAT_2_3 \
{\
.driver = "spapr-pci-host-bridge",\
.property = "dynamic-reconfiguration",\
.value = "off",\
},
#define SPAPR_COMPAT_2_2 \
SPAPR_COMPAT_2_3 \
@@ -1905,10 +1923,15 @@ static const TypeInfo spapr_machine_2_2_info = {
static void spapr_machine_2_3_class_init(ObjectClass *oc, void *data)
{
static GlobalProperty compat_props[] = {
SPAPR_COMPAT_2_3
{ /* end of list */ }
};
MachineClass *mc = MACHINE_CLASS(oc);
mc->name = "pseries-2.3";
mc->desc = "pSeries Logical Partition (PAPR compliant) v2.3";
mc->compat_props = compat_props;
}
static const TypeInfo spapr_machine_2_3_info = {

hw/ppc/spapr_drc.c (new file, 744 lines)

@@ -0,0 +1,744 @@
/*
* QEMU SPAPR Dynamic Reconfiguration Connector Implementation
*
* Copyright IBM Corp. 2014
*
* Authors:
* Michael Roth <mdroth@linux.vnet.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#include "hw/ppc/spapr_drc.h"
#include "qom/object.h"
#include "hw/qdev.h"
#include "qapi/visitor.h"
#include "qemu/error-report.h"
/* #define DEBUG_SPAPR_DRC */
#ifdef DEBUG_SPAPR_DRC
#define DPRINTF(fmt, ...) \
do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#define DPRINTFN(fmt, ...) \
do { DPRINTF(fmt, ## __VA_ARGS__); fprintf(stderr, "\n"); } while (0)
#else
#define DPRINTF(fmt, ...) \
do { } while (0)
#define DPRINTFN(fmt, ...) \
do { } while (0)
#endif
#define DRC_CONTAINER_PATH "/dr-connector"
#define DRC_INDEX_TYPE_SHIFT 28
#define DRC_INDEX_ID_MASK (~(~0 << DRC_INDEX_TYPE_SHIFT))
static sPAPRDRConnectorTypeShift get_type_shift(sPAPRDRConnectorType type)
{
uint32_t shift = 0;
/* make sure this isn't SPAPR_DR_CONNECTOR_TYPE_ANY, or some
* other wonky value.
*/
g_assert(is_power_of_2(type));
while (type != (1 << shift)) {
shift++;
}
return shift;
}
static uint32_t get_index(sPAPRDRConnector *drc)
{
/* there is no set format for a DRC index: it only needs to be globally
* unique. this is how the DRC type is encoded on bare-metal, however,
* so we might as well do the same here
*/
return (get_type_shift(drc->type) << DRC_INDEX_TYPE_SHIFT) |
(drc->id & DRC_INDEX_ID_MASK);
}
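
A worked illustration of the index layout computed above, with hypothetical values (the real type enum lives in spapr_drc.h): a connector type whose single set bit is bit 4, combined with id 0x28, yields (4 << 28) | 0x28 = 0x40000028.

    #include <stdint.h>
    #include <stdio.h>

    /* illustration only: same math as get_index(), hypothetical inputs */
    int main(void)
    {
        uint32_t type_shift = 4;                 /* assumed: type == 1 << 4 */
        uint32_t id = 0x28;                      /* assumed connector id */
        uint32_t index = (type_shift << 28) | (id & 0x0fffffff);
        printf("0x%x\n", index);                 /* prints 0x40000028 */
        return 0;
    }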
static int set_isolation_state(sPAPRDRConnector *drc,
sPAPRDRIsolationState state)
{
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
DPRINTFN("drc: %x, set_isolation_state: %x", get_index(drc), state);
drc->isolation_state = state;
if (drc->isolation_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) {
/* if we're awaiting release, but still in an unconfigured state,
* it's likely the guest is still in the process of configuring
* the device and is transitioning it to an ISOLATED
* state as a part of that process. so we only complete the
* removal when this transition happens for a device in a
* configured state, as suggested by the state diagram from
* PAPR+ 2.7, 13.4
*/
if (drc->awaiting_release) {
if (drc->configured) {
DPRINTFN("finalizing device removal");
drck->detach(drc, DEVICE(drc->dev), drc->detach_cb,
drc->detach_cb_opaque, NULL);
} else {
DPRINTFN("deferring device removal on unconfigured device\n");
}
}
drc->configured = false;
}
return 0;
}
static int set_indicator_state(sPAPRDRConnector *drc,
sPAPRDRIndicatorState state)
{
DPRINTFN("drc: %x, set_indicator_state: %x", get_index(drc), state);
drc->indicator_state = state;
return 0;
}
static int set_allocation_state(sPAPRDRConnector *drc,
sPAPRDRAllocationState state)
{
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
DPRINTFN("drc: %x, set_allocation_state: %x", get_index(drc), state);
if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI) {
drc->allocation_state = state;
if (drc->awaiting_release &&
drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) {
DPRINTFN("finalizing device removal");
drck->detach(drc, DEVICE(drc->dev), drc->detach_cb,
drc->detach_cb_opaque, NULL);
}
}
return 0;
}
static uint32_t get_type(sPAPRDRConnector *drc)
{
return drc->type;
}
static const char *get_name(sPAPRDRConnector *drc)
{
return drc->name;
}
static const void *get_fdt(sPAPRDRConnector *drc, int *fdt_start_offset)
{
if (fdt_start_offset) {
*fdt_start_offset = drc->fdt_start_offset;
}
return drc->fdt;
}
static void set_configured(sPAPRDRConnector *drc)
{
DPRINTFN("drc: %x, set_configured", get_index(drc));
if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_UNISOLATED) {
/* guest should not be configuring an isolated device */
DPRINTFN("drc: %x, set_configured: skipping isolated device",
get_index(drc));
return;
}
drc->configured = true;
}
/*
* dr-entity-sense sensor value
* returned via get-sensor-state RTAS calls
* as expected by state diagram in PAPR+ 2.7, 13.4
* based on the current allocation/indicator/power states
* for the DR connector.
*/
static sPAPRDREntitySense entity_sense(sPAPRDRConnector *drc)
{
sPAPRDREntitySense state;
if (drc->dev) {
if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI &&
drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) {
/* for logical DR, we return a state of UNUSABLE
* iff the allocation state is UNUSABLE.
* Otherwise, report the state as USABLE/PRESENT,
* as we would for PCI.
*/
state = SPAPR_DR_ENTITY_SENSE_UNUSABLE;
} else {
/* this assumes all PCI devices are assigned to
* a 'live insertion' power domain, where QEMU
* manages power state automatically as opposed
* to the guest. present, non-PCI resources are
* unaffected by power state.
*/
state = SPAPR_DR_ENTITY_SENSE_PRESENT;
}
} else {
if (drc->type == SPAPR_DR_CONNECTOR_TYPE_PCI) {
/* PCI devices, and only PCI devices, use EMPTY
* in cases where we'd otherwise use UNUSABLE
*/
state = SPAPR_DR_ENTITY_SENSE_EMPTY;
} else {
state = SPAPR_DR_ENTITY_SENSE_UNUSABLE;
}
}
DPRINTFN("drc: %x, entity_sense: %x", get_index(drc), state);
return state;
}
static void prop_get_index(Object *obj, Visitor *v, void *opaque,
const char *name, Error **errp)
{
sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj);
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
uint32_t value = (uint32_t)drck->get_index(drc);
visit_type_uint32(v, &value, name, errp);
}
static void prop_get_type(Object *obj, Visitor *v, void *opaque,
const char *name, Error **errp)
{
sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj);
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
uint32_t value = (uint32_t)drck->get_type(drc);
visit_type_uint32(v, &value, name, errp);
}
static char *prop_get_name(Object *obj, Error **errp)
{
sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj);
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
return g_strdup(drck->get_name(drc));
}
static void prop_get_entity_sense(Object *obj, Visitor *v, void *opaque,
const char *name, Error **errp)
{
sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj);
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
uint32_t value = (uint32_t)drck->entity_sense(drc);
visit_type_uint32(v, &value, name, errp);
}
static void prop_get_fdt(Object *obj, Visitor *v, void *opaque,
const char *name, Error **errp)
{
sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj);
int fdt_offset_next, fdt_offset, fdt_depth;
void *fdt;
if (!drc->fdt) {
return;
}
fdt = drc->fdt;
fdt_offset = drc->fdt_start_offset;
fdt_depth = 0;
do {
const char *name = NULL;
const struct fdt_property *prop = NULL;
int prop_len = 0, name_len = 0;
uint32_t tag;
tag = fdt_next_tag(fdt, fdt_offset, &fdt_offset_next);
switch (tag) {
case FDT_BEGIN_NODE:
fdt_depth++;
name = fdt_get_name(fdt, fdt_offset, &name_len);
visit_start_struct(v, NULL, NULL, name, 0, NULL);
break;
case FDT_END_NODE:
/* shouldn't ever see an FDT_END_NODE before FDT_BEGIN_NODE */
g_assert(fdt_depth > 0);
visit_end_struct(v, NULL);
fdt_depth--;
break;
case FDT_PROP: {
int i;
prop = fdt_get_property_by_offset(fdt, fdt_offset, &prop_len);
name = fdt_string(fdt, fdt32_to_cpu(prop->nameoff));
visit_start_list(v, name, NULL);
for (i = 0; i < prop_len; i++) {
visit_type_uint8(v, (uint8_t *)&prop->data[i], NULL, NULL);
}
visit_end_list(v, NULL);
break;
}
default:
error_setg(&error_abort, "device FDT in unexpected state: %d", tag);
}
fdt_offset = fdt_offset_next;
} while (fdt_depth != 0);
}
static void attach(sPAPRDRConnector *drc, DeviceState *d, void *fdt,
int fdt_start_offset, bool coldplug, Error **errp)
{
DPRINTFN("drc: %x, attach", get_index(drc));
if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) {
error_setg(errp, "an attached device is still awaiting release");
return;
}
if (drc->type == SPAPR_DR_CONNECTOR_TYPE_PCI) {
g_assert(drc->allocation_state == SPAPR_DR_ALLOCATION_STATE_USABLE);
}
g_assert(fdt || coldplug);
/* NOTE: setting the initial isolation state to UNISOLATED means we can't
* detach unless the guest has a userspace/kernel that moves this state
* back to ISOLATED in response to an unplug event, or this is done
* manually by the admin beforehand. if we force things while the guest
* may be accessing the device, we can easily crash the guest, so we
* defer completion of removal in such cases to the reset() hook.
*/
if (drc->type == SPAPR_DR_CONNECTOR_TYPE_PCI) {
drc->isolation_state = SPAPR_DR_ISOLATION_STATE_UNISOLATED;
}
drc->indicator_state = SPAPR_DR_INDICATOR_STATE_ACTIVE;
drc->dev = d;
drc->fdt = fdt;
drc->fdt_start_offset = fdt_start_offset;
drc->configured = false;
object_property_add_link(OBJECT(drc), "device",
object_get_typename(OBJECT(drc->dev)),
(Object **)(&drc->dev),
NULL, 0, NULL);
}
static void detach(sPAPRDRConnector *drc, DeviceState *d,
spapr_drc_detach_cb *detach_cb,
void *detach_cb_opaque, Error **errp)
{
DPRINTFN("drc: %x, detach", get_index(drc));
drc->detach_cb = detach_cb;
drc->detach_cb_opaque = detach_cb_opaque;
if (drc->isolation_state != SPAPR_DR_ISOLATION_STATE_ISOLATED) {
DPRINTFN("awaiting transition to isolated state before removal");
drc->awaiting_release = true;
return;
}
if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI &&
drc->allocation_state != SPAPR_DR_ALLOCATION_STATE_UNUSABLE) {
DPRINTFN("awaiting transition to unusable state before removal");
drc->awaiting_release = true;
return;
}
drc->indicator_state = SPAPR_DR_INDICATOR_STATE_INACTIVE;
if (drc->detach_cb) {
drc->detach_cb(drc->dev, drc->detach_cb_opaque);
}
drc->awaiting_release = false;
g_free(drc->fdt);
drc->fdt = NULL;
drc->fdt_start_offset = 0;
object_property_del(OBJECT(drc), "device", NULL);
drc->dev = NULL;
drc->detach_cb = NULL;
drc->detach_cb_opaque = NULL;
}
static bool release_pending(sPAPRDRConnector *drc)
{
return drc->awaiting_release;
}
static void reset(DeviceState *d)
{
sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d);
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
DPRINTFN("drc reset: %x", drck->get_index(drc));
/* immediately upon reset we can assume that DRCs whose devices
* are pending removal can be safely removed, and that they will
* subsequently be left in an ISOLATED state. move the DRC to this
* state in these cases (which will in turn complete any pending
* device removals)
*/
if (drc->awaiting_release) {
drck->set_isolation_state(drc, SPAPR_DR_ISOLATION_STATE_ISOLATED);
/* generally this should also finalize the removal, but if the device
* hasn't yet been configured we normally defer removal under the
* assumption that this transition is taking place as part of device
* configuration. so check if we're still waiting after this, and
* force removal if we are
*/
if (drc->awaiting_release) {
drck->detach(drc, DEVICE(drc->dev), drc->detach_cb,
drc->detach_cb_opaque, NULL);
}
/* non-PCI devices may be awaiting a transition to UNUSABLE */
if (drc->type != SPAPR_DR_CONNECTOR_TYPE_PCI &&
drc->awaiting_release) {
drck->set_allocation_state(drc, SPAPR_DR_ALLOCATION_STATE_UNUSABLE);
}
}
}
static void realize(DeviceState *d, Error **errp)
{
sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d);
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
Object *root_container;
char link_name[256];
gchar *child_name;
Error *err = NULL;
DPRINTFN("drc realize: %x", drck->get_index(drc));
/* NOTE: we do this as part of realize/unrealize because the guest
* will communicate with the DRC via RTAS calls
* referencing the global DRC index. By unlinking the DRC
* from DRC_CONTAINER_PATH/<drc_index> we effectively make it
* inaccessible by the guest, since lookups rely on this path
* existing in the composition tree
*/
root_container = container_get(object_get_root(), DRC_CONTAINER_PATH);
snprintf(link_name, sizeof(link_name), "%x", drck->get_index(drc));
child_name = object_get_canonical_path_component(OBJECT(drc));
DPRINTFN("drc child name: %s", child_name);
object_property_add_alias(root_container, link_name,
drc->owner, child_name, &err);
if (err) {
error_report("%s", error_get_pretty(err));
error_free(err);
object_unref(OBJECT(drc));
}
DPRINTFN("drc realize complete");
}
static void unrealize(DeviceState *d, Error **errp)
{
sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(d);
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
Object *root_container;
char name[256];
Error *err = NULL;
DPRINTFN("drc unrealize: %x", drck->get_index(drc));
root_container = container_get(object_get_root(), DRC_CONTAINER_PATH);
snprintf(name, sizeof(name), "%x", drck->get_index(drc));
object_property_del(root_container, name, &err);
if (err) {
error_report("%s", error_get_pretty(err));
error_free(err);
object_unref(OBJECT(drc));
}
}
sPAPRDRConnector *spapr_dr_connector_new(Object *owner,
sPAPRDRConnectorType type,
uint32_t id)
{
sPAPRDRConnector *drc =
SPAPR_DR_CONNECTOR(object_new(TYPE_SPAPR_DR_CONNECTOR));
g_assert(type);
drc->type = type;
drc->id = id;
drc->owner = owner;
object_property_add_child(owner, "dr-connector[*]", OBJECT(drc), NULL);
object_property_set_bool(OBJECT(drc), true, "realized", NULL);
/* human-readable name for a DRC to encode into the DT
* description. this is mainly used within a guest in place
* of the unique DRC index.
*
* in the case of VIO/PCI devices, it corresponds to a
* "location code" that maps a logical device/function (DRC index)
* to a physical (or virtual in the case of VIO) location in the
* system by chaining together the "location label" for each
* encapsulating component.
*
* since this has more to do with diagnosing physical hardware
* issues than guest compatibility, we choose location codes/DRC
* names that adhere to the documented format, but avoid encoding
* the entire topology information into the label/code, instead
* just using the location codes based on the labels for the
* endpoints (VIO/PCI adaptor connectors), which is basically
* just "C" followed by an integer ID.
*
* DRC names as documented by PAPR+ v2.7, 13.5.2.4
* location codes as documented by PAPR+ v2.7, 12.3.1.5
*/
switch (drc->type) {
case SPAPR_DR_CONNECTOR_TYPE_CPU:
drc->name = g_strdup_printf("CPU %d", id);
break;
case SPAPR_DR_CONNECTOR_TYPE_PHB:
drc->name = g_strdup_printf("PHB %d", id);
break;
case SPAPR_DR_CONNECTOR_TYPE_VIO:
case SPAPR_DR_CONNECTOR_TYPE_PCI:
drc->name = g_strdup_printf("C%d", id);
break;
case SPAPR_DR_CONNECTOR_TYPE_LMB:
drc->name = g_strdup_printf("LMB %d", id);
break;
default:
g_assert(false);
}
/* PCI slots always start in a USABLE state, and stay there */
if (drc->type == SPAPR_DR_CONNECTOR_TYPE_PCI) {
drc->allocation_state = SPAPR_DR_ALLOCATION_STATE_USABLE;
}
return drc;
}
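
A hedged usage sketch for the constructor above, inside the QEMU tree: the PCI host bridge code later in this series creates one PCI connector per slot/function along roughly these lines. The id layout (PHB index in the upper bits, bus and devfn below) matches spapr_phb_get_pci_drc() in the spapr_pci.c hunks further down; the loop bound is illustrative.

    #include "hw/pci-host/spapr.h"
    #include "hw/ppc/spapr_drc.h"

    /* sketch only: one DR connector per possible slot/function on bus 0 */
    static void example_create_pci_drcs(sPAPRPHBState *sphb)
    {
        int i;

        for (i = 0; i < 32 * 8; i++) {   /* 32 slots x 8 functions, bus 0 */
            spapr_dr_connector_new(OBJECT(sphb), SPAPR_DR_CONNECTOR_TYPE_PCI,
                                   (sphb->index << 16) | i);
        }
    }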
static void spapr_dr_connector_instance_init(Object *obj)
{
sPAPRDRConnector *drc = SPAPR_DR_CONNECTOR(obj);
object_property_add_uint32_ptr(obj, "isolation-state",
&drc->isolation_state, NULL);
object_property_add_uint32_ptr(obj, "indicator-state",
&drc->indicator_state, NULL);
object_property_add_uint32_ptr(obj, "allocation-state",
&drc->allocation_state, NULL);
object_property_add_uint32_ptr(obj, "id", &drc->id, NULL);
object_property_add(obj, "index", "uint32", prop_get_index,
NULL, NULL, NULL, NULL);
object_property_add(obj, "connector_type", "uint32", prop_get_type,
NULL, NULL, NULL, NULL);
object_property_add_str(obj, "name", prop_get_name, NULL, NULL);
object_property_add(obj, "entity-sense", "uint32", prop_get_entity_sense,
NULL, NULL, NULL, NULL);
object_property_add(obj, "fdt", "struct", prop_get_fdt,
NULL, NULL, NULL, NULL);
}
static void spapr_dr_connector_class_init(ObjectClass *k, void *data)
{
DeviceClass *dk = DEVICE_CLASS(k);
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_CLASS(k);
dk->reset = reset;
dk->realize = realize;
dk->unrealize = unrealize;
drck->set_isolation_state = set_isolation_state;
drck->set_indicator_state = set_indicator_state;
drck->set_allocation_state = set_allocation_state;
drck->get_index = get_index;
drck->get_type = get_type;
drck->get_name = get_name;
drck->get_fdt = get_fdt;
drck->set_configured = set_configured;
drck->entity_sense = entity_sense;
drck->attach = attach;
drck->detach = detach;
drck->release_pending = release_pending;
}
static const TypeInfo spapr_dr_connector_info = {
.name = TYPE_SPAPR_DR_CONNECTOR,
.parent = TYPE_DEVICE,
.instance_size = sizeof(sPAPRDRConnector),
.instance_init = spapr_dr_connector_instance_init,
.class_size = sizeof(sPAPRDRConnectorClass),
.class_init = spapr_dr_connector_class_init,
};
static void spapr_drc_register_types(void)
{
type_register_static(&spapr_dr_connector_info);
}
type_init(spapr_drc_register_types)
/* helper functions for external users */
sPAPRDRConnector *spapr_dr_connector_by_index(uint32_t index)
{
Object *obj;
char name[256];
snprintf(name, sizeof(name), "%s/%x", DRC_CONTAINER_PATH, index);
obj = object_resolve_path(name, NULL);
return !obj ? NULL : SPAPR_DR_CONNECTOR(obj);
}
sPAPRDRConnector *spapr_dr_connector_by_id(sPAPRDRConnectorType type,
uint32_t id)
{
return spapr_dr_connector_by_index(
(get_type_shift(type) << DRC_INDEX_TYPE_SHIFT) |
(id & DRC_INDEX_ID_MASK));
}
/* generate a string that describes the DRC to encode into the
* device tree.
*
* as documented by PAPR+ v2.7, 13.5.2.6 and C.6.1
*/
static const char *spapr_drc_get_type_str(sPAPRDRConnectorType type)
{
switch (type) {
case SPAPR_DR_CONNECTOR_TYPE_CPU:
return "CPU";
case SPAPR_DR_CONNECTOR_TYPE_PHB:
return "PHB";
case SPAPR_DR_CONNECTOR_TYPE_VIO:
return "SLOT";
case SPAPR_DR_CONNECTOR_TYPE_PCI:
return "28";
case SPAPR_DR_CONNECTOR_TYPE_LMB:
return "MEM";
default:
g_assert(false);
}
return NULL;
}
/**
* spapr_drc_populate_dt
*
* @fdt: libfdt device tree
* @path: path in the DT to generate properties
* @owner: parent Object/DeviceState for which to generate DRC
* descriptions
* @drc_type_mask: mask of sPAPRDRConnectorType values corresponding
* to the types of DRCs to generate entries for
*
* generate OF properties to describe DRC topology/indices to guests
*
* as documented in PAPR+ v2.1, 13.5.2
*/
int spapr_drc_populate_dt(void *fdt, int fdt_offset, Object *owner,
uint32_t drc_type_mask)
{
Object *root_container;
ObjectProperty *prop;
uint32_t drc_count = 0;
GArray *drc_indexes, *drc_power_domains;
GString *drc_names, *drc_types;
int ret;
/* the first entry of each property is a 32-bit integer encoding
* the number of elements in the array. we won't know this until
* we complete the iteration through all the matching DRCs, but
* reserve the space now and set the offsets accordingly so we
* can fill them in later.
*/
drc_indexes = g_array_new(false, true, sizeof(uint32_t));
drc_indexes = g_array_set_size(drc_indexes, 1);
drc_power_domains = g_array_new(false, true, sizeof(uint32_t));
drc_power_domains = g_array_set_size(drc_power_domains, 1);
drc_names = g_string_set_size(g_string_new(NULL), sizeof(uint32_t));
drc_types = g_string_set_size(g_string_new(NULL), sizeof(uint32_t));
/* aliases for all DRConnector objects will be rooted in QOM
* composition tree at DRC_CONTAINER_PATH
*/
root_container = container_get(object_get_root(), DRC_CONTAINER_PATH);
QTAILQ_FOREACH(prop, &root_container->properties, node) {
Object *obj;
sPAPRDRConnector *drc;
sPAPRDRConnectorClass *drck;
uint32_t drc_index, drc_power_domain;
if (!strstart(prop->type, "link<", NULL)) {
continue;
}
obj = object_property_get_link(root_container, prop->name, NULL);
drc = SPAPR_DR_CONNECTOR(obj);
drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
if (owner && (drc->owner != owner)) {
continue;
}
if ((drc->type & drc_type_mask) == 0) {
continue;
}
drc_count++;
/* ibm,drc-indexes */
drc_index = cpu_to_be32(drck->get_index(drc));
g_array_append_val(drc_indexes, drc_index);
/* ibm,drc-power-domains */
drc_power_domain = cpu_to_be32(-1);
g_array_append_val(drc_power_domains, drc_power_domain);
/* ibm,drc-names */
drc_names = g_string_append(drc_names, drck->get_name(drc));
drc_names = g_string_insert_len(drc_names, -1, "\0", 1);
/* ibm,drc-types */
drc_types = g_string_append(drc_types,
spapr_drc_get_type_str(drc->type));
drc_types = g_string_insert_len(drc_types, -1, "\0", 1);
}
/* now write the drc count into the space we reserved at the
* beginning of the arrays previously
*/
*(uint32_t *)drc_indexes->data = cpu_to_be32(drc_count);
*(uint32_t *)drc_power_domains->data = cpu_to_be32(drc_count);
*(uint32_t *)drc_names->str = cpu_to_be32(drc_count);
*(uint32_t *)drc_types->str = cpu_to_be32(drc_count);
ret = fdt_setprop(fdt, fdt_offset, "ibm,drc-indexes",
drc_indexes->data,
drc_indexes->len * sizeof(uint32_t));
if (ret) {
fprintf(stderr, "Couldn't create ibm,drc-indexes property\n");
goto out;
}
ret = fdt_setprop(fdt, fdt_offset, "ibm,drc-power-domains",
drc_power_domains->data,
drc_power_domains->len * sizeof(uint32_t));
if (ret) {
fprintf(stderr, "Couldn't finalize ibm,drc-power-domains property\n");
goto out;
}
ret = fdt_setprop(fdt, fdt_offset, "ibm,drc-names",
drc_names->str, drc_names->len);
if (ret) {
fprintf(stderr, "Couldn't finalize ibm,drc-names property\n");
goto out;
}
ret = fdt_setprop(fdt, fdt_offset, "ibm,drc-types",
drc_types->str, drc_types->len);
if (ret) {
fprintf(stderr, "Couldn't finalize ibm,drc-types property\n");
goto out;
}
out:
g_array_free(drc_indexes, true);
g_array_free(drc_power_domains, true);
g_string_free(drc_names, true);
g_string_free(drc_types, true);
return ret;
}
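
A hedged sketch of the expected caller, inside the QEMU tree: when building a PHB's device-tree node, only that PHB's PCI connectors are advertised by passing the PHB as owner together with a type mask (the node offset is whatever the caller already has from fdt_add_subnode()).

    #include "hw/pci-host/spapr.h"
    #include "hw/ppc/spapr_drc.h"

    /* sketch only: emit ibm,drc-* properties for one PHB's PCI slots */
    static int example_advertise_drcs(void *fdt, int bus_off, sPAPRPHBState *phb)
    {
        return spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
                                     SPAPR_DR_CONNECTOR_TYPE_PCI);
    }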


@@ -32,6 +32,9 @@
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/pci/pci.h"
#include "hw/pci-host/spapr.h"
#include "hw/ppc/spapr_drc.h"
#include <libfdt.h>
@@ -77,6 +80,7 @@ struct rtas_error_log {
#define RTAS_LOG_TYPE_ECC_UNCORR 0x00000009
#define RTAS_LOG_TYPE_ECC_CORR 0x0000000a
#define RTAS_LOG_TYPE_EPOW 0x00000040
#define RTAS_LOG_TYPE_HOTPLUG 0x000000e5
uint32_t extended_length;
} QEMU_PACKED;
@@ -166,6 +170,38 @@ struct epow_log_full {
struct rtas_event_log_v6_epow epow;
} QEMU_PACKED;
struct rtas_event_log_v6_hp {
#define RTAS_LOG_V6_SECTION_ID_HOTPLUG 0x4850 /* HP */
struct rtas_event_log_v6_section_header hdr;
uint8_t hotplug_type;
#define RTAS_LOG_V6_HP_TYPE_CPU 1
#define RTAS_LOG_V6_HP_TYPE_MEMORY 2
#define RTAS_LOG_V6_HP_TYPE_SLOT 3
#define RTAS_LOG_V6_HP_TYPE_PHB 4
#define RTAS_LOG_V6_HP_TYPE_PCI 5
uint8_t hotplug_action;
#define RTAS_LOG_V6_HP_ACTION_ADD 1
#define RTAS_LOG_V6_HP_ACTION_REMOVE 2
uint8_t hotplug_identifier;
#define RTAS_LOG_V6_HP_ID_DRC_NAME 1
#define RTAS_LOG_V6_HP_ID_DRC_INDEX 2
#define RTAS_LOG_V6_HP_ID_DRC_COUNT 3
uint8_t reserved;
union {
uint32_t index;
uint32_t count;
char name[1];
} drc;
} QEMU_PACKED;
struct hp_log_full {
struct rtas_error_log hdr;
struct rtas_event_log_v6 v6hdr;
struct rtas_event_log_v6_maina maina;
struct rtas_event_log_v6_mainb mainb;
struct rtas_event_log_v6_hp hp;
} QEMU_PACKED;
#define EVENT_MASK_INTERNAL_ERRORS 0x80000000
#define EVENT_MASK_EPOW 0x40000000
#define EVENT_MASK_HOTPLUG 0x10000000
@@ -181,67 +217,105 @@ struct epow_log_full {
} \
} while (0)
void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq)
void spapr_events_fdt_skel(void *fdt, uint32_t check_exception_irq)
{
uint32_t epow_irq_ranges[] = {cpu_to_be32(epow_irq), cpu_to_be32(1)};
uint32_t epow_interrupts[] = {cpu_to_be32(epow_irq), 0};
uint32_t irq_ranges[] = {cpu_to_be32(check_exception_irq), cpu_to_be32(1)};
uint32_t interrupts[] = {cpu_to_be32(check_exception_irq), 0};
_FDT((fdt_begin_node(fdt, "event-sources")));
_FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
_FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
_FDT((fdt_property(fdt, "interrupt-ranges",
epow_irq_ranges, sizeof(epow_irq_ranges))));
irq_ranges, sizeof(irq_ranges))));
_FDT((fdt_begin_node(fdt, "epow-events")));
_FDT((fdt_property(fdt, "interrupts",
epow_interrupts, sizeof(epow_interrupts))));
_FDT((fdt_property(fdt, "interrupts", interrupts, sizeof(interrupts))));
_FDT((fdt_end_node(fdt)));
_FDT((fdt_end_node(fdt)));
}
static struct epow_log_full *pending_epow;
static void rtas_event_log_queue(int log_type, void *data, bool exception)
{
sPAPREventLogEntry *entry = g_new(sPAPREventLogEntry, 1);
g_assert(data);
entry->log_type = log_type;
entry->exception = exception;
entry->data = data;
QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next);
}
static sPAPREventLogEntry *rtas_event_log_dequeue(uint32_t event_mask,
bool exception)
{
sPAPREventLogEntry *entry = NULL;
/* for now, all queued events (EPOW and hotplug) are surfaced via the EPOW mask */
if ((event_mask & EVENT_MASK_EPOW) == 0) {
return NULL;
}
QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
if (entry->exception != exception) {
continue;
}
/* EPOW and hotplug events are surfaced in the same manner */
if (entry->log_type == RTAS_LOG_TYPE_EPOW ||
entry->log_type == RTAS_LOG_TYPE_HOTPLUG) {
break;
}
}
if (entry) {
QTAILQ_REMOVE(&spapr->pending_events, entry, next);
}
return entry;
}
static bool rtas_event_log_contains(uint32_t event_mask, bool exception)
{
sPAPREventLogEntry *entry = NULL;
/* for now, all queued events (EPOW and hotplug) are surfaced via the EPOW mask */
if ((event_mask & EVENT_MASK_EPOW) == 0) {
return false;
}
QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
if (entry->exception != exception) {
continue;
}
/* EPOW and hotplug events are surfaced in the same manner */
if (entry->log_type == RTAS_LOG_TYPE_EPOW ||
entry->log_type == RTAS_LOG_TYPE_HOTPLUG) {
return true;
}
}
return false;
}
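
The two helpers above give spapr_events.c a simple producer/consumer queue: producers allocate a log, queue it, and pulse the check-exception interrupt; the guest's RTAS handlers further down dequeue the entry and copy it out. A compact sketch of the producer side (it would live in this file, since rtas_event_log_queue() is static; the payload here is hypothetical):

    /* sketch only: post an already-built RTAS log to the guest */
    static void example_post_event(struct epow_log_full *log)
    {
        rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, log, true);
        qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq));
    }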
static uint32_t next_plid;
static void spapr_powerdown_req(Notifier *n, void *opaque)
static void spapr_init_v6hdr(struct rtas_event_log_v6 *v6hdr)
{
sPAPREnvironment *spapr = container_of(n, sPAPREnvironment, epow_notifier);
struct rtas_error_log *hdr;
struct rtas_event_log_v6 *v6hdr;
struct rtas_event_log_v6_maina *maina;
struct rtas_event_log_v6_mainb *mainb;
struct rtas_event_log_v6_epow *epow;
struct tm tm;
int year;
if (pending_epow) {
/* For now, we just throw away earlier events if two come
* along before any are consumed. This is sufficient for our
* powerdown messages, but we'll need more if we do more
* general error/event logging */
g_free(pending_epow);
}
pending_epow = g_malloc0(sizeof(*pending_epow));
hdr = &pending_epow->hdr;
v6hdr = &pending_epow->v6hdr;
maina = &pending_epow->maina;
mainb = &pending_epow->mainb;
epow = &pending_epow->epow;
hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6
| RTAS_LOG_SEVERITY_EVENT
| RTAS_LOG_DISPOSITION_NOT_RECOVERED
| RTAS_LOG_OPTIONAL_PART_PRESENT
| RTAS_LOG_TYPE_EPOW);
hdr->extended_length = cpu_to_be32(sizeof(*pending_epow)
- sizeof(pending_epow->hdr));
v6hdr->b0 = RTAS_LOG_V6_B0_VALID | RTAS_LOG_V6_B0_NEW_LOG
| RTAS_LOG_V6_B0_BIGENDIAN;
v6hdr->b2 = RTAS_LOG_V6_B2_POWERPC_FORMAT
| RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT;
v6hdr->company = cpu_to_be32(RTAS_LOG_V6_COMPANY_IBM);
}
static void spapr_init_maina(struct rtas_event_log_v6_maina *maina,
int section_count)
{
struct tm tm;
int year;
maina->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINA);
maina->hdr.section_length = cpu_to_be16(sizeof(*maina));
@@ -256,8 +330,37 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
| (to_bcd(tm.tm_min) << 16)
| (to_bcd(tm.tm_sec) << 8));
maina->creator_id = 'H'; /* Hypervisor */
maina->section_count = 3; /* Main-A, Main-B and EPOW */
maina->section_count = section_count;
maina->plid = next_plid++;
}
static void spapr_powerdown_req(Notifier *n, void *opaque)
{
sPAPREnvironment *spapr = container_of(n, sPAPREnvironment, epow_notifier);
struct rtas_error_log *hdr;
struct rtas_event_log_v6 *v6hdr;
struct rtas_event_log_v6_maina *maina;
struct rtas_event_log_v6_mainb *mainb;
struct rtas_event_log_v6_epow *epow;
struct epow_log_full *new_epow;
new_epow = g_malloc0(sizeof(*new_epow));
hdr = &new_epow->hdr;
v6hdr = &new_epow->v6hdr;
maina = &new_epow->maina;
mainb = &new_epow->mainb;
epow = &new_epow->epow;
hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6
| RTAS_LOG_SEVERITY_EVENT
| RTAS_LOG_DISPOSITION_NOT_RECOVERED
| RTAS_LOG_OPTIONAL_PART_PRESENT
| RTAS_LOG_TYPE_EPOW);
hdr->extended_length = cpu_to_be32(sizeof(*new_epow)
- sizeof(new_epow->hdr));
spapr_init_v6hdr(v6hdr);
spapr_init_maina(maina, 3 /* Main-A, Main-B and EPOW */);
mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB);
mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb));
@@ -274,7 +377,80 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL;
epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC;
qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->epow_irq));
rtas_event_log_queue(RTAS_LOG_TYPE_EPOW, new_epow, true);
qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq));
}
static void spapr_hotplug_req_event(sPAPRDRConnector *drc, uint8_t hp_action)
{
struct hp_log_full *new_hp;
struct rtas_error_log *hdr;
struct rtas_event_log_v6 *v6hdr;
struct rtas_event_log_v6_maina *maina;
struct rtas_event_log_v6_mainb *mainb;
struct rtas_event_log_v6_hp *hp;
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
sPAPRDRConnectorType drc_type = drck->get_type(drc);
new_hp = g_malloc0(sizeof(struct hp_log_full));
hdr = &new_hp->hdr;
v6hdr = &new_hp->v6hdr;
maina = &new_hp->maina;
mainb = &new_hp->mainb;
hp = &new_hp->hp;
hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6
| RTAS_LOG_SEVERITY_EVENT
| RTAS_LOG_DISPOSITION_NOT_RECOVERED
| RTAS_LOG_OPTIONAL_PART_PRESENT
| RTAS_LOG_INITIATOR_HOTPLUG
| RTAS_LOG_TYPE_HOTPLUG);
hdr->extended_length = cpu_to_be32(sizeof(*new_hp)
- sizeof(new_hp->hdr));
spapr_init_v6hdr(v6hdr);
spapr_init_maina(maina, 3 /* Main-A, Main-B, HP */);
mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB);
mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb));
mainb->subsystem_id = 0x80; /* External environment */
mainb->event_severity = 0x00; /* Informational / non-error */
mainb->event_subtype = 0x00; /* Normal shutdown */
hp->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_HOTPLUG);
hp->hdr.section_length = cpu_to_be16(sizeof(*hp));
hp->hdr.section_version = 1; /* includes extended modifier */
hp->hotplug_action = hp_action;
switch (drc_type) {
case SPAPR_DR_CONNECTOR_TYPE_PCI:
hp->drc.index = cpu_to_be32(drck->get_index(drc));
hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX;
hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI;
break;
default:
/* we shouldn't be signaling hotplug events for resources
* that don't support them
*/
g_assert(false);
return;
}
rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq));
}
void spapr_hotplug_req_add_event(sPAPRDRConnector *drc)
{
spapr_hotplug_req_event(drc, RTAS_LOG_V6_HP_ACTION_ADD);
}
void spapr_hotplug_req_remove_event(sPAPRDRConnector *drc)
{
spapr_hotplug_req_event(drc, RTAS_LOG_V6_HP_ACTION_REMOVE);
}
static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr,
@@ -282,8 +458,10 @@ static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr,
target_ulong args,
uint32_t nret, target_ulong rets)
{
uint32_t mask, buf, len;
uint32_t mask, buf, len, event_len;
uint64_t xinfo;
sPAPREventLogEntry *event;
struct rtas_error_log *hdr;
if ((nargs < 6) || (nargs > 7) || nret != 1) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
@@ -298,25 +476,85 @@ static void check_exception(PowerPCCPU *cpu, sPAPREnvironment *spapr,
xinfo |= (uint64_t)rtas_ld(args, 6) << 32;
}
if ((mask & EVENT_MASK_EPOW) && pending_epow) {
if (sizeof(*pending_epow) < len) {
len = sizeof(*pending_epow);
}
cpu_physical_memory_write(buf, pending_epow, len);
g_free(pending_epow);
pending_epow = NULL;
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
} else {
rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
event = rtas_event_log_dequeue(mask, true);
if (!event) {
goto out_no_events;
}
hdr = event->data;
event_len = be32_to_cpu(hdr->extended_length) + sizeof(*hdr);
if (event_len < len) {
len = event_len;
}
cpu_physical_memory_write(buf, event->data, len);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
g_free(event->data);
g_free(event);
/* according to PAPR+, the IRQ must be left asserted, or re-asserted, if
* there are still pending events to be fetched via check-exception. We
* do the latter here, since our code relies on edge-triggered
* interrupts.
*/
if (rtas_event_log_contains(mask, true)) {
qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq));
}
return;
out_no_events:
rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
}
static void event_scan(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs,
target_ulong args,
uint32_t nret, target_ulong rets)
{
uint32_t mask, buf, len, event_len;
sPAPREventLogEntry *event;
struct rtas_error_log *hdr;
if (nargs != 4 || nret != 1) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
mask = rtas_ld(args, 0);
buf = rtas_ld(args, 2);
len = rtas_ld(args, 3);
event = rtas_event_log_dequeue(mask, false);
if (!event) {
goto out_no_events;
}
hdr = event->data;
event_len = be32_to_cpu(hdr->extended_length) + sizeof(*hdr);
if (event_len < len) {
len = event_len;
}
cpu_physical_memory_write(buf, event->data, len);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
g_free(event->data);
g_free(event);
return;
out_no_events:
rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
}
void spapr_events_init(sPAPREnvironment *spapr)
{
spapr->epow_irq = xics_alloc(spapr->icp, 0, 0, false);
QTAILQ_INIT(&spapr->pending_events);
spapr->check_exception_irq = xics_alloc(spapr->icp, 0, 0, false);
spapr->epow_notifier.notify = spapr_powerdown_req;
qemu_register_powerdown_notifier(&spapr->epow_notifier);
spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
check_exception);
spapr_rtas_register(RTAS_EVENT_SCAN, "event-scan", event_scan);
}


@@ -41,7 +41,7 @@ enum sPAPRTCEAccess {
static QLIST_HEAD(spapr_tce_tables, sPAPRTCETable) spapr_tce_tables;
static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
sPAPRTCETable *spapr_tce_find_by_liobn(target_ulong liobn)
{
sPAPRTCETable *tcet;
@@ -52,7 +52,7 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
}
QLIST_FOREACH(tcet, &spapr_tce_tables, list) {
if (tcet->liobn == liobn) {
if (tcet->liobn == (uint32_t)liobn) {
return tcet;
}
}
@@ -126,11 +126,11 @@ static MemoryRegionIOMMUOps spapr_iommu_ops = {
static int spapr_tce_table_realize(DeviceState *dev)
{
sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev);
uint64_t window_size = (uint64_t)tcet->nb_table << tcet->page_shift;
if (kvm_enabled()) {
if (kvm_enabled() && !(window_size >> 32)) {
tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
tcet->nb_table <<
tcet->page_shift,
window_size,
&tcet->fd,
tcet->vfio_accel);
}
@@ -161,6 +161,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
bool vfio_accel)
{
sPAPRTCETable *tcet;
char tmp[64];
if (spapr_tce_find_by_liobn(liobn)) {
fprintf(stderr, "Attempted to create TCE table with duplicate"
@@ -179,7 +180,8 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
tcet->nb_table = nb_table;
tcet->vfio_accel = vfio_accel;
object_property_add_child(OBJECT(owner), "tce-table", OBJECT(tcet), NULL);
snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn);
object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL);
object_property_set_bool(OBJECT(tcet), true, "realized", NULL);
@@ -247,7 +249,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
target_ulong ioba1 = ioba;
target_ulong tce_list = args[2];
target_ulong npages = args[3];
target_ulong ret = H_PARAMETER;
target_ulong ret = H_PARAMETER, tce = 0;
sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
CPUState *cs = CPU(cpu);
hwaddr page_mask, page_size;
@@ -267,7 +269,7 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
for (i = 0; i < npages; ++i, ioba += page_size) {
target_ulong off = (tce_list & ~SPAPR_TCE_RW) +
i * sizeof(target_ulong);
target_ulong tce = ldq_phys(cs->as, off);
tce = ldq_be_phys(cs->as, off);
ret = put_tce_emu(tcet, ioba, tce);
if (ret) {
@@ -277,11 +279,11 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
/* Trace last successful or the first problematic entry */
i = i ? (i - 1) : 0;
trace_spapr_iommu_indirect(liobn, ioba1, tce_list, i,
ldq_phys(cs->as,
tce_list + i * sizeof(target_ulong)),
ret);
if (SPAPR_IS_PCI_LIOBN(liobn)) {
trace_spapr_iommu_pci_indirect(liobn, ioba1, tce_list, i, tce, ret);
} else {
trace_spapr_iommu_indirect(liobn, ioba1, tce_list, i, tce, ret);
}
return ret;
}
@@ -315,7 +317,11 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
break;
}
}
trace_spapr_iommu_stuff(liobn, ioba, tce_value, npages, ret);
if (SPAPR_IS_PCI_LIOBN(liobn)) {
trace_spapr_iommu_pci_stuff(liobn, ioba, tce_value, npages, ret);
} else {
trace_spapr_iommu_stuff(liobn, ioba, tce_value, npages, ret);
}
return ret;
}
@@ -336,7 +342,11 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
ret = put_tce_emu(tcet, ioba, tce);
}
trace_spapr_iommu_put(liobn, ioba, tce, ret);
if (SPAPR_IS_PCI_LIOBN(liobn)) {
trace_spapr_iommu_pci_put(liobn, ioba, tce, ret);
} else {
trace_spapr_iommu_put(liobn, ioba, tce, ret);
}
return ret;
}
@@ -376,7 +386,11 @@ static target_ulong h_get_tce(PowerPCCPU *cpu, sPAPREnvironment *spapr,
args[0] = tce;
}
}
trace_spapr_iommu_get(liobn, ioba, ret, tce);
if (SPAPR_IS_PCI_LIOBN(liobn)) {
trace_spapr_iommu_pci_get(liobn, ioba, ret, tce);
} else {
trace_spapr_iommu_get(liobn, ioba, ret, tce);
}
return ret;
}


@@ -33,8 +33,11 @@
#include <libfdt.h>
#include "trace.h"
#include "qemu/error-report.h"
#include "qapi/qmp/qerror.h"
#include "hw/pci/pci_bus.h"
#include "hw/ppc/spapr_drc.h"
#include "sysemu/device_tree.h"
/* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
#define RTAS_QUERY_FN 0
@@ -47,7 +50,15 @@
#define RTAS_TYPE_MSI 1
#define RTAS_TYPE_MSIX 2
static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid)
#define _FDT(exp) \
do { \
int ret = (exp); \
if (ret < 0) { \
return ret; \
} \
} while (0)
sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid)
{
sPAPRPHBState *sphb;
@@ -61,10 +72,10 @@ static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid)
return NULL;
}
static PCIDevice *find_dev(sPAPREnvironment *spapr, uint64_t buid,
uint32_t config_addr)
PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid,
uint32_t config_addr)
{
sPAPRPHBState *sphb = find_phb(spapr, buid);
sPAPRPHBState *sphb = spapr_pci_find_phb(spapr, buid);
PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
int bus_num = (config_addr >> 16) & 0xFF;
int devfn = (config_addr >> 8) & 0xFF;
@@ -95,7 +106,7 @@ static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid,
return;
}
pci_dev = find_dev(spapr, buid, addr);
pci_dev = spapr_pci_find_dev(spapr, buid, addr);
addr = rtas_pci_cfgaddr(addr);
if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
@@ -162,7 +173,7 @@ static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid,
return;
}
pci_dev = find_dev(spapr, buid, addr);
pci_dev = spapr_pci_find_dev(spapr, buid, addr);
addr = rtas_pci_cfgaddr(addr);
if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
@@ -280,9 +291,9 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
}
/* Find sPAPRPHBState */
phb = find_phb(spapr, buid);
phb = spapr_pci_find_phb(spapr, buid);
if (phb) {
pdev = find_dev(spapr, buid, config_addr);
pdev = spapr_pci_find_dev(spapr, buid, config_addr);
}
if (!phb || !pdev) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
@@ -381,9 +392,9 @@ static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
spapr_pci_msi *msi;
/* Find sPAPRPHBState */
phb = find_phb(spapr, buid);
phb = spapr_pci_find_phb(spapr, buid);
if (phb) {
pdev = find_dev(spapr, buid, config_addr);
pdev = spapr_pci_find_dev(spapr, buid, config_addr);
}
if (!phb || !pdev) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
@@ -426,7 +437,7 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
addr = rtas_ld(args, 0);
option = rtas_ld(args, 3);
sphb = find_phb(spapr, buid);
sphb = spapr_pci_find_phb(spapr, buid);
if (!sphb) {
goto param_error_exit;
}
@@ -461,7 +472,7 @@ static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
}
buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
sphb = find_phb(spapr, buid);
sphb = spapr_pci_find_phb(spapr, buid);
if (!sphb) {
goto param_error_exit;
}
@@ -479,7 +490,7 @@ static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
switch (option) {
case RTAS_GET_PE_ADDR:
addr = rtas_ld(args, 0);
pdev = find_dev(spapr, buid, addr);
pdev = spapr_pci_find_dev(spapr, buid, addr);
if (!pdev) {
goto param_error_exit;
}
@@ -516,7 +527,7 @@ static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu,
}
buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
sphb = find_phb(spapr, buid);
sphb = spapr_pci_find_phb(spapr, buid);
if (!sphb) {
goto param_error_exit;
}
@@ -562,7 +573,7 @@ static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
option = rtas_ld(args, 3);
sphb = find_phb(spapr, buid);
sphb = spapr_pci_find_phb(spapr, buid);
if (!sphb) {
goto param_error_exit;
}
@@ -596,7 +607,7 @@ static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
}
buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
sphb = find_phb(spapr, buid);
sphb = spapr_pci_find_phb(spapr, buid);
if (!sphb) {
goto param_error_exit;
}
@@ -631,7 +642,7 @@ static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
}
buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
sphb = find_phb(spapr, buid);
sphb = spapr_pci_find_phb(spapr, buid);
if (!sphb) {
goto param_error_exit;
}
@@ -731,6 +742,372 @@ static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
return &phb->iommu_as;
}
/* Macros to operate with address in OF binding to PCI */
#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p))
#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */
#define b_p(x) b_x((x), 30, 1) /* 1 if prefetchable */
#define b_t(x) b_x((x), 29, 1) /* 1 if the address is aliased */
#define b_ss(x) b_x((x), 24, 2) /* the space code */
#define b_bbbbbbbb(x) b_x((x), 16, 8) /* bus number */
#define b_ddddd(x) b_x((x), 11, 5) /* device number */
#define b_fff(x) b_x((x), 8, 3) /* function number */
#define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */
/* for 'reg'/'assigned-addresses' OF properties */
#define RESOURCE_CELLS_SIZE 2
#define RESOURCE_CELLS_ADDRESS 3
typedef struct ResourceFields {
uint32_t phys_hi;
uint32_t phys_mid;
uint32_t phys_lo;
uint32_t size_hi;
uint32_t size_lo;
} QEMU_PACKED ResourceFields;
typedef struct ResourceProps {
ResourceFields reg[8];
ResourceFields assigned[7];
uint32_t reg_len;
uint32_t assigned_len;
} ResourceProps;
/* fill in the 'reg'/'assigned-addresses' OF properties for
* a PCI device. 'reg' describes resource requirements for a
* device's IO/MEM regions, 'assigned-addresses' describes the
* actual resource assignments.
*
* the properties are arrays of ('phys-addr', 'size') pairs describing
* the addressable regions of the PCI device, where 'phys-addr' is a
* RESOURCE_CELLS_ADDRESS-tuple of 32-bit integers corresponding to
* (phys.hi, phys.mid, phys.lo), and 'size' is a
* RESOURCE_CELLS_SIZE-tuple corresponding to (size.hi, size.lo).
*
* phys.hi = 0xYYXXXXZZ, where:
* 0xYY = npt000ss
* ||| |
* ||| +-- space code: 1 if IO region, 2 if MEM region
* ||+------ for non-relocatable IO: 1 if aliased
* || for relocatable IO: 1 if below 64KB
* || for MEM: 1 if below 1MB
* |+------- 1 if region is prefetchable
* +-------- 1 if region is non-relocatable
* 0xXXXX = bbbbbbbb dddddfff, encoding bus, slot, and function
* bits respectively
* 0xZZ = rrrrrrrr, the register number of the BAR corresponding
* to the region
*
* phys.mid and phys.lo correspond respectively to the hi/lo portions
* of the actual address of the region.
*
* how the phys-addr/size values are used differs slightly between
* 'reg' and 'assigned-addresses' properties. namely, 'reg' has
* an additional description for the config space region of the
* device, and in the case of QEMU has n=0 and phys.mid=phys.lo=0
* to describe the region as relocatable, with an address-mapping
* that corresponds directly to the PHB's address space for the
* resource. 'assigned-addresses' always has n=1 set with an absolute
* address assigned for the resource. in general, 'assigned-addresses'
* won't be populated, since addresses for PCI devices are generally
* unmapped initially and left to the guest to assign.
*
* note also that addresses defined in these properties are, at least
* for PAPR guests, relative to the PHB's IO/MEM windows, and
* correspond directly to the addresses in the BARs.
*
* in accordance with PCI Bus Binding to Open Firmware,
* IEEE Std 1275-1994, section 4.1.1, as implemented by PAPR+ v2.7,
* Appendix C.
*/
static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
{
int bus_num = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(d))));
uint32_t dev_id = (b_bbbbbbbb(bus_num) |
b_ddddd(PCI_SLOT(d->devfn)) |
b_fff(PCI_FUNC(d->devfn)));
ResourceFields *reg, *assigned;
int i, reg_idx = 0, assigned_idx = 0;
/* config space region */
reg = &rp->reg[reg_idx++];
reg->phys_hi = cpu_to_be32(dev_id);
reg->phys_mid = 0;
reg->phys_lo = 0;
reg->size_hi = 0;
reg->size_lo = 0;
for (i = 0; i < PCI_NUM_REGIONS; i++) {
if (!d->io_regions[i].size) {
continue;
}
reg = &rp->reg[reg_idx++];
reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i)));
if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) {
reg->phys_hi |= cpu_to_be32(b_ss(1));
} else {
reg->phys_hi |= cpu_to_be32(b_ss(2));
}
reg->phys_mid = 0;
reg->phys_lo = 0;
reg->size_hi = cpu_to_be32(d->io_regions[i].size >> 32);
reg->size_lo = cpu_to_be32(d->io_regions[i].size);
if (d->io_regions[i].addr == PCI_BAR_UNMAPPED) {
continue;
}
assigned = &rp->assigned[assigned_idx++];
assigned->phys_hi = cpu_to_be32(reg->phys_hi | b_n(1));
assigned->phys_mid = cpu_to_be32(d->io_regions[i].addr >> 32);
assigned->phys_lo = cpu_to_be32(d->io_regions[i].addr);
assigned->size_hi = reg->size_hi;
assigned->size_lo = reg->size_lo;
}
rp->reg_len = reg_idx * sizeof(ResourceFields);
rp->assigned_len = assigned_idx * sizeof(ResourceFields);
}
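
As a worked example of the phys.hi layout documented above (bus, slot, function, and BAR offset picked for illustration): a 32-bit MEM BAR at config offset 0x10 on bus 0, slot 5, function 0 encodes as b_ss(2) | b_ddddd(5) | b_rrrrrrrr(0x10) = 0x02002810, before the cpu_to_be32() swap.

    #include <stdint.h>
    #include <stdio.h>

    /* illustration only: recompute phys.hi for the example in the text */
    #define B_X(x, p, l) (((x) & ((1u << (l)) - 1)) << (p))

    int main(void)
    {
        uint32_t phys_hi = B_X(2, 24, 2)      /* ss: MEM space */
                         | B_X(0, 16, 8)      /* bus 0 */
                         | B_X(5, 11, 5)      /* slot 5 */
                         | B_X(0, 8, 3)       /* function 0 */
                         | B_X(0x10, 0, 8);   /* BAR register 0x10 */
        printf("0x%08x\n", phys_hi);          /* prints 0x02002810 */
        return 0;
    }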
static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
int phb_index, int drc_index,
const char *drc_name)
{
ResourceProps rp;
bool is_bridge = false;
int pci_status;
if (pci_default_read_config(dev, PCI_HEADER_TYPE, 1) ==
PCI_HEADER_TYPE_BRIDGE) {
is_bridge = true;
}
/* in accordance with PAPR+ v2.7 13.6.3, Table 181 */
_FDT(fdt_setprop_cell(fdt, offset, "vendor-id",
pci_default_read_config(dev, PCI_VENDOR_ID, 2)));
_FDT(fdt_setprop_cell(fdt, offset, "device-id",
pci_default_read_config(dev, PCI_DEVICE_ID, 2)));
_FDT(fdt_setprop_cell(fdt, offset, "revision-id",
pci_default_read_config(dev, PCI_REVISION_ID, 1)));
_FDT(fdt_setprop_cell(fdt, offset, "class-code",
pci_default_read_config(dev, PCI_CLASS_DEVICE, 2)
<< 8));
if (pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)) {
_FDT(fdt_setprop_cell(fdt, offset, "interrupts",
pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1)));
}
if (!is_bridge) {
_FDT(fdt_setprop_cell(fdt, offset, "min-grant",
pci_default_read_config(dev, PCI_MIN_GNT, 1)));
_FDT(fdt_setprop_cell(fdt, offset, "max-latency",
pci_default_read_config(dev, PCI_MAX_LAT, 1)));
}
if (pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2)) {
_FDT(fdt_setprop_cell(fdt, offset, "subsystem-id",
pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2)));
}
if (pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2)) {
_FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id",
pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2)));
}
_FDT(fdt_setprop_cell(fdt, offset, "cache-line-size",
pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1)));
/* the following fdt cells are masked off the pci status register */
pci_status = pci_default_read_config(dev, PCI_STATUS, 2);
_FDT(fdt_setprop_cell(fdt, offset, "devsel-speed",
PCI_STATUS_DEVSEL_MASK & pci_status));
if (pci_status & PCI_STATUS_FAST_BACK) {
_FDT(fdt_setprop(fdt, offset, "fast-back-to-back", NULL, 0));
}
if (pci_status & PCI_STATUS_66MHZ) {
_FDT(fdt_setprop(fdt, offset, "66mhz-capable", NULL, 0));
}
if (pci_status & PCI_STATUS_UDF) {
_FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0));
}
/* NOTE: this is normally generated by firmware via path/unit name,
* but in our case we must set it manually since it does not get
* processed by OF beforehand
*/
_FDT(fdt_setprop_string(fdt, offset, "name", "pci"));
_FDT(fdt_setprop(fdt, offset, "ibm,loc-code", drc_name, strlen(drc_name)));
_FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index));
_FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
RESOURCE_CELLS_ADDRESS));
_FDT(fdt_setprop_cell(fdt, offset, "#size-cells",
RESOURCE_CELLS_SIZE));
_FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x",
RESOURCE_CELLS_SIZE));
populate_resource_props(dev, &rp);
_FDT(fdt_setprop(fdt, offset, "reg", (uint8_t *)rp.reg, rp.reg_len));
_FDT(fdt_setprop(fdt, offset, "assigned-addresses",
(uint8_t *)rp.assigned, rp.assigned_len));
return 0;
}
/* create OF node for pci device and required OF DT properties */
static void *spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
int drc_index, const char *drc_name,
int *dt_offset)
{
void *fdt;
int offset, ret, fdt_size;
int slot = PCI_SLOT(dev->devfn);
int func = PCI_FUNC(dev->devfn);
char nodename[512];
fdt = create_device_tree(&fdt_size);
if (func != 0) {
sprintf(nodename, "pci@%d,%d", slot, func);
} else {
sprintf(nodename, "pci@%d", slot);
}
offset = fdt_add_subnode(fdt, 0, nodename);
ret = spapr_populate_pci_child_dt(dev, fdt, offset, phb->index, drc_index,
drc_name);
g_assert(!ret);
*dt_offset = offset;
return fdt;
}
static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
sPAPRPHBState *phb,
PCIDevice *pdev,
Error **errp)
{
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
DeviceState *dev = DEVICE(pdev);
int drc_index = drck->get_index(drc);
const char *drc_name = drck->get_name(drc);
void *fdt = NULL;
int fdt_start_offset = 0;
/* boot-time devices get their device tree node created by SLOF, but for
* hotplugged devices we need QEMU to generate it so the guest can fetch
* it via RTAS
*/
if (dev->hotplugged) {
fdt = spapr_create_pci_child_dt(phb, pdev, drc_index, drc_name,
&fdt_start_offset);
}
drck->attach(drc, DEVICE(pdev),
fdt, fdt_start_offset, !dev->hotplugged, errp);
if (*errp) {
g_free(fdt);
}
}
static void spapr_phb_remove_pci_device_cb(DeviceState *dev, void *opaque)
{
/* some guest versions do not wait for completion of a device
* cleanup (generally done asynchronously by the kernel) before
* signaling to QEMU that the device is safe, but instead sleep
* for some 'safe' period of time. unfortunately on a busy host
* this sleep isn't guaranteed to be long enough, resulting in
* bad things like IRQ lines being left asserted during final
* device removal. to deal with this we call reset just prior
* to finalizing the device, which will put the device back into
* an 'idle' state, as the device cleanup code expects.
*/
pci_device_reset(PCI_DEVICE(dev));
object_unparent(OBJECT(dev));
}
static void spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
sPAPRPHBState *phb,
PCIDevice *pdev,
Error **errp)
{
sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp);
}
static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
PCIDevice *pdev)
{
uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
(phb->index << 16) |
(busnr << 8) |
pdev->devfn);
}
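A self-contained sketch of the DRC identifier packing used here; the PHB index, bus number and devfn below are illustrative values only:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t pci_drc_id(uint32_t phb_index, uint32_t busnr, uint32_t devfn)
{
    /* same packing as spapr_phb_get_pci_drc() above */
    return (phb_index << 16) | (busnr << 8) | devfn;
}

int main(void)
{
    /* PHB index 1, bus 0, slot 3 function 0 -> devfn 0x18 */
    printf("drc id = 0x%" PRIx32 "\n", pci_drc_id(1, 0, 0x18)); /* 0x10018 */
    return 0;
}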
static void spapr_phb_hot_plug_child(HotplugHandler *plug_handler,
DeviceState *plugged_dev, Error **errp)
{
sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
PCIDevice *pdev = PCI_DEVICE(plugged_dev);
sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
Error *local_err = NULL;
/* if DR is disabled we don't need to do anything in the case of
* hotplug or coldplug callbacks
*/
if (!phb->dr_enabled) {
/* if this is a hotplug operation initiated by the user
* we need to let them know it's not enabled
*/
if (plugged_dev->hotplugged) {
error_set(errp, QERR_BUS_NO_HOTPLUG,
object_get_typename(OBJECT(phb)));
}
return;
}
g_assert(drc);
spapr_phb_add_pci_device(drc, phb, pdev, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
if (plugged_dev->hotplugged) {
spapr_hotplug_req_add_event(drc);
}
}
static void spapr_phb_hot_unplug_child(HotplugHandler *plug_handler,
DeviceState *plugged_dev, Error **errp)
{
sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
PCIDevice *pdev = PCI_DEVICE(plugged_dev);
sPAPRDRConnectorClass *drck;
sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
Error *local_err = NULL;
if (!phb->dr_enabled) {
error_set(errp, QERR_BUS_NO_HOTPLUG,
object_get_typename(OBJECT(phb)));
return;
}
g_assert(drc);
drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
if (!drck->release_pending(drc)) {
spapr_phb_remove_pci_device(drc, phb, pdev, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
spapr_hotplug_req_remove_event(drc);
}
}
static void spapr_phb_realize(DeviceState *dev, Error **errp)
{
SysBusDevice *s = SYS_BUS_DEVICE(dev);
@@ -742,12 +1119,12 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
PCIBus *bus;
uint64_t msi_window_size = 4096;
if (sphb->index != -1) {
if (sphb->index != (uint32_t)-1) {
hwaddr windows_base;
if ((sphb->buid != -1) || (sphb->dma_liobn != -1)
|| (sphb->mem_win_addr != -1)
|| (sphb->io_win_addr != -1)) {
if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn != (uint32_t)-1)
|| (sphb->mem_win_addr != (hwaddr)-1)
|| (sphb->io_win_addr != (hwaddr)-1)) {
error_setg(errp, "Either \"index\" or other parameters must"
" be specified for PAPR PHB, not both");
return;
@@ -760,7 +1137,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
}
sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index;
sphb->dma_liobn = SPAPR_PCI_BASE_LIOBN + sphb->index;
sphb->dma_liobn = SPAPR_PCI_LIOBN(sphb->index, 0);
windows_base = SPAPR_PCI_WINDOW_BASE
+ sphb->index * SPAPR_PCI_WINDOW_SPACING;
@@ -768,27 +1145,27 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF;
}
if (sphb->buid == -1) {
if (sphb->buid == (uint64_t)-1) {
error_setg(errp, "BUID not specified for PHB");
return;
}
if (sphb->dma_liobn == -1) {
if (sphb->dma_liobn == (uint32_t)-1) {
error_setg(errp, "LIOBN not specified for PHB");
return;
}
if (sphb->mem_win_addr == -1) {
if (sphb->mem_win_addr == (hwaddr)-1) {
error_setg(errp, "Memory window address not specified for PHB");
return;
}
if (sphb->io_win_addr == -1) {
if (sphb->io_win_addr == (hwaddr)-1) {
error_setg(errp, "IO window address not specified for PHB");
return;
}
if (find_phb(spapr, sphb->buid)) {
if (spapr_pci_find_phb(spapr, sphb->buid)) {
error_setg(errp, "PCI host bridges must have unique BUIDs");
return;
}
@@ -824,6 +1201,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
&sphb->memspace, &sphb->iospace,
PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
phb->bus = bus;
qbus_set_hotplug_handler(BUS(phb->bus), DEVICE(sphb), NULL);
/*
* Initialize PHB address space.
@@ -880,6 +1258,15 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
sphb->lsi_table[i].irq = irq;
}
/* allocate connectors for child PCI devices */
if (sphb->dr_enabled) {
for (i = 0; i < PCI_SLOT_MAX * 8; i++) {
spapr_dr_connector_new(OBJECT(phb),
SPAPR_DR_CONNECTOR_TYPE_PCI,
(sphb->index << 16) | i);
}
}
if (!info->finish_realize) {
error_setg(errp, "finish_realize not defined");
return;
@@ -893,11 +1280,11 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
{
sPAPRTCETable *tcet;
uint32_t nb_table;
nb_table = SPAPR_PCI_DMA32_SIZE >> SPAPR_TCE_PAGE_SHIFT;
tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
0,
SPAPR_TCE_PAGE_SHIFT,
0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false);
0, SPAPR_TCE_PAGE_SHIFT, nb_table, false);
if (!tcet) {
error_setg(errp, "Unable to create TCE table for %s",
sphb->dtbusname);
@@ -936,6 +1323,8 @@ static Property spapr_phb_properties[] = {
DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size,
SPAPR_PCI_IO_WIN_SIZE),
DEFINE_PROP_BOOL("dynamic-reconfiguration", sPAPRPHBState, dr_enabled,
true),
DEFINE_PROP_END_OF_LIST(),
};
@@ -1049,6 +1438,7 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data)
PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass);
hc->root_bus_path = spapr_phb_root_bus_path;
dc->realize = spapr_phb_realize;
@@ -1058,6 +1448,8 @@ static void spapr_phb_class_init(ObjectClass *klass, void *data)
set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
dc->cannot_instantiate_with_device_add_yet = false;
spc->finish_realize = spapr_phb_finish_realize;
hp->plug = spapr_phb_hot_plug_child;
hp->unplug = spapr_phb_hot_unplug_child;
}
static const TypeInfo spapr_phb_info = {
@@ -1066,6 +1458,10 @@ static const TypeInfo spapr_phb_info = {
.instance_size = sizeof(sPAPRPHBState),
.class_init = spapr_phb_class_init,
.class_size = sizeof(sPAPRPHBClass),
.interfaces = (InterfaceInfo[]) {
{ TYPE_HOTPLUG_HANDLER },
{ }
}
};
PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
@@ -1079,45 +1475,11 @@ PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
return PCI_HOST_BRIDGE(dev);
}
/* Macros to operate with address in OF binding to PCI */
#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p))
#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */
#define b_p(x) b_x((x), 30, 1) /* 1 if prefetchable */
#define b_t(x) b_x((x), 29, 1) /* 1 if the address is aliased */
#define b_ss(x) b_x((x), 24, 2) /* the space code */
#define b_bbbbbbbb(x) b_x((x), 16, 8) /* bus number */
#define b_ddddd(x) b_x((x), 11, 5) /* device number */
#define b_fff(x) b_x((x), 8, 3) /* function number */
#define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */
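A standalone worked example of building the phys.hi cell with these macros; the bus/device numbers and the 0x10 BAR register offset are assumptions:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define b_x(x, p, l) (((x) & ((1 << (l)) - 1)) << (p))
#define b_ss(x)       b_x((x), 24, 2)   /* space code */
#define b_bbbbbbbb(x) b_x((x), 16, 8)   /* bus number */
#define b_ddddd(x)    b_x((x), 11, 5)   /* device number */
#define b_fff(x)      b_x((x), 8, 3)    /* function number */
#define b_rrrrrrrr(x) b_x((x), 0, 8)    /* register number */

int main(void)
{
    /* 32-bit memory space (ss = 0b10), bus 0, device 3, function 0,
     * BAR at config offset 0x10 */
    uint32_t phys_hi = b_ss(2) | b_bbbbbbbb(0) | b_ddddd(3) | b_fff(0)
                       | b_rrrrrrrr(0x10);
    printf("phys.hi = 0x%08" PRIx32 "\n", phys_hi); /* 0x02001810 */
    return 0;
}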
typedef struct sPAPRTCEDT {
void *fdt;
int node_off;
} sPAPRTCEDT;
static int spapr_phb_children_dt(Object *child, void *opaque)
{
sPAPRTCEDT *p = opaque;
sPAPRTCETable *tcet;
tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE);
if (!tcet) {
return 0;
}
spapr_dma_dt(p->fdt, p->node_off, "ibm,dma-window",
tcet->liobn, tcet->bus_offset,
tcet->nb_table << tcet->page_shift);
/* Stop after the first window */
return 1;
}
int spapr_populate_pci_dt(sPAPRPHBState *phb,
uint32_t xics_phandle,
void *fdt)
{
int bus_off, i, j;
int bus_off, i, j, ret;
char nodename[256];
uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
const uint64_t mmiosize = memory_region_size(&phb->memwindow);
@@ -1151,6 +1513,7 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
uint32_t interrupt_map_mask[] = {
cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
sPAPRTCETable *tcet;
/* Start populating the FDT */
sprintf(nodename, "pci@%" PRIx64, phb->buid);
@@ -1159,14 +1522,6 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
return bus_off;
}
#define _FDT(exp) \
do { \
int ret = (exp); \
if (ret < 0) { \
return ret; \
} \
} while (0)
/* Write PHB properties */
_FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
_FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
@@ -1203,8 +1558,16 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
_FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
sizeof(interrupt_map)));
object_child_foreach(OBJECT(phb), spapr_phb_children_dt,
&((sPAPRTCEDT){ .fdt = fdt, .node_off = bus_off }));
tcet = spapr_tce_find_by_liobn(SPAPR_PCI_LIOBN(phb->index, 0));
spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
tcet->liobn, tcet->bus_offset,
tcet->nb_table << tcet->page_shift);
ret = spapr_drc_populate_dt(fdt, bus_off, OBJECT(phb),
SPAPR_DR_CONNECTOR_TYPE_PCI);
if (ret) {
return ret;
}
return 0;
}


@@ -35,6 +35,55 @@
#include "qapi-event.h"
#include <libfdt.h>
#include "hw/ppc/spapr_drc.h"
/* #define DEBUG_SPAPR */
#ifdef DEBUG_SPAPR
#define DPRINTF(fmt, ...) \
do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
do { } while (0)
#endif
static sPAPRConfigureConnectorState *spapr_ccs_find(sPAPREnvironment *spapr,
uint32_t drc_index)
{
sPAPRConfigureConnectorState *ccs = NULL;
QTAILQ_FOREACH(ccs, &spapr->ccs_list, next) {
if (ccs->drc_index == drc_index) {
break;
}
}
return ccs;
}
static void spapr_ccs_add(sPAPREnvironment *spapr,
sPAPRConfigureConnectorState *ccs)
{
g_assert(!spapr_ccs_find(spapr, ccs->drc_index));
QTAILQ_INSERT_HEAD(&spapr->ccs_list, ccs, next);
}
static void spapr_ccs_remove(sPAPREnvironment *spapr,
sPAPRConfigureConnectorState *ccs)
{
QTAILQ_REMOVE(&spapr->ccs_list, ccs, next);
g_free(ccs);
}
void spapr_ccs_reset_hook(void *opaque)
{
sPAPREnvironment *spapr = opaque;
sPAPRConfigureConnectorState *ccs, *ccs_tmp;
QTAILQ_FOREACH_SAFE(ccs, &spapr->ccs_list, next, ccs_tmp) {
spapr_ccs_remove(spapr, ccs);
}
}
static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs,
@@ -245,6 +294,308 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu,
rtas_st(rets, 0, ret);
}
static void rtas_set_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
{
int32_t power_domain;
if (nargs != 2 || nret != 2) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
/* we currently only use a single, "live insert" powerdomain for
* hotplugged/dlpar'd resources, so the power is always live/full (100)
*/
power_domain = rtas_ld(args, 0);
if (power_domain != -1) {
rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
return;
}
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
rtas_st(rets, 1, 100);
}
static void rtas_get_power_level(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
{
int32_t power_domain;
if (nargs != 1 || nret != 2) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
/* we currently only use a single, "live insert" powerdomain for
* hotplugged/dlpar'd resources, so the power is always live/full (100)
*/
power_domain = rtas_ld(args, 0);
if (power_domain != -1) {
rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
return;
}
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
rtas_st(rets, 1, 100);
}
static bool sensor_type_is_dr(uint32_t sensor_type)
{
switch (sensor_type) {
case RTAS_SENSOR_TYPE_ISOLATION_STATE:
case RTAS_SENSOR_TYPE_DR:
case RTAS_SENSOR_TYPE_ALLOCATION_STATE:
return true;
}
return false;
}
static void rtas_set_indicator(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
{
uint32_t sensor_type;
uint32_t sensor_index;
uint32_t sensor_state;
sPAPRDRConnector *drc;
sPAPRDRConnectorClass *drck;
if (nargs != 3 || nret != 1) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
sensor_type = rtas_ld(args, 0);
sensor_index = rtas_ld(args, 1);
sensor_state = rtas_ld(args, 2);
if (!sensor_type_is_dr(sensor_type)) {
goto out_unimplemented;
}
/* if this is a DR sensor we can assume sensor_index == drc_index */
drc = spapr_dr_connector_by_index(sensor_index);
if (!drc) {
DPRINTF("rtas_set_indicator: invalid sensor/DRC index: %xh\n",
sensor_index);
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
switch (sensor_type) {
case RTAS_SENSOR_TYPE_ISOLATION_STATE:
/* if the guest is configuring a device attached to this
* DRC, we should reset the configuration state at this
* point since it may no longer be reliable (guest released
* device and needs to start over, or unplug occurred so
* the FDT is no longer valid)
*/
if (sensor_state == SPAPR_DR_ISOLATION_STATE_ISOLATED) {
sPAPRConfigureConnectorState *ccs = spapr_ccs_find(spapr,
sensor_index);
if (ccs) {
spapr_ccs_remove(spapr, ccs);
}
}
drck->set_isolation_state(drc, sensor_state);
break;
case RTAS_SENSOR_TYPE_DR:
drck->set_indicator_state(drc, sensor_state);
break;
case RTAS_SENSOR_TYPE_ALLOCATION_STATE:
drck->set_allocation_state(drc, sensor_state);
break;
default:
goto out_unimplemented;
}
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
return;
out_unimplemented:
/* currently only DR-related sensors are implemented */
DPRINTF("rtas_set_indicator: sensor/indicator not implemented: %d\n",
sensor_type);
rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
}
static void rtas_get_sensor_state(PowerPCCPU *cpu, sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
{
uint32_t sensor_type;
uint32_t sensor_index;
sPAPRDRConnector *drc;
sPAPRDRConnectorClass *drck;
uint32_t entity_sense;
if (nargs != 2 || nret != 2) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
sensor_type = rtas_ld(args, 0);
sensor_index = rtas_ld(args, 1);
if (sensor_type != RTAS_SENSOR_TYPE_ENTITY_SENSE) {
/* currently only DR-related sensors are implemented */
DPRINTF("rtas_get_sensor_state: sensor/indicator not implemented: %d\n",
sensor_type);
rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
return;
}
drc = spapr_dr_connector_by_index(sensor_index);
if (!drc) {
DPRINTF("rtas_get_sensor_state: invalid sensor/DRC index: %xh\n",
sensor_index);
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
entity_sense = drck->entity_sense(drc);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
rtas_st(rets, 1, entity_sense);
}
/* configure-connector work area offsets, int32_t units for field
* indexes, bytes for field offset/len values.
*
* as documented by PAPR+ v2.7, 13.5.3.5
*/
#define CC_IDX_NODE_NAME_OFFSET 2
#define CC_IDX_PROP_NAME_OFFSET 2
#define CC_IDX_PROP_LEN 3
#define CC_IDX_PROP_DATA_OFFSET 4
#define CC_VAL_DATA_OFFSET ((CC_IDX_PROP_DATA_OFFSET + 1) * 4)
#define CC_WA_LEN 4096
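To make the layout concrete, a small sketch (the property name is an arbitrary example) of where an FDT_PROP response places the name string and its value inside the work area:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define EX_CC_IDX_PROP_DATA_OFFSET 4
#define EX_CC_VAL_DATA_OFFSET ((EX_CC_IDX_PROP_DATA_OFFSET + 1) * 4)

int main(void)
{
    const char *name = "ibm,my-drc-index";        /* example property name */
    uint32_t name_off = EX_CC_VAL_DATA_OFFSET;    /* byte 20 of the work area */
    uint32_t data_off = name_off + (uint32_t)strlen(name) + 1;

    /* the value is stored immediately after the NUL terminator of the name */
    printf("name at byte %" PRIu32 ", value at byte %" PRIu32 "\n",
           name_off, data_off);                   /* 20 and 37 */
    return 0;
}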
static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
sPAPREnvironment *spapr,
uint32_t token, uint32_t nargs,
target_ulong args, uint32_t nret,
target_ulong rets)
{
uint64_t wa_addr;
uint64_t wa_offset;
uint32_t drc_index;
sPAPRDRConnector *drc;
sPAPRDRConnectorClass *drck;
sPAPRConfigureConnectorState *ccs;
sPAPRDRCCResponse resp = SPAPR_DR_CC_RESPONSE_CONTINUE;
int rc;
const void *fdt;
if (nargs != 2 || nret != 1) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
wa_addr = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 0);
drc_index = rtas_ld(wa_addr, 0);
drc = spapr_dr_connector_by_index(drc_index);
if (!drc) {
DPRINTF("rtas_ibm_configure_connector: invalid DRC index: %xh\n",
drc_index);
rc = RTAS_OUT_PARAM_ERROR;
goto out;
}
drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
fdt = drck->get_fdt(drc, NULL);
ccs = spapr_ccs_find(spapr, drc_index);
if (!ccs) {
ccs = g_new0(sPAPRConfigureConnectorState, 1);
(void)drck->get_fdt(drc, &ccs->fdt_offset);
ccs->drc_index = drc_index;
spapr_ccs_add(spapr, ccs);
}
do {
uint32_t tag;
const char *name;
const struct fdt_property *prop;
int fdt_offset_next, prop_len;
tag = fdt_next_tag(fdt, ccs->fdt_offset, &fdt_offset_next);
switch (tag) {
case FDT_BEGIN_NODE:
ccs->fdt_depth++;
name = fdt_get_name(fdt, ccs->fdt_offset, NULL);
/* provide the name of the next OF node */
wa_offset = CC_VAL_DATA_OFFSET;
rtas_st(wa_addr, CC_IDX_NODE_NAME_OFFSET, wa_offset);
rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset,
(uint8_t *)name, strlen(name) + 1);
resp = SPAPR_DR_CC_RESPONSE_NEXT_CHILD;
break;
case FDT_END_NODE:
ccs->fdt_depth--;
if (ccs->fdt_depth == 0) {
/* done sending the device tree, don't need to track
* the state anymore
*/
drck->set_configured(drc);
spapr_ccs_remove(spapr, ccs);
ccs = NULL;
resp = SPAPR_DR_CC_RESPONSE_SUCCESS;
} else {
resp = SPAPR_DR_CC_RESPONSE_PREV_PARENT;
}
break;
case FDT_PROP:
prop = fdt_get_property_by_offset(fdt, ccs->fdt_offset,
&prop_len);
name = fdt_string(fdt, fdt32_to_cpu(prop->nameoff));
/* provide the name of the next OF property */
wa_offset = CC_VAL_DATA_OFFSET;
rtas_st(wa_addr, CC_IDX_PROP_NAME_OFFSET, wa_offset);
rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset,
(uint8_t *)name, strlen(name) + 1);
/* provide the length and value of the OF property. data gets
* placed immediately after NULL terminator of the OF property's
* name string
*/
wa_offset += strlen(name) + 1;
rtas_st(wa_addr, CC_IDX_PROP_LEN, prop_len);
rtas_st(wa_addr, CC_IDX_PROP_DATA_OFFSET, wa_offset);
rtas_st_buffer_direct(wa_addr + wa_offset, CC_WA_LEN - wa_offset,
(uint8_t *)((struct fdt_property *)prop)->data,
prop_len);
resp = SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;
break;
case FDT_END:
resp = SPAPR_DR_CC_RESPONSE_ERROR;
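/* fall through */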
default:
/* keep seeking for an actionable tag */
break;
}
if (ccs) {
ccs->fdt_offset = fdt_offset_next;
}
} while (resp == SPAPR_DR_CC_RESPONSE_CONTINUE);
rc = resp;
out:
rtas_st(rets, 0, rc);
}
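For context, a hypothetical sketch of the guest-side loop that drives this call; cc_call and its canned response sequence are stand-ins, not code from QEMU or any guest kernel:

#include <stdint.h>
#include <stdio.h>

/* hypothetical response codes; the real values come from spapr_drc.h */
enum cc_resp { CC_SUCCESS, CC_NEXT_CHILD, CC_NEXT_PROPERTY, CC_PREV_PARENT,
               CC_ERROR };

/* stand-in for the ibm,configure-connector RTAS call: returns a canned
 * sequence ending in success instead of reading a real work area */
static enum cc_resp cc_call(uint32_t drc_index)
{
    static const enum cc_resp script[] = {
        CC_NEXT_CHILD, CC_NEXT_PROPERTY, CC_NEXT_PROPERTY, CC_SUCCESS
    };
    static unsigned step;
    (void)drc_index;
    return script[step++ % 4];
}

int main(void)
{
    enum cc_resp r;

    /* a guest keeps calling until success or error, copying each returned
     * node name / property out of the 4K work area between calls */
    do {
        r = cc_call(0x40000018);   /* hypothetical DRC index */
    } while (r != CC_SUCCESS && r != CC_ERROR);
    printf("final response %d\n", r);
    return 0;
}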
static struct rtas_call {
const char *name;
spapr_rtas_fn fn;
@@ -370,6 +721,16 @@ static void core_rtas_register_types(void)
rtas_ibm_set_system_parameter);
spapr_rtas_register(RTAS_IBM_OS_TERM, "ibm,os-term",
rtas_ibm_os_term);
spapr_rtas_register(RTAS_SET_POWER_LEVEL, "set-power-level",
rtas_set_power_level);
spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level",
rtas_get_power_level);
spapr_rtas_register(RTAS_SET_INDICATOR, "set-indicator",
rtas_set_indicator);
spapr_rtas_register(RTAS_GET_SENSOR_STATE, "get-sensor-state",
rtas_get_sensor_state);
spapr_rtas_register(RTAS_IBM_CONFIGURE_CONNECTOR, "ibm,configure-connector",
rtas_ibm_configure_connector);
}
type_init(core_rtas_register_types)

Some files were not shown because too many files have changed in this diff.