Update version for v5.0.0-rc4 release

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
target/arm: Fix ID_MMFR4 value on AArch64 'max' CPU
2020-04-22 17:51:35 +01:00 · 2020-04-22 16:18:31 +01:00 · 2020-04-21 18:39:20 +01:00 · 2020-04-20 22:22:49 +01:00 · 2020-04-20 19:57:18 +01:00 · 2020-04-20 14:43:10 +01:00
75 changed files with 1581 additions and 711 deletions
--- a/17
+++ b/17
@@ -1853,6 +1853,18 @@ F: hw/display/virtio-gpu*
 F: hw/display/virtio-vga.*
 F: include/hw/virtio/virtio-gpu.h

+vhost-user-blk
+M: Raphael Norwitz <raphael.norwitz@nutanix.com>
+S: Maintained
+F: contrib/vhost-user-blk/
+F: contrib/vhost-user-scsi/
+F: hw/block/vhost-user-blk.c
+F: hw/scsi/vhost-user-scsi.c
+F: hw/virtio/vhost-user-blk-pci.c
+F: hw/virtio/vhost-user-scsi-pci.c
+F: include/hw/virtio/vhost-user-blk.h
+F: include/hw/virtio/vhost-user-scsi.h
+
 vhost-user-gpu
 M: Marc-André Lureau <marcandre.lureau@redhat.com>
 M: Gerd Hoffmann <kraxel@redhat.com>
@@ -2026,6 +2038,11 @@ M: Markus Armbruster <armbru@redhat.com>
 S: Supported
 F: scripts/coverity-model.c

+Coverity Scan integration
+M: Peter Maydell <peter.maydell@linaro.org>
+S: Maintained
+F: scripts/coverity-scan/
+
 Device Tree
 M: Alistair Francis <alistair.francis@wdc.com>
 R: David Gibson <david@gibson.dropbear.id.au>
--- a/2
+++ b/2
@@ -1076,7 +1076,7 @@ sphinxdocs: $(MANUAL_BUILDDIR)/devel/index.html \
 # Note the use of different doctree for each (manual, builder) tuple;
 # this works around Sphinx not handling parallel invocation on
 # a single doctree: https://github.com/sphinx-doc/sphinx/issues/2946
-build-manual = $(call quiet-command,CONFDIR="$(qemu_confdir)" $(SPHINX_BUILD) $(if $(V),,-q) -W -b $2 -D version=$(VERSION) -D release="$(FULL_VERSION)" -d .doctrees/$1-$2 $(SRC_PATH)/docs/$1 $(MANUAL_BUILDDIR)/$1 ,"SPHINX","$(MANUAL_BUILDDIR)/$1")
+build-manual = $(call quiet-command,CONFDIR="$(qemu_confdir)" $(SPHINX_BUILD) $(if $(V),,-q) $(SPHINX_WERROR) -b $2 -D version=$(VERSION) -D release="$(FULL_VERSION)" -d .doctrees/$1-$2 $(SRC_PATH)/docs/$1 $(MANUAL_BUILDDIR)/$1 ,"SPHINX","$(MANUAL_BUILDDIR)/$1")
 # We assume all RST files in the manual's directory are used in it
 manual-deps = $(wildcard $(SRC_PATH)/docs/$1/*.rst $(SRC_PATH)/docs/$1/*/*.rst) \
              $(SRC_PATH)/docs/defs.rst.inc \
--- a/2
+++ b/2
@@ -1 +1 @@
-4.2.92
+4.2.94
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -280,6 +280,7 @@ static void host_memory_backend_init(Object *obj)
    /* TODO: convert access to globals to compat properties */
    backend->merge = machine_mem_merge(machine);
    backend->dump = machine_dump_guest_core(machine);
+    backend->prealloc_threads = 1;
 }

 static void host_memory_backend_post_init(Object *obj)
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -991,8 +991,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
        acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;

        acb->ioh->sb_len_wr = acb->task->datain.size - 2;
-        ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
-             acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
+        ss = MIN(acb->ioh->mx_sb_len, acb->ioh->sb_len_wr);
        memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
    }

--- a/12
+++ b/12
@@ -807,6 +807,7 @@ MINGW32*)
    audio_drv_list=""
  fi
  supported_os="yes"
+  pie="no"
 ;;
 GNU/kFreeBSD)
  bsd="yes"
@@ -4928,6 +4929,12 @@ if check_include sys/kcov.h ; then
    kcov=yes
 fi

+# If we're making warnings fatal, apply this to Sphinx runs as well
+sphinx_werror=""
+if test "$werror" = "yes"; then
+    sphinx_werror="-W"
+fi
+
 # Check we have a new enough version of sphinx-build
 has_sphinx_build() {
    # This is a bit awkward but works: create a trivial document and
@@ -4936,7 +4943,9 @@ has_sphinx_build() {
    # sphinx-build doesn't exist at all or if it is too old.
    mkdir -p "$TMPDIR1/sphinx"
    touch "$TMPDIR1/sphinx/index.rst"
-    "$sphinx_build" -c "$source_path/docs" -b html "$TMPDIR1/sphinx" "$TMPDIR1/sphinx/out" >/dev/null 2>&1
+    "$sphinx_build" $sphinx_werror -c "$source_path/docs" \
+                    -b html "$TMPDIR1/sphinx" \
+                    "$TMPDIR1/sphinx/out"  >> config.log 2>&1
 }

 # Check if tools are available to build documentation.
@@ -7631,6 +7640,7 @@ echo "INSTALL_PROG=$install -c -m 0755" >> $config_host_mak
 echo "INSTALL_LIB=$install -c -m 0644" >> $config_host_mak
 echo "PYTHON=$python" >> $config_host_mak
 echo "SPHINX_BUILD=$sphinx_build" >> $config_host_mak
+echo "SPHINX_WERROR=$sphinx_werror" >> $config_host_mak
 echo "GENISOIMAGE=$genisoimage" >> $config_host_mak
 echo "CC=$cc" >> $config_host_mak
 if $iasl -h > /dev/null 2>&1; then
--- a/contrib/vhost-user-gpu/vhost-user-gpu.c
+++ b/contrib/vhost-user-gpu/vhost-user-gpu.c
@@ -848,7 +848,7 @@ vg_handle_ctrl(VuDev *dev, int qidx)
            QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next);
            vg->inflight++;
        } else {
-            g_free(cmd);
+            free(cmd);
        }
    }
 }
@@ -939,7 +939,7 @@ vg_handle_cursor(VuDev *dev, int qidx)
        }
        vu_queue_push(dev, vq, elem, 0);
        vu_queue_notify(dev, vq);
-        g_free(elem);
+        free(elem);
    }
 }

--- a/contrib/vhost-user-gpu/virgl.c
+++ b/contrib/vhost-user-gpu/virgl.c
@@ -519,7 +519,7 @@ virgl_write_fence(void *opaque, uint32_t fence)
        g_debug("FENCE %" PRIu64, cmd->cmd_hdr.fence_id);
        vg_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
        QTAILQ_REMOVE(&g->fenceq, cmd, next);
-        g_free(cmd);
+        free(cmd);
        g->inflight--;
    }
 }
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -59,8 +59,10 @@ sys.path.insert(0, os.path.join(qemu_docdir, "sphinx"))

 # If your documentation needs a minimal Sphinx version, state it here.
 #
-# 1.3 is where the 'alabaster' theme was shipped with Sphinx.
-needs_sphinx = '1.3'
+# Sphinx 1.5 and earlier can't build our docs because they are too
+# picky about the syntax of the argument to the option:: directive
+# (see Sphinx bugs #646, #3366).
+needs_sphinx = '1.6'

 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
--- a/docs/devel/atomics.rst
+++ b/docs/devel/atomics.rst
@@ -0,0 +1,507 @@
+=========================
+Atomic operations in QEMU
+=========================
+
+CPUs perform independent memory operations effectively in random order,
+but this can be a problem for CPU-CPU interaction (including interactions
+between QEMU and the guest).  Multi-threaded programs use various tools
+to instruct the compiler and the CPU to restrict the order to something
+that is consistent with the expectations of the programmer.
+
+The most basic tool is locking.  Mutexes, condition variables and
+semaphores are used in QEMU, and should be the default approach to
+synchronization.  Anything else is considerably harder, but it's
+also justified more often than one would like;
+the most performance-critical parts of QEMU in particular require
+a very low level approach to concurrency, involving memory barriers
+and atomic operations.  The semantics of concurrent memory accesses are governed
+by the C11 memory model.
+
+QEMU provides a header, ``qemu/atomic.h``, which wraps C11 atomics to
+provide better portability and a less verbose syntax.  ``qemu/atomic.h``
+provides macros that fall in three camps:
+
+- compiler barriers: ``barrier()``;
+
+- weak atomic access and manual memory barriers: ``atomic_read()``,
+  ``atomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``, ``smp_mb_acquire()``,
+  ``smp_mb_release()``, ``smp_read_barrier_depends()``;
+
+- sequentially consistent atomic access: everything else.
+
+In general, use of ``qemu/atomic.h`` should be wrapped with more easily
+used data structures (e.g. the lock-free singly-linked list operations
+``QSLIST_INSERT_HEAD_ATOMIC`` and ``QSLIST_MOVE_ATOMIC``) or synchronization
+primitives (such as RCU, ``QemuEvent`` or ``QemuLockCnt``).  Bare use of
+atomic operations and memory barriers should be limited to inter-thread
+checking of flags and documented thoroughly.
+
+
+
+Compiler memory barrier
+=======================
+
+``barrier()`` prevents the compiler from moving the memory accesses on
+either side of it to the other side.  The compiler barrier has no direct
+effect on the CPU, which may then reorder things however it wishes.
+
+``barrier()`` is mostly used within ``qemu/atomic.h`` itself.  On some
+architectures, CPU guarantees are strong enough that blocking compiler
+optimizations already ensures the correct order of execution.  In this
+case, ``qemu/atomic.h`` will reduce stronger memory barriers to simple
+compiler barriers.
+
+Still, ``barrier()`` can be useful when writing code that can be interrupted
+by signal handlers.
+
+
+Sequentially consistent atomic access
+=====================================
+
+Most of the operations in the ``qemu/atomic.h`` header ensure *sequential
+consistency*, where "the result of any execution is the same as if the
+operations of all the processors were executed in some sequential order,
+and the operations of each individual processor appear in this sequence
+in the order specified by its program".
+
+``qemu/atomic.h`` provides the following set of atomic read-modify-write
+operations::
+
+    void atomic_inc(ptr)
+    void atomic_dec(ptr)
+    void atomic_add(ptr, val)
+    void atomic_sub(ptr, val)
+    void atomic_and(ptr, val)
+    void atomic_or(ptr, val)
+
+    typeof(*ptr) atomic_fetch_inc(ptr)
+    typeof(*ptr) atomic_fetch_dec(ptr)
+    typeof(*ptr) atomic_fetch_add(ptr, val)
+    typeof(*ptr) atomic_fetch_sub(ptr, val)
+    typeof(*ptr) atomic_fetch_and(ptr, val)
+    typeof(*ptr) atomic_fetch_or(ptr, val)
+    typeof(*ptr) atomic_fetch_xor(ptr, val)
+    typeof(*ptr) atomic_fetch_inc_nonzero(ptr)
+    typeof(*ptr) atomic_xchg(ptr, val)
+    typeof(*ptr) atomic_cmpxchg(ptr, old, new)
+
+all of which return the old value of ``*ptr``.  These operations are
+polymorphic; they operate on any type that is as wide as a pointer or
+smaller.
+
+Similar operations return the new value of ``*ptr``::
+
+    typeof(*ptr) atomic_inc_fetch(ptr)
+    typeof(*ptr) atomic_dec_fetch(ptr)
+    typeof(*ptr) atomic_add_fetch(ptr, val)
+    typeof(*ptr) atomic_sub_fetch(ptr, val)
+    typeof(*ptr) atomic_and_fetch(ptr, val)
+    typeof(*ptr) atomic_or_fetch(ptr, val)
+    typeof(*ptr) atomic_xor_fetch(ptr, val)
+
+``qemu/atomic.h`` also provides loads and stores that cannot be reordered
+with each other::
+
+    typeof(*ptr) atomic_mb_read(ptr)
+    void         atomic_mb_set(ptr, val)
+
+However, these do not provide sequential consistency and, in particular,
+they do not participate in the total ordering enforced by
+sequentially-consistent operations.  For this reason they are deprecated.
+They should instead be replaced with any of the following (ordered from
+easiest to hardest):
+
+- accesses inside a mutex or spinlock
+
+- lightweight synchronization primitives such as ``QemuEvent``
+
+- RCU operations (``atomic_rcu_read``, ``atomic_rcu_set``) when publishing
+  or accessing a new version of a data structure
+
+- other atomic accesses: ``atomic_read`` and ``atomic_load_acquire`` for
+  loads, ``atomic_set`` and ``atomic_store_release`` for stores, ``smp_mb``
+  to forbid reordering subsequent loads before a store.
+
+
+Weak atomic access and manual memory barriers
+=============================================
+
+Compared to sequentially consistent atomic access, programming with
+weaker consistency models can be considerably more complicated.
+The only guarantees that you can rely upon in this case are:
+
+- atomic accesses will not cause data races (and hence undefined behavior);
+  ordinary accesses instead cause data races if they are concurrent with
+  other accesses of which at least one is a write.  In order to ensure this,
+  the compiler will not optimize accesses out of existence, create unsolicited
+  accesses, or perform other similar optimizations.
+
+- acquire operations will appear to happen, with respect to the other
+  components of the system, before all the LOAD or STORE operations
+  specified afterwards.
+
+- release operations will appear to happen, with respect to the other
+  components of the system, after all the LOAD or STORE operations
+  specified before.
+
+- release operations will *synchronize with* acquire operations;
+  see :ref:`acqrel` for a detailed explanation.
+
+When using this model, variables are accessed with:
+
+- ``atomic_read()`` and ``atomic_set()``; these prevent the compiler from
+  optimizing accesses out of existence and creating unsolicited
+  accesses, but do not otherwise impose any ordering on loads and
+  stores: both the compiler and the processor are free to reorder
+  them.
+
+- ``atomic_load_acquire()``, which guarantees the LOAD to appear to
+  happen, with respect to the other components of the system,
+  before all the LOAD or STORE operations specified afterwards.
+  Operations coming before ``atomic_load_acquire()`` can still be
+  reordered after it.
+
+- ``atomic_store_release()``, which guarantees the STORE to appear to
+  happen, with respect to the other components of the system,
+  after all the LOAD or STORE operations specified before.
+  Operations coming after ``atomic_store_release()`` can still be
+  reordered before it.
+
+Restrictions to the ordering of accesses can also be specified
+using the memory barrier macros: ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``,
+``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``.
+
+Memory barriers control the order of references to shared memory.
+They come in six kinds:
+
+- ``smp_rmb()`` guarantees that all the LOAD operations specified before
+  the barrier will appear to happen before all the LOAD operations
+  specified after the barrier with respect to the other components of
+  the system.
+
+  In other words, ``smp_rmb()`` puts a partial ordering on loads, but is not
+  required to have any effect on stores.
+
+- ``smp_wmb()`` guarantees that all the STORE operations specified before
+  the barrier will appear to happen before all the STORE operations
+  specified after the barrier with respect to the other components of
+  the system.
+
+  In other words, ``smp_wmb()`` puts a partial ordering on stores, but is not
+  required to have any effect on loads.
+
+- ``smp_mb_acquire()`` guarantees that all the LOAD operations specified before
+  the barrier will appear to happen before all the LOAD or STORE operations
+  specified after the barrier with respect to the other components of
+  the system.
+
+- ``smp_mb_release()`` guarantees that all the STORE operations specified *after*
+  the barrier will appear to happen after all the LOAD or STORE operations
+  specified *before* the barrier with respect to the other components of
+  the system.
+
+- ``smp_mb()`` guarantees that all the LOAD and STORE operations specified
+  before the barrier will appear to happen before all the LOAD and
+  STORE operations specified after the barrier with respect to the other
+  components of the system.
+
+  ``smp_mb()`` puts a partial ordering on both loads and stores.  It is
+  stronger than both a read and a write memory barrier; it implies both
+  ``smp_mb_acquire()`` and ``smp_mb_release()``, but it also prevents STOREs
+  coming before the barrier from overtaking LOADs coming after the
+  barrier and vice versa.
+
+- ``smp_read_barrier_depends()`` is a weaker kind of read barrier.  On
+  most processors, whenever two loads are performed such that the
+  second depends on the result of the first (e.g., the first load
+  retrieves the address to which the second load will be directed),
+  the processor will guarantee that the first LOAD will appear to happen
+  before the second with respect to the other components of the system.
+  However, this is not always true---for example, it was not true on
+  Alpha processors.  Whenever this kind of access happens to shared
+  memory (that is not protected by a lock), a read barrier is needed,
+  and ``smp_read_barrier_depends()`` can be used instead of ``smp_rmb()``.
+
+  Note that the first load really has to have a *data* dependency and not
+  a control dependency.  If the address for the second load is dependent
+  on the first load, but the dependency is through a conditional rather
+  than actually loading the address itself, then it's a *control*
+  dependency and a full read barrier or better is required.
+
+
+Memory barriers and ``atomic_load_acquire``/``atomic_store_release`` are
+mostly used when a data structure has one thread that is always a writer
+and one thread that is always a reader:
+
+    +----------------------------------+----------------------------------+
+    | thread 1                         | thread 2                         |
+    +==================================+==================================+
+    | ::                               | ::                               |
+    |                                  |                                  |
+    |   atomic_store_release(&a, x);   |   y = atomic_load_acquire(&b);   |
+    |   atomic_store_release(&b, y);   |   x = atomic_load_acquire(&a);   |
+    +----------------------------------+----------------------------------+
+
+In this case, correctness is easy to check for using the "pairing"
+trick that is explained below.
+
+Sometimes, a thread is accessing many variables that are otherwise
+unrelated to each other (for example because, apart from the current
+thread, exactly one other thread will read or write each of these
+variables).  In this case, it is possible to "hoist" the barriers
+outside a loop.  For example:
+
+    +------------------------------------------+----------------------------------+
+    | before                                   | after                            |
+    +==========================================+==================================+
+    | ::                                       | ::                               |
+    |                                          |                                  |
+    |   n = 0;                                 |   n = 0;                         |
+    |   for (i = 0; i < 10; i++)               |   for (i = 0; i < 10; i++)       |
+    |     n += atomic_load_acquire(&a[i]);     |     n += atomic_read(&a[i]);     |
+    |                                          |   smp_mb_acquire();              |
+    +------------------------------------------+----------------------------------+
+    | ::                                       | ::                               |
+    |                                          |                                  |
+    |                                          |   smp_mb_release();              |
+    |   for (i = 0; i < 10; i++)               |   for (i = 0; i < 10; i++)       |
+    |     atomic_store_release(&a[i], false);  |     atomic_set(&a[i], false);    |
+    +------------------------------------------+----------------------------------+
+
+Splitting a loop can also be useful to reduce the number of barriers:
+
+    +------------------------------------------+----------------------------------+
+    | before                                   | after                            |
+    +==========================================+==================================+
+    | ::                                       | ::                               |
+    |                                          |                                  |
+    |   n = 0;                                 |     smp_mb_release();            |
+    |   for (i = 0; i < 10; i++) {             |     for (i = 0; i < 10; i++)     |
+    |     atomic_store_release(&a[i], false);  |       atomic_set(&a[i], false);  |
+    |     smp_mb();                            |     smp_mb();                    |
+    |     n += atomic_read(&b[i]);             |     n = 0;                       |
+    |   }                                      |     for (i = 0; i < 10; i++)     |
+    |                                          |       n += atomic_read(&b[i]);   |
+    +------------------------------------------+----------------------------------+
+
+In this case, a ``smp_mb_release()`` is also replaced with a (possibly cheaper, and clearer
+as well) ``smp_wmb()``:
+
+    +------------------------------------------+----------------------------------+
+    | before                                   | after                            |
+    +==========================================+==================================+
+    | ::                                       | ::                               |
+    |                                          |                                  |
+    |                                          |     smp_mb_release();            |
+    |   for (i = 0; i < 10; i++) {             |     for (i = 0; i < 10; i++)     |
+    |     atomic_store_release(&a[i], false);  |       atomic_set(&a[i], false);  |
+    |     atomic_store_release(&b[i], false);  |     smp_wmb();                   |
+    |   }                                      |     for (i = 0; i < 10; i++)     |
+    |                                          |       atomic_set(&b[i], false);  |
+    +------------------------------------------+----------------------------------+
+
+
+.. _acqrel:
+
+Acquire/release pairing and the *synchronizes-with* relation
+------------------------------------------------------------
+
+Atomic operations other than ``atomic_set()`` and ``atomic_read()`` have
+either *acquire* or *release* semantics [#rmw]_.  This has two effects:
+
+.. [#rmw] Read-modify-write operations can have both---acquire applies to the
+          read part, and release to the write.
+
+- within a thread, they are ordered either before subsequent operations
+  (for acquire) or after previous operations (for release).
+
+- if a release operation in one thread *synchronizes with* an acquire operation
+  in another thread, the ordering constraints propagate from the first to the
+  second thread.  That is, everything before the release operation in the
+  first thread is guaranteed to *happen before* everything after the
+  acquire operation in the second thread.
+
+The concept of acquire and release semantics is not exclusive to atomic
+operations; almost all higher-level synchronization primitives also have
+acquire or release semantics.  For example:
+
+- ``pthread_mutex_lock`` has acquire semantics, ``pthread_mutex_unlock`` has
+  release semantics and synchronizes with a ``pthread_mutex_lock`` for the
+  same mutex.
+
+- ``pthread_cond_signal`` and ``pthread_cond_broadcast`` have release semantics;
+  ``pthread_cond_wait`` has both release semantics (synchronizing with
+  ``pthread_mutex_lock``) and acquire semantics (synchronizing with
+  ``pthread_mutex_unlock`` and signaling of the condition variable).
+
+- ``pthread_create`` has release semantics and synchronizes with the start
+  of the new thread; ``pthread_join`` has acquire semantics and synchronizes
+  with the exiting of the thread.
+
+- ``qemu_event_set`` has release semantics, ``qemu_event_wait`` has
+  acquire semantics.
+
+For instance, in the following example there are no atomic accesses, but still
+thread 2 is relying on the *synchronizes-with* relation between ``pthread_exit``
+(release) and ``pthread_join`` (acquire):
+
+      +----------------------+-------------------------------+
+      | thread 1             | thread 2                      |
+      +======================+===============================+
+      | ::                   | ::                            |
+      |                      |                               |
+      |   *a = 1;            |                               |
+      |   pthread_exit(a);   |   pthread_join(thread1, &a);  |
+      |                      |   x = *a;                     |
+      +----------------------+-------------------------------+
+
+Synchronization between threads basically descends from this pairing of
+a release operation and an acquire operation.  Therefore, atomic operations
+other than ``atomic_set()`` and ``atomic_read()`` will almost always be
+paired with another operation of the opposite kind: an acquire operation
+will pair with a release operation and vice versa.  This rule of thumb is
+extremely useful; in the case of QEMU, however, note that the other
+operation may actually be in a driver that runs in the guest!
+
+``smp_read_barrier_depends()``, ``smp_rmb()``, ``smp_mb_acquire()``,
+``atomic_load_acquire()`` and ``atomic_rcu_read()`` all count
+as acquire operations.  ``smp_wmb()``, ``smp_mb_release()``,
+``atomic_store_release()`` and ``atomic_rcu_set()`` all count as release
+operations.  ``smp_mb()`` counts as both acquire and release, therefore
+it can pair with any other atomic operation.  Here is an example:
+
+      +----------------------+------------------------------+
+      | thread 1             | thread 2                     |
+      +======================+==============================+
+      | ::                   | ::                           |
+      |                      |                              |
+      |   atomic_set(&a, 1); |                              |
+      |   smp_wmb();         |                              |
+      |   atomic_set(&b, 2); |   x = atomic_read(&b);       |
+      |                      |   smp_rmb();                 |
+      |                      |   y = atomic_read(&a);       |
+      +----------------------+------------------------------+
+
+Note that a load-store pair only counts if the two operations access the
+same variable: that is, a store-release on a variable ``x`` *synchronizes
+with* a load-acquire on a variable ``x``, while a release barrier
+synchronizes with any acquire operation.  The following example shows
+correct synchronization:
+
+      +--------------------------------+--------------------------------+
+      | thread 1                       | thread 2                       |
+      +================================+================================+
+      | ::                             | ::                             |
+      |                                |                                |
+      |   atomic_set(&a, 1);           |                                |
+      |   atomic_store_release(&b, 2); |   x = atomic_load_acquire(&b); |
+      |                                |   y = atomic_read(&a);         |
+      +--------------------------------+--------------------------------+
+
+Acquire and release semantics of higher-level primitives can also be
+relied upon for the purpose of establishing the *synchronizes with*
+relation.
+
+Note that the "writing" thread is accessing the variables in the
+opposite order as the "reading" thread.  This is expected: stores
+before a release operation will normally match the loads after
+the acquire operation, and vice versa.  In fact, this happened already
+in the ``pthread_exit``/``pthread_join`` example above.
+
+Finally, this more complex example has more than two accesses and data
+dependency barriers.  It also does not use atomic accesses whenever there
+cannot be a data race:
+
+      +----------------------+------------------------------+
+      | thread 1             | thread 2                     |
+      +======================+==============================+
+      | ::                   | ::                           |
+      |                      |                              |
+      |   b[2] = 1;          |                              |
+      |   smp_wmb();         |                              |
+      |   x->i = 2;          |                              |
+      |   smp_wmb();         |                              |
+      |   atomic_set(&a, x); |  x = atomic_read(&a);        |
+      |                      |  smp_read_barrier_depends(); |
+      |                      |  y = x->i;                   |
+      |                      |  smp_read_barrier_depends(); |
+      |                      |  z = b[y];                   |
+      +----------------------+------------------------------+
+
+Comparison with Linux kernel primitives
+=======================================
+
+Here is a list of differences between Linux kernel atomic operations
+and memory barriers, and the equivalents in QEMU:
+
+- atomic operations in Linux are always on a 32-bit int type and
+  use a boxed ``atomic_t`` type; atomic operations in QEMU are polymorphic
+  and use normal C types.
+
+- Originally, ``atomic_read`` and ``atomic_set`` in Linux gave no guarantee
+  at all. Linux 4.1 updated them to implement volatile
+  semantics via ``ACCESS_ONCE`` (or the more recent ``READ``/``WRITE_ONCE``).
+
+  QEMU's ``atomic_read`` and ``atomic_set`` implement C11 atomic relaxed
+  semantics if the compiler supports it, and volatile semantics otherwise.
+  Both semantics prevent the compiler from doing certain transformations;
+  the difference is that atomic accesses are guaranteed to be atomic,
+  while volatile accesses aren't. Thus, in the volatile case we just cross
+  our fingers hoping that the compiler will generate atomic accesses,
+  since we assume the variables passed are machine-word sized and
+  properly aligned.
+
+  No barriers are implied by ``atomic_read`` and ``atomic_set`` in either Linux
+  or QEMU.
+
+- atomic read-modify-write operations in Linux are of three kinds:
+
+         ===================== =========================================
+         ``atomic_OP``         returns void
+         ``atomic_OP_return``  returns new value of the variable
+         ``atomic_fetch_OP``   returns the old value of the variable
+         ``atomic_cmpxchg``    returns the old value of the variable
+         ===================== =========================================
+
+  In QEMU, the second kind is named ``atomic_OP_fetch``.
+
+- different atomic read-modify-write operations in Linux imply
+  a different set of memory barriers; in QEMU, all of them enforce
+  sequential consistency.
+
+- in QEMU, ``atomic_read()`` and ``atomic_set()`` do not participate in
+  the total ordering enforced by sequentially-consistent operations.
+  This is because QEMU uses the C11 memory model.  The following example
+  is correct in Linux but not in QEMU:
+
+      +----------------------------------+--------------------------------+
+      | Linux (correct)                  | QEMU (incorrect)               |
+      +==================================+================================+
+      | ::                               | ::                             |
+      |                                  |                                |
+      |   a = atomic_fetch_add(&x, 2);   |   a = atomic_fetch_add(&x, 2); |
+      |   b = READ_ONCE(&y);             |   b = atomic_read(&y);         |
+      +----------------------------------+--------------------------------+
+
+  because the read of ``y`` can be moved (by either the processor or the
+  compiler) before the write of ``x``.
+
+  Fixing this requires an ``smp_mb()`` memory barrier between the write
+  of ``x`` and the read of ``y``.  In the common case where only one thread
+  writes ``x``, it is also possible to write it like this:
+
+      +--------------------------------+
+      | QEMU (correct)                 |
+      +================================+
+      | ::                             |
+      |                                |
+      |   a = atomic_read(&x);         |
+      |   atomic_set(&x, a + 2);       |
+      |   smp_mb();                    |
+      |   b = atomic_read(&y);         |
+      +--------------------------------+
+
+Sources
+=======
+
+- ``Documentation/memory-barriers.txt`` from the Linux kernel
--- a/docs/devel/atomics.txt
+++ b/docs/devel/atomics.txt
@@ -1,403 +0,0 @@
-CPUs perform independent memory operations effectively in random order.
-but this can be a problem for CPU-CPU interaction (including interactions
-between QEMU and the guest).  Multi-threaded programs use various tools
-to instruct the compiler and the CPU to restrict the order to something
-that is consistent with the expectations of the programmer.
-
-The most basic tool is locking.  Mutexes, condition variables and
-semaphores are used in QEMU, and should be the default approach to
-synchronization.  Anything else is considerably harder, but it's
-also justified more often than one would like.  The two tools that
-are provided by qemu/atomic.h are memory barriers and atomic operations.
-
-Macros defined by qemu/atomic.h fall in three camps:
-
- compiler barriers: barrier();
-
- weak atomic access and manual memory barriers: atomic_read(),
-  atomic_set(), smp_rmb(), smp_wmb(), smp_mb(), smp_mb_acquire(),
-  smp_mb_release(), smp_read_barrier_depends();
-
- sequentially consistent atomic access: everything else.
-
-
-COMPILER MEMORY BARRIER
-=======================
-
-barrier() prevents the compiler from moving the memory accesses either
-side of it to the other side.  The compiler barrier has no direct effect
-on the CPU, which may then reorder things however it wishes.
-
-barrier() is mostly used within qemu/atomic.h itself.  On some
-architectures, CPU guarantees are strong enough that blocking compiler
-optimizations already ensures the correct order of execution.  In this
-case, qemu/atomic.h will reduce stronger memory barriers to simple
-compiler barriers.
-
-Still, barrier() can be useful when writing code that can be interrupted
-by signal handlers.
-
-
-SEQUENTIALLY CONSISTENT ATOMIC ACCESS
-=====================================
-
-Most of the operations in the qemu/atomic.h header ensure *sequential
-consistency*, where "the result of any execution is the same as if the
-operations of all the processors were executed in some sequential order,
-and the operations of each individual processor appear in this sequence
-in the order specified by its program".
-
-qemu/atomic.h provides the following set of atomic read-modify-write
-operations:
-
-    void atomic_inc(ptr)
-    void atomic_dec(ptr)
-    void atomic_add(ptr, val)
-    void atomic_sub(ptr, val)
-    void atomic_and(ptr, val)
-    void atomic_or(ptr, val)
-
-    typeof(*ptr) atomic_fetch_inc(ptr)
-    typeof(*ptr) atomic_fetch_dec(ptr)
-    typeof(*ptr) atomic_fetch_add(ptr, val)
-    typeof(*ptr) atomic_fetch_sub(ptr, val)
-    typeof(*ptr) atomic_fetch_and(ptr, val)
-    typeof(*ptr) atomic_fetch_or(ptr, val)
-    typeof(*ptr) atomic_fetch_xor(ptr, val)
-    typeof(*ptr) atomic_fetch_inc_nonzero(ptr)
-    typeof(*ptr) atomic_xchg(ptr, val)
-    typeof(*ptr) atomic_cmpxchg(ptr, old, new)
-
-all of which return the old value of *ptr.  These operations are
-polymorphic; they operate on any type that is as wide as a pointer.
-
-Similar operations return the new value of *ptr:
-
-    typeof(*ptr) atomic_inc_fetch(ptr)
-    typeof(*ptr) atomic_dec_fetch(ptr)
-    typeof(*ptr) atomic_add_fetch(ptr, val)
-    typeof(*ptr) atomic_sub_fetch(ptr, val)
-    typeof(*ptr) atomic_and_fetch(ptr, val)
-    typeof(*ptr) atomic_or_fetch(ptr, val)
-    typeof(*ptr) atomic_xor_fetch(ptr, val)
-
-Sequentially consistent loads and stores can be done using:
-
-    atomic_fetch_add(ptr, 0) for loads
-    atomic_xchg(ptr, val) for stores
-
-However, they are quite expensive on some platforms, notably POWER and
-Arm.  Therefore, qemu/atomic.h provides two primitives with slightly
-weaker constraints:
-
-    typeof(*ptr) atomic_mb_read(ptr)
-    void         atomic_mb_set(ptr, val)
-
-The semantics of these primitives map to Java volatile variables,
-and are strongly related to memory barriers as used in the Linux
-kernel (see below).
-
-As long as you use atomic_mb_read and atomic_mb_set, accesses cannot
-be reordered with each other, and it is also not possible to reorder
-"normal" accesses around them.
-
-However, and this is the important difference between
-atomic_mb_read/atomic_mb_set and sequential consistency, it is important
-for both threads to access the same volatile variable.  It is not the
-case that everything visible to thread A when it writes volatile field f
-becomes visible to thread B after it reads volatile field g. The store
-and load have to "match" (i.e., be performed on the same volatile
-field) to achieve the right semantics.
-
-
-These operations operate on any type that is as wide as an int or smaller.
-
-
-WEAK ATOMIC ACCESS AND MANUAL MEMORY BARRIERS
-=============================================
-
-Compared to sequentially consistent atomic access, programming with
-weaker consistency models can be considerably more complicated.
-In general, if the algorithm you are writing includes both writes
-and reads on the same side, it is generally simpler to use sequentially
-consistent primitives.
-
-When using this model, variables are accessed with:
-
- atomic_read() and atomic_set(); these prevent the compiler from
-  optimizing accesses out of existence and creating unsolicited
-  accesses, but do not otherwise impose any ordering on loads and
-  stores: both the compiler and the processor are free to reorder
-  them.
-
- atomic_load_acquire(), which guarantees the LOAD to appear to
-  happen, with respect to the other components of the system,
-  before all the LOAD or STORE operations specified afterwards.
-  Operations coming before atomic_load_acquire() can still be
-  reordered after it.
-
- atomic_store_release(), which guarantees the STORE to appear to
-  happen, with respect to the other components of the system,
-  after all the LOAD or STORE operations specified afterwards.
-  Operations coming after atomic_store_release() can still be
-  reordered after it.
-
-Restrictions to the ordering of accesses can also be specified
-using the memory barrier macros: smp_rmb(), smp_wmb(), smp_mb(),
-smp_mb_acquire(), smp_mb_release(), smp_read_barrier_depends().
-
-Memory barriers control the order of references to shared memory.
-They come in six kinds:
-
- smp_rmb() guarantees that all the LOAD operations specified before
-  the barrier will appear to happen before all the LOAD operations
-  specified after the barrier with respect to the other components of
-  the system.
-
-  In other words, smp_rmb() puts a partial ordering on loads, but is not
-  required to have any effect on stores.
-
- smp_wmb() guarantees that all the STORE operations specified before
-  the barrier will appear to happen before all the STORE operations
-  specified after the barrier with respect to the other components of
-  the system.
-
-  In other words, smp_wmb() puts a partial ordering on stores, but is not
-  required to have any effect on loads.
-
- smp_mb_acquire() guarantees that all the LOAD operations specified before
-  the barrier will appear to happen before all the LOAD or STORE operations
-  specified after the barrier with respect to the other components of
-  the system.
-
- smp_mb_release() guarantees that all the STORE operations specified *after*
-  the barrier will appear to happen after all the LOAD or STORE operations
-  specified *before* the barrier with respect to the other components of
-  the system.
-
- smp_mb() guarantees that all the LOAD and STORE operations specified
-  before the barrier will appear to happen before all the LOAD and
-  STORE operations specified after the barrier with respect to the other
-  components of the system.
-
-  smp_mb() puts a partial ordering on both loads and stores.  It is
-  stronger than both a read and a write memory barrier; it implies both
-  smp_mb_acquire() and smp_mb_release(), but it also prevents STOREs
-  coming before the barrier from overtaking LOADs coming after the
-  barrier and vice versa.
-
- smp_read_barrier_depends() is a weaker kind of read barrier.  On
-  most processors, whenever two loads are performed such that the
-  second depends on the result of the first (e.g., the first load
-  retrieves the address to which the second load will be directed),
-  the processor will guarantee that the first LOAD will appear to happen
-  before the second with respect to the other components of the system.
-  However, this is not always true---for example, it was not true on
-  Alpha processors.  Whenever this kind of access happens to shared
-  memory (that is not protected by a lock), a read barrier is needed,
-  and smp_read_barrier_depends() can be used instead of smp_rmb().
-
-  Note that the first load really has to have a _data_ dependency and not
-  a control dependency.  If the address for the second load is dependent
-  on the first load, but the dependency is through a conditional rather
-  than actually loading the address itself, then it's a _control_
-  dependency and a full read barrier or better is required.
-
-
-This is the set of barriers that is required *between* two atomic_read()
-and atomic_set() operations to achieve sequential consistency:
-
-                    |               2nd operation                   |
-                    |-----------------------------------------------|
-     1st operation  | (after last)   | atomic_read | atomic_set     |
-     ---------------+----------------+-------------+----------------|
-     (before first) |                | none        | smp_mb_release |
-     ---------------+----------------+-------------+----------------|
-     atomic_read    | smp_mb_acquire | smp_rmb     | **             |
-     ---------------+----------------+-------------+----------------|
-     atomic_set     | none           | smp_mb()*** | smp_wmb()      |
-     ---------------+----------------+-------------+----------------|
-
-       * Or smp_read_barrier_depends().
-
-      ** This requires a load-store barrier.  This is achieved by
-         either smp_mb_acquire() or smp_mb_release().
-
-     *** This requires a store-load barrier.  On most machines, the only
-         way to achieve this is a full barrier.
-
-
-You can see that the two possible definitions of atomic_mb_read()
-and atomic_mb_set() are the following:
-
-    1) atomic_mb_read(p)   = atomic_read(p); smp_mb_acquire()
-       atomic_mb_set(p, v) = smp_mb_release(); atomic_set(p, v); smp_mb()
-
-    2) atomic_mb_read(p)   = smp_mb() atomic_read(p); smp_mb_acquire()
-       atomic_mb_set(p, v) = smp_mb_release(); atomic_set(p, v);
-
-Usually the former is used, because smp_mb() is expensive and a program
-normally has more reads than writes.  Therefore it makes more sense to
-make atomic_mb_set() the more expensive operation.
-
-There are two common cases in which atomic_mb_read and atomic_mb_set
-generate too many memory barriers, and thus it can be useful to manually
-place barriers, or use atomic_load_acquire/atomic_store_release instead:
-
- when a data structure has one thread that is always a writer
-  and one thread that is always a reader, manual placement of
-  memory barriers makes the write side faster.  Furthermore,
-  correctness is easy to check for in this case using the "pairing"
-  trick that is explained below:
-
-     thread 1                                thread 1
-     -------------------------               ------------------------
-     (other writes)
-     atomic_mb_set(&a, x)                    atomic_store_release(&a, x)
-     atomic_mb_set(&b, y)                    atomic_store_release(&b, y)
-
-                                       =>
-     thread 2                                thread 2
-     -------------------------               ------------------------
-     y = atomic_mb_read(&b)                  y = atomic_load_acquire(&b)
-     x = atomic_mb_read(&a)                  x = atomic_load_acquire(&a)
-     (other reads)
-
-  Note that the barrier between the stores in thread 1, and between
-  the loads in thread 2, has been optimized here to a write or a
-  read memory barrier respectively.  On some architectures, notably
-  ARMv7, smp_mb_acquire and smp_mb_release are just as expensive as
-  smp_mb, but smp_rmb and/or smp_wmb are more efficient.
-
- sometimes, a thread is accessing many variables that are otherwise
-  unrelated to each other (for example because, apart from the current
-  thread, exactly one other thread will read or write each of these
-  variables).  In this case, it is possible to "hoist" the implicit
-  barriers provided by atomic_mb_read() and atomic_mb_set() outside
-  a loop.  For example, the above definition atomic_mb_read() gives
-  the following transformation:
-
-     n = 0;                                  n = 0;
-     for (i = 0; i < 10; i++)          =>    for (i = 0; i < 10; i++)
-       n += atomic_mb_read(&a[i]);             n += atomic_read(&a[i]);
-                                             smp_mb_acquire();
-
-  Similarly, atomic_mb_set() can be transformed as follows:
-
-                                             smp_mb_release();
-     for (i = 0; i < 10; i++)          =>    for (i = 0; i < 10; i++)
-       atomic_mb_set(&a[i], false);            atomic_set(&a[i], false);
-                                             smp_mb();
-
-
-  The other thread can still use atomic_mb_read()/atomic_mb_set().
-
-The two tricks can be combined.  In this case, splitting a loop in
-two lets you hoist the barriers out of the loops _and_ eliminate the
-expensive smp_mb():
-
-                                             smp_mb_release();
-     for (i = 0; i < 10; i++) {        =>    for (i = 0; i < 10; i++)
-       atomic_mb_set(&a[i], false);            atomic_set(&a[i], false);
-       atomic_mb_set(&b[i], false);          smb_wmb();
-     }                                       for (i = 0; i < 10; i++)
-                                               atomic_set(&a[i], false);
-                                             smp_mb();
-
-
-Memory barrier pairing
----------------------
-
-A useful rule of thumb is that memory barriers should always, or almost
-always, be paired with another barrier.  In the case of QEMU, however,
-note that the other barrier may actually be in a driver that runs in
-the guest!
-
-For the purposes of pairing, smp_read_barrier_depends() and smp_rmb()
-both count as read barriers.  A read barrier shall pair with a write
-barrier or a full barrier; a write barrier shall pair with a read
-barrier or a full barrier.  A full barrier can pair with anything.
-For example:
-
-        thread 1             thread 2
-        ===============      ===============
-        a = 1;
-        smp_wmb();
-        b = 2;               x = b;
-                             smp_rmb();
-                             y = a;
-
-Note that the "writing" thread is accessing the variables in the
-opposite order as the "reading" thread.  This is expected: stores
-before the write barrier will normally match the loads after the
-read barrier, and vice versa.  The same is true for more than 2
-access and for data dependency barriers:
-
-        thread 1             thread 2
-        ===============      ===============
-        b[2] = 1;
-        smp_wmb();
-        x->i = 2;
-        smp_wmb();
-        a = x;               x = a;
-                             smp_read_barrier_depends();
-                             y = x->i;
-                             smp_read_barrier_depends();
-                             z = b[y];
-
-smp_wmb() also pairs with atomic_mb_read() and smp_mb_acquire().
-and smp_rmb() also pairs with atomic_mb_set() and smp_mb_release().
-
-
-COMPARISON WITH LINUX KERNEL MEMORY BARRIERS
-============================================
-
-Here is a list of differences between Linux kernel atomic operations
-and memory barriers, and the equivalents in QEMU:
-
- atomic operations in Linux are always on a 32-bit int type and
-  use a boxed atomic_t type; atomic operations in QEMU are polymorphic
-  and use normal C types.
-
- Originally, atomic_read and atomic_set in Linux gave no guarantee
-  at all. Linux 4.1 updated them to implement volatile
-  semantics via ACCESS_ONCE (or the more recent READ/WRITE_ONCE).
-
-  QEMU's atomic_read/set implement, if the compiler supports it, C11
-  atomic relaxed semantics, and volatile semantics otherwise.
-  Both semantics prevent the compiler from doing certain transformations;
-  the difference is that atomic accesses are guaranteed to be atomic,
-  while volatile accesses aren't. Thus, in the volatile case we just cross
-  our fingers hoping that the compiler will generate atomic accesses,
-  since we assume the variables passed are machine-word sized and
-  properly aligned.
-  No barriers are implied by atomic_read/set in either Linux or QEMU.
-
- atomic read-modify-write operations in Linux are of three kinds:
-
-         atomic_OP          returns void
-         atomic_OP_return   returns new value of the variable
-         atomic_fetch_OP    returns the old value of the variable
-         atomic_cmpxchg     returns the old value of the variable
-
-  In QEMU, the second kind does not exist.  Currently Linux has
-  atomic_fetch_or only.  QEMU provides and, or, inc, dec, add, sub.
-
- different atomic read-modify-write operations in Linux imply
-  a different set of memory barriers; in QEMU, all of them enforce
-  sequential consistency, which means they imply full memory barriers
-  before and after the operation.
-
- Linux does not have an equivalent of atomic_mb_set().  In particular,
-  note that smp_store_mb() is a little weaker than atomic_mb_set().
-  atomic_mb_read() compiles to the same instructions as Linux's
-  smp_load_acquire(), but this should be treated as an implementation
-  detail.
-
-SOURCES
-=======
-
-* Documentation/memory-barriers.txt from the Linux kernel
-
-* "The JSR-133 Cookbook for Compiler Writers", available at
-  http://g.oswego.edu/dl/jmm/cookbook.html
--- a/docs/devel/index.rst
+++ b/docs/devel/index.rst
@@ -17,6 +17,7 @@ Contents:
   loads-stores
   memory
   migration
+   atomics
   stable-process
   testing
   decodetree
--- a/docs/devel/rcu.txt
+++ b/docs/devel/rcu.txt
@@ -132,7 +132,7 @@ The core RCU API is small:

     typeof(*p) atomic_rcu_read(p);

-        atomic_rcu_read() is similar to atomic_mb_read(), but it makes
+        atomic_rcu_read() is similar to atomic_load_acquire(), but it makes
        some assumptions on the code that calls it.  This allows a more
        optimized implementation.

@@ -154,7 +154,7 @@ The core RCU API is small:

     void atomic_rcu_set(p, typeof(*p) v);

-        atomic_rcu_set() is also similar to atomic_mb_set(), and it also
+        atomic_rcu_set() is similar to atomic_store_release(), though it also
        makes assumptions on the code that calls it in order to allow a more
        optimized implementation.

--- a/docs/sphinx/kerneldoc.py
+++ b/docs/sphinx/kerneldoc.py
@@ -99,6 +99,7 @@ class KernelDocDirective(Directive):
                env.note_dependency(os.path.abspath(f))
                cmd += ['-export-file', f]

+        cmd += ['-sphinx-version', sphinx.__version__]
        cmd += [filename]

        try:
--- a/docs/system/deprecated.rst
+++ b/docs/system/deprecated.rst
@@ -336,6 +336,14 @@ The ``compat`` property used to set backwards compatibility modes for
 the processor has been deprecated. The ``max-cpu-compat`` property of
 the ``pseries`` machine type should be used instead.

+KVM guest support on 32-bit Arm hosts (since 5.0)
+'''''''''''''''''''''''''''''''''''''''''''''''''
+
+The Linux kernel has dropped support for allowing 32-bit Arm systems
+to host KVM guests as of the 5.7 kernel. Accordingly, QEMU is deprecating
+its support for this configuration and will remove it in a future version.
+Running 32-bit guests on a 64-bit Arm host remains supported.
+
 System emulator devices
 -----------------------

--- a/docs/system/gdb.rst
+++ b/docs/system/gdb.rst
@@ -3,17 +3,25 @@
 GDB usage
 ---------

-QEMU has a primitive support to work with gdb, so that you can do
-'Ctrl-C' while the virtual machine is running and inspect its state.
+QEMU supports working with gdb via gdb's remote-connection facility
+(the "gdbstub"). This allows you to debug guest code in the same
+way that you might with a low-level debug facility like JTAG
+on real hardware. You can stop and start the virtual machine,
+examine state like registers and memory, and set breakpoints and
+watchpoints.

-In order to use gdb, launch QEMU with the '-s' option. It will wait for
-a gdb connection:
+In order to use gdb, launch QEMU with the ``-s`` and ``-S`` options.
+The ``-s`` option will make QEMU listen for an incoming connection
+from gdb on TCP port 1234, and ``-S`` will make QEMU not start the
+guest until you tell it to from gdb. (If you want to specify which
+TCP port to use or to use something other than TCP for the gdbstub
+connection, use the ``-gdb dev`` option instead of ``-s``.)

 .. parsed-literal::

-   |qemu_system| -s -kernel bzImage -hda rootdisk.img -append "root=/dev/hda"
-   Connected to host network interface: tun0
-   Waiting gdb connection on port 1234
+   |qemu_system| -s -S -kernel bzImage -hda rootdisk.img -append "root=/dev/hda"
+
+QEMU will launch but will silently wait for gdb to connect.

 Then launch gdb on the 'vmlinux' executable::

--- a/exec.c
+++ b/exec.c
@@ -2074,11 +2074,23 @@ static int memory_try_enable_merging(void *addr, size_t len)
 */
 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
 {
+    const ram_addr_t unaligned_size = newsize;
+
    assert(block);

    newsize = HOST_PAGE_ALIGN(newsize);

    if (block->used_length == newsize) {
+        /*
+         * We don't have to resize the ram block (which only knows aligned
+         * sizes), however, we have to notify if the unaligned size changed.
+         */
+        if (unaligned_size != memory_region_size(block->mr)) {
+            memory_region_set_size(block->mr, unaligned_size);
+            if (block->resized) {
+                block->resized(block->idstr, unaligned_size, block->host);
+            }
+        }
        return 0;
    }

@@ -2102,9 +2114,9 @@ int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
    block->used_length = newsize;
    cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
                                        DIRTY_CLIENTS_ALL);
-    memory_region_set_size(block->mr, newsize);
+    memory_region_set_size(block->mr, unaligned_size);
    if (block->resized) {
-        block->resized(block->idstr, newsize, block->host);
+        block->resized(block->idstr, unaligned_size, block->host);
    }
    return 0;
 }
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -929,7 +929,7 @@ void virt_acpi_setup(VirtMachineState *vms)

    build_state->linker_mr =
        acpi_add_rom_blob(virt_acpi_build_update, build_state,
-                          tables.linker->cmd_blob, "etc/table-loader", 0);
+                          tables.linker->cmd_blob, ACPI_BUILD_LOADER_FILE, 0);

    fw_cfg_add_file(vms->fw_cfg, ACPI_BUILD_TPMLOG_FILE, tables.tcpalog->data,
                    acpi_data_len(tables.tcpalog));
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -39,6 +39,7 @@ GlobalProperty hw_compat_4_2[] = {
    { "usb-redir", "suppress-remote-wake", "off" },
    { "qxl", "revision", "4" },
    { "qxl-vga", "revision", "4" },
+    { "fw_cfg", "acpi-mr-restore", "false" },
 };
 const size_t hw_compat_4_2_len = G_N_ELEMENTS(hw_compat_4_2);

--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -3043,7 +3043,7 @@ void acpi_setup(void)

    build_state->linker_mr =
        acpi_add_rom_blob(acpi_build_update, build_state,
-                          tables.linker->cmd_blob, "etc/table-loader", 0);
+                          tables.linker->cmd_blob, ACPI_BUILD_LOADER_FILE, 0);

    fw_cfg_add_file(x86ms->fw_cfg, ACPI_BUILD_TPMLOG_FILE,
                    tables.tcpalog->data, acpi_data_len(tables.tcpalog));
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -948,13 +948,26 @@ DEFINE_PC_MACHINE(isapc, "isapc", pc_init_isa,


 #ifdef CONFIG_XEN
-static void xenfv_machine_options(MachineClass *m)
+static void xenfv_4_2_machine_options(MachineClass *m)
 {
+    pc_i440fx_4_2_machine_options(m);
    m->desc = "Xen Fully-virtualized PC";
    m->max_cpus = HVM_MAX_VCPUS;
    m->default_machine_opts = "accel=xen";
 }

-DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init,
-                  xenfv_machine_options);
+DEFINE_PC_MACHINE(xenfv_4_2, "xenfv-4.2", pc_xen_hvm_init,
+                  xenfv_4_2_machine_options);
+
+/*
+ * Machine class options for the "xenfv-3.1" machine type: start from the
+ * pc-i440fx-3.1 option set, then apply the Xen HVM specific overrides.
+ * This machine type also carries the "xenfv" alias.
+ */
+static void xenfv_3_1_machine_options(MachineClass *m)
+{
+    /* Must run first: the assignments below override what it sets up. */
+    pc_i440fx_3_1_machine_options(m);
+
+    m->alias = "xenfv";
+    m->desc = "Xen Fully-virtualized PC";
+    m->default_machine_opts = "accel=xen";
+    m->max_cpus = HVM_MAX_VCPUS;
+}
+
+DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init,
+                  xenfv_3_1_machine_options);
 #endif
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -39,6 +39,7 @@
 #include "qemu/config-file.h"
 #include "qemu/cutils.h"
 #include "qapi/error.h"
+#include "hw/acpi/aml-build.h"

 #define FW_CFG_FILE_SLOTS_DFLT 0x20

@@ -610,6 +611,55 @@ bool fw_cfg_dma_enabled(void *opaque)
    return s->dma_enabled;
 }

+/*
+ * .needed callback of the "fw_cfg/acpi_mr" subsection.
+ *
+ * Returns true only when the "acpi-mr-restore" property is enabled and at
+ * least one of the recorded ACPI blob sizes (tables, linker, RSDP) is not a
+ * multiple of the real host page size.
+ */
+static bool fw_cfg_acpi_mr_restore(void *opaque)
+{
+    FWCfgState *s = opaque;
+
+    if (!s->acpi_mr_restore) {
+        return false;
+    }
+
+    return !QEMU_IS_ALIGNED(s->table_mr_size, qemu_real_host_page_size) ||
+           !QEMU_IS_ALIGNED(s->linker_mr_size, qemu_real_host_page_size) ||
+           !QEMU_IS_ALIGNED(s->rsdp_mr_size, qemu_real_host_page_size);
+}
+
+/*
+ * Resize the RAM memory region that backs fw_cfg entry @key to @size bytes.
+ *
+ * @key may have FW_CFG_ARCH_LOCAL set; that bit picks which entries[] table
+ * is used and the remaining bits (FW_CFG_ENTRY_MASK) give the index, which
+ * must be below fw_cfg_max_entry().  The region is looked up from the
+ * entry's data pointer; a failing resize aborts via error_abort.
+ */
+static void fw_cfg_update_mr(FWCfgState *s, uint16_t key, size_t size)
+{
+    const int arch_idx = !!(key & FW_CFG_ARCH_LOCAL);
+    const uint16_t entry_idx = key & FW_CFG_ENTRY_MASK;
+    ram_addr_t unused_offset;
+    MemoryRegion *region;
+
+    assert(entry_idx < fw_cfg_max_entry(s));
+
+    /* Only the region is of interest; the offset output is ignored. */
+    region = memory_region_from_host(s->entries[arch_idx][entry_idx].data,
+                                     &unused_offset);
+
+    memory_region_ram_resize(region, size, &error_abort);
+}
+
+/*
+ * post_load hook of the "fw_cfg/acpi_mr" subsection.
+ *
+ * Walks the fw_cfg file directory and, for each of the ACPI blobs (tables,
+ * loader, RSDP), resizes the backing memory region to the size held in the
+ * matching *_mr_size field.  Files with any other name are skipped.
+ * Always returns 0.
+ */
+static int fw_cfg_acpi_mr_restore_post_load(void *opaque, int version_id)
+{
+    FWCfgState *s = opaque;
+    int i, nfiles;
+
+    assert(s->files);
+
+    /* The directory keeps its entry count in big-endian form. */
+    nfiles = be32_to_cpu(s->files->count);
+
+    for (i = 0; i < nfiles; i++) {
+        const char *name = s->files->f[i].name;
+        uint64_t size;
+
+        if (strcmp(name, ACPI_BUILD_TABLE_FILE) == 0) {
+            size = s->table_mr_size;
+        } else if (strcmp(name, ACPI_BUILD_LOADER_FILE) == 0) {
+            size = s->linker_mr_size;
+        } else if (strcmp(name, ACPI_BUILD_RSDP_FILE) == 0) {
+            size = s->rsdp_mr_size;
+        } else {
+            continue;
+        }
+
+        /* Directory slot i is addressed with key FW_CFG_FILE_FIRST + i. */
+        fw_cfg_update_mr(s, FW_CFG_FILE_FIRST + i, size);
+    }
+
+    return 0;
+}
+
 static const VMStateDescription vmstate_fw_cfg_dma = {
    .name = "fw_cfg/dma",
    .needed = fw_cfg_dma_enabled,
@@ -619,6 +669,20 @@ static const VMStateDescription vmstate_fw_cfg_dma = {
    },
 };

+/*
+ * Subsection holding the sizes of the ACPI tables, linker and RSDP blobs.
+ * fw_cfg_acpi_mr_restore() is its .needed predicate and
+ * fw_cfg_acpi_mr_restore_post_load() its .post_load hook.
+ */
+static const VMStateDescription vmstate_fw_cfg_acpi_mr = {
+    .name = "fw_cfg/acpi_mr",
+    .needed = fw_cfg_acpi_mr_restore,
+    .post_load = fw_cfg_acpi_mr_restore_post_load,
+    .minimum_version_id = 1,
+    .version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(table_mr_size, FWCfgState),
+        VMSTATE_UINT64(linker_mr_size, FWCfgState),
+        VMSTATE_UINT64(rsdp_mr_size, FWCfgState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static const VMStateDescription vmstate_fw_cfg = {
    .name = "fw_cfg",
    .version_id = 2,
@@ -631,6 +695,7 @@ static const VMStateDescription vmstate_fw_cfg = {
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_fw_cfg_dma,
+        &vmstate_fw_cfg_acpi_mr,
        NULL,
    }
 };
@@ -815,6 +880,23 @@ static struct {
 #define FW_CFG_ORDER_OVERRIDE_LAST 200
 };

+/*
+ * Sub-page size updates to the ACPI table MRs are lost during migration,
+ * because the ram_load_precopy() -> qemu_ram_resize() path works with
+ * aligned sizes.  To keep the sizes consistent, record them separately here
+ * so that they can be migrated and re-applied in the vmstate post_load().
+ *
+ * Only the tables, loader and RSDP files are tracked; @len is ignored for
+ * any other @filename.
+ */
+static void fw_cfg_acpi_mr_save(FWCfgState *s, const char *filename, size_t len)
+{
+    if (strcmp(filename, ACPI_BUILD_TABLE_FILE) == 0) {
+        s->table_mr_size = len;
+        return;
+    }
+
+    if (strcmp(filename, ACPI_BUILD_LOADER_FILE) == 0) {
+        s->linker_mr_size = len;
+        return;
+    }
+
+    if (strcmp(filename, ACPI_BUILD_RSDP_FILE) == 0) {
+        s->rsdp_mr_size = len;
+    }
+}
+
 static int get_fw_cfg_order(FWCfgState *s, const char *name)
 {
    int i;
@@ -914,6 +996,7 @@ void fw_cfg_add_file_callback(FWCfgState *s,  const char *filename,
    trace_fw_cfg_add_file(s, index, s->files->f[index].name, len);

    s->files->count = cpu_to_be32(count+1);
+    fw_cfg_acpi_mr_save(s, filename, len);
 }

 void fw_cfg_add_file(FWCfgState *s,  const char *filename,
@@ -937,6 +1020,7 @@ void *fw_cfg_modify_file(FWCfgState *s, const char *filename,
            ptr = fw_cfg_modify_bytes_read(s, FW_CFG_FILE_FIRST + i,
                                           data, len);
            s->files->f[i].size   = cpu_to_be32(len);
+            fw_cfg_acpi_mr_save(s, filename, len);
            return ptr;
        }
    }
@@ -973,7 +1057,10 @@ static void fw_cfg_machine_ready(struct Notifier *n, void *data)
    qemu_register_reset(fw_cfg_machine_reset, s);
 }

-
+static Property fw_cfg_properties[] = { /* set in fw_cfg_class_init() */
+    DEFINE_PROP_BOOL("acpi-mr-restore", FWCfgState, acpi_mr_restore, true),
+    DEFINE_PROP_END_OF_LIST(), /* list terminator */
+};

 static void fw_cfg_common_realize(DeviceState *dev, Error **errp)
 {
@@ -1097,6 +1184,8 @@ static void fw_cfg_class_init(ObjectClass *klass, void *data)

    dc->reset = fw_cfg_reset;
    dc->vmsd = &vmstate_fw_cfg;
+
+    device_class_set_props(dc, fw_cfg_properties);
 }

 static const TypeInfo fw_cfg_info = {
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -26,6 +26,7 @@
 #define QEMU_AIO_WAIT_H

 #include "block/aio.h"
+#include "qemu/main-loop.h"

 /**
 * AioWait:
@@ -124,4 +125,25 @@ void aio_wait_kick(void);
 */
 void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);

+/**
+ * in_aio_context_home_thread:
+ * @ctx: the aio context
+ *
+ * Return whether we are running in the thread that normally runs @ctx.
+ * Acquiring or releasing @ctx has no influence on the result: every
+ * AioContext still has exactly one home thread responsible for running it.
+ *
+ * Besides the case where @ctx is the current thread's AioContext, the main
+ * AioContext (qemu_get_aio_context()) is also reported as "home" whenever
+ * the caller holds the iothread mutex.
+ */
+static inline bool in_aio_context_home_thread(AioContext *ctx)
+{
+    return ctx == qemu_get_current_aio_context() ||
+           (ctx == qemu_get_aio_context() && qemu_mutex_iothread_locked());
+}
+
 #endif /* QEMU_AIO_WAIT_H */
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -133,12 +133,16 @@ struct AioContext {
    AioHandlerList deleted_aio_handlers;

    /* Used to avoid unnecessary event_notifier_set calls in aio_notify;
-     * accessed with atomic primitives.  If this field is 0, everything
-     * (file descriptors, bottom halves, timers) will be re-evaluated
-     * before the next blocking poll(), thus the event_notifier_set call
-     * can be skipped.  If it is non-zero, you may need to wake up a
-     * concurrent aio_poll or the glib main event loop, making
-     * event_notifier_set necessary.
+     * only written from the AioContext home thread, or under the BQL in
+     * the case of the main AioContext.  However, it is read from any
+     * thread so it is still accessed with atomic primitives.
+     *
+     * If this field is 0, everything (file descriptors, bottom halves,
+     * timers) will be re-evaluated before the next blocking poll() or
+     * io_uring wait; therefore, the event_notifier_set call can be
+     * skipped.  If it is non-zero, you may need to wake up a concurrent
+     * aio_poll or the glib main event loop, making event_notifier_set
+     * necessary.
     *
     * Bit 0 is reserved for GSource usage of the AioContext, and is 1
     * between a call to aio_ctx_prepare and the next call to aio_ctx_check.
@@ -681,19 +685,6 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
 */
 AioContext *qemu_get_current_aio_context(void);

-/**
- * in_aio_context_home_thread:
- * @ctx: the aio context
- *
- * Return whether we are running in the thread that normally runs @ctx.  Note
- * that acquiring/releasing ctx does not affect the outcome, each AioContext
- * still only has one home thread that is responsible for running it.
- */
-static inline bool in_aio_context_home_thread(AioContext *ctx)
-{
-    return ctx == qemu_get_current_aio_context();
-}
-
 /**
 * aio_context_setup:
 * @ctx: the aio context
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -125,6 +125,42 @@ static inline int gdb_get_reg128(GByteArray *buf, uint64_t val_hi,
    return 16;
 }

+/**
+ * gdb_get_float32: append a float32 value to a byte array
+ * @array: byte array to append to
+ * @val: value to append
+ *
+ * The value is stored with stfl_p() into a temporary buffer of
+ * sizeof(CPU_FloatU) bytes, which is then appended to @array.
+ *
+ * Returns the number of bytes appended.
+ */
+static inline int gdb_get_float32(GByteArray *array, float32 val)
+{
+    uint8_t buf[sizeof(CPU_FloatU)];
+
+    stfl_p(buf, val);
+    g_byte_array_append(array, buf, sizeof(buf));
+
+    return sizeof(buf);
+}
+
+/**
+ * gdb_get_zeroes: append zero bytes to a byte array
+ * @array: byte array to append to
+ * @len: number of zero bytes to append
+ *
+ * Returns @len.
+ */
+static inline int gdb_get_zeroes(GByteArray *array, size_t len)
+{
+    guint oldlen = array->len;
+    g_byte_array_set_size(array, oldlen + len);
+    memset(array->data + oldlen, 0, len);
+
+    return len;
+}
+
 /**
 * gdb_get_reg_ptr: get pointer to start of last element
 * @len: length of element
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -2351,8 +2351,8 @@ void address_space_write_cached_slow(MemoryRegionCache *cache,
 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
 {
    if (is_write) {
-        return memory_region_is_ram(mr) &&
-               !mr->readonly && !memory_region_is_ram_device(mr);
+        return memory_region_is_ram(mr) && !mr->readonly &&
+               !mr->rom_device && !memory_region_is_ram_device(mr);
    } else {
        return (memory_region_is_ram(mr) && !memory_region_is_ram_device(mr)) ||
               memory_region_is_romd(mr);
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -13,6 +13,7 @@
 #define ACPI_BUILD_TABLE_FILE "etc/acpi/tables"
 #define ACPI_BUILD_RSDP_FILE "etc/acpi/rsdp"
 #define ACPI_BUILD_TPMLOG_FILE "etc/tpm/log"
+#define ACPI_BUILD_LOADER_FILE "etc/table-loader"

 #define AML_NOTIFY_METHOD "NTFY"

--- a/include/hw/nvram/fw_cfg.h
+++ b/include/hw/nvram/fw_cfg.h
@@ -53,6 +53,12 @@ struct FWCfgState {
    dma_addr_t dma_addr;
    AddressSpace *dma_as;
    MemoryRegion dma_iomem;
+
+    /* restore during migration */
+    bool acpi_mr_restore;
+    uint64_t table_mr_size;
+    uint64_t linker_mr_size;
+    uint64_t rsdp_mr_size;
 };

 struct FWCfgIoState {
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -33,20 +33,6 @@
 #else
 #include "exec/poison.h"
 #endif
-#ifdef __COVERITY__
-/* Coverity does not like the new _Float* types that are used by
- * recent glibc, and croaks on every single file that includes
- * stdlib.h.  These typedefs are enough to please it.
- *
- * Note that these fix parse errors so they cannot be placed in
- * scripts/coverity-model.c.
- */
-typedef float _Float32;
-typedef double _Float32x;
-typedef double _Float64;
-typedef __float80 _Float64x;
-typedef __float128 _Float128;
-#endif

 #include "qemu/compiler.h"

--- a/include/qemu/thread.h
+++ b/include/qemu/thread.h
@@ -57,17 +57,17 @@ extern QemuCondTimedWaitFunc qemu_cond_timedwait_func;
 * hide them.
 */
 #define qemu_mutex_lock(m)                                              \
-            qemu_mutex_lock_impl(m, __FILE__, __LINE__);
+            qemu_mutex_lock_impl(m, __FILE__, __LINE__)
 #define qemu_mutex_trylock(m)                                           \
-            qemu_mutex_trylock_impl(m, __FILE__, __LINE__);
+            qemu_mutex_trylock_impl(m, __FILE__, __LINE__)
 #define qemu_rec_mutex_lock(m)                                          \
-            qemu_rec_mutex_lock_impl(m, __FILE__, __LINE__);
+            qemu_rec_mutex_lock_impl(m, __FILE__, __LINE__)
 #define qemu_rec_mutex_trylock(m)                                       \
-            qemu_rec_mutex_trylock_impl(m, __FILE__, __LINE__);
+            qemu_rec_mutex_trylock_impl(m, __FILE__, __LINE__)
 #define qemu_cond_wait(c, m)                                            \
-            qemu_cond_wait_impl(c, m, __FILE__, __LINE__);
+            qemu_cond_wait_impl(c, m, __FILE__, __LINE__)
 #define qemu_cond_timedwait(c, m, ms)                                   \
-            qemu_cond_wait_impl(c, m, ms, __FILE__, __LINE__);
+            qemu_cond_timedwait_impl(c, m, ms, __FILE__, __LINE__)
 #else
 #define qemu_mutex_lock(m) ({                                           \
            QemuMutexLockFunc _f = atomic_read(&qemu_mutex_lock_func);  \
--- a/linux-user/flatload.c
+++ b/linux-user/flatload.c
@@ -37,7 +37,7 @@

 #include "qemu.h"
 #include "flat.h"
-#include <target_flat.h>
+#include "target_flat.h"

 //#define DEBUG

--- a/linux-user/ppc/signal.c
+++ b/linux-user/ppc/signal.c
@@ -35,12 +35,26 @@ struct target_mcontext {
    target_ulong mc_gregs[48];
    /* Includes fpscr.  */
    uint64_t mc_fregs[33];
+
 #if defined(TARGET_PPC64)
    /* Pointer to the vector regs */
    target_ulong v_regs;
+    /*
+     * On ppc64, this mcontext structure is naturally *unaligned*,
+     * or rather it is aligned on a 8 bytes boundary but not on
+     * a 16 byte boundary.  This pad fixes it up.  This is why we
+     * cannot use ppc_avr_t, which would force alignment.  This is
+     * also why the vector regs are referenced in the ABI by the
+     * v_regs pointer above so any amount of padding can be added here.
+     */
+    target_ulong pad;
+    /* VSCR and VRSAVE are saved separately.  Also reserve space for VSX. */
+    struct {
+        uint64_t altivec[34 + 16][2];
+    } mc_vregs;
 #else
    target_ulong mc_pad[2];
-#endif
+
    /* We need to handle Altivec and SPE at the same time, which no
       kernel needs to do.  Fortunately, the kernel defines this bit to
       be Altivec-register-large all the time, rather than trying to
@@ -48,32 +62,14 @@ struct target_mcontext {
    union {
        /* SPE vector registers.  One extra for SPEFSCR.  */
        uint32_t spe[33];
-        /* Altivec vector registers.  The packing of VSCR and VRSAVE
-           varies depending on whether we're PPC64 or not: PPC64 splits
-           them apart; PPC32 stuffs them together.
-           We also need to account for the VSX registers on PPC64
-        */
-#if defined(TARGET_PPC64)
-#define QEMU_NVRREG (34 + 16)
-        /* On ppc64, this mcontext structure is naturally *unaligned*,
-         * or rather it is aligned on a 8 bytes boundary but not on
-         * a 16 bytes one. This pad fixes it up. This is also why the
-         * vector regs are referenced by the v_regs pointer above so
-         * any amount of padding can be added here
+        /*
+         * Altivec vector registers.  One extra for VRSAVE.
+         * On ppc32, we are already aligned to 16 bytes.  We could
+         * use ppc_avr_t, but choose to share the same type as ppc64.
         */
-        target_ulong pad;
-#else
-        /* On ppc32, we are already aligned to 16 bytes */
-#define QEMU_NVRREG 33
-#endif
-        /* We cannot use ppc_avr_t here as we do *not* want the implied
-         * 16-bytes alignment that would result from it. This would have
-         * the effect of making the whole struct target_mcontext aligned
-         * which breaks the layout of struct target_ucontext on ppc64.
-         */
-        uint64_t altivec[QEMU_NVRREG][2];
-#undef QEMU_NVRREG
+        uint64_t altivec[33][2];
    } mc_vregs;
+#endif
 };

 /* See arch/powerpc/include/asm/sigcontext.h.  */
@@ -278,6 +274,7 @@ static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame)
        __put_user((uint32_t)env->spr[SPR_VRSAVE], vrsave);
    }

+#if defined(TARGET_PPC64)
    /* Save VSX second halves */
    if (env->insns_flags2 & PPC2_VSX) {
        uint64_t *vsregs = (uint64_t *)&frame->mc_vregs.altivec[34];
@@ -286,6 +283,7 @@ static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame)
            __put_user(*vsrl, &vsregs[i]);
        }
    }
+#endif

    /* Save floating point registers.  */
    if (env->insns_flags & PPC_FLOAT) {
@@ -296,22 +294,18 @@ static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame)
        __put_user((uint64_t) env->fpscr, &frame->mc_fregs[32]);
    }

+#if !defined(TARGET_PPC64)
    /* Save SPE registers.  The kernel only saves the high half.  */
    if (env->insns_flags & PPC_SPE) {
-#if defined(TARGET_PPC64)
-        for (i = 0; i < ARRAY_SIZE(env->gpr); i++) {
-            __put_user(env->gpr[i] >> 32, &frame->mc_vregs.spe[i]);
-        }
-#else
        for (i = 0; i < ARRAY_SIZE(env->gprh); i++) {
            __put_user(env->gprh[i], &frame->mc_vregs.spe[i]);
        }
-#endif
        /* Set MSR_SPE in the saved MSR value to indicate that
           frame->mc_vregs contains valid data.  */
        msr |= MSR_SPE;
        __put_user(env->spe_fscr, &frame->mc_vregs.spe[32]);
    }
+#endif

    /* Store MSR.  */
    __put_user(msr, &frame->mc_gregs[TARGET_PT_MSR]);
@@ -392,6 +386,7 @@ static void restore_user_regs(CPUPPCState *env,
        __get_user(env->spr[SPR_VRSAVE], vrsave);
    }

+#if defined(TARGET_PPC64)
    /* Restore VSX second halves */
    if (env->insns_flags2 & PPC2_VSX) {
        uint64_t *vsregs = (uint64_t *)&frame->mc_vregs.altivec[34];
@@ -400,6 +395,7 @@ static void restore_user_regs(CPUPPCState *env,
            __get_user(*vsrl, &vsregs[i]);
        }
    }
+#endif

    /* Restore floating point registers.  */
    if (env->insns_flags & PPC_FLOAT) {
@@ -412,22 +408,15 @@ static void restore_user_regs(CPUPPCState *env,
        env->fpscr = (uint32_t) fpscr;
    }

+#if !defined(TARGET_PPC64)
    /* Save SPE registers.  The kernel only saves the high half.  */
    if (env->insns_flags & PPC_SPE) {
-#if defined(TARGET_PPC64)
-        for (i = 0; i < ARRAY_SIZE(env->gpr); i++) {
-            uint32_t hi;
-
-            __get_user(hi, &frame->mc_vregs.spe[i]);
-            env->gpr[i] = ((uint64_t)hi << 32) | ((uint32_t) env->gpr[i]);
-        }
-#else
        for (i = 0; i < ARRAY_SIZE(env->gprh); i++) {
            __get_user(env->gprh[i], &frame->mc_vregs.spe[i]);
        }
-#endif
        __get_user(env->spe_fscr, &frame->mc_vregs.spe[32]);
    }
+#endif
 }

 #if !defined(TARGET_PPC64)
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -7295,34 +7295,29 @@ static int open_self_stat(void *cpu_env, int fd)
 {
    CPUState *cpu = env_cpu((CPUArchState *)cpu_env);
    TaskState *ts = cpu->opaque;
-    abi_ulong start_stack = ts->info->start_stack;
+    g_autoptr(GString) buf = g_string_new(NULL);
    int i;

    for (i = 0; i < 44; i++) {
-      char buf[128];
-      int len;
-      uint64_t val = 0;
+        if (i == 0) {
+            /* pid */
+            g_string_printf(buf, FMT_pid " ", getpid());
+        } else if (i == 1) {
+            /* app name */
+            gchar *bin = g_strrstr(ts->bprm->argv[0], "/");
+            bin = bin ? bin + 1 : ts->bprm->argv[0];
+            g_string_printf(buf, "(%.15s) ", bin);
+        } else if (i == 27) {
+            /* stack bottom */
+            g_string_printf(buf, TARGET_ABI_FMT_ld " ", ts->info->start_stack);
+        } else {
+            /* for the rest, there is MasterCard */
+            g_string_printf(buf, "0%c", i == 43 ? '\n' : ' ');
+        }

-      if (i == 0) {
-        /* pid */
-        val = getpid();
-        snprintf(buf, sizeof(buf), "%"PRId64 " ", val);
-      } else if (i == 1) {
-        /* app name */
-        snprintf(buf, sizeof(buf), "(%s) ", ts->bprm->argv[0]);
-      } else if (i == 27) {
-        /* stack bottom */
-        val = start_stack;
-        snprintf(buf, sizeof(buf), "%"PRId64 " ", val);
-      } else {
-        /* for the rest, there is MasterCard */
-        snprintf(buf, sizeof(buf), "0%c", i == 43 ? '\n' : ' ');
-      }
-
-      len = strlen(buf);
-      if (write(fd, buf, len) != len) {
-          return -1;
-      }
+        if (write(fd, buf->str, buf->len) != buf->len) {
+            return -1;
+        }
    }

    return 0;
@@ -12017,7 +12012,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1,
 #endif
 #if defined(TARGET_NR_epoll_create1) && defined(CONFIG_EPOLL_CREATE1)
    case TARGET_NR_epoll_create1:
-        return get_errno(epoll_create1(arg1));
+        return get_errno(epoll_create1(target_to_host_bitmask(arg1, fcntl_flags_tbl)));
 #endif
 #if defined(TARGET_NR_epoll_ctl)
    case TARGET_NR_epoll_ctl:
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3680,14 +3680,26 @@ SRST
 ERST

 DEF("gdb", HAS_ARG, QEMU_OPTION_gdb, \
-    "-gdb dev        wait for gdb connection on 'dev'\n", QEMU_ARCH_ALL)
+    "-gdb dev        accept gdb connection on 'dev'. (QEMU defaults to starting\n"
+    "                the guest without waiting for gdb to connect; use -S too\n"
+    "                if you want it to not start execution.)\n",
+    QEMU_ARCH_ALL)
 SRST
 ``-gdb dev``
-    Wait for gdb connection on device dev (see
-    :ref:`gdb_005fusage`). Typical connections will likely be
-    TCP-based, but also UDP, pseudo TTY, or even stdio are reasonable
-    use case. The latter is allowing to start QEMU from within gdb and
-    establish the connection via a pipe:
+    Accept a gdb connection on device dev (see
+    :ref:`gdb_005fusage`). Note that this option does not pause QEMU
+    execution -- if you want QEMU to not start the guest until you
+    connect with gdb and issue a ``continue`` command, you will need to
+    also pass the ``-S`` option to QEMU.
+
+    The most usual configuration is to listen on a local TCP socket::
+
+        -gdb tcp::3117
+
+    but you can specify other backends; UDP, pseudo TTY, or even stdio
+    are all reasonable use cases. For example, a stdio connection
+    allows you to start QEMU from within gdb and establish the
+    connection via a pipe:

    .. parsed-literal::

--- a/qga/commands-common.h
+++ b/qga/commands-common.h
@@ -0,0 +1,44 @@
+/*
+ * QEMU Guest Agent common/cross-platform common commands
+ *
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef QGA_COMMANDS_COMMON_H
+#define QGA_COMMANDS_COMMON_H
+
+#include "qga-qapi-types.h"
+
+/*
+ * Opaque in this header; each platform file (commands-posix.c,
+ * commands-win32.c) supplies its own struct GuestFileHandle definition.
+ */
+typedef struct GuestFileHandle GuestFileHandle;
+
+/*
+ * guest_file_handle_find:
+ * @id: identifier of the file handle to look up
+ * @errp: error object passed through from the caller
+ *
+ * Look up an open guest file handle by @id.  Callers treat a NULL return
+ * as failure.  NOTE(review): presumably @errp is set in that case -- confirm.
+ */
+GuestFileHandle *guest_file_handle_find(int64_t id, Error **errp);
+
+/*
+ * guest_file_read_unsafe:
+ * @gfh: file handle to read from; dereferenced without a NULL check
+ * @count: maximum number of bytes to read; not range-checked here
+ * @errp: set when the read fails
+ *
+ * Platform-specific read backend.  Validating @gfh and @count is the
+ * caller's job; qmp_guest_file_read() does both before calling this.
+ *
+ * Returns the data read, or NULL with @errp set on failure.
+ */
+GuestFileRead *guest_file_read_unsafe(GuestFileHandle *gfh,
+                                      int64_t count, Error **errp);
+
+#endif /* QGA_COMMANDS_COMMON_H */
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -26,6 +26,7 @@
 #include "qemu/sockets.h"
 #include "qemu/base64.h"
 #include "qemu/cutils.h"
+#include "commands-common.h"

 #ifdef HAVE_UTMPX
 #include <utmpx.h>
@@ -237,12 +238,12 @@ typedef enum {
    RW_STATE_WRITING,
 } RwState;

-typedef struct GuestFileHandle {
+struct GuestFileHandle {
    uint64_t id;
    FILE *fh;
    RwState state;
    QTAILQ_ENTRY(GuestFileHandle) next;
-} GuestFileHandle;
+};

 static struct {
    QTAILQ_HEAD(, GuestFileHandle) filehandles;
@@ -268,7 +269,7 @@ static int64_t guest_file_handle_add(FILE *fh, Error **errp)
    return handle;
 }

-static GuestFileHandle *guest_file_handle_find(int64_t id, Error **errp)
+GuestFileHandle *guest_file_handle_find(int64_t id, Error **errp)
 {
    GuestFileHandle *gfh;

@@ -460,29 +461,14 @@ void qmp_guest_file_close(int64_t handle, Error **errp)
    g_free(gfh);
 }

-struct GuestFileRead *qmp_guest_file_read(int64_t handle, bool has_count,
-                                          int64_t count, Error **errp)
+GuestFileRead *guest_file_read_unsafe(GuestFileHandle *gfh,
+                                      int64_t count, Error **errp)
 {
-    GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
    GuestFileRead *read_data = NULL;
    guchar *buf;
-    FILE *fh;
+    FILE *fh = gfh->fh;
    size_t read_count;

-    if (!gfh) {
-        return NULL;
-    }
-
-    if (!has_count) {
-        count = QGA_READ_COUNT_DEFAULT;
-    } else if (count < 0 || count >= UINT32_MAX) {
-        error_setg(errp, "value '%" PRId64 "' is invalid for argument count",
-                   count);
-        return NULL;
-    }
-
-    fh = gfh->fh;
-
    /* explicitly flush when switching from writing to reading */
    if (gfh->state == RW_STATE_WRITING) {
        int ret = fflush(fh);
@@ -497,7 +483,6 @@ struct GuestFileRead *qmp_guest_file_read(int64_t handle, bool has_count,
    read_count = fread(buf, 1, count, fh);
    if (ferror(fh)) {
        error_setg_errno(errp, errno, "failed to read file");
-        slog("guest-file-read failed, handle: %" PRId64, handle);
    } else {
        buf[read_count] = 0;
        read_data = g_new0(GuestFileRead, 1);
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -37,6 +37,7 @@
 #include "qemu/queue.h"
 #include "qemu/host-utils.h"
 #include "qemu/base64.h"
+#include "commands-common.h"

 #ifndef SHTDN_REASON_FLAG_PLANNED
 #define SHTDN_REASON_FLAG_PLANNED 0x80000000
@@ -50,11 +51,11 @@

 #define INVALID_SET_FILE_POINTER ((DWORD)-1)

-typedef struct GuestFileHandle {
+struct GuestFileHandle {
    int64_t id;
    HANDLE fh;
    QTAILQ_ENTRY(GuestFileHandle) next;
-} GuestFileHandle;
+};

 static struct {
    QTAILQ_HEAD(, GuestFileHandle) filehandles;
@@ -126,7 +127,7 @@ static int64_t guest_file_handle_add(HANDLE fh, Error **errp)
    return handle;
 }

-static GuestFileHandle *guest_file_handle_find(int64_t id, Error **errp)
+GuestFileHandle *guest_file_handle_find(int64_t id, Error **errp)
 {
    GuestFileHandle *gfh;
    QTAILQ_FOREACH(gfh, &guest_file_state.filehandles, next) {
@@ -321,39 +322,19 @@ void qmp_guest_shutdown(bool has_mode, const char *mode, Error **errp)
    }
 }

-GuestFileRead *qmp_guest_file_read(int64_t handle, bool has_count,
-                                   int64_t count, Error **errp)
+GuestFileRead *guest_file_read_unsafe(GuestFileHandle *gfh,
+                                      int64_t count, Error **errp)
 {
    GuestFileRead *read_data = NULL;
    guchar *buf;
-    HANDLE fh;
+    HANDLE fh = gfh->fh;
    bool is_ok;
    DWORD read_count;
-    GuestFileHandle *gfh = guest_file_handle_find(handle, errp);

-    if (!gfh) {
-        return NULL;
-    }
-    if (!has_count) {
-        count = QGA_READ_COUNT_DEFAULT;
-    } else if (count < 0 || count >= UINT32_MAX) {
-        error_setg(errp, "value '%" PRId64
-                   "' is invalid for argument count", count);
-        return NULL;
-    }
-
-    fh = gfh->fh;
-    buf = g_try_malloc0(count + 1);
-    if (!buf) {
-        error_setg(errp,
-                   "failed to allocate sufficient memory "
-                   "to complete the requested service");
-        return NULL;
-    }
+    buf = g_malloc0(count + 1);
    is_ok = ReadFile(fh, buf, count, &read_count, NULL);
    if (!is_ok) {
        error_setg_win32(errp, GetLastError(), "failed to read file");
-        slog("guest-file-read failed, handle %" PRId64, handle);
    } else {
        buf[read_count] = 0;
        read_data = g_new0(GuestFileRead, 1);
--- a/qga/commands.c
+++ b/qga/commands.c
@@ -11,6 +11,7 @@
 */

 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "guest-agent-core.h"
 #include "qga-qapi-commands.h"
 #include "qapi/error.h"
@@ -18,11 +19,18 @@
 #include "qemu/base64.h"
 #include "qemu/cutils.h"
 #include "qemu/atomic.h"
+#include "commands-common.h"

 /* Maximum captured guest-exec out_data/err_data - 16MB */
 #define GUEST_EXEC_MAX_OUTPUT (16*1024*1024)
 /* Allocation and I/O buffer for reading guest-exec out_data/err_data - 4KB */
 #define GUEST_EXEC_IO_SIZE (4*1024)
+/*
+ * Maximum file size to read - 48MB
+ *
+ * (48MB + Base64 3:4 overhead = JSON parser 64 MB limit)
+ */
+#define GUEST_FILE_READ_COUNT_MAX (48 * MiB)

 /* Note: in some situations, like with the fsfreeze, logging may be
 * temporarilly disabled. if it is necessary that a command be able
@@ -547,3 +555,37 @@ error:
    g_free(info);
    return NULL;
 }
+
+/*
+ * Read up to @count bytes from the open guest file identified by @handle.
+ *
+ * When @has_count is false, QGA_READ_COUNT_DEFAULT is used.  A @count
+ * that is negative or larger than GUEST_FILE_READ_COUNT_MAX is rejected
+ * with an error before any read is attempted.
+ *
+ * Returns the data read, or NULL on failure (reported through @errp).
+ */
+GuestFileRead *qmp_guest_file_read(int64_t handle, bool has_count,
+                                   int64_t count, Error **errp)
+{
+    GuestFileHandle *gfh = guest_file_handle_find(handle, errp);
+    GuestFileRead *read_data;
+
+    if (!gfh) {
+        return NULL;
+    }
+    if (!has_count) {
+        count = QGA_READ_COUNT_DEFAULT;
+    } else if (count < 0 || count > GUEST_FILE_READ_COUNT_MAX) {
+        error_setg(errp, "value '%" PRId64 "' is invalid for argument count",
+                   count);
+        return NULL;
+    }
+
+    read_data = guest_file_read_unsafe(gfh, count, errp);
+    if (!read_data) {
+        slog("guest-file-read failed, handle: %" PRId64, handle);
+    }
+
+    return read_data;
+}
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -266,11 +266,13 @@
 ##
 # @guest-file-read:
 #
-# Read from an open file in the guest. Data will be base64-encoded
+# Read from an open file in the guest. Data will be base64-encoded.
+# As this command is just for limited, ad-hoc debugging, such as log
+# file access, the number of bytes to read is limited to 48 MB.
 #
 # @handle: filehandle returned by guest-file-open
 #
-# @count: maximum number of bytes to read (default is 4KB)
+# @count: maximum number of bytes to read (default is 4KB, maximum is 48MB)
 #
 # Returns: @GuestFileRead on success.
 #
--- a/scripts/coverity-scan/coverity-scan.docker
+++ b/scripts/coverity-scan/coverity-scan.docker
@@ -0,0 +1,131 @@
+# syntax=docker/dockerfile:1.0.0-experimental
+#
+# Docker setup for running the "Coverity Scan" tools over the source
+# tree and uploading them to the website, as per
+# https://scan.coverity.com/projects/qemu/builds/new
+# We do this on a fixed config (currently Fedora 30 with a known
+# set of dependencies and a configure command that enables a specific
+# set of options) so that random changes don't result in our accidentally
+# dropping some files from the scan.
+#
+# We don't build on top of the fedora.docker file because we don't
+# want to accidentally change or break the scan config when that
+# is updated.
+
+# The work of actually doing the build is handled by the
+# run-coverity-scan script.
+
+FROM fedora:30
+ENV PACKAGES \
+    alsa-lib-devel \
+    bc \
+    bison \
+    brlapi-devel \
+    bzip2 \
+    bzip2-devel \
+    ccache \
+    clang \
+    curl \
+    cyrus-sasl-devel \
+    dbus-daemon \
+    device-mapper-multipath-devel \
+    findutils \
+    flex \
+    gcc \
+    gcc-c++ \
+    gettext \
+    git \
+    glib2-devel \
+    glusterfs-api-devel \
+    gnutls-devel \
+    gtk3-devel \
+    hostname \
+    libaio-devel \
+    libasan \
+    libattr-devel \
+    libblockdev-mpath-devel \
+    libcap-devel \
+    libcap-ng-devel \
+    libcurl-devel \
+    libepoxy-devel \
+    libfdt-devel \
+    libgbm-devel \
+    libiscsi-devel \
+    libjpeg-devel \
+    libpmem-devel \
+    libnfs-devel \
+    libpng-devel \
+    librbd-devel \
+    libseccomp-devel \
+    libssh-devel \
+    libubsan \
+    libudev-devel \
+    libusbx-devel \
+    libxml2-devel \
+    libzstd-devel \
+    llvm \
+    lzo-devel \
+    make \
+    mingw32-bzip2 \
+    mingw32-curl \
+    mingw32-glib2 \
+    mingw32-gmp \
+    mingw32-gnutls \
+    mingw32-gtk3 \
+    mingw32-libjpeg-turbo \
+    mingw32-libpng \
+    mingw32-libtasn1 \
+    mingw32-nettle \
+    mingw32-nsis \
+    mingw32-pixman \
+    mingw32-pkg-config \
+    mingw32-SDL2 \
+    mingw64-bzip2 \
+    mingw64-curl \
+    mingw64-glib2 \
+    mingw64-gmp \
+    mingw64-gnutls \
+    mingw64-gtk3 \
+    mingw64-libjpeg-turbo \
+    mingw64-libpng \
+    mingw64-libtasn1 \
+    mingw64-nettle \
+    mingw64-pixman \
+    mingw64-pkg-config \
+    mingw64-SDL2 \
+    ncurses-devel \
+    nettle-devel \
+    nss-devel \
+    numactl-devel \
+    perl \
+    perl-Test-Harness \
+    pixman-devel \
+    pulseaudio-libs-devel \
+    python3 \
+    python3-sphinx \
+    PyYAML \
+    rdma-core-devel \
+    SDL2-devel \
+    snappy-devel \
+    sparse \
+    spice-server-devel \
+    systemd-devel \
+    systemtap-sdt-devel \
+    tar \
+    texinfo \
+    usbredir-devel \
+    virglrenderer-devel \
+    vte291-devel \
+    wget \
+    which \
+    xen-devel \
+    xfsprogs-devel \
+    zlib-devel
+ENV QEMU_CONFIGURE_OPTS --python=/usr/bin/python3
+
+RUN dnf install -y $PACKAGES
+RUN rpm -q $PACKAGES | sort > /packages.txt
+ENV PATH $PATH:/usr/libexec/python3-sphinx/
+ENV COVERITY_TOOL_BASE=/coverity-tools
+COPY run-coverity-scan run-coverity-scan
+RUN --mount=type=secret,id=coverity.token,required ./run-coverity-scan --update-tools-only --tokenfile /run/secrets/coverity.token
--- a/scripts/coverity-scan/run-coverity-scan
+++ b/scripts/coverity-scan/run-coverity-scan
@@ -0,0 +1,401 @@
+#!/bin/sh -e
+
+# Upload a created tarball to Coverity Scan, as per
+# https://scan.coverity.com/projects/qemu/builds/new
+
+# This work is licensed under the terms of the GNU GPL version 2,
+# or (at your option) any later version.
+# See the COPYING file in the top-level directory.
+#
+# Copyright (c) 2017-2020 Linaro Limited
+# Written by Peter Maydell
+
+# Note that this script will automatically download and
+# run the (closed-source) coverity build tools, so don't
+# use it if you don't trust them!
+
+# This script assumes that you're running it from a QEMU source
+# tree, and that tree is a fresh clean one, because we do an in-tree
+# build. (This is necessary so that the filenames that the Coverity
+# Scan server sees are relative paths that match up with the component
+# regular expressions it uses; an out-of-tree build won't work for this.)
+# The host machine should have as many of QEMU's dependencies
+# installed as possible, for maximum coverity coverage.
+
+# To do an upload you need to be a maintainer in the Coverity online
+# service, and you will need to know the "Coverity token", which is a
+# secret 8 digit hex string. You can find that from the web UI in the
+# project settings, if you have maintainer access there.
+
+# Command line options:
+#   --dry-run : run the tools, but don't actually do the upload
+#   --docker : create and work inside a docker container
+#   --update-tools-only : update the cached copy of the tools, but don't run them
+#   --tokenfile : file to read Coverity token from
+#   --version ver : specify version being analyzed (default: ask git)
+#   --description desc : specify description of this version (default: ask git)
+#   --srcdir : QEMU source tree to analyze (default: current working dir)
+#   --results-tarball : path to copy the results tarball to (default: don't
+#                       copy it anywhere, just upload it)
+#   --src-tarball : tarball to untar into src dir (default: none); this
+#                   is intended mainly for internal use by the Docker support
+#
+# User-specifiable environment variables:
+#  COVERITY_TOKEN -- Coverity token
+#  COVERITY_EMAIL -- the email address to use for uploads (default:
+#                    looks at your git user.email config)
+#  COVERITY_BUILD_CMD -- make command (default: 'make -jN' where N is
+#                    number of CPUs as determined by 'nproc')
+#  COVERITY_TOOL_BASE -- set to directory to put coverity tools
+#                        (default: /tmp/coverity-tools)
+#
+# You must specify the token, either by environment variable or by
+# putting it in a file and using --tokenfile. Everything else has
+# a reasonable default if this is run from a git tree.
+
+check_upload_permissions() {
+    # Check whether we can do an upload to the server; will exit the script
+    # with status 1 if the check failed (usually a bad token);
+    # will exit the script with status 0 if the check indicated that we
+    # can't upload yet (ie we are at quota)
+    # Assumes that PROJTOKEN, PROJNAME and DRYRUN have been initialized.
+
+    echo "Checking upload permissions..."
+
+    if ! up_perm="$(wget https://scan.coverity.com/api/upload_permitted --post-data "token=$PROJTOKEN&project=$PROJNAME" -q -O -)"; then
+        echo "Coverity Scan API access denied: bad token?"
+        exit 1
+    fi
+
+    # Really up_perm is a JSON response with either
+    # {upload_permitted:true} or {next_upload_permitted_at:<date>}
+    # We do some hacky string parsing instead of properly parsing it.
+    case "$up_perm" in
+        *upload_permitted*true*)
+            echo "Coverity Scan: upload permitted"
+            ;;
+        *next_upload_permitted_at*)
+            if [ "$DRYRUN" = yes ]; then
+                echo "Coverity Scan: upload quota reached, continuing dry run"
+            else
+                echo "Coverity Scan: upload quota reached; stopping here"
+                # Exit success as this isn't a build error.
+                exit 0
+            fi
+            ;;
+        *)
+            echo "Coverity Scan upload check: unexpected result $up_perm"
+            exit 1
+            ;;
+    esac
+}
+
+
+update_coverity_tools () {
+    # Check for whether we need to download the Coverity tools
+    # (either because we don't have a copy, or because it's out of date)
+    # Assumes that COVERITY_TOOL_BASE, PROJTOKEN and PROJNAME are set.
+
+    mkdir -p "$COVERITY_TOOL_BASE"
+    cd "$COVERITY_TOOL_BASE"
+
+    echo "Checking for new version of coverity build tools..."
+    wget https://scan.coverity.com/download/linux64 --post-data "token=$PROJTOKEN&project=$PROJNAME&md5=1" -O coverity_tool.md5.new
+
+    if ! cmp -s coverity_tool.md5 coverity_tool.md5.new; then
+        # out of date md5 or no md5: download new build tool
+        # blow away the old build tool
+        echo "Downloading coverity build tools..."
+        rm -rf coverity_tool coverity_tool.tgz
+        wget https://scan.coverity.com/download/linux64 --post-data "token=$PROJTOKEN&project=$PROJNAME" -O coverity_tool.tgz
+        if ! (cat coverity_tool.md5.new; echo "  coverity_tool.tgz") | md5sum -c --status; then
+            echo "Downloaded tarball didn't match md5sum!"
+            exit 1
+        fi
+        # extract the new one, keeping it corralled in a 'coverity_tool' directory
+        echo "Unpacking coverity build tools..."
+        mkdir -p coverity_tool
+        cd coverity_tool
+        tar xf ../coverity_tool.tgz
+        cd ..
+        mv coverity_tool.md5.new coverity_tool.md5
+    fi
+
+    rm -f coverity_tool.md5.new
+}
+
+
+# Check user-provided environment variables and arguments
+DRYRUN=no        # --dry-run: do the build and tarball but stop before uploading
+UPDATE_ONLY=no   # --update-tools-only: run update_coverity_tools and exit
+DOCKER=no        # --docker: re-run this script inside a docker container
+
+while [ "$#" -ge 1 ]; do
+    case "$1" in
+        --dry-run)
+            shift
+            DRYRUN=yes
+            ;;
+        --update-tools-only)
+            shift
+            UPDATE_ONLY=yes
+            ;;
+        --version)
+            shift
+            if [ $# -eq 0 ]; then
+                echo "--version needs an argument"
+                exit 1
+            fi
+            VERSION="$1"
+            shift
+            ;;
+        --description)
+            shift
+            if [ $# -eq 0 ]; then
+                echo "--description needs an argument"
+                exit 1
+            fi
+            DESCRIPTION="$1"
+            shift
+            ;;
+        --tokenfile)
+            shift
+            if [ $# -eq 0 ]; then
+                echo "--tokenfile needs an argument"
+                exit 1
+            fi
+            COVERITY_TOKEN="$(cat "$1")"  # file contents replace any value from the environment
+            shift
+            ;;
+        --srcdir)
+            shift
+            if [ $# -eq 0 ]; then
+                echo "--srcdir needs an argument"
+                exit 1
+            fi
+            SRCDIR="$1"
+            shift
+            ;;
+        --results-tarball)
+            shift
+            if [ $# -eq 0 ]; then
+                echo "--results-tarball needs an argument"
+                exit 1
+            fi
+            RESULTSTARBALL="$1"
+            shift
+            ;;
+        --src-tarball)
+            shift
+            if [ $# -eq 0 ]; then
+                echo "--src-tarball needs an argument"
+                exit 1
+            fi
+            SRCTARBALL="$1"
+            shift
+            ;;
+        --docker)
+            DOCKER=yes
+            shift
+            ;;
+        *)
+            echo "Unexpected argument '$1'"
+            exit 1
+            ;;
+    esac
+done
+
+if [ -z "$COVERITY_TOKEN" ]; then  # may come from the environment or from --tokenfile
+    echo "COVERITY_TOKEN environment variable not set"
+    exit 1
+fi
+
+if [ -z "$COVERITY_BUILD_CMD" ]; then
+    NPROC=$(nproc)
+    COVERITY_BUILD_CMD="make -j$NPROC"
+    echo "COVERITY_BUILD_CMD: using default '$COVERITY_BUILD_CMD'"
+fi
+
+if [ -z "$COVERITY_TOOL_BASE" ]; then
+    echo "COVERITY_TOOL_BASE: using default /tmp/coverity-tools"
+    COVERITY_TOOL_BASE=/tmp/coverity-tools
+fi
+
+if [ -z "$SRCDIR" ]; then
+    SRCDIR="$PWD"
+fi
+
+PROJTOKEN="$COVERITY_TOKEN"
+PROJNAME=QEMU
+TARBALL=cov-int.tar.xz
+
+if [ "$UPDATE_ONLY" = yes ] && [ "$DOCKER" = yes ]; then
+    echo "Combining --docker and --update-tools-only is not supported"
+    exit 1
+fi
+
+if [ "$UPDATE_ONLY" = yes ]; then
+    # Just do the tools update; we don't need to check whether
+    # we are in a source tree or have upload rights for this,
+    # so do it before some of the command line and source tree checks.
+    update_coverity_tools
+    exit 0
+fi
+
+if [ ! -e "$SRCDIR" ]; then
+    mkdir "$SRCDIR"
+fi
+
+cd "$SRCDIR"
+SRCDIR="$PWD"  # absolute from here on, so a relative --srcdir still resolves after the cd
+if [ -n "$SRCTARBALL" ]; then
+    echo "Untarring source tarball into $SRCDIR..."
+    tar xvf "$SRCTARBALL"  # note: a relative tarball path is looked up from inside $SRCDIR
+fi
+
+echo "Checking this is a QEMU source tree..."
+if ! [ -e "$SRCDIR/VERSION" ]; then
+    echo "Not in a QEMU source tree?"
+    exit 1
+fi
+
+# Fill in defaults used by the non-update-only process
+if [ -z "$VERSION" ]; then
+    VERSION="$(git describe --always HEAD)"
+fi
+
+if [ -z "$DESCRIPTION" ]; then
+    DESCRIPTION="$(git rev-parse HEAD)"
+fi
+
+if [ -z "$COVERITY_EMAIL" ]; then
+    COVERITY_EMAIL="$(git config user.email)"
+fi
+
+# Run ourselves inside docker if that's what the user wants
+if [ "$DOCKER" = yes ]; then
+    # build docker container including the coverity-scan tools
+    # Put the Coverity token into a temporary file that only
+    # we have read access to, and then pass it to docker build
+    # using --secret. This requires at least Docker 18.09.
+    # Mostly what we are trying to do here is ensure we don't leak
+    # the token into the Docker image.
+    umask 077  # everything created below is accessible to this user only
+    SECRETDIR=$(mktemp -d)
+    if [ -z "$SECRETDIR" ]; then
+        echo "Failed to create temporary directory"
+        exit 1
+    fi
+    trap 'rm -rf "$SECRETDIR"' INT TERM EXIT  # remove the token directory however we leave
+    echo "Created temporary directory $SECRETDIR"
+    SECRET="$SECRETDIR/token"
+    echo "$COVERITY_TOKEN" > "$SECRET"
+    echo "Building docker container..."
+    # TODO: This re-downloads the tools every time, rather than
+    # caching and reusing the image produced with the downloaded tools.
+    # Not sure why.
+    # TODO: how do you get 'docker build' to print the output of the
+    # commands it is running to its stdout? This would be useful for debug.
+    DOCKER_BUILDKIT=1 docker build -t coverity-scanner \
+                   --secret id=coverity.token,src="$SECRET" \
+                   -f scripts/coverity-scan/coverity-scan.docker \
+                   scripts/coverity-scan
+    echo "Archiving sources to be analyzed..."
+    ./scripts/archive-source.sh "$SECRETDIR/qemu-sources.tgz"
+    if [ "$DRYRUN" = yes ]; then  # only set here; expanded unquoted below so it can vanish
+        DRYRUNARG=--dry-run
+    fi
+    echo "Running scanner..."
+    # If we need to capture the output tarball, get the inner run to
+    # save it to the secrets directory so we can copy it out before the
+    # directory is cleaned up.
+    if [ ! -z "$RESULTSTARBALL" ]; then
+        RTARGS="--results-tarball /work/cov-int.tar.xz"
+    else
+        RTARGS=""
+    fi
+    # Arrange for this docker run to get access to the sources with -v.
+    # We pass through all the configuration from the outer script to the inner.
+    export COVERITY_EMAIL COVERITY_BUILD_CMD
+    docker run -it --env COVERITY_EMAIL --env COVERITY_BUILD_CMD \
+           -v "$SECRETDIR:/work" coverity-scanner \
+           ./run-coverity-scan --version "$VERSION" \
+           --description "$DESCRIPTION" $DRYRUNARG --tokenfile /work/token \
+           --srcdir /qemu --src-tarball /work/qemu-sources.tgz $RTARGS
+    if [ ! -z "$RESULTSTARBALL" ]; then
+        echo "Copying results tarball to $RESULTSTARBALL..."
+        cp "$SECRETDIR/cov-int.tar.xz" "$RESULTSTARBALL"
+    fi
+    echo "Docker work complete."
+    exit 0  # the container run above stands in for the rest of this script
+fi
+
+# Otherwise, continue with the full build and upload process.
+
+check_upload_permissions
+
+update_coverity_tools
+# Locate the unpacked tools; the glob matches the versioned cov-analysis-* directory.
+TOOLBIN="$(cd "$COVERITY_TOOL_BASE" && echo "$PWD"/coverity_tool/cov-analysis-*/bin)"
+
+if ! test -x "$TOOLBIN/cov-build"; then
+    echo "Couldn't find cov-build in the coverity build-tool directory??"
+    exit 1
+fi
+
+export PATH="$TOOLBIN:$PATH"
+
+cd "$SRCDIR"
+
+echo "Doing make distclean..."
+make distclean
+
+echo "Configuring..."
+# We configure with a fixed set of enables here to ensure that we don't
+# accidentally reduce the scope of the analysis by doing the build on
+# the system that's missing a dependency that we need to build part of
+# the codebase.
+./configure --disable-modules --enable-sdl --enable-gtk \
+    --enable-opengl --enable-vte --enable-gnutls \
+    --enable-nettle --enable-curses --enable-curl \
+    --audio-drv-list=oss,alsa,sdl,pa --enable-virtfs \
+    --enable-vnc --enable-vnc-sasl --enable-vnc-jpeg --enable-vnc-png \
+    --enable-xen --enable-brlapi \
+    --enable-linux-aio --enable-attr \
+    --enable-cap-ng --enable-trace-backends=log --enable-spice --enable-rbd \
+    --enable-xfsctl --enable-libusb --enable-usb-redir \
+    --enable-libiscsi --enable-libnfs --enable-seccomp \
+    --enable-tpm --enable-libssh --enable-lzo --enable-snappy --enable-bzip2 \
+    --enable-numa --enable-rdma --enable-smartcard --enable-virglrenderer \
+    --enable-mpath --enable-libxml2 --enable-glusterfs \
+    --enable-zstd
+
+echo "Making libqemustub.a..."
+make libqemustub.a
+
+echo "Running cov-build..."
+rm -rf cov-int
+mkdir cov-int
+cov-build --dir cov-int $COVERITY_BUILD_CMD  # unquoted on purpose: must split into words
+
+echo "Creating results tarball..."
+tar cvf - cov-int | xz > "$TARBALL"
+
+if [ -n "$RESULTSTARBALL" ]; then
+    echo "Copying results tarball to $RESULTSTARBALL..."
+    cp "$TARBALL" "$RESULTSTARBALL"
+fi
+
+# A dry run stops here, before anything is announced or sent to the scan server.
+if [ "$DRYRUN" = yes ]; then
+    echo "Dry run only, not uploading $TARBALL"
+    exit 0
+fi
+
+echo "Uploading results tarball..."
+curl --form token="$PROJTOKEN" --form email="$COVERITY_EMAIL" \
+     --form file=@"$TARBALL" --form version="$VERSION" \
+     --form description="$DESCRIPTION" \
+     "https://scan.coverity.com/builds?project=$PROJNAME"
+
+echo "Done."
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -71,6 +71,8 @@ Output selection (mutually exclusive):
 			DOC: sections. May be specified multiple times.

 Output selection modifiers:
+  -sphinx-version VER   Generate rST syntax for the specified Sphinx version.
+                        Only works with reStructuredText format.
  -no-doc-sections	Do not output DOC: sections.
  -enable-lineno        Enable output of #define LINENO lines. Only works with
                        reStructuredText format.
@@ -286,6 +288,7 @@ use constant {
 };
 my $output_selection = OUTPUT_ALL;
 my $show_not_found = 0;	# No longer used
+my $sphinx_version = "0.0"; # if not specified, assume old

 my @export_file_list;

@@ -436,6 +439,8 @@ while ($ARGV[0] =~ m/^--?(.*)/) {
 	    $enable_lineno = 1;
    } elsif ($cmd eq 'show-not-found') {
 	$show_not_found = 1;  # A no-op but don't fail
+    } elsif ($cmd eq 'sphinx-version') {
+        $sphinx_version = shift @ARGV;
    } else {
 	# Unknown argument
        usage();
@@ -853,7 +858,7 @@ sub output_function_rst(%) {

 	if ($type =~ m/([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)/) {
 	    # pointer-to-function
-	    print $1 . $parameter . ") (" . $2;
+	    print $1 . $parameter . ") (" . $2 . ")";
 	} else {
 	    print $type . " " . $parameter;
 	}
@@ -963,7 +968,16 @@ sub output_struct_rst(%) {
    my $oldprefix = $lineprefix;
    my $name = $args{'type'} . " " . $args{'struct'};

-    print "\n\n.. c:type:: " . $name . "\n\n";
+    # Sphinx 3.0 and up will emit warnings for "c:type:: struct Foo".
+    # It wants to see "c:struct:: Foo" (and will add the word 'struct' in
+    # the rendered output).
+    if ((split(/\./, $sphinx_version))[0] >= 3) {
+        my $sname = $name;
+        $sname =~ s/^struct //;
+        print "\n\n.. c:struct:: " . $sname . "\n\n";
+    } else {
+        print "\n\n.. c:type:: " . $name . "\n\n";
+    }
    print_lineno($declaration_start_line);
    $lineprefix = "   ";
    output_highlight_rst($args{'purpose'});
--- a/2
+++ b/2
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -4315,6 +4315,11 @@ void qemu_init(int argc, char **argv, char **envp)
                             "explicitly specified 'memory-backend' property");
                exit(EXIT_FAILURE);
        }
+        if (mem_path) {
+            error_report("'-mem-path' can't be used together with "
+                         "'-machine memory-backend'");
+            exit(EXIT_FAILURE);
+        }
        ram_size = backend_size;
    }

--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -705,7 +705,7 @@ static void aarch64_max_initfn(Object *obj)
        u = cpu->isar.id_mmfr4;
        u = FIELD_DP32(u, ID_MMFR4, HPDS, 1); /* AA32HPD */
        u = FIELD_DP32(u, ID_MMFR4, AC2, 1); /* ACTLR2, HACTLR2 */
-        u = FIELD_DP32(t, ID_MMFR4, CNP, 1); /* TTCNP */
+        u = FIELD_DP32(u, ID_MMFR4, CNP, 1); /* TTCNP */
        cpu->isar.id_mmfr4 = u;

        u = cpu->isar.id_aa64dfr0;
--- a/target/arm/gdbstub.c
+++ b/target/arm/gdbstub.c
@@ -47,8 +47,7 @@ int arm_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
        if (gdb_has_xml) {
            return 0;
        }
-        memset(mem_buf, 0, 12);
-        return 12;
+        return gdb_get_zeroes(mem_buf, 12);
    }
    switch (n) {
    case 24:
--- a/target/i386/gdbstub.c
+++ b/target/i386/gdbstub.c
@@ -106,7 +106,7 @@ int x86_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
    } else if (n >= IDX_FP_REGS && n < IDX_FP_REGS + 8) {
        floatx80 *fp = (floatx80 *) &env->fpregs[n - IDX_FP_REGS];
        int len = gdb_get_reg64(mem_buf, cpu_to_le64(fp->low));
-        len += gdb_get_reg16(mem_buf + len, cpu_to_le16(fp->high));
+        len += gdb_get_reg16(mem_buf, cpu_to_le16(fp->high));
        return len;
    } else if (n >= IDX_XMM_REGS && n < IDX_XMM_REGS + CPU_NB_REGS) {
        n -= IDX_XMM_REGS;
--- a/target/i386/hax-windows.c
+++ b/target/i386/hax-windows.c
@@ -185,12 +185,12 @@ int hax_mod_version(struct hax_state *hax, struct hax_module_version *version)

 static char *hax_vm_devfs_string(int vm_id)
 {
-    return g_strdup_printf("/dev/hax_vm/vm%02d", vm_id);
+    return g_strdup_printf("\\\\.\\hax_vm%02d", vm_id);
 }

 static char *hax_vcpu_devfs_string(int vm_id, int vcpu_id)
 {
-    return g_strdup_printf("/dev/hax_vm%02d/vcpu%02d", vm_id, vcpu_id);
+    return g_strdup_printf("\\\\.\\hax_vm%02d_vcpu%02d", vm_id, vcpu_id);
 }

 int hax_host_create_vm(struct hax_state *hax, int *vmid)
--- a/target/m68k/helper.c
+++ b/target/m68k/helper.c
@@ -109,8 +109,8 @@ static int m68k_fpu_gdb_get_reg(CPUM68KState *env, GByteArray *mem_buf, int n)
 {
    if (n < 8) {
        int len = gdb_get_reg16(mem_buf, env->fregs[n].l.upper);
-        len += gdb_get_reg16(mem_buf + len, 0);
-        len += gdb_get_reg64(mem_buf + len, env->fregs[n].l.lower);
+        len += gdb_get_reg16(mem_buf, 0);
+        len += gdb_get_reg64(mem_buf, env->fregs[n].l.lower);
        return len;
    }
    switch (n) {
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2816,11 +2816,11 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
 #if defined(TARGET_PPC64)
 int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run)
 {
-    bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
+    uint16_t flags = run->flags & KVM_RUN_PPC_NMI_DISP_MASK;

    cpu_synchronize_state(CPU(cpu));

-    spapr_mce_req_event(cpu, recovered);
+    spapr_mce_req_event(cpu, flags == KVM_RUN_PPC_NMI_DISP_FULLY_RECOV);

    return 0;
 }
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -4361,30 +4361,34 @@ static void gen_mtmsrd(DisasContext *ctx)
    CHK_SV;

 #if !defined(CONFIG_USER_ONLY)
+    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+        gen_io_start();
+    }
    if (ctx->opcode & 0x00010000) {
-        /* Special form that does not need any synchronisation */
+        /* L=1 form only updates EE and RI */
        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
        tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)],
                        (1 << MSR_RI) | (1 << MSR_EE));
-        tcg_gen_andi_tl(cpu_msr, cpu_msr,
+        tcg_gen_andi_tl(t1, cpu_msr,
                        ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE)));
-        tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
+        tcg_gen_or_tl(t1, t1, t0);
+
+        gen_helper_store_msr(cpu_env, t1);
        tcg_temp_free(t0);
+        tcg_temp_free(t1);
+
    } else {
        /*
         * XXX: we need to update nip before the store if we enter
         *      power saving mode, we will exit the loop directly from
         *      ppc_store_msr
         */
-        if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-            gen_io_start();
-        }
        gen_update_nip(ctx, ctx->base.pc_next);
        gen_helper_store_msr(cpu_env, cpu_gpr[rS(ctx->opcode)]);
-        /* Must stop the translation as machine state (may have) changed */
-        /* Note that mtmsr is not always defined as context-synchronizing */
-        gen_stop_exception(ctx);
    }
+    /* Must stop the translation as machine state (may have) changed */
+    gen_stop_exception(ctx);
 #endif /* !defined(CONFIG_USER_ONLY) */
 }
 #endif /* defined(TARGET_PPC64) */
@@ -4394,15 +4398,23 @@ static void gen_mtmsr(DisasContext *ctx)
    CHK_SV;

 #if !defined(CONFIG_USER_ONLY)
-   if (ctx->opcode & 0x00010000) {
-        /* Special form that does not need any synchronisation */
+    if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
+        gen_io_start();
+    }
+    if (ctx->opcode & 0x00010000) {
+        /* L=1 form only updates EE and RI */
        TCGv t0 = tcg_temp_new();
+        TCGv t1 = tcg_temp_new();
        tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)],
                        (1 << MSR_RI) | (1 << MSR_EE));
-        tcg_gen_andi_tl(cpu_msr, cpu_msr,
+        tcg_gen_andi_tl(t1, cpu_msr,
                        ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE)));
-        tcg_gen_or_tl(cpu_msr, cpu_msr, t0);
+        tcg_gen_or_tl(t1, t1, t0);
+
+        gen_helper_store_msr(cpu_env, t1);
        tcg_temp_free(t0);
+        tcg_temp_free(t1);
+
    } else {
        TCGv msr = tcg_temp_new();

@@ -4411,9 +4423,6 @@ static void gen_mtmsr(DisasContext *ctx)
         *      power saving mode, we will exit the loop directly from
         *      ppc_store_msr
         */
-        if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) {
-            gen_io_start();
-        }
        gen_update_nip(ctx, ctx->base.pc_next);
 #if defined(TARGET_PPC64)
        tcg_gen_deposit_tl(msr, cpu_msr, cpu_gpr[rS(ctx->opcode)], 0, 32);
@@ -4422,10 +4431,9 @@ static void gen_mtmsr(DisasContext *ctx)
 #endif
        gen_helper_store_msr(cpu_env, msr);
        tcg_temp_free(msr);
-        /* Must stop the translation as machine state (may have) changed */
-        /* Note that mtmsr is not always defined as context-synchronizing */
-        gen_stop_exception(ctx);
    }
+    /* Must stop the translation as machine state (may have) changed */
+    gen_stop_exception(ctx);
 #endif
 }

@@ -5003,6 +5011,7 @@ static void gen_slbia(DisasContext *ctx)
    CHK_SV;

    gen_helper_slbia(cpu_env, t0);
+    tcg_temp_free_i32(t0);
 #endif /* defined(CONFIG_USER_ONLY) */
 }

--- a/target/rx/translate.c
+++ b/target/rx/translate.c
@@ -2362,6 +2362,7 @@ static void rx_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs)
        break;
    case DISAS_UPDATE:
        tcg_gen_movi_i32(cpu_pc, ctx->base.pc_next);
+        /* fall through */
    case DISAS_EXIT:
        tcg_gen_exit_tb(NULL, 0);
        break;
--- a/target/sh4/gdbstub.c
+++ b/target/sh4/gdbstub.c
@@ -58,11 +58,9 @@ int superh_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
        return gdb_get_regl(mem_buf, env->fpscr);
    case 25 ... 40:
        if (env->fpscr & FPSCR_FR) {
-            stfl_p(mem_buf, env->fregs[n - 9]);
-        } else {
-            stfl_p(mem_buf, env->fregs[n - 25]);
+            return gdb_get_float32(mem_buf, env->fregs[n - 9]);
        }
-        return 4;
+        return gdb_get_float32(mem_buf, env->fregs[n - 25]);
    case 41:
        return gdb_get_regl(mem_buf, env->ssr);
    case 42:
--- a/target/xtensa/cpu.h
+++ b/target/xtensa/cpu.h
@@ -213,6 +213,9 @@ enum {
 #define MEMCTL_IL0EN 0x1

 #define MAX_INSN_LENGTH 64
+#define MAX_INSNBUF_LENGTH \
+    ((MAX_INSN_LENGTH + sizeof(xtensa_insnbuf_word) - 1) / \
+     sizeof(xtensa_insnbuf_word))
 #define MAX_INSN_SLOTS 32
 #define MAX_OPCODE_ARGS 16
 #define MAX_NAREG 64
--- a/target/xtensa/gdbstub.c
+++ b/target/xtensa/gdbstub.c
@@ -105,8 +105,7 @@ int xtensa_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
        default:
            qemu_log_mask(LOG_UNIMP, "%s from reg %d of unsupported size %d\n",
                          __func__, n, reg->size);
-            memset(mem_buf, 0, reg->size);
-            return reg->size;
+            return gdb_get_zeroes(mem_buf, reg->size);
        }

    case xtRegisterTypeWindow: /*a*/
@@ -115,8 +114,7 @@ int xtensa_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n)
    default:
        qemu_log_mask(LOG_UNIMP, "%s from reg %d of unsupported type %d\n",
                      __func__, n, reg->type);
-        memset(mem_buf, 0, reg->size);
-        return reg->size;
+        return gdb_get_zeroes(mem_buf, reg->size);
    }
 }

--- a/target/xtensa/helper.c
+++ b/target/xtensa/helper.c
@@ -96,6 +96,7 @@ static void init_libisa(XtensaConfig *config)

    config->isa = xtensa_isa_init(config->isa_internal, NULL, NULL);
    assert(xtensa_isa_maxlength(config->isa) <= MAX_INSN_LENGTH);
+    assert(xtensa_insnbuf_size(config->isa) <= MAX_INSNBUF_LENGTH);
    opcodes = xtensa_isa_num_opcodes(config->isa);
    formats = xtensa_isa_num_formats(config->isa);
    regfiles = xtensa_isa_num_regfiles(config->isa);
--- a/target/xtensa/translate.c
+++ b/target/xtensa/translate.c
@@ -72,8 +72,8 @@ struct DisasContext {
    unsigned cpenable;

    uint32_t op_flags;
-    xtensa_insnbuf insnbuf;
-    xtensa_insnbuf slotbuf;
+    xtensa_insnbuf_word insnbuf[MAX_INSNBUF_LENGTH];
+    xtensa_insnbuf_word slotbuf[MAX_INSNBUF_LENGTH];
 };

 static TCGv_i32 cpu_pc;
@@ -1173,16 +1173,6 @@ static void xtensa_tr_init_disas_context(DisasContextBase *dcbase,
    dc->cwoe = tb_flags & XTENSA_TBFLAG_CWOE;
    dc->callinc = ((tb_flags & XTENSA_TBFLAG_CALLINC_MASK) >>
                   XTENSA_TBFLAG_CALLINC_SHIFT);
-
-    /*
-     * FIXME: This will leak when a failed instruction load or similar
-     * event causes us to longjump out of the translation loop and
-     * hence not clean-up in xtensa_tr_tb_stop
-     */
-    if (dc->config->isa) {
-        dc->insnbuf = xtensa_insnbuf_alloc(dc->config->isa);
-        dc->slotbuf = xtensa_insnbuf_alloc(dc->config->isa);
-    }
    init_sar_tracker(dc);
 }

@@ -1272,10 +1262,6 @@ static void xtensa_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
    DisasContext *dc = container_of(dcbase, DisasContext, base);

    reset_sar_tracker(dc);
-    if (dc->config->isa) {
-        xtensa_insnbuf_free(dc->config->isa, dc->insnbuf);
-        xtensa_insnbuf_free(dc->config->isa, dc->slotbuf);
-    }
    if (dc->icount) {
        tcg_temp_free(dc->next_icount);
    }
@@ -3746,7 +3732,7 @@ static const XtensaOpcodeOps core_ops[] = {
        .name = "pfwait.a",
        .translate = translate_nop,
    }, {
-        .name = "pfwait.o",
+        .name = "pfwait.r",
        .translate = translate_nop,
    }, {
        .name = "pitlb",
--- a/tcg/mips/tcg-target.inc.c
+++ b/tcg/mips/tcg-target.inc.c
@@ -404,11 +404,11 @@ typedef enum {

    /* MIPS r6 introduced names for weaker variants of SYNC.  These are
       backward compatible to previous architecture revisions.  */
-    OPC_SYNC_WMB     = OPC_SYNC | 0x04 << 5,
-    OPC_SYNC_MB      = OPC_SYNC | 0x10 << 5,
-    OPC_SYNC_ACQUIRE = OPC_SYNC | 0x11 << 5,
-    OPC_SYNC_RELEASE = OPC_SYNC | 0x12 << 5,
-    OPC_SYNC_RMB     = OPC_SYNC | 0x13 << 5,
+    OPC_SYNC_WMB     = OPC_SYNC | 0x04 << 6,
+    OPC_SYNC_MB      = OPC_SYNC | 0x10 << 6,
+    OPC_SYNC_ACQUIRE = OPC_SYNC | 0x11 << 6,
+    OPC_SYNC_RELEASE = OPC_SYNC | 0x12 << 6,
+    OPC_SYNC_RMB     = OPC_SYNC | 0x13 << 6,

    /* Aliases for convenience.  */
    ALIAS_PADD     = sizeof(void *) == 4 ? OPC_ADDU : OPC_DADDU,
--- a/tests/docker/dockerfiles/debian10.docker
+++ b/tests/docker/dockerfiles/debian10.docker
@@ -34,3 +34,5 @@ RUN apt update && \
        python3-sphinx \
        texinfo \
        $(apt-get -s build-dep qemu | egrep ^Inst | fgrep '[all]' | cut -d\  -f2)
+
+ENV FEATURES docs
--- a/tests/docker/dockerfiles/debian9.docker
+++ b/tests/docker/dockerfiles/debian9.docker
@@ -30,6 +30,4 @@ RUN apt update && \
        pkg-config \
        psmisc \
        python3 \
-        python3-sphinx \
-        texinfo \
        $(apt-get -s build-dep qemu | egrep ^Inst | fgrep '[all]' | cut -d\  -f2)
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -103,4 +103,4 @@ ENV QEMU_CONFIGURE_OPTS --python=/usr/bin/python3
 RUN dnf install -y $PACKAGES
 RUN rpm -q $PACKAGES | sort > /packages.txt
 ENV PATH $PATH:/usr/libexec/python3-sphinx/
-ENV FEATURES mingw clang pyyaml asan
+ENV FEATURES mingw clang pyyaml asan docs
--- a/tests/docker/dockerfiles/travis.docker
+++ b/tests/docker/dockerfiles/travis.docker
@@ -13,5 +13,5 @@ RUN apt-get -y install device-tree-compiler python3 python3-yaml dh-autoreconf g
 # Travis tools require PhantomJS / Neo4j / Maven accessible
 # in their PATH (QEMU build won't access them).
 ENV PATH /usr/local/phantomjs/bin:/usr/local/phantomjs:/usr/local/neo4j-3.2.7/bin:/usr/local/maven-3.5.2/bin:/usr/local/cmake-3.9.2/bin:/usr/local/clang-5.0.0/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
-ENV FEATURES clang pyyaml
+ENV FEATURES clang pyyaml docs
 USER travis
--- a/tests/docker/dockerfiles/ubuntu.docker
+++ b/tests/docker/dockerfiles/ubuntu.docker
@@ -68,4 +68,4 @@ ENV PACKAGES flex bison \
 RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get -y install $PACKAGES
 RUN dpkg -l $PACKAGES | sort > /packages.txt
-ENV FEATURES clang pyyaml sdl2
+ENV FEATURES clang pyyaml sdl2 docs
--- a/tests/docker/dockerfiles/ubuntu1804.docker
+++ b/tests/docker/dockerfiles/ubuntu1804.docker
@@ -54,7 +54,7 @@ ENV PACKAGES flex bison \
 RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get -y install $PACKAGES
 RUN dpkg -l $PACKAGES | sort > /packages.txt
-ENV FEATURES clang pyyaml sdl2
+ENV FEATURES clang pyyaml sdl2 docs

 # https://bugs.launchpad.net/qemu/+bug/1838763
 ENV QEMU_CONFIGURE_OPTS --disable-libssh
--- a/tests/docker/test-misc
+++ b/tests/docker/test-misc
@@ -14,6 +14,8 @@

 . common.rc

+requires docs
+
 cd "$BUILD_DIR"

 # build everything else but QEMU
--- a/tests/qtest/device-introspect-test.c
+++ b/tests/qtest/device-introspect-test.c
@@ -288,7 +288,7 @@ static void add_machine_test_case(const char *mname)
    char *path, *args;

    /* Ignore blacklisted machines */
-    if (g_str_equal("xenfv", mname) || g_str_equal("xenpv", mname)) {
+    if (g_str_has_prefix(mname, "xenfv") || g_str_equal("xenpv", mname)) {
        return;
    }

--- a/tests/qtest/numa-test.c
+++ b/tests/qtest/numa-test.c
@@ -455,7 +455,7 @@ static void pc_hmat_off_cfg(const void *data)
    cli = make_cli(data, "-nodefaults --preconfig "
                         "-smp 2,sockets=2 "
                         "-m 128M,slots=2,maxmem=1G "
-                         "-object memory-backend-ram,size=64M,id=m0 "
+                         "-object memory-backend-ram,size=64M,id=m0,prealloc=y "
                         "-object memory-backend-ram,size=64M,id=m1 "
                         "-numa node,nodeid=0,memdev=m0");
    qs = qtest_init(cli);
--- a/tests/qtest/qom-test.c
+++ b/tests/qtest/qom-test.c
@@ -15,35 +15,6 @@
 #include "qemu/cutils.h"
 #include "libqtest.h"

-static const char *blacklist_x86[] = {
-    "xenfv", "xenpv", NULL
-};
-
-static const struct {
-    const char *arch;
-    const char **machine;
-} blacklists[] = {
-    { "i386", blacklist_x86 },
-    { "x86_64", blacklist_x86 },
-};
-
-static bool is_blacklisted(const char *arch, const char *mach)
-{
-    int i;
-    const char **p;
-
-    for (i = 0; i < ARRAY_SIZE(blacklists); i++) {
-        if (!strcmp(blacklists[i].arch, arch)) {
-            for (p = blacklists[i].machine; *p; p++) {
-                if (!strcmp(*p, mach)) {
-                    return true;
-                }
-            }
-        }
-    }
-    return false;
-}
-
 static void test_properties(QTestState *qts, const char *path, bool recurse)
 {
    char *child_path;
@@ -108,13 +79,16 @@ static void test_machine(gconstpointer data)

 static void add_machine_test_case(const char *mname)
 {
-    const char *arch = qtest_get_arch();
+    char *path;

-    if (!is_blacklisted(arch, mname)) {
-        char *path = g_strdup_printf("qom/%s", mname);
-        qtest_add_data_func(path, g_strdup(mname), test_machine);
-        g_free(path);
+    /* Ignore blacklisted machines ("xenfv*", "xenpv") that have known problems */
+    if (g_str_has_prefix(mname, "xenfv") || g_str_equal("xenpv", mname)) {
+        return;
    }
+
+    path = g_strdup_printf("qom/%s", mname);
+    qtest_add_data_func(path, g_strdup(mname), test_machine);
+    g_free(path);
 }

 int main(int argc, char **argv)
--- a/tests/qtest/test-hmp.c
+++ b/tests/qtest/test-hmp.c
@@ -143,7 +143,7 @@ static void add_machine_test_case(const char *mname)
    char *path;

    /* Ignore blacklisted machines that have known problems */
-    if (!strcmp("xenfv", mname) || !strcmp("xenpv", mname)) {
+    if (g_str_has_prefix(mname, "xenfv") || g_str_equal("xenpv", mname)) {
        return;
    }

--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -559,6 +559,11 @@ bool aio_poll(AioContext *ctx, bool blocking)
    int64_t timeout;
    int64_t start = 0;

+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
    assert(in_aio_context_home_thread(ctx));

    /* aio_notify can avoid the expensive event_notifier_set if
@@ -569,7 +574,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
     * so disable the optimization now.
     */
    if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.).  Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
    }

    qemu_lockcnt_inc(&ctx->list_lock);
@@ -590,7 +601,8 @@ bool aio_poll(AioContext *ctx, bool blocking)
    }

    if (blocking) {
-        atomic_sub(&ctx->notify_me, 2);
+        /* Finish the poll before clearing the flag.  */
+        atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
        aio_notify_accept(ctx);
    }

--- a/util/aio-win32.c
+++ b/util/aio-win32.c
@@ -321,6 +321,12 @@ bool aio_poll(AioContext *ctx, bool blocking)
    int count;
    int timeout;

+    /*
+     * There cannot be two concurrent aio_poll calls for the same AioContext (or
+     * an aio_poll concurrent with a GSource prepare/check/dispatch callback).
+     * We rely on this below to avoid slow locked accesses to ctx->notify_me.
+     */
+    assert(in_aio_context_home_thread(ctx));
    progress = false;

    /* aio_notify can avoid the expensive event_notifier_set if
@@ -331,7 +337,13 @@ bool aio_poll(AioContext *ctx, bool blocking)
     * so disable the optimization now.
     */
    if (blocking) {
-        atomic_add(&ctx->notify_me, 2);
+        atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) + 2);
+        /*
+         * Write ctx->notify_me before computing the timeout
+         * (reading bottom half flags, etc.).  Pairs with
+         * smp_mb in aio_notify().
+         */
+        smp_mb();
    }

    qemu_lockcnt_inc(&ctx->list_lock);
@@ -364,8 +376,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
        ret = WaitForMultipleObjects(count, events, FALSE, timeout);
        if (blocking) {
            assert(first);
-            assert(in_aio_context_home_thread(ctx));
-            atomic_sub(&ctx->notify_me, 2);
+            atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) - 2);
            aio_notify_accept(ctx);
        }

--- a/util/async.c
+++ b/util/async.c
@@ -249,7 +249,14 @@ aio_ctx_prepare(GSource *source, gint    *timeout)
 {
    AioContext *ctx = (AioContext *) source;

-    atomic_or(&ctx->notify_me, 1);
+    atomic_set(&ctx->notify_me, atomic_read(&ctx->notify_me) | 1);
+
+    /*
+     * Write ctx->notify_me before computing the timeout
+     * (reading bottom half flags, etc.).  Pairs with
+     * smp_mb in aio_notify().
+     */
+    smp_mb();

    /* We assume there is no timeout already supplied */
    *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
@@ -268,7 +275,8 @@ aio_ctx_check(GSource *source)
    QEMUBH *bh;
    BHListSlice *s;

-    atomic_and(&ctx->notify_me, ~1);
+    /* Finish computing the timeout before clearing the flag.  */
+    atomic_store_release(&ctx->notify_me, atomic_read(&ctx->notify_me) & ~1);
    aio_notify_accept(ctx);

    QSLIST_FOREACH_RCU(bh, &ctx->bh_list, next) {
@@ -411,10 +419,10 @@ LuringState *aio_get_linux_io_uring(AioContext *ctx)
 void aio_notify(AioContext *ctx)
 {
    /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
-     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
+     * with smp_mb in aio_ctx_prepare or aio_poll.
     */
    smp_mb();
-    if (ctx->notify_me) {
+    if (atomic_read(&ctx->notify_me)) {
        event_notifier_set(&ctx->notifier);
        atomic_mb_set(&ctx->notified, true);
    }
--- a/util/fdmon-io_uring.c
+++ b/util/fdmon-io_uring.c
@@ -88,7 +88,10 @@ static struct io_uring_sqe *get_sqe(AioContext *ctx)
    }

    /* No free sqes left, submit pending sqes first */
-    ret = io_uring_submit(ring);
+    do {
+        ret = io_uring_submit(ring);
+    } while (ret == -EINTR);
+
    assert(ret > 1);
    sqe = io_uring_get_sqe(ring);
    assert(sqe);
@@ -282,7 +285,10 @@ static int fdmon_io_uring_wait(AioContext *ctx, AioHandlerList *ready_list,

    fill_sq_ring(ctx);

-    ret = io_uring_submit_and_wait(&ctx->fdmon_io_uring, wait_nr);
+    do {
+        ret = io_uring_submit_and_wait(&ctx->fdmon_io_uring, wait_nr);
+    } while (ret == -EINTR);
+
    assert(ret >= 0);

    return process_cq_ring(ctx, ready_list);
--- a/util/module.c
+++ b/util/module.c
@@ -177,7 +177,7 @@ bool module_load_one(const char *prefix, const char *lib_name)
    char *version_dir;
 #endif
    const char *search_dir;
-    char *dirs[4];
+    char *dirs[5];
    char *module_name;
    int i = 0, n_dirs = 0;
    int ret;
--- a/util/oslib-posix.c
+++ b/util/oslib-posix.c
@@ -492,8 +492,11 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
                           QEMU_THREAD_JOINABLE);
        addr += memset_thread[i].numpages * hpagesize;
    }
+
+    qemu_mutex_lock(&page_mutex);
    threads_created_flag = true;
    qemu_cond_broadcast(&page_cond);
+    qemu_mutex_unlock(&page_mutex);

    for (i = 0; i < memset_num_threads; i++) {
        qemu_thread_join(&memset_thread[i].pgthread);
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-4.2.92
+4.2.94