Compare commits


217 Commits

Author SHA1 Message Date
Michael Tokarev
83a9cdbd65 Update version for 8.0.4 release
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-07 15:05:10 +03:00
Matt Borgerson
7cb0210fcc target/i386: Check CR0.TS before enter_mmx
When CR0.TS=1, execution of x87 FPU, MMX, and some SSE instructions will
cause a Device Not Available (DNA) exception (#NM). System software uses
this exception event to lazily context switch FPU state.

Before this patch, enter_mmx helpers may be generated just before #NM
generation, prematurely resetting FPU state before the guest has a
chance to save it.

Signed-off-by: Matt Borgerson <contact@mborgerson.com>
Message-ID: <CADc=-s5F10muEhLs4f3mxqsEPAHWj0XFfOC2sfFMVHrk9fcpMg@mail.gmail.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit b2ea6450d8)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-05 20:49:49 +03:00
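
A minimal, self-contained C model of the ordering fix above (illustrative names, not the actual QEMU translator code): the #NM check must come before any enter_mmx side effect, so a lazy-FPU OS still gets a chance to save the old context.

    #include <stdbool.h>
    #include <stdio.h>

    struct cpu {
        bool cr0_ts;           /* CR0.TS: task switched */
        bool fpu_state_valid;  /* guest FPU/MMX context */
    };

    static void emulate_mmx_insn(struct cpu *c)
    {
        if (c->cr0_ts) {
            /* Raise #NM first; the FPU state stays untouched. */
            printf("#NM raised, FPU state preserved\n");
            return;
        }
        /* Only now is it safe to do the enter_mmx equivalent,
         * which resets the x87 tag word and status. */
        c->fpu_state_valid = false;
    }

    int main(void)
    {
        struct cpu c = { .cr0_ts = true, .fpu_state_valid = true };
        emulate_mmx_insn(&c);
        printf("fpu_state_valid = %d\n", c.fpu_state_valid); /* still 1 */
        return 0;
    }
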
Nicholas Piggin
979cdfbbfd target/ppc: Fix VRMA page size for ISA v3.0
Until v2.07s, the VRMA page size (L||LP) was encoded in LPCR[VRMASD].
In v3.0 that moved to the partition table PS field.

The powernv machine can now run KVM HPT guests on POWER9/10 CPUs with
this fix and the patch to add ASDR.

Fixes: 3367c62f52 ("target/ppc: Support for POWER9 native hash")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Message-ID: <20230730111842.39292-1-npiggin@gmail.com>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
(cherry picked from commit 0e2a3ec368)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-05 20:49:49 +03:00
Nicholas Piggin
b96bb74e3a target/ppc: Fix pending HDEC when entering PM state
HDEC is defined to not wake from PM state. There is a check in the HDEC
timer to avoid setting the interrupt if we are in a PM state, but no
check on PM entry to lower HDEC if it has already fired. This can cause
an HDECR wakeup and a QEMU abort with an unsupported exception in Power
Save mode.

Fixes: 4b236b621b ("ppc: Initial HDEC support")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Message-ID: <20230726182230.433945-4-npiggin@gmail.com>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
(cherry picked from commit 9915dac484)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-05 20:49:49 +03:00
Nicholas Piggin
bfe876cb30 target/ppc: Implement ASDR register for ISA v3.0 for HPT
The ASDR register was introduced in ISA v3.0. It has not been
implemented for HPT. With HPT, ASDR is the format of the slbmte RS
operand (containing VSID), which matches the ppc_slb_t field.

Fixes: 3367c62f52 ("target/ppc: Support for POWER9 native hash")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Message-ID: <20230726182230.433945-2-npiggin@gmail.com>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
(cherry picked from commit 9201af0969)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-05 20:49:49 +03:00
Hawkins Jiawei
1d711f97a5 vdpa: Return -EIO if device ack is VIRTIO_NET_ERR in _load_mq()
According to the VirtIO standard, "The class, command and
command-specific-data are set by the driver,
and the device sets the ack byte.
There is little it can do except issue a diagnostic
if ack is not VIRTIO_NET_OK."

Therefore, QEMU should stop sending the queued SVQ commands and
cancel the device startup if the device's ack is not VIRTIO_NET_OK.

The problem is that vhost_vdpa_net_load_mq() returns 1 based on
`*s->status != VIRTIO_NET_OK` when the device's ack is VIRTIO_NET_ERR.
As a result, net->nc->info->load() also returns 1, which makes
vhost_net_start_one() incorrectly assume the device state was
successfully loaded by vhost_vdpa_net_load() and return 0, instead of
jumping to the `fail` label to cancel the device startup, as
vhost_net_start_one() only cancels the device startup when
net->nc->info->load() returns a negative value.

This patch fixes this problem by returning -EIO when the device's
ack is not VIRTIO_NET_OK.

Fixes: f64c7cda69 ("vdpa: Add vhost_vdpa_net_load_mq")
Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Acked-by: Eugenio Pérez <eperezma@redhat.com>
Message-Id: <ec515ebb0b4f56368751b9e318e245a5d994fa72.1688438055.git.yin31149@gmail.com>
Tested-by: Lei Yang <leiyang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit f45fd95ec9)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-05 20:49:49 +03:00
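
A compact sketch of the return-value contract at play (hypothetical function names, not the QEMU code): the caller treats only negative values as failure, so returning 1 silently succeeded while -EIO correctly cancels startup.

    #include <errno.h>
    #include <stdio.h>

    #define VIRTIO_NET_OK 0

    /* Stand-in for vhost_vdpa_net_load_mq()/_load_mac(). */
    static int load_cmd(unsigned char ack)
    {
        if (ack != VIRTIO_NET_OK) {
            return -EIO;  /* was "return 1", which the caller ignored */
        }
        return 0;
    }

    /* Stand-in for vhost_net_start_one(). */
    static int start_one(unsigned char ack)
    {
        int r = load_cmd(ack);
        if (r < 0) {      /* only negative values reach the fail path */
            goto fail;
        }
        return 0;
    fail:
        /* ... cancel device startup ... */
        return r;
    }

    int main(void)
    {
        printf("start_one with bad ack -> %d\n", start_one(1));
        return 0;
    }
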
Hawkins Jiawei
f43e4e2594 vdpa: Return -EIO if device ack is VIRTIO_NET_ERR in _load_mac()
According to the VirtIO standard, "The class, command and
command-specific-data are set by the driver,
and the device sets the ack byte.
There is little it can do except issue a diagnostic
if ack is not VIRTIO_NET_OK."

Therefore, QEMU should stop sending the queued SVQ commands and
cancel the device startup if the device's ack is not VIRTIO_NET_OK.

The problem is that vhost_vdpa_net_load_mac() returns 1 based on
`*s->status != VIRTIO_NET_OK` when the device's ack is VIRTIO_NET_ERR.
As a result, net->nc->info->load() also returns 1, which makes
vhost_net_start_one() incorrectly assume the device state was
successfully loaded by vhost_vdpa_net_load() and return 0, instead of
jumping to the `fail` label to cancel the device startup, as
vhost_net_start_one() only cancels the device startup when
net->nc->info->load() returns a negative value.

This patch fixes this problem by returning -EIO when the device's
ack is not VIRTIO_NET_OK.

Fixes: f73c0c43ac ("vdpa: extract vhost_vdpa_net_load_mac from vhost_vdpa_net_load")
Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Acked-by: Eugenio Pérez <eperezma@redhat.com>
Message-Id: <a21731518644abbd0c495c5b7960527c5911f80d.1688438055.git.yin31149@gmail.com>
Tested-by: Lei Yang <leiyang@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit b479bc3c9d)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-05 20:49:49 +03:00
Hawkins Jiawei
ade1bed2b7 vdpa: Fix possible use-after-free for VirtQueueElement
QEMU uses vhost_handle_guest_kick() to forward guest's available
buffers to the vdpa device in SVQ avail ring.

In vhost_handle_guest_kick(), a `g_autofree` `elem` is used to
iterate through the available VirtQueueElements. This `elem` is
then passed to `svq->ops->avail_handler`, specifically to the
vhost_vdpa_net_handle_ctrl_avail(). If this handler fails to
process the CVQ command, vhost_handle_guest_kick() regains
ownership of the `elem`, and either frees it or requeues it.

The problem is that vhost_vdpa_net_handle_ctrl_avail()
mistakenly frees the `elem` even if it fails to forward the
CVQ command to the vdpa device. This can result in a use-after-free
of the `elem` in vhost_handle_guest_kick().

This patch solves this problem by refactoring
vhost_vdpa_net_handle_ctrl_avail() to free the `elem` only if
it owns it.

Fixes: bd907ae4b0 ("vdpa: manual forward CVQ buffers")
Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
Message-Id: <e3f2d7db477734afe5c6a5ab3fa8b8317514ea34.1688746840.git.yin31149@gmail.com>
Reviewed-by: Eugenio Pérez <eperezma@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 031b1abacb)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-05 20:49:49 +03:00
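
A small sketch of the ownership rule being fixed (generic C with hypothetical names): the handler frees the element only on the path where it has consumed it; on failure the caller keeps ownership and frees or requeues it.

    #include <stdlib.h>

    struct elem { int data; };

    /* Stand-in for vhost_vdpa_net_handle_ctrl_avail(). */
    static int handle_ctrl_avail(struct elem *elem, int forward_ok)
    {
        if (!forward_ok) {
            /* Do NOT free elem: the caller regains ownership. */
            return -1;
        }
        free(elem);  /* consumed: this function now owns and frees it */
        return 0;
    }

    int main(void)
    {
        struct elem *e = malloc(sizeof(*e));
        if (handle_ctrl_avail(e, 0) < 0) {
            free(e); /* caller's cleanup: no double free, no UAF */
        }
        return 0;
    }
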
Zhenzhong Duan
e85ab8f753 vfio/pci: Disable INTx in vfio_realize error path
When vfio realize fails, INTx isn't disabled if it has been enabled.
This may confuse the host side with an unhandled interrupt report.

Fixes: c5478fea27 ("vfio/pci: Respond to KVM irqchip change notifier")
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit adee0da036)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-05 08:39:54 +03:00
Thomas Huth
48be003029 include/hw/i386/x86-iommu: Fix struct X86IOMMU_MSIMessage for big endian hosts
The first bitfield here is supposed to be used as a 64-bit equivalent
to the "uint64_t msi_addr" in the union. To make this work correctly
on big endian hosts, too, the __addr_hi field has to be part of the
bitfield, and the bitfield members must be declared with "uint64_t"
instead of "uint32_t" - otherwise the values are placed in the wrong
bytes on big endian hosts.

The same applies to the 32-bit "msi_data" field: __resved1 must be part
of the bitfield, and the members must be declared with "uint32_t"
instead of "uint16_t".

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230802135723.178083-7-thuth@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
(cherry picked from commit e1e56c07d1)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
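
A generic, runnable illustration of the two rules the commit relies on (field names are made up; this is not the QEMU struct): all members of a bitfield overlaying a uint64_t must use the 64-bit type so they share one storage unit, and, as elsewhere in QEMU's IOMMU structs, the member order is reversed on big-endian hosts.

    #include <stdint.h>
    #include <stdio.h>

    union msi_addr {
        struct {
    #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
            uint64_t addr_hi   : 32;  /* reversed order on big endian */
            uint64_t addr_head : 12;
            uint64_t dest      : 8;
            uint64_t reserved  : 12;
    #else
            uint64_t reserved  : 12;
            uint64_t dest      : 8;
            uint64_t addr_head : 12;
            uint64_t addr_hi   : 32;  /* part of the same 64-bit unit */
    #endif
        };
        uint64_t raw;
    };

    int main(void)
    {
        union msi_addr a = { .raw = 0 };
        a.dest = 0xab;
        /* On both LE and BE hosts, dest lands in bits 19:12 of raw. */
        printf("raw = 0x%016llx\n", (unsigned long long)a.raw);
        return 0;
    }
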
Thomas Huth
dab9a65dfa hw/i386/x86-iommu: Fix endianness issue in x86_iommu_irq_to_msi_message()
The values in "msg" are assembled in host endian byte order (the other
field are also not swapped), so we must not swap the __addr_head here.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230802135723.178083-6-thuth@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Peter Xu <peterx@redhat.com>
(cherry picked from commit 37cf5cecb0)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
Thomas Huth
e0711f74b2 hw/i386/intel_iommu: Fix index calculation in vtd_interrupt_remap_msi()
The values in "addr" are populated locally in this function in host
endian byte order, so we must not swap the index_l field here.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230802135723.178083-5-thuth@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Peter Xu <peterx@redhat.com>
(cherry picked from commit fcd8027423)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
Thomas Huth
4f558fd185 hw/i386/intel_iommu: Fix struct VTDInvDescIEC on big endian hosts
On big endian hosts, we need to reverse the bitfield order in the
struct VTDInvDescIEC, just like it is already done for the other
bitfields in the various structs of the intel-iommu device.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230802135723.178083-4-thuth@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
(cherry picked from commit 4572b22cf9)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
Thomas Huth
b3c94ecf3c hw/i386/intel_iommu: Fix endianness problems related to VTD_IR_TableEntry
The code already tries to do some endianness handling here, but
currently fails badly:
- While it already swaps the data when logging errors / tracing, it fails
  to byteswap the value before e.g. accessing entry->irte.present
- entry->irte.source_id is swapped with le32_to_cpu(), though this is
  a 16-bit value
- The whole union is apparently supposed to be swapped via the 64-bit
  data[2] array, but the struct is a mixture between 32 bit values
  (the first 8 bytes) and 64 bit values (the second 8 bytes), so this
  cannot work as expected.

Fix it by converting the struct to two proper 64-bit bitfields, and
by swapping the values only once for everybody right after reading
the data from memory.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230802135723.178083-3-thuth@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
(cherry picked from commit 642ba89672)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
Thomas Huth
bc5740e178 hw/i386/intel_iommu: Fix trivial endianness problems
After reading the guest memory with dma_memory_read(), we have
to make sure that we byteswap the little endian data to the host's
byte order.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230802135723.178083-2-thuth@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Peter Xu <peterx@redhat.com>
(cherry picked from commit cc2a08480e)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
Yuri Benditovich
715e8123ed pci: do not respond config requests after PCI device eject
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2224964

In migration with VF failover, a Windows guest and ACPI hot
unplug, we do not need to satisfy config requests; otherwise
the guest immediately detects the device and brings up its
driver. Many network VFs end up stuck on the guest PCI bus after
the migration.

Signed-off-by: Yuri Benditovich <yuri.benditovich@daynix.com>
Message-Id: <20230728084049.191454-1-yuri.benditovich@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 348e354417)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
Helge Deller
868b90e44a target/hppa: Move iaoq registers and thus reduce generated code size
On hppa the Instruction Address Offset Queue (IAOQ) registers specify
the addresses of the next instructions to be executed. Each generated TB
writes those registers at least once, so those registers are used heavily
in generated code.

Looking at the generated assembly, for an x86-64 host this code
to write the address $0x7ffe826f into iaoq_f is generated:
0x7f73e8000184:  c7 85 d4 01 00 00 6f 82  movl     $0x7ffe826f, 0x1d4(%rbp)
0x7f73e800018c:  fe 7f
0x7f73e800018e:  c7 85 d8 01 00 00 73 82  movl     $0x7ffe8273, 0x1d8(%rbp)
0x7f73e8000196:  fe 7f

With this trivial change, moving the variables iaoq_f and iaoq_b to
the top of struct CPUArchState, the offset from %rbp is reduced (from
0x1d4 to 0), which allows the x86-64 TCG backend to generate 3 fewer
bytes of code per move instruction:
0x7fc1e800018c:  c7 45 00 6f 82 fe 7f     movl     $0x7ffe826f, (%rbp)
0x7fc1e8000193:  c7 45 04 73 82 fe 7f     movl     $0x7ffe8273, 4(%rbp)

Overall this is a reduction in generated code size (not a reduction in
the number of instructions).
A test run that checks the generated code size by running "/bin/ls"
with qemu-user shows that the code size shrinks from 1616767 to 1569273
bytes, which is ~97% of the former size.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Helge Deller <deller@gmx.de>
Cc: qemu-stable@nongnu.org
(cherry picked from commit f8c0fd9804)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
zhenwei pi
60c42b8623 cryptodev: Handle unexpected request to avoid crash
Generally, the guest side should discover which services the device is
able to offer, then issue requests to the device.

However, it's also possible for a guest to break this rule. Handle
unexpected requests here to avoid a NULL pointer dereference.

Fixes: e7a775fd ('cryptodev: Account statistics')
Cc: Gonglei <arei.gonglei@huawei.com>
Cc: Mauro Matteo Cascella <mcascell@redhat.com>
Cc: Xiao Lei <nop.leixiao@gmail.com>
Cc: Yongkang Jia <kangel@zju.edu.cn>
Reported-by: Yiming Tao <taoym@zju.edu.cn>
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
Message-Id: <20230803024314.29962-3-pizhenwei@bytedance.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 15b11a1da6)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
zhenwei pi
49f1e02bac virtio-crypto: verify src&dst buffer length for sym request
For symmetric algorithms, the length of the ciphertext must be the same
as that of the plaintext.
The missing verification of src_len and dst_len in
virtio_crypto_sym_op_helper() may lead to a buffer overflow or
information disclosure.

This patch was originally written by Yiming Tao for QEMU-SECURITY and is
resent to qemu-devel with a few changes to the error message.

Fixes: CVE-2023-3180
Fixes: 04b9b37edda("virtio-crypto: add data queue processing handler")
Cc: Gonglei <arei.gonglei@huawei.com>
Cc: Mauro Matteo Cascella <mcascell@redhat.com>
Cc: Yiming Tao <taoym@zju.edu.cn>
Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
Message-Id: <20230803024314.29962-2-pizhenwei@bytedance.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 9d38a84347)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
Li Feng
fd902c54e5 vhost: fix the fd leak
When vhost-user reconnects to the backend, the notifier should be
cleaned up. Otherwise, fd resources will be exhausted.

Fixes: f9a09ca3ea ("vhost: add support for configure interrupt")

Signed-off-by: Li Feng <fengli@smartx.com>
Reviewed-by: Raphael Norwitz <raphael.norwitz@nutanix.com>
Message-Id: <20230731121018.2856310-2-fengli@smartx.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Fiona Ebner <f.ebner@proxmox.com>
(cherry picked from commit 18f2971ce4)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
Eric Auger
18963f458f hw/virtio-iommu: Fix potential OOB access in virtio_iommu_handle_command()
In virtio_iommu_handle_command(), when a PROBE request is handled,
output_size takes a value greater than the tail size, and on a subsequent
iteration we can get a stack out-of-bounds access. Initialize
output_size on each iteration.

The issue was found with ASAN. Credits to:
Yiming Tao(Zhejiang University)
Gaoning Pan(Zhejiang University)

Fixes: 1733eebb9e ("virtio-iommu: Implement RESV_MEM probe request")
Signed-off-by: Eric Auger <eric.auger@redhat.com>
Reported-by: Mauro Matteo Cascella <mcascell@redhat.com>
Cc: qemu-stable@nongnu.org

Message-Id: <20230717162126.11693-1-eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit cf2f89edf3)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
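
The shape of the fix in miniature (hypothetical names, not the QEMU code): move the initialization of output_size inside the per-request loop so no value leaks between iterations.

    #include <stddef.h>

    struct tail { unsigned char status; };  /* stand-in reply tail */

    /* Stand-in for the command loop in virtio_iommu_handle_command(). */
    static void handle_commands(unsigned n)
    {
        for (unsigned i = 0; i < n; i++) {
            /* The fix: (re)initialize inside the loop, so a PROBE
             * reply size from iteration i cannot leak into i+1. */
            size_t output_size = sizeof(struct tail);
            /* ... a PROBE request may legitimately enlarge it ... */
            (void)output_size;
        }
    }

    int main(void)
    {
        handle_commands(3);
        return 0;
    }
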
Peter Maydell
71e05c42cc target/m68k: Fix semihost lseek offset computation
The arguments for deposit64 are (value, start, length, fieldval); this
code appears to have been written as though they were (value, fieldval,
start, length). Reorder the parameters to match the actual function.

Cc: qemu-stable@nongnu.org
Fixes: 950272506d ("target/m68k: Use semihosting/syscalls.h")
Reported-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20230801154519.3505531-1-peter.maydell@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit 8caaae7319)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
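
Since the commit spells out the deposit64 contract, a tiny self-contained re-implementation makes the bug concrete (a sketch that follows the documented semantics, not QEMU's bitops.h):

    #include <stdint.h>
    #include <stdio.h>

    /* deposit64(value, start, length, fieldval): replace bits
     * [start, start+length) of value with the low bits of fieldval. */
    static uint64_t deposit64(uint64_t value, int start, int length,
                              uint64_t fieldval)
    {
        uint64_t mask = (~0ULL >> (64 - length)) << start;
        return (value & ~mask) | ((fieldval << start) & mask);
    }

    int main(void)
    {
        uint64_t lo = 0x11223344, hi = 0x55667788;
        /* Correct: put hi into bits 63:32 of the offset. */
        uint64_t off = deposit64(lo, 32, 32, hi);
        /* The buggy order, deposit64(lo, hi, 32, 32), would use hi as
         * the shift amount and produce nonsense. */
        printf("offset = 0x%016llx\n", (unsigned long long)off);
        return 0;
    }
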
Keith Packard
3d81ba8da4 target/nios2: Fix semihost lseek offset computation
The arguments for deposit64 are (value, start, length, fieldval); this
code appears to have been written as though they were (value, fieldval,
start, length). Reorder the parameters to match the actual function.

Signed-off-by: Keith Packard <keithp@keithp.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Fixes: d1e23cbaa4 ("target/nios2: Use semihosting/syscalls.h")
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <20230731235245.295513-1-keithp@keithp.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit 71e2dd6aa1)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
Keith Packard
adef4fe350 target/nios2: Pass semihosting arg to exit
Instead of using R_ARG0 (the semihost function number), use R_ARG1
(the provided exit status).

Signed-off-by: Keith Packard <keithp@keithp.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <20230801152245.332749-1-keithp@keithp.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit c11d5bdae7)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
David Woodhouse
f8592e9431 hw/xen: fix off-by-one in xen_evtchn_set_gsi()
Coverity points out (CID 1508128) a bounds checking error. We need to check
for gsi >= IOAPIC_NUM_PINS, not just greater-than.

Also fix up an assert() that has the same problem, which Coverity didn't see.

Fixes: 4f81baa33e ("hw/xen: Support GSI mapping to PIRQ")
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20230801175747.145906-2-dwmw2@infradead.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit cf885b1957)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
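
The classic off-by-one in a bounds check, as a one-function sketch (IOAPIC_NUM_PINS redefined locally for illustration): valid pin numbers are 0 .. N-1, so the guard must reject gsi == N as well.

    #include <assert.h>

    #define IOAPIC_NUM_PINS 24

    static int gsi_in_range(int gsi)
    {
        /* >= rejects gsi == 24; the buggy ">" check let it through */
        return gsi >= 0 && gsi < IOAPIC_NUM_PINS;
    }

    int main(void)
    {
        assert(gsi_in_range(23));
        assert(!gsi_in_range(IOAPIC_NUM_PINS));
        return 0;
    }
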
Daniel P. Berrangé
5300472ec0 io: remove io watch if TLS channel is closed during handshake
The TLS handshake may take some time to complete, during which time an
I/O watch might be registered with the main loop. If the owner of the
I/O channel invokes qio_channel_close() while the handshake is waiting
to continue, the I/O watch must be removed. Failing to remove it will
later trigger the completion callback, which the owner is not expecting
to receive. In the case of the VNC server, this results in a SEGV as
vnc_disconnect_start() tries to shut down a client connection that is
already gone / NULL.

CVE-2023-3354
Reported-by: jiangyegen <jiangyegen@huawei.com>
Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
(cherry picked from commit 10be627d2b)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
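
A bare-bones GLib sketch of the pattern (the struct is hypothetical; this is not the actual QIOChannel code): remember the watch tag registered during the handshake and remove it from the main loop on close, so no stale completion callback can fire.

    #include <glib.h>

    typedef struct {
        guint handshake_watch; /* 0 when no watch is registered */
    } TLSChannelState;

    static void tls_channel_close(TLSChannelState *s)
    {
        if (s->handshake_watch != 0) {
            /* Drop the pending I/O watch before tearing down the
             * channel, so its callback can never run afterwards. */
            g_source_remove(s->handshake_watch);
            s->handshake_watch = 0;
        }
        /* ... close the underlying channel ... */
    }

    int main(void)
    {
        TLSChannelState s = { .handshake_watch = 0 };
        tls_channel_close(&s); /* no-op: nothing registered */
        return 0;
    }
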
Anthony PERARD
ca93a302a0 xen-block: Avoid leaks on new error path
Commit 1898293990 ("xen-block: Use specific blockdev driver")
introduced a new error path, without taking care of allocated
resources.

So only allocate the qdicts after the error check, and free both
`filename` and `driver` when we are about to return, thus taking
care of both the success and error paths.

Coverity only spotted the leak of the qdicts (*_layer variables).

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Fixes: Coverity CID 1508722, 1398649
Fixes: 1898293990 ("xen-block: Use specific blockdev driver")
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Reviewed-by: Paul Durrant <paul@xen.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <20230704171819.42564-1-anthony.perard@citrix.com>
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
(cherry picked from commit aa36243514)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
Anthony PERARD
157529eee6 thread-pool: signal "request_cond" while locked
thread_pool_free() might have been called on the `pool`, which would
be a reason for worker_thread() to quit. In this case,
`pool->request_cond` has been destroyed.

If worker_thread() didn't manage to signal `request_cond` before it
was destroyed by thread_pool_free(), we get:
    util/qemu-thread-posix.c:198: qemu_cond_signal: Assertion `cond->initialized' failed.

One backtrace:
    __GI___assert_fail (assertion=0x55555614abcb "cond->initialized", file=0x55555614ab88 "util/qemu-thread-posix.c", line=198,
	function=0x55555614ad80 <__PRETTY_FUNCTION__.17104> "qemu_cond_signal") at assert.c:101
    qemu_cond_signal (cond=0x7fffb800db30) at util/qemu-thread-posix.c:198
    worker_thread (opaque=0x7fffb800dab0) at util/thread-pool.c:129
    qemu_thread_start (args=0x7fffb8000b20) at util/qemu-thread-posix.c:505
    start_thread (arg=<optimized out>) at pthread_create.c:486

Reported here:
    https://lore.kernel.org/all/ZJwoK50FcnTSfFZ8@MacBook-Air-de-Roger.local/T/#u

To avoid this issue, keep the lock held while signalling `request_cond`.

Fixes: 900fa208f5 ("thread-pool: replace semaphore with condition variable")
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230714152720.5077-1-anthony.perard@citrix.com>
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
(cherry picked from commit f4f71363fc)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
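
A minimal pthread program showing why signalling under the lock is safe here (a generic model, not the QEMU thread pool): the destroyer takes the same mutex before destroying the condvar, so a signal sent while the mutex is held can never hit an already-destroyed condvar.

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t request_cond = PTHREAD_COND_INITIALIZER;
    static int stopping;

    static void *worker(void *arg)
    {
        (void)arg;
        pthread_mutex_lock(&lock);
        stopping = 1;
        /* Signal while still holding the lock: the destroyer below
         * also takes the lock, so the condvar must still exist here. */
        pthread_cond_signal(&request_cond);
        pthread_mutex_unlock(&lock);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        pthread_create(&t, NULL, worker, NULL);
        pthread_mutex_lock(&lock);
        while (!stopping) {
            pthread_cond_wait(&request_cond, &lock);
        }
        pthread_mutex_unlock(&lock);
        pthread_join(t, NULL);
        /* Safe: the worker can no longer sit between "signal" and
         * "unlock" while the condvar is being destroyed. */
        pthread_cond_destroy(&request_cond);
        return 0;
    }
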
Helge Deller
5a87bcee89 linux-user/armeb: Fix __kernel_cmpxchg() for armeb
Commit 7f4f0d9ea8 ("linux-user/arm: Implement __kernel_cmpxchg with host
atomics") switched to use qatomic_cmpxchg() to swap a word with the memory
content, but missed to endianess-swap the oldval and newval values when
emulating an armeb CPU, which expects words to be stored in big endian in
the guest memory.

The bug can be verified with qemu >= v7.0 on any little-endian host, when
starting the armeb binary of the upx program, which just hangs without
this patch.

Cc: qemu-stable@nongnu.org
Signed-off-by: Helge Deller <deller@gmx.de>
Reported-by: "Markus F.X.J. Oberhumer" <markus@oberhumer.com>
Reported-by: John Reiser <jreiser@BitWagon.com>
Closes: https://github.com/upx/upx/issues/687
Message-Id: <ZMQVnqY+F+5sTNFd@p100>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 38dd78c41e)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
Richard Henderson
f8e673df7e target/ppc: Disable goto_tb with architectural singlestep
The change to use translator_use_goto_tb went too far, as the
CF_SINGLE_STEP flag managed by the translator only handles
gdb single stepping and not the architectural single stepping
modeled in DisasContext.singlestep_enabled.

Fixes: 6e9cc373ec ("target/ppc: Use translator_use_goto_tb")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1795
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 2e718e6657)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-04 19:14:46 +03:00
Richard Henderson
357b42486c util/interval-tree: Use qatomic_set_mb in rb_link_node
Ensure that the stores to rb_left and rb_right are complete before
inserting the new node into the tree.  Otherwise a concurrent reader
could see garbage in the new leaf.

Cc: qemu-stable@nongnu.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 4c8baa02d3)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(Mjt: s/qatomic_set_mb/qatomic_mb_set/ for 8.0 - it was renamed later)
2023-08-04 19:13:51 +03:00
Richard Henderson
b2ec463649 util/interval-tree: Use qatomic_read for left/right while searching
Fixes a race condition (generally seen in builds without optimization)
in which the subtree is re-read after the protecting if condition.

Cc: qemu-stable@nongnu.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 055b86e0f0)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-08-01 08:59:58 +03:00
Peter Maydell
2eee26f579 target/arm: Avoid writing to constant TCGv in trans_CSEL()
In commit 0b188ea05a we changed the implementation of
trans_CSEL() to use tcg_constant_i32(). However, this change
was incorrect, because the implementation of the function
sets up the TCGv_i32 rn and rm to be either zero or else
a TCG temp created in load_reg(), and these TCG temps are
then in both cases written to by the emitted TCG ops.
The result is that we hit a TCG assertion:

qemu-system-arm: ../../tcg/tcg.c:4455: tcg_reg_alloc_mov: Assertion `!temp_readonly(ots)' failed.

(or on a non-debug build, just produce a garbage result)

Adjust the code so that rn and rm are always writeable
temporaries whether the instruction is using the special
case "0" or a normal register as input.

Cc: qemu-stable@nongnu.org
Fixes: 0b188ea05a ("target/arm: Use tcg_constant in trans_CSEL")
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230727103906.2641264-1-peter.maydell@linaro.org
(cherry picked from commit 2b0d656ab6)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 21:06:29 +03:00
Peter Maydell
2bff614256 target/arm: Special case M-profile in debug_helper.c code
A lot of the code called from helper_exception_bkpt_insn() is written
assuming A-profile, but we will also call this helper on M-profile
CPUs when they execute a BKPT insn.  This used to work by accident,
but recent changes mean that we will hit an assert when some of this
code calls down into lower level functions that end up calling
arm_security_space_below_el3(), arm_el_is_aa64(), and other functions
that now explicitly assert that the guest CPU is not M-profile.

Handle M-profile directly to avoid the assertions:
 * in arm_debug_target_el(), M-profile debug exceptions always
   go to EL1
 * in arm_debug_exception_fsr(), M-profile always uses the short
   format FSR (compare commit d7fe699be5, though in this case
   the code in arm_v7m_cpu_do_interrupt() does not need to
   look at the FSR value at all)

Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1775
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230721143239.1753066-1-peter.maydell@linaro.org
(cherry picked from commit 5d78893f39)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
Peter Maydell
220869aae1 hw/arm/smmu: Handle big-endian hosts correctly
The implementation of the SMMUv3 has multiple places where it reads a
data structure from the guest and directly operates on it without
doing a guest-to-host endianness conversion.  Since all SMMU data
structures are little-endian, this means that the SMMU doesn't work
on a big-endian host.  In particular, this causes the Avocado test
  machine_aarch64_virt.py:Aarch64VirtMachine.test_alpine_virt_tcg_gic_max
to fail on an s390x host.

Add appropriate byte-swapping on reads and writes of guest in-memory
data structures so that the device works correctly on big-endian
hosts.

As part of this we constrain queue_read() to operate only on Cmd
structs and queue_write() on Evt structs, because in practice these
are the only data structures the two functions are used with, and we
need to know what the data structure is to be able to byte-swap its
parts correctly.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Message-id: 20230717132641.764660-1-peter.maydell@linaro.org
Cc: qemu-stable@nongnu.org
(cherry picked from commit c6445544d4)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
Viktor Prutyanov
123b4291f9 virtio-net: pass Device-TLB enable/disable events to vhost
If vhost is enabled for virtio-net, Device-TLB enable/disable events
must be passed to vhost for proper IOMMU unmap flag selection.

Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Message-Id: <20230626091258.24453-3-viktor@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit cd9b834688)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
Viktor Prutyanov
8eed78e2bf vhost: register and change IOMMU flag depending on Device-TLB state
The guest can disable or never enable Device-TLB. In these cases,
it can't be used even if enabled in QEMU. So, check the Device-TLB state
before registering the IOMMU notifier and select the unmap flag depending
on that. Also, implement a way to change the IOMMU notifier flag if the
Device-TLB state is changed.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2001312
Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Message-Id: <20230626091258.24453-2-viktor@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit ee071f67f7)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
Viktor Prutyanov
5f3fe5657d virtio-pci: add handling of PCI ATS and Device-TLB enable/disable
According to the PCIe Address Translation Services specification 5.1.3,
the ATS Control Register has an Enable bit to enable/disable ATS. The
guest may enable/disable PCI ATS and, accordingly, Device-TLB for the
VirtIO PCI device. So, raise/lower a flag and call a trigger function to
pass this event to a device implementation.

Signed-off-by: Viktor Prutyanov <viktor@daynix.com>
Message-Id: <20230512135122.70403-2-viktor@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 206e91d143)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
Thomas Huth
0827053612 target/loongarch: Fix the CSRRD CPUID instruction on big endian hosts
The test in tests/avocado/machine_loongarch.py is currently failing
on big endian hosts like s390x. By comparing the traces between running
the QEMU_EFI.fd bios on an s390x and on an x86 host, it's quickly obvious
that the CSRRD instruction for the CPUID is behaving differently. And
indeed: The code currently does a long read (i.e. 64 bit) from the
address that points to the CPUState->cpu_index field (with tcg_gen_ld_tl()
in the trans_csrrd() function). But this cpu_index field is only an "int"
(i.e. 32 bit). While this dirty pointer magic works on little endian hosts,
it of course fails on big endian hosts. Fix it by using a proper helper
function instead.

Message-Id: <20230720175307.854460-1-thuth@redhat.com>
Reviewed-by: Song Gao <gaosong@loongson.cn>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit c34ad45992)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
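
A portable demonstration of why the 64-bit load only "works" on little-endian hosts (generic C, not the QEMU code): on big endian, the low 32 bits of the wide read come from the neighbouring bytes instead of the int's value.

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Two adjacent 32-bit fields, like cpu_index and its neighbour. */
    struct cpu { int32_t cpu_index; int32_t neighbour; };

    int main(void)
    {
        struct cpu c = { .cpu_index = 7, .neighbour = -1 };
        uint64_t wide;

        /* The "dirty pointer magic": a 64-bit read of a 32-bit field. */
        memcpy(&wide, &c.cpu_index, sizeof(wide));

        /* Little endian: low 32 bits == 7 (accidentally correct).
         * Big endian:    low 32 bits == 0xffffffff (the neighbour). */
        printf("low 32 bits = 0x%08" PRIx32 ", cpu_index = %" PRId32 "\n",
               (uint32_t)wide, c.cpu_index);
        return 0;
    }
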
Ilya Leoshkevich
c8b714f047 target/s390x: Fix assertion failure in VFMIN/VFMAX with type 13
Type 13 is reserved, so using it should result in a specification
exception. Due to an off-by-1 error, the code instead triggers an
assertion at a later point in time.

Cc: qemu-stable@nongnu.org
Fixes: da4807527f ("s390x/tcg: Implement VECTOR FP (MAXIMUM|MINIMUM)")
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230724082032.66864-8-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit ff537b0370)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
Ilya Leoshkevich
c5498fdda0 target/s390x: Make MC raise specification exception when class >= 16
MC requires bit positions 8-11 (upper 4 bits of class) to be zeros,
otherwise it must raise a specification exception.

Cc: qemu-stable@nongnu.org
Fixes: 20d143e2ca ("s390x/tcg: Implement MONITOR CALL")
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230724082032.66864-6-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 9c028c057a)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
Ilya Leoshkevich
76507abbe1 target/s390x: Fix ICM with M3=0
When the mask is zero, access exceptions should still be recognized for
1 byte at the second-operand address. CC should be set to 0.

Cc: qemu-stable@nongnu.org
Fixes: e023e832d0 ("s390x: translate engine for s390x CPU")
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230724082032.66864-5-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit a2025557ed)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
Ilya Leoshkevich
e5e8a86064 target/s390x: Fix CONVERT TO LOGICAL/FIXED with out-of-range inputs
CONVERT TO LOGICAL/FIXED deviate from IEEE 754 in that they raise an
inexact exception on out-of-range inputs. float_flag_invalid_cvti
aligns nicely with that behavior, so convert it to
S390_IEEE_MASK_INEXACT.

Cc: qemu-stable@nongnu.org
Fixes: defb0e3157 ("s390x: Implement opcode helpers")
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230724082032.66864-4-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 53684e344a)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
Ilya Leoshkevich
6bd56e0f82 target/s390x: Fix CLM with M3=0
When the mask is zero, access exceptions should still be recognized for
1 byte at the second-operand address. CC should be set to 0.

Cc: qemu-stable@nongnu.org
Fixes: defb0e3157 ("s390x: Implement opcode helpers")
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230724082032.66864-3-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 4b6e4c0b82)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
Ilya Leoshkevich
bdbf5e1016 target/s390x: Make CKSM raise an exception if R2 is odd
R2 designates an even-odd register pair; the instruction should raise
a specification exception when R2 is not even.

Cc: qemu-stable@nongnu.org
Fixes: e023e832d0 ("s390x: translate engine for s390x CPU")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230724082032.66864-2-iii@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 761b0aa938)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
Ilya Leoshkevich
6f7c39a912 tcg/{i386, s390x}: Add earlyclobber to the op_add2's first output
i386 and s390x implementations of op_add2 require an earlyclobber,
which is currently missing. This breaks VCKSM in s390x guests. E.g., on
x86_64 the following op:

    add2_i32 tmp2,tmp3,tmp2,tmp3,tmp3,tmp2   dead: 0 2 3 4 5  pref=none,0xffff

is translated to:

    addl     %ebx, %r12d
    adcl     %r12d, %ebx

Introduce a new C_N1_O1_I4 constraint, and make sure that earlyclobber
of aliased outputs is honored.

Cc: qemu-stable@nongnu.org
Fixes: 82790a8709 ("tcg: Add markup for output requires new register")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230719221310.1968845-7-iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 22d2e5351a)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
Jordan Niethe
59a728a031 tcg/ppc: Fix race in goto_tb implementation
Commit 20b6643324 ("tcg/ppc: Reorg goto_tb implementation") modified
goto_tb to ensure only a single instruction was patched to prevent
incorrect behavior if a thread was in the middle of multiple
instructions when they were replaced. However this introduced a race
between loading the jmp target into TCG_REG_TB and patching and
executing the direct branch.

The relevant part of the goto_tb implementation:

    ld TCG_REG_TB, TARGET_ADDR_LOCATION(TCG_REG_TB)
  patch_location:
    mtctr TCG_REG_TB
    bctr

tb_target_set_jmp_target() will replace 'patch_location' with a direct
branch if the target is in range. The direct branch now relies on
TCG_REG_TB being set up correctly by the ld. Prior to this commit
multiple instructions were patched in for the direct branch case; these
instructions would initialize TCG_REG_TB to the same value as the branch
target.

Imagine the following sequence:

1) Thread A is executing the goto_tb sequence and loads the jmp
   target into TCG_REG_TB.

2) Thread B updates the jmp target address and calls
   tb_target_set_jmp_target(). This patches a new direct branch into the
   goto_tb sequence.

3) Thread A executes the newly patched direct branch. The value in
   TCG_REG_TB still contains the old jmp target.

TCG_REG_TB MUST contain the translation block's tc.ptr. Execution will
eventually crash after performing memory accesses generated from a
faulty value in TCG_REG_TB.

This presents as segfaults or illegal instruction exceptions.

Do not revert commit 20b6643324 as it did fix a different race
condition. Instead remove the direct branch optimization and always use
indirect branches.

The direct branch optimization can be re-added later with a race free
sequence.

Fixes: 20b6643324 ("tcg/ppc: Reorg goto_tb implementation")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1726
Reported-by: Anushree Mathur <anushree.mathur@linux.vnet.ibm.com>
Tested-by: Anushree Mathur <anushree.mathur@linux.vnet.ibm.com>
Tested-by: Michael Tokarev <mjt@tls.msk.ru>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Co-developed-by: Benjamin Gray <bgray@linux.ibm.com>
Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
Message-Id: <20230717093001.13167-1-jniethe5@gmail.com>
(cherry picked from commit 736a1588c1)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 09:10:51 +03:00
Denis V. Lunev
5a61789df8 qemu-nbd: regression with arguments passing into nbd_client_thread()
Unfortunately
    commit 03b6762144
    (8.0:  feb0814b3b)
    Author: Denis V. Lunev <den@openvz.org>
    Date:   Mon Jul 17 16:55:40 2023 +0200
    qemu-nbd: pass structure into nbd_client_thread instead of plain char*
has introduced a regression: struct NbdClientOpts resides on the stack
inside an 'if' block. This specifically means that its stack space can
be reused once execution leaves that block of code.

As a result, the parameters passed into nbd_client_thread could be
overwritten at any moment.

The patch moves the data to the scope of the main() function, effectively
preserving it for the whole process lifetime.

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Eric Blake <eblake@redhat.com>
CC: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
CC: <qemu-stable@nongnu.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-ID: <20230727105828.324314-1-den@openvz.org>
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit e5b815b0de)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(Mjt: add reference to feb0814b3b for 8.0 branch)
2023-07-31 09:10:51 +03:00
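
A self-contained illustration of the lifetime rule (hypothetical names, not the qemu-nbd code): data handed to a thread must live in a scope that outlives the thread, not in a block that exits immediately.

    #include <pthread.h>
    #include <stdio.h>

    struct opts { int verbose; };  /* stand-in for struct NbdClientOpts */

    static void *client_thread(void *arg)
    {
        struct opts *o = arg;
        printf("verbose = %d\n", o->verbose);
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        /* Correct: `o` lives in main()'s scope, so it outlives the
         * thread. The regression declared it inside the `if` block
         * below, whose stack slot may be reused once the block ends. */
        struct opts o = { .verbose = 1 };
        if (1 /* e.g. "--fork was given" */) {
            pthread_create(&t, NULL, client_thread, &o);
        }
        pthread_join(t, NULL);
        return 0;
    }
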
Denis V. Lunev
bdfecfbc1d qemu-nbd: fix regression with qemu-nbd --fork run over ssh
Commit e6df58a557
    Author: Hanna Reitz <hreitz@redhat.com>
    Date:   Wed May 8 23:18:18 2019 +0200
    qemu-nbd: Do not close stderr

has introduced an interesting regression. The original behavior of
    ssh somehost qemu-nbd /home/den/tmp/file -f raw --fork
was the following:
 * qemu-nbd was started as a daemon
 * the command execution finished and ssh exited with success

The patch changed this behavior, and the 'ssh' command now hangs forever.

According to the normal specification of the daemon() call, we should
end up with STDERR pointing to /dev/null. That should be done at the
very end of the successful startup sequence, when the pipe to the
bootstrap process (used for diagnostics) is no longer needed.

This can be achieved in the same way as for the 'qemu-nbd -c' case
(commit 0eaf453e, which also fixed up e6df58a5): copying STDOUT to
STDERR does the trick.

This also leads to proper 'ssh' connection closing, which fixes my
original problem.

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Eric Blake <eblake@redhat.com>
CC: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
CC: Hanna Reitz <hreitz@redhat.com>
CC: <qemu-stable@nongnu.org>
Message-ID: <20230717145544.194786-3-den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 5c56dd27a2)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Denis V. Lunev
feb0814b3b qemu-nbd: pass structure into nbd_client_thread instead of plain char*
We are going to pass additional flag inside next patch.

Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Eric Blake <eblake@redhat.com>
CC: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
CC: <qemu-stable@nongnu.org>
Message-ID: <20230717145544.194786-2-den@openvz.org>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit 03b6762144)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Helge Deller
f90a8b9357 linux-user: Fix signed math overflow in brk() syscall
Fix the math overflow when calculating the new_malloc_size.

new_host_brk_page and brk_page are unsigned integers. If userspace
reduces the heap, new_host_brk_page is lower than brk_page, so the
subtraction wraps around to a huge positive number (when it should
conceptually be negative).

Fix it by adding a proper check, which also makes the code more readable.

Signed-off-by: Helge Deller <deller@gmx.de>
Tested-by: "Markus F.X.J. Oberhumer" <markus@oberhumer.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Fixes: 86f04735ac ("linux-user: Fix brk() to release pages")
Cc: qemu-stable@nongnu.org
Buglink: https://github.com/upx/upx/issues/683
(cherry picked from commit eac78a4b0b)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
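
A tiny demonstration of the unsigned-math pitfall (generic C, not the QEMU code): subtracting a larger unsigned value wraps around to a huge positive number instead of going negative, so the comparison must happen before the subtraction.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t brk_page = 0x10000;
        uint64_t new_host_brk_page = 0x8000;   /* heap was shrunk */

        /* Wraps around to ~2^64 instead of going negative. */
        uint64_t wrong = new_host_brk_page - brk_page;
        printf("wrong size: 0x%016llx\n", (unsigned long long)wrong);

        /* Fix: compare first, subtract only when the result is positive. */
        if (new_host_brk_page > brk_page) {
            printf("grow by 0x%llx\n",
                   (unsigned long long)(new_host_brk_page - brk_page));
        } else {
            printf("shrinking or unchanged: nothing to allocate\n");
        }
        return 0;
    }
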
Helge Deller
c4a4731408 linux-user: Prohibit brk() from shrinking below initial heap address
Since commit 86f04735ac ("linux-user: Fix brk() to release pages") it's
possible for userspace applications to reduce their memory footprint by
calling brk() with a lower address and free up memory. Before that commit
guest heap memory was never unmapped.

But the Linux kernel prohibits reducing brk() below the initial memory
address, which is set at startup by the set_brk() function in binfmt_elf.c.
Such a range check was missed in commit 86f04735ac.

This patch adds the missing check by storing the initial brk value in
initial_target_brk and verifying any new brk address against that value.

Tested with the i386 upx binary from
https://github.com/upx/upx/releases/download/v4.0.2/upx-4.0.2-i386_linux.tar.xz

Signed-off-by: Helge Deller <deller@gmx.de>
Tested-by: "Markus F.X.J. Oberhumer" <markus@oberhumer.com>
Fixes: 86f04735ac ("linux-user: Fix brk() to release pages")
Cc: qemu-stable@nongnu.org
Buglink: https://github.com/upx/upx/issues/683
(cherry picked from commit dfe49864af)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Helge Deller
0102c92a1c linux-user: Fix qemu brk() to not zero bytes on current page
The qemu brk() implementation is too aggressive and clears the remaining
bytes on the current page above the last brk address.

But some existing applications are buggy and read/write bytes above their
current heap address. On a physical machine this does not trigger a
runtime error as long as the access happens on the same page. Additionally,
the Linux kernel allocates only full pages and does no zeroing on already
allocated pages, even if the brk address is lowered.

Fix qemu to behave the same way as the kernel does. Do not touch already
allocated pages, and - when running with different page sizes of guest and
host - zero out only those memory areas where the host page size is bigger
than the guest page size.

Signed-off-by: Helge Deller <deller@gmx.de>
Tested-by: "Markus F.X.J. Oberhumer" <markus@oberhumer.com>
Fixes: 86f04735ac ("linux-user: Fix brk() to release pages")
Cc: qemu-stable@nongnu.org
Buglink: https://github.com/upx/upx/issues/683
(cherry picked from commit 15ad98536a)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Klaus Jensen
5de88d6e10 hw/nvme: fix endianness issue for shadow doorbells
In commit 2fda0726e5 ("hw/nvme: fix missing endian conversions for
doorbell buffers"), we fixed shadow doorbells for big-endian guests
running on little endian hosts. But I did not fix little-endian guests
on big-endian hosts. Fix this.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1765
Fixes: 3f7fe8de3d ("hw/nvme: Implement shadow doorbell buffer support")
Cc: qemu-stable@nongnu.org
Reported-by: Thomas Huth <thuth@redhat.com>
Tested-by: Cédric Le Goater <clg@redhat.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
(cherry picked from commit ea3c76f149)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Andreas Schwab
0167759c9a linux-user: Make sure initial brk(0) is page-aligned
Fixes: 86f04735ac ("linux-user: Fix brk() to release pages")
Signed-off-by: Andreas Schwab <schwab@suse.de>
Message-Id: <mvmpm55qnno.fsf@suse.de>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit d28b3c90cf)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Mauro Matteo Cascella
35720b3d90 ui/vnc-clipboard: fix infinite loop in inflate_buffer (CVE-2023-3255)
A wrong exit condition may lead to an infinite loop in the
`inflate_buffer` function when inflating a valid zlib buffer that
contains some extra bytes. The bug only occurs post-authentication.
Return the buffer immediately if the end of the compressed data has
been reached (Z_STREAM_END).

Fixes: CVE-2023-3255
Fixes: 0bf41cab ("ui/vnc: clipboard support")
Reported-by: Kevin Denis <kevin.denis@synacktiv.com>
Signed-off-by: Mauro Matteo Cascella <mcascell@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Tested-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-ID: <20230704084210.101822-1-mcascell@redhat.com>
(cherry picked from commit d921fea338)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
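
A hedged, self-contained version of the corrected loop using the public zlib API (error handling simplified; not the exact QEMU function): the loop exits on Z_STREAM_END instead of waiting for avail_in to drain, which never happens when extra bytes follow the compressed stream.

    #include <stdlib.h>
    #include <zlib.h>

    /* Returns a malloc'ed buffer with the inflated data, or NULL. */
    static unsigned char *inflate_buffer(unsigned char *in, size_t in_len,
                                         size_t *out_len)
    {
        z_stream s = { 0 };
        size_t cap = in_len ? in_len * 4 : 64;
        unsigned char *out = malloc(cap);

        if (!out || inflateInit(&s) != Z_OK) {
            free(out);
            return NULL;
        }
        s.next_in = in;
        s.avail_in = in_len;

        for (;;) {
            s.next_out = out + s.total_out;
            s.avail_out = cap - s.total_out;
            int ret = inflate(&s, Z_SYNC_FLUSH);
            if (ret == Z_STREAM_END) {
                break;  /* the fix: stop here even if trailing bytes
                         * keep avail_in from ever reaching 0 */
            }
            if (ret != Z_OK) {  /* corrupt or truncated input */
                inflateEnd(&s);
                free(out);
                return NULL;
            }
            if (s.avail_out == 0) {  /* output full: grow and retry */
                unsigned char *n = realloc(out, cap * 2);
                if (!n) { inflateEnd(&s); free(out); return NULL; }
                out = n;
                cap *= 2;
            }
        }
        *out_len = s.total_out;
        inflateEnd(&s);
        return out;
    }
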
Philippe Mathieu-Daudé
d1063b6551 linux-user/arm: Do not allocate a commpage at all for M-profile CPUs
Since commit fbd3c4cff6 ("linux-user/arm: Mark the commpage
executable") executing bare-metal (linked with rdimon.specs)
cortex-M code fails as:

  $ qemu-arm -cpu cortex-m3 ~/hello.exe.m3
  qemu-arm: ../../accel/tcg/user-exec.c:492: page_set_flags: Assertion `last <= GUEST_ADDR_MAX' failed.
  Aborted (core dumped)

Commit 4f5c67f8df ("linux-user/arm: Take more care allocating
commpage") already took care of not allocating a commpage for
M-profile CPUs, however it had to be reverted as commit 6cda41daa2.

Re-introduce the M-profile fix from commit 4f5c67f8df.

Fixes: fbd3c4cff6 ("linux-user/arm: Mark the commpage executable")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1755
Reported-by: Christophe Lyon <christophe.lyon@linaro.org>
Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Anton Johansson <anjo@rev.ng>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230711153408.68389-1-philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit d713cf4d6c)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Richard Henderson
fa72d8bcf4 tcg: Fix info_in_idx increment in layout_arg_by_ref
Off-by-one error, failing to take into account that layout_arg_1
already incremented info_in_idx for the first piece.  We only
need to care about the n-1 TCG_CALL_ARG_BY_REF_N pieces here.

Cc: qemu-stable@nongnu.org
Fixes: 313bdea84d ("tcg: Add TCG_CALL_{RET,ARG}_BY_REF")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1751
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit e18ed26ce7)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Pierrick Bouvier
7b336dcd06 linux-user/syscall: Implement execve without execveat
Support for the execveat syscall was implemented in 55bbe4 and has been
available since QEMU 8.0.0. It relies on the host execveat, which is
widely available on most Linux kernels today.

However, this change breaks qemu-user self-emulation if the "host" qemu
version is older than 8.0.0, as it does not yet implement execveat.
This strange use case happens because most distributions today ship
with binfmt support.

With a concrete failing example:
$ qemu-x86_64-7.2 qemu-x86_64-8.0 /bin/bash -c /bin/ls
/bin/bash: line 1: /bin/ls: Function not implemented
-> not implemented means execve returned ENOSYS

qemu-user-static 7.2 and 8.0 can be conveniently grabbed from the debian
packages qemu-user-static* [1].

One usage of this is running wine-arm64 from linux-x64 (details in [2]).
It was while updating the qemu embedded in a docker image that we ran
into this issue.

Updating the host qemu is not always possible: it is either complicated,
requires recompiling, or is simply not accessible (GitLab CI, GitHub
Actions). Thus, it is worth implementing execve without relying on
execveat, which is the goal of this patch.

This patch was tested with example presented in this commit message.

[1] http://ftp.us.debian.org/debian/pool/main/q/qemu/
[2] https://www.linaro.org/blog/emulate-windows-on-arm/

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
Message-Id: <20230705121023.973284-1-pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 7a8d9f3a0e)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
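
A hedged sketch of the idea (not the actual QEMU syscall code): emulate the guest's execve with the host execve(2) directly, keeping execveat(2) only for the guest's own execveat calls, so a pre-8.0 "host" qemu underneath still works.

    #include <unistd.h>

    /* Stand-in for the guest execve path: no execveat dependency. */
    static int do_guest_execve(const char *path, char *const argv[],
                               char *const envp[])
    {
        return execve(path, argv, envp);
    }

    int main(void)
    {
        char *argv[] = { "/bin/true", NULL };
        char *envp[] = { NULL };
        /* Returns only if execve failed. */
        return do_guest_execve(argv[0], argv, envp);
    }
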
Olaf Hering
c280ac3b60 hw/ide/piix: properly initialize the BMIBA register
According to the 82371FB documentation (82371FB.pdf, 2.3.9. BMIBA-BUS
MASTER INTERFACE BASE ADDRESS REGISTER, April 1997), the register is
32 bits wide. To properly reset it to default values, all 32 bits need to
be cleared. Bit #0 "Resource Type Indicator (RTE)" needs to be enabled.

The initial change wrote just the lower 8 bits, leaving parts of the "Bus
Master Interface Base Address" at bits 15:4 unchanged.

Fixes: e6a71ae327 ("Add support for 82371FB (Step A1) and Improved support for 82371SB (Function 1)")

Signed-off-by: Olaf Hering <olaf@aepfle.de>
Reviewed-by: Bernhard Beschow <shentey@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20230712074721.14728-1-olaf@aepfle.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 230dfd9257)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Marcin Nowakowski
520d5fb4cb target/mips: enable GINVx support for I6400 and I6500
GINVI and GINVT operations are supported on MIPS I6400 and I6500 cores,
so indicate that properly in CP0.Config5 register bits [16:15].

Cc: qemu-stable@nongnu.org
Signed-off-by: Marcin Nowakowski <marcin.nowakowski@fungible.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20230630072806.3093704-1-marcin.nowakowski@fungible.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit baf21eebc3)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Ilya Leoshkevich
b2b1b99da9 target/s390x: Fix LRA when DAT is off
LRA should perform DAT regardless of whether it's on or off.
Disable DAT check for MMU_S390_LRA.

Fixes: defb0e3157 ("s390x: Implement opcode helpers")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: qemu-stable@nongnu.org
Message-Id: <20230704081506.276055-7-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit b0ef81062d)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Ilya Leoshkevich
523f529d40 target/s390x: Fix LRA overwriting the top 32 bits on DAT error
When a DAT error occurs, LRA is supposed to write the error information
to the bottom 32 bits of R1, and leave the top 32 bits of R1 alone.

Fix by passing the original value of R1 into helper and copying the
top 32 bits to the return value.
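
A minimal sketch of the merge (helper name illustrative):

    #include <stdint.h>

    /* On a DAT error, bits 31:0 of R1 receive the error information
     * while bits 63:32 keep their previous contents. */
    static uint64_t lra_error_result(uint64_t old_r1, uint32_t error_info)
    {
        return (old_r1 & 0xffffffff00000000ULL) | error_info;
    }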

Fixes: d8fe4a9c28 ("target-s390: Convert LRA")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: qemu-stable@nongnu.org
Message-Id: <20230704081506.276055-6-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 6da311a60d)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Ilya Leoshkevich
eefa524832 target/s390x: Fix MVCRL with a large value in R0
Using a large R0 causes an assertion error:

    qemu-s390x: target/s390x/tcg/mem_helper.c:183: access_prepare_nf: Assertion `size > 0 && size <= 4096' failed.

Even though PoP explicitly advises against using more than 8 bits for the
size, an emulator crash is never a good thing.

Fix by truncating the size to 8 bits.
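
The gist of the fix, as a hedged sketch (helper name illustrative):

    #include <stdint.h>

    /* Use only the low 8 bits of R0 as the operand length, so it can
     * never violate access_prepare_nf()'s size assertion. */
    static uint64_t mvcrl_length(uint64_t r0)
    {
        return r0 & 0xff;
    }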

Fixes: ea0a1053e2 ("s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3 for the s390x")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: qemu-stable@nongnu.org
Message-Id: <20230704081506.276055-5-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 92a5753461)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Ilya Leoshkevich
aa308958e6 target/s390x: Fix MDEB and MDEBR
These instructions multiply 32 bits by 32 bits, not 32 bits by 64 bits.
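
A sketch of the corrected operation using QEMU's softfloat API (status
handling abbreviated):

    #include "fpu/softfloat.h"

    /* MDEB(R): widen both 32-bit operands, then multiply in 64-bit
     * precision -- not a 32-bit by 64-bit multiply. */
    static float64 mdeb(float32 a, float32 b, float_status *st)
    {
        return float64_mul(float32_to_float64(a, st),
                           float32_to_float64(b, st), st);
    }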

Fixes: 83b00736f3 ("target-s390: Convert FP MULTIPLY")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: qemu-stable@nongnu.org
Message-Id: <20230704081506.276055-4-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit fed9a4fe0c)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Ilya Leoshkevich
70ba7cbf50 target/s390x: Fix EPSW CC reporting
EPSW should explicitly calculate and insert CC, like IPM does.

Fixes: e30a9d3fea ("target-s390: Implement EPSW")
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: qemu-stable@nongnu.org
Message-Id: <20230704081506.276055-3-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 110b1bac2e)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Avihai Horon
f48e3ec581 vfio: Fix null pointer dereference bug in vfio_bars_finalize()
vfio_realize() has the following flow:
1. vfio_bars_prepare() -- sets VFIOBAR->size.
2. msix_early_setup().
3. vfio_bars_register() -- allocates VFIOBAR->mr.

After vfio_bars_prepare() is called, msix_early_setup() can fail. If it
does fail, vfio_bars_register() is never called and VFIOBAR->mr is not
allocated.

In this case, vfio_bars_finalize() is called as part of the error flow
to free the bars' resources. However, vfio_bars_finalize() calls
object_unparent() for VFIOBAR->mr after checking only VFIOBAR->size, and
thus we get a null pointer dereference.

Fix it by checking VFIOBAR->mr in vfio_bars_finalize().

Fixes: 89d5202edc ("vfio/pci: Allow relocating MSI-X MMIO")
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 8af87a3ec7)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Helge Deller
43462f7706 linux-user: Fix accept4(SOCK_NONBLOCK) syscall
The Linux accept4() syscall allows two flags only: SOCK_NONBLOCK and
SOCK_CLOEXEC, and returns -EINVAL if any other bits have been set.

Change the qemu implementation accordingly, which means we cannot use
the fcntl_flags_tbl[] translation table which allows too many other
values.
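
A sketch of the shape of the fix, using linux-user's TARGET_* naming
conventions (exact code abbreviated):

    /* accept4() accepts exactly two flags; anything else is EINVAL,
     * and no fcntl_flags_tbl[] translation is involved. */
    if (flags & ~(TARGET_SOCK_CLOEXEC | TARGET_SOCK_NONBLOCK)) {
        return -TARGET_EINVAL;
    }
    int host_flags = 0;
    if (flags & TARGET_SOCK_NONBLOCK) {
        host_flags |= SOCK_NONBLOCK;
    }
    if (flags & TARGET_SOCK_CLOEXEC) {
        host_flags |= SOCK_CLOEXEC;
    }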

Besides the correction in behaviour, this actually fixes the accept4()
emulation for hppa, mips and alpha targets for which SOCK_NONBLOCK is
different than TARGET_SOCK_NONBLOCK (aka O_NONBLOCK).

The fix can be verified with the testcase of the debian lwt package,
which hangs forever in a read() syscall without this patch.

Signed-off-by: Helge Deller <deller@gmx.de>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit dca4c8384d)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:38 +03:00
Helge Deller
741df485e8 linux-user: Fix fcntl() and fcntl64() to return O_LARGEFILE for 32-bit targets
When running a 32-bit guest on a 64-bit host, fcntl[64](F_GETFL) should
return with the TARGET_O_LARGEFILE flag set, because all 64-bit hosts
support large files unconditionally.

But on 64-bit hosts, O_LARGEFILE has the value 0, so the flag
translation can't be done with the fcntl_flags_tbl[]. Instead add the
TARGET_O_LARGEFILE flag afterwards.

Note that for 64-bit guests the compiler will optimize away this code,
since TARGET_O_LARGEFILE is zero.
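
A hedged sketch of the post-translation step (helper and table names as
used in linux-user/syscall.c):

    ret = host_to_target_bitmask(ret, fcntl_flags_tbl);
    #if O_LARGEFILE == 0
    /* 64-bit host: large files are always supported, but the table
     * cannot map a zero-valued host flag, so add it afterwards. For
     * 64-bit guests TARGET_O_LARGEFILE is 0 and this is a no-op. */
    ret |= TARGET_O_LARGEFILE;
    #endif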

Signed-off-by: Helge Deller <deller@gmx.de>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit e0ddf8eac9)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-31 08:52:37 +03:00
Nicholas Piggin
73d6ac24c8 hw/ppc: Fix clock update drift
The clock update logic reads the clock twice to compute the new clock
value, with a value derived from the later time subtracted from a value
derived from the earlier time. The delta causes time to be lost.

This can ultimately result in time becoming unsynchronized between CPUs
and that can cause OS lockups, timeouts, watchdogs, etc. This can be
seen running a KVM guest (that causes lots of TB updates) on a powernv
SMP machine.

Fix this by reading the clock once.
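
The pattern, as an illustrative sketch (clock_now() and the conversion
helpers are hypothetical):

    /* Buggy: two reads; the nanoseconds elapsed between them are
     * silently folded into the offset, losing time on every update:
     *     offset = time_to_tb(clock_now()) - host_ticks(clock_now());
     * Fixed: a single read feeds both derivations. */
    int64_t  now    = clock_now();
    uint64_t tb     = time_to_tb(now);
    int64_t  offset = tb - host_ticks(now);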

Cc: qemu-stable@nongnu.org
Fixes: dbdd25065e ("Implement time-base start/stop helpers.")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Frederic Barrat <fbarrat@linux.ibm.com>
Message-ID: <20230629020713.327745-1-npiggin@gmail.com>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
(cherry picked from commit 2ad2e113de)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-17 14:23:05 +03:00
Fiona Ebner
720db12b8b qemu_cleanup: begin drained section after vm_shutdown()
in order to avoid requests being stuck in a BlockBackend's request
queue during cleanup. Having such requests can lead to a deadlock [0]
with a virtio-scsi-pci device using iothread that's busy with IO when
initiating a shutdown with QMP 'quit'.

There is a race where such a queued request can continue sometime
(maybe after bdrv_child_free()?) during bdrv_root_unref_child() [1].
The completion will hold the AioContext lock and wait for the BQL
during SCSI completion, but the main thread will hold the BQL and
wait for the AioContext as part of bdrv_root_unref_child(), leading to
the deadlock [0].

[0]:

> Thread 3 (Thread 0x7f3bbd87b700 (LWP 135952) "qemu-system-x86"):
> #0  __lll_lock_wait (futex=futex@entry=0x564183365f00 <qemu_global_mutex>, private=0) at lowlevellock.c:52
> #1  0x00007f3bc1c0d843 in __GI___pthread_mutex_lock (mutex=0x564183365f00 <qemu_global_mutex>) at ../nptl/pthread_mutex_lock.c:80
> #2  0x0000564182939f2e in qemu_mutex_lock_impl (mutex=0x564183365f00 <qemu_global_mutex>, file=0x564182b7f774 "../softmmu/physmem.c", line=2593) at ../util/qemu-thread-posix.c:94
> #3  0x000056418247cc2a in qemu_mutex_lock_iothread_impl (file=0x564182b7f774 "../softmmu/physmem.c", line=2593) at ../softmmu/cpus.c:504
> #4  0x00005641826d5325 in prepare_mmio_access (mr=0x5641856148a0) at ../softmmu/physmem.c:2593
> #5  0x00005641826d6fe7 in address_space_stl_internal (as=0x56418679b310, addr=4276113408, val=16418, attrs=..., result=0x0, endian=DEVICE_LITTLE_ENDIAN) at /home/febner/repos/qemu/memory_ldst.c.inc:318
> #6  0x00005641826d7154 in address_space_stl_le (as=0x56418679b310, addr=4276113408, val=16418, attrs=..., result=0x0) at /home/febner/repos/qemu/memory_ldst.c.inc:357
> #7  0x0000564182374b07 in pci_msi_trigger (dev=0x56418679b0d0, msg=...) at ../hw/pci/pci.c:359
> #8  0x000056418237118b in msi_send_message (dev=0x56418679b0d0, msg=...) at ../hw/pci/msi.c:379
> #9  0x0000564182372c10 in msix_notify (dev=0x56418679b0d0, vector=8) at ../hw/pci/msix.c:542
> #10 0x000056418243719c in virtio_pci_notify (d=0x56418679b0d0, vector=8) at ../hw/virtio/virtio-pci.c:77
> #11 0x00005641826933b0 in virtio_notify_vector (vdev=0x5641867a34a0, vector=8) at ../hw/virtio/virtio.c:1985
> #12 0x00005641826948d6 in virtio_irq (vq=0x5641867ac078) at ../hw/virtio/virtio.c:2461
> #13 0x0000564182694978 in virtio_notify (vdev=0x5641867a34a0, vq=0x5641867ac078) at ../hw/virtio/virtio.c:2473
> #14 0x0000564182665b83 in virtio_scsi_complete_req (req=0x7f3bb000e5d0) at ../hw/scsi/virtio-scsi.c:115
> #15 0x00005641826670ce in virtio_scsi_complete_cmd_req (req=0x7f3bb000e5d0) at ../hw/scsi/virtio-scsi.c:641
> #16 0x000056418266736b in virtio_scsi_command_complete (r=0x7f3bb0010560, resid=0) at ../hw/scsi/virtio-scsi.c:712
> #17 0x000056418239aac6 in scsi_req_complete (req=0x7f3bb0010560, status=2) at ../hw/scsi/scsi-bus.c:1526
> #18 0x000056418239e090 in scsi_handle_rw_error (r=0x7f3bb0010560, ret=-123, acct_failed=false) at ../hw/scsi/scsi-disk.c:242
> #19 0x000056418239e13f in scsi_disk_req_check_error (r=0x7f3bb0010560, ret=-123, acct_failed=false) at ../hw/scsi/scsi-disk.c:265
> #20 0x000056418239e482 in scsi_dma_complete_noio (r=0x7f3bb0010560, ret=-123) at ../hw/scsi/scsi-disk.c:340
> #21 0x000056418239e5d9 in scsi_dma_complete (opaque=0x7f3bb0010560, ret=-123) at ../hw/scsi/scsi-disk.c:371
> #22 0x00005641824809ad in dma_complete (dbs=0x7f3bb000d9d0, ret=-123) at ../softmmu/dma-helpers.c:107
> #23 0x0000564182480a72 in dma_blk_cb (opaque=0x7f3bb000d9d0, ret=-123) at ../softmmu/dma-helpers.c:127
> #24 0x00005641827bf78a in blk_aio_complete (acb=0x7f3bb00021a0) at ../block/block-backend.c:1563
> #25 0x00005641827bfa5e in blk_aio_write_entry (opaque=0x7f3bb00021a0) at ../block/block-backend.c:1630
> #26 0x000056418295638a in coroutine_trampoline (i0=-1342102448, i1=32571) at ../util/coroutine-ucontext.c:177
> #27 0x00007f3bc0caed40 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
> #28 0x00007f3bbd8757f0 in ?? ()
> #29 0x0000000000000000 in ?? ()
>
> Thread 1 (Thread 0x7f3bbe3e9280 (LWP 135944) "qemu-system-x86"):
> #0  __lll_lock_wait (futex=futex@entry=0x5641856f2a00, private=0) at lowlevellock.c:52
> #1  0x00007f3bc1c0d8d1 in __GI___pthread_mutex_lock (mutex=0x5641856f2a00) at ../nptl/pthread_mutex_lock.c:115
> #2  0x0000564182939f2e in qemu_mutex_lock_impl (mutex=0x5641856f2a00, file=0x564182c0e319 "../util/async.c", line=728) at ../util/qemu-thread-posix.c:94
> #3  0x000056418293a140 in qemu_rec_mutex_lock_impl (mutex=0x5641856f2a00, file=0x564182c0e319 "../util/async.c", line=728) at ../util/qemu-thread-posix.c:149
> #4  0x00005641829532d5 in aio_context_acquire (ctx=0x5641856f29a0) at ../util/async.c:728
> #5  0x000056418279d5df in bdrv_set_aio_context_commit (opaque=0x5641856e6e50) at ../block.c:7493
> #6  0x000056418294e288 in tran_commit (tran=0x56418630bfe0) at ../util/transactions.c:87
> #7  0x000056418279d880 in bdrv_try_change_aio_context (bs=0x5641856f7130, ctx=0x56418548f810, ignore_child=0x0, errp=0x0) at ../block.c:7626
> #8  0x0000564182793f39 in bdrv_root_unref_child (child=0x5641856f47d0) at ../block.c:3242
> #9  0x00005641827be137 in blk_remove_bs (blk=0x564185709880) at ../block/block-backend.c:914
> #10 0x00005641827bd689 in blk_remove_all_bs () at ../block/block-backend.c:583
> #11 0x0000564182798699 in bdrv_close_all () at ../block.c:5117
> #12 0x000056418248a5b2 in qemu_cleanup () at ../softmmu/runstate.c:821
> #13 0x0000564182738603 in qemu_default_main () at ../softmmu/main.c:38
> #14 0x0000564182738631 in main (argc=30, argv=0x7ffd675a8a48) at ../softmmu/main.c:48
>
> (gdb) p *((QemuMutex*)0x5641856f2a00)
> $1 = {lock = {__data = {__lock = 2, __count = 2, __owner = 135952, ...
> (gdb) p *((QemuMutex*)0x564183365f00)
> $2 = {lock = {__data = {__lock = 2, __count = 0, __owner = 135944, ...

[1]:

> Thread 1 "qemu-system-x86" hit Breakpoint 5, bdrv_drain_all_end () at ../block/io.c:551
> #0  bdrv_drain_all_end () at ../block/io.c:551
> #1  0x00005569810f0376 in bdrv_graph_wrlock (bs=0x0) at ../block/graph-lock.c:156
> #2  0x00005569810bd3e0 in bdrv_replace_child_noperm (child=0x556982e2d7d0, new_bs=0x0) at ../block.c:2897
> #3  0x00005569810bdef2 in bdrv_root_unref_child (child=0x556982e2d7d0) at ../block.c:3227
> #4  0x00005569810e8137 in blk_remove_bs (blk=0x556982e42880) at ../block/block-backend.c:914
> #5  0x00005569810e7689 in blk_remove_all_bs () at ../block/block-backend.c:583
> #6  0x00005569810c2699 in bdrv_close_all () at ../block.c:5117
> #7  0x0000556980db45b2 in qemu_cleanup () at ../softmmu/runstate.c:821
> #8  0x0000556981062603 in qemu_default_main () at ../softmmu/main.c:38
> #9  0x0000556981062631 in main (argc=30, argv=0x7ffd7a82a418) at ../softmmu/main.c:48
> [Switching to Thread 0x7fe76dab2700 (LWP 103649)]
>
> Thread 3 "qemu-system-x86" hit Breakpoint 4, blk_inc_in_flight (blk=0x556982e42880) at ../block/block-backend.c:1505
> #0  blk_inc_in_flight (blk=0x556982e42880) at ../block/block-backend.c:1505
> #1  0x00005569810e8f36 in blk_wait_while_drained (blk=0x556982e42880) at ../block/block-backend.c:1312
> #2  0x00005569810e9231 in blk_co_do_pwritev_part (blk=0x556982e42880, offset=3422961664, bytes=4096, qiov=0x556983028060, qiov_offset=0, flags=0) at ../block/block-backend.c:1402
> #3  0x00005569810e9a4b in blk_aio_write_entry (opaque=0x556982e2cfa0) at ../block/block-backend.c:1628
> #4  0x000055698128038a in coroutine_trampoline (i0=-2090057872, i1=21865) at ../util/coroutine-ucontext.c:177
> #5  0x00007fe770f50d40 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
> #6  0x00007ffd7a829570 in ?? ()
> #7  0x0000000000000000 in ?? ()

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Message-ID: <20230706131418.423713-1-f.ebner@proxmox.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ca2a5e630d)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-09 16:02:39 +03:00
Laurent Vivier
bcb1e0522e virtio-net: correctly report maximum tx_queue_size value
The maximum value for tx_queue_size depends on the backend type:
1024 for vDPA/vhost-user, 256 for all the others.

The value is returned by virtio_net_max_tx_queue_size() to set the
parameter:

    n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n),
                                    n->net_conf.tx_queue_size);

But the parameter checking uses VIRTQUEUE_MAX_SIZE (1024).

So the parameter is silently ignored and ethtool reports a different
value than the one provided by the user.

   ... -netdev tap,... -device virtio-net,tx_queue_size=1024

    # ethtool -g enp0s2
    Ring parameters for enp0s2:
    Pre-set maximums:
    RX:		256
    RX Mini:	n/a
    RX Jumbo:	n/a
    TX:		256
    Current hardware settings:
    RX:		256
    RX Mini:	n/a
    RX Jumbo:	n/a
    TX:		256

   ... -netdev vhost-user,... -device virtio-net,tx_queue_size=2048

    Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024

With this patch the correct maximum value is checked and displayed.

For vDPA/vhost-user:

    Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024

For all the others:

    Invalid tx_queue_size (= 512), must be a power of 2 between 256 and 256
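
A sketch of the corrected parameter check (names as in
hw/net/virtio-net.c; treat the exact identifiers as assumptions):

    if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE ||
        n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) ||
        !is_power_of_2(n->net_conf.tx_queue_size)) {
        /* Report the backend-dependent maximum, not VIRTQUEUE_MAX_SIZE. */
        error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d",
                   n->net_conf.tx_queue_size,
                   VIRTIO_NET_TX_QUEUE_MIN_SIZE,
                   virtio_net_max_tx_queue_size(n));
        return;
    }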

Fixes: 2eef278b9e ("virtio-net: fix tx queue size for !vhost-user")
Cc: mst@redhat.com
Cc: qemu-stable@nongnu.org
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 4271f40383)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-09 14:46:14 +03:00
Michael Tokarev
a342ce9dfe Update version for 8.0.3 release
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-09 00:34:33 +03:00
Richard Henderson
fb64b62378 target/arm: Fix SME full tile indexing
For the outer product set of insns, which take an entire matrix
tile as output, the argument is not a combined tile+column.
Therefore using get_tile_rowcol was incorrect, as we extracted
the tile number from itself.

The test case relies only on assembler support for SME, since
no release of GCC recognizes -march=armv9-a+sme yet.

Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1620
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230622151201.1578522-5-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: dropped now-unneeded changes to sysregs CFLAGS]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 1f51573f79)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(Mjt: fixup context in tests/tcg/aarch64/Makefile.target)
2023-07-08 09:17:22 +03:00
Mark Cave-Ayland
d2402a83a7 accel/tcg: Assert one page in tb_invalidate_phys_page_range__locked
Ensure that both the start and last addresses are within
the same guest page.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20230629082522.606219-3-mark.cave-ayland@ilande.co.uk>
[rth: Use tcg_debug_assert, simplify the expression]
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit e665cf72fe)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-02 22:10:28 +03:00
Mark Cave-Ayland
78e8c9c1a1 accel/tcg: Fix start page passed to tb_invalidate_phys_page_range__locked
Due to a copy-paste error in tb_invalidate_phys_range, the wrong
start address was passed to tb_invalidate_phys_page_range__locked.
Correct is to use the start of each page in turn.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Fixes: e506ad6a05 ("accel/tcg: Pass last not end to tb_invalidate_phys_range")
Message-Id: <20230629082522.606219-2-mark.cave-ayland@ilande.co.uk>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 3307e08c6f)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-02 22:09:17 +03:00
Dongwon Kim
477ab906d1 ui/gtk: set the area of the scanout texture correctly
x and y offsets and width and height of the scanout texture
are not correctly configured when the guest scanout frame is
a dmabuf.

Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
Cc: Vivek Kasireddy <vivek.kasireddy@intel.com>
Signed-off-by: Dongwon Kim <dongwon.kim@intel.com>
Message-ID: <20230621213150.29573-1-dongwon.kim@intel.com>
(cherry picked from commit 37802a24eb)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-02 07:49:12 +03:00
Richard Henderson
df1e45c9df linux-user: Avoid mmap of the last byte of the reserved_va
There is an overflow problem in mmap_find_vma_reserved:
when reserved_va == UINT32_MAX, end may overflow to 0.
Rather than a larger rewrite at this time, simply avoid
the final byte of the VA, which avoids searching the
final page, which avoids the overflow.
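
A standalone demonstration of the overflow:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t reserved_va = UINT32_MAX;  /* last byte, inclusive */
        uint32_t end = reserved_va + 1;     /* one-past-the-end...  */
        assert(end == 0);                   /* ...wraps, so a search
                                               loop 'addr < end'
                                               never executes */
        return 0;
    }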

Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1741
Fixes: 95059f9c ("include/exec: Change reserved_va semantics to last byte")
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Tested-by: Michael Tokarev <mjt@tls.msk.ru>
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
Message-Id: <20230629080835.71371-1-richard.henderson@linaro.org>
(cherry picked from commit 605a8b5491)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-07-01 13:57:27 +03:00
Shameer Kolothum
383fb8c05c vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI retry path
When vfio_enable_vectors() returns with less than requested nr_vectors
we retry with what kernel reported back. But the retry path doesn't
call vfio_prepare_kvm_msi_virq_batch(), and this results in:

qemu-system-aarch64: vfio: Error: Failed to enable 4 MSI vectors, retry with 1
qemu-system-aarch64: ../hw/vfio/pci.c:602: vfio_commit_kvm_msi_virq_batch: Assertion `vdev->defer_kvm_irq_routing' failed

Fixes: dc580d51f7 ("vfio: defer to commit kvm irq routing when enable msi/msix")
Reviewed-by: Longpeng <longpeng2@huawei.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit c174088923)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-30 19:02:13 +03:00
Zhenzhong Duan
58b3e4ff5f vfio/pci: Fix a segfault in vfio_realize
The kvm irqchip notifier is only registered if the device supports
INTx, however it's unconditionally removed in vfio realize error
path. If the assigned device does not support INTx, this will cause
QEMU to crash when vfio realize fails. Change it to conditionally
remove the notifier only if the notify hook is setup.

Before fix:
(qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1
Connection closed by foreign host.

After fix:
(qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1
Error: vfio 0000:81:11.1: xres and yres properties require display=on
(qemu)

Fixes: c5478fea27 ("vfio/pci: Respond to KVM irqchip change notifier")
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 357bd7932a)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-30 19:00:39 +03:00
Nicholas Piggin
55ee115e7a target/ppc: Fix decrementer time underflow and infinite timer loop
It is possible to store a value to the decrementer so large that it
does not raise the decrementer exception: the timer is scheduled, but
the next time value wraps and is treated as being in the past.

This can occur if (u64)-1 is stored on a zero-triggered exception, or
(u64)-1 is stored twice on an underflow-triggered exception, for
example.

If such a value is set in DECAR, it gets stored to the decrementer by
the timer function, which then immediately causes another timer, which
hangs QEMU.

Clamp the decrementer to the implemented width, and use that as the
value for the timer calculation, effectively preventing this overflow.
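
A sketch of the clamping step (function name illustrative; 'nbits' is
the implemented decrementer width):

    #include <stdint.h>

    static uint64_t clamp_decr(uint64_t val, int nbits)
    {
        /* Mask to the implemented width so a stored (u64)-1 cannot be
         * misread as a time in the past when the next deadline is
         * computed. */
        return nbits >= 64 ? val : (val & ((1ULL << nbits) - 1));
    }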

Reported-by: sdicaro@DDCI.com
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Message-Id: <20230530131214.373524-1-npiggin@gmail.com>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
(cherry picked from commit 09d2db9f46)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-30 09:18:28 +03:00
Laurent Vivier
ce6331222d vhost: fix vhost_dev_enable_notifiers() error case
in vhost_dev_enable_notifiers(), if virtio_bus_set_host_notifier(true)
fails, we call vhost_dev_disable_notifiers() that executes
virtio_bus_set_host_notifier(false) on all queues, even on queues that
have failed to be initialized.

This triggers a core dump in memory_region_del_eventfd():

 virtio_bus_set_host_notifier: unable to init event notifier: Too many open files (-24)
 vhost VQ 1 notifier binding failed: 24
 .../softmmu/memory.c:2611: memory_region_del_eventfd: Assertion `i != mr->ioeventfd_nb' failed.

Fix the problem by providing to vhost_dev_disable_notifiers() the
number of queues to disable.
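
A sketch of the corrected error path (the counted-disable helper name is
an assumption):

    for (i = 0; i < hdev->nvqs; i++) {
        r = virtio_bus_set_host_notifier(bus, hdev->vq_index + i, true);
        if (r < 0) {
            error_report("vhost VQ %d notifier binding failed: %d", i, -r);
            /* Unwind only the i queues that were actually enabled. */
            vhost_dev_disable_notifiers_nvqs(hdev, vdev, i);
            return r;
        }
    }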

Fixes: 8771589b6f ("vhost: simplify vhost_dev_enable_notifiers")
Cc: longpeng2@huawei.com
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20230602162735.3670785-1-lvivier@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit 92099aa4e9)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-30 09:13:00 +03:00
Eugenio Pérez
246b0cf1ac vdpa: mask _F_CTRL_GUEST_OFFLOADS for vhost vdpa devices
QEMU does not emulate it so it must be disabled as long as the backend
does not support it.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Message-Id: <20230602173328.1917385-1-eperezma@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Tested-by: Lei Yang <leiyang@redhat.com>
(cherry picked from commit 51e84244a7)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-30 09:11:26 +03:00
Nicholas Piggin
5e8838524a icount: don't adjust virtual time backwards after warp
The icount-based QEMU_CLOCK_VIRTUAL runs ahead of the RT clock at times.
When warping, it is possible it is still ahead at the end of the warp,
which causes icount adaptive mode to adjust it backward. This can result
in the machine observing time going backwards.

Prevent this by clamping adaptive adjustment to 0 at minimum.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Message-ID: <20230627061406.241847-1-npiggin@gmail.com>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 67f85346ca)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-29 18:17:49 +03:00
Markus Armbruster
a76c5126ec Revert "hw/sparc64/niagara: Use blk_name() instead of open-coding it"
This reverts commit 1881f336a3.

This commit breaks "-drive if=pflash,readonly=on,file=image.iso".  It
claims to merely replace an open-coded version of blk_name() by a
call, but that's not the case.  Sorry for the inconvenience!

Reported-by: Jakub Jermář <jakub@jermar.eu>
Cc: qemu-stable@nongnu.org
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Message-Id: <20230515151104.1350155-1-armbru@redhat.com>
Acked-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
(cherry picked from commit ac5e8c1dec)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-28 18:57:11 +03:00
Vivek Kasireddy
8c792a532e virtio-gpu: Make non-gl display updates work again when blob=true
In the case where the console does not have gl capability, and
if blob is set to true, make sure that the display updates still
work. Commit e86a93f554 accidentally broke this by misplacing
the return statement (in resource_flush) causing the updates to
be silently ignored.

Fixes: e86a93f554 ("virtio-gpu: splitting one extended mode guest fb into n-scanouts")
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-ID: <20230623060454.3749910-1-vivek.kasireddy@intel.com>
(cherry picked from commit 34e29d85a7)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-28 18:52:17 +03:00
Marc-André Lureau
dc4c852d4d ui: return NULL when getting cursor without a console
VNC may try to get the current cursor even when there are no consoles
and crashes. Simple reproducer is qemu with -nodefaults.

Fixes: (again)
https://gitlab.com/qemu-project/qemu/-/issues/1548

Fixes: commit 385ac97f8 ("ui: keep current cursor with QemuConsole")
Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20230428154807.2143652-1-marcandre.lureau@redhat.com>
(cherry picked from commit 333e7599a0)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-28 18:50:48 +03:00
Ani Sinha
aab37b2002 vhost-vdpa: do not cleanup the vdpa/vhost-net structures if peer nic is present
When a peer nic is still attached to the vdpa backend, it is too early to free
up the vhost-net and vdpa structures. If these structures are freed here, then
QEMU crashes when the guest is being shut down. The following call chain
would result in an assertion failure since the pointer returned from
vhost_vdpa_get_vhost_net() would be NULL:

do_vm_stop() -> vm_state_notify() -> virtio_set_status() ->
virtio_net_vhost_status() -> get_vhost_net().

Therefore, we defer freeing up the structures until at guest shutdown
time when qemu_cleanup() calls net_cleanup() which then calls
qemu_del_net_client() which would eventually call vhost_vdpa_cleanup()
again to free up the structures. This time, the loop in net_cleanup()
ensures that vhost_vdpa_cleanup() will be called one last time when
all the peer nics are detached and freed.

All unit tests pass with this change.

CC: imammedo@redhat.com
CC: jusual@redhat.com
CC: mst@redhat.com
Fixes: CVE-2023-3301
Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929
Signed-off-by: Ani Sinha <anisinha@redhat.com>
Message-Id: <20230619065209.442185-1-anisinha@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit a0d7215e33)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(Mjt: context change for stable-8.0)
2023-06-26 19:55:29 +03:00
Eugenio Pérez
d33534a4c7 vdpa: fix not using CVQ buffer in case of error
Bug introduced when refactoring. Without the fix, the guest never
received the used buffer.

Fixes: be4278b65f ("vdpa: extract vhost_vdpa_net_cvq_add from vhost_vdpa_net_handle_ctrl_avail")
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Message-Id: <20230602173451.1917999-1-eperezma@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Tested-by: Lei Yang <leiyang@redhat.com>
(cherry picked from commit d45243bcfc)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-26 19:55:29 +03:00
Prasad Pandit
46fe2deaef vhost: release virtqueue objects in error path
The vhost_dev_start function does not release virtqueue objects when
the event_notifier_init() function fails. Release the virtqueue objects
and log a message about function failure.

Signed-off-by: Prasad Pandit <pjp@fedoraproject.org>
Message-Id: <20230529114333.31686-3-ppandit@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Fixes: f9a09ca3ea ("vhost: add support for configure interrupt")
Reviewed-by: Peter Xu <peterx@redhat.com>
Cc: qemu-stable@nongnu.org
Acked-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 77ece20ba0)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-26 19:55:29 +03:00
Prasad Pandit
c10525874c vhost: release memory_listener object in error path
The vhost_dev_start function does not release the memory_listener
object in case of an error. This may crash the guest when vhost is
unable to set the memory table:

  stack trace of thread 125653:
  Program terminated with signal SIGSEGV, Segmentation fault
  #0  memory_listener_register (qemu-kvm + 0x6cda0f)
  #1  vhost_dev_start (qemu-kvm + 0x699301)
  #2  vhost_net_start (qemu-kvm + 0x45b03f)
  #3  virtio_net_set_status (qemu-kvm + 0x665672)
  #4  qmp_set_link (qemu-kvm + 0x548fd5)
  #5  net_vhost_user_event (qemu-kvm + 0x552c45)
  #6  tcp_chr_connect (qemu-kvm + 0x88d473)
  #7  tcp_chr_new_client (qemu-kvm + 0x88cf83)
  #8  tcp_chr_accept (qemu-kvm + 0x88b429)
  #9  qio_net_listener_channel_func (qemu-kvm + 0x7ac07c)
  #10 g_main_context_dispatch (libglib-2.0.so.0 + 0x54e2f)

Release memory_listener objects in the error path.

Signed-off-by: Prasad Pandit <pjp@fedoraproject.org>
Message-Id: <20230529114333.31686-2-ppandit@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Fixes: c471ad0e9b ("vhost_net: device IOTLB support")
Cc: qemu-stable@nongnu.org
Acked-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 1e3ffb34f7)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-26 19:55:29 +03:00
Helge Deller
b62e5d8ac1 target/hppa: Update to SeaBIOS-hppa version 8
Update SeaBIOS-hppa to version 8.

Fixes:
- boot of HP-UX with SMP, and
- reboot of Linux and HP-UX with SMP

Enhancements:
- show qemu version in boot menu
- adds exit menu entry in boot menu to quit emulation
- allow to trace PCD_CHASSIS codes & machine run status

Signed-off-by: Helge Deller <deller@gmx.de>
(cherry picked from commit 34ec3aea54)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-26 19:35:09 +03:00
Helge Deller
06f32b3dcf target/hppa: New SeaBIOS-hppa version 7
Update SeaBIOS-hppa to version 7 which fixes a boot problem
with Debian-12 install CD images.

The problem with Debian-12 is that the ramdisk got bigger
than what the firmware could load in one call to the LSI
scsi driver.

Signed-off-by: Helge Deller <deller@gmx.de>
(cherry picked from commit bb9c998ca9)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(Mjt: pick this one before picking next 34ec3aea54 "SeaBIOS-hppa version 8")
2023-06-26 19:34:58 +03:00
Helge Deller
29c753001b target/hppa: Provide qemu version via fw_cfg to firmware
Give current QEMU version string to SeaBIOS-hppa via fw_cfg interface so
that the firmware can show the QEMU version in the boot menu info.

Signed-off-by: Helge Deller <deller@gmx.de>
(cherry picked from commit 069d296669)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-26 19:34:58 +03:00
Helge Deller
8fa1462292 target/hppa: Fix OS reboot issues
When the OS triggers a reboot, the reset helper function sends a
qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET) together with an
EXCP_HLT exception to halt the CPUs.

So, at reboot when initializing the CPUs again, make sure to set all
instruction pointers to the firmware entry point, disable any interrupts,
disable data and instruction translations, enable PSW_Q bit  and tell qemu
to unhalt (halted=0) the CPUs again.

This fixes the various reboot issues which were seen when rebooting a
Linux VM, including the case where even the monarch CPU has been virtually
halted from the OS (e.g. via "chcpu -d 0" inside the Linux VM).

Signed-off-by: Helge Deller <deller@gmx.de>
(cherry picked from commit 50ba97e928)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-26 19:34:58 +03:00
Peter Maydell
deb40cf67a pc-bios/keymaps: Use the official xkb name for Arabic layout, not the legacy synonym
The xkb official name for the Arabic keyboard layout is 'ara'.
However xkb has for at least the past 15 years also permitted it to
be named via the legacy synonym 'ar'.  In xkeyboard-config 2.39 this
synonym was removed, which breaks compilation of QEMU:

FAILED: pc-bios/keymaps/ar
/home/fred/qemu-git/src/qemu/build-full/qemu-keymap -f pc-bios/keymaps/ar -l ar
xkbcommon: ERROR: Couldn't find file "symbols/ar" in include paths
xkbcommon: ERROR: 1 include paths searched:
xkbcommon: ERROR: 	/usr/share/X11/xkb
xkbcommon: ERROR: 3 include paths could not be added:
xkbcommon: ERROR: 	/home/fred/.config/xkb
xkbcommon: ERROR: 	/home/fred/.xkb
xkbcommon: ERROR: 	/etc/xkb
xkbcommon: ERROR: Abandoning symbols file "(unnamed)"
xkbcommon: ERROR: Failed to compile xkb_symbols
xkbcommon: ERROR: Failed to compile keymap

The upstream xkeyboard-config change removing the compat
mapping is:
470ad2cd8f

Make QEMU always ask for the 'ara' xkb layout, which should work on
both older and newer xkeyboard-config.  We leave the QEMU name for
this keyboard layout as 'ar'; it is not the only one where our name
for it deviates from the xkb standard name.

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Message-id: 20230620162024.1132013-1-peter.maydell@linaro.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1709
(cherry picked from commit 497fad3897)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-26 01:31:47 +03:00
Peter Maydell
cf7950282d host-utils: Avoid using __builtin_subcll on buggy versions of Apple Clang
We use __builtin_subcll() to do a 64-bit subtract with borrow-in and
borrow-out when the host compiler supports it.  Unfortunately some
versions of Apple Clang have a bug in their implementation of this
intrinsic which means it returns the wrong value.  The effect is that
a QEMU built with the affected compiler will hang when emulating x86
or m68k float80 division.

The upstream LLVM issue is:
https://github.com/llvm/llvm-project/issues/55253

The commit that introduced the bug apparently never made it into an
upstream LLVM release without the subsequent fix
fffb6e6afd
but unfortunately it did make it into Apple Clang 14.0, as shipped
in Xcode 14.3 (14.2 is reported to be OK). The Apple bug number is
FB12210478.

Add ifdefs to avoid use of __builtin_subcll() on Apple Clang version
14 or greater.  There is not currently a version of Apple Clang which
has the bug fix -- when one appears we should be able to add an upper
bound to the ifdef condition so we can start using the builtin again.
We make the lower bound a conservative "any Apple clang with major
version 14 or greater" because the consequences of incorrectly
disabling the builtin when it would work are pretty small and the
consequences of not disabling it when we should are pretty bad.

Many thanks to those users who both reported this bug and also
did a lot of work in identifying the root cause; in particular
to Daniel Bertalan and osy.

Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1631
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1659
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Tested-by: Daniel Bertalan <dani@danielbertalan.dev>
Tested-by: Solra Bizna <solra@bizna.name>
Message-id: 20230622130823.1631719-1-peter.maydell@linaro.org
(cherry picked from commit b0438861ef)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-26 01:31:07 +03:00
Bastian Koppelmann
72a23f2991 target/tricore: Add CHECK_REG_PAIR() for insn accessing 64 bit regs
Some insns were not checking whether an even index was used to access a
64-bit register. In the worst case that could lead to a buffer overflow, as
reported in https://gitlab.com/qemu-project/qemu/-/issues/1698.

Reported-by: Siqi Chen <coc.cyqh@gmail.com>
Signed-off-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Message-Id: <20230612113245.56667-4-kbastian@mail.uni-paderborn.de>
(cherry picked from commit 6991777ec4)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-23 09:46:01 +03:00
Siqi Chen
b9e1415e16 target/tricore: Fix out-of-bounds index in imask instruction
When translating the "imask" instruction of the TriCore architecture, QEMU did not check whether the register index was out of bounds, resulting in a global buffer overflow.

Reviewed-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1698
Reported-by: Siqi Chen <coc.cyqh@gmail.com>
Signed-off-by: Siqi Chen <coc.cyqh@gmail.com>
Signed-off-by: Bastian Koppelmann <kbastian@mail.uni-paderborn.de>
Message-Id: <20230612065633.149152-1-coc.cyqh@gmail.com>
Message-Id: <20230612113245.56667-2-kbastian@mail.uni-paderborn.de>
(cherry picked from commit d34b092cab)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-23 09:44:47 +03:00
Peter Maydell
4553eee156 hw/timer/nrf51_timer: Don't lose time when timer is queried in tight loop
The nrf51_timer has a free-running counter which we implement using
the pattern of using two fields (update_counter_ns, counter) to track
the last point at which we calculated the counter value, and the
counter value at that time.  Then we can find the current counter
value by converting the difference in wall-clock time between then
and now to a tick count that we need to add to the counter value.

Unfortunately the nrf51_timer's implementation of this has a bug
which means it loses time every time update_counter() is called.
After updating s->counter it always sets s->update_counter_ns to
'now', even though the actual point when s->counter hit the new value
will be some point in the past (half a tick, say).  In the worst case
(guest code in a tight loop reading the counter, icount mode) the
counter is continually queried less than a tick after it was last
read, so s->counter never advances but s->update_counter_ns does, and
the guest never makes forward progress.

The fix for this is to only advance update_counter_ns to the
timestamp of the last tick, not all the way to 'now'.  (This is the
pattern used in hw/misc/mps2-fpgaio.c's counter.)
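
A sketch of the corrected bookkeeping (the conversion helpers are
hypothetical):

    /* Convert elapsed ns to whole ticks, then advance the timestamp
     * only by those whole ticks; the fractional remainder is carried
     * into the next call instead of being discarded. */
    uint32_t ticks = ns_to_ticks(s, now - s->update_counter_ns);
    s->counter = (s->counter + ticks) & counter_mask(s);
    s->update_counter_ns += ticks_to_ns(s, ticks);   /* not '= now' */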

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Message-id: 20230606134917.3782215-1-peter.maydell@linaro.org
(cherry picked from commit d2f9a79a8c)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-22 10:38:38 +03:00
Peter Maydell
22d71f9eb3 hw/intc/allwinner-a10-pic: Handle IRQ levels other than 0 or 1
In commit 2c5fa0778c we fixed an endianness bug in the Allwinner
A10 PIC model; however in the process we introduced a regression.
This is because the old code was robust against the incoming 'level'
argument being something other than 0 or 1, whereas the new code was
not.

In particular, the allwinner-sdhost code treats its IRQ line
as 0-vs-non-0 rather than 0-vs-1, so when the SD controller
set its IRQ line for any reason other than transmit the
interrupt controller would ignore it. The observed effect
was a guest timeout when rebooting the guest kernel.

Handle level values other than 0 or 1, to restore the old
behaviour.
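
The gist, as a hedged sketch (field names follow the A10 PIC state
structure):

    /* Normalise 'level' to 0/1 before depositing it, so callers that
     * pass 0-vs-non-0 keep working. */
    s->irq_pending[irq / 32] =
        deposit32(s->irq_pending[irq / 32], irq % 32, 1, !!level);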

Fixes: 2c5fa0778c ("hw/intc/allwinner-a10-pic: Don't use set_bit()/clear_bit()")
(Mjt:  af08c70ef5 in stable-8.0)
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Message-id: 20230606104609.3692557-2-peter.maydell@linaro.org
(cherry picked from commit f837b468cd)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-22 10:37:43 +03:00
Peter Maydell
f38ca28c66 target/arm: Return correct result for LDG when ATA=0
The LDG instruction loads the tag from a memory address (identified
by [Xn + offset]), and then merges that tag into the destination
register Xt. We implemented this correctly for the case when
allocation tags are enabled, but didn't get it right when ATA=0:
instead of merging the tag bits into Xt, we merged them into the
memory address [Xn + offset] and then set Xt to that.

Merge the tag bits into the old Xt value, as they should be.
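
A minimal sketch of the merge (the allocation tag occupies bits [59:56]
of the register):

    #include <stdint.h>

    static uint64_t ldg_merge_tag(uint64_t old_xt, unsigned tag)
    {
        /* Equivalent to deposit64(old_xt, 56, 4, tag). */
        return (old_xt & ~(0xfULL << 56)) | ((uint64_t)(tag & 0xf) << 56);
    }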

Cc: qemu-stable@nongnu.org
Fixes: c15294c1e3 ("target/arm: Implement LDG, STG, ST2G instructions")
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 7e2788471f)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-22 10:35:22 +03:00
Peter Maydell
2bdaf89162 target/arm: Fix return value from LDSMIN/LDSMAX 8/16 bit atomics
The atomic memory operations are supposed to return the old memory
data value in the destination register.  This value is not
sign-extended, even if the operation is the signed minimum or
maximum.  (In the pseudocode for the instructions the returned data
value is passed to ZeroExtend() to create the value in the register.)

We got this wrong because we were doing a 32-to-64 zero extend on the
result for 8 and 16 bit data values, rather than the correct amount
of zero extension.

Fix the bug by using ext8u and ext16u for the MO_8 and MO_16 data
sizes rather than ext32u.
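
A sketch of the size-dependent extension in TCG terms:

    /* Zero-extend by the access size instead of always 32 bits. */
    switch (memop & MO_SIZE) {
    case MO_8:  tcg_gen_ext8u_i64(dst, val);  break;
    case MO_16: tcg_gen_ext16u_i64(dst, val); break;
    case MO_32: tcg_gen_ext32u_i64(dst, val); break;
    default:    break;   /* MO_64 is already full width */
    }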

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230602155223.2040685-2-peter.maydell@linaro.org
(cherry picked from commit 243705aa6e)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-22 10:34:30 +03:00
Cédric Le Goater
fb8b14025b aspeed/hace: Initialize g_autofree pointer
As mentioned in docs/devel/style.rst "Automatic memory deallocation":

* Variables declared with g_auto* MUST always be initialized,
  otherwise the cleanup function will use uninitialized stack memory

This prevents QEMU from dumping core when running the "hash test"
command under Zephyr.
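
The rule in miniature:

    #include <glib.h>

    static void example(void)
    {
        g_autofree char *buf = NULL;  /* MUST be initialized: on any
            early return the cleanup runs g_free(buf), and g_free(NULL)
            is a safe no-op, whereas an uninitialized pointer would
            hand stack garbage to g_free(). */
        buf = g_strdup("hash test");
        /* ... */
    }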

Cc: Steven Lee <steven_lee@aspeedtech.com>
Cc: Joel Stanley <joel@jms.id.au>
Cc: qemu-stable@nongnu.org
Fixes: c5475b3f9a ("hw: Model ASPEED's Hash and Crypto Engine")
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Francisco Iglesias <frasse.iglesias@gmail.com>
Message-Id: <20230421131547.2177449-1-clg@kaod.org>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
(cherry picked from commit c8f48b120b)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-16 16:15:56 +03:00
Yin Wang
4a83e27b21 hw/riscv: qemu crash when NUMA nodes exceed available CPUs
Command "qemu-system-riscv64 -machine virt
-m 2G -smp 1 -numa node,mem=1G -numa node,mem=1G"
would trigger this problem. Backtrace:
 #0  0x0000555555b5b1a4 in riscv_numa_get_default_cpu_node_id  at ../hw/riscv/numa.c:211
 #1  0x00005555558ce510 in machine_numa_finish_cpu_init  at ../hw/core/machine.c:1230
 #2  0x00005555558ce9d3 in machine_run_board_init  at ../hw/core/machine.c:1346
 #3  0x0000555555aaedc3 in qemu_init_board  at ../softmmu/vl.c:2513
 #4  0x0000555555aaf064 in qmp_x_exit_preconfig  at ../softmmu/vl.c:2609
 #5  0x0000555555ab1916 in qemu_init  at ../softmmu/vl.c:3617
 #6  0x000055555585463b in main  at ../softmmu/main.c:47
This commit fixes the issue by adding parameter checks.

Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Signed-off-by: Yin Wang <yin.wang@intel.com>
Message-Id: <20230519023758.1759434-1-yin.wang@intel.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
(cherry picked from commit b9cedbf19c)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-14 13:02:36 +03:00
Nicholas Piggin
e7d265340e target/ppc: Fix PMU hflags calculation
Some of the PMU hflags bits can go out of synch, for example a store to
MMCR0 with PMCjCE=1 fails to update hflags correctly and results in
hflags mismatch:

  qemu: fatal: TCG hflags mismatch (current:0x2408003d rebuilt:0x240a003d)

This can be reproduced by running perf on a recent machine.

Some of the fragility here is the duplication of PMU hflags calculations.
This change consolidates that in a single place to update pmu-related
hflags, to be called after well-defined state changes.

The post-load PMU update is pulled out of the MSR update because it does
not depend on the MSR value.

Fixes: 8b3d1c49a9 ("target/ppc: Add new PMC HFLAGS")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Message-Id: <20230530130447.372617-1-npiggin@gmail.com>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
(cherry picked from commit 6494d2c1fd)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-11 11:15:29 +03:00
Nicholas Piggin
1de8291e29 target/ppc: Fix nested-hv HEAI delivery
ppc hypervisors turn HEAI interrupts into program interrupts injected
into the guest that executed the illegal instruction, if the hypervisor
doesn't handle it some other way.

The nested-hv implementation failed to account for this HEAI->program
conversion. The virtual hypervisor wants to see the HEAI when running
a nested guest, so that interrupt type can be returned to its KVM
caller.

Fixes: 7cebc5db2e ("target/ppc: Introduce a vhyp framework for nested HV support")
Cc: balaton@eik.bme.hu
Reviewed-by: Fabiano Rosas <farosas@suse.de>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Message-Id: <20230530132127.385001-1-npiggin@gmail.com>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
(cherry picked from commit 6c242e79b8)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-11 11:14:51 +03:00
Nicholas Piggin
3c6346625b target/ppc: Fix lqarx to set cpu_reserve
lqarx does not set cpu_reserve, which causes stqcx. to never succeed.

Cc: qemu-stable@nongnu.org
Fixes: 94bf265867 ("target/ppc: Use atomic load for LQ and LQARX")
Fixes: 57b38ffd0c ("target/ppc: Use tcg_gen_qemu_{ld,st}_i128 for LQARX, LQ, STQ")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230605025445.161932-1-npiggin@gmail.com>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
(cherry picked from commit e025e8f5a8)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-11 11:07:26 +03:00
Anastasia Belova
d09e35feb5 vnc: move assert in vnc_worker_thread_loop
job may be NULL if queue->exit is true. Check
it before dereferencing job.

Fixes: f31f9c1080 ("vnc: add magic cookie to VncState")
Signed-off-by: Anastasia Belova <abelova@astralinux.ru>
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(cherry picked from commit bdfca8a22f)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-11 11:03:26 +03:00
Mattias Nissler
967e42986f hw/remote: Fix vfu_cfg trace offset format
The printed offset value is prefixed with 0x, but was actually printed
in decimal. To spare others the confusion, adjust the format specifier
to hexadecimal.
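
A two-line illustration of the mismatch:

    #include <stdio.h>

    int main(void)
    {
        printf("offset=0x%d\n", 60);  /* "offset=0x60": decimal 60
                                         masquerading as hex (96) */
        printf("offset=0x%x\n", 60);  /* "offset=0x3c": correct   */
        return 0;
    }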

Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
Reviewed-by: Jagannathan Raman <jag.raman@oracle.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(cherry picked from commit 5fb9e82955)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-11 11:02:28 +03:00
Christian Schoenebeck
b9d2887be4 9pfs: prevent opening special files (CVE-2023-2861)
The 9p protocol does not specifically define how server shall behave when
client tries to open a special file, however from security POV it does
make sense for 9p server to prohibit opening any special file on host side
in general. A sane Linux 9p client for instance would never attempt to
open a special file on host side, it would always handle those exclusively
on its guest side. A malicious client however could potentially escape
from the exported 9p tree by creating and opening a device file on host
side.

With QEMU this could only be exploited in the following unsafe setups:

  - Running QEMU binary as root AND 9p 'local' fs driver AND 'passthrough'
    security model.

or

  - Using 9p 'proxy' fs driver (which is running its helper daemon as
    root).

These setups were already discouraged for safety reasons before,
however for obvious reasons we are now tightening behaviour on this.
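
A sketch of the tightened policy (helper name illustrative):

    #include <sys/stat.h>
    #include <unistd.h>
    #include <errno.h>

    /* After opening on the host side, accept only regular files and
     * directories; refuse devices, FIFOs, sockets, etc. */
    static int reject_special_file(int fd)
    {
        struct stat st;
        if (fstat(fd, &st) == 0 &&
            !S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode)) {
            close(fd);
            errno = ENODEV;
            return -1;
        }
        return fd;
    }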

Fixes: CVE-2023-2861
Reported-by: Yanwu Shen <ywsPlz@gmail.com>
Reported-by: Jietao Xiao <shawtao1125@gmail.com>
Reported-by: Jinku Li <jkli@xidian.edu.cn>
Reported-by: Wenbo Shen <shenwenbo@zju.edu.cn>
Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
Reviewed-by: Michael Tokarev <mjt@tls.msk.ru>
Message-Id: <E1q6w7r-0000Q0-NM@lizzy.crudebyte.com>
(cherry picked from commit f6b0de53fb)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-08 23:46:46 +03:00
Mark Somerville
828af6b31f qga: Fix suspend on Linux guests without systemd
Allow the Linux guest agent to attempt each of the suspend methods
(systemctl, pm-* and writing to /sys) in turn.

Prior to this, guests without systemd failed to suspend due to
`guest_suspend` returning early regardless of the return value of
`systemd_supports_mode`.

Signed-off-by: Mark Somerville <mark@qpok.net>
Reviewed-by: Konstantin Kostiuk <kkostiuk@redhat.com>
Signed-off-by: Konstantin Kostiuk <kkostiuk@redhat.com>
(cherry picked from commit 86dcb6ab9b)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-08 13:30:40 +03:00
Jagannathan Raman
fe88635449 docs: fix multi-process QEMU documentation
Fix a typo in the system documentation for multi-process QEMU.

Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
(cherry picked from commit 7771e8b863)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-07 22:49:05 +03:00
David Woodhouse
6a69a58b1c hw/xen: Fix broken check for invalid state in xs_be_open()
Coverity points out that if (!s && !s->impl) isn't really what we intended
to do here. CID 1508131.
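
The intended check, sketched:

    if (!s || !s->impl) {   /* '||': the '&&' version dereferences a
                               NULL s before the check can help */
        errno = EINVAL;
        return NULL;
    }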

Fixes: 0324751272 ("hw/xen: Add emulated implementation of XenStore operations")
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <20230412185102.441523-6-dwmw2@infradead.org>
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
(cherry picked from commit c9bdfe8d58)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-07 22:47:56 +03:00
David Woodhouse
1797de7f82 hw/xen: Fix memory leak in libxenstore_open() for Xen
There was a superfluous allocation of the XS handle, leading to it
being leaked on both the error path and the success path (where it gets
allocated again).

Spotted by Coverity (CID 1508098).

Fixes: ba2a92db1f ("hw/xen: Add xenstore operations to allow redirection to internal emulation")
Suggested-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Paul Durrant <paul@xen.org>
Message-Id: <20230412185102.441523-3-dwmw2@infradead.org>
Signed-off-by: Anthony PERARD <anthony.perard@citrix.com>
(cherry picked from commit 8442232eba)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-07 22:47:46 +03:00
Thomas Huth
903c71251b hw/mips/malta: Fix the malta machine on big endian hosts
Booting a Linux kernel with the malta machine is currently broken
on big endian hosts. The cpu_to_gt32 macro wants to byteswap a value
for little endian targets only, but uses the wrong way to do this:
cpu_to_[lb]e32 works the other way round on big endian hosts! Fix
it by using the same approach on both big and little endian hosts.

Fixes: 0c8427baf0 ("hw/mips/malta: Use bootloader helper to set BAR registers")
Cc: qemu-stable@nongnu.org
Message-Id: <20230330152613.232082-1-thuth@redhat.com>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit dc96009afd)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-07 12:36:29 +03:00
Ilya Leoshkevich
454d4e4380 s390x/tcg: Fix CPU address returned by STIDP
In qemu-user-s390x, /proc/cpuinfo contains:

	processor 0: version = 00,  identification = 000000,  machine = 8561
	processor 1: version = 00,  identification = 400000,  machine = 8561

The highest nibble is supposed to contain the CPU address, but it's off
by 2 bits. Fix the shift value and provide a symbolic constant for it.
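
A standalone check of the arithmetic from the example above:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* identification is a 24-bit field; the CPU address belongs
         * in its top nibble, bits 23:20. */
        uint32_t cpu_addr = 1;
        assert((cpu_addr << 22) == 0x400000);  /* buggy: off by 2 bits */
        assert((cpu_addr << 20) == 0x100000);  /* fixed                */
        return 0;
    }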

With the fix we get:

	processor 0: version = 00,  identification = 000000,  machine = 8561
	processor 1: version = 00,  identification = 100000,  machine = 8561

Fixes: 076d4d39b6 ("s390x/cpumodel: wire up cpu type + id for TCG")
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230605113950.1169228-2-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 71b11cbe1c)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-07 12:36:29 +03:00
Ilya Leoshkevich
5cd229064a tests/tcg/s390x: Test MXDB and MXDBR
Add a small test to prevent regressions.

Cc: qemu-stable@nongnu.org
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230601223027.795501-3-iii@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 2b956244a9)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-07 12:36:29 +03:00
Ilya Leoshkevich
1b4417178e target/s390x: Fix MXDB and MXDBR
These instructions multiply 64 bits by 64 bits, not 128 bits by 64 bits.

Reported-by: Tulio Magno Quites Machado Filho <tuliom@redhat.com>
Fixes: 2b91240f95 ("target/s390x: Use Int128 for passing float128")
Cc: qemu-stable@nongnu.org
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2211472
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230601223027.795501-2-iii@linux.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit a7f4add793)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-07 12:36:29 +03:00
Ilya Leoshkevich
373cc0f3b5 tests/tcg/s390x: Test single-stepping SVC
Add a small test to prevent regressions.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Acked-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20230510230213.330134-3-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit be4a4cb429)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-07 12:36:29 +03:00
Ilya Leoshkevich
9b7c1e431e linux-user/s390x: Fix single-stepping SVC
Currently single-stepping SVC executes two instructions. The reason is
that EXCP_DEBUG for the SVC instruction itself is masked by EXCP_SVC.
Fix by re-raising EXCP_DEBUG.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230510230213.330134-2-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 01b9990a3f)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-07 12:36:29 +03:00
Ilya Leoshkevich
0a3a9ae1f2 tests/tcg/s390x: Test LOCFHR
Add a small test to prevent regressions.

Cc: qemu-stable@nongnu.org
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230526181240.1425579-5-iii@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 230976232f)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-07 12:36:29 +03:00
Ilya Leoshkevich
0d4bced374 target/s390x: Fix LOCFHR taking the wrong half of R2
LOCFHR should write top-to-top, but QEMU erroneously writes
bottom-to-top.
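
A self-contained sketch of the intended top-to-top semantics (function name
illustrative):

  #include <stdbool.h>
  #include <stdint.h>

  /* On condition, copy bits 0-31 (the high word) of r2 into bits 0-31 of
   * r1, leaving the low word of r1 untouched. */
  static uint64_t locfhr(uint64_t r1, uint64_t r2, bool cond)
  {
      return cond ? (r2 & 0xffffffff00000000ULL) | (r1 & 0xffffffffULL)
                  : r1;
  }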

Fixes: 45aa9aa3b7 ("target/s390x: Implement load-on-condition-2 insns")
Cc: qemu-stable@nongnu.org
Reported-by: Mikhail Mitskevich <mitskevichmn@gmail.com>
Closes: https://gitlab.com/qemu-project/qemu/-/issues/1668
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230526181240.1425579-4-iii@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 3180b17362)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-07 12:36:29 +03:00
Ilya Leoshkevich
8776c6cf6a tests/tcg/s390x: Test LCBB
Add a test to prevent regressions.

Cc: qemu-stable@nongnu.org
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230526181240.1425579-3-iii@linux.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 05d000fb4d)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-07 12:36:29 +03:00
Ilya Leoshkevich
76d4eb3a5e target/s390x: Fix LCBB overwriting the top 32 bits
LCBB is supposed to overwrite only the bottom 32 bits, but QEMU
erroneously overwrites the entire register.
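
A one-line sketch of the intended write-back (variable names illustrative):

  /* Replace only bits 32-63 (the low word) of the 64-bit register. */
  r1 = (r1 & 0xffffffff00000000ULL) | (uint32_t)count;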

Fixes: 6d9303322e ("s390x/tcg: Implement LOAD COUNT TO BLOCK BOUNDARY")
Cc: qemu-stable@nongnu.org
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230526181240.1425579-2-iii@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 079181b9bc)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-06-07 12:36:29 +03:00
Ilya Leoshkevich
6a9f9e6499 tests/tcg/s390x: Test EXECUTE of relative branches
Add a small test to prevent regressions.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Acked-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230426235813.198183-3-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit bfa72590df)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(Mjt: forgotten testcase for commit b858c53ef6)
2023-06-07 12:35:15 +03:00
Peter Maydell
f81a5d6863 target/arm: Explicitly select short-format FSR for M-profile
For M-profile, there is no guest-facing A-profile format FSR, but we
still use the env->exception.fsr field to pass fault information from
the point where a fault is raised to the code in
arm_v7m_cpu_do_interrupt() which interprets it and sets the M-profile
specific fault status registers.  So it doesn't matter whether we
fill in env->exception.fsr in the short format or the LPAE format, as
long as both sides agree.  As it happens arm_v7m_cpu_do_interrupt()
assumes short-form.

In compute_fsr_fsc() we weren't explicitly choosing short-form for
M-profile, but instead relied on it falling out in the wash because
arm_s1_regime_using_lpae_format() would be false.  This was broken in
commit 452c67a4 when we added v8R support, because we said "PMSAv8 is
always LPAE format" (as it is for v8R), forgetting that we were
implicitly using this code path on M-profile. At that point we would
hit a g_assert_not_reached():
 ERROR:../../target/arm/internals.h:549:arm_fi_to_lfsc: code should not be reached

#7  0x0000555555e055f7 in arm_fi_to_lfsc (fi=0x7fffecff9a90) at ../../target/arm/internals.h:549
#8  0x0000555555e05a27 in compute_fsr_fsc (env=0x555557356670, fi=0x7fffecff9a90, target_el=1, mmu_idx=1, ret_fsc=0x7fffecff9a1c)
    at ../../target/arm/tlb_helper.c:95
#9  0x0000555555e05b62 in arm_deliver_fault (cpu=0x555557354800, addr=268961344, access_type=MMU_INST_FETCH, mmu_idx=1, fi=0x7fffecff9a90)
    at ../../target/arm/tlb_helper.c:132
#10 0x0000555555e06095 in arm_cpu_tlb_fill (cs=0x555557354800, address=268961344, size=1, access_type=MMU_INST_FETCH, mmu_idx=1, probe=false, retaddr=0)
    at ../../target/arm/tlb_helper.c:260

The specific assertion changed when commit fcc7404eff added
"assert not M-profile" to arm_is_secure_below_el3(), because the
conditions being checked in compute_fsr_fsc() include
arm_el_is_aa64(), which will end up calling arm_is_secure_below_el3()
and asserting before we try to call arm_fi_to_lfsc():

#7  0x0000555555efaf43 in arm_is_secure_below_el3 (env=0x5555574665a0) at ../../target/arm/cpu.h:2396
#8  0x0000555555efb103 in arm_is_el2_enabled (env=0x5555574665a0) at ../../target/arm/cpu.h:2448
#9  0x0000555555efb204 in arm_el_is_aa64 (env=0x5555574665a0, el=1) at ../../target/arm/cpu.h:2509
#10 0x0000555555efbdfd in compute_fsr_fsc (env=0x5555574665a0, fi=0x7fffecff99e0, target_el=1, mmu_idx=1, ret_fsc=0x7fffecff996c)

Avoid the assertion and the incorrect FSR format selection by
explicitly making M-profile use the short-format in this function.
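
A hedged sketch of the shape of the fix in compute_fsr_fsc() (the real code
differs in detail):

  /* Check M-profile first, before anything that can consult EL state. */
  if (arm_feature(env, ARM_FEATURE_M) ||
      (target_el != 2 && !arm_el_is_aa64(env, target_el) &&
       !arm_s1_regime_using_lpae_format(env, arm_mmu_idx))) {
      fsr = arm_fi_to_sfsc(fi);   /* short format */
  } else {
      fsr = arm_fi_to_lfsc(fi);   /* LPAE format */
  }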

Fixes: 452c67a427 ("target/arm: Enable TTBCR_EAE for ARMv8-R AArch32")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1658
Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230523131726.866635-1-peter.maydell@linaro.org
(cherry picked from commit d7fe699be5)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:37 +03:00
Clément Chigot
505f0c68c9 hw/arm/xlnx-zynqmp: fix unsigned error when checking the RPUs number
When passing --smp with a number lower than XLNX_ZYNQMP_NUM_APU_CPUS,
the expression (ms->smp.cpus - XLNX_ZYNQMP_NUM_APU_CPUS) wraps around
to a huge positive number, since ms->smp.cpus is an unsigned int.
This raises the following error afterwards, as QEMU tries to
instantiate some additional RPUs.
  | $ qemu-system-aarch64 --smp 1 -M xlnx-zcu102
  | **
  | ERROR:../src/tcg/tcg.c:777:tcg_register_thread:
  |   assertion failed: (n < tcg_max_ctxs)
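
A minimal demonstration of the underlying unsigned wrap-around (using 4, the
number of APU CPUs, for illustration):

  unsigned int cpus = 1;          /* --smp 1 */
  unsigned int rpus = cpus - 4;   /* wraps to 4294967293 instead of -3 */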

Signed-off-by: Clément Chigot <chigot@adacore.com>
Reviewed-by: Francisco Iglesias <frasse.iglesias@gmail.com>
Tested-by: Francisco Iglesias <frasse.iglesias@gmail.com>
Message-id: 20230524143714.565792-1-chigot@adacore.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit c9ba1c9f02)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:37 +03:00
Tommy Wu
cdda1ce158 hw/dma/xilinx_axidma: Check DMASR.HALTED to prevent infinite loop.
When we receive a packet from the xilinx_axienet and then try to s2mem
through the xilinx_axidma, and the descriptor ring buffer is full in the
xilinx axidma driver, we assert DMASR.HALTED in the function
stream_process_s2mem and return 0. In the end, we are stuck in an
infinite loop in axienet_eth_rx_notify.

This patch checks the DMASR.HALTED state when we try to push data
from xilinx axi-enet to xilinx axi-dma. When DMASR.HALTED is asserted,
we stop pushing data and thereby prevent the infinite loop.
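
A hedged sketch of the check (register and bit names assumed):

  /* Don't accept more data from axienet while the s2mm channel is
   * halted; axienet can retry once the channel runs again. */
  if (s->regs[R_DMASR] & DMASR_HALTED) {
      return 0;
  }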

Signed-off-by: Tommy Wu <tommy.wu@sifive.com>
Reviewed-by: Edgar E. Iglesias <edgar@zeroasic.com>
Reviewed-by: Frank Chang <frank.chang@sifive.com>
Message-id: 20230519062137.1251741-1-tommy.wu@sifive.com
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 31afe04586)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:37 +03:00
Volker Rümelin
bc8e883065 ui/sdl2: disable SDL_HINT_GRAB_KEYBOARD on Windows
Windows sends an extra left control key up/down input event for
every right alt key up/down input event for keyboards with
international layout. Since commit 830473455f ("ui/sdl2: fix
handling of AltGr key on Windows") QEMU uses a Windows low level
keyboard hook procedure to reliably filter out the special left
control key and to grab the keyboard on Windows.

The SDL2 version 2.0.16 introduced its own Windows low level
keyboard hook procedure to grab the keyboard. Windows calls this
callback before the QEMU keyboard hook procedure. This disables
the special left control key filter when the keyboard is grabbed.

To fix the problem, disable the SDL2 Windows low level keyboard
hook procedure.
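
A sketch of the fix in terms of SDL2's public hint API (placement in QEMU's
SDL initialization is assumed):

  #ifdef _WIN32
      /* Keep SDL2 from installing its own low level keyboard hook, so
       * QEMU's hook continues to filter the fake left-control events. */
      SDL_SetHint(SDL_HINT_GRAB_KEYBOARD, "0");
  #endif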

Reported-by: Bernhard Beschow <shentey@gmail.com>
Signed-off-by: Volker Rümelin <vr_qemu@t-online.de>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Tested-by: Bernhard Beschow <shentey@gmail.com>
Message-Id: <20230418062823.5683-1-vr_qemu@t-online.de>
(cherry picked from commit 1dfea3f212)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:37 +03:00
Bernhard Beschow
e0968d21e2 ui/sdl2: Grab Alt+F4 also under Windows
SDL doesn't grab Alt+F4 under Windows by default. Pressing Alt+F4 thus closes
the VM immediately without confirmation, possibly leading to data loss. Fix
this by always grabbing Alt+F4 on Windows hosts, too.
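
A sketch of the fix in terms of SDL2's public hint API (placement assumed):

  #ifdef _WIN32
      /* Deliver Alt+F4 to QEMU instead of letting Windows close the window. */
      SDL_SetHint(SDL_HINT_WINDOWS_NO_CLOSE_ON_ALT_F4, "1");
  #endif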

Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Reviewed-by: Volker Rümelin <vr_qemu@t-online.de>
Message-Id: <20230417192139.43263-3-shentey@gmail.com>
(cherry picked from commit 083db9db44)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:37 +03:00
Bernhard Beschow
772a83c6db ui/sdl2: Grab Alt+Tab also in fullscreen mode
By default, SDL grabs Alt+Tab only in non-fullscreen mode. As a result,
Alt+Tab switches tasks in the VM in non-fullscreen mode, but on the host
in fullscreen mode. Fix this confusing behavior by grabbing Alt+Tab in
fullscreen mode as well, so that tasks are always switched in the VM.

Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Reviewed-by: Volker Rümelin <vr_qemu@t-online.de>
Message-Id: <20230417192139.43263-2-shentey@gmail.com>
(cherry picked from commit efc00a3709)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:37 +03:00
Marc-André Lureau
9e36edcf03 ui/dbus: fix compilation when GBM && !OPENGL
commit 4814d3cbf ("ui/dbus: restrict opengl to gbm-enabled config")
assumes that whenever GBM is available, OpenGL is. This is not always
the case, let's further restrict opengl-related paths and fix some
compilation issues.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20230515132348.1024663-1-marcandre.lureau@redhat.com>
(cherry picked from commit 0b31e48d62)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:37 +03:00
Marc-André Lureau
e0baf24b4a ui/sdl2: fix surface_gl_update_texture: Assertion 'gls' failed
Before sdl2_gl_update() is called, sdl2_gl_switch() may decide to
destroy the console window and its associated shaders.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1644
Fixes: c84ab0a500 ("ui/console: optionally update after gfx switch")

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Tested-by: Bin Meng <bin.meng@windriver.com>
Message-Id: <20230511074217.4171842-1-marcandre.lureau@redhat.com>
(cherry picked from commit b3a654d82e)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:37 +03:00
Erico Nunes
eef92fac91 ui/gtk-egl: fix scaling for cursor position in scanout mode
vc->gfx.w and vc->gfx.h are not updated appropriately in this code path,
which leads to a different scaling factor for rendering the cursor in
some edge cases (e.g. after the focus has left and re-entered the gtk window).
This can be reproduced using vhost-user-gpu with the gtk ui on the x11
backend.
Use the surface dimensions which are already updated accordingly.

Signed-off-by: Erico Nunes <ernunes@redhat.com>
Acked-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20230320160856.364319-2-ernunes@redhat.com>
(cherry picked from commit f8a951bb95)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:36 +03:00
Erico Nunes
7fd387715b ui/gtk: use widget size for cursor motion event
The gd_motion_event code has some calculations for the cursor position,
which also take into account things like the framebuffer size
differing from the window size.
Using the window size makes things more difficult though, as at least
in the case of Wayland it includes the size of UI elements like a menu
bar at the top of the window. This makes the position calculation wrong
by a few pixels.
Fix it by using the size of the widget, which already returns the size
of the actual space available for rendering the framebuffer.

Signed-off-by: Erico Nunes <ernunes@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
Message-Id: <20230320160856.364319-1-ernunes@redhat.com>
(cherry picked from commit 2f31663ed4)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:36 +03:00
Erico Nunes
76b7002ec7 ui/gtk: fix passing y0_top parameter to scanout
The dmabuf->y0_top flag is passed to .dpy_gl_scanout_dmabuf(), however
in the gtk ui both implementations dropped it when doing the next
scanout_texture call.

Fixes flipped linux console using vhost-user-gpu with the gtk ui
display.

Signed-off-by: Erico Nunes <ernunes@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-Id: <20230220175605.43759-1-ernunes@redhat.com>
(cherry picked from commit 94400fa53f)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:36 +03:00
Bernhard Beschow
880f7d12be hw/ppc/prep: Fix wiring of PIC -> CPU interrupt
Commit cef2e7148e ("hw/isa/i82378: Remove intermediate IRQ forwarder")
passes s->cpu_intr to i8259_init() in i82378_realize() directly. However,
s->cpu_intr isn't initialized yet since that happens after the south bridge's
pci_realize_and_unref() in board code. Fix this by initializing s->cpu_intr
before realizing the south bridge.

Fixes: cef2e7148e ("hw/isa/i82378: Remove intermediate IRQ forwarder")
Signed-off-by: Bernhard Beschow <shentey@gmail.com>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Message-Id: <20230304114043.121024-4-shentey@gmail.com>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
(cherry picked from commit 2237af5e60)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:36 +03:00
Richard Purdie
864ce70c1c target/ppc: Fix fallback to MFSS for MFFS* instructions on pre 3.0 ISAs
The following commits changed the code such that the fallback to MFSS for MFFSCRN,
MFFSCRNI, MFFSCE and MFFSL on pre-3.0 ISAs was removed, turning them into illegal
instructions:

  bf8adfd88b - target/ppc: Move mffscrn[i] to decodetree
  394c2e2fda - target/ppc: Move mffsce to decodetree
  3e5bce70ef - target/ppc: Move mffsl to decodetree

The hardware will handle them as an MFFS instruction, as the code did
previously. This means applications that encountered these instructions
were segfaulting under qemu; they are used in glibc's libm functions,
for example.

The fallback for MFFSCDRN and MFFSCDRNI, added in a later patch, was also missing.

This patch restores the fallback to MFSS for these instructions on pre-3.0
ISAs, as the hardware decoder would, fixing the segfaulting libm code. It
doesn't have the fallback for ISA 3.0 onwards, to match hardware behaviour.

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Reviewed-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230510111913.1718734-1-richard.purdie@linuxfoundation.org>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
(cherry picked from commit 5260ecffd2)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:36 +03:00
Thomas Huth
25b846c85e scripts/device-crash-test: Add a parameter to run with TCG only
We're currently facing the problem that the device-crash-test script
runs twice as long in the CI when a runner supports KVM - which sometimes
results in a timeout of the CI job. To get a more deterministic runtime
here, add an option to the script that allows running it with TCG only.

Reported-by: Eldon Stegall <eldon-qemu@eldondev.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20230414145845.456145-3-thuth@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20230424092249.58552-6-alex.bennee@linaro.org>
(cherry picked from commit 8b869aa591)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:36 +03:00
Thomas Huth
eca6ebee52 gitlab-ci: Avoid to re-run "configure" in the device-crash-test jobs
After "make check-venv" had been added to these jobs, they started
to re-run "configure" each time since our logic in the makefile
thinks that some files are out of date here. Avoid it with the same
trick that we are using in buildtest-template.yml already by disabling
the up-to-date check via NINJA=":".

Fixes: 1d8cf47e5b ("tests: run 'device-crash-test' from tests/venv")
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-Id: <20230414145845.456145-2-thuth@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20230424092249.58552-5-alex.bennee@linaro.org>
(cherry picked from commit 4d3bd91b26)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-31 09:42:36 +03:00
Michael Tokarev
f7f686b61c Update version for 8.0.2 release
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-30 14:59:47 +03:00
Kevin Wolf
bea933e430 block/export: Fix null pointer dereference in error path
There are some error paths in blk_exp_add() that jump to 'fail:' before
'exp' is even created. So we can't just unconditionally access exp->blk.

Add a NULL check, and switch from exp->blk to blk, which is available
earlier, just to be extra sure that we really cover all cases where
BlockDevOps could have been set for it (in practice, this only happens
in drv->create() today, so this part of the change isn't strictly
necessary).

Fixes: Coverity CID 1509238
Fixes: de79b52604
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230510203601.418015-3-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Tested-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit a184563778)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-30 00:16:58 +03:00
Michael Tokarev
dabb4183d1 Update version for 8.0.1 release
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-29 18:19:22 +03:00
Paolo Bonzini
ff692a15bb virtio: qmp: fix memory leak
The VirtioInfoList is already allocated by QAPI_LIST_PREPEND and
need not be allocated by the caller.

Fixes Coverity CID 1508724.

Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 0bfd14149b)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-27 12:54:07 +03:00
Igor Mammedov
134253a4fe machine: do not crash if default RAM backend name has been stolen
QEMU aborts when the default RAM backend should be used (i.e. no
explicit '-machine memory-backend=' specified) but the user
has created an object whose 'id' equals the default RAM backend
name used by the board.

 $QEMU -machine pc \
       -object memory-backend-ram,id=pc.ram,size=4294967296

 Actual results:
 QEMU 7.2.0 monitor - type 'help' for more information
 (qemu) Unexpected error in object_property_try_add() at ../qom/object.c:1239:
 qemu-kvm: attempt to add duplicate property 'pc.ram' to object (type 'container')
 Aborted (core dumped)

Instead of aborting, check for the conflicting 'id' and exit with
an error, suggesting how to remedy the issue.

Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2207886
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Message-Id: <20230522131717.3780533-1-imammedo@redhat.com>
Tested-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit a37531f238)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-26 19:46:01 +03:00
Thomas Huth
e49884a909 hw/scsi/lsi53c895a: Fix reentrancy issues in the LSI controller (CVE-2023-0330)
We cannot use the generic reentrancy guard in the LSI code, so
we have to manually prevent endless reentrancy here. The problematic
lsi_execute_script() function already has a way to detect whether
too many instructions have been executed - we just have to slightly
change the logic so that it also takes into account whether the function
has been called too often in a reentrant way.

The code in fuzz-lsi53c895a-test.c has been taken from an earlier
patch by Mauro Matteo Cascella.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1563
Message-Id: <20230522091011.1082574-1-thuth@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Alexander Bulekov <alxndr@bu.edu>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit b987718bbb)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-26 18:56:39 +03:00
Paolo Bonzini
9d622451fd usb/ohci: Set pad to 0 after frame update
When the OHCI controller's frame number is incremented, the HccaPad1
register should be set to zero (see OHCI spec 4.4).

ReactOS uses hccaPad1 to determine whether the OHCI hardware is running,
and consequently it fails this check on current qemu master.
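
A hedged sketch of the fix in the frame-boundary code (field names assumed):

  /* Write back the incremented frame number and clear the pad word, as
   * required by OHCI spec 4.4. */
  hcca.frame = cpu_to_le16(ohci->frame_number);
  hcca.pad = 0;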

Signed-off-by: Ryan Wendland <wendland@live.com.au>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1048
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 6301460ce9)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-26 10:49:10 +03:00
Akihiko Odaki
668aeea0ec util/vfio-helpers: Use g_file_read_link()
When _FORTIFY_SOURCE=2, glibc version is 2.35, and GCC version is
12.1.0, the compiler complains as follows:

In file included from /usr/include/features.h:490,
                 from /usr/include/bits/libc-header-start.h:33,
                 from /usr/include/stdint.h:26,
                 from /usr/lib/gcc/aarch64-unknown-linux-gnu/12.1.0/include/stdint.h:9,
                 from /home/alarm/q/var/qemu/include/qemu/osdep.h:94,
                 from ../util/vfio-helpers.c:13:
In function 'readlink',
    inlined from 'sysfs_find_group_file' at ../util/vfio-helpers.c:116:9,
    inlined from 'qemu_vfio_init_pci' at ../util/vfio-helpers.c:326:18,
    inlined from 'qemu_vfio_open_pci' at ../util/vfio-helpers.c:517:9:
/usr/include/bits/unistd.h:119:10: error: argument 2 is null but the corresponding size argument 3 value is 4095 [-Werror=nonnull]
  119 |   return __glibc_fortify (readlink, __len, sizeof (char),
      |          ^~~~~~~~~~~~~~~

This error implies the allocated buffer can be NULL. Use
g_file_read_link(), which allocates buffer automatically to avoid the
error.
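
The replacement pattern, roughly (g_file_read_link() allocates the result
itself, so no fixed-size buffer is involved):

  g_autofree char *group = g_file_read_link(path, NULL);
  if (!group) {
      /* handle the error */
  }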

Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit dbdea0dbfe)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-26 10:47:30 +03:00
Stefan Hajnoczi
fae9449998 rtl8139: fix large_send_mss divide-by-zero
If the driver sets large_send_mss to 0 then a divide-by-zero occurs.
Even if the division wasn't a problem, the for loop that emits MSS-sized
packets would never terminate.

Solve these issues by skipping offloading when large_send_mss=0.
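
A sketch of the guard (surrounding code assumed):

  if (large_send_mss == 0) {
      /* A zero MSS would divide by zero and loop forever; transmit the
       * packet without offloading instead. */
      goto skip_offload;
  }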

This issue was found by OSS-Fuzz as part of Alexander Bulekov's device
fuzzing work. The reproducer is:

  $ cat << EOF | ./qemu-system-i386 -display none -machine accel=qtest, -m \
  512M,slots=1,maxmem=0xffff000000000000 -machine q35 -nodefaults -device \
  rtl8139,netdev=net0 -netdev user,id=net0 -device \
  pc-dimm,id=nv1,memdev=mem1,addr=0xb800a64602800000 -object \
  memory-backend-ram,id=mem1,size=2M  -qtest stdio
  outl 0xcf8 0x80000814
  outl 0xcfc 0xe0000000
  outl 0xcf8 0x80000804
  outw 0xcfc 0x06
  write 0xe0000037 0x1 0x04
  write 0xe00000e0 0x2 0x01
  write 0x1 0x1 0x04
  write 0x3 0x1 0x98
  write 0xa 0x1 0x8c
  write 0xb 0x1 0x02
  write 0xc 0x1 0x46
  write 0xd 0x1 0xa6
  write 0xf 0x1 0xb8
  write 0xb800a646028c000c 0x1 0x08
  write 0xb800a646028c000e 0x1 0x47
  write 0xb800a646028c0010 0x1 0x02
  write 0xb800a646028c0017 0x1 0x06
  write 0xb800a646028c0036 0x1 0x80
  write 0xe00000d9 0x1 0x40
  EOF

Buglink: https://gitlab.com/qemu-project/qemu/-/issues/1582
Closes: https://gitlab.com/qemu-project/qemu/-/issues/1582
Cc: qemu-stable@nongnu.org
Cc: Peter Maydell <peter.maydell@linaro.org>
Fixes: 6d71357a3b ("rtl8139: honor large send MSS value")
Reported-by: Alexander Bulekov <alxndr@bu.edu>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Alexander Bulekov <alxndr@bu.edu>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 792676c165)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-24 18:07:21 +03:00
Akihiko Odaki
02bd13ae3a igb: Always copy ethernet header
igb_receive_internal() used to check the iov length to determine
whether to copy the iovs to a contiguous buffer, but the check is
flawed in two ways:
- It does not ensure that iovcnt > 0.
- It does not take the virtio-net header into consideration.

The size of this copy is just 22 octets, which can be even less than
the code size required for the checks. This (wrong) optimization is
probably not worth it, so just remove it. Removing it also allows igb
to assume aligned accesses for the ethernet header.

Fixes: 3a977deebe ("Intrdocue igb device emulation")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit dc9ef1bf45)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-24 16:37:48 +03:00
Akihiko Odaki
c84bcff3d3 e1000e: Always copy ethernet header
e1000e_receive_internal() used to check the iov length to determine
whether to copy the iovs to a contiguous buffer, but the check is
flawed in two ways:
- It does not ensure that iovcnt > 0.
- It does not take the virtio-net header into consideration.

The size of this copy is just 18 octets, which can be even less than
the code size required for the checks. This (wrong) optimization is
probably not worth it, so just remove it.

Fixes: 6f3fbe4ed0 ("net: Introduce e1000e device emulation")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 310a128eae)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-24 16:35:30 +03:00
Akihiko Odaki
5c4f2f1b60 net/net_rx_pkt: Use iovec for net_rx_pkt_set_protocols()
igb does not properly ensure the buffer passed to
net_rx_pkt_set_protocols() is contiguous for the entire L2/L3/L4 header.
Allow it to pass scattered data to net_rx_pkt_set_protocols().

Fixes: 3a977deebe ("Intrdocue igb device emulation")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 2f0fa232b8)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-24 16:34:46 +03:00
Akihiko Odaki
ba3c7bf178 igb: Clear IMS bits when committing ICR access
The datasheet makes contradicting statements regarding ICR accesses, so
it cannot be relied on to determine the behavior of ICR accesses.
However, e1000e does clear IMS bits when ICR is read, and Linux also
expects ICR reads to clear IMS bits, according to:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/net/ethernet/intel/igb/igb_main.c?h=v6.2#n8048

Fixes: 3a977deebe ("Intrdocue igb device emulation")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit f0b1df5c45)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-24 16:34:28 +03:00
Akihiko Odaki
6e260100d0 igb: Do not require CTRL.VME for tx VLAN tagging
While the datasheet of e1000e says it checks CTRL.VME for tx VLAN
tagging, igb's datasheet has no such statements. It also says for
"CTRL.VLE":
> This register only affects the VLAN Strip in Rx it does not have any
> influence in the Tx path in the 82576.
(Appendix A. Changes from the 82575)

There is no "CTRL.VLE" so it is more likely that it is a mistake of
CTRL.VME.

Fixes: fba7c3b788 ("igb: respect VMVIR and VMOLR for VLAN")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit e209716749)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-24 16:34:04 +03:00
Akihiko Odaki
9ff3fe63fc igb: Fix Rx packet type encoding
igb's advanced descriptor uses a packet type encoding different from
the one used in e1000e's extended descriptor. Fix the logic to encode
the Rx packet type accordingly.

Fixes: 3a977deebe ("Intrdocue igb device emulation")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit ed447c60b3)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-24 16:33:54 +03:00
Akihiko Odaki
0f7ca2bf2c e1000x: Fix BPRC and MPRC
Before this change, e1000 and the common code updated BPRC and MPRC
depending on the matched filter, but e1000e and igb decided to update
those counters by deriving the packet type independently. This
inconsistency caused a multicast packet to be counted twice.

Updating BPRC and MPRC depending on the matched filter is fundamentally
flawed anyway, as a filter can be used for different types of packets.
For example, it is possible to filter broadcast packets with MTA.

Always determine what counters to update by inspecting the packets.

Fixes: 3b27430177 ("e1000: Implementing various counters")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit f3f9b726af)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-24 16:31:45 +03:00
timothee.cocault@gmail.com
eb134d1d58 e1000e: Fix tx/rx counters
The bytes and packets counter registers are cleared on read.

Copying the "total counter" registers to the "good counter" registers has
side effects.
If the "total" register is never read by the OS, it only gets incremented.
This leads to exponential growth of the "good" register.

This commit increments the counters individually to avoid this.

Signed-off-by: Timothée Cocault <timothee.cocault@gmail.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit 8d689f6aae)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-23 22:07:47 +03:00
Kevin Wolf
a7002f15c8 nbd/server: Fix drained_poll to wake coroutine in right AioContext
nbd_drained_poll() generally runs in the main thread, not whatever
iothread the NBD server coroutine is meant to run in, so it can't
directly reenter the coroutines to wake them up.

The code seems to have the right intention: it specifies the correct
AioContext when it calls qemu_aio_coroutine_enter(). However, this
function doesn't schedule the coroutine to run in that AioContext, but
assumes it is already called in the home thread of the AioContext.

To fix this, add a new thread-safe qio_channel_wake_read() that can be
called in the main thread to wake up the coroutine in its AioContext,
and use this in nbd_drained_poll().

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230517152834.277483-3-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 7c1f51bf38)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-22 20:54:40 +03:00
Kevin Wolf
d001f222e3 graph-lock: Disable locking for now
In QEMU 8.0, we've been seeing deadlocks in bdrv_graph_wrlock(). They
come from callers that hold an AioContext lock, which is not allowed
during polling. In theory, we could temporarily release the lock, but
callers are inconsistent about whether they hold a lock, and if they do,
some are also confused about which one they hold. While all of this is
fixable, it's not trivial, and the best course of action for 8.0.1 is
probably just disabling the graph locking code temporarily.

We don't currently rely on graph locking yet. It is supposed to replace
the AioContext lock eventually to enable multiqueue support, but as long
as we still have the AioContext lock, it is sufficient without the graph
lock. Once the AioContext lock goes away, the deadlock doesn't exist any
more either and this commit can be reverted. (Of course, it can also be
reverted while the AioContext lock still exists if the callers have been
fixed.)

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230517152834.277483-2-kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 80fc5d2600)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-22 20:54:40 +03:00
Stefan Hajnoczi
84d839e499 block: compile out assert_bdrv_graph_readable() by default
reader_count() is a performance bottleneck because the global
aio_context_list_lock mutex causes thread contention. Put this debugging
assertion behind a new ./configure --enable-debug-graph-lock option and
disable it by default.

The --enable-debug-graph-lock option is also enabled by the more general
--enable-debug option.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230501173443.153062-1-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 58a2e3f5c3)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(Mjt: pick this one up so the next patch which disables this applies cleanly)
2023-05-22 20:53:40 +03:00
Stefan Hajnoczi
a0b89ba845 tested: add test for nested aio_poll() in poll handlers
Cc: qemu-stable@nongnu.org
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230502184134.534703-3-stefanha@redhat.com>
[kwolf: Restrict to CONFIG_POSIX, Windows doesn't support polling]
Tested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 844a12a63e)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-22 19:38:44 +03:00
Stefan Hajnoczi
a91defe16b aio-posix: do not nest poll handlers
QEMU's event loop supports nesting, which means that event handler
functions may themselves call aio_poll(). The condition that triggered a
handler must be reset before the nested aio_poll() call, otherwise the
same handler will be called and immediately re-enter aio_poll. This
leads to an infinite loop and stack exhaustion.

Poll handlers are especially prone to this issue, because they typically
reset their condition by finishing the processing of pending work.
Unfortunately it is during the processing of pending work that nested
aio_poll() calls typically occur and the condition has not yet been
reset.

Disable a poll handler during ->io_poll_ready() so that a nested
aio_poll() call cannot invoke ->io_poll_ready() again. As a result, the
disabled poll handler and its associated fd handler do not run during
the nested aio_poll(). Calling aio_set_fd_handler() from inside nested
aio_poll() could cause it to run again. If the fd handler is pending
inside nested aio_poll(), then it will also run again.

In theory fd handlers can be affected by the same issue, but they are
more likely to reset the condition before calling nested aio_poll().

This is a special case and it's somewhat complex, but I don't see a way
around it as long as nested aio_poll() is supported.

Cc: qemu-stable@nongnu.org
Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2186181
Fixes: c382706925 ("block: Mark bdrv_co_io_(un)plug() and callers GRAPH_RDLOCK")
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230502184134.534703-2-stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 6d740fb01b)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-22 19:38:44 +03:00
Mauro Matteo Cascella
81d13aa5e0 virtio-crypto: fix NULL pointer dereference in virtio_crypto_free_request
Ensure op_info is not NULL in case of QCRYPTODEV_BACKEND_ALG_SYM algtype.

Fixes: 0e660a6f90 ("crypto: Introduce RSA algorithm")
Signed-off-by: Mauro Matteo Cascella <mcascell@redhat.com>
Reported-by: Yiming Tao <taoym@zju.edu.cn>
Message-Id: <20230509075317.1132301-1-mcascell@redhat.com>
Reviewed-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: zhenwei pi<pizhenwei@bytedance.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 3e69908907)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-22 19:38:44 +03:00
Eugenio Pérez
302ac06ab9 virtio-net: not enable vq reset feature unconditionally
The commit 93a97dc520 ("virtio-net: enable vq reset feature") enables
the vq reset feature unconditionally as long as the device is emulated.
This makes it impossible to actually disable the feature, and it causes
migration problems from qemu versions earlier than 7.2.

The entire commit is unneeded, as the device already enables or
disables the feature properly.

This reverts commit 93a97dc520.
Fixes: 93a97dc520 ("virtio-net: enable vq reset feature")
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>

Message-Id: <20230504101447.389398-1-eperezma@redhat.com>
Reviewed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 1fac00f70b)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-22 19:38:44 +03:00
Leonardo Bras
adc49750d2 hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0
Since its implementation in v8.0.0-rc0, having PCI_ERR_UNCOR_MASK
set for machine types < 8.0 will cause migration to fail if the target
QEMU version is < 8.0.0:

qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0
qemu-system-x86_64: Failed to load PCIDevice:config
qemu-system-x86_64: Failed to load e1000e:parent_obj
qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e'
qemu-system-x86_64: load of migration failed: Invalid argument

The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0,
with this cmdline:

./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX]

In order to fix this, property x-pcie-err-unc-mask was introduced to
control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by
default, but is disabled if machine type <= 7.2.

Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register")
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Leonardo Bras <leobras@redhat.com>
Message-Id: <20230503002701.854329-1-leobras@redhat.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576
Tested-by: Fiona Ebner <f.ebner@proxmox.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 5ed3dabe57)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-22 19:38:44 +03:00
Hawkins Jiawei
a9144eed6c vhost: fix possible wrap in SVQ descriptor ring
QEMU invokes vhost_svq_add() when adding a guest's element
into SVQ. In vhost_svq_add(), it uses vhost_svq_available_slots()
to check whether QEMU can add the element into SVQ. If there is
enough space, then QEMU combines some out descriptors and some
in descriptors into one descriptor chain, and adds it into
`svq->vring.desc` by vhost_svq_vring_write_descs().

Yet the problem is that, `svq->shadow_avail_idx - svq->shadow_used_idx`
in vhost_svq_available_slots() returns the number of occupied elements,
or the number of descriptor chains, instead of the number of occupied
descriptors, which may cause wrapping in SVQ descriptor ring.

Here is an example. In vhost_handle_guest_kick(), QEMU forwards
as many available buffers as possible to the device via virtqueue_pop() and
vhost_svq_add_element(). virtqueue_pop() returns a guest's element,
and then this element is added into SVQ by vhost_svq_add_element(),
a wrapper to vhost_svq_add(). If QEMU invokes virtqueue_pop() and
vhost_svq_add_element() `svq->vring.num` times,
vhost_svq_available_slots() thinks QEMU just ran out of slots and
everything should work fine. But in fact, virtqueue_pop() returns
`svq->vring.num` elements or descriptor chains, more than
`svq->vring.num` descriptors due to guest memory fragmentation,
and this causes wrapping in SVQ descriptor ring.

This bug exists even before marking the descriptors used.
If the guest memory is fragmented, SVQ must add chains,
so it may try to add more descriptors than there is space for.

This patch solves it by adding `num_free` field in
VhostShadowVirtqueue structure and updating this field
in vhost_svq_add() and vhost_svq_get_buf(), to record
the number of free descriptors.
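
A sketch of the resulting accounting (the num_free field is named in the
description above):

  static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
  {
      /* Count free descriptors, not free descriptor chains. */
      return svq->num_free;
  }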

Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding")
Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
Acked-by: Eugenio Pérez <eperezma@redhat.com>
Message-Id: <20230509084817.3973-1-yin31149@gmail.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Lei Yang <leiyang@redhat.com>
(cherry picked from commit 5d410557de)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-19 23:23:50 +03:00
Xinyu Li
0de5117819 target/i386: fix avx2 instructions vzeroall and vpermdq
vzeroall: xmm_regs should be used instead of xmm_t0
vpermdq: bits 3 and 7 of imm should be considered

Signed-off-by: Xinyu Li <lixinyu20s@ict.ac.cn>
Message-Id: <20230510145222.586487-1-lixinyu20s@ict.ac.cn>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 056d649007)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-18 20:46:55 +03:00
Paolo Bonzini
db8051ad59 target/i386: fix operand size for VCOMI/VUCOMI instructions
Compared to other SSE instructions, VUCOMISx and VCOMISx are different:
the single and double precision versions are distinguished through a
prefix, however they use no-prefix and 0x66 for SS and SD respectively.
Scalar values are usually associated with 0xF2 and 0xF3.

Because of these, they incorrectly perform a 128-bit memory load instead
of a 32- or 64-bit load.  Fix this by writing a custom decoding function.

I tested that the reproducer is fixed and the test-avx output does not
change.

Reported-by: Gabriele Svelto <gsvelto@mozilla.com>
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1637
Fixes: f8d19eec0d ("target/i386: reimplement 0x0f 0x28-0x2f, add AVX", 2022-10-18)
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 2b55e479e6)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-18 20:46:47 +03:00
Paolo Bonzini
1e029102e6 scsi-generic: fix buffer overflow on block limits inquiry
Using a linux 6.x guest, at boot time, an inquiry on a scsi-generic
device makes qemu crash.  This is caused by a buffer overflow when
scsi-generic patches the block limits VPD page.

Do the operations on a temporary on-stack buffer that is guaranteed
to be large enough.

Reported-by: Théo Maillart <tmaillart@freebox.fr>
Analyzed-by: Théo Maillart <tmaillart@freebox.fr>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 9bd634b2f5)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-18 20:46:29 +03:00
Richard Henderson
c283a4bc76 target/arm: Fix vd == vm overlap in sve_ldff1_z
If vd == vm, copy vm to scratch, so that we can pre-zero
the output and still access the gather indices.

Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1612
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230504104232.1877774-1-richard.henderson@linaro.org
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit a6771f2f5c)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-18 17:56:51 +03:00
Eric Blake
c0ad2a9191 migration: Attempt disk reactivation in more failure scenarios
Commit fe904ea824 added a fail_inactivate label, which tries to
reactivate disks on the source after a failure while s->state ==
MIGRATION_STATUS_ACTIVE, but didn't actually use the label if
qemu_savevm_state_complete_precopy() failed.  This failure to
reactivate is also present in commit 6039dd5b1c (also covering the new
s->state == MIGRATION_STATUS_DEVICE state) and 403d18ae (ensuring
s->block_inactive is set more reliably).

Consolidate the two labels back into one - no matter HOW migration is
failed, if there is any chance we can reach vm_start() after having
attempted inactivation, it is essential that we have tried to restart
disks before then.  This also makes the cleanup more like
migrate_fd_cancel().

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
Message-Id: <20230502205212.134680-1-eblake@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 6dab4c93ec)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(Mjt: minor context tweak near added comment in migration/migration.c)
2023-05-18 16:59:30 +03:00
Eric Blake
d2a811dd7d migration: Minor control flow simplification
No need to declare a temporary variable.

Suggested-by: Juan Quintela <quintela@redhat.com>
Fixes: 1df36e8c6289 ("migration: Handle block device inactivation failures better")
Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 5d39f44d7a)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-18 16:57:44 +03:00
Eric Blake
cb898262a4 migration: Handle block device inactivation failures better
Consider what happens when performing a migration between two host
machines connected to an NFS server serving multiple block devices to
the guest, when the NFS server becomes unavailable.  The migration
attempts to inactivate all block devices on the source (a necessary
step before the destination can take over); but if the NFS server is
non-responsive, the attempt to inactivate can itself fail.  When that
happens, the destination fails to get the migrated guest (good,
because the source wasn't able to flush everything properly):

  (qemu) qemu-kvm: load of migration failed: Input/output error

at which point, our only hope for the guest is for the source to take
back control.  With the current code base, the host outputs a message, but then appears to resume:

  (qemu) qemu-kvm: qemu_savevm_state_complete_precopy_non_iterable: bdrv_inactivate_all() failed (-1)

  (src qemu) info status
   VM status: running

but a second migration attempt now asserts:

  (src qemu) qemu-kvm: ../block.c:6738: int bdrv_inactivate_recurse(BlockDriverState *): Assertion `!(bs->open_flags & BDRV_O_INACTIVE)' failed.

Whether the guest is recoverable on the source after the first failure
is debatable, but what we do not want is to have qemu itself fail due
to an assertion.  It looks like the problem is as follows:

In migration.c:migration_completion(), the source sets 'inactivate' to
true (since COLO is not enabled), then tries
savevm.c:qemu_savevm_state_complete_precopy() with a request to
inactivate block devices.  In turn, this calls
block.c:bdrv_inactivate_all(), which fails when flushing runs up
against the non-responsive NFS server.  With savevm failing, we are
now left in a state where some, but not all, of the block devices have
been inactivated; but migration_completion() then jumps to 'fail'
rather than 'fail_invalidate' and skips an attempt to reclaim those
disks by calling bdrv_activate_all().  Even if we do attempt to
reclaim disks, we aren't taking note of failure there, either.

Thus, we have reached a state where the migration engine has forgotten
all state about whether a block device is inactive, because we did not
set s->block_inactive in enough places; so migration allows the source
to reach vm_start() and resume execution, violating the block layer
invariant that the guest CPUs should not be restarted while a device
is inactive.  Note that the code in migration.c:migrate_fd_cancel()
will also try to reactivate all block devices if s->block_inactive was
set, but because we failed to set that flag after the first failure,
the source assumes it has reclaimed all devices, even though it still
has remaining inactivated devices and does not try again.  Normally,
qmp_cont() will also try to reactivate all disks (or correctly fail if
the disks are not reclaimable because NFS is not yet back up), but the
auto-resumption of the source after a migration failure does not go
through qmp_cont().  And because we have left the block layer in an
inconsistent state with devices still inactivated, the later migration
attempt is hitting the assertion failure.

Since it is important to not resume the source with inactive disks,
this patch marks s->block_inactive before attempting inactivation,
rather than after succeeding, in order to prevent any vm_start() until
it has successfully reactivated all devices.

See also https://bugzilla.redhat.com/show_bug.cgi?id=2058982

Signed-off-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Acked-by: Lukas Straub <lukasstraub2@web.de>
Tested-by: Lukas Straub <lukasstraub2@web.de>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 403d18ae38)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-18 16:57:30 +03:00
Michael Tokarev
45a67df841 linux-user: fix getgroups/setgroups allocations
linux-user getgroups(), setgroups(), getgroups32() and setgroups32()
used alloca() to allocate grouplist arrays, with unchecked gidsetsize
coming from the "guest".  With NGROUPS_MAX being 65536 (on linux, and it
is common for an application to allocate NGROUPS_MAX for getgroups()),
a typical allocation is half a megabyte on the stack, which simply
overflows the stack and leads to an immediate SIGSEGV in the actual
system getgroups() implementation.

An example of such an issue is aptitude, e.g.
https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=811087#72

Cap gidsetsize to NGROUPS_MAX (return EINVAL if it is larger than that),
and use heap allocation for grouplist instead of alloca().  While at it,
fix coding style and make all 4 implementations identical.
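
A sketch of the resulting pattern (following the g_try_new() idea mentioned
below; error constants as used elsewhere in linux-user):

  if (gidsetsize > NGROUPS_MAX) {
      return -TARGET_EINVAL;
  }
  grouplist = g_try_new(gid_t, gidsetsize);
  if (!grouplist && gidsetsize > 0) {
      return -TARGET_ENOMEM;
  }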

Try to not impose random limits - for example, allow gidsetsize to be
negative for getgroups() - just do not allocate negative-sized grouplist
in this case but still do actual getgroups() call.  But do not allow
negative gidsetsize for setgroups() since its argument is unsigned.

Capping at NGROUPS_MAX seems a bit arbitrary - we could allow more; it is
not an error if the set size is NGROUPS_MAX+1. But we should not allow
integer overflow for the array being allocated. Maybe it is enough to
just call g_try_new() and return ENOMEM if it fails.

Maybe there's also no need to convert setgroups() since its set is
usually smaller and known beforehand (KERN_NGROUPS_MAX is actually 63 -
apparently a kernel-imposed limit for the runtime group set).

The patch fixes aptitude segfault mentioned above.

Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Message-Id: <20230409105327.1273372-1-mjt@msgid.tls.msk.ru>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
(cherry picked from commit 1e35d32789)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-18 14:44:13 +03:00
Daniil Kovalev
69a6ea7c4b linux-user: Fix mips fp64 executables loading
If a program requires fr1, we should set the FR bit of the CP0 control/status
register and add the F64 hardware flag. The corresponding `else if` branch
statement is copied from the linux kernel sources (see the `arch_check_elf`
function in linux/arch/mips/kernel/elf.c).

Signed-off-by: Daniil Kovalev <dkovalev@compiler-toolchain-for.me>
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Message-Id: <20230404052153.16617-1-dkovalev@compiler-toolchain-for.me>
Signed-off-by: Laurent Vivier <laurent@vivier.eu>
(cherry picked from commit a0f8d2701b)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-18 14:44:13 +03:00
Alex Bennée
0b1b5a4204 tests/docker: bump the xtensa base to debian:11-slim
Stretch is going out of support so things like security updates will
fail. As the toolchain itself is binary it hopefully won't mind the
underlying OS being updated.

Message-Id: <20230503091244.1450613-3-alex.bennee@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reported-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 3217b84f3c)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-18 14:44:13 +03:00
Lizhi Yang
eb82a80f51 docs/about/emulation: fix typo
Duplicated word "are".

Signed-off-by: Lizhi Yang <sledgeh4w@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-Id: <20230511080119.99018-1-sledgeh4w@gmail.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit c70bb9a771)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-17 12:27:58 +03:00
Claudio Imbrenda
8ad637881f util/async-teardown: wire up query-command-line-options
Add a new -run-with option with an async-teardown=on|off parameter. It is
visible in the output of the query-command-line-options QMP command, so it
can be discovered and used by libvirt.

The option -async-teardown is now redundant; deprecate it.
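
Usage, as described above:

  $QEMU -run-with async-teardown=on ...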

Reported-by: Boris Fiuczynski <fiuczy@linux.ibm.com>
Fixes: c891c24b1a ("os-posix: asynchronous teardown for shutdown on Linux")
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Message-Id: <20230505120051.36605-2-imbrenda@linux.ibm.com>
[thuth: Add curly braces to fix error with GCC 8.5, fix bug in deprecated.rst]
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 80bd81cadd)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(Mjt: context tweak in docs/about/deprecated.rst)
2023-05-17 12:27:58 +03:00
Claudio Imbrenda
21b54a683d s390x/pv: Fix spurious warning with asynchronous teardown
Kernel commit 292a7d6fca33 ("KVM: s390: pv: fix asynchronous teardown
for small VMs") causes the KVM_PV_ASYNC_CLEANUP_PREPARE ioctl to fail
if the VM is not larger than 2GiB. QEMU would attempt it and fail,
print an error message, and then proceed with a normal teardown.

Avoid attempting to use asynchronous teardown altogether when the VM is
not larger than 2 GiB. This will avoid triggering the error message and
also avoid pointless overhead; normal teardown is fast enough for small
VMs.

Reported-by: Marc Hartmayer <mhartmay@linux.ibm.com>
Fixes: c3a073c610 ("s390x/pv: Add support for asynchronous teardown for reboot")
Link: https://lore.kernel.org/all/20230421085036.52511-2-imbrenda@linux.ibm.com/
Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Message-Id: <20230510105531.30623-2-imbrenda@linux.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
[thuth: Fix inline function parameter in pv.h]
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 88693ab2a5)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-17 12:27:58 +03:00
Richard Henderson
36cd9bc8e2 tcg/i386: Set P_REXW in tcg_out_addi_ptr
The REXW bit must be set to produce a 64-bit pointer result; the
bit is disabled in 32-bit mode, so we can do this unconditionally.

Fixes: 7d9e1ee424 ("tcg/i386: Adjust assert in tcg_out_addi_ptr")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1592
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1642
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 988998503b)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-17 12:27:58 +03:00
Jason Andryuk
117f33c9a7 9pfs/xen: Fix segfault on shutdown
xen_9pfs_free can't use gnttabdev since it is already closed and NULL-ed
out when free is called.  Do the teardown in _disconnect().  This
matches the setup done in _connect().

trace-events are also added for the XenDevOps functions.

Signed-off-by: Jason Andryuk <jandryuk@gmail.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Message-Id: <20230502143722.15613-1-jandryuk@gmail.com>
[C.S.: - Remove redundant return in xen_9pfs_free().
       - Add comment to trace-events. ]
Signed-off-by: Christian Schoenebeck <qemu_oss@crudebyte.com>
(cherry picked from commit 92e667f6fd)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-17 12:27:58 +03:00
Ilya Leoshkevich
e347aa89dd s390x/tcg: Fix LDER instruction format
It's RRE, not RXE.

Found by running valgrind's none/tests/s390x/bfp-2.

Fixes: 86b59624c4 ("s390x/tcg: Implement LOAD LENGTHENED short HFP to long HFP")
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-Id: <20230511134726.469651-1-iii@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit 970641de01)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-17 12:27:58 +03:00
Ilya Leoshkevich
b858c53ef6 target/s390x: Fix EXECUTE of relative branches
Fix a problem similar to the one fixed by commit 703d03a4aa
("target/s390x: Fix EXECUTE of relative long instructions"), but now
for relative branches.

Reported-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20230426235813.198183-2-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit e8ecdfeb30)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-16 19:08:39 +03:00
Shivaprasad G Bhat
7ceebe3f90 tcg: ppc64: Fix mask generation for vextractdm
In the function do_extractm() the mask is calculated as
dup_const(1 << (element_width - 1)). '1' being a signed int
works fine for MO_8, MO_16 and MO_32. For MO_64, on a PPC64 host
this ends up becoming 0 on compilation. vextractdm uses MO_64,
so it ends up with a mask of 0.

Explicitly use 1ULL instead of the signed int 1, as is done
everywhere else.
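
A self-contained illustration of the promotion problem (not the QEMU
source; the shift on a plain 'int' is undefined behaviour, so results
vary by platform — on the PPC64 host it came out as 0):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int element_width = 64;
        /* '1' is a 32-bit signed int, so a shift by 63 is undefined
         * behaviour; the PPC64 host compiler produced 0 here. */
        uint64_t bad = 1 << (element_width - 1);
        /* '1ULL' keeps the whole computation in 64-bit arithmetic. */
        uint64_t good = 1ULL << (element_width - 1);
        printf("bad=%016llx good=%016llx\n",
               (unsigned long long)bad, (unsigned long long)good);
        return 0;
    }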

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1536
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Lucas Mateus Castro <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Message-Id: <168319292809.1159309.5817546227121323288.stgit@ltc-boston1.aus.stglabs.ibm.com>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
(cherry picked from commit 6a5d81b172)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-15 18:42:50 +03:00
Cédric Le Goater
950882af67 async: Suppress GCC13 false positive in aio_bh_poll()
GCC13 reports an error :

../util/async.c: In function ‘aio_bh_poll’:
include/qemu/queue.h:303:22: error: storing the address of local variable ‘slice’ in ‘*ctx.bh_slice_list.sqh_last’ [-Werror=dangling-pointer=]
  303 |     (head)->sqh_last = &(elm)->field.sqe_next;                          \
      |     ~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~
../util/async.c:169:5: note: in expansion of macro ‘QSIMPLEQ_INSERT_TAIL’
  169 |     QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
      |     ^~~~~~~~~~~~~~~~~~~~
../util/async.c:161:17: note: ‘slice’ declared here
  161 |     BHListSlice slice;
      |                 ^~~~~
../util/async.c:161:17: note: ‘ctx’ declared here

But the local variable 'slice' is removed from the global context list
in the following loop of the same routine. Add a pragma to silence GCC.
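
The workaround follows the usual GCC idiom (a sketch of the shape, using
the warning name from the diagnostic quoted above):

    #pragma GCC diagnostic push
    #pragma GCC diagnostic ignored "-Wdangling-pointer="
        QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
    #pragma GCC diagnostic pop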

Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Tested-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Message-Id: <20230420202939.1982044-1-clg@kaod.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit d66ba6dc1c)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
(Mjt: cherry-picked to stable-8.0 to eliminate CI failures on win*)
2023-05-15 18:03:51 +03:00
Peter Maydell
e09f912550 target/arm: Correct AArch64.S2MinTxSZ 32-bit EL1 input size check
In check_s2_mmu_setup() we have a check that is attempting to
implement the part of AArch64.S2MinTxSZ that is specific to when EL1
is AArch32:

    if !s1aarch64 then
        // EL1 is AArch32
        min_txsz = Min(min_txsz, 24);

Unfortunately we got this wrong in two ways:

(1) The minimum txsz corresponds to a maximum inputsize, but we got
the sense of the comparison wrong and were faulting for all
inputsizes less than 40 bits

(2) We try to implement this as an extra check that happens after
we've done the same txsz checks we would do for an AArch64 EL1, but
in fact the pseudocode is *loosening* the requirements, so that txsz
values that would fault for an AArch64 EL1 do not fault for AArch32
EL1, because it does Min(old_min, 24), not Max(old_min, 24).

You can see this also in the text of the Arm ARM in table D8-8, which
shows that where the implemented PA size is less than 40 bits an
AArch32 EL1 is still OK with a configured stage2 T0SZ for a 40 bit
IPA, whereas if EL1 is AArch64 then the T0SZ must be big enough to
constrain the IPA to the implemented PA size.

Because of part (2), we can't do this as a separate check, but
have to integrate it into aa64_va_parameters(). Add a new argument
to that function to indicate that EL1 is 32-bit. All the existing
callsites except the one in get_phys_addr_lpae() can pass 'false',
because they are either doing a lookup for a stage 1 regime or
else they don't care about the tsz/tsz_oob fields.
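
Schematically, inside aa64_va_parameters() the new check has to relax the
bound, mirroring the pseudocode above (a sketch; the variable and argument
names are illustrative):

    if (el1_is_aa32) {
        /* AArch32 EL1: Min(), i.e. loosen min_tsz, per AArch64.S2MinTxSZ;
         * applying it as an extra restriction was the bug. */
        min_tsz = MIN(min_tsz, 24);
    }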

Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1627
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230509092059.3176487-1-peter.maydell@linaro.org
(cherry picked from commit 478dccbb99)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-14 11:25:57 +03:00
Peter Maydell
80a2c1b5fe ui: Fix pixel colour channel order for PNG screenshots
When we take a PNG screenshot the ordering of the colour channels in
the data is not correct, resulting in the image having weird
colouring compared to the actual display.  (Specifically, on a
little-endian host the blue and red channels are swapped; on
big-endian everything is wrong.)

This happens because the pixman idea of the pixel data and the libpng
idea differ.  PIXMAN_a8r8g8b8 defines that pixels are 32-bit values,
with A in bits 24-31, R in bits 16-23, G in bits 8-15 and B in bits
0-7.  This means that on little-endian systems the bytes in memory
are
   B G R A
and on big-endian systems they are
   A R G B

libpng, on the other hand, thinks of pixels as being a series of
values for each channel, so its format PNG_COLOR_TYPE_RGB_ALPHA
always wants bytes in the order
   R G B A

This isn't the same as the pixman order for either big or little
endian hosts.

The alpha channel is also unnecessary bulk in the output PNG file,
because there is no alpha information in a screenshot.

To handle the endianness issue, we already define in ui/qemu-pixman.h
various PIXMAN_BE_* and PIXMAN_LE_* values that give consistent
byte-order pixel channel formats.  So we can use PIXMAN_BE_r8g8b8 and
PNG_COLOR_TYPE_RGB, which both have an in-memory byte order of
    R G B
and 3 bytes per pixel.

(PPM format screenshots get this right; they already use the
PIXMAN_BE_r8g8b8 format.)

Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1622
Fixes: 9a0a119a38 ("Added parameter to take screenshot with screendump as PNG")
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20230502135548.2451309-1-peter.maydell@linaro.org
(cherry picked from commit cd22a0f520)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-14 11:24:05 +03:00
Peter Maydell
3148fe1ac8 target/arm: Fix handling of SW and NSW bits for stage 2 walks
We currently don't correctly handle the VSTCR_EL2.SW and VTCR_EL2.NSW
configuration bits.  These allow configuration of whether the stage 2
page table walks for Secure IPA and NonSecure IPA should do their
descriptor reads from Secure or NonSecure physical addresses. (This
is separate from how the translation table base address and other
parameters are set: an NS IPA always uses VTTBR_EL2 and VTCR_EL2
for its base address and walk parameters, regardless of the NSW bit,
and similarly for Secure.)

Provide a new function ptw_idx_for_stage_2() which returns the
MMU index to use for descriptor reads, and use it to set up
the .in_ptw_idx wherever we call get_phys_addr_lpae().

For a stage 2 walk, wherever we call get_phys_addr_lpae():
 * .in_ptw_idx should be ptw_idx_for_stage_2() of the .in_mmu_idx
 * .in_secure should be true if .in_mmu_idx is Stage2_S

This allows us to correct S1_ptw_translate() so that it consistently
always sets its (out_secure, out_phys) to the result it gets from the
S2 walk (either by calling get_phys_addr_lpae() or by TLB lookup).
This makes better conceptual sense because the S2 walk should return
us an (address space, address) tuple, not an address that we then
randomly assign to S or NS.

Our previous handling of SW and NSW was broken, so guest code
trying to use these bits to put the s2 page tables in the "other"
address space wouldn't work correctly.
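
In code form, the setup for a stage 2 walk then looks roughly like this
(a sketch using the names from the description above):

    /* descriptor reads for a stage 2 walk */
    ptw->in_ptw_idx = ptw_idx_for_stage_2(env, ptw->in_mmu_idx);
    ptw->in_secure  = (ptw->in_mmu_idx == ARMMMUIdx_Stage2_S);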

Cc: qemu-stable@nongnu.org
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1600
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230504135425.2748672-3-peter.maydell@linaro.org
(cherry picked from commit fcc0b0418f)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-14 11:23:49 +03:00
Richard Henderson
4b59b5bd14 accel/tcg: Fix atomic_mmu_lookup for reads
A copy-paste bug had us looking at the victim cache for writes.

Cc: qemu-stable@nongnu.org
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Fixes: 08dff435e2 ("tcg: Probe the proper permissions for atomic ops")
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Message-Id: <20230505204049.352469-1-richard.henderson@linaro.org>
(cherry picked from commit 8c313254e6)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-11 15:40:16 +03:00
Jonathan Cameron
488ad8b302 hw/pci-bridge: pci_expander_bridge fix type in pxb_cxl_dev_reset()
Reproduce issue with

configure --enable-qom-cast-debug ...

qemu-system-x86_64 -display none -machine q35,cxl=on -device pxb-cxl,bus=pcie.0

  hw/pci-bridge/pci_expander_bridge.c:54:PXB_DEV: Object 0x5570e0b1ada0 is not an instance of type pxb
  Aborted

The type conversion results in the right state structure, but PXB_DEV is
not a parent of PXB_CXL_DEV, hence the error. Rather than directly
cleaning up the inheritance, this is the minimal fix, which will be
followed by the cleanup.

Fixes: 154070eaf6 ("hw/pxb-cxl: Support passthrough HDM Decoders unless overridden")
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Message-Id: <20230420142750.6950-2-Jonathan.Cameron@huawei.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Cc: qemu-stable@nongnu.org
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
(cherry picked from commit 9136f661c7)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Bin Meng
f91d0db71e target/riscv: Restore the predicate() NULL check behavior
When reading a non-existent CSR, QEMU should raise an illegal instruction
exception, but currently it just exits due to the g_assert() check.

This actually reverts commit 0ee342256a.
Some comments are also added to indicate that predicate() must be
provided for an implemented CSR.
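
The restored behaviour is essentially (sketch, not the verbatim hunk):

    /* A missing predicate() means the CSR is not implemented; raise an
     * illegal-instruction exception instead of asserting. */
    if (!csr_ops[csrno].predicate) {
        return RISCV_EXCP_ILLEGAL_INST;
    }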

Reported-by: Fei Wu <fei2.wu@intel.com>
Signed-off-by: Bin Meng <bmeng@tinylab.org>
Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
Reviewed-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
Message-Id: <20230417043054.3125614-1-bmeng@tinylab.org>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
(cherry picked from commit eae04c4c13)
(mjt: context edit after ce3af0bbbc "target/riscv: add support for Zcmt extension")
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
LIU Zhiwei
f5301431e8 target/riscv: Fix itrigger when icount is used
When I boot an Ubuntu image, QEMU outputs a "Bad icount read" message and
exits. The reason is that executing helper_mret or helper_sret causes a
call to icount_get_raw_locked(), which requires the can_do_io flag to be
set on the CPU state.

Thus, set this flag when executing these two instructions.

Signed-off-by: LIU Zhiwei <zhiwei_liu@linux.alibaba.com>
Reviewed-by: Weiwei Li <liweiwei@iscas.ac.cn>
Acked-by: Alistair Francis <alistair.francis@wdc.com>
Message-Id: <20230324064011.976-1-zhiwei_liu@linux.alibaba.com>
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
(cherry picked from commit df3ac6da47)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Kevin Wolf
3b02d0db4a block: Don't call no_coroutine_fns in qmp_block_resize()
This QMP handler runs in a coroutine, so it must use the corresponding
no_co_wrappers instead.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2185688
Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230504115750.54437-5-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 0c7d204f50)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Kevin Wolf
e0deae4f49 block: bdrv/blk_co_unref() for calls in coroutine context
These functions must not be called in coroutine context, because they
need write access to the graph.

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230504115750.54437-4-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit b2ab5f545f)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Kevin Wolf
38a598aee3 block: Consistently call bdrv_activate() outside coroutine
Migration code can call bdrv_activate() in coroutine context, whereas
other callers call it outside of coroutines. As it calls other code that
is not supposed to run in coroutines, standardise on running outside of
coroutines.

This adds a no_co_wrapper to switch to the main loop before calling
bdrv_activate().

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-Id: <20230504115750.54437-3-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit da4afaff07)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Kevin Wolf
2197a94cb4 block: Fix use after free in blockdev_mark_auto_del()
job_cancel_locked() drops the job list lock temporarily and it may call
aio_poll(). We must assume that the list has changed after this call.
Also, with unlucky timing, it can end up freeing the job during
job_completed_txn_abort_locked(), making the job pointer invalid, too.

For both reasons, we can't just continue at block_job_next_locked(job).
Instead, start at the head of the list again after job_cancel_locked()
and skip those jobs that we already cancelled (or that are completing
anyway).
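
The loop therefore takes a restart-from-the-head shape, roughly like this
(a sketch; the hypothetical job_belongs_to_blk() helper stands in for the
real match/skip conditions):

    retry:
    for (job = block_job_next_locked(NULL); job;
         job = block_job_next_locked(job)) {
        if (job_belongs_to_blk(job, blk) && !job_is_cancelled(&job->job)) {
            job_cancel_locked(&job->job, false);
            goto retry;   /* the lock was dropped: restart from the head */
        }
    }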

Cc: qemu-stable@nongnu.org
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230503140142.474404-1-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit e2626874a3)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Paolo Bonzini
8322e5300f meson: leave unnecessary modules out of the build
meson.build files choose whether to build modules based on foo.found()
expressions.  If a feature is enabled (e.g. --enable-gtk), these expressions
are true even if the code is not used by any emulator, and this results
in an unexpected difference between modular and non-modular builds.

For non-modular builds, the files are not included in any binary, and
therefore the source files are never processed.  For modular builds,
however, all .so files are unconditionally built by default, and therefore
a normal "make" tries to build them.  However, the corresponding trace-*.h
files are absent due to this conditional:

if have_system
  trace_events_subdirs += [
    ...
    'ui',
    ...
  ]
endif

which was added to avoid wasting time running tracetool on unused trace-events
files.  This causes a compilation failure; fix it by skipping module builds
entirely if (depending on the module directory) have_block or have_system
are false.

Reported-by: Michael Tokarev <mjt@tls.msk.ru>
Cc: qemu-stable@nongnu.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit ef709860ea)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Shivaprasad G Bhat
4dc5df865c softfloat: Fix the incorrect computation in float32_exp2
The float32_exp2 function is computing the wrong exponent of 2.

For example, with the following set of values {0.1, 2.0, 2.0, -1.0},
the expected output would be {1.071773, 4.000000, 4.000000, 0.500000}.
Instead, the function computes {1.119102, 3.382044, 3.382044, -0.191022}.

Looking at the code, the float32_exp2() attempts to do this

                  2     3     4     5           n
  x        x     x     x     x     x           x
 e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
           1!    2!    3!    4!    5!          n!

But because of the typo it ends up doing

  x        x     x     x     x     x           x
 e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
           1!    2!    3!    4!    5!          n!

This is because, instead of xnp, which holds the numerator, parts_muladd
is using xp, which is just 'x'.  Commit 572c4d862ff2 refactored this
function and mistakenly used xp instead of xnp.
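
For illustration, the series needs a running power of x as the numerator
(a self-contained sketch of the intended computation, not the softfloat
code):

    double exp_taylor(double x)
    {
        double sum = 1.0, xn = 1.0, fact = 1.0;
        for (int n = 1; n <= 15; n++) {
            xn *= x;           /* the role of xnp: x^n, updated each term */
            fact *= n;
            sum += xn / fact;  /* the bug effectively summed x / n! */
        }
        return sum;
    }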

Cc: qemu-stable@nongnu.org
Fixes: 572c4d862f "softfloat: Convert float32_exp2 to FloatParts"
Partially-Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1623
Reported-By: Luca Barbato (https://gitlab.com/lu-zero)
Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Message-Id: <168304110865.537992.13059030916325018670.stgit@localhost.localdomain>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
(cherry picked from commit 1098cc3fcf)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Stefan Hajnoczi
a458252c16 block/export: call blk_set_dev_ops(blk, NULL, NULL)
Most export types install BlockDeviceOps pointers. It is easy to forget
to remove them, because that happens automatically via the "drive" qdev
property in hw/, but not in block/export/.

Put blk_set_dev_ops(blk, NULL, NULL) calls in the core export.c code so
the export types don't need to remember.

This fixes the nbd and vhost-user-blk export types.

Fixes: fd6afc501a ("nbd/server: Use drained block ops to quiesce the server")
Fixes: ca858a5fe9 ("vhost-user-blk-server: notify client about disk resize")
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20230502211119.720647-1-stefanha@redhat.com>
Signed-off-by: Eric Blake <eblake@redhat.com>
(cherry picked from commit de79b52604)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Peter Maydell
f6227dd60d hw/net/allwinner-sun8i-emac: Correctly byteswap descriptor fields
In allwinner-sun8i-emac we just read directly from guest memory into
a host FrameDescriptor struct and back.  This only works on
little-endian hosts.  Reading and writing of descriptors is already
abstracted into functions; make those functions also handle the
byte-swapping so that TransferDescriptor structs as seen by the rest
of the code are always in host-order, and fix two places that were
doing ad-hoc descriptor reading without using the functions.

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20230424165053.1428857-3-peter.maydell@linaro.org
(cherry picked from commit a4ae17e5ec)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Peter Maydell
2daa9e4d7e hw/sd/allwinner-sdhost: Correctly byteswap descriptor fields
In allwinner_sdhost_process_desc() we just read directly from
guest memory into a host TransferDescriptor struct and back.
This only works on little-endian hosts. Abstract the reading
and writing of descriptors into functions that handle the
byte-swapping so that TransferDescriptor structs as seen by
the rest of the code are always in host-order.

This fixes a failure of one of the avocado tests on s390.

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20230424165053.1428857-2-peter.maydell@linaro.org
(cherry picked from commit 3e20d90824)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Peter Maydell
6944823a6f target/arm: Define and use new load_cpu_field_low32()
In several places in the 32-bit Arm translate.c, we try to use
load_cpu_field() to load from a CPUARMState field into a TCGv_i32
where the field is actually 64-bit. This works on little-endian
hosts, but gives the wrong half of the register on big-endian.

Add a new load_cpu_field_low32() which loads the low 32 bits
of a 64-bit field into a TCGv_i32. The new macro includes a
compile-time check against accidentally using it on a field
of the wrong size. Use it to fix the two places in the code
where we were using load_cpu_field() on a 64-bit field.
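
A sketch of such a macro, modelled on the description (QEMU_BUILD_BUG_ON,
sizeof_field and offsetoflow32 are existing QEMU helpers):

    /* Load the low 32 bits of a 64-bit CPUARMState field into a TCGv_i32,
     * refusing to compile if the field is not actually 64 bits wide. */
    #define load_cpu_field_low32(name)                                   \
        ({                                                                \
            QEMU_BUILD_BUG_ON(sizeof_field(CPUARMState, name) != 8);      \
            load_cpu_offset(offsetoflow32(CPUARMState, name));            \
        })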

This fixes a bug where on big-endian hosts the guest would
crash after executing an ERET instruction, and a more corner
case one where some UNDEFs for attempted accesses to MSR
banked registers from Secure EL1 might go to the wrong EL.

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230424153909.1419369-2-peter.maydell@linaro.org
(cherry picked from commit 7f3a3d3dc4)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Peter Maydell
af08c70ef5 hw/intc/allwinner-a10-pic: Don't use set_bit()/clear_bit()
The Allwinner PIC model uses set_bit() and clear_bit() to update the
values in its irq_pending[] array when an interrupt arrives.  However
it is using these functions wrongly: they work on an array of type
'long', and it is passing an array of type 'uint32_t'.  Because the
code manually figures out the right array element, this works on
little-endian hosts and on 32-bit big-endian hosts, where bits 0..31
in a 'long' are in the same place as they are in a 'uint32_t'.
However it breaks on 64-bit big-endian hosts.

Remove the use of set_bit() and clear_bit() in favour of using
deposit32() on the array element.  This fixes a bug where on
big-endian 64-bit hosts the guest kernel would hang early on in
bootup.
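
The replacement pattern is roughly (a sketch; deposit32() is QEMU's
existing bit-field helper for uint32_t values):

    /* set or clear one pending bit without assuming 'long' element size */
    s->irq_pending[irq / 32] =
        deposit32(s->irq_pending[irq / 32], irq % 32, 1, !!level);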

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20230424152833.1334136-1-peter.maydell@linaro.org
(cherry picked from commit 2c5fa0778c)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Peter Maydell
975f12aa52 hw/arm/raspi: Use arm_write_bootloader() to write boot code
When writing the secondary-CPU stub boot loader code to the guest,
use arm_write_bootloader() instead of directly calling
rom_add_blob_fixed().  This fixes a bug on big-endian hosts, because
arm_write_bootloader() will correctly byte-swap the host-byte-order
array values into the guest-byte-order to write into the guest
memory.

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Tested-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-id: 20230424152717.1333930-4-peter.maydell@linaro.org
(cherry picked from commit 0acbdb4c4a)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Cédric Le Goater
5477a21350 hw/arm/aspeed: Use arm_write_bootloader() to write the bootloader
When writing the secondary-CPU stub boot loader code to the guest,
use arm_write_bootloader() instead of directly calling
rom_add_blob_fixed().  This fixes a bug on big-endian hosts, because
arm_write_bootloader() will correctly byte-swap the host-byte-order
array values into the guest-byte-order to write into the guest
memory.

Cc: qemu-stable@nongnu.org
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Tested-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20230424152717.1333930-3-peter.maydell@linaro.org
[PMM: Moved the "make arm_write_bootloader() function public" part
 to its own patch; updated commit message to note that this fixes
 an actual bug; adjust to the API changes noted in previous commit]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 902bba549f)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Cédric Le Goater
168f193c5b hw/arm/boot: Make write_bootloader() public as arm_write_bootloader()
The arm boot.c code includes a utility function write_bootloader()
which assists in writing a boot-code fragment into guest memory,
including handling endianness and fixing it up with entry point
addresses and similar things.  This is useful not just for the boot.c
code but also in board model code, so rename it to
arm_write_bootloader() and make it globally visible.

Since we are making it public, make its API a little neater: move the
AddressSpace* argument to be next to the hwaddr argument, and allow
the fixupcontext array to be const, since we never modify it in this
function.

Cc: qemu-stable@nongnu.org
Signed-off-by: Cédric Le Goater <clg@kaod.org>
Tested-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 20230424152717.1333930-2-peter.maydell@linaro.org
[PMM: Split out from another patch by Cédric, added doc comment]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 0fe43f0abf)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Peter Maydell
61ef050639 hw/net/msf2-emac: Don't modify descriptor in-place in emac_store_desc()
The msf2-emac ethernet controller has functions emac_load_desc() and
emac_store_desc() which read and write the in-memory descriptor
blocks and handle conversion between guest and host endianness.

As currently written, emac_store_desc() does the endianness
conversion in-place; this means that it effectively consumes the
input EmacDesc struct, because on a big-endian host the fields will
be overwritten with the little-endian versions of their values.
Unfortunately, in all the callsites the code continues to access
fields in the EmacDesc struct after it has called emac_store_desc()
-- specifically, it looks at the d.next field.

The effect of this is that on a big-endian host networking doesn't
work because the address of the next descriptor is corrupted.

We could fix this by making the callsite avoid using the struct; but
it's more robust to have emac_store_desc() leave its input alone.

(emac_load_desc() also does an in-place conversion, but here this is
fine, because the function is supposed to be initializing the
struct.)
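
The robust shape is to byte-swap a local copy (a sketch; the signature is
simplified and the descriptor field names are illustrative):

    static void emac_store_desc(const EmacDesc *d, hwaddr addr)
    {
        EmacDesc le = *d;  /* convert a copy, leave the caller's struct alone */
        le.pktaddr = cpu_to_le32(le.pktaddr);
        le.pktsize = cpu_to_le32(le.pktsize);
        le.next    = cpu_to_le32(le.next);
        cpu_physical_memory_write(addr, &le, sizeof(le));
    }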

Cc: qemu-stable@nongnu.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Message-id: 20230424151919.1333299-1-peter.maydell@linaro.org
(cherry picked from commit d565f58b38)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Akihiko Odaki
f0c5a78029 target/arm: Initialize debug capabilities only once
kvm_arm_init_debug() used to be called several times on an SMP system, as
kvm_arch_init_vcpu() calls it. Move the call to kvm_arch_init() to make
sure it will be called only once; otherwise it will overwrite pointers
to memory allocated with the previous call and leak it.

Fixes: e4482ab7e3 ("target-arm: kvm - add support for HW assisted debug")
Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Message-id: 20230405153644.25300-1-akihiko.odaki@daynix.com
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit ad5c6ddea3)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Peter Maydell
9448a0fa11 docs/about/deprecated.rst: Add "since 7.1" tag to dtb-kaslr-seed deprecation
In commit 5242876f37 we deprecated the dtb-kaslr-seed property of
the virt board, but forgot the "since n.n" tag in the documentation
of this in deprecated.rst.

This deprecation note first appeared in the 7.1 release, so
retrospectively add the correct "since 7.1" annotation to it.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Message-id: 20230420122256.1023709-1-peter.maydell@linaro.org
(cherry picked from commit ac64ebbecf)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Alex Bennée
8c3cf36260 qemu-options: finesse the recommendations around -blockdev
We are a bit premature in recommending -blockdev/-device as the best
way to configure block devices. It seems there are times when the more
human-friendly -drive still makes sense, especially when -snapshot is
involved.

Improve the language to hopefully make things clearer.

Suggested-by: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Cc: Markus Armbruster <armbru@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Message-Id: <20230424092249.58552-7-alex.bennee@linaro.org>
(cherry picked from commit c1654c3e37)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Wang Liang
f528cfc3fa block/monitor: Fix crash when executing HMP commit
hmp_commit() calls blk_is_available() from a non-coroutine context (and
in the main loop). blk_is_available() is a co_wrapper_mixed_bdrv_rdlock
function, and in the non-coroutine context it calls AIO_WAIT_WHILE(),
which crashes if the aio_context lock is not taken before.
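
The fix pattern is to hold the AioContext lock around the mixed wrapper
(a sketch of the idiom, not the verbatim patch):

    AioContext *ctx = blk_get_aio_context(blk);
    aio_context_acquire(ctx);
    bool available = blk_is_available(blk);  /* may AIO_WAIT_WHILE() */
    aio_context_release(ctx);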

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1615
Signed-off-by: Wang Liang <wangliangzz@inspur.com>
Message-Id: <20230424103902.45265-1-wangliangzz@126.com>
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 8c1e8fb2e7)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Igor Mammedov
bb47b5bc2e acpi: pcihp: allow repeating hot-unplug requests
With Q35 using ACPI PCI hotplug by default, the user's request to unplug
a device is ignored when it is issued before the guest OS has booted,
and any additional attempt to request device hot-unplug afterwards
results in the following error:

  "Device XYZ is already in the process of unplug"

Arguably this can be considered a regression introduced by [2],
before which it was possible to issue the unplug request multiple
times.

Accept new unplug requests after a timeout (1ms). This brings ACPI PCI
hotplug on par with native PCIe unplug behavior [1] and allows the user
to repeat unplug requests at the proper times.
Set the expiry timeout to an arbitrary 1 msec so the user won't be able to
flood the guest with SCI interrupts by calling device_del in a tight loop.

PS:
The ACPI spec doesn't mandate what the OSPM may do with GPEx.status
bits that were set before it booted => it is implementation-dependent.
Status bits may be retained (I tested with one Windows version)
or cleared (Linux since 2.6 kernel times) during the guest's ACPI
subsystem initialization.
Clearing the status bits (though not wrong per se) hides the unplug
event from the guest, and it is up to the user to repeat device_del later,
when the guest is able to handle unplug requests.

1) 18416c62e3 ("pcie: expire pending delete")
2)
Fixes: cce8944cc9 ("qdev-monitor: Forbid repeated device_del")
Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
CC: mst@redhat.com
CC: anisinha@redhat.com
CC: jusual@redhat.com
CC: kraxel@redhat.com
Message-Id: <20230418090449.2155757-1-imammedo@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Ani Sinha <anisinha@redhat.com>
(cherry picked from commit 0f689cf5ad)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-05-10 20:50:38 +03:00
Axel Heider
134a1a3320 hw/timer/imx_epit: fix limit check
Fix the limit check. If the limit is less than the compare value,
the timer can never reach this value and thus will never fire.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1491
Signed-off-by: Axel Heider <axel.heider@hensoldt.net>
Message-id: 168070611775.20412.2883242077302841473-2@git.sr.ht
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 25d758175d)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-04-27 08:51:16 +03:00
Axel Heider
ac7f07ebc8 hw/timer/imx_epit: don't shadow variable
Fix issue reported by Coverity.

Signed-off-by: Axel Heider <axel.heider@hensoldt.net>
Message-id: 168070611775.20412.2883242077302841473-1@git.sr.ht
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
(cherry picked from commit 542fd43d79)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-04-27 08:51:16 +03:00
Yang Zhong
3ed99d232c target/i386: Change wrong XFRM value in SGX CPUID leaf
The previous patch wrongly replaced FEAT_XSAVE_XCR0_{LO|HI} with
FEAT_XSAVE_XSS_{LO|HI} in CPUID(EAX=12,ECX=1):{ECX,EDX}.  As a result,
SGX enclaves only supported the SSE and x87 features (xfrm=0x3).
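
Schematically, the leaf should report the XCR0 feature words again
(a sketch of the fix, not the verbatim hunk):

    /* CPUID(EAX=0x12, ECX=1): supported enclave XFRM bits */
    *ecx = env->features[FEAT_XSAVE_XCR0_LO];  /* was FEAT_XSAVE_XSS_LO */
    *edx = env->features[FEAT_XSAVE_XCR0_HI];  /* was FEAT_XSAVE_XSS_HI */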

Fixes: 301e90675c ("target/i386: Enable support for XSAVES based features")
Signed-off-by: Yang Zhong <yang.zhong@linux.intel.com>
Reviewed-by: Yang Weijiang <weijiang.yang@intel.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Message-Id: <20230406064041.420039-1-yang.zhong@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 72497cff89)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-04-27 08:51:16 +03:00
Paolo Bonzini
6f7b9f7b6f vnc: avoid underflow when accessing user-provided address
If hostlen is zero, there is a possibility that addrstr[hostlen - 1]
underflows and, if a closing bracket is there, hostlen - 2 is passed
to g_strndup() on the next line.  If websocket==false then
addrstr[0] would be a colon, but if websocket==true this could in
principle happen.

Fix it by checking hostlen.
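
The guard amounts to (sketch):

    /* only strip the enclosing '[...]' when the host part is non-empty */
    if (hostlen && addrstr[hostlen - 1] == ']') {
        host = g_strndup(addrstr + 1, hostlen - 2);
    }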

Reported by Coverity.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
(cherry picked from commit 3f9c41c5df)
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
2023-04-26 18:17:51 +03:00
2163 changed files with 51316 additions and 113712 deletions

View File

@@ -1,24 +1,10 @@
variables:
# On stable branches this is changed by later rules. Should also
# be overridden per pipeline if running pipelines concurrently
# for different branches in contributor forks.
QEMU_CI_CONTAINER_TAG: latest
# For purposes of CI rules, upstream is the gitlab.com/qemu-project
# namespace. When testing CI, it might be useful to override this
# to point to a fork repo
QEMU_CI_UPSTREAM: qemu-project
# The order of rules defined here is critically important.
# They are evaluated in order and first match wins.
#
# Thus we group them into a number of stages, ordered from
# most restrictive to least restrictive
#
# For pipelines running for stable "staging-X.Y" branches
# we must override QEMU_CI_CONTAINER_TAG
#
.base_job_template:
variables:
# Each script line from will be in a collapsible section in the job output
@@ -33,36 +19,28 @@ variables:
# want jobs to run
#############################################################
# Never run jobs upstream on stable branch, staging branch jobs already ran
- if: '$CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH =~ /^stable-/'
when: never
# Never run jobs upstream on tags, staging branch jobs already ran
- if: '$CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_TAG'
when: never
# Cirrus jobs can't run unless the creds / target repo are set
- if: '$QEMU_JOB_CIRRUS && ($CIRRUS_GITHUB_REPO == null || $CIRRUS_API_TOKEN == null)'
when: never
# Publishing jobs should only run on the default branch in upstream
- if: '$QEMU_JOB_PUBLISH == "1" && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH != $CI_DEFAULT_BRANCH'
- if: '$QEMU_JOB_PUBLISH == "1" && $CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH != $CI_DEFAULT_BRANCH'
when: never
# Non-publishing jobs should only run on staging branches in upstream
- if: '$QEMU_JOB_PUBLISH != "1" && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH !~ /staging/'
- if: '$QEMU_JOB_PUBLISH != "1" && $CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH !~ /staging/'
when: never
# Jobs only intended for forks should always be skipped on upstream
- if: '$QEMU_JOB_ONLY_FORKS == "1" && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM'
- if: '$QEMU_JOB_ONLY_FORKS == "1" && $CI_PROJECT_NAMESPACE == "qemu-project"'
when: never
# Forks don't get pipelines unless QEMU_CI=1 or QEMU_CI=2 is set
- if: '$QEMU_CI != "1" && $QEMU_CI != "2" && $CI_PROJECT_NAMESPACE != $QEMU_CI_UPSTREAM'
- if: '$QEMU_CI != "1" && $QEMU_CI != "2" && $CI_PROJECT_NAMESPACE != "qemu-project"'
when: never
# Avocado jobs don't run in forks unless $QEMU_CI_AVOCADO_TESTING is set
- if: '$QEMU_JOB_AVOCADO && $QEMU_CI_AVOCADO_TESTING != "1" && $CI_PROJECT_NAMESPACE != $QEMU_CI_UPSTREAM'
- if: '$QEMU_JOB_AVOCADO && $QEMU_CI_AVOCADO_TESTING != "1" && $CI_PROJECT_NAMESPACE != "qemu-project"'
when: never
@@ -72,29 +50,17 @@ variables:
#############################################################
# Optional jobs should not be run unless manually triggered
- if: '$QEMU_JOB_OPTIONAL && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH =~ /staging-[[:digit:]]+\.[[:digit:]]/'
when: manual
allow_failure: true
variables:
QEMU_CI_CONTAINER_TAG: $CI_COMMIT_REF_SLUG
- if: '$QEMU_JOB_OPTIONAL'
when: manual
allow_failure: true
# Skipped jobs should not be run unless manually triggered
- if: '$QEMU_JOB_SKIPPED && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH =~ /staging-[[:digit:]]+\.[[:digit:]]/'
when: manual
allow_failure: true
variables:
QEMU_CI_CONTAINER_TAG: $CI_COMMIT_REF_SLUG
- if: '$QEMU_JOB_SKIPPED'
when: manual
allow_failure: true
# Avocado jobs can be manually start in forks if $QEMU_CI_AVOCADO_TESTING is unset
- if: '$QEMU_JOB_AVOCADO && $CI_PROJECT_NAMESPACE != $QEMU_CI_UPSTREAM'
- if: '$QEMU_JOB_AVOCADO && $CI_PROJECT_NAMESPACE != "qemu-project"'
when: manual
allow_failure: true
@@ -106,23 +72,8 @@ variables:
# Forks pipeline jobs don't start automatically unless
# QEMU_CI=2 is set
- if: '$QEMU_CI != "2" && $CI_PROJECT_NAMESPACE != $QEMU_CI_UPSTREAM'
when: manual
# Upstream pipeline jobs start automatically unless told not to
# by setting QEMU_CI=1
- if: '$QEMU_CI == "1" && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH =~ /staging-[[:digit:]]+\.[[:digit:]]/'
when: manual
variables:
QEMU_CI_CONTAINER_TAG: $CI_COMMIT_REF_SLUG
- if: '$QEMU_CI == "1" && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM'
- if: '$QEMU_CI != "2" && $CI_PROJECT_NAMESPACE != "qemu-project"'
when: manual
# Jobs can run if any jobs they depend on were successful
- if: '$QEMU_JOB_SKIPPED && $CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_BRANCH =~ /staging-[[:digit:]]+\.[[:digit:]]/'
when: on_success
variables:
QEMU_CI_CONTAINER_TAG: $CI_COMMIT_REF_SLUG
- when: on_success

View File

@@ -1,19 +1,23 @@
.native_build_job_template:
extends: .base_job_template
stage: build
image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:$QEMU_CI_CONTAINER_TAG
image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest
before_script:
- JOBS=$(expr $(nproc) + 1)
script:
- if test -n "$LD_JOBS";
then
scripts/git-submodule.sh update meson ;
fi
- mkdir build
- cd build
- ../configure --enable-werror --disable-docs --enable-fdt=system
${TARGETS:+--target-list="$TARGETS"}
${LD_JOBS:+--meson=git} ${TARGETS:+--target-list="$TARGETS"}
$CONFIGURE_ARGS ||
{ cat config.log meson-logs/meson-log.txt && exit 1; }
- if test -n "$LD_JOBS";
then
pyvenv/bin/meson configure . -Dbackend_max_links="$LD_JOBS" ;
../meson/meson.py configure . -Dbackend_max_links="$LD_JOBS" ;
fi || exit 1;
- make -j"$JOBS"
- if test -n "$MAKE_CHECK_ARGS";
@@ -25,7 +29,6 @@
# rebuilding all the object files we skip in the artifacts
.native_build_artifact_template:
artifacts:
when: on_success
expire_in: 2 days
paths:
- build
@@ -41,10 +44,10 @@
.common_test_job_template:
extends: .base_job_template
stage: test
image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:$QEMU_CI_CONTAINER_TAG
image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest
script:
- scripts/git-submodule.sh update roms/SLOF
- meson subprojects download $(cd build/subprojects && echo *)
- scripts/git-submodule.sh update
$(sed -n '/GIT_SUBMODULES=/ s/.*=// p' build/config-host.mak)
- cd build
- find . -type f -exec touch {} +
# Avoid recompiling by hiding ninja with NINJA=":"
@@ -54,7 +57,6 @@
extends: .common_test_job_template
artifacts:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
when: always
expire_in: 7 days
paths:
- build/meson-logs/testlog.txt
@@ -70,7 +72,7 @@
policy: pull-push
artifacts:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
when: always
when: on_failure
expire_in: 7 days
paths:
- build/tests/results/latest/results.xml

View File

@@ -454,7 +454,7 @@ gcov:
IMAGE: ubuntu2204
CONFIGURE_ARGS: --enable-gcov
TARGETS: aarch64-softmmu ppc64-softmmu s390x-softmmu x86_64-softmmu
MAKE_CHECK_ARGS: check-unit check-softfloat
MAKE_CHECK_ARGS: check
after_script:
- cd build
- gcovr --xml-pretty --exclude-unreachable-branches --print-summary
@@ -462,12 +462,8 @@ gcov:
coverage: /^\s*lines:\s*\d+.\d+\%/
artifacts:
name: ${CI_JOB_NAME}-${CI_COMMIT_REF_NAME}-${CI_COMMIT_SHA}
when: always
expire_in: 2 days
paths:
- build/meson-logs/testlog.txt
reports:
junit: build/meson-logs/testlog.junit.xml
coverage_report:
coverage_format: cobertura
path: build/coverage.xml
@@ -531,12 +527,12 @@ build-without-defaults:
--disable-strip
TARGETS: avr-softmmu mips64-softmmu s390x-softmmu sh4-softmmu
sparc64-softmmu hexagon-linux-user i386-linux-user s390x-linux-user
MAKE_CHECK_ARGS: check
MAKE_CHECK_ARGS: check-unit check-qtest-avr check-qtest-mips64
build-libvhost-user:
extends: .base_job_template
stage: build
image: $CI_REGISTRY_IMAGE/qemu/fedora:$QEMU_CI_CONTAINER_TAG
image: $CI_REGISTRY_IMAGE/qemu/fedora:latest
needs:
job: amd64-fedora-container
script:
@@ -576,7 +572,7 @@ build-tools-and-docs-debian:
# of what topic branch they're currently using
pages:
extends: .base_job_template
image: $CI_REGISTRY_IMAGE/qemu/debian-amd64:$QEMU_CI_CONTAINER_TAG
image: $CI_REGISTRY_IMAGE/qemu/debian-amd64:latest
stage: test
needs:
- job: build-tools-and-docs-debian
@@ -591,7 +587,6 @@ pages:
- make -C build install DESTDIR=$(pwd)/temp-install
- mv temp-install/usr/local/share/doc/qemu/* public/
artifacts:
when: on_success
paths:
- public
variables:

View File

@@ -44,6 +44,19 @@
variables:
QEMU_JOB_CIRRUS: 1
x64-freebsd-12-build:
extends: .cirrus_build_job
variables:
NAME: freebsd-12
CIRRUS_VM_INSTANCE_TYPE: freebsd_instance
CIRRUS_VM_IMAGE_SELECTOR: image_family
CIRRUS_VM_IMAGE_NAME: freebsd-12-4
CIRRUS_VM_CPUS: 8
CIRRUS_VM_RAM: 8G
UPDATE_COMMAND: pkg update; pkg upgrade -y
INSTALL_COMMAND: pkg install -y
TEST_TARGETS: check
x64-freebsd-13-build:
extends: .cirrus_build_job
variables:

View File

@@ -0,0 +1,16 @@
# THIS FILE WAS AUTO-GENERATED
#
# $ lcitool variables freebsd-12 qemu
#
# https://gitlab.com/libvirt/libvirt-ci
CCACHE='/usr/local/bin/ccache'
CPAN_PKGS=''
CROSS_PKGS=''
MAKE='/usr/local/bin/gmake'
NINJA='/usr/local/bin/ninja'
PACKAGING_COMMAND='pkg'
PIP3='/usr/local/bin/pip-3.8'
PKGS='alsa-lib bash bison bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage cmocka ctags curl cyrus-sasl dbus diffutils dtc flex fusefs-libs3 gettext git glib gmake gnutls gsed gtk3 json-c libepoxy libffi libgcrypt libjpeg-turbo libnfs libslirp libspice-server libssh libtasn1 llvm lzo2 meson ncurses nettle ninja opencv pixman pkgconf png py39-numpy py39-pillow py39-pip py39-sphinx py39-sphinx_rtd_theme py39-yaml python3 rpm2cpio sdl2 sdl2_image snappy sndio socat spice-protocol tesseract usbredir virglrenderer vte3 zstd'
PYPI_PKGS=''
PYTHON='/usr/local/bin/python3'

View File

@@ -11,6 +11,6 @@ MAKE='/usr/local/bin/gmake'
NINJA='/usr/local/bin/ninja'
PACKAGING_COMMAND='pkg'
PIP3='/usr/local/bin/pip-3.8'
PKGS='alsa-lib bash bison bzip2 ca_root_nss capstone4 ccache cmocka ctags curl cyrus-sasl dbus diffutils dtc flex fusefs-libs3 gettext git glib gmake gnutls gsed gtk3 json-c libepoxy libffi libgcrypt libjpeg-turbo libnfs libslirp libspice-server libssh libtasn1 llvm lzo2 meson mtools ncurses nettle ninja opencv pixman pkgconf png py39-numpy py39-pillow py39-pip py39-sphinx py39-sphinx_rtd_theme py39-yaml python3 rpm2cpio sdl2 sdl2_image snappy sndio socat spice-protocol tesseract usbredir virglrenderer vte3 xorriso zstd'
PKGS='alsa-lib bash bison bzip2 ca_root_nss capstone4 ccache cdrkit-genisoimage cmocka ctags curl cyrus-sasl dbus diffutils dtc flex fusefs-libs3 gettext git glib gmake gnutls gsed gtk3 json-c libepoxy libffi libgcrypt libjpeg-turbo libnfs libslirp libspice-server libssh libtasn1 llvm lzo2 meson ncurses nettle ninja opencv pixman pkgconf png py39-numpy py39-pillow py39-pip py39-sphinx py39-sphinx_rtd_theme py39-yaml python3 rpm2cpio sdl2 sdl2_image snappy sndio socat spice-protocol tesseract usbredir virglrenderer vte3 zstd'
PYPI_PKGS=''
PYTHON='/usr/local/bin/python3'

View File

@@ -15,7 +15,7 @@ env:
folder: $HOME/.cache/qemu-vm
install_script:
- dnf update -y
- dnf install -y git make openssh-clients qemu-img qemu-system-x86 wget meson
- dnf install -y git make openssh-clients qemu-img qemu-system-x86 wget
clone_script:
- git clone --depth 100 "$CI_REPOSITORY_URL" .
- git fetch origin "$CI_COMMIT_REF_NAME"

View File

@@ -11,6 +11,6 @@ MAKE='/opt/homebrew/bin/gmake'
NINJA='/opt/homebrew/bin/ninja'
PACKAGING_COMMAND='brew'
PIP3='/opt/homebrew/bin/pip3'
PKGS='bash bc bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 jemalloc jpeg-turbo json-c libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja pixman pkg-config python3 rpm2cpio sdl2 sdl2_image snappy socat sparse spice-protocol tesseract usbredir vde vte3 xorriso zlib zstd'
PKGS='bash bc bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 jemalloc jpeg-turbo json-c libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make meson ncurses nettle ninja pixman pkg-config python3 rpm2cpio sdl2 sdl2_image snappy socat sparse spice-protocol tesseract usbredir vde vte3 zlib zstd'
PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme'
PYTHON='/opt/homebrew/bin/python3'

View File

@@ -1,15 +1,15 @@
.container_job_template:
extends: .base_job_template
image: docker:latest
image: docker:stable
stage: containers
services:
- docker:dind
before_script:
- export TAG="$CI_REGISTRY_IMAGE/qemu/$NAME:$QEMU_CI_CONTAINER_TAG"
# Always ':latest' because we always use upstream as a common cache source
- export TAG="$CI_REGISTRY_IMAGE/qemu/$NAME:latest"
- export COMMON_TAG="$CI_REGISTRY/qemu-project/qemu/qemu/$NAME:latest"
- apk add python3
- docker info
- docker login $CI_REGISTRY -u "$CI_REGISTRY_USER" -p "$CI_REGISTRY_PASSWORD"
- until docker info; do sleep 1; done
script:
- echo "TAG:$TAG"
- echo "COMMON_TAG:$COMMON_TAG"

View File

@@ -1,7 +1,7 @@
.cross_system_build_job:
extends: .base_job_template
stage: build
image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:$QEMU_CI_CONTAINER_TAG
image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest
timeout: 80m
script:
- mkdir build
@@ -27,7 +27,7 @@
.cross_accel_build_job:
extends: .base_job_template
stage: build
image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:$QEMU_CI_CONTAINER_TAG
image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest
timeout: 30m
script:
- mkdir build
@@ -39,7 +39,7 @@
.cross_user_build_job:
extends: .base_job_template
stage: build
image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:$QEMU_CI_CONTAINER_TAG
image: $CI_REGISTRY_IMAGE/qemu/$IMAGE:latest
script:
- mkdir build
- cd build
@@ -55,7 +55,6 @@
.cross_test_artifacts:
artifacts:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
when: always
expire_in: 7 days
paths:
- build/meson-logs/testlog.txt

View File

@@ -29,14 +29,6 @@ cross-arm64-user:
variables:
IMAGE: debian-arm64-cross
cross-arm64-kvm-only:
extends: .cross_accel_build_job
needs:
job: arm64-debian-cross-container
variables:
IMAGE: debian-arm64-cross
EXTRA_CONFIGURE_OPTS: --disable-tcg --without-default-features
cross-i386-user:
extends:
- .cross_user_build_job
@@ -57,7 +49,7 @@ cross-i386-tci:
variables:
IMAGE: fedora-i386-cross
ACCEL: tcg-interpreter
EXTRA_CONFIGURE_OPTS: --target-list=i386-softmmu,i386-linux-user,aarch64-softmmu,aarch64-linux-user,ppc-softmmu,ppc-linux-user --disable-plugins
EXTRA_CONFIGURE_OPTS: --target-list=i386-softmmu,i386-linux-user,aarch64-softmmu,aarch64-linux-user,ppc-softmmu,ppc-linux-user
MAKE_CHECK_ARGS: check check-tcg
cross-mipsel-system:
@@ -169,7 +161,6 @@ cross-win32-system:
CROSS_SKIP_TARGETS: alpha-softmmu avr-softmmu hppa-softmmu m68k-softmmu
microblazeel-softmmu mips64el-softmmu nios2-softmmu
artifacts:
when: on_success
paths:
- build/qemu-setup*.exe
@@ -185,7 +176,6 @@ cross-win64-system:
or1k-softmmu rx-softmmu sh4eb-softmmu sparc64-softmmu
tricore-softmmu xtensaeb-softmmu
artifacts:
when: on_success
paths:
- build/qemu-setup*.exe

View File

@@ -20,10 +20,8 @@ variables:
artifacts:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
expire_in: 7 days
when: always
paths:
- build/build.ninja
- build/meson-logs
- build/meson-logs/testlog.txt
reports:
junit: build/meson-logs/testlog.junit.xml

View File

@@ -1,6 +1,6 @@
# All ubuntu-22.04 jobs should run successfully in an environment
# setup by the scripts/ci/setup/qemu/build-environment.yml task
# "Install basic packages to build QEMU on Ubuntu 22.04"
# "Install basic packages to build QEMU on Ubuntu 20.04"
ubuntu-22.04-aarch32-all:
extends: .custom_runner_template

View File

@@ -1,6 +1,6 @@
# All ubuntu-22.04 jobs should run successfully in an environment
# All ubuntu-20.04 jobs should run successfully in an environment
# setup by the scripts/ci/setup/qemu/build-environment.yml task
# "Install basic packages to build QEMU on Ubuntu 22.04"
# "Install basic packages to build QEMU on Ubuntu 20.04"
ubuntu-22.04-aarch64-all-linux-static:
extends: .custom_runner_template
@@ -45,28 +45,6 @@ ubuntu-22.04-aarch64-all:
- make --output-sync -j`nproc --ignore=40`
- make --output-sync -j`nproc --ignore=40` check
ubuntu-22.04-aarch64-without-defaults:
extends: .custom_runner_template
needs: []
stage: build
tags:
- ubuntu_22.04
- aarch64
rules:
- if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
when: manual
allow_failure: true
- if: "$AARCH64_RUNNER_AVAILABLE"
when: manual
allow_failure: true
script:
- mkdir build
- cd build
- ../configure --disable-user --without-default-devices --without-default-features
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make --output-sync -j`nproc --ignore=40`
- make --output-sync -j`nproc --ignore=40` check
ubuntu-22.04-aarch64-alldbg:
extends: .custom_runner_template
needs: []
@@ -145,7 +123,7 @@ ubuntu-22.04-aarch64-notcg:
script:
- mkdir build
- cd build
- ../configure --disable-tcg --with-devices-aarch64=minimal
- ../configure --disable-tcg
|| { cat config.log meson-logs/meson-log.txt; exit 1; }
- make --output-sync -j`nproc --ignore=40`
- make --output-sync -j`nproc --ignore=40` check

View File

@@ -42,15 +42,17 @@
docker-opensbi:
extends: .opensbi_job_rules
stage: containers
image: docker:latest
image: docker:stable
services:
- docker:dind
- docker:stable-dind
variables:
GIT_DEPTH: 3
IMAGE_TAG: $CI_REGISTRY_IMAGE:opensbi-cross-build
# We don't use TLS
DOCKER_HOST: tcp://docker:2375
DOCKER_TLS_CERTDIR: ""
before_script:
- docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
- until docker info; do sleep 1; done
script:
- docker pull $IMAGE_TAG || true
- docker build --cache-from $IMAGE_TAG --tag $CI_REGISTRY_IMAGE:$CI_COMMIT_SHA
@@ -63,7 +65,6 @@ build-opensbi:
stage: build
needs: ['docker-opensbi']
artifacts:
when: on_success
paths: # 'artifacts.zip' will contains the following files:
- pc-bios/opensbi-riscv32-generic-fw_dynamic.bin
- pc-bios/opensbi-riscv64-generic-fw_dynamic.bin

View File

@@ -1,13 +1,6 @@
# This file contains the set of jobs run by the QEMU project:
# https://gitlab.com/qemu-project/qemu/-/pipelines
variables:
RUNNER_TAG: ""
default:
tags:
- $RUNNER_TAG
include:
- local: '/.gitlab-ci.d/base.yml'
- local: '/.gitlab-ci.d/stages.yml'

View File

@@ -26,7 +26,7 @@ check-dco:
check-python-minreqs:
extends: .base_job_template
stage: test
image: $CI_REGISTRY_IMAGE/qemu/python:$QEMU_CI_CONTAINER_TAG
image: $CI_REGISTRY_IMAGE/qemu/python:latest
script:
- make -C python check-minreqs
variables:
@@ -37,7 +37,7 @@ check-python-minreqs:
check-python-tox:
extends: .base_job_template
stage: test
image: $CI_REGISTRY_IMAGE/qemu/python:$QEMU_CI_CONTAINER_TAG
image: $CI_REGISTRY_IMAGE/qemu/python:latest
script:
- make -C python check-tox
variables:

View File

@@ -7,57 +7,19 @@
cache:
key: "${CI_JOB_NAME}-cache"
paths:
- msys64/var/cache
when: always
- ${CI_PROJECT_DIR}/msys64/var/cache
needs: []
stage: build
timeout: 80m
variables:
# This feature doesn't (currently) work with PowerShell, it stops
# the echo'ing of commands being run and doesn't show any timing
FF_SCRIPT_SECTIONS: 0
artifacts:
name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
expire_in: 7 days
paths:
- build/meson-logs/testlog.txt
reports:
junit: "build/meson-logs/testlog.junit.xml"
before_script:
- Write-Output "Acquiring msys2.exe installer at $(Get-Date -Format u)"
- If ( !(Test-Path -Path msys64\var\cache ) ) {
mkdir msys64\var\cache
}
- Invoke-WebRequest
"https://repo.msys2.org/distrib/msys2-x86_64-latest.sfx.exe.sig"
-outfile "msys2.exe.sig"
- if ( Test-Path -Path msys64\var\cache\msys2.exe.sig ) {
Write-Output "Cached installer sig" ;
if ( ((Get-FileHash msys2.exe.sig).Hash -ne (Get-FileHash msys64\var\cache\msys2.exe.sig).Hash) ) {
Write-Output "Mis-matched installer sig, new installer download required" ;
Remove-Item -Path msys64\var\cache\msys2.exe.sig ;
if ( Test-Path -Path msys64\var\cache\msys2.exe ) {
Remove-Item -Path msys64\var\cache\msys2.exe
}
} else {
Write-Output "Matched installer sig, cached installer still valid"
}
} else {
Write-Output "No cached installer sig, new installer download required" ;
if ( Test-Path -Path msys64\var\cache\msys2.exe ) {
Remove-Item -Path msys64\var\cache\msys2.exe
}
}
- if ( !(Test-Path -Path msys64\var\cache\msys2.exe ) ) {
Write-Output "Fetching latest installer" ;
- If ( !(Test-Path -Path msys64\var\cache\msys2.exe ) ) {
Invoke-WebRequest
"https://repo.msys2.org/distrib/msys2-x86_64-latest.sfx.exe"
-outfile "msys64\var\cache\msys2.exe" ;
Copy-Item -Path msys2.exe.sig -Destination msys64\var\cache\msys2.exe.sig
} else {
Write-Output "Using cached installer"
"https://github.com/msys2/msys2-installer/releases/download/2022-06-03/msys2-base-x86_64-20220603.sfx.exe"
-outfile "msys64\var\cache\msys2.exe"
}
- Write-Output "Invoking msys2.exe installer at $(Get-Date -Format u)"
- msys64\var\cache\msys2.exe -y
- ((Get-Content -path .\msys64\etc\\post-install\\07-pacman-key.post -Raw)
-replace '--refresh-keys', '--version') |
@@ -66,66 +28,97 @@
- .\msys64\usr\bin\bash -lc 'pacman --noconfirm -Syuu' # Core update
- .\msys64\usr\bin\bash -lc 'pacman --noconfirm -Syuu' # Normal update
- taskkill /F /FI "MODULES eq msys-2.0.dll"
script:
- Write-Output "Installing mingw packages at $(Get-Date -Format u)"
- .\msys64\usr\bin\bash -lc "pacman -Sy --noconfirm --needed
bison diffutils flex
git grep make sed
$MINGW_TARGET-capstone
$MINGW_TARGET-curl
$MINGW_TARGET-cyrus-sasl
$MINGW_TARGET-dtc
$MINGW_TARGET-gcc
$MINGW_TARGET-glib2
$MINGW_TARGET-gnutls
$MINGW_TARGET-gtk3
$MINGW_TARGET-libgcrypt
$MINGW_TARGET-libjpeg-turbo
$MINGW_TARGET-libnfs
$MINGW_TARGET-libpng
$MINGW_TARGET-libssh
$MINGW_TARGET-libtasn1
$MINGW_TARGET-libusb
$MINGW_TARGET-lzo2
$MINGW_TARGET-nettle
$MINGW_TARGET-ninja
$MINGW_TARGET-pixman
$MINGW_TARGET-pkgconf
$MINGW_TARGET-python
$MINGW_TARGET-SDL2
$MINGW_TARGET-SDL2_image
$MINGW_TARGET-snappy
$MINGW_TARGET-spice
$MINGW_TARGET-usbredir
$MINGW_TARGET-zstd "
- Write-Output "Running build at $(Get-Date -Format u)"
- $env:CHERE_INVOKING = 'yes' # Preserve the current working directory
- $env:MSYS = 'winsymlinks:native' # Enable native Windows symlink
- mkdir build
- cd build
- ..\msys64\usr\bin\bash -lc "../configure --enable-fdt=system $CONFIGURE_ARGS"
- ..\msys64\usr\bin\bash -lc "make"
- ..\msys64\usr\bin\bash -lc "make check MTESTARGS='$TEST_ARGS' || { cat meson-logs/testlog.txt; exit 1; } ;"
- Write-Output "Finished build at $(Get-Date -Format u)"
msys2-64bit:
extends: .shared_msys2_builder
variables:
MINGW_TARGET: mingw-w64-x86_64
MSYSTEM: MINGW64
# do not remove "--without-default-devices"!
# commit 9f8e6cad65a6 ("gitlab-ci: Speed up the msys2-64bit job by using
# --without-default-devices") changed to compile QEMU with the
# --without-default-devices switch for the msys2 64-bit job, because the
# build could not complete within the project timeout.
CONFIGURE_ARGS: --target-list=x86_64-softmmu --without-default-devices -Ddebug=false -Doptimization=0
# qTests don't run successfully with "--without-default-devices",
# so let's exclude the qtests from CI for now.
TEST_ARGS: --no-suite qtest
script:
- .\msys64\usr\bin\bash -lc "pacman -Sy --noconfirm --needed
bison diffutils flex
git grep make sed
mingw-w64-x86_64-capstone
mingw-w64-x86_64-curl
mingw-w64-x86_64-cyrus-sasl
mingw-w64-x86_64-dtc
mingw-w64-x86_64-gcc
mingw-w64-x86_64-glib2
mingw-w64-x86_64-gnutls
mingw-w64-x86_64-gtk3
mingw-w64-x86_64-libgcrypt
mingw-w64-x86_64-libjpeg-turbo
mingw-w64-x86_64-libnfs
mingw-w64-x86_64-libpng
mingw-w64-x86_64-libssh
mingw-w64-x86_64-libtasn1
mingw-w64-x86_64-libusb
mingw-w64-x86_64-lzo2
mingw-w64-x86_64-nettle
mingw-w64-x86_64-ninja
mingw-w64-x86_64-pixman
mingw-w64-x86_64-pkgconf
mingw-w64-x86_64-python
mingw-w64-x86_64-SDL2
mingw-w64-x86_64-SDL2_image
mingw-w64-x86_64-snappy
mingw-w64-x86_64-spice
mingw-w64-x86_64-usbredir
mingw-w64-x86_64-zstd "
- $env:CHERE_INVOKING = 'yes' # Preserve the current working directory
- $env:MSYSTEM = 'MINGW64' # Start a 64-bit MinGW environment
- $env:MSYS = 'winsymlinks:native' # Enable native Windows symlink
- mkdir output
- cd output
# Note: do not remove "--without-default-devices"!
# commit 9f8e6cad65a6 ("gitlab-ci: Speed up the msys2-64bit job by using
# --without-default-devices") changed to compile QEMU with the
# --without-default-devices switch for the msys2 64-bit job, because the
# build could not complete within the project timeout.
- ..\msys64\usr\bin\bash -lc '../configure --target-list=x86_64-softmmu
--without-default-devices --enable-fdt=system'
- ..\msys64\usr\bin\bash -lc 'make'
# qTests don't run successfully with "--without-default-devices",
# so let's exclude the qtests from CI for now.
- ..\msys64\usr\bin\bash -lc 'make check MTESTARGS=\"--no-suite qtest\" || { cat meson-logs/testlog.txt; exit 1; } ;'
msys2-32bit:
extends: .shared_msys2_builder
variables:
MINGW_TARGET: mingw-w64-i686
MSYSTEM: MINGW32
CONFIGURE_ARGS: --target-list=ppc64-softmmu -Ddebug=false -Doptimization=0
TEST_ARGS: --no-suite qtest
script:
- .\msys64\usr\bin\bash -lc "pacman -Sy --noconfirm --needed
bison diffutils flex
git grep make sed
mingw-w64-i686-capstone
mingw-w64-i686-curl
mingw-w64-i686-cyrus-sasl
mingw-w64-i686-dtc
mingw-w64-i686-gcc
mingw-w64-i686-glib2
mingw-w64-i686-gnutls
mingw-w64-i686-gtk3
mingw-w64-i686-libgcrypt
mingw-w64-i686-libjpeg-turbo
mingw-w64-i686-libnfs
mingw-w64-i686-libpng
mingw-w64-i686-libssh
mingw-w64-i686-libtasn1
mingw-w64-i686-libusb
mingw-w64-i686-lzo2
mingw-w64-i686-nettle
mingw-w64-i686-ninja
mingw-w64-i686-pixman
mingw-w64-i686-pkgconf
mingw-w64-i686-python
mingw-w64-i686-SDL2
mingw-w64-i686-SDL2_image
mingw-w64-i686-snappy
mingw-w64-i686-spice
mingw-w64-i686-usbredir
mingw-w64-i686-zstd "
- $env:CHERE_INVOKING = 'yes' # Preserve the current working directory
- $env:MSYSTEM = 'MINGW32' # Start a 32-bit MinGW environment
- $env:MSYS = 'winsymlinks:native' # Enable native Windows symlink
- mkdir output
- cd output
- ..\msys64\usr\bin\bash -lc '../configure --target-list=ppc64-softmmu
--enable-fdt=system'
- ..\msys64\usr\bin\bash -lc 'make'
- ..\msys64\usr\bin\bash -lc 'make check MTESTARGS=\"--no-suite qtest\" ||
{ cat meson-logs/testlog.txt; exit 1; }'

.gitmodules

@@ -13,6 +13,9 @@
[submodule "roms/qemu-palcode"]
path = roms/qemu-palcode
url = https://gitlab.com/qemu-project/qemu-palcode.git
[submodule "dtc"]
path = dtc
url = https://gitlab.com/qemu-project/dtc.git
[submodule "roms/u-boot"]
path = roms/u-boot
url = https://gitlab.com/qemu-project/u-boot.git
@@ -22,12 +25,21 @@
[submodule "roms/QemuMacDrivers"]
path = roms/QemuMacDrivers
url = https://gitlab.com/qemu-project/QemuMacDrivers.git
[submodule "ui/keycodemapdb"]
path = ui/keycodemapdb
url = https://gitlab.com/qemu-project/keycodemapdb.git
[submodule "roms/seabios-hppa"]
path = roms/seabios-hppa
url = https://gitlab.com/qemu-project/seabios-hppa.git
[submodule "roms/u-boot-sam460ex"]
path = roms/u-boot-sam460ex
url = https://gitlab.com/qemu-project/u-boot-sam460ex.git
[submodule "tests/fp/berkeley-testfloat-3"]
path = tests/fp/berkeley-testfloat-3
url = https://gitlab.com/qemu-project/berkeley-testfloat-3.git
[submodule "tests/fp/berkeley-softfloat-3"]
path = tests/fp/berkeley-softfloat-3
url = https://gitlab.com/qemu-project/berkeley-softfloat-3.git
[submodule "roms/edk2"]
path = roms/edk2
url = https://gitlab.com/qemu-project/edk2.git
@@ -37,9 +49,15 @@
[submodule "roms/qboot"]
path = roms/qboot
url = https://gitlab.com/qemu-project/qboot.git
[submodule "meson"]
path = meson
url = https://gitlab.com/qemu-project/meson.git
[submodule "roms/vbootrom"]
path = roms/vbootrom
url = https://gitlab.com/qemu-project/vbootrom.git
[submodule "tests/lcitool/libvirt-ci"]
path = tests/lcitool/libvirt-ci
url = https://gitlab.com/libvirt/libvirt-ci.git
[submodule "subprojects/libvfio-user"]
path = subprojects/libvfio-user
url = https://gitlab.com/qemu-project/libvfio-user.git


@@ -54,7 +54,6 @@ Aleksandar Markovic <aleksandar.qemu.devel@gmail.com> <amarkovic@wavecomp.com>
Aleksandar Rikalo <aleksandar.rikalo@syrmia.com> <arikalo@wavecomp.com>
Aleksandar Rikalo <aleksandar.rikalo@syrmia.com> <aleksandar.rikalo@rt-rk.com>
Alexander Graf <agraf@csgraf.de> <agraf@suse.de>
Ani Sinha <anisinha@redhat.com> <ani@anisinha.ca>
Anthony Liguori <anthony@codemonkey.ws> Anthony Liguori <aliguori@us.ibm.com>
Christian Borntraeger <borntraeger@linux.ibm.com> <borntraeger@de.ibm.com>
Damien Hedde <damien.hedde@dahe.fr> <damien.hedde@greensocs.com>
@@ -76,9 +75,7 @@ Paul Burton <paulburton@kernel.org> <pburton@wavecomp.com>
Philippe Mathieu-Daudé <philmd@linaro.org> <f4bug@amsat.org>
Philippe Mathieu-Daudé <philmd@linaro.org> <philmd@redhat.com>
Philippe Mathieu-Daudé <philmd@linaro.org> <philmd@fungible.com>
Roman Bolshakov <rbolshakov@ddn.com> <r.bolshakov@yadro.com>
Stefan Brankovic <stefan.brankovic@syrmia.com> <stefan.brankovic@rt-rk.com.com>
Taylor Simpson <ltaylorsimpson@gmail.com> <tsimpson@quicinc.com>
Yongbok Kim <yongbok.kim@mips.com> <yongbok.kim@imgtec.com>
# Also list preferred name forms where people have changed their


@@ -237,15 +237,13 @@ jobs:
- libglib2.0-dev
- libgnutls28-dev
- ninja-build
- flex
- bison
env:
- CONFIG="--disable-containers --disable-system"
- name: "[s390x] Clang (disable-tcg)"
arch: s390x
dist: focal
compiler: clang-10
compiler: clang
addons:
apt_packages:
- libaio-dev
@@ -271,7 +269,6 @@ jobs:
- libvdeplug-dev
- libvte-2.91-dev
- ninja-build
- clang-10
env:
- TEST_CMD="make check-unit"
- CONFIG="--disable-containers --disable-tcg --enable-kvm --disable-tools


@@ -70,7 +70,6 @@ R: Daniel P. Berrangé <berrange@redhat.com>
R: Thomas Huth <thuth@redhat.com>
R: Markus Armbruster <armbru@redhat.com>
R: Philippe Mathieu-Daudé <philmd@linaro.org>
R: Juan Quintela <quintela@redhat.com>
W: https://www.qemu.org/docs/master/devel/index.html
S: Odd Fixes
F: docs/devel/style.rst
@@ -154,14 +153,9 @@ F: include/exec/exec-all.h
F: include/exec/tb-flush.h
F: include/exec/target_long.h
F: include/exec/helper*.h
F: include/exec/helper*.h.inc
F: include/exec/helper-info.c.inc
F: include/sysemu/cpus.h
F: include/sysemu/tcg.h
F: include/hw/core/tcg-cpu-ops.h
F: host/include/*/host/cpuinfo.h
F: util/cpuinfo-*.c
F: include/tcg/
FPU emulation
M: Aurelien Jarno <aurelien@aurel32.net>
@@ -220,7 +214,7 @@ F: tests/tcg/cris/
F: disas/cris.c
Hexagon TCG CPUs
M: Brian Cain <bcain@quicinc.com>
M: Taylor Simpson <tsimpson@quicinc.com>
S: Supported
F: target/hexagon/
X: target/hexagon/idef-parser/
@@ -230,7 +224,6 @@ F: tests/tcg/hexagon/
F: disas/hexagon.c
F: configs/targets/hexagon-linux-user/default.mak
F: docker/dockerfiles/debian-hexagon-cross.docker
F: gdb-xml/hexagon*.xml
Hexagon idef-parser
M: Alessandro Di Federico <ale@rev.ng>
@@ -251,7 +244,6 @@ M: Xiaojuan Yang <yangxiaojuan@loongson.cn>
S: Maintained
F: target/loongarch/
F: tests/tcg/loongarch64/
F: tests/avocado/machine_loongarch.py
M68K TCG CPUs
M: Laurent Vivier <laurent@vivier.eu>
@@ -302,7 +294,6 @@ M: Daniel Henrique Barboza <danielhb413@gmail.com>
R: Cédric Le Goater <clg@kaod.org>
R: David Gibson <david@gibson.dropbear.id.au>
R: Greg Kurz <groug@kaod.org>
R: Nicholas Piggin <npiggin@gmail.com>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: target/ppc/
@@ -336,7 +327,7 @@ F: target/riscv/xthead*.decode
RISC-V XVentanaCondOps extension
M: Philipp Tomsich <philipp.tomsich@vrull.eu>
L: qemu-riscv@nongnu.org
S: Maintained
S: Supported
F: target/riscv/XVentanaCondOps.decode
F: target/riscv/insn_trans/trans_xventanacondops.c.inc
@@ -452,6 +443,8 @@ S: Supported
F: target/s390x/kvm/
F: target/s390x/machine.c
F: target/s390x/sigp.c
F: hw/s390x/pv.c
F: include/hw/s390x/pv.h
F: gdb-xml/s390*.xml
T: git https://github.com/borntraeger/qemu.git s390-next
L: qemu-s390x@nongnu.org
@@ -496,14 +489,14 @@ F: target/arm/hvf/
X86 HVF CPUs
M: Cameron Esfahani <dirty@apple.com>
M: Roman Bolshakov <rbolshakov@ddn.com>
M: Roman Bolshakov <r.bolshakov@yadro.com>
W: https://wiki.qemu.org/Features/HVF
S: Maintained
F: target/i386/hvf/
HVF
M: Cameron Esfahani <dirty@apple.com>
M: Roman Bolshakov <rbolshakov@ddn.com>
M: Roman Bolshakov <r.bolshakov@yadro.com>
W: https://wiki.qemu.org/Features/HVF
S: Maintained
F: accel/hvf/
@@ -945,12 +938,10 @@ SBSA-REF
M: Radoslaw Biernacki <rad@semihalf.com>
M: Peter Maydell <peter.maydell@linaro.org>
R: Leif Lindholm <quic_llindhol@quicinc.com>
R: Marcin Juszkiewicz <marcin.juszkiewicz@linaro.org>
L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/sbsa-ref.c
F: docs/system/arm/sbsa.rst
F: tests/avocado/machine_aarch64_sbsaref.py
Sharp SL-5500 (Collie) PDA
M: Peter Maydell <peter.maydell@linaro.org>
@@ -1120,7 +1111,7 @@ F: include/hw/misc/pca9552*.h
F: hw/net/ftgmac100.c
F: include/hw/net/ftgmac100.h
F: docs/system/arm/aspeed.rst
F: tests/*/*aspeed*
F: tests/qtest/*aspeed*
F: hw/arm/fby35.c
NRF51
@@ -1224,7 +1215,6 @@ q800
M: Laurent Vivier <laurent@vivier.eu>
S: Maintained
F: hw/m68k/q800.c
F: hw/m68k/q800-glue.c
F: hw/misc/mac_via.c
F: hw/nubus/*
F: hw/display/macfb.c
@@ -1236,8 +1226,6 @@ F: include/hw/misc/mac_via.h
F: include/hw/nubus/*
F: include/hw/display/macfb.h
F: include/hw/block/swim.h
F: include/hw/m68k/q800.h
F: include/hw/m68k/q800-glue.h
virt
M: Laurent Vivier <laurent@vivier.eu>
@@ -1432,7 +1420,6 @@ M: Daniel Henrique Barboza <danielhb413@gmail.com>
R: Cédric Le Goater <clg@kaod.org>
R: David Gibson <david@gibson.dropbear.id.au>
R: Greg Kurz <groug@kaod.org>
R: Harsh Prateek Bora <harshpb@linux.ibm.com>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/*/spapr*
@@ -1450,8 +1437,6 @@ F: tests/avocado/ppc_pseries.py
PowerNV (Non-Virtualized)
M: Cédric Le Goater <clg@kaod.org>
R: Frédéric Barrat <fbarrat@linux.ibm.com>
R: Nicholas Piggin <npiggin@gmail.com>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: docs/system/ppc/powernv.rst
@@ -1744,7 +1729,7 @@ F: hw/rtc/mc146818rtc*
F: hw/watchdog/wdt_ib700.c
F: hw/watchdog/wdt_i6300esb.c
F: include/hw/display/vga.h
F: include/hw/char/parallel*.h
F: include/hw/char/parallel.h
F: include/hw/dma/i8257.h
F: include/hw/i2c/pm_smbus.h
F: include/hw/input/i8042.h
@@ -1823,7 +1808,7 @@ M: Francisco Iglesias <francisco.iglesias@amd.com>
S: Maintained
F: hw/net/can/xlnx-*
F: include/hw/net/xlnx-*
F: tests/qtest/xlnx-can*-test*
F: tests/qtest/xlnx-can-test*
EDU
M: Jiri Slaby <jslaby@suse.cz>
@@ -1909,7 +1894,7 @@ F: hw/pci/pcie_doe.c
ACPI/SMBIOS
M: Michael S. Tsirkin <mst@redhat.com>
M: Igor Mammedov <imammedo@redhat.com>
R: Ani Sinha <anisinha@redhat.com>
R: Ani Sinha <ani@anisinha.ca>
S: Supported
F: include/hw/acpi/*
F: include/hw/firmware/smbios.h
@@ -1946,7 +1931,7 @@ F: hw/acpi/viot.c
F: hw/acpi/viot.h
ACPI/AVOCADO/BIOSBITS
M: Ani Sinha <anisinha@redhat.com>
M: Ani Sinha <ani@anisinha.ca>
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
F: tests/avocado/acpi-bits/*
@@ -2049,7 +2034,7 @@ F: hw/usb/dev-serial.c
VFIO
M: Alex Williamson <alex.williamson@redhat.com>
M: Cédric Le Goater <clg@redhat.com>
R: Cédric Le Goater <clg@redhat.com>
S: Supported
F: hw/vfio/*
F: include/hw/vfio/
@@ -2090,10 +2075,6 @@ F: backends/vhost-user.c
F: include/sysemu/vhost-user-backend.h
F: subprojects/libvhost-user/
vhost-shadow-virtqueue
R: Eugenio Pérez <eperezma@redhat.com>
F: hw/virtio/vhost-shadow-virtqueue.*
virtio
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
@@ -2118,24 +2099,17 @@ F: include/sysemu/balloon.h
virtio-9p
M: Greg Kurz <groug@kaod.org>
M: Christian Schoenebeck <qemu_oss@crudebyte.com>
S: Maintained
S: Odd Fixes
W: https://wiki.qemu.org/Documentation/9p
F: hw/9pfs/
X: hw/9pfs/xen-9p*
X: hw/9pfs/9p-proxy*
F: fsdev/
X: fsdev/virtfs-proxy-helper.c
F: docs/tools/virtfs-proxy-helper.rst
F: tests/qtest/virtio-9p-test.c
F: tests/qtest/libqos/virtio-9p*
T: git https://gitlab.com/gkurz/qemu.git 9p-next
T: git https://github.com/cschoenebeck/qemu.git 9p.next
virtio-9p-proxy
F: hw/9pfs/9p-proxy*
F: fsdev/virtfs-proxy-helper.c
F: docs/tools/virtfs-proxy-helper.rst
S: Obsolete
virtio-blk
M: Stefan Hajnoczi <stefanha@redhat.com>
L: qemu-block@nongnu.org
@@ -2215,13 +2189,6 @@ F: hw/virtio/vhost-user-gpio*
F: include/hw/virtio/vhost-user-gpio.h
F: tests/qtest/libqos/virtio-gpio.*
vhost-user-scmi
R: mzamazal@redhat.com
S: Supported
F: hw/virtio/vhost-user-scmi*
F: include/hw/virtio/vhost-user-scmi.h
F: tests/qtest/libqos/virtio-scmi.*
virtio-crypto
M: Gonglei <arei.gonglei@huawei.com>
S: Supported
@@ -2229,13 +2196,6 @@ F: hw/virtio/virtio-crypto.c
F: hw/virtio/virtio-crypto-pci.c
F: include/hw/virtio/virtio-crypto.h
virtio based memory device
M: David Hildenbrand <david@redhat.com>
S: Supported
F: hw/virtio/virtio-md-pci.c
F: include/hw/virtio/virtio-md-pci.h
F: stubs/virtio-md-pci.c
virtio-mem
M: David Hildenbrand <david@redhat.com>
S: Supported
@@ -2267,7 +2227,6 @@ F: tests/qtest/fuzz-megasas-test.c
Network packet abstractions
M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
R: Akihiko Odaki <akihiko.odaki@daynix.com>
S: Maintained
F: include/net/eth.h
F: net/eth.c
@@ -2310,7 +2269,7 @@ R: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
S: Maintained
F: docs/system/devices/igb.rst
F: hw/net/igb*
F: tests/avocado/netdev-ethtool.py
F: tests/avocado/igb.py
F: tests/qtest/igb-test.c
F: tests/qtest/libqos/igb.c
@@ -2467,7 +2426,6 @@ T: git https://github.com/philmd/qemu.git fw_cfg-next
XIVE
M: Cédric Le Goater <clg@kaod.org>
R: Frédéric Barrat <fbarrat@linux.ibm.com>
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/*/*xive*
@@ -2601,7 +2559,7 @@ Core Audio framework backend
M: Gerd Hoffmann <kraxel@redhat.com>
M: Philippe Mathieu-Daudé <philmd@linaro.org>
R: Christian Schoenebeck <qemu_oss@crudebyte.com>
R: Akihiko Odaki <akihiko.odaki@daynix.com>
R: Akihiko Odaki <akihiko.odaki@gmail.com>
S: Odd Fixes
F: audio/coreaudio.c
@@ -2678,7 +2636,6 @@ F: util/aio-*.c
F: util/aio-*.h
F: util/fdmon-*.c
F: block/io.c
F: block/plug.c
F: migration/block*
F: include/block/aio.h
F: include/block/aio-wait.h
@@ -2886,13 +2843,14 @@ F: docs/devel/ui.rst
Cocoa graphics
M: Peter Maydell <peter.maydell@linaro.org>
M: Philippe Mathieu-Daudé <philmd@linaro.org>
R: Akihiko Odaki <akihiko.odaki@daynix.com>
R: Akihiko Odaki <akihiko.odaki@gmail.com>
S: Odd Fixes
F: ui/cocoa.m
Main loop
M: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: include/exec/gen-icount.h
F: include/qemu/main-loop.h
F: include/sysemu/runstate.h
F: include/sysemu/runstate-action.h
@@ -3072,7 +3030,6 @@ F: include/qom/
F: qapi/qom.json
F: qapi/qdev.json
F: scripts/coccinelle/qom-parent-type.cocci
F: scripts/qom-cast-macro-clean-cocci-gen.py
F: softmmu/qdev-monitor.c
F: stubs/qdev.c
F: qom/
@@ -3125,7 +3082,6 @@ R: Qiuhao Li <Qiuhao.Li@outlook.com>
S: Maintained
F: tests/qtest/fuzz/
F: tests/qtest/fuzz-*test.c
F: tests/docker/test-fuzz
F: scripts/oss-fuzz/
F: hw/mem/sparse-mem.c
F: docs/devel/fuzzing.rst
@@ -3193,8 +3149,6 @@ F: scripts/checkpatch.pl
Migration
M: Juan Quintela <quintela@redhat.com>
R: Peter Xu <peterx@redhat.com>
R: Leonardo Bras <leobras@redhat.com>
S: Maintained
F: hw/core/vmstate-if.c
F: include/hw/vmstate-if.h
@@ -3209,15 +3163,6 @@ F: qapi/migration.json
F: tests/migration/
F: util/userfaultfd.c
Migration dirty limit and dirty page rate
M: Hyman Huang <yong.huang@smartx.com>
S: Maintained
F: softmmu/dirtylimit.c
F: include/sysemu/dirtylimit.h
F: migration/dirtyrate.c
F: migration/dirtyrate.h
F: include/sysemu/dirtyrate.h
D-Bus
M: Marc-André Lureau <marcandre.lureau@redhat.com>
S: Maintained
@@ -3231,7 +3176,6 @@ F: docs/interop/dbus*
F: docs/sphinx/dbus*
F: docs/sphinx/fakedbusdoc.py
F: tests/qtest/dbus*
F: scripts/xml-preprocess*
Seccomp
M: Daniel P. Berrange <berrange@redhat.com>
@@ -3245,7 +3189,6 @@ M: Daniel P. Berrange <berrange@redhat.com>
S: Maintained
F: crypto/
F: include/crypto/
F: host/include/*/host/crypto/
F: qapi/crypto.json
F: tests/unit/test-crypto-*
F: tests/bench/benchmark-crypto-*
@@ -3424,10 +3367,6 @@ F: hw/i386/intel_iommu.c
F: hw/i386/intel_iommu_internal.h
F: include/hw/i386/intel_iommu.h
AMD-Vi Emulation
S: Orphan
F: hw/i386/amd_iommu.?
OpenSBI Firmware
M: Bin Meng <bmeng.cn@gmail.com>
S: Supported
@@ -3824,6 +3763,7 @@ F: tests/tcg/aarch64/system/semiheap.c
Multi-process QEMU
M: Elena Ufimtseva <elena.ufimtseva@oracle.com>
M: Jagannathan Raman <jag.raman@oracle.com>
M: John G Johnson <john.g.johnson@oracle.com>
S: Maintained
F: docs/devel/multi-process.rst
F: docs/system/multi-process.rst
@@ -3957,16 +3897,6 @@ F: configure
F: scripts/mtest2make.py
F: tests/Makefile.include
Kconfig
M: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: scripts/minikconf.py
F: docs/devel/kconfig.rst
F: Kconfig*
F: */Kconfig*
F: hw/*/Kconfig*
F: target/*/Kconfig*
GIT submodules
M: Daniel P. Berrange <berrange@redhat.com>
S: Odd Fixes


@@ -26,9 +26,9 @@ quiet-command-run = $(if $(V),,$(if $2,printf " %-7s %s\n" $2 $3 && ))$1
quiet-@ = $(if $(V),,@)
quiet-command = $(quiet-@)$(call quiet-command-run,$1,$2,$3)
UNCHECKED_GOALS := TAGS gtags cscope ctags dist \
UNCHECKED_GOALS := %clean TAGS cscope ctags dist \
help check-help print-% \
docker docker-% lcitool-refresh vm-help vm-test vm-build-%
docker docker-% vm-help vm-test vm-build-%
all:
.PHONY: all clean distclean recurse-all dist msi FORCE
@@ -45,6 +45,18 @@ include config-host.mak
include Makefile.prereqs
Makefile.prereqs: config-host.mak
git-submodule-update:
.git-submodule-status: git-submodule-update config-host.mak
Makefile: .git-submodule-status
.PHONY: git-submodule-update
git-submodule-update:
ifneq ($(GIT_SUBMODULES_ACTION),ignore)
$(call quiet-command, \
(GIT="$(GIT)" "$(SRC_PATH)/scripts/git-submodule.sh" $(GIT_SUBMODULES_ACTION) $(GIT_SUBMODULES)), \
"GIT","$(GIT_SUBMODULES)")
endif
# 0. ensure the build tree is okay
# Check that we're not trying to do an out-of-tree build from
@@ -83,17 +95,16 @@ config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/scripts/meson-buildoptions.sh
@if test -f meson-private/coredata.dat; then \
./config.status --skip-meson; \
else \
./config.status; \
./config.status && touch build.ninja.stamp; \
fi
# 2. meson.stamp exists if meson has run at least once (so ninja reconfigure
# works), but otherwise never needs to be updated
meson-private/coredata.dat: meson.stamp
meson.stamp: config-host.mak
@touch meson.stamp
# 3. ensure meson-generated build files are up-to-date
# 3. ensure generated build files are up-to-date
ifneq ($(NINJA),)
Makefile.ninja: build.ninja
@@ -104,23 +115,15 @@ Makefile.ninja: build.ninja
$(NINJA) -t query build.ninja | sed -n '1,/^ input:/d; /^ outputs:/q; s/$$/ \\/p'; \
} > $@.tmp && mv $@.tmp $@
-include Makefile.ninja
endif
ifneq ($(MESON),)
# The path to meson always points to pyvenv/bin/meson, but the absolute
# paths could change. In that case, force a regeneration of build.ninja.
# Note that this invocation of $(NINJA), just like when Make rebuilds
# Makefiles, does not include -n.
# A separate rule is needed for Makefile dependencies to avoid -n
build.ninja: build.ninja.stamp
$(build-files):
build.ninja.stamp: meson.stamp $(build-files)
@if test "$$(cat build.ninja.stamp)" = "$(MESON)" && test -n "$(NINJA)"; then \
$(NINJA) build.ninja; \
else \
echo "$(MESON) setup --reconfigure $(SRC_PATH)"; \
$(MESON) setup --reconfigure $(SRC_PATH); \
fi && echo "$(MESON)" > $@
$(NINJA) $(if $V,-v,) build.ninja && touch $@
endif
ifneq ($(MESON),)
Makefile.mtest: build.ninja scripts/mtest2make.py
$(MESON) introspect --targets --tests --benchmarks | $(PYTHON) scripts/mtest2make.py > $@
-include Makefile.mtest
@@ -173,8 +176,10 @@ plugins:
endif # $(CONFIG_PLUGIN)
else # config-host.mak does not exist
config-host.mak:
ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
$(error Please call configure before running make)
@echo "Please call configure before running make!"
@exit 1
endif
endif # config-host.mak does not exist


@@ -1 +1 @@
8.1.0
8.0.4


@@ -52,7 +52,6 @@
#include "qemu/main-loop.h"
#include "exec/address-spaces.h"
#include "exec/exec-all.h"
#include "exec/gdbstub.h"
#include "sysemu/cpus.h"
#include "sysemu/hvf.h"
#include "sysemu/hvf_int.h"
@@ -304,7 +303,7 @@ static void hvf_region_del(MemoryListener *listener,
static MemoryListener hvf_memory_listener = {
.name = "hvf",
.priority = MEMORY_LISTENER_PRIORITY_ACCEL,
.priority = 10,
.region_add = hvf_region_add,
.region_del = hvf_region_del,
.log_start = hvf_log_start,
@@ -335,26 +334,18 @@ static int hvf_accel_init(MachineState *ms)
s->slots[x].slot_id = x;
}
QTAILQ_INIT(&s->hvf_sw_breakpoints);
hvf_state = s;
memory_listener_register(&hvf_memory_listener, &address_space_memory);
return hvf_arch_init();
}
static inline int hvf_gdbstub_sstep_flags(void)
{
return SSTEP_ENABLE | SSTEP_NOIRQ;
}
static void hvf_accel_class_init(ObjectClass *oc, void *data)
{
AccelClass *ac = ACCEL_CLASS(oc);
ac->name = "HVF";
ac->init_machine = hvf_accel_init;
ac->allowed = &hvf_allowed;
ac->gdbstub_supported_sstep_flags = hvf_gdbstub_sstep_flags;
}
static const TypeInfo hvf_accel_type = {
@@ -372,19 +363,19 @@ type_init(hvf_type_init);
static void hvf_vcpu_destroy(CPUState *cpu)
{
hv_return_t ret = hv_vcpu_destroy(cpu->accel->fd);
hv_return_t ret = hv_vcpu_destroy(cpu->hvf->fd);
assert_hvf_ok(ret);
hvf_arch_vcpu_destroy(cpu);
g_free(cpu->accel);
cpu->accel = NULL;
g_free(cpu->hvf);
cpu->hvf = NULL;
}
static int hvf_init_vcpu(CPUState *cpu)
{
int r;
cpu->accel = g_new0(AccelCPUState, 1);
cpu->hvf = g_malloc0(sizeof(*cpu->hvf));
/* init cpu signals */
struct sigaction sigact;
@@ -393,20 +384,17 @@ static int hvf_init_vcpu(CPUState *cpu)
sigact.sa_handler = dummy_signal;
sigaction(SIG_IPI, &sigact, NULL);
pthread_sigmask(SIG_BLOCK, NULL, &cpu->accel->unblock_ipi_mask);
sigdelset(&cpu->accel->unblock_ipi_mask, SIG_IPI);
pthread_sigmask(SIG_BLOCK, NULL, &cpu->hvf->unblock_ipi_mask);
sigdelset(&cpu->hvf->unblock_ipi_mask, SIG_IPI);
#ifdef __aarch64__
r = hv_vcpu_create(&cpu->accel->fd,
(hv_vcpu_exit_t **)&cpu->accel->exit, NULL);
r = hv_vcpu_create(&cpu->hvf->fd, (hv_vcpu_exit_t **)&cpu->hvf->exit, NULL);
#else
r = hv_vcpu_create((hv_vcpuid_t *)&cpu->accel->fd, HV_VCPU_DEFAULT);
r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf->fd, HV_VCPU_DEFAULT);
#endif
cpu->vcpu_dirty = 1;
assert_hvf_ok(r);
cpu->accel->guest_debug_enabled = false;
return hvf_arch_init_vcpu(cpu);
}
@@ -474,108 +462,6 @@ static void hvf_start_vcpu_thread(CPUState *cpu)
cpu, QEMU_THREAD_JOINABLE);
}
static int hvf_insert_breakpoint(CPUState *cpu, int type, hwaddr addr, hwaddr len)
{
struct hvf_sw_breakpoint *bp;
int err;
if (type == GDB_BREAKPOINT_SW) {
bp = hvf_find_sw_breakpoint(cpu, addr);
if (bp) {
bp->use_count++;
return 0;
}
bp = g_new(struct hvf_sw_breakpoint, 1);
bp->pc = addr;
bp->use_count = 1;
err = hvf_arch_insert_sw_breakpoint(cpu, bp);
if (err) {
g_free(bp);
return err;
}
QTAILQ_INSERT_HEAD(&hvf_state->hvf_sw_breakpoints, bp, entry);
} else {
err = hvf_arch_insert_hw_breakpoint(addr, len, type);
if (err) {
return err;
}
}
CPU_FOREACH(cpu) {
err = hvf_update_guest_debug(cpu);
if (err) {
return err;
}
}
return 0;
}
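
A minimal sketch of the use_count logic above (hypothetical call site, not part of this diff; the cpu, address and length values are illustrative):

    hvf_insert_breakpoint(cpu, GDB_BREAKPOINT_SW, 0x40001000, 4); /* new bp, use_count = 1 */
    hvf_insert_breakpoint(cpu, GDB_BREAKPOINT_SW, 0x40001000, 4); /* same pc: use_count = 2, no new bp */
    hvf_remove_breakpoint(cpu, GDB_BREAKPOINT_SW, 0x40001000, 4); /* use_count drops back to 1, bp kept */

Duplicate requests from the gdbstub are therefore idempotent, and a breakpoint is only freed once its last user removes it.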
static int hvf_remove_breakpoint(CPUState *cpu, int type, hwaddr addr, hwaddr len)
{
struct hvf_sw_breakpoint *bp;
int err;
if (type == GDB_BREAKPOINT_SW) {
bp = hvf_find_sw_breakpoint(cpu, addr);
if (!bp) {
return -ENOENT;
}
if (bp->use_count > 1) {
bp->use_count--;
return 0;
}
err = hvf_arch_remove_sw_breakpoint(cpu, bp);
if (err) {
return err;
}
QTAILQ_REMOVE(&hvf_state->hvf_sw_breakpoints, bp, entry);
g_free(bp);
} else {
err = hvf_arch_remove_hw_breakpoint(addr, len, type);
if (err) {
return err;
}
}
CPU_FOREACH(cpu) {
err = hvf_update_guest_debug(cpu);
if (err) {
return err;
}
}
return 0;
}
static void hvf_remove_all_breakpoints(CPUState *cpu)
{
struct hvf_sw_breakpoint *bp, *next;
CPUState *tmpcpu;
QTAILQ_FOREACH_SAFE(bp, &hvf_state->hvf_sw_breakpoints, entry, next) {
if (hvf_arch_remove_sw_breakpoint(cpu, bp) != 0) {
/* Try harder to find a CPU that currently sees the breakpoint. */
CPU_FOREACH(tmpcpu)
{
if (hvf_arch_remove_sw_breakpoint(tmpcpu, bp) == 0) {
break;
}
}
}
QTAILQ_REMOVE(&hvf_state->hvf_sw_breakpoints, bp, entry);
g_free(bp);
}
hvf_arch_remove_all_hw_breakpoints();
CPU_FOREACH(cpu) {
hvf_update_guest_debug(cpu);
}
}
static void hvf_accel_ops_class_init(ObjectClass *oc, void *data)
{
AccelOpsClass *ops = ACCEL_OPS_CLASS(oc);
@@ -587,12 +473,6 @@ static void hvf_accel_ops_class_init(ObjectClass *oc, void *data)
ops->synchronize_post_init = hvf_cpu_synchronize_post_init;
ops->synchronize_state = hvf_cpu_synchronize_state;
ops->synchronize_pre_loadvm = hvf_cpu_synchronize_pre_loadvm;
ops->insert_breakpoint = hvf_insert_breakpoint;
ops->remove_breakpoint = hvf_remove_breakpoint;
ops->remove_all_breakpoints = hvf_remove_all_breakpoints;
ops->update_guest_debug = hvf_update_guest_debug;
ops->supports_guest_debug = hvf_arch_supports_guest_debug;
};
static const TypeInfo hvf_accel_ops_type = {
.name = ACCEL_OPS_NAME("hvf"),


@@ -38,38 +38,9 @@ void assert_hvf_ok(hv_return_t ret)
case HV_UNSUPPORTED:
error_report("Error: HV_UNSUPPORTED");
break;
#if defined(MAC_OS_VERSION_11_0) && \
MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
case HV_DENIED:
error_report("Error: HV_DENIED");
break;
#endif
default:
error_report("Unknown Error");
}
abort();
}
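
assert_hvf_ok() returns silently on success (assuming the usual early-return at the top of the function, which this hunk does not show) and aborts with a decoded error otherwise. A sketch of a typical call site; hv_vm_create(HV_VM_DEFAULT) is the Hypervisor.framework entry point on x86 macOS, shown for illustration only:

    hv_return_t ret = hv_vm_create(HV_VM_DEFAULT);
    assert_hvf_ok(ret); /* e.g. reports "Error: HV_UNSUPPORTED" and aborts */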
struct hvf_sw_breakpoint *hvf_find_sw_breakpoint(CPUState *cpu, target_ulong pc)
{
struct hvf_sw_breakpoint *bp;
QTAILQ_FOREACH(bp, &hvf_state->hvf_sw_breakpoints, entry) {
if (bp->pc == pc) {
return bp;
}
}
return NULL;
}
int hvf_sw_breakpoints_active(CPUState *cpu)
{
return !QTAILQ_EMPTY(&hvf_state->hvf_sw_breakpoints);
}
int hvf_update_guest_debug(CPUState *cpu)
{
hvf_arch_update_guest_debug(cpu);
return 0;
}


@@ -450,8 +450,6 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
"kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)",
kvm_arch_vcpu_id(cpu));
}
cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
err:
return ret;
}
@@ -1105,7 +1103,6 @@ static MemoryListener kvm_coalesced_pio_listener = {
.name = "kvm-coalesced-pio",
.coalesced_io_add = kvm_coalesce_pio_add,
.coalesced_io_del = kvm_coalesce_pio_del,
.priority = MEMORY_LISTENER_PRIORITY_MIN,
};
int kvm_check_extension(KVMState *s, unsigned int extension)
@@ -1364,10 +1361,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
*/
if (kvm_state->kvm_dirty_ring_size) {
kvm_dirty_ring_reap_locked(kvm_state, NULL);
if (kvm_state->kvm_dirty_ring_with_bitmap) {
kvm_slot_sync_dirty_pages(mem);
kvm_slot_get_dirty_log(kvm_state, mem);
}
} else {
kvm_slot_get_dirty_log(kvm_state, mem);
}
@@ -1465,69 +1458,6 @@ static int kvm_dirty_ring_reaper_init(KVMState *s)
return 0;
}
static int kvm_dirty_ring_init(KVMState *s)
{
uint32_t ring_size = s->kvm_dirty_ring_size;
uint64_t ring_bytes = ring_size * sizeof(struct kvm_dirty_gfn);
unsigned int capability = KVM_CAP_DIRTY_LOG_RING;
int ret;
s->kvm_dirty_ring_size = 0;
s->kvm_dirty_ring_bytes = 0;
/* Bail if the dirty ring size isn't specified */
if (!ring_size) {
return 0;
}
/*
* Read the max supported pages. Fall back to dirty logging mode
* if the dirty ring isn't supported.
*/
ret = kvm_vm_check_extension(s, capability);
if (ret <= 0) {
capability = KVM_CAP_DIRTY_LOG_RING_ACQ_REL;
ret = kvm_vm_check_extension(s, capability);
}
if (ret <= 0) {
warn_report("KVM dirty ring not available, using bitmap method");
return 0;
}
if (ring_bytes > ret) {
error_report("KVM dirty ring size %" PRIu32 " too big "
"(maximum is %ld). Please use a smaller value.",
ring_size, (long)ret / sizeof(struct kvm_dirty_gfn));
return -EINVAL;
}
ret = kvm_vm_enable_cap(s, capability, 0, ring_bytes);
if (ret) {
error_report("Enabling of KVM dirty ring failed: %s. "
"Suggested minimum value is 1024.", strerror(-ret));
return -EIO;
}
/* Enable the backup bitmap if it is supported */
ret = kvm_vm_check_extension(s, KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP);
if (ret > 0) {
ret = kvm_vm_enable_cap(s, KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP, 0);
if (ret) {
error_report("Enabling of KVM dirty ring's backup bitmap failed: "
"%s. ", strerror(-ret));
return -EIO;
}
s->kvm_dirty_ring_with_bitmap = true;
}
s->kvm_dirty_ring_size = ring_size;
s->kvm_dirty_ring_bytes = ring_bytes;
return 0;
}
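
A worked example of the size check above, assuming the Linux UAPI layout of struct kvm_dirty_gfn (u32 flags, u32 slot, u64 offset, i.e. 16 bytes; the entry count is illustrative):

    uint32_t ring_size = 4096;  /* entries; KVM expects a power of two */
    uint64_t ring_bytes = ring_size * sizeof(struct kvm_dirty_gfn);
    /* 4096 * 16 = 65536 bytes per vCPU; if KVM_CHECK_EXTENSION reported a
     * smaller maximum, kvm_dirty_ring_init() fails with -EINVAL */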
static void kvm_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
@@ -1633,7 +1563,7 @@ static void kvm_log_sync(MemoryListener *listener,
kvm_slots_unlock();
}
static void kvm_log_sync_global(MemoryListener *l, bool last_stage)
static void kvm_log_sync_global(MemoryListener *l)
{
KVMMemoryListener *kml = container_of(l, KVMMemoryListener, listener);
KVMState *s = kvm_state;
@@ -1652,12 +1582,6 @@ static void kvm_log_sync_global(MemoryListener *l, bool last_stage)
mem = &kml->slots[i];
if (mem->memory_size && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
kvm_slot_sync_dirty_pages(mem);
if (s->kvm_dirty_ring_with_bitmap && last_stage &&
kvm_slot_get_dirty_log(s, mem)) {
kvm_slot_sync_dirty_pages(mem);
}
/*
* This is not needed by KVM_GET_DIRTY_LOG because the
* ioctl will unconditionally overwrite the whole region.
@@ -1778,7 +1702,7 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
kml->listener.commit = kvm_region_commit;
kml->listener.log_start = kvm_log_start;
kml->listener.log_stop = kvm_log_stop;
kml->listener.priority = MEMORY_LISTENER_PRIORITY_ACCEL;
kml->listener.priority = 10;
kml->listener.name = name;
if (s->kvm_dirty_ring_size) {
@@ -1803,7 +1727,7 @@ static MemoryListener kvm_io_listener = {
.name = "kvm-io",
.eventfd_add = kvm_io_ioeventfd_add,
.eventfd_del = kvm_io_ioeventfd_del,
.priority = MEMORY_LISTENER_PRIORITY_DEV_BACKEND,
.priority = 10,
};
int kvm_set_irq(KVMState *s, int irq, int level)
@@ -2597,9 +2521,35 @@ static int kvm_init(MachineState *ms)
* Enable KVM dirty ring if supported, otherwise fall back to
* dirty logging mode
*/
ret = kvm_dirty_ring_init(s);
if (ret < 0) {
goto err;
if (s->kvm_dirty_ring_size > 0) {
uint64_t ring_bytes;
ring_bytes = s->kvm_dirty_ring_size * sizeof(struct kvm_dirty_gfn);
/* Read the max supported pages */
ret = kvm_vm_check_extension(s, KVM_CAP_DIRTY_LOG_RING);
if (ret > 0) {
if (ring_bytes > ret) {
error_report("KVM dirty ring size %" PRIu32 " too big "
"(maximum is %ld). Please use a smaller value.",
s->kvm_dirty_ring_size,
(long)ret / sizeof(struct kvm_dirty_gfn));
ret = -EINVAL;
goto err;
}
ret = kvm_vm_enable_cap(s, KVM_CAP_DIRTY_LOG_RING, 0, ring_bytes);
if (ret) {
error_report("Enabling of KVM dirty ring failed: %s. "
"Suggested minimum value is 1024.", strerror(-ret));
goto err;
}
s->kvm_dirty_ring_bytes = ring_bytes;
} else {
warn_report("KVM dirty ring not available, using bitmap method");
s->kvm_dirty_ring_size = 0;
}
}
/*
@@ -2812,7 +2762,7 @@ void kvm_flush_coalesced_mmio_buffer(void)
{
KVMState *s = kvm_state;
if (!s || s->coalesced_flush_in_progress) {
if (s->coalesced_flush_in_progress) {
return;
}
@@ -3760,7 +3710,6 @@ static void kvm_accel_instance_init(Object *obj)
s->kernel_irqchip_split = ON_OFF_AUTO_AUTO;
/* KVM dirty ring is by default off */
s->kvm_dirty_ring_size = 0;
s->kvm_dirty_ring_with_bitmap = false;
s->notify_vmexit = NOTIFY_VMEXIT_OPTION_RUN;
s->notify_window = 0;
s->xen_version = 0;
@@ -4010,7 +3959,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd
/* Read stats header */
kvm_stats_header = &descriptors->kvm_stats_header;
ret = pread(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header), 0);
ret = read(stats_fd, kvm_stats_header, sizeof(*kvm_stats_header));
if (ret != sizeof(*kvm_stats_header)) {
error_setg(errp, "KVM stats: failed to read stats header: "
"expected %zu actual %zu",
@@ -4041,8 +3990,7 @@ static StatsDescriptors *find_stats_descriptors(StatsTarget target, int stats_fd
}
static void query_stats(StatsResultList **result, StatsTarget target,
strList *names, int stats_fd, CPUState *cpu,
Error **errp)
strList *names, int stats_fd, Error **errp)
{
struct kvm_stats_desc *kvm_stats_desc;
struct kvm_stats_header *kvm_stats_header;
@@ -4100,7 +4048,7 @@ static void query_stats(StatsResultList **result, StatsTarget target,
break;
case STATS_TARGET_VCPU:
add_stats_entry(result, STATS_PROVIDER_KVM,
cpu->parent_obj.canonical_path,
current_cpu->parent_obj.canonical_path,
stats_list);
break;
default:
@@ -4137,9 +4085,10 @@ static void query_stats_schema(StatsSchemaList **result, StatsTarget target,
add_stats_schema(result, STATS_PROVIDER_KVM, target, stats_list);
}
static void query_stats_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
static void query_stats_vcpu(CPUState *cpu, run_on_cpu_data data)
{
int stats_fd = cpu->kvm_vcpu_stats_fd;
StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr;
int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
Error *local_err = NULL;
if (stats_fd == -1) {
@@ -4148,13 +4097,14 @@ static void query_stats_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
return;
}
query_stats(kvm_stats_args->result.stats, STATS_TARGET_VCPU,
kvm_stats_args->names, stats_fd, cpu,
kvm_stats_args->errp);
kvm_stats_args->names, stats_fd, kvm_stats_args->errp);
close(stats_fd);
}
static void query_stats_schema_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
static void query_stats_schema_vcpu(CPUState *cpu, run_on_cpu_data data)
{
int stats_fd = cpu->kvm_vcpu_stats_fd;
StatsArgs *kvm_stats_args = (StatsArgs *) data.host_ptr;
int stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
Error *local_err = NULL;
if (stats_fd == -1) {
@@ -4164,6 +4114,7 @@ static void query_stats_schema_vcpu(CPUState *cpu, StatsArgs *kvm_stats_args)
}
query_stats_schema(kvm_stats_args->result.schema, STATS_TARGET_VCPU, stats_fd,
kvm_stats_args->errp);
close(stats_fd);
}
static void query_stats_cb(StatsResultList **result, StatsTarget target,
@@ -4181,7 +4132,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
error_setg_errno(errp, errno, "KVM stats: ioctl failed");
return;
}
query_stats(result, target, names, stats_fd, NULL, errp);
query_stats(result, target, names, stats_fd, errp);
close(stats_fd);
break;
}
@@ -4195,7 +4146,7 @@ static void query_stats_cb(StatsResultList **result, StatsTarget target,
if (!apply_str_list_filter(cpu->parent_obj.canonical_path, targets)) {
continue;
}
query_stats_vcpu(cpu, &stats_args);
run_on_cpu(cpu, query_stats_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args));
}
break;
}
@@ -4221,6 +4172,6 @@ void query_stats_schemas_cb(StatsSchemaList **result, Error **errp)
if (first_cpu) {
stats_args.result.schema = result;
stats_args.errp = errp;
query_stats_schema_vcpu(first_cpu, &stats_args);
run_on_cpu(first_cpu, query_stats_schema_vcpu, RUN_ON_CPU_HOST_PTR(&stats_args));
}
}


@@ -1,5 +1,5 @@
specific_ss.add(files('accel-common.c', 'accel-blocker.c'))
system_ss.add(files('accel-softmmu.c'))
softmmu_ss.add(files('accel-softmmu.c'))
user_ss.add(files('accel-user.c'))
subdir('tcg')
@@ -12,4 +12,4 @@ if have_system
endif
# qtest
system_ss.add(files('dummy-cpus.c'))
softmmu_ss.add(files('dummy-cpus.c'))


@@ -1 +1 @@
qtest_module_ss.add(when: ['CONFIG_SYSTEM_ONLY'], if_true: files('qtest.c'))
qtest_module_ss.add(when: ['CONFIG_SOFTMMU'], if_true: files('qtest.c'))


@@ -27,7 +27,6 @@ bool kvm_allowed;
bool kvm_readonly_mem_allowed;
bool kvm_ioeventfd_any_length_allowed;
bool kvm_msi_use_devid;
bool kvm_direct_msi_allowed;
void kvm_flush_coalesced_mmio_buffer(void)
{


@@ -4,4 +4,4 @@ sysemu_stubs_ss.add(when: 'CONFIG_XEN', if_false: files('xen-stub.c'))
sysemu_stubs_ss.add(when: 'CONFIG_KVM', if_false: files('kvm-stub.c'))
sysemu_stubs_ss.add(when: 'CONFIG_TCG', if_false: files('tcg-stub.c'))
specific_ss.add_all(when: ['CONFIG_SYSTEM_ONLY'], if_true: sysemu_stubs_ss)
specific_ss.add_all(when: ['CONFIG_SOFTMMU'], if_true: sysemu_stubs_ss)


@@ -18,7 +18,7 @@ void tb_flush(CPUState *cpu)
{
}
void tlb_set_dirty(CPUState *cpu, vaddr vaddr)
void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
{
}
@@ -26,14 +26,14 @@ void tcg_flush_jmp_cache(CPUState *cpu)
{
}
int probe_access_flags(CPUArchState *env, vaddr addr, int size,
int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
MMUAccessType access_type, int mmu_idx,
bool nonfault, void **phost, uintptr_t retaddr)
{
g_assert_not_reached();
}
void *probe_access(CPUArchState *env, vaddr addr, int size,
void *probe_access(CPUArchState *env, target_ulong addr, int size,
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
{
/* Handled by hardware accelerator. */


@@ -13,12 +13,26 @@
* See the COPYING file in the top-level directory.
*/
static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr,
static void atomic_trace_rmw_post(CPUArchState *env, target_ulong addr,
MemOpIdx oi)
{
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW);
}
#if HAVE_ATOMIC128
static void atomic_trace_ld_post(CPUArchState *env, target_ulong addr,
MemOpIdx oi)
{
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
}
static void atomic_trace_st_post(CPUArchState *env, target_ulong addr,
MemOpIdx oi)
{
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
#endif
/*
* Atomic helpers callable from TCG.
* These have a common interface and all defer to cpu_atomic_*
@@ -26,7 +40,7 @@ static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr,
*/
#define CMPXCHG_HELPER(OP, TYPE) \
TYPE HELPER(atomic_##OP)(CPUArchState *env, uint64_t addr, \
TYPE HELPER(atomic_##OP)(CPUArchState *env, target_ulong addr, \
TYPE oldv, TYPE newv, uint32_t oi) \
{ return cpu_atomic_##OP##_mmu(env, addr, oldv, newv, oi, GETPC()); }
@@ -41,23 +55,43 @@ CMPXCHG_HELPER(cmpxchgq_be, uint64_t)
CMPXCHG_HELPER(cmpxchgq_le, uint64_t)
#endif
#if HAVE_CMPXCHG128
#ifdef CONFIG_CMPXCHG128
CMPXCHG_HELPER(cmpxchgo_be, Int128)
CMPXCHG_HELPER(cmpxchgo_le, Int128)
#endif
#undef CMPXCHG_HELPER
Int128 HELPER(nonatomic_cmpxchgo)(CPUArchState *env, uint64_t addr,
Int128 cmpv, Int128 newv, uint32_t oi)
Int128 HELPER(nonatomic_cmpxchgo_be)(CPUArchState *env, target_ulong addr,
Int128 cmpv, Int128 newv, uint32_t oi)
{
#if TCG_TARGET_REG_BITS == 32
uintptr_t ra = GETPC();
Int128 oldv;
oldv = cpu_ld16_mmu(env, addr, oi, ra);
oldv = cpu_ld16_be_mmu(env, addr, oi, ra);
if (int128_eq(oldv, cmpv)) {
cpu_st16_mmu(env, addr, newv, oi, ra);
cpu_st16_be_mmu(env, addr, newv, oi, ra);
} else {
/* Even with comparison failure, still need a write cycle. */
probe_write(env, addr, 16, get_mmuidx(oi), ra);
}
return oldv;
#else
g_assert_not_reached();
#endif
}
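
In the caller's terms the helper above behaves like a 16-byte compare-and-swap (sketch; the helper_ name follows mechanically from the HELPER() macro, and the values are hypothetical):

    Int128 old = helper_nonatomic_cmpxchgo_be(env, addr, cmpv, newv, oi);
    bool stored = int128_eq(old, cmpv); /* newv was written only in this case */
    /* on failure probe_write() still performs the write-permission check, so a
     * read-only page faults exactly as the real instruction would */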
Int128 HELPER(nonatomic_cmpxchgo_le)(CPUArchState *env, target_ulong addr,
Int128 cmpv, Int128 newv, uint32_t oi)
{
#if TCG_TARGET_REG_BITS == 32
uintptr_t ra = GETPC();
Int128 oldv;
oldv = cpu_ld16_le_mmu(env, addr, oi, ra);
if (int128_eq(oldv, cmpv)) {
cpu_st16_le_mmu(env, addr, newv, oi, ra);
} else {
/* Even with comparison failure, still need a write cycle. */
probe_write(env, addr, 16, get_mmuidx(oi), ra);
@@ -69,7 +103,7 @@ Int128 HELPER(nonatomic_cmpxchgo)(CPUArchState *env, uint64_t addr,
}
#define ATOMIC_HELPER(OP, TYPE) \
TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, uint64_t addr, \
TYPE HELPER(glue(atomic_,OP))(CPUArchState *env, target_ulong addr, \
TYPE val, uint32_t oi) \
{ return glue(glue(cpu_atomic_,OP),_mmu)(env, addr, val, oi, GETPC()); }
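For reference, an illustrative expansion of ATOMIC_HELPER, assuming an invocation such as ATOMIC_HELPER(fetch_addl, uint32_t) later in this file:

    uint32_t helper_atomic_fetch_addl(CPUArchState *env, target_ulong addr,
                                      uint32_t val, uint32_t oi)
    { return cpu_atomic_fetch_addl_mmu(env, addr, val, oi, GETPC()); }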


@@ -73,7 +73,8 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE cmpv, ABI_TYPE newv,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
PAGE_READ | PAGE_WRITE, retaddr);
DATA_TYPE ret;
#if DATA_SIZE == 16
@@ -86,11 +87,38 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
return ret;
}
#if DATA_SIZE < 16
#if DATA_SIZE >= 16
#if HAVE_ATOMIC128
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
PAGE_READ, retaddr);
DATA_TYPE val;
val = atomic16_read(haddr);
ATOMIC_MMU_CLEANUP;
atomic_trace_ld_post(env, addr, oi);
return val;
}
void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
PAGE_WRITE, retaddr);
atomic16_set(haddr, val);
ATOMIC_MMU_CLEANUP;
atomic_trace_st_post(env, addr, oi);
}
#endif
#else
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
PAGE_READ | PAGE_WRITE, retaddr);
DATA_TYPE ret;
ret = qatomic_xchg__nocheck(haddr, val);
@@ -103,8 +131,9 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
{ \
DATA_TYPE *haddr, ret; \
haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
PAGE_READ | PAGE_WRITE, retaddr); \
DATA_TYPE ret; \
ret = qatomic_##X(haddr, val); \
ATOMIC_MMU_CLEANUP; \
atomic_trace_rmw_post(env, addr, oi); \
@@ -134,8 +163,9 @@ GEN_ATOMIC_HELPER(xor_fetch)
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
{ \
XDATA_TYPE *haddr, cmp, old, new, val = xval; \
haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
PAGE_READ | PAGE_WRITE, retaddr); \
XDATA_TYPE cmp, old, new, val = xval; \
smp_mb(); \
cmp = qatomic_read__nocheck(haddr); \
do { \
@@ -158,7 +188,7 @@ GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
#undef GEN_ATOMIC_HELPER_FN
#endif /* DATA SIZE < 16 */
#endif /* DATA SIZE >= 16 */
#undef END
@@ -176,7 +206,8 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
ABI_TYPE cmpv, ABI_TYPE newv,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
PAGE_READ | PAGE_WRITE, retaddr);
DATA_TYPE ret;
#if DATA_SIZE == 16
@@ -189,11 +220,39 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr,
return BSWAP(ret);
}
#if DATA_SIZE < 16
#if DATA_SIZE >= 16
#if HAVE_ATOMIC128
ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
PAGE_READ, retaddr);
DATA_TYPE val;
val = atomic16_read(haddr);
ATOMIC_MMU_CLEANUP;
atomic_trace_ld_post(env, addr, oi);
return BSWAP(val);
}
void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
PAGE_WRITE, retaddr);
val = BSWAP(val);
atomic16_set(haddr, val);
ATOMIC_MMU_CLEANUP;
atomic_trace_st_post(env, addr, oi);
}
#endif
#else
ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
MemOpIdx oi, uintptr_t retaddr)
{
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr);
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE,
PAGE_READ | PAGE_WRITE, retaddr);
ABI_TYPE ret;
ret = qatomic_xchg__nocheck(haddr, BSWAP(val));
@@ -206,8 +265,9 @@ ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val,
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE val, MemOpIdx oi, uintptr_t retaddr) \
{ \
DATA_TYPE *haddr, ret; \
haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
DATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
PAGE_READ | PAGE_WRITE, retaddr); \
DATA_TYPE ret; \
ret = qatomic_##X(haddr, BSWAP(val)); \
ATOMIC_MMU_CLEANUP; \
atomic_trace_rmw_post(env, addr, oi); \
@@ -234,8 +294,9 @@ GEN_ATOMIC_HELPER(xor_fetch)
ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr, \
ABI_TYPE xval, MemOpIdx oi, uintptr_t retaddr) \
{ \
XDATA_TYPE *haddr, ldo, ldn, old, new, val = xval; \
haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, retaddr); \
XDATA_TYPE *haddr = atomic_mmu_lookup(env, addr, oi, DATA_SIZE, \
PAGE_READ | PAGE_WRITE, retaddr); \
XDATA_TYPE ldo, ldn, old, new, val = xval; \
smp_mb(); \
ldn = qatomic_read__nocheck(haddr); \
do { \
@@ -265,7 +326,7 @@ GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new)
#undef ADD
#undef GEN_ATOMIC_HELPER_FN
#endif /* DATA_SIZE < 16 */
#endif /* DATA_SIZE >= 16 */
#undef END
#endif /* DATA_SIZE > 1 */

View File

@@ -22,7 +22,6 @@
#include "sysemu/tcg.h"
#include "exec/exec-all.h"
#include "qemu/plugin.h"
#include "internal.h"
bool tcg_allowed;
@@ -82,8 +81,6 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc)
{
/* Prevent looping if already executing in a serial context. */
g_assert(!cpu_in_serial_context(cpu));
cpu->exception_index = EXCP_ATOMIC;
cpu_loop_exit_restore(cpu, pc);
}


@@ -38,7 +38,7 @@
#include "sysemu/cpu-timers.h"
#include "exec/replay-core.h"
#include "sysemu/tcg.h"
#include "exec/helper-proto-common.h"
#include "exec/helper-proto.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
@@ -159,7 +159,7 @@ uint32_t curr_cflags(CPUState *cpu)
*/
if (unlikely(cpu->singlestep_enabled)) {
cflags |= CF_NO_GOTO_TB | CF_NO_GOTO_PTR | CF_SINGLE_STEP | 1;
} else if (qatomic_read(&one_insn_per_tb)) {
} else if (singlestep) {
cflags |= CF_NO_GOTO_TB | 1;
} else if (qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
cflags |= CF_NO_GOTO_TB;
@@ -169,12 +169,13 @@ uint32_t curr_cflags(CPUState *cpu)
}
struct tb_desc {
vaddr pc;
uint64_t cs_base;
target_ulong pc;
target_ulong cs_base;
CPUArchState *env;
tb_page_addr_t page_addr0;
uint32_t flags;
uint32_t cflags;
uint32_t trace_vcpu_dstate;
};
static bool tb_lookup_cmp(const void *p, const void *d)
@@ -186,6 +187,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
tb_page_addr0(tb) == desc->page_addr0 &&
tb->cs_base == desc->cs_base &&
tb->flags == desc->flags &&
tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
tb_cflags(tb) == desc->cflags) {
/* check next page if needed */
tb_page_addr_t tb_phys_page1 = tb_page_addr1(tb);
@@ -193,7 +195,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
return true;
} else {
tb_page_addr_t phys_page1;
vaddr virt_page1;
target_ulong virt_page1;
/*
* We know that the first page matched, and an otherwise valid TB
@@ -214,8 +216,8 @@ static bool tb_lookup_cmp(const void *p, const void *d)
return false;
}
static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc,
uint64_t cs_base, uint32_t flags,
static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
target_ulong cs_base, uint32_t flags,
uint32_t cflags)
{
tb_page_addr_t phys_pc;
@@ -226,6 +228,7 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc,
desc.cs_base = cs_base;
desc.flags = flags;
desc.cflags = cflags;
desc.trace_vcpu_dstate = *cpu->trace_dstate;
desc.pc = pc;
phys_pc = get_page_addr_code(desc.env, pc);
if (phys_pc == -1) {
@@ -233,14 +236,14 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc,
}
desc.page_addr0 = phys_pc;
h = tb_hash_func(phys_pc, (cflags & CF_PCREL ? 0 : pc),
flags, cs_base, cflags);
flags, cflags, *cpu->trace_dstate);
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}
/* Might cause an exception, so have a longjmp destination ready */
static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc,
uint64_t cs_base, uint32_t flags,
uint32_t cflags)
static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
target_ulong cs_base,
uint32_t flags, uint32_t cflags)
{
TranslationBlock *tb;
CPUJumpCache *jc;
@@ -260,6 +263,7 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc,
jc->array[hash].pc == pc &&
tb->cs_base == cs_base &&
tb->flags == flags &&
tb->trace_vcpu_dstate == *cpu->trace_dstate &&
tb_cflags(tb) == cflags)) {
return tb;
}
@@ -278,6 +282,7 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc,
tb->pc == pc &&
tb->cs_base == cs_base &&
tb->flags == flags &&
tb->trace_vcpu_dstate == *cpu->trace_dstate &&
tb_cflags(tb) == cflags)) {
return tb;
}
@@ -292,16 +297,17 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc,
return tb;
}
static void log_cpu_exec(vaddr pc, CPUState *cpu,
static void log_cpu_exec(target_ulong pc, CPUState *cpu,
const TranslationBlock *tb)
{
if (qemu_log_in_addr_range(pc)) {
qemu_log_mask(CPU_LOG_EXEC,
"Trace %d: %p [%08" PRIx64
"/%016" VADDR_PRIx "/%08x/%08x] %s\n",
"Trace %d: %p [" TARGET_FMT_lx
"/" TARGET_FMT_lx "/%08x/%08x] %s\n",
cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
tb->flags, tb->cflags, lookup_symbol(pc));
#if defined(DEBUG_DISAS)
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
FILE *logfile = qemu_log_trylock();
if (logfile) {
@@ -313,17 +319,15 @@ static void log_cpu_exec(vaddr pc, CPUState *cpu,
#if defined(TARGET_I386)
flags |= CPU_DUMP_CCOP;
#endif
if (qemu_loglevel_mask(CPU_LOG_TB_VPU)) {
flags |= CPU_DUMP_VPU;
}
cpu_dump_state(cpu, logfile, flags);
qemu_log_unlock(logfile);
}
}
#endif /* DEBUG_DISAS */
}
}
static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc,
static bool check_for_breakpoints_slow(CPUState *cpu, target_ulong pc,
uint32_t *cflags)
{
CPUBreakpoint *bp;
@@ -389,7 +393,7 @@ static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc,
return false;
}
static inline bool check_for_breakpoints(CPUState *cpu, vaddr pc,
static inline bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
uint32_t *cflags)
{
return unlikely(!QTAILQ_EMPTY(&cpu->breakpoints)) &&
@@ -408,8 +412,7 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
{
CPUState *cpu = env_cpu(env);
TranslationBlock *tb;
vaddr pc;
uint64_t cs_base;
target_ulong cs_base, pc;
uint32_t flags, cflags;
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
@@ -485,10 +488,10 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
cc->set_pc(cpu, last_tb->pc);
}
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
vaddr pc = log_pc(cpu, last_tb);
target_ulong pc = log_pc(cpu, last_tb);
if (qemu_log_in_addr_range(pc)) {
qemu_log("Stopped execution of TB chain before %p [%016"
VADDR_PRIx "] %s\n",
qemu_log("Stopped execution of TB chain before %p ["
TARGET_FMT_lx "] %s\n",
last_tb->tc.ptr, pc, lookup_symbol(pc));
}
}
@@ -526,49 +529,11 @@ static void cpu_exec_exit(CPUState *cpu)
}
}
static void cpu_exec_longjmp_cleanup(CPUState *cpu)
{
/* Non-buggy compilers preserve this; assert the correct value. */
g_assert(cpu == current_cpu);
#ifdef CONFIG_USER_ONLY
clear_helper_retaddr();
if (have_mmap_lock()) {
mmap_unlock();
}
#else
/*
* For softmmu, a tlb_fill fault during translation will land here,
* and we need to release any page locks held. In system mode we
* have one tcg_ctx per thread, so we know it was this cpu doing
* the translation.
*
* Alternative 1: Install a cleanup to be called via an exception
* handling safe longjmp. It seems plausible that all our hosts
* support such a thing. We'd have to properly register unwind info
* for the JIT for EH, rather than just for GDB.
*
* Alternative 2: Set and restore cpu->jmp_env in tb_gen_code to
* capture the cpu_loop_exit longjmp, perform the cleanup, and
* jump again to arrive here.
*/
if (tcg_ctx->gen_tb) {
tb_unlock_pages(tcg_ctx->gen_tb);
tcg_ctx->gen_tb = NULL;
}
#endif
if (qemu_mutex_iothread_locked()) {
qemu_mutex_unlock_iothread();
}
assert_no_pages_locked();
}
void cpu_exec_step_atomic(CPUState *cpu)
{
CPUArchState *env = cpu->env_ptr;
TranslationBlock *tb;
vaddr pc;
uint64_t cs_base;
target_ulong cs_base, pc;
uint32_t flags, cflags;
int tb_exit;
@@ -605,7 +570,16 @@ void cpu_exec_step_atomic(CPUState *cpu)
cpu_tb_exec(cpu, tb, &tb_exit);
cpu_exec_exit(cpu);
} else {
cpu_exec_longjmp_cleanup(cpu);
#ifndef CONFIG_SOFTMMU
clear_helper_retaddr();
if (have_mmap_lock()) {
mmap_unlock();
}
#endif
if (qemu_mutex_iothread_locked()) {
qemu_mutex_unlock_iothread();
}
assert_no_pages_locked();
}
/*
@@ -807,7 +781,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
* Ensure zeroing happens before reading cpu->exit_request or
* cpu->interrupt_request (see also smp_wmb in cpu_exit())
*/
qatomic_set_mb(&cpu_neg(cpu)->icount_decr.u16.high, 0);
qatomic_mb_set(&cpu_neg(cpu)->icount_decr.u16.high, 0);
if (unlikely(qatomic_read(&cpu->interrupt_request))) {
int interrupt_request;
@@ -910,8 +884,8 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
}
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
vaddr pc, TranslationBlock **last_tb,
int *tb_exit)
target_ulong pc,
TranslationBlock **last_tb, int *tb_exit)
{
int32_t insns_left;
@@ -972,8 +946,7 @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
while (!cpu_handle_interrupt(cpu, &last_tb)) {
TranslationBlock *tb;
vaddr pc;
uint64_t cs_base;
target_ulong cs_base, pc;
uint32_t flags, cflags;
cpu_get_tb_cpu_state(cpu->env_ptr, &pc, &cs_base, &flags);
@@ -1051,7 +1024,20 @@ static int cpu_exec_setjmp(CPUState *cpu, SyncClocks *sc)
{
/* Prepare setjmp context for exception handling. */
if (unlikely(sigsetjmp(cpu->jmp_env, 0) != 0)) {
cpu_exec_longjmp_cleanup(cpu);
/* Non-buggy compilers preserve this; assert the correct value. */
g_assert(cpu == current_cpu);
#ifndef CONFIG_SOFTMMU
clear_helper_retaddr();
if (have_mmap_lock()) {
mmap_unlock();
}
#endif
if (qemu_mutex_iothread_locked()) {
qemu_mutex_unlock_iothread();
}
assert_no_pages_locked();
}
return cpu_exec_loop(cpu, sc);


@@ -10,7 +10,6 @@
#define ACCEL_TCG_INTERNAL_H
#include "exec/exec-all.h"
#include "exec/translate-all.h"
/*
* Access to the various translations structures need to be serialised
@@ -18,10 +17,10 @@
* memory related structures are protected with mmap_lock.
* In !user-mode we use per-page locks.
*/
#ifdef CONFIG_USER_ONLY
#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
#else
#ifdef CONFIG_SOFTMMU
#define assert_memory_lock()
#else
#define assert_memory_lock() tcg_debug_assert(have_mmap_lock())
#endif
#if defined(CONFIG_SOFTMMU) && defined(CONFIG_DEBUG_TCG)
@@ -36,32 +35,6 @@ static inline void page_table_config_init(void) { }
void page_table_config_init(void);
#endif
#ifdef CONFIG_USER_ONLY
/*
* For user-only, page_protect sets the page read-only.
* Since most execution is already on read-only pages, and we'd need to
* account for other TBs on the same page, defer undoing any page protection
* until we receive the write fault.
*/
static inline void tb_lock_page0(tb_page_addr_t p0)
{
page_protect(p0);
}
static inline void tb_lock_page1(tb_page_addr_t p0, tb_page_addr_t p1)
{
page_protect(p1);
}
static inline void tb_unlock_page1(tb_page_addr_t p0, tb_page_addr_t p1) { }
static inline void tb_unlock_pages(TranslationBlock *tb) { }
#else
void tb_lock_page0(tb_page_addr_t);
void tb_lock_page1(tb_page_addr_t, tb_page_addr_t);
void tb_unlock_page1(tb_page_addr_t, tb_page_addr_t);
void tb_unlock_pages(TranslationBlock *);
#endif
#ifdef CONFIG_SOFTMMU
void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
unsigned size,
@@ -69,19 +42,20 @@ void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
G_NORETURN void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr);
#endif /* CONFIG_SOFTMMU */
TranslationBlock *tb_gen_code(CPUState *cpu, vaddr pc,
uint64_t cs_base, uint32_t flags,
TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc,
target_ulong cs_base, uint32_t flags,
int cflags);
void page_init(void);
void tb_htable_init(void);
void tb_reset_jump(TranslationBlock *tb, int n);
TranslationBlock *tb_link_page(TranslationBlock *tb);
TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb_page_addr_t phys_page2);
bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc);
void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
uintptr_t host_pc);
/* Return the current PC from CPU, which may be cached in TB. */
static inline vaddr log_pc(CPUState *cpu, const TranslationBlock *tb)
static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb)
{
if (tb_cflags(tb) & CF_PCREL) {
return cpu->cc->get_pc(cpu);
@@ -90,52 +64,7 @@ static inline vaddr log_pc(CPUState *cpu, const TranslationBlock *tb)
}
}
/*
* Return true if CS is not running in parallel with other cpus, either
* because there are no other cpus or we are within an exclusive context.
*/
static inline bool cpu_in_serial_context(CPUState *cs)
{
return !(cs->tcg_cflags & CF_PARALLEL) || cpu_in_exclusive_context(cs);
}
extern int64_t max_delay;
extern int64_t max_advance;
extern bool one_insn_per_tb;
/**
* tcg_req_mo:
* @type: TCGBar
*
* Filter @type to the barrier that is required for the guest
* memory ordering vs the host memory ordering. A non-zero
* result indicates that some barrier is required.
*
* If TCG_GUEST_DEFAULT_MO is not defined, assume that the
* guest requires strict ordering.
*
* This is a macro so that it's constant even without optimization.
*/
#ifdef TCG_GUEST_DEFAULT_MO
# define tcg_req_mo(type) \
((type) & TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO)
#else
# define tcg_req_mo(type) ((type) & ~TCG_TARGET_DEFAULT_MO)
#endif
/**
* cpu_req_mo:
* @type: TCGBar
*
* If tcg_req_mo indicates a barrier for @type is required
* for the guest memory model, issue a host memory barrier.
*/
#define cpu_req_mo(type) \
do { \
if (tcg_req_mo(type)) { \
smp_mb(); \
} \
} while (0)
#endif /* ACCEL_TCG_INTERNAL_H */
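A worked sketch of the tcg_req_mo() filter above. The TCG_MO_* bit values mirror tcg.h, but the guest/host *_DEFAULT_MO choices below are made-up examples, not any real configuration:

#include <stdio.h>

#define TCG_MO_LD_LD  0x01
#define TCG_MO_ST_LD  0x02
#define TCG_MO_LD_ST  0x04
#define TCG_MO_ST_ST  0x08
#define TCG_MO_ALL    0x0f

#define TCG_GUEST_DEFAULT_MO   TCG_MO_ALL   /* e.g. a strongly ordered guest */
#define TCG_TARGET_DEFAULT_MO  0            /* e.g. a weakly ordered host */

#define tcg_req_mo(type) \
    ((type) & TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO)

int main(void)
{
    /* A load-load ordering requirement survives the filter, so a host
     * barrier must be emitted for it, exactly what cpu_req_mo() does. */
    printf("barrier bits required: 0x%x\n", tcg_req_mo(TCG_MO_LD_LD));
    return 0;
}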


@@ -26,7 +26,7 @@ uint32_t cpu_lduw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
int mmu_idx, uintptr_t ra)
{
MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx);
return cpu_ldw_mmu(env, addr, oi, ra);
return cpu_ldw_be_mmu(env, addr, oi, ra);
}
int cpu_ldsw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
@@ -39,21 +39,21 @@ uint32_t cpu_ldl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
int mmu_idx, uintptr_t ra)
{
MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx);
return cpu_ldl_mmu(env, addr, oi, ra);
return cpu_ldl_be_mmu(env, addr, oi, ra);
}
uint64_t cpu_ldq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr,
int mmu_idx, uintptr_t ra)
{
MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx);
return cpu_ldq_mmu(env, addr, oi, ra);
return cpu_ldq_be_mmu(env, addr, oi, ra);
}
uint32_t cpu_lduw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
int mmu_idx, uintptr_t ra)
{
MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx);
return cpu_ldw_mmu(env, addr, oi, ra);
return cpu_ldw_le_mmu(env, addr, oi, ra);
}
int cpu_ldsw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
@@ -66,14 +66,14 @@ uint32_t cpu_ldl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
int mmu_idx, uintptr_t ra)
{
MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);
return cpu_ldl_mmu(env, addr, oi, ra);
return cpu_ldl_le_mmu(env, addr, oi, ra);
}
uint64_t cpu_ldq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr,
int mmu_idx, uintptr_t ra)
{
MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx);
return cpu_ldq_mmu(env, addr, oi, ra);
return cpu_ldq_le_mmu(env, addr, oi, ra);
}
void cpu_stb_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
@@ -87,42 +87,42 @@ void cpu_stw_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
int mmu_idx, uintptr_t ra)
{
MemOpIdx oi = make_memop_idx(MO_BEUW | MO_UNALN, mmu_idx);
cpu_stw_mmu(env, addr, val, oi, ra);
cpu_stw_be_mmu(env, addr, val, oi, ra);
}
void cpu_stl_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
int mmu_idx, uintptr_t ra)
{
MemOpIdx oi = make_memop_idx(MO_BEUL | MO_UNALN, mmu_idx);
cpu_stl_mmu(env, addr, val, oi, ra);
cpu_stl_be_mmu(env, addr, val, oi, ra);
}
void cpu_stq_be_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
int mmu_idx, uintptr_t ra)
{
MemOpIdx oi = make_memop_idx(MO_BEUQ | MO_UNALN, mmu_idx);
cpu_stq_mmu(env, addr, val, oi, ra);
cpu_stq_be_mmu(env, addr, val, oi, ra);
}
void cpu_stw_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
int mmu_idx, uintptr_t ra)
{
MemOpIdx oi = make_memop_idx(MO_LEUW | MO_UNALN, mmu_idx);
cpu_stw_mmu(env, addr, val, oi, ra);
cpu_stw_le_mmu(env, addr, val, oi, ra);
}
void cpu_stl_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint32_t val,
int mmu_idx, uintptr_t ra)
{
MemOpIdx oi = make_memop_idx(MO_LEUL | MO_UNALN, mmu_idx);
cpu_stl_mmu(env, addr, val, oi, ra);
cpu_stl_le_mmu(env, addr, val, oi, ra);
}
void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
int mmu_idx, uintptr_t ra)
{
MemOpIdx oi = make_memop_idx(MO_LEUQ | MO_UNALN, mmu_idx);
cpu_stq_mmu(env, addr, val, oi, ra);
cpu_stq_le_mmu(env, addr, val, oi, ra);
}
/*--------------------------*/
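All of the wrappers above funnel through a MemOpIdx that packs the memory-op flags and the mmu index into one integer. A sketch of that packing (the flag value is an illustrative stand-in, not the real MemOp encoding; the low-4-bit layout follows make_memop_idx()):

#include <stdio.h>

typedef unsigned int MemOpIdx;

/* Memory-op flags in the high bits, mmu index in the low four. */
static MemOpIdx make_memop_idx(unsigned int op, unsigned int mmu_idx)
{
    return (op << 4) | mmu_idx;
}

int main(void)
{
    unsigned int mo_example = 0x12;              /* pretend MO_BEUL | MO_UNALN */
    MemOpIdx oi = make_memop_idx(mo_example, 2); /* mmu_idx 2 */

    printf("memop=0x%x mmu_idx=%u\n", oi >> 4, oi & 15);
    return 0;
}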


@@ -10,18 +10,18 @@ tcg_ss.add(files(
'translator.c',
))
tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c'))
tcg_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_false: files('user-exec-stub.c'))
tcg_ss.add(when: 'CONFIG_SOFTMMU', if_false: files('user-exec-stub.c'))
tcg_ss.add(when: 'CONFIG_PLUGIN', if_true: [files('plugin-gen.c')])
tcg_ss.add(when: libdw, if_true: files('debuginfo.c'))
tcg_ss.add(when: 'CONFIG_LINUX', if_true: files('perf.c'))
specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
specific_ss.add(when: ['CONFIG_SYSTEM_ONLY', 'CONFIG_TCG'], if_true: files(
specific_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
'cputlb.c',
'monitor.c',
))
tcg_module_ss.add(when: ['CONFIG_SYSTEM_ONLY', 'CONFIG_TCG'], if_true: files(
tcg_module_ss.add(when: ['CONFIG_SOFTMMU', 'CONFIG_TCG'], if_true: files(
'tcg-accel-ops.c',
'tcg-accel-ops-mttcg.c',
'tcg-accel-ops-icount.c',


@@ -7,7 +7,6 @@
*/
#include "qemu/osdep.h"
#include "qemu/accel.h"
#include "qapi/error.h"
#include "qapi/type-helpers.h"
#include "qapi/qapi-commands-machine.h"
@@ -15,7 +14,6 @@
#include "sysemu/cpus.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
#include "tcg/tcg.h"
#include "internal.h"
@@ -38,18 +36,6 @@ static void dump_drift_info(GString *buf)
}
}
static void dump_accel_info(GString *buf)
{
AccelState *accel = current_accel();
bool one_insn_per_tb = object_property_get_bool(OBJECT(accel),
"one-insn-per-tb",
&error_fatal);
g_string_append_printf(buf, "Accelerator settings:\n");
g_string_append_printf(buf, "one-insn-per-tb: %s\n\n",
one_insn_per_tb ? "on" : "off");
}
HumanReadableText *qmp_x_query_jit(Error **errp)
{
g_autoptr(GString) buf = g_string_new("");
@@ -59,7 +45,6 @@ HumanReadableText *qmp_x_query_jit(Error **errp)
return NULL;
}
dump_accel_info(buf);
dump_exec_info(buf);
dump_drift_info(buf);
@@ -81,6 +66,37 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
return human_readable_text_from_str(buf);
}
#ifdef CONFIG_PROFILER
int64_t dev_time;
HumanReadableText *qmp_x_query_profile(Error **errp)
{
g_autoptr(GString) buf = g_string_new("");
static int64_t last_cpu_exec_time;
int64_t cpu_exec_time;
int64_t delta;
cpu_exec_time = tcg_cpu_exec_time();
delta = cpu_exec_time - last_cpu_exec_time;
g_string_append_printf(buf, "async time %" PRId64 " (%0.3f)\n",
dev_time, dev_time / (double)NANOSECONDS_PER_SECOND);
g_string_append_printf(buf, "qemu time %" PRId64 " (%0.3f)\n",
delta, delta / (double)NANOSECONDS_PER_SECOND);
last_cpu_exec_time = cpu_exec_time;
dev_time = 0;
return human_readable_text_from_str(buf);
}
#else
HumanReadableText *qmp_x_query_profile(Error **errp)
{
error_setg(errp, "Internal profiler not compiled");
return NULL;
}
#endif
static void hmp_tcg_register(void)
{
monitor_register_hmp_info_hrt("jit", qmp_x_query_jit);


@@ -111,8 +111,6 @@ static void write_perfmap_entry(const void *start, size_t insn,
}
static FILE *jitdump;
static size_t perf_marker_size;
static void *perf_marker = MAP_FAILED;
#define JITHEADER_MAGIC 0x4A695444
#define JITHEADER_VERSION 1
@@ -192,6 +190,7 @@ void perf_enable_jitdump(void)
{
struct jitheader header;
char jitdump_file[32];
void *perf_marker;
if (!use_rt_clock) {
warn_report("CLOCK_MONOTONIC is not available, proceeding without jitdump");
@@ -211,8 +210,7 @@ void perf_enable_jitdump(void)
* PERF_RECORD_MMAP or PERF_RECORD_MMAP2 event is of the form jit-%d.dump
* and will process it as a jitdump file.
*/
perf_marker_size = qemu_real_host_page_size();
perf_marker = mmap(NULL, perf_marker_size, PROT_READ | PROT_EXEC,
perf_marker = mmap(NULL, qemu_real_host_page_size(), PROT_READ | PROT_EXEC,
MAP_PRIVATE, fileno(jitdump), 0);
if (perf_marker == MAP_FAILED) {
warn_report("Could not map %s: %s, proceeding without jitdump",
@@ -313,8 +311,7 @@ void perf_report_code(uint64_t guest_pc, TranslationBlock *tb,
const void *start)
{
struct debuginfo_query *q;
size_t insn, start_words;
uint64_t *gen_insn_data;
size_t insn;
if (!perfmap && !jitdump) {
return;
@@ -328,12 +325,9 @@ void perf_report_code(uint64_t guest_pc, TranslationBlock *tb,
debuginfo_lock();
/* Query debuginfo for each guest instruction. */
gen_insn_data = tcg_ctx->gen_insn_data;
start_words = tcg_ctx->insn_start_words;
for (insn = 0; insn < tb->icount; insn++) {
/* FIXME: This replicates the restore_state_to_opc() logic. */
q[insn].address = gen_insn_data[insn * start_words + 0];
q[insn].address = tcg_ctx->gen_insn_data[insn][0];
if (tb_cflags(tb) & CF_PCREL) {
q[insn].address |= (guest_pc & TARGET_PAGE_MASK);
} else {
@@ -374,11 +368,6 @@ void perf_exit(void)
perfmap = NULL;
}
if (perf_marker != MAP_FAILED) {
munmap(perf_marker, perf_marker_size);
perf_marker = MAP_FAILED;
}
if (jitdump) {
fclose(jitdump);
jitdump = NULL;


@@ -43,18 +43,12 @@
* CPU's index into a TCG temp, since the first callback did it already.
*/
#include "qemu/osdep.h"
#include "cpu.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op.h"
#include "exec/exec-all.h"
#include "exec/plugin-gen.h"
#include "exec/translator.h"
#include "exec/helper-proto-common.h"
#define HELPER_H "accel/tcg/plugin-helpers.h"
#include "exec/helper-info.c.inc"
#undef HELPER_H
#ifdef CONFIG_SOFTMMU
# define CONFIG_SOFTMMU_GATE 1
@@ -98,6 +92,27 @@ void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index,
void *userdata)
{ }
static void do_gen_mem_cb(TCGv vaddr, uint32_t info)
{
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
TCGv_i64 vaddr64 = tcg_temp_ebb_new_i64();
TCGv_ptr udata = tcg_temp_ebb_new_ptr();
tcg_gen_movi_i32(meminfo, info);
tcg_gen_movi_ptr(udata, 0);
tcg_gen_ld_i32(cpu_index, cpu_env,
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
tcg_gen_extu_tl_i64(vaddr64, vaddr);
gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, vaddr64, udata);
tcg_temp_free_ptr(udata);
tcg_temp_free_i64(vaddr64);
tcg_temp_free_i32(meminfo);
tcg_temp_free_i32(cpu_index);
}
static void gen_empty_udata_cb(void)
{
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
@@ -130,22 +145,9 @@ static void gen_empty_inline_cb(void)
tcg_temp_free_i64(val);
}
static void gen_empty_mem_cb(TCGv_i64 addr, uint32_t info)
static void gen_empty_mem_cb(TCGv addr, uint32_t info)
{
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
TCGv_i32 meminfo = tcg_temp_ebb_new_i32();
TCGv_ptr udata = tcg_temp_ebb_new_ptr();
tcg_gen_movi_i32(meminfo, info);
tcg_gen_movi_ptr(udata, 0);
tcg_gen_ld_i32(cpu_index, cpu_env,
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
gen_helper_plugin_vcpu_mem_cb(cpu_index, meminfo, addr, udata);
tcg_temp_free_ptr(udata);
tcg_temp_free_i32(meminfo);
tcg_temp_free_i32(cpu_index);
do_gen_mem_cb(addr, info);
}
/*
@@ -200,17 +202,35 @@ static void plugin_gen_empty_callback(enum plugin_gen_from from)
}
}
void plugin_gen_empty_mem_callback(TCGv_i64 addr, uint32_t info)
union mem_gen_fn {
void (*mem_fn)(TCGv, uint32_t);
void (*inline_fn)(void);
};
static void gen_mem_wrapped(enum plugin_gen_cb type,
const union mem_gen_fn *f, TCGv addr,
uint32_t info, bool is_mem)
{
enum qemu_plugin_mem_rw rw = get_plugin_meminfo_rw(info);
gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, PLUGIN_GEN_CB_MEM, rw);
gen_empty_mem_cb(addr, info);
gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, type, rw);
if (is_mem) {
f->mem_fn(addr, info);
} else {
f->inline_fn();
}
tcg_gen_plugin_cb_end();
}
gen_plugin_cb_start(PLUGIN_GEN_FROM_MEM, PLUGIN_GEN_CB_INLINE, rw);
gen_empty_inline_cb();
tcg_gen_plugin_cb_end();
void plugin_gen_empty_mem_callback(TCGv addr, uint32_t info)
{
union mem_gen_fn fn;
fn.mem_fn = gen_empty_mem_cb;
gen_mem_wrapped(PLUGIN_GEN_CB_MEM, &fn, addr, info, true);
fn.inline_fn = gen_empty_inline_cb;
gen_mem_wrapped(PLUGIN_GEN_CB_INLINE, &fn, 0, info, false);
}
static TCGOp *find_op(TCGOp *op, TCGOpcode opc)
@@ -260,6 +280,33 @@ static TCGOp *copy_op(TCGOp **begin_op, TCGOp *op, TCGOpcode opc)
return op;
}
static TCGOp *copy_extu_i32_i64(TCGOp **begin_op, TCGOp *op)
{
if (TCG_TARGET_REG_BITS == 32) {
/* mov_i32 */
op = copy_op(begin_op, op, INDEX_op_mov_i32);
/* mov_i32 w/ $0 */
op = copy_op(begin_op, op, INDEX_op_mov_i32);
} else {
/* extu_i32_i64 */
op = copy_op(begin_op, op, INDEX_op_extu_i32_i64);
}
return op;
}
static TCGOp *copy_mov_i64(TCGOp **begin_op, TCGOp *op)
{
if (TCG_TARGET_REG_BITS == 32) {
/* 2x mov_i32 */
op = copy_op(begin_op, op, INDEX_op_mov_i32);
op = copy_op(begin_op, op, INDEX_op_mov_i32);
} else {
/* mov_i64 */
op = copy_op(begin_op, op, INDEX_op_mov_i64);
}
return op;
}
static TCGOp *copy_const_ptr(TCGOp **begin_op, TCGOp *op, void *ptr)
{
if (UINTPTR_MAX == UINT32_MAX) {
@@ -274,6 +321,18 @@ static TCGOp *copy_const_ptr(TCGOp **begin_op, TCGOp *op, void *ptr)
return op;
}
static TCGOp *copy_extu_tl_i64(TCGOp **begin_op, TCGOp *op)
{
if (TARGET_LONG_BITS == 32) {
/* extu_i32_i64 */
op = copy_extu_i32_i64(begin_op, op);
} else {
/* mov_i64 */
op = copy_mov_i64(begin_op, op);
}
return op;
}
static TCGOp *copy_ld_i64(TCGOp **begin_op, TCGOp *op)
{
if (TCG_TARGET_REG_BITS == 32) {
@@ -418,6 +477,9 @@ static TCGOp *append_mem_cb(const struct qemu_plugin_dyn_cb *cb,
tcg_debug_assert(begin_op && begin_op->opc == INDEX_op_ld_i32);
}
/* extu_tl_i64 */
op = copy_extu_tl_i64(&begin_op, op);
if (type == PLUGIN_GEN_CB_MEM) {
/* call */
op = copy_call(&begin_op, op, HELPER(plugin_vcpu_mem_cb),


@@ -35,16 +35,16 @@
#define TB_JMP_ADDR_MASK (TB_JMP_PAGE_SIZE - 1)
#define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE)
static inline unsigned int tb_jmp_cache_hash_page(vaddr pc)
static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
{
vaddr tmp;
target_ulong tmp;
tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS));
return (tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK;
}
static inline unsigned int tb_jmp_cache_hash_func(vaddr pc)
static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
{
vaddr tmp;
target_ulong tmp;
tmp = pc ^ (pc >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS));
return (((tmp >> (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS)) & TB_JMP_PAGE_MASK)
| (tmp & TB_JMP_ADDR_MASK));
@@ -53,7 +53,7 @@ static inline unsigned int tb_jmp_cache_hash_func(vaddr pc)
#else
/* In user-mode we can get better hashing because we do not have a TLB */
static inline unsigned int tb_jmp_cache_hash_func(vaddr pc)
static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
{
return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1);
}
@@ -61,10 +61,10 @@ static inline unsigned int tb_jmp_cache_hash_func(vaddr pc)
#endif /* CONFIG_SOFTMMU */
static inline
uint32_t tb_hash_func(tb_page_addr_t phys_pc, vaddr pc,
uint32_t flags, uint64_t flags2, uint32_t cf_mask)
uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, uint32_t flags,
uint32_t cf_mask, uint32_t trace_vcpu_dstate)
{
return qemu_xxhash8(phys_pc, pc, flags2, flags, cf_mask);
return qemu_xxhash7(phys_pc, pc, flags, cf_mask, trace_vcpu_dstate);
}
#endif
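A worked example of the user-mode tb_jmp_cache_hash_func() variant above, assuming TB_JMP_CACHE_BITS == 12 (a 4096-entry cache), which matches the usual QEMU configuration:

#include <inttypes.h>
#include <stdio.h>

#define TB_JMP_CACHE_BITS 12
#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)

static unsigned int jmp_cache_hash(uint64_t pc)
{
    /* Fold the high bits into the low ones, then mask to the cache size. */
    return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1);
}

int main(void)
{
    uint64_t pc = 0x400812f4;
    printf("pc=0x%" PRIx64 " -> slot %u\n", pc, jmp_cache_hash(pc));
    return 0;
}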


@@ -21,7 +21,7 @@ struct CPUJumpCache {
struct rcu_head rcu;
struct {
TranslationBlock *tb;
vaddr pc;
target_ulong pc;
} array[TB_JMP_CACHE_SIZE];
};


@@ -50,6 +50,7 @@ static bool tb_cmp(const void *ap, const void *bp)
a->cs_base == b->cs_base &&
a->flags == b->flags &&
(tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
tb_page_addr0(a) == tb_page_addr0(b) &&
tb_page_addr1(a) == tb_page_addr1(b));
}
@@ -70,7 +71,17 @@ typedef struct PageDesc PageDesc;
*/
#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
static inline void tb_lock_pages(const TranslationBlock *tb) { }
static inline void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
PageDesc **ret_p2, tb_page_addr_t phys2,
bool alloc)
{
*ret_p1 = NULL;
*ret_p2 = NULL;
}
static inline void page_unlock(PageDesc *pd) { }
static inline void page_lock_tb(const TranslationBlock *tb) { }
static inline void page_unlock_tb(const TranslationBlock *tb) { }
/*
* For user-only, since we are protecting all of memory with a single lock,
@@ -86,9 +97,9 @@ static void tb_remove_all(void)
}
/* Call with mmap_lock held. */
static void tb_record(TranslationBlock *tb)
static void tb_record(TranslationBlock *tb, PageDesc *p1, PageDesc *p2)
{
vaddr addr;
target_ulong addr;
int flags;
assert_memory_lock();
@@ -381,108 +392,12 @@ static void page_lock(PageDesc *pd)
qemu_spin_lock(&pd->lock);
}
/* Like qemu_spin_trylock, returns false on success */
static bool page_trylock(PageDesc *pd)
{
bool busy = qemu_spin_trylock(&pd->lock);
if (!busy) {
page_lock__debug(pd);
}
return busy;
}
static void page_unlock(PageDesc *pd)
{
qemu_spin_unlock(&pd->lock);
page_unlock__debug(pd);
}
void tb_lock_page0(tb_page_addr_t paddr)
{
page_lock(page_find_alloc(paddr >> TARGET_PAGE_BITS, true));
}
void tb_lock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
{
tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
PageDesc *pd0, *pd1;
if (pindex0 == pindex1) {
/* Identical pages, and the first page is already locked. */
return;
}
pd1 = page_find_alloc(pindex1, true);
if (pindex0 < pindex1) {
/* Correct locking order, we may block. */
page_lock(pd1);
return;
}
/* Incorrect locking order, we cannot block lest we deadlock. */
if (!page_trylock(pd1)) {
return;
}
/*
* Drop the lock on page0 and get both page locks in the right order.
* Restart translation via longjmp.
*/
pd0 = page_find_alloc(pindex0, false);
page_unlock(pd0);
page_lock(pd1);
page_lock(pd0);
siglongjmp(tcg_ctx->jmp_trans, -3);
}
void tb_unlock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
{
tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
if (pindex0 != pindex1) {
page_unlock(page_find_alloc(pindex1, false));
}
}
static void tb_lock_pages(TranslationBlock *tb)
{
tb_page_addr_t paddr0 = tb_page_addr0(tb);
tb_page_addr_t paddr1 = tb_page_addr1(tb);
tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
if (unlikely(paddr0 == -1)) {
return;
}
if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
if (pindex0 < pindex1) {
page_lock(page_find_alloc(pindex0, true));
page_lock(page_find_alloc(pindex1, true));
return;
}
page_lock(page_find_alloc(pindex1, true));
}
page_lock(page_find_alloc(pindex0, true));
}
void tb_unlock_pages(TranslationBlock *tb)
{
tb_page_addr_t paddr0 = tb_page_addr0(tb);
tb_page_addr_t paddr1 = tb_page_addr1(tb);
tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
if (unlikely(paddr0 == -1)) {
return;
}
if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
page_unlock(page_find_alloc(pindex1, false));
}
page_unlock(page_find_alloc(pindex0, false));
}
static inline struct page_entry *
page_entry_new(PageDesc *pd, tb_page_addr_t index)
{
@@ -506,10 +421,13 @@ static void page_entry_destroy(gpointer p)
/* returns false on success */
static bool page_entry_trylock(struct page_entry *pe)
{
bool busy = page_trylock(pe->pd);
bool busy;
busy = qemu_spin_trylock(&pe->pd->lock);
if (!busy) {
g_assert(!pe->locked);
pe->locked = true;
page_lock__debug(pe->pd);
}
return busy;
}
@@ -687,7 +605,8 @@ static void tb_remove_all(void)
* Add the tb in the target page and protect it if necessary.
* Called with @p->lock held.
*/
static void tb_page_add(PageDesc *p, TranslationBlock *tb, unsigned int n)
static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
unsigned int n)
{
bool page_already_protected;
@@ -707,21 +626,15 @@ static void tb_page_add(PageDesc *p, TranslationBlock *tb, unsigned int n)
}
}
static void tb_record(TranslationBlock *tb)
static void tb_record(TranslationBlock *tb, PageDesc *p1, PageDesc *p2)
{
tb_page_addr_t paddr0 = tb_page_addr0(tb);
tb_page_addr_t paddr1 = tb_page_addr1(tb);
tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
assert(paddr0 != -1);
if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
tb_page_add(page_find_alloc(pindex1, false), tb, 1);
tb_page_add(p1, tb, 0);
if (unlikely(p2)) {
tb_page_add(p2, tb, 1);
}
tb_page_add(page_find_alloc(pindex0, false), tb, 0);
}
static void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
static inline void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
{
TranslationBlock *tb1;
uintptr_t *pprev;
@@ -741,16 +654,74 @@ static void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
static void tb_remove(TranslationBlock *tb)
{
tb_page_addr_t paddr0 = tb_page_addr0(tb);
tb_page_addr_t paddr1 = tb_page_addr1(tb);
tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
PageDesc *pd;
assert(paddr0 != -1);
if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
tb_page_remove(page_find_alloc(pindex1, false), tb);
pd = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
tb_page_remove(pd, tb);
if (unlikely(tb->page_addr[1] != -1)) {
pd = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
tb_page_remove(pd, tb);
}
}
static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
PageDesc **ret_p2, tb_page_addr_t phys2, bool alloc)
{
PageDesc *p1, *p2;
tb_page_addr_t page1;
tb_page_addr_t page2;
assert_memory_lock();
g_assert(phys1 != -1);
page1 = phys1 >> TARGET_PAGE_BITS;
page2 = phys2 >> TARGET_PAGE_BITS;
p1 = page_find_alloc(page1, alloc);
if (ret_p1) {
*ret_p1 = p1;
}
if (likely(phys2 == -1)) {
page_lock(p1);
return;
} else if (page1 == page2) {
page_lock(p1);
if (ret_p2) {
*ret_p2 = p1;
}
return;
}
p2 = page_find_alloc(page2, alloc);
if (ret_p2) {
*ret_p2 = p2;
}
if (page1 < page2) {
page_lock(p1);
page_lock(p2);
} else {
page_lock(p2);
page_lock(p1);
}
}
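page_lock_pair() avoids deadlock by always acquiring the two page locks in ascending page order. A generic sketch of that discipline with plain pthreads (hypothetical locks, not QEMU code; the pointer comparison stands in for QEMU's comparison of page indices):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t page0_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t page1_lock = PTHREAD_MUTEX_INITIALIZER;

/* Two threads that each need the same pair of locks take them in a
 * global order, so neither can hold one lock while waiting forever
 * for the other thread's second lock. */
static void lock_pair_ordered(pthread_mutex_t *a, pthread_mutex_t *b)
{
    if (a == b) {
        pthread_mutex_lock(a);
    } else if (a < b) {
        pthread_mutex_lock(a);
        pthread_mutex_lock(b);
    } else {
        pthread_mutex_lock(b);
        pthread_mutex_lock(a);
    }
}

int main(void)
{
    lock_pair_ordered(&page1_lock, &page0_lock);
    puts("both page locks held, acquired in a global order");
    return 0;
}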
/* lock the page(s) of a TB in the correct acquisition order */
static void page_lock_tb(const TranslationBlock *tb)
{
page_lock_pair(NULL, tb_page_addr0(tb), NULL, tb_page_addr1(tb), false);
}
static void page_unlock_tb(const TranslationBlock *tb)
{
PageDesc *p1 = page_find(tb_page_addr0(tb) >> TARGET_PAGE_BITS);
page_unlock(p1);
if (unlikely(tb_page_addr1(tb) != -1)) {
PageDesc *p2 = page_find(tb_page_addr1(tb) >> TARGET_PAGE_BITS);
if (p2 != p1) {
page_unlock(p2);
}
}
tb_page_remove(page_find_alloc(pindex0, false), tb);
}
#endif /* CONFIG_USER_ONLY */
@@ -775,7 +746,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
tcg_region_reset_all();
/* XXX: flush processor icache at this point if cache flush is expensive */
qatomic_inc(&tb_ctx.tb_flush_count);
qatomic_mb_set(&tb_ctx.tb_flush_count, tb_ctx.tb_flush_count + 1);
done:
mmap_unlock();
@@ -787,9 +758,9 @@ done:
void tb_flush(CPUState *cpu)
{
if (tcg_enabled()) {
unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
unsigned tb_flush_count = qatomic_mb_read(&tb_ctx.tb_flush_count);
if (cpu_in_serial_context(cpu)) {
if (cpu_in_exclusive_context(cpu)) {
do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
} else {
async_safe_run_on_cpu(cpu, do_tb_flush,
@@ -917,7 +888,7 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
/* remove the TB from the hash list */
phys_pc = tb_page_addr0(tb);
h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
tb->flags, tb->cs_base, orig_cflags);
tb->flags, orig_cflags, tb->trace_vcpu_dstate);
if (!qht_remove(&tb_ctx.htable, tb, h)) {
return;
}
@@ -955,16 +926,18 @@ static void tb_phys_invalidate__locked(TranslationBlock *tb)
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
if (page_addr == -1 && tb_page_addr0(tb) != -1) {
tb_lock_pages(tb);
page_lock_tb(tb);
do_tb_phys_invalidate(tb, true);
tb_unlock_pages(tb);
page_unlock_tb(tb);
} else {
do_tb_phys_invalidate(tb, false);
}
}
/*
* Add a new TB and link it to the physical page tables.
* Add a new TB and link it to the physical page tables. phys_page2 is
* (-1) to indicate that only one page contains the TB.
*
* Called with mmap_lock held for user-mode emulation.
*
* Returns a pointer @tb, or a pointer to an existing TB that matches @tb.
@@ -972,29 +945,43 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
* for the same block of guest code that @tb corresponds to. In that case,
* the caller should discard the original @tb, and use instead the returned TB.
*/
TranslationBlock *tb_link_page(TranslationBlock *tb)
TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb_page_addr_t phys_page2)
{
PageDesc *p;
PageDesc *p2 = NULL;
void *existing_tb = NULL;
uint32_t h;
assert_memory_lock();
tcg_debug_assert(!(tb->cflags & CF_INVALID));
tb_record(tb);
/*
* Add the TB to the page list, first acquiring the pages' locks.
* We keep the locks held until after inserting the TB in the hash table,
* so that if the insertion fails we know for sure that the TBs are still
* in the page descriptors.
* Note that inserting into the hash table first isn't an option, since
* we can only insert TBs that are fully initialized.
*/
page_lock_pair(&p, phys_pc, &p2, phys_page2, true);
tb_record(tb, p, p2);
/* add in the hash table */
h = tb_hash_func(tb_page_addr0(tb), (tb->cflags & CF_PCREL ? 0 : tb->pc),
tb->flags, tb->cs_base, tb->cflags);
h = tb_hash_func(phys_pc, (tb->cflags & CF_PCREL ? 0 : tb->pc),
tb->flags, tb->cflags, tb->trace_vcpu_dstate);
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
/* remove TB from the page(s) if we couldn't insert it */
if (unlikely(existing_tb)) {
tb_remove(tb);
tb_unlock_pages(tb);
return existing_tb;
tb = existing_tb;
}
tb_unlock_pages(tb);
if (p2 && p2 != p) {
page_unlock(p2);
}
page_unlock(p);
return tb;
}


@@ -89,20 +89,7 @@ void icount_handle_deadline(void)
}
}
/* Distribute the budget evenly across all CPUs */
int64_t icount_percpu_budget(int cpu_count)
{
int64_t limit = icount_get_limit();
int64_t timeslice = limit / cpu_count;
if (timeslice == 0) {
timeslice = limit;
}
return timeslice;
}
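A worked example of the budget split above, with illustrative numbers: 10000 instructions across 4 vCPUs gives a 2500-insn timeslice, and when the division would round to zero each vCPU falls back to the full limit instead of stalling:

#include <stdio.h>

static long percpu_budget(long limit, int cpu_count)
{
    long timeslice = limit / cpu_count;
    if (timeslice == 0) {
        timeslice = limit;   /* more vCPUs than instructions left */
    }
    return timeslice;
}

int main(void)
{
    printf("%ld\n", percpu_budget(10000, 4));  /* 2500 per vCPU */
    printf("%ld\n", percpu_budget(3, 8));      /* fallback: full limit, 3 */
    return 0;
}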
void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget)
void icount_prepare_for_run(CPUState *cpu)
{
int insns_left;
@@ -114,13 +101,13 @@ void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget)
g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
g_assert(cpu->icount_extra == 0);
replay_mutex_lock();
cpu->icount_budget = MIN(icount_get_limit(), cpu_budget);
cpu->icount_budget = icount_get_limit();
insns_left = MIN(0xffff, cpu->icount_budget);
cpu_neg(cpu)->icount_decr.u16.low = insns_left;
cpu->icount_extra = cpu->icount_budget - insns_left;
replay_mutex_lock();
if (cpu->icount_budget == 0) {
/*
* We're called without the iothread lock, so must take it while


@@ -11,8 +11,7 @@
#define TCG_ACCEL_OPS_ICOUNT_H
void icount_handle_deadline(void);
void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget);
int64_t icount_percpu_budget(int cpu_count);
void icount_prepare_for_run(CPUState *cpu);
void icount_process_data(CPUState *cpu);
void icount_handle_interrupt(CPUState *cpu, int mask);


@@ -32,7 +32,7 @@
#include "qemu/guest-random.h"
#include "exec/exec-all.h"
#include "hw/boards.h"
#include "tcg/tcg.h"
#include "tcg-accel-ops.h"
#include "tcg-accel-ops-mttcg.h"
@@ -119,7 +119,7 @@ static void *mttcg_cpu_thread_fn(void *arg)
}
}
qatomic_set_mb(&cpu->exit_request, 0);
qatomic_mb_set(&cpu->exit_request, 0);
qemu_wait_io_event(cpu);
} while (!cpu->unplug || cpu_can_run(cpu));
@@ -152,4 +152,8 @@ void mttcg_start_vcpu_thread(CPUState *cpu)
qemu_thread_create(cpu->thread, thread_name, mttcg_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
#ifdef _WIN32
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
}


@@ -24,7 +24,6 @@
*/
#include "qemu/osdep.h"
#include "qemu/lockable.h"
#include "sysemu/tcg.h"
#include "sysemu/replay.h"
#include "sysemu/cpu-timers.h"
@@ -32,7 +31,7 @@
#include "qemu/notify.h"
#include "qemu/guest-random.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "tcg-accel-ops.h"
#include "tcg-accel-ops-rr.h"
#include "tcg-accel-ops-icount.h"
@@ -72,13 +71,11 @@ static void rr_kick_next_cpu(void)
{
CPUState *cpu;
do {
cpu = qatomic_read(&rr_current_cpu);
cpu = qatomic_mb_read(&rr_current_cpu);
if (cpu) {
cpu_exit(cpu);
}
/* Finish kicking this cpu before reading again. */
smp_mb();
} while (cpu != qatomic_read(&rr_current_cpu));
} while (cpu != qatomic_mb_read(&rr_current_cpu));
}
static void rr_kick_thread(void *opaque)
@@ -142,33 +139,6 @@ static void rr_force_rcu(Notifier *notify, void *data)
rr_kick_next_cpu();
}
/*
* Calculate the number of CPUs that we will process in a single iteration of
* the main CPU thread loop so that we can fairly distribute the instruction
* count across CPUs.
*
* The CPU count is cached based on the CPU list generation ID to avoid
* iterating the list every time.
*/
static int rr_cpu_count(void)
{
static unsigned int last_gen_id = ~0;
static int cpu_count;
CPUState *cpu;
QEMU_LOCK_GUARD(&qemu_cpu_list_lock);
if (cpu_list_generation_id_get() != last_gen_id) {
cpu_count = 0;
CPU_FOREACH(cpu) {
++cpu_count;
}
last_gen_id = cpu_list_generation_id_get();
}
return cpu_count;
}
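A generic sketch of the generation-counter caching used by rr_cpu_count() above (names are illustrative, and the real code additionally holds qemu_cpu_list_lock while recounting):

#include <stdio.h>

static unsigned int list_generation;   /* bumped on every list change */

static int recount_list(void)          /* stand-in for the CPU_FOREACH walk */
{
    return 4;
}

static int cached_count(void)
{
    static unsigned int last_gen = ~0u;
    static int count;

    /* Recompute the derived value only when the list it depends on
     * has changed, as tracked by the monotonically increasing id. */
    if (list_generation != last_gen) {
        count = recount_list();
        last_gen = list_generation;
    }
    return count;
}

int main(void)
{
    printf("count=%d\n", cached_count());   /* recomputed */
    printf("count=%d\n", cached_count());   /* served from the cache */
    return 0;
}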
/*
* In the single-threaded case each vCPU is simulated in turn. If
* there is more than a single vCPU we create a simple timer to kick
@@ -215,16 +185,11 @@ static void *rr_cpu_thread_fn(void *arg)
cpu->exit_request = 1;
while (1) {
/* Only used for icount_enabled() */
int64_t cpu_budget = 0;
qemu_mutex_unlock_iothread();
replay_mutex_lock();
qemu_mutex_lock_iothread();
if (icount_enabled()) {
int cpu_count = rr_cpu_count();
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
icount_account_warp_timer();
/*
@@ -232,8 +197,6 @@ static void *rr_cpu_thread_fn(void *arg)
* waking up the I/O thread and waiting for completion.
*/
icount_handle_deadline();
cpu_budget = icount_percpu_budget(cpu_count);
}
replay_mutex_unlock();
@@ -243,9 +206,8 @@ static void *rr_cpu_thread_fn(void *arg)
}
while (cpu && cpu_work_list_empty(cpu) && !cpu->exit_request) {
/* Store rr_current_cpu before evaluating cpu_can_run(). */
qatomic_set_mb(&rr_current_cpu, cpu);
qatomic_mb_set(&rr_current_cpu, cpu);
current_cpu = cpu;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
@@ -256,7 +218,7 @@ static void *rr_cpu_thread_fn(void *arg)
qemu_mutex_unlock_iothread();
if (icount_enabled()) {
icount_prepare_for_run(cpu, cpu_budget);
icount_prepare_for_run(cpu);
}
r = tcg_cpus_exec(cpu);
if (icount_enabled()) {
@@ -283,11 +245,11 @@ static void *rr_cpu_thread_fn(void *arg)
cpu = CPU_NEXT(cpu);
} /* while (cpu && !cpu->exit_request).. */
/* Does not need a memory barrier because a spurious wakeup is okay. */
/* Does not need qatomic_mb_set because a spurious wakeup is okay. */
qatomic_set(&rr_current_cpu, NULL);
if (cpu && cpu->exit_request) {
qatomic_set_mb(&cpu->exit_request, 0);
qatomic_mb_set(&cpu->exit_request, 0);
}
if (icount_enabled() && all_cpu_threads_idle()) {
@@ -329,6 +291,9 @@ void rr_start_vcpu_thread(CPUState *cpu)
single_tcg_halt_cond = cpu->halt_cond;
single_tcg_cpu_thread = cpu->thread;
#ifdef _WIN32
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
} else {
/* we share the thread */
cpu->thread = single_tcg_cpu_thread;


@@ -70,10 +70,20 @@ void tcg_cpus_destroy(CPUState *cpu)
int tcg_cpus_exec(CPUState *cpu)
{
int ret;
#ifdef CONFIG_PROFILER
int64_t ti;
#endif
assert(tcg_enabled());
#ifdef CONFIG_PROFILER
ti = profile_getclock();
#endif
cpu_exec_start(cpu);
ret = cpu_exec(cpu);
cpu_exec_end(cpu);
#ifdef CONFIG_PROFILER
qatomic_set(&tcg_ctx->prof.cpu_exec_time,
tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
#endif
return ret;
}


@@ -28,11 +28,9 @@
#include "exec/replay-core.h"
#include "sysemu/cpu-timers.h"
#include "tcg/tcg.h"
#include "tcg/oversized-guest.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/accel.h"
#include "qemu/atomic.h"
#include "qapi/qapi-builtin-visit.h"
#include "qemu/units.h"
#if !defined(CONFIG_USER_ONLY)
@@ -44,7 +42,6 @@ struct TCGState {
AccelState parent_obj;
bool mttcg_enabled;
bool one_insn_per_tb;
int splitwx_enabled;
unsigned long tb_size;
};
@@ -64,23 +61,37 @@ DECLARE_INSTANCE_CHECKER(TCGState, TCG_STATE,
* they can set the appropriate CONFIG flags in ${target}-softmmu.mak
*
* Once a guest architecture has been converted to the new primitives
* there is one remaining limitation to check:
* - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
* there are two remaining limitations to check.
*
* - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
* - The host must have a stronger memory order than the guest
*
* It may be possible in future to support strong guests on weak hosts
* but that will require tagging all load/stores in a guest with their
* implicit memory order requirements which would likely slow things
* down a lot.
*/
static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
return false;
#endif
}
static bool default_mttcg_enabled(void)
{
if (icount_enabled() || TCG_OVERSIZED_GUEST) {
return false;
}
} else {
#ifdef TARGET_SUPPORTS_MTTCG
# ifndef TCG_GUEST_DEFAULT_MO
# error "TARGET_SUPPORTS_MTTCG without TCG_GUEST_DEFAULT_MO"
# endif
return true;
return check_tcg_memory_orders_compatible();
#else
return false;
return false;
#endif
}
}
static void tcg_accel_instance_init(Object *obj)
@@ -98,7 +109,6 @@ static void tcg_accel_instance_init(Object *obj)
}
bool mttcg_enabled;
bool one_insn_per_tb;
static int tcg_init_machine(MachineState *ms)
{
@@ -148,6 +158,11 @@ static void tcg_set_thread(Object *obj, const char *value, Error **errp)
warn_report("Guest not yet converted to MTTCG - "
"you may get unexpected results");
#endif
if (!check_tcg_memory_orders_compatible()) {
warn_report("Guest expects a stronger memory ordering "
"than the host provides");
error_printf("This may cause strange/hard to debug errors\n");
}
s->mttcg_enabled = true;
}
} else if (strcmp(value, "single") == 0) {
@@ -193,20 +208,6 @@ static void tcg_set_splitwx(Object *obj, bool value, Error **errp)
s->splitwx_enabled = value;
}
static bool tcg_get_one_insn_per_tb(Object *obj, Error **errp)
{
TCGState *s = TCG_STATE(obj);
return s->one_insn_per_tb;
}
static void tcg_set_one_insn_per_tb(Object *obj, bool value, Error **errp)
{
TCGState *s = TCG_STATE(obj);
s->one_insn_per_tb = value;
/* Set the global also: this changes the behaviour */
qatomic_set(&one_insn_per_tb, value);
}
static int tcg_gdbstub_supported_sstep_flags(void)
{
/*
@@ -244,12 +245,6 @@ static void tcg_accel_class_init(ObjectClass *oc, void *data)
tcg_get_splitwx, tcg_set_splitwx);
object_class_property_set_description(oc, "split-wx",
"Map jit pages into separate RW and RX regions");
object_class_property_add_bool(oc, "one-insn-per-tb",
tcg_get_one_insn_per_tb,
tcg_set_one_insn_per_tb);
object_class_property_set_description(oc, "one-insn-per-tb",
"Only put one guest insn in each translation block");
}
static const TypeInfo tcg_accel_type = {


@@ -20,7 +20,7 @@
#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "cpu.h"
#include "exec/helper-proto-common.h"
#include "exec/helper-proto.h"
#include "tcg/tcg-gvec-desc.h"
@@ -550,17 +550,6 @@ void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
clear_high(d, oprsz, desc);
}
void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
intptr_t i;
for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
*(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
}
clear_high(d, oprsz, desc);
}
void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);


@@ -24,17 +24,13 @@
#include "qemu/osdep.h"
#include "qemu/host-utils.h"
#include "cpu.h"
#include "exec/helper-proto-common.h"
#include "exec/helper-proto.h"
#include "exec/cpu_ldst.h"
#include "exec/exec-all.h"
#include "disas/disas.h"
#include "exec/log.h"
#include "tcg/tcg.h"
#define HELPER_H "accel/tcg/tcg-runtime.h"
#include "exec/helper-info.c.inc"
#undef HELPER_H
/* 32-bit helpers */
int32_t HELPER(div_i32)(int32_t arg1, int32_t arg2)


@@ -39,63 +39,62 @@ DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
DEF_HELPER_FLAGS_3(memset, TCG_CALL_NO_RWG, ptr, ptr, int, ptr)
#endif /* IN_HELPER_PROTO */
DEF_HELPER_FLAGS_3(ld_i128, TCG_CALL_NO_WG, i128, env, i64, i32)
DEF_HELPER_FLAGS_4(st_i128, TCG_CALL_NO_WG, void, env, i64, i128, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgb, TCG_CALL_NO_WG,
i32, env, i64, i32, i32, i32)
i32, env, tl, i32, i32, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_be, TCG_CALL_NO_WG,
i32, env, i64, i32, i32, i32)
i32, env, tl, i32, i32, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgw_le, TCG_CALL_NO_WG,
i32, env, i64, i32, i32, i32)
i32, env, tl, i32, i32, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_be, TCG_CALL_NO_WG,
i32, env, i64, i32, i32, i32)
i32, env, tl, i32, i32, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgl_le, TCG_CALL_NO_WG,
i32, env, i64, i32, i32, i32)
i32, env, tl, i32, i32, i32)
#ifdef CONFIG_ATOMIC64
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_be, TCG_CALL_NO_WG,
i64, env, i64, i64, i64, i32)
i64, env, tl, i64, i64, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgq_le, TCG_CALL_NO_WG,
i64, env, i64, i64, i64, i32)
i64, env, tl, i64, i64, i32)
#endif
#if HAVE_CMPXCHG128
#ifdef CONFIG_CMPXCHG128
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_be, TCG_CALL_NO_WG,
i128, env, i64, i128, i128, i32)
i128, env, tl, i128, i128, i32)
DEF_HELPER_FLAGS_5(atomic_cmpxchgo_le, TCG_CALL_NO_WG,
i128, env, i64, i128, i128, i32)
i128, env, tl, i128, i128, i32)
#endif
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo, TCG_CALL_NO_WG,
i128, env, i64, i128, i128, i32)
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_be, TCG_CALL_NO_WG,
i128, env, tl, i128, i128, i32)
DEF_HELPER_FLAGS_5(nonatomic_cmpxchgo_le, TCG_CALL_NO_WG,
i128, env, tl, i128, i128, i32)
#ifdef CONFIG_ATOMIC64
#define GEN_ATOMIC_HELPERS(NAME) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_le), \
TCG_CALL_NO_WG, i64, env, i64, i64, i32) \
TCG_CALL_NO_WG, i64, env, tl, i64, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), q_be), \
TCG_CALL_NO_WG, i64, env, i64, i64, i32)
TCG_CALL_NO_WG, i64, env, tl, i64, i32)
#else
#define GEN_ATOMIC_HELPERS(NAME) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), b), \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_le), \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), w_be), \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_le), \
TCG_CALL_NO_WG, i32, env, i64, i32, i32) \
TCG_CALL_NO_WG, i32, env, tl, i32, i32) \
DEF_HELPER_FLAGS_4(glue(glue(atomic_, NAME), l_be), \
TCG_CALL_NO_WG, i32, env, i64, i32, i32)
TCG_CALL_NO_WG, i32, env, tl, i32, i32)
#endif /* CONFIG_ATOMIC64 */
GEN_ATOMIC_HELPERS(fetch_add)
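A minimal sketch of the glue()-style token pasting that GEN_ATOMIC_HELPERS relies on to stamp out one declaration per width/endianness variant (toy macros, not the real DEF_HELPER_* machinery):

#include <stdio.h>

#define glue_(a, b) a##b
#define glue(a, b)  glue_(a, b)

/* One macro invocation expands to a family of functions whose names
 * are built by pasting the operation name with a size/endian suffix. */
#define GEN_VARIANTS(NAME)                              \
    static void glue(glue(atomic_, NAME), w_le)(void)   \
    { puts(#NAME "w_le"); }                             \
    static void glue(glue(atomic_, NAME), w_be)(void)   \
    { puts(#NAME "w_be"); }

GEN_VARIANTS(fetch_add)

int main(void)
{
    atomic_fetch_addw_le();
    atomic_fetch_addw_be();
    return 0;
}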
@@ -218,7 +217,6 @@ DEF_HELPER_FLAGS_4(gvec_nor, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eqv, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_ands, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_andcs, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_xors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
DEF_HELPER_FLAGS_4(gvec_ors, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)


@@ -19,6 +19,7 @@
#include "qemu/osdep.h"
#define NO_CPU_IO_DEFS
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
@@ -63,15 +64,17 @@
#include "tb-context.h"
#include "internal.h"
#include "perf.h"
#include "tcg/insn-start-words.h"
/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
sizeof_field(TranslationBlock, trace_vcpu_dstate)
* BITS_PER_BYTE);
TBContext tb_ctx;
/*
* Encode VAL as a signed leb128 sequence at P.
* Return P incremented past the encoded value.
*/
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
/* Encode VAL as a signed leb128 sequence at P.
Return P incremented past the encoded value. */
static uint8_t *encode_sleb128(uint8_t *p, target_long val)
{
int more, byte;
@@ -89,23 +92,21 @@ static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
return p;
}
/*
* Decode a signed leb128 sequence at *PP; increment *PP past the
* decoded value. Return the decoded value.
*/
static int64_t decode_sleb128(const uint8_t **pp)
/* Decode a signed leb128 sequence at *PP; increment *PP past the
decoded value. Return the decoded value. */
static target_long decode_sleb128(const uint8_t **pp)
{
const uint8_t *p = *pp;
int64_t val = 0;
target_long val = 0;
int byte, shift = 0;
do {
byte = *p++;
val |= (int64_t)(byte & 0x7f) << shift;
val |= (target_ulong)(byte & 0x7f) << shift;
shift += 7;
} while (byte & 0x80);
if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
val |= -(int64_t)1 << shift;
val |= -(target_ulong)1 << shift;
}
*pp = p;
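A standalone round-trip sketch of the sleb128 encoder above; the deltas between successive guest PCs in the search data are small and often negative, which sleb128 keeps compact (e.g. -4 encodes as the single byte 0x7c):

#include <stdint.h>
#include <stdio.h>

static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
    int more, byte;

    do {
        byte = val & 0x7f;
        val >>= 7;
        /* Stop once the remaining bits are all sign extension. */
        more = !((val == 0 && (byte & 0x40) == 0)
                 || (val == -1 && (byte & 0x40) != 0));
        if (more) {
            byte |= 0x80;
        }
        *p++ = byte;
    } while (more);
    return p;
}

int main(void)
{
    uint8_t buf[10];
    uint8_t *end = encode_sleb128(buf, -4);

    printf("-4 -> %d byte(s), first 0x%02x\n", (int)(end - buf), buf[0]);
    return 0;
}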
@@ -127,26 +128,22 @@ static int64_t decode_sleb128(const uint8_t **pp)
static int encode_search(TranslationBlock *tb, uint8_t *block)
{
uint8_t *highwater = tcg_ctx->code_gen_highwater;
uint64_t *insn_data = tcg_ctx->gen_insn_data;
uint16_t *insn_end_off = tcg_ctx->gen_insn_end_off;
uint8_t *p = block;
int i, j, n;
for (i = 0, n = tb->icount; i < n; ++i) {
uint64_t prev, curr;
target_ulong prev;
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
if (i == 0) {
prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
} else {
prev = insn_data[(i - 1) * TARGET_INSN_START_WORDS + j];
prev = tcg_ctx->gen_insn_data[i - 1][j];
}
curr = insn_data[i * TARGET_INSN_START_WORDS + j];
p = encode_sleb128(p, curr - prev);
p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
}
prev = (i == 0 ? 0 : insn_end_off[i - 1]);
curr = insn_end_off[i];
p = encode_sleb128(p, curr - prev);
prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
/* Test for (pending) buffer overflow. The assumption is that any
one row beginning below the high water mark cannot overrun
@@ -202,6 +199,10 @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
uintptr_t host_pc)
{
uint64_t data[TARGET_INSN_START_WORDS];
#ifdef CONFIG_PROFILER
TCGProfile *prof = &tcg_ctx->prof;
int64_t ti = profile_getclock();
#endif
int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
if (insns_left < 0) {
@@ -218,6 +219,12 @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
}
cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
#ifdef CONFIG_PROFILER
qatomic_set(&prof->restore_time,
prof->restore_time + profile_getclock() - ti);
qatomic_set(&prof->restore_count, prof->restore_count + 1);
#endif
}
bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
@@ -264,7 +271,7 @@ void page_init(void)
* Return the size of the generated code, or negative on error.
*/
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
vaddr pc, void *host_pc,
target_ulong pc, void *host_pc,
int *max_insns, int64_t *ti)
{
int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
@@ -280,19 +287,29 @@ static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
tcg_ctx->cpu = NULL;
*max_insns = tb->icount;
#ifdef CONFIG_PROFILER
qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
qatomic_set(&tcg_ctx->prof.interm_time,
tcg_ctx->prof.interm_time + profile_getclock() - *ti);
*ti = profile_getclock();
#endif
return tcg_gen_code(tcg_ctx, tb, pc);
}
/* Called with mmap_lock held for user mode emulation. */
TranslationBlock *tb_gen_code(CPUState *cpu,
vaddr pc, uint64_t cs_base,
target_ulong pc, target_ulong cs_base,
uint32_t flags, int cflags)
{
CPUArchState *env = cpu->env_ptr;
TranslationBlock *tb, *existing_tb;
tb_page_addr_t phys_pc, phys_p2;
tb_page_addr_t phys_pc;
tcg_insn_unit *gen_code_buf;
int gen_code_size, search_size, max_insns;
#ifdef CONFIG_PROFILER
TCGProfile *prof = &tcg_ctx->prof;
#endif
int64_t ti;
void *host_pc;
@@ -313,7 +330,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
buffer_overflow:
assert_no_pages_locked();
tb = tcg_tb_alloc(tcg_ctx);
if (unlikely(!tb)) {
/* flush must be done */
@@ -332,29 +348,18 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
tb->trace_vcpu_dstate = *cpu->trace_dstate;
tb_set_page_addr0(tb, phys_pc);
tb_set_page_addr1(tb, -1);
if (phys_pc != -1) {
tb_lock_page0(phys_pc);
}
tcg_ctx->gen_tb = tb;
tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64;
#ifdef CONFIG_SOFTMMU
tcg_ctx->page_bits = TARGET_PAGE_BITS;
tcg_ctx->page_mask = TARGET_PAGE_MASK;
tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
tcg_ctx->tlb_fast_offset =
(int)offsetof(ArchCPU, neg.tlb.f) - (int)offsetof(ArchCPU, env);
#endif
tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS;
#ifdef TCG_GUEST_DEFAULT_MO
tcg_ctx->guest_mo = TCG_GUEST_DEFAULT_MO;
#else
tcg_ctx->guest_mo = TCG_MO_ALL;
tb_overflow:
#ifdef CONFIG_PROFILER
/* includes aborted translations because of exceptions */
qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
ti = profile_getclock();
#endif
restart_translate:
trace_translate_block(tb, pc, tb->tc.ptr);
gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
@@ -373,8 +378,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
"Restarting code generation for "
"code_gen_buffer overflow\n");
tb_unlock_pages(tb);
tcg_ctx->gen_tb = NULL;
goto buffer_overflow;
case -2:
@@ -393,39 +396,14 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
"Restarting code generation with "
"smaller translation block (max %d insns)\n",
max_insns);
/*
* The half-sized TB may not cross pages.
* TODO: Fix all targets that cross pages except with
* the first insn, at which point this can't be reached.
*/
phys_p2 = tb_page_addr1(tb);
if (unlikely(phys_p2 != -1)) {
tb_unlock_page1(phys_pc, phys_p2);
tb_set_page_addr1(tb, -1);
}
goto restart_translate;
case -3:
/*
* We had a page lock ordering problem. In order to avoid
* deadlock we had to drop the lock on page0, which means
* that everything we translated so far is compromised.
* Restart with locks held on both pages.
*/
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
"Restarting code generation with re-locked pages");
goto restart_translate;
goto tb_overflow;
default:
g_assert_not_reached();
}
}
tcg_ctx->gen_tb = NULL;
search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
if (unlikely(search_size < 0)) {
tb_unlock_pages(tb);
goto buffer_overflow;
}
tb->tc.size = gen_code_size;
@@ -436,6 +414,14 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
*/
perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
#ifdef CONFIG_PROFILER
qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
#endif
#ifdef DEBUG_DISAS
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
qemu_log_in_addr_range(pc)) {
FILE *logfile = qemu_log_trylock();
@@ -458,8 +444,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
/* Dump header and the first instruction */
fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
fprintf(logfile,
" -- guest addr 0x%016" PRIx64 " + tb prologue\n",
tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
" -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
tcg_ctx->gen_insn_data[insn][0]);
chunk_start = tcg_ctx->gen_insn_end_off[insn];
disas(logfile, tb->tc.ptr, chunk_start);
@@ -471,8 +457,8 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
while (insn < tb->icount) {
size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
if (chunk_end > chunk_start) {
fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n",
tcg_ctx->gen_insn_data[insn][0]);
disas(logfile, tb->tc.ptr + chunk_start,
chunk_end - chunk_start);
chunk_start = chunk_end;
@@ -508,6 +494,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
qemu_log_unlock(logfile);
}
}
#endif
qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
@@ -535,7 +522,6 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
* before attempting to link to other TBs or add to the lookup table.
*/
if (tb_page_addr0(tb) == -1) {
assert_no_pages_locked();
return tb;
}
@@ -550,9 +536,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
* No explicit memory barrier is required -- tb_link_page() makes the
* TB visible in a consistent state.
*/
existing_tb = tb_link_page(tb);
assert_no_pages_locked();
existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
/* if the TB already exists, discard what we just translated */
if (unlikely(existing_tb != tb)) {
uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
@@ -581,8 +565,7 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
/* The exception probably happened in a helper. The CPU state should
have been saved before calling it. Fetch the PC from there. */
CPUArchState *env = cpu->env_ptr;
vaddr pc;
uint64_t cs_base;
target_ulong pc, cs_base;
tb_page_addr_t addr;
uint32_t flags;
@@ -636,10 +619,10 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
vaddr pc = log_pc(cpu, tb);
target_ulong pc = log_pc(cpu, tb);
if (qemu_log_in_addr_range(pc)) {
qemu_log("cpu_io_recompile: rewound execution of TB to %016"
VADDR_PRIx "\n", pc);
qemu_log("cpu_io_recompile: rewound execution of TB to "
TARGET_FMT_lx "\n", pc);
}
}


@@ -8,116 +8,17 @@
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "qemu/error-report.h"
#include "tcg/tcg.h"
#include "tcg/tcg-op.h"
#include "exec/exec-all.h"
#include "exec/gen-icount.h"
#include "exec/log.h"
#include "exec/translator.h"
#include "exec/plugin-gen.h"
#include "tcg/tcg-op-common.h"
#include "internal.h"
#include "exec/replay-core.h"
static void gen_io_start(void)
{
tcg_gen_st_i32(tcg_constant_i32(1), cpu_env,
offsetof(ArchCPU, parent_obj.can_do_io) -
offsetof(ArchCPU, env));
}
bool translator_io_start(DisasContextBase *db)
{
uint32_t cflags = tb_cflags(db->tb);
if (!(cflags & CF_USE_ICOUNT)) {
return false;
}
if (db->num_insns == db->max_insns && (cflags & CF_LAST_IO)) {
/* Already started in translator_loop. */
return true;
}
gen_io_start();
/*
* Ensure that this instruction will be the last in the TB.
* The target may override this to something more forceful.
*/
if (db->is_jmp == DISAS_NEXT) {
db->is_jmp = DISAS_TOO_MANY;
}
return true;
}
static TCGOp *gen_tb_start(uint32_t cflags)
{
TCGv_i32 count = tcg_temp_new_i32();
TCGOp *icount_start_insn = NULL;
tcg_gen_ld_i32(count, cpu_env,
offsetof(ArchCPU, neg.icount_decr.u32) -
offsetof(ArchCPU, env));
if (cflags & CF_USE_ICOUNT) {
/*
* We emit a sub with a dummy immediate argument. Keep the insn index
* of the sub so that we later (when we know the actual insn count)
* can update the argument with the actual insn count.
*/
tcg_gen_sub_i32(count, count, tcg_constant_i32(0));
icount_start_insn = tcg_last_op();
}
/*
* Emit the check against icount_decr.u32 to see if we should exit
* unless we suppress the check with CF_NOIRQ. If we are using
* icount and have suppressed interruption the higher level code
* should have ensured we don't run more instructions than the
* budget.
*/
if (cflags & CF_NOIRQ) {
tcg_ctx->exitreq_label = NULL;
} else {
tcg_ctx->exitreq_label = gen_new_label();
tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, tcg_ctx->exitreq_label);
}
if (cflags & CF_USE_ICOUNT) {
tcg_gen_st16_i32(count, cpu_env,
offsetof(ArchCPU, neg.icount_decr.u16.low) -
offsetof(ArchCPU, env));
/*
* cpu->can_do_io is cleared automatically here at the beginning of
* each translation block. The cost is minimal and only paid for
* -icount, plus it would be very easy to forget doing it in the
* translator. Doing it here means we don't need a gen_io_end() to
* go with gen_io_start().
*/
tcg_gen_st_i32(tcg_constant_i32(0), cpu_env,
offsetof(ArchCPU, parent_obj.can_do_io) -
offsetof(ArchCPU, env));
}
return icount_start_insn;
}
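gen_tb_start() emits the icount subtraction with a dummy immediate and patches the real instruction count in later, from gen_tb_end(). A toy sketch of that emit-then-backpatch pattern (plain structs, not TCG ops):

#include <stdio.h>

struct insn { int opcode, imm; };

int main(void)
{
    struct insn buf[8];
    int n = 0;

    /* Emit the operation with a dummy immediate, remember its slot. */
    buf[n] = (struct insn){ .opcode = 1, .imm = 0 };
    int icount_start_insn = n++;

    /* ... translate the block, counting guest instructions ... */
    int num_insns = 13;

    /* Backpatch the placeholder once the real count is known. */
    buf[icount_start_insn].imm = num_insns;
    printf("patched imm = %d\n", buf[icount_start_insn].imm);
    return 0;
}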
static void gen_tb_end(const TranslationBlock *tb, uint32_t cflags,
TCGOp *icount_start_insn, int num_insns)
{
if (cflags & CF_USE_ICOUNT) {
/*
* Update the num_insn immediate parameter now that we know
* the actual insn count.
*/
tcg_set_insn_param(icount_start_insn, 2,
tcgv_i32_arg(tcg_constant_i32(num_insns)));
}
if (tcg_ctx->exitreq_label) {
gen_set_label(tcg_ctx->exitreq_label);
tcg_gen_exit_tb(tb, TB_EXIT_REQUESTED);
}
}
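The icount code above leans on a standard code-generation trick: emit an operation with a placeholder immediate, keep a handle to it, and patch in the real value once it is known (here the final instruction count, via tcg_set_insn_param). A minimal standalone sketch of that backpatching pattern, using a toy instruction buffer rather than real TCG ops:

#include <stdio.h>

/* A toy "instruction" with a single immediate operand. */
typedef struct {
    const char *name;
    int imm;
} Insn;

static Insn buf[16];
static int n_insns;

static Insn *emit(const char *name, int imm)
{
    buf[n_insns] = (Insn){ name, imm };
    return &buf[n_insns++];
}

int main(void)
{
    /* Emit "sub count, count, <dummy>" and keep a handle on it,
     * the way gen_tb_start() keeps icount_start_insn. */
    Insn *icount_start = emit("sub", 0);

    emit("insn_a", 1);
    emit("insn_b", 2);
    emit("insn_c", 3);

    /* The block is closed, so the real count is known; patch the
     * placeholder, as gen_tb_end() does with tcg_set_insn_param(). */
    icount_start->imm = n_insns - 1;

    for (int i = 0; i < n_insns; i++) {
        printf("%s %d\n", buf[i].name, buf[i].imm);
    }
    return 0;
}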
bool translator_use_goto_tb(DisasContextBase *db, vaddr dest)
bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
{
/* Suppress goto_tb if requested. */
if (tb_cflags(db->tb) & CF_NO_GOTO_TB) {
@@ -129,11 +30,10 @@ bool translator_use_goto_tb(DisasContextBase *db, vaddr dest)
}
void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
vaddr pc, void *host_pc, const TranslatorOps *ops,
DisasContextBase *db)
target_ulong pc, void *host_pc,
const TranslatorOps *ops, DisasContextBase *db)
{
uint32_t cflags = tb_cflags(tb);
TCGOp *icount_start_insn;
bool plugin_enabled;
/* Initialize DisasContext */
@@ -147,11 +47,15 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
db->host_addr[0] = host_pc;
db->host_addr[1] = NULL;
#ifdef CONFIG_USER_ONLY
page_protect(pc);
#endif
ops->init_disas_context(db, cpu);
tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
/* Start translating. */
icount_start_insn = gen_tb_start(cflags);
gen_tb_start(db->tb);
ops->tb_start(db, cpu);
tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
@@ -208,7 +112,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
/* Emit code to exit the TB, as indicated by db->is_jmp. */
ops->tb_stop(db, cpu);
gen_tb_end(tb, cflags, icount_start_insn, db->num_insns);
gen_tb_end(db->tb, db->num_insns);
if (plugin_enabled) {
plugin_gen_tb_end(cpu);
@@ -218,6 +122,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
tb->size = db->pc_next - db->pc_first;
tb->icount = db->num_insns;
#ifdef DEBUG_DISAS
if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
&& qemu_log_in_addr_range(db->pc_first)) {
FILE *logfile = qemu_log_trylock();
@@ -228,13 +133,14 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
qemu_log_unlock(logfile);
}
}
#endif
}
static void *translator_access(CPUArchState *env, DisasContextBase *db,
vaddr pc, size_t len)
target_ulong pc, size_t len)
{
void *host;
vaddr base, end;
target_ulong base, end;
TranslationBlock *tb;
tb = db->tb;
@@ -252,36 +158,22 @@ static void *translator_access(CPUArchState *env, DisasContextBase *db,
host = db->host_addr[1];
base = TARGET_PAGE_ALIGN(db->pc_first);
if (host == NULL) {
tb_page_addr_t page0, old_page1, new_page1;
new_page1 = get_page_addr_code_hostp(env, base, &db->host_addr[1]);
tb_page_addr_t phys_page =
get_page_addr_code_hostp(env, base, &db->host_addr[1]);
/*
* If the second page is MMIO, treat as if the first page
* was MMIO as well, so that we do not cache the TB.
*/
if (unlikely(new_page1 == -1)) {
tb_unlock_pages(tb);
if (unlikely(phys_page == -1)) {
tb_set_page_addr0(tb, -1);
return NULL;
}
/*
* If this is not the first time around, and page1 matches,
* then we already have the page locked. Alternatively, we're
* not doing anything to prevent the PTE from changing, so
* we might wind up with a different page, requiring us to
* re-do the locking.
*/
old_page1 = tb_page_addr1(tb);
if (likely(new_page1 != old_page1)) {
page0 = tb_page_addr0(tb);
if (unlikely(old_page1 != -1)) {
tb_unlock_page1(page0, old_page1);
}
tb_set_page_addr1(tb, new_page1);
tb_lock_page1(page0, new_page1);
}
tb_set_page_addr1(tb, phys_page);
#ifdef CONFIG_USER_ONLY
page_protect(end);
#endif
host = db->host_addr[1];
}
@@ -295,27 +187,6 @@ static void *translator_access(CPUArchState *env, DisasContextBase *db,
return host + (pc - base);
}
static void plugin_insn_append(abi_ptr pc, const void *from, size_t size)
{
#ifdef CONFIG_PLUGIN
struct qemu_plugin_insn *insn = tcg_ctx->plugin_insn;
abi_ptr off;
if (insn == NULL) {
return;
}
off = pc - insn->vaddr;
if (off < insn->data->len) {
g_byte_array_set_size(insn->data, off);
} else if (off > insn->data->len) {
/* we have an unexpected gap */
g_assert_not_reached();
}
insn->data = g_byte_array_append(insn->data, from, size);
#endif
}
uint8_t translator_ldub(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
{
uint8_t ret;
@@ -374,8 +245,3 @@ uint64_t translator_ldq(CPUArchState *env, DisasContextBase *db, abi_ptr pc)
plugin_insn_append(pc, &plug, sizeof(ret));
return ret;
}
void translator_fake_ldb(uint8_t insn8, abi_ptr pc)
{
plugin_insn_append(pc, &insn8, sizeof(insn8));
}


@@ -144,7 +144,7 @@ typedef struct PageFlagsNode {
static IntervalTreeRoot pageflags_root;
static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
static PageFlagsNode *pageflags_find(target_ulong start, target_long last)
{
IntervalTreeNode *n;
@@ -153,7 +153,7 @@ static PageFlagsNode *pageflags_find(target_ulong start, target_ulong last)
}
static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
target_ulong last)
target_long last)
{
IntervalTreeNode *n;
@@ -520,19 +520,19 @@ void page_set_flags(target_ulong start, target_ulong last, int flags)
}
}
bool page_check_range(target_ulong start, target_ulong len, int flags)
int page_check_range(target_ulong start, target_ulong len, int flags)
{
target_ulong last;
int locked; /* tri-state: =0: unlocked, +1: global, -1: local */
bool ret;
int ret;
if (len == 0) {
return true; /* trivial length */
return 0; /* trivial length */
}
last = start + len - 1;
if (last < start) {
return false; /* wrap around */
return -1; /* wrap around */
}
locked = have_mmap_lock();
@@ -551,33 +551,33 @@ bool page_check_range(target_ulong start, target_ulong len, int flags)
p = pageflags_find(start, last);
}
if (!p) {
ret = false; /* entire region invalid */
ret = -1; /* entire region invalid */
break;
}
}
if (start < p->itree.start) {
ret = false; /* initial bytes invalid */
ret = -1; /* initial bytes invalid */
break;
}
missing = flags & ~p->flags;
if (missing & ~PAGE_WRITE) {
ret = false; /* page doesn't match */
if (missing & PAGE_READ) {
ret = -1; /* page not readable */
break;
}
if (missing & PAGE_WRITE) {
if (!(p->flags & PAGE_WRITE_ORG)) {
ret = false; /* page not writable */
ret = -1; /* page not writable */
break;
}
/* Asking about writable, but has been protected: undo. */
if (!page_unprotect(start, 0)) {
ret = false;
ret = -1;
break;
}
/* TODO: page_unprotect should take a range, not a single page. */
if (last - start < TARGET_PAGE_SIZE) {
ret = true; /* ok */
ret = 0; /* ok */
break;
}
start += TARGET_PAGE_SIZE;
@@ -585,7 +585,7 @@ bool page_check_range(target_ulong start, target_ulong len, int flags)
}
if (last <= p->itree.last) {
ret = true; /* ok */
ret = 0; /* ok */
break;
}
start = p->itree.last + 1;
@@ -598,54 +598,6 @@ bool page_check_range(target_ulong start, target_ulong len, int flags)
return ret;
}
bool page_check_range_empty(target_ulong start, target_ulong last)
{
assert(last >= start);
assert_memory_lock();
return pageflags_find(start, last) == NULL;
}
target_ulong page_find_range_empty(target_ulong min, target_ulong max,
target_ulong len, target_ulong align)
{
target_ulong len_m1, align_m1;
assert(min <= max);
assert(max <= GUEST_ADDR_MAX);
assert(len != 0);
assert(is_power_of_2(align));
assert_memory_lock();
len_m1 = len - 1;
align_m1 = align - 1;
/* Iteratively narrow the search region. */
while (1) {
PageFlagsNode *p;
/* Align min and double-check there's enough space remaining. */
min = (min + align_m1) & ~align_m1;
if (min > max) {
return -1;
}
if (len_m1 > max - min) {
return -1;
}
p = pageflags_find(min, min + len_m1);
if (p == NULL) {
/* Found! */
return min;
}
if (max <= p->itree.last) {
/* Existing allocation fills the remainder of the search region. */
return -1;
}
/* Skip across existing allocation. */
min = p->itree.last + 1;
}
}
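page_find_range_empty() above is a first-fit search: align the candidate address, and on any collision with an existing allocation jump to just past it. The same loop over a plain sorted interval array, as a sketch (QEMU uses an interval tree; the names below are invented):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t first, last; } Range;

/* Occupied regions, sorted and non-overlapping. */
static const Range used[] = { { 0x1000, 0x1fff }, { 0x3000, 0x4fff } };
static const int n_used = 2;

static const Range *find_overlap(uint64_t first, uint64_t last)
{
    for (int i = 0; i < n_used; i++) {
        if (used[i].first <= last && first <= used[i].last) {
            return &used[i];
        }
    }
    return NULL;
}

static uint64_t find_empty(uint64_t min, uint64_t max,
                           uint64_t len, uint64_t align)
{
    uint64_t len_m1 = len - 1, align_m1 = align - 1;

    for (;;) {
        /* Align min and re-check there is room left. */
        min = (min + align_m1) & ~align_m1;
        if (min > max || len_m1 > max - min) {
            return (uint64_t)-1;
        }
        const Range *p = find_overlap(min, min + len_m1);
        if (!p) {
            return min;         /* found a hole */
        }
        if (max <= p->last) {
            return (uint64_t)-1;
        }
        min = p->last + 1;      /* skip past the allocation */
    }
}

int main(void)
{
    printf("0x%llx\n", (unsigned long long)
           find_empty(0x0, 0xffff, 0x1000, 0x1000));   /* 0x0 */
    printf("0x%llx\n", (unsigned long long)
           find_empty(0x800, 0xffff, 0x1000, 0x1000)); /* 0x2000 */
    return 0;
}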
void page_protect(tb_page_addr_t address)
{
PageFlagsNode *p;
@@ -769,7 +721,7 @@ int page_unprotect(target_ulong address, uintptr_t pc)
return current_tb_invalidated ? 2 : 1;
}
static int probe_access_internal(CPUArchState *env, vaddr addr,
static int probe_access_internal(CPUArchState *env, target_ulong addr,
int fault_size, MMUAccessType access_type,
bool nonfault, uintptr_t ra)
{
@@ -793,10 +745,6 @@ static int probe_access_internal(CPUArchState *env, vaddr addr,
if (guest_addr_valid_untagged(addr)) {
int page_flags = page_get_flags(addr);
if (page_flags & acc_flag) {
if ((acc_flag == PAGE_READ || acc_flag == PAGE_WRITE)
&& cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
return TLB_MMIO;
}
return 0; /* success */
}
maperr = !(page_flags & PAGE_VALID);
@@ -811,7 +759,7 @@ static int probe_access_internal(CPUArchState *env, vaddr addr,
cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
}
int probe_access_flags(CPUArchState *env, vaddr addr, int size,
int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
MMUAccessType access_type, int mmu_idx,
bool nonfault, void **phost, uintptr_t ra)
{
@@ -819,23 +767,23 @@ int probe_access_flags(CPUArchState *env, vaddr addr, int size,
g_assert(-(addr | TARGET_PAGE_MASK) >= size);
flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
*phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
*phost = flags ? NULL : g2h(env_cpu(env), addr);
return flags;
}
void *probe_access(CPUArchState *env, vaddr addr, int size,
void *probe_access(CPUArchState *env, target_ulong addr, int size,
MMUAccessType access_type, int mmu_idx, uintptr_t ra)
{
int flags;
g_assert(-(addr | TARGET_PAGE_MASK) >= size);
flags = probe_access_internal(env, addr, size, access_type, false, ra);
g_assert((flags & ~TLB_MMIO) == 0);
g_assert(flags == 0);
return size ? g2h(env_cpu(env), addr) : NULL;
}
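probe_access_flags() above implements the nonfault-probe contract: report accessibility via flags, hand back a host pointer only when the access is valid, and raise the guest fault only when nonfault probing was not requested. A toy model of that contract (all names hypothetical, not QEMU's API):

#include <stdbool.h>
#include <stdio.h>

#define TLB_INVALID_MASK 1  /* flag bit, value invented for the sketch */

static char backing[16];    /* pretend guest memory */

/* Toy probe: report accessibility without faulting. When nonfault is
 * false, a real implementation would exit to the guest signal path
 * (cpu_loop_exit_sigsegv) instead of returning. */
static int probe(unsigned addr, bool nonfault, void **phost)
{
    (void)nonfault;         /* see comment above */
    if (addr >= sizeof(backing)) {
        *phost = NULL;
        return TLB_INVALID_MASK;
    }
    *phost = &backing[addr];
    return 0;
}

int main(void)
{
    void *host;
    int flags;

    flags = probe(4, true, &host);
    printf("addr 4:  flags=%d host=%s\n", flags, host ? "valid" : "NULL");
    flags = probe(64, true, &host);
    printf("addr 64: flags=%d host=%s\n", flags, host ? "valid" : "NULL");
    return 0;
}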
tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
void **hostp)
{
int flags;
@@ -941,9 +889,35 @@ void page_reset_target_data(target_ulong start, target_ulong last) { }
/* The softmmu versions of these helpers are in cputlb.c. */
static void *cpu_mmu_lookup(CPUArchState *env, vaddr addr,
MemOp mop, uintptr_t ra, MMUAccessType type)
/*
* Verify that we have passed the correct MemOp to the correct function.
*
* We could present one function to target code, and dispatch based on
* the MemOp, but so far we have worked hard to avoid an indirect function
* call along the memory path.
*/
static void validate_memop(MemOpIdx oi, MemOp expected)
{
#ifdef CONFIG_DEBUG_TCG
MemOp have = get_memop(oi) & (MO_SIZE | MO_BSWAP);
assert(have == expected);
#endif
}
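validate_memop() only compares the size and byte-swap bits of the packed operand descriptor against what the helper expects. A standalone sketch of that style of bit-field check; the field layout below is invented for illustration, not the real MemOp encoding:

#include <assert.h>
#include <stdio.h>

/* Invented layout: low 2 bits = log2(size), bit 2 = byte swap. */
enum { MO_SIZE = 0x3, MO_BSWAP = 0x4 };
#define MO_UB   0x0            /* 1-byte, no swap */
#define MO_BEUW (0x1 | 0x4)    /* 2-byte, swapped on LE hosts */

typedef unsigned MemOpIdx;

static void validate(MemOpIdx oi, unsigned expected)
{
    /* Mask off everything but the bits the helper cares about,
     * mirroring get_memop(oi) & (MO_SIZE | MO_BSWAP) above. */
    unsigned have = oi & (MO_SIZE | MO_BSWAP);
    assert(have == expected);
}

int main(void)
{
    validate(MO_UB, MO_UB);      /* ok */
    validate(MO_BEUW, MO_BEUW);  /* ok */
    puts("memop checks passed");
    return 0;
}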
void helper_unaligned_ld(CPUArchState *env, target_ulong addr)
{
cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC());
}
void helper_unaligned_st(CPUArchState *env, target_ulong addr)
{
cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
}
static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
MemOpIdx oi, uintptr_t ra, MMUAccessType type)
{
MemOp mop = get_memop(oi);
int a_bits = get_alignment_bits(mop);
void *ret;
@@ -957,330 +931,251 @@ static void *cpu_mmu_lookup(CPUArchState *env, vaddr addr,
return ret;
}
#include "ldst_atomicity.c.inc"
static uint8_t do_ld1_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra)
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint8_t ret;
tcg_debug_assert((mop & MO_SIZE) == MO_8);
cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
validate_memop(oi, MO_UB);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = ldub_p(haddr);
clear_helper_retaddr();
return ret;
}
tcg_target_ulong helper_ldub_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
return do_ld1_mmu(env, addr, get_memop(oi), ra);
}
tcg_target_ulong helper_ldsb_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
return (int8_t)do_ld1_mmu(env, addr, get_memop(oi), ra);
}
uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
uint8_t ret = do_ld1_mmu(env, addr, get_memop(oi), ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret;
}
static uint16_t do_ld2_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra)
uint16_t cpu_ldw_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint16_t ret;
tcg_debug_assert((mop & MO_SIZE) == MO_16);
cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
ret = load_atom_2(env, ra, haddr, mop);
validate_memop(oi, MO_BEUW);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = lduw_be_p(haddr);
clear_helper_retaddr();
if (mop & MO_BSWAP) {
ret = bswap16(ret);
}
return ret;
}
tcg_target_ulong helper_lduw_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
return do_ld2_mmu(env, addr, get_memop(oi), ra);
}
tcg_target_ulong helper_ldsw_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
return (int16_t)do_ld2_mmu(env, addr, get_memop(oi), ra);
}
uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
uint16_t ret = do_ld2_mmu(env, addr, get_memop(oi), ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret;
}
static uint32_t do_ld4_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra)
uint32_t cpu_ldl_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint32_t ret;
tcg_debug_assert((mop & MO_SIZE) == MO_32);
cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
ret = load_atom_4(env, ra, haddr, mop);
validate_memop(oi, MO_BEUL);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = ldl_be_p(haddr);
clear_helper_retaddr();
if (mop & MO_BSWAP) {
ret = bswap32(ret);
}
return ret;
}
tcg_target_ulong helper_ldul_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
return do_ld4_mmu(env, addr, get_memop(oi), ra);
}
tcg_target_ulong helper_ldsl_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
return (int32_t)do_ld4_mmu(env, addr, get_memop(oi), ra);
}
uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
uint32_t ret = do_ld4_mmu(env, addr, get_memop(oi), ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret;
}
static uint64_t do_ld8_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra)
uint64_t cpu_ldq_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint64_t ret;
tcg_debug_assert((mop & MO_SIZE) == MO_64);
cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
ret = load_atom_8(env, ra, haddr, mop);
validate_memop(oi, MO_BEUQ);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = ldq_be_p(haddr);
clear_helper_retaddr();
if (mop & MO_BSWAP) {
ret = bswap64(ret);
}
return ret;
}
uint64_t helper_ldq_mmu(CPUArchState *env, uint64_t addr,
MemOpIdx oi, uintptr_t ra)
{
return do_ld8_mmu(env, addr, get_memop(oi), ra);
}
uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
uint64_t ret = do_ld8_mmu(env, addr, get_memop(oi), ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret;
}
static Int128 do_ld16_mmu(CPUArchState *env, abi_ptr addr,
MemOp mop, uintptr_t ra)
uint16_t cpu_ldw_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint16_t ret;
validate_memop(oi, MO_LEUW);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = lduw_le_p(haddr);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret;
}
uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint32_t ret;
validate_memop(oi, MO_LEUL);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = ldl_le_p(haddr);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret;
}
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint64_t ret;
validate_memop(oi, MO_LEUQ);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = ldq_le_p(haddr);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret;
}
Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
Int128 ret;
tcg_debug_assert((mop & MO_SIZE) == MO_128);
cpu_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_LOAD);
ret = load_atom_16(env, ra, haddr, mop);
validate_memop(oi, MO_128 | MO_BE);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
memcpy(&ret, haddr, 16);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
if (mop & MO_BSWAP) {
if (!HOST_BIG_ENDIAN) {
ret = bswap128(ret);
}
return ret;
}
Int128 helper_ld16_mmu(CPUArchState *env, uint64_t addr,
Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
return do_ld16_mmu(env, addr, get_memop(oi), ra);
}
Int128 helper_ld_i128(CPUArchState *env, uint64_t addr, MemOpIdx oi)
{
return helper_ld16_mmu(env, addr, oi, GETPC());
}
Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
Int128 ret = do_ld16_mmu(env, addr, get_memop(oi), ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
return ret;
}
static void do_st1_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
MemOp mop, uintptr_t ra)
{
void *haddr;
Int128 ret;
tcg_debug_assert((mop & MO_SIZE) == MO_8);
cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
stb_p(haddr, val);
validate_memop(oi, MO_128 | MO_LE);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
memcpy(&ret, haddr, 16);
clear_helper_retaddr();
}
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
void helper_stb_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
MemOpIdx oi, uintptr_t ra)
{
do_st1_mmu(env, addr, val, get_memop(oi), ra);
if (HOST_BIG_ENDIAN) {
ret = bswap128(ret);
}
return ret;
}
void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
MemOpIdx oi, uintptr_t ra)
{
do_st1_mmu(env, addr, val, get_memop(oi), ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
static void do_st2_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
MemOp mop, uintptr_t ra)
{
void *haddr;
tcg_debug_assert((mop & MO_SIZE) == MO_16);
cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
if (mop & MO_BSWAP) {
val = bswap16(val);
}
store_atom_2(env, ra, haddr, mop, val);
validate_memop(oi, MO_UB);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
stb_p(haddr, val);
clear_helper_retaddr();
}
void helper_stw_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
MemOpIdx oi, uintptr_t ra)
{
do_st2_mmu(env, addr, val, get_memop(oi), ra);
}
void cpu_stw_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
MemOpIdx oi, uintptr_t ra)
{
do_st2_mmu(env, addr, val, get_memop(oi), ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
static void do_st4_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
MemOp mop, uintptr_t ra)
void cpu_stw_be_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
tcg_debug_assert((mop & MO_SIZE) == MO_32);
cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
if (mop & MO_BSWAP) {
val = bswap32(val);
}
store_atom_4(env, ra, haddr, mop, val);
validate_memop(oi, MO_BEUW);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
stw_be_p(haddr, val);
clear_helper_retaddr();
}
void helper_stl_mmu(CPUArchState *env, uint64_t addr, uint32_t val,
MemOpIdx oi, uintptr_t ra)
{
do_st4_mmu(env, addr, val, get_memop(oi), ra);
}
void cpu_stl_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
MemOpIdx oi, uintptr_t ra)
{
do_st4_mmu(env, addr, val, get_memop(oi), ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
static void do_st8_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
MemOp mop, uintptr_t ra)
void cpu_stl_be_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
tcg_debug_assert((mop & MO_SIZE) == MO_64);
cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
if (mop & MO_BSWAP) {
val = bswap64(val);
}
store_atom_8(env, ra, haddr, mop, val);
validate_memop(oi, MO_BEUL);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
stl_be_p(haddr, val);
clear_helper_retaddr();
}
void helper_stq_mmu(CPUArchState *env, uint64_t addr, uint64_t val,
MemOpIdx oi, uintptr_t ra)
{
do_st8_mmu(env, addr, val, get_memop(oi), ra);
}
void cpu_stq_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
MemOpIdx oi, uintptr_t ra)
{
do_st8_mmu(env, addr, val, get_memop(oi), ra);
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
static void do_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
MemOp mop, uintptr_t ra)
void cpu_stq_be_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
tcg_debug_assert((mop & MO_SIZE) == MO_128);
cpu_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
haddr = cpu_mmu_lookup(env, addr, mop, ra, MMU_DATA_STORE);
validate_memop(oi, MO_BEUQ);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
stq_be_p(haddr, val);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
if (mop & MO_BSWAP) {
void cpu_stw_le_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
validate_memop(oi, MO_LEUW);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
stw_le_p(haddr, val);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_stl_le_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
validate_memop(oi, MO_LEUL);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
stl_le_p(haddr, val);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
validate_memop(oi, MO_LEUQ);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
stq_le_p(haddr, val);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
Int128 val, MemOpIdx oi, uintptr_t ra)
{
void *haddr;
validate_memop(oi, MO_128 | MO_BE);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
if (!HOST_BIG_ENDIAN) {
val = bswap128(val);
}
store_atom_16(env, ra, haddr, mop, val);
memcpy(haddr, &val, 16);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
void helper_st16_mmu(CPUArchState *env, uint64_t addr, Int128 val,
MemOpIdx oi, uintptr_t ra)
void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
Int128 val, MemOpIdx oi, uintptr_t ra)
{
do_st16_mmu(env, addr, val, get_memop(oi), ra);
}
void *haddr;
void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
{
helper_st16_mmu(env, addr, val, oi, GETPC());
}
void cpu_st16_mmu(CPUArchState *env, abi_ptr addr,
Int128 val, MemOpIdx oi, uintptr_t ra)
{
do_st16_mmu(env, addr, val, get_memop(oi), ra);
validate_memop(oi, MO_128 | MO_LE);
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
if (HOST_BIG_ENDIAN) {
val = bswap128(val);
}
memcpy(haddr, &val, 16);
clear_helper_retaddr();
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
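The 16-byte helpers above byte-swap the Int128 value whenever host endianness disagrees with the access endianness. Without compiler int128 support, the same bswap128() effect can be modelled by swapping two 64-bit halves, as in this sketch (assumes the GCC/Clang __builtin_bswap64 builtin):

#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t lo, hi; } u128;

/* Swap all 16 bytes: each half is byte-swapped and the halves
 * trade places, which is what bswap128() does above. */
static u128 bswap128_sketch(u128 v)
{
    u128 r = { __builtin_bswap64(v.hi), __builtin_bswap64(v.lo) };
    return r;
}

int main(void)
{
    u128 v = { 0x0123456789abcdefULL, 0xfedcba9876543210ULL };
    u128 s = bswap128_sketch(v);
    printf("%016llx%016llx -> %016llx%016llx\n",
           (unsigned long long)v.hi, (unsigned long long)v.lo,
           (unsigned long long)s.hi, (unsigned long long)s.lo);
    return 0;
}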
@@ -1324,70 +1219,16 @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
return ret;
}
uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint8_t ret;
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
ret = ldub_p(haddr);
clear_helper_retaddr();
return ret;
}
uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint16_t ret;
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
ret = lduw_p(haddr);
clear_helper_retaddr();
if (get_memop(oi) & MO_BSWAP) {
ret = bswap16(ret);
}
return ret;
}
uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint32_t ret;
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
ret = ldl_p(haddr);
clear_helper_retaddr();
if (get_memop(oi) & MO_BSWAP) {
ret = bswap32(ret);
}
return ret;
}
uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
MemOpIdx oi, uintptr_t ra)
{
void *haddr;
uint64_t ret;
haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
ret = ldq_p(haddr);
clear_helper_retaddr();
if (get_memop(oi) & MO_BSWAP) {
ret = bswap64(ret);
}
return ret;
}
#include "ldst_common.c.inc"
/*
* Do not allow unaligned operations to proceed. Return the host address.
*
* @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
*/
static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
int size, uintptr_t retaddr)
static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
MemOpIdx oi, int size, int prot,
uintptr_t retaddr)
{
MemOp mop = get_memop(oi);
int a_bits = get_alignment_bits(mop);
@@ -1395,7 +1236,8 @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
/* Enforce guest required alignment. */
if (unlikely(addr & ((1 << a_bits) - 1))) {
cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, retaddr);
MMUAccessType t = prot == PAGE_READ ? MMU_DATA_LOAD : MMU_DATA_STORE;
cpu_loop_exit_sigbus(env_cpu(env), addr, t, retaddr);
}
/* Enforce qemu required alignment. */
@@ -1433,7 +1275,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr, MemOpIdx oi,
#include "atomic_template.h"
#endif
#if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
#if HAVE_ATOMIC128 || HAVE_CMPXCHG128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif
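atomic_mmu_lookup() above rejects any address whose low bits are set under the required alignment: an address aligned to 2^a_bits must have its low a_bits bits clear. The mask test in isolation:

#include <stdint.h>
#include <stdio.h>

/* Return nonzero if addr violates 2^a_bits alignment, the same
 * test as `addr & ((1 << a_bits) - 1)` in atomic_mmu_lookup(). */
static int misaligned(uint64_t addr, int a_bits)
{
    return (addr & ((UINT64_C(1) << a_bits) - 1)) != 0;
}

int main(void)
{
    printf("0x1000 %% 16: %s\n", misaligned(0x1000, 4) ? "bad" : "ok");
    printf("0x1004 %% 16: %s\n", misaligned(0x1004, 4) ? "bad" : "ok");
    printf("0x1004 %%  4: %s\n", misaligned(0x1004, 2) ? "bad" : "ok");
    return 0;
}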


@@ -2061,9 +2061,6 @@ void audio_create_pdos(Audiodev *dev)
#ifdef CONFIG_AUDIO_PA
CASE(PA, pa, Pa);
#endif
#ifdef CONFIG_AUDIO_PIPEWIRE
CASE(PIPEWIRE, pipewire, Pipewire);
#endif
#ifdef CONFIG_AUDIO_SDL
CASE(SDL, sdl, Sdl);
#endif


@@ -35,8 +35,8 @@
static uint32_t toui32(const char *str)
{
uint64_t ret;
if (parse_uint_full(str, 10, &ret) || ret > UINT32_MAX) {
unsigned long long ret;
if (parse_uint_full(str, &ret, 10) || ret > UINT32_MAX) {
dolog("Invalid integer value `%s'\n", str);
exit(1);
}
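The hunk above tracks a parse_uint_full() signature change (result type and argument order). For illustration, a self-contained equivalent of the toui32() pattern built on plain strtoull, showing the same reject-garbage-and-overflow behaviour (a sketch, not QEMU's parser):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Parse a full base-10 string into a uint32_t; return 0 on
 * success, -1 on trailing garbage or overflow, like toui32(). */
static int toui32_sketch(const char *str, uint32_t *out)
{
    char *end;
    unsigned long long ret;

    errno = 0;
    ret = strtoull(str, &end, 10);
    if (errno || end == str || *end != '\0' || ret > UINT32_MAX) {
        return -1;
    }
    *out = (uint32_t)ret;
    return 0;
}

int main(void)
{
    uint32_t v;
    printf("42:   %d\n", toui32_sketch("42", &v));           /* 0 */
    printf("huge: %d\n", toui32_sketch("99999999999", &v));  /* -1 */
    printf("junk: %d\n", toui32_sketch("12x", &v));          /* -1 */
    return 0;
}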


@@ -362,10 +362,6 @@ AudiodevPerDirectionOptions *glue(audio_get_pdo_, TYPE)(Audiodev *dev)
case AUDIODEV_DRIVER_PA:
return qapi_AudiodevPaPerDirectionOptions_base(dev->u.pa.TYPE);
#endif
#ifdef CONFIG_AUDIO_PIPEWIRE
case AUDIODEV_DRIVER_PIPEWIRE:
return qapi_AudiodevPipewirePerDirectionOptions_base(dev->u.pipewire.TYPE);
#endif
#ifdef CONFIG_AUDIO_SDL
case AUDIODEV_DRIVER_SDL:
return qapi_AudiodevSdlPerDirectionOptions_base(dev->u.sdl.TYPE);


@@ -29,11 +29,7 @@
#include "qemu/timer.h"
#include "qemu/dbus.h"
#ifdef G_OS_UNIX
#include <gio/gunixfdlist.h>
#endif
#include "ui/dbus.h"
#include "ui/dbus-display1.h"
#define AUDIO_CAP "dbus"
@@ -448,9 +444,7 @@ listener_in_vanished_cb(GDBusConnection *connection,
static gboolean
dbus_audio_register_listener(AudioState *s,
GDBusMethodInvocation *invocation,
#ifdef G_OS_UNIX
GUnixFDList *fd_list,
#endif
GVariant *arg_listener,
bool out)
{
@@ -477,11 +471,6 @@ dbus_audio_register_listener(AudioState *s,
return DBUS_METHOD_INVOCATION_HANDLED;
}
#ifdef G_OS_WIN32
if (!dbus_win32_import_socket(invocation, arg_listener, &fd)) {
return DBUS_METHOD_INVOCATION_HANDLED;
}
#else
fd = g_unix_fd_list_get(fd_list, g_variant_get_handle(arg_listener), &err);
if (err) {
g_dbus_method_invocation_return_error(invocation,
@@ -491,7 +480,6 @@ dbus_audio_register_listener(AudioState *s,
err->message);
return DBUS_METHOD_INVOCATION_HANDLED;
}
#endif
socket = g_socket_new_from_fd(fd, &err);
if (err) {
@@ -500,28 +488,15 @@ dbus_audio_register_listener(AudioState *s,
DBUS_DISPLAY_ERROR_FAILED,
"Couldn't make a socket: %s",
err->message);
#ifdef G_OS_WIN32
closesocket(fd);
#else
close(fd);
#endif
return DBUS_METHOD_INVOCATION_HANDLED;
}
socket_conn = g_socket_connection_factory_create_connection(socket);
if (out) {
qemu_dbus_display1_audio_complete_register_out_listener(
da->iface, invocation
#ifdef G_OS_UNIX
, NULL
#endif
);
da->iface, invocation, NULL);
} else {
qemu_dbus_display1_audio_complete_register_in_listener(
da->iface, invocation
#ifdef G_OS_UNIX
, NULL
#endif
);
da->iface, invocation, NULL);
}
listener_conn =
@@ -599,32 +574,22 @@ dbus_audio_register_listener(AudioState *s,
static gboolean
dbus_audio_register_out_listener(AudioState *s,
GDBusMethodInvocation *invocation,
#ifdef G_OS_UNIX
GUnixFDList *fd_list,
#endif
GVariant *arg_listener)
{
return dbus_audio_register_listener(s, invocation,
#ifdef G_OS_UNIX
fd_list,
#endif
arg_listener, true);
fd_list, arg_listener, true);
}
static gboolean
dbus_audio_register_in_listener(AudioState *s,
GDBusMethodInvocation *invocation,
#ifdef G_OS_UNIX
GUnixFDList *fd_list,
#endif
GVariant *arg_listener)
{
return dbus_audio_register_listener(s, invocation,
#ifdef G_OS_UNIX
fd_list,
#endif
arg_listener, false);
fd_list, arg_listener, false);
}
static void


@@ -1,5 +1,5 @@
system_ss.add([spice_headers, files('audio.c')])
system_ss.add(files(
softmmu_ss.add([spice_headers, files('audio.c')])
softmmu_ss.add(files(
'audio-hmp-cmds.c',
'audio_legacy.c',
'mixeng.c',
@@ -8,8 +8,8 @@ system_ss.add(files(
'wavcapture.c',
))
system_ss.add(when: coreaudio, if_true: files('coreaudio.m'))
system_ss.add(when: dsound, if_true: files('dsoundaudio.c', 'audio_win_int.c'))
softmmu_ss.add(when: coreaudio, if_true: files('coreaudio.m'))
softmmu_ss.add(when: dsound, if_true: files('dsoundaudio.c', 'audio_win_int.c'))
audio_modules = {}
foreach m : [
@@ -19,7 +19,6 @@ foreach m : [
['sdl', sdl, files('sdlaudio.c')],
['jack', jack, files('jackaudio.c')],
['sndio', sndio, files('sndioaudio.c')],
['pipewire', pipewire, files('pwaudio.c')],
['spice', spice, files('spiceaudio.c')]
]
if m[1].found()
@@ -31,7 +30,7 @@ endforeach
if dbus_display
module_ss = ss.source_set()
module_ss.add(when: [gio, pixman], if_true: files('dbusaudio.c'))
module_ss.add(when: gio, if_true: files('dbusaudio.c'))
audio_modules += {'dbus': module_ss}
endif


@@ -1,857 +0,0 @@
/*
* QEMU PipeWire audio driver
*
* Copyright (c) 2023 Red Hat Inc.
*
* Author: Dorinda Bassey <dbassey@redhat.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include "qemu/module.h"
#include "audio.h"
#include <errno.h>
#include "qemu/error-report.h"
#include <spa/param/audio/format-utils.h>
#include <spa/utils/ringbuffer.h>
#include <spa/utils/result.h>
#include <spa/param/props.h>
#include <pipewire/pipewire.h>
#include "trace.h"
#define AUDIO_CAP "pipewire"
#define RINGBUFFER_SIZE (1u << 22)
#define RINGBUFFER_MASK (RINGBUFFER_SIZE - 1)
#include "audio_int.h"
typedef struct pwvolume {
uint32_t channels;
float values[SPA_AUDIO_MAX_CHANNELS];
} pwvolume;
typedef struct pwaudio {
Audiodev *dev;
struct pw_thread_loop *thread_loop;
struct pw_context *context;
struct pw_core *core;
struct spa_hook core_listener;
int last_seq, pending_seq, error;
} pwaudio;
typedef struct PWVoice {
pwaudio *g;
struct pw_stream *stream;
struct spa_hook stream_listener;
struct spa_audio_info_raw info;
uint32_t highwater_mark;
uint32_t frame_size, req;
struct spa_ringbuffer ring;
uint8_t buffer[RINGBUFFER_SIZE];
pwvolume volume;
bool muted;
} PWVoice;
typedef struct PWVoiceOut {
HWVoiceOut hw;
PWVoice v;
} PWVoiceOut;
typedef struct PWVoiceIn {
HWVoiceIn hw;
PWVoice v;
} PWVoiceIn;
#define PW_VOICE_IN(v) ((PWVoiceIn *)v)
#define PW_VOICE_OUT(v) ((PWVoiceOut *)v)
static void
stream_destroy(void *data)
{
PWVoice *v = (PWVoice *) data;
spa_hook_remove(&v->stream_listener);
v->stream = NULL;
}
/* output data processing function: read playback data from the ring buffer */
static void
playback_on_process(void *data)
{
PWVoice *v = data;
void *p;
struct pw_buffer *b;
struct spa_buffer *buf;
uint32_t req, index, n_bytes;
int32_t avail;
assert(v->stream);
/* obtain a buffer to read from */
b = pw_stream_dequeue_buffer(v->stream);
if (b == NULL) {
error_report("out of buffers: %s", strerror(errno));
return;
}
buf = b->buffer;
p = buf->datas[0].data;
if (p == NULL) {
return;
}
/* calculate the total number of bytes to read from the buffer */
req = b->requested * v->frame_size;
if (req == 0) {
req = v->req;
}
n_bytes = SPA_MIN(req, buf->datas[0].maxsize);
/* get the number of bytes available to read from the buffer */
avail = spa_ringbuffer_get_read_index(&v->ring, &index);
if (avail <= 0) {
PWVoiceOut *vo = container_of(data, PWVoiceOut, v);
audio_pcm_info_clear_buf(&vo->hw.info, p, n_bytes / v->frame_size);
} else {
if ((uint32_t) avail < n_bytes) {
/*
* PipeWire immediately calls this callback again if we provide
* less than n_bytes. Then audio_pcm_info_clear_buf() fills the
* rest of the buffer with silence.
*/
n_bytes = avail;
}
spa_ringbuffer_read_data(&v->ring,
v->buffer, RINGBUFFER_SIZE,
index & RINGBUFFER_MASK, p, n_bytes);
index += n_bytes;
spa_ringbuffer_read_update(&v->ring, index);
}
buf->datas[0].chunk->offset = 0;
buf->datas[0].chunk->stride = v->frame_size;
buf->datas[0].chunk->size = n_bytes;
/* queue the buffer for playback */
pw_stream_queue_buffer(v->stream, b);
}
/* input data processing function: write captured data into the ring buffer */
static void
capture_on_process(void *data)
{
PWVoice *v = (PWVoice *) data;
void *p;
struct pw_buffer *b;
struct spa_buffer *buf;
int32_t filled;
uint32_t index, offs, n_bytes;
assert(v->stream);
/* obtain a buffer */
b = pw_stream_dequeue_buffer(v->stream);
if (b == NULL) {
error_report("out of buffers: %s", strerror(errno));
return;
}
/* Write data into buffer */
buf = b->buffer;
p = buf->datas[0].data;
if (p == NULL) {
return;
}
offs = SPA_MIN(buf->datas[0].chunk->offset, buf->datas[0].maxsize);
n_bytes = SPA_MIN(buf->datas[0].chunk->size, buf->datas[0].maxsize - offs);
filled = spa_ringbuffer_get_write_index(&v->ring, &index);
if (filled < 0) {
error_report("%p: underrun write:%u filled:%d", p, index, filled);
} else {
if ((uint32_t) filled + n_bytes > RINGBUFFER_SIZE) {
error_report("%p: overrun write:%u filled:%d + size:%u > max:%u",
p, index, filled, n_bytes, RINGBUFFER_SIZE);
}
}
spa_ringbuffer_write_data(&v->ring,
v->buffer, RINGBUFFER_SIZE,
index & RINGBUFFER_MASK,
SPA_PTROFF(p, offs, void), n_bytes);
index += n_bytes;
spa_ringbuffer_write_update(&v->ring, index);
/* queue the buffer back to receive more capture data */
pw_stream_queue_buffer(v->stream, b);
}
static void
on_stream_state_changed(void *data, enum pw_stream_state old,
enum pw_stream_state state, const char *error)
{
PWVoice *v = (PWVoice *) data;
trace_pw_state_changed(pw_stream_get_node_id(v->stream),
pw_stream_state_as_string(state));
}
static const struct pw_stream_events capture_stream_events = {
PW_VERSION_STREAM_EVENTS,
.destroy = stream_destroy,
.state_changed = on_stream_state_changed,
.process = capture_on_process
};
static const struct pw_stream_events playback_stream_events = {
PW_VERSION_STREAM_EVENTS,
.destroy = stream_destroy,
.state_changed = on_stream_state_changed,
.process = playback_on_process
};
static size_t
qpw_read(HWVoiceIn *hw, void *data, size_t len)
{
PWVoiceIn *pw = (PWVoiceIn *) hw;
PWVoice *v = &pw->v;
pwaudio *c = v->g;
const char *error = NULL;
size_t l;
int32_t avail;
uint32_t index;
pw_thread_loop_lock(c->thread_loop);
if (pw_stream_get_state(v->stream, &error) != PW_STREAM_STATE_STREAMING) {
/* wait for stream to become ready */
l = 0;
goto done_unlock;
}
/* get the number of bytes available to read from the buffer */
avail = spa_ringbuffer_get_read_index(&v->ring, &index);
trace_pw_read(avail, index, len);
if (avail < (int32_t) len) {
len = avail;
}
spa_ringbuffer_read_data(&v->ring,
v->buffer, RINGBUFFER_SIZE,
index & RINGBUFFER_MASK, data, len);
index += len;
spa_ringbuffer_read_update(&v->ring, index);
l = len;
done_unlock:
pw_thread_loop_unlock(c->thread_loop);
return l;
}
static size_t qpw_buffer_get_free(HWVoiceOut *hw)
{
PWVoiceOut *pw = (PWVoiceOut *)hw;
PWVoice *v = &pw->v;
pwaudio *c = v->g;
const char *error = NULL;
int32_t filled, avail;
uint32_t index;
pw_thread_loop_lock(c->thread_loop);
if (pw_stream_get_state(v->stream, &error) != PW_STREAM_STATE_STREAMING) {
/* wait for stream to become ready */
avail = 0;
goto done_unlock;
}
filled = spa_ringbuffer_get_write_index(&v->ring, &index);
avail = v->highwater_mark - filled;
done_unlock:
pw_thread_loop_unlock(c->thread_loop);
return avail;
}
static size_t
qpw_write(HWVoiceOut *hw, void *data, size_t len)
{
PWVoiceOut *pw = (PWVoiceOut *) hw;
PWVoice *v = &pw->v;
pwaudio *c = v->g;
const char *error = NULL;
int32_t filled, avail;
uint32_t index;
pw_thread_loop_lock(c->thread_loop);
if (pw_stream_get_state(v->stream, &error) != PW_STREAM_STATE_STREAMING) {
/* wait for stream to become ready */
len = 0;
goto done_unlock;
}
filled = spa_ringbuffer_get_write_index(&v->ring, &index);
avail = v->highwater_mark - filled;
trace_pw_write(filled, avail, index, len);
if (len > avail) {
len = avail;
}
if (filled < 0) {
error_report("%p: underrun write:%u filled:%d", pw, index, filled);
} else {
if ((uint32_t) filled + len > RINGBUFFER_SIZE) {
error_report("%p: overrun write:%u filled:%d + size:%zu > max:%u",
pw, index, filled, len, RINGBUFFER_SIZE);
}
}
spa_ringbuffer_write_data(&v->ring,
v->buffer, RINGBUFFER_SIZE,
index & RINGBUFFER_MASK, data, len);
index += len;
spa_ringbuffer_write_update(&v->ring, index);
done_unlock:
pw_thread_loop_unlock(c->thread_loop);
return len;
}
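The PipeWire voice keeps a power-of-two ring buffer with free-running 32-bit indices: the fill level is simply write_index - read_index, and positions wrap via an AND with RINGBUFFER_MASK. A minimal standalone model of that arithmetic:

#include <stdint.h>
#include <stdio.h>

#define RB_SIZE 16u            /* must be a power of two */
#define RB_MASK (RB_SIZE - 1)

static uint8_t rb[RB_SIZE];
static uint32_t rd, wr;        /* free-running indices */

static void rb_write(const void *src, uint32_t n)
{
    const uint8_t *p = src;
    for (uint32_t i = 0; i < n; i++) {
        rb[(wr + i) & RB_MASK] = p[i];   /* wrap with the mask */
    }
    wr += n;                   /* the index keeps counting up */
}

static void rb_read(void *dst, uint32_t n)
{
    uint8_t *p = dst;
    for (uint32_t i = 0; i < n; i++) {
        p[i] = rb[(rd + i) & RB_MASK];
    }
    rd += n;
}

int main(void)
{
    char out[6] = { 0 };
    rb_write("hello", 5);
    printf("filled=%u\n", wr - rd);     /* 5, like the driver's filled */
    rb_read(out, 5);
    printf("read=%s filled=%u\n", out, wr - rd);
    return 0;
}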
static int
audfmt_to_pw(AudioFormat fmt, int endianness)
{
int format;
switch (fmt) {
case AUDIO_FORMAT_S8:
format = SPA_AUDIO_FORMAT_S8;
break;
case AUDIO_FORMAT_U8:
format = SPA_AUDIO_FORMAT_U8;
break;
case AUDIO_FORMAT_S16:
format = endianness ? SPA_AUDIO_FORMAT_S16_BE : SPA_AUDIO_FORMAT_S16_LE;
break;
case AUDIO_FORMAT_U16:
format = endianness ? SPA_AUDIO_FORMAT_U16_BE : SPA_AUDIO_FORMAT_U16_LE;
break;
case AUDIO_FORMAT_S32:
format = endianness ? SPA_AUDIO_FORMAT_S32_BE : SPA_AUDIO_FORMAT_S32_LE;
break;
case AUDIO_FORMAT_U32:
format = endianness ? SPA_AUDIO_FORMAT_U32_BE : SPA_AUDIO_FORMAT_U32_LE;
break;
case AUDIO_FORMAT_F32:
format = endianness ? SPA_AUDIO_FORMAT_F32_BE : SPA_AUDIO_FORMAT_F32_LE;
break;
default:
dolog("Internal logic error: Bad audio format %d\n", fmt);
format = SPA_AUDIO_FORMAT_U8;
break;
}
return format;
}
static AudioFormat
pw_to_audfmt(enum spa_audio_format fmt, int *endianness,
uint32_t *sample_size)
{
switch (fmt) {
case SPA_AUDIO_FORMAT_S8:
*sample_size = 1;
return AUDIO_FORMAT_S8;
case SPA_AUDIO_FORMAT_U8:
*sample_size = 1;
return AUDIO_FORMAT_U8;
case SPA_AUDIO_FORMAT_S16_BE:
*sample_size = 2;
*endianness = 1;
return AUDIO_FORMAT_S16;
case SPA_AUDIO_FORMAT_S16_LE:
*sample_size = 2;
*endianness = 0;
return AUDIO_FORMAT_S16;
case SPA_AUDIO_FORMAT_U16_BE:
*sample_size = 2;
*endianness = 1;
return AUDIO_FORMAT_U16;
case SPA_AUDIO_FORMAT_U16_LE:
*sample_size = 2;
*endianness = 0;
return AUDIO_FORMAT_U16;
case SPA_AUDIO_FORMAT_S32_BE:
*sample_size = 4;
*endianness = 1;
return AUDIO_FORMAT_S32;
case SPA_AUDIO_FORMAT_S32_LE:
*sample_size = 4;
*endianness = 0;
return AUDIO_FORMAT_S32;
case SPA_AUDIO_FORMAT_U32_BE:
*sample_size = 4;
*endianness = 1;
return AUDIO_FORMAT_U32;
case SPA_AUDIO_FORMAT_U32_LE:
*sample_size = 4;
*endianness = 0;
return AUDIO_FORMAT_U32;
case SPA_AUDIO_FORMAT_F32_BE:
*sample_size = 4;
*endianness = 1;
return AUDIO_FORMAT_F32;
case SPA_AUDIO_FORMAT_F32_LE:
*sample_size = 4;
*endianness = 0;
return AUDIO_FORMAT_F32;
default:
*sample_size = 1;
dolog("Internal logic error: Bad spa_audio_format %d\n", fmt);
return AUDIO_FORMAT_U8;
}
}
static int
qpw_stream_new(pwaudio *c, PWVoice *v, const char *stream_name,
const char *name, enum spa_direction dir)
{
int res;
uint32_t n_params;
const struct spa_pod *params[2];
uint8_t buffer[1024];
struct spa_pod_builder b;
uint64_t buf_samples;
struct pw_properties *props;
props = pw_properties_new(NULL, NULL);
if (!props) {
error_report("Failed to create PW properties: %s", g_strerror(errno));
return -1;
}
/* 75% of the timer period for faster updates */
buf_samples = (uint64_t)v->g->dev->timer_period * v->info.rate
* 3 / 4 / 1000000;
pw_properties_setf(props, PW_KEY_NODE_LATENCY, "%" PRIu64 "/%u",
buf_samples, v->info.rate);
trace_pw_period(buf_samples, v->info.rate);
if (name) {
pw_properties_set(props, PW_KEY_TARGET_OBJECT, name);
}
v->stream = pw_stream_new(c->core, stream_name, props);
if (v->stream == NULL) {
error_report("Failed to create PW stream: %s", g_strerror(errno));
return -1;
}
if (dir == SPA_DIRECTION_INPUT) {
pw_stream_add_listener(v->stream,
&v->stream_listener, &capture_stream_events, v);
} else {
pw_stream_add_listener(v->stream,
&v->stream_listener, &playback_stream_events, v);
}
n_params = 0;
spa_pod_builder_init(&b, buffer, sizeof(buffer));
params[n_params++] = spa_format_audio_raw_build(&b,
SPA_PARAM_EnumFormat,
&v->info);
/* connect the stream to a sink or source */
res = pw_stream_connect(v->stream,
dir ==
SPA_DIRECTION_INPUT ? PW_DIRECTION_INPUT :
PW_DIRECTION_OUTPUT, PW_ID_ANY,
PW_STREAM_FLAG_AUTOCONNECT |
PW_STREAM_FLAG_INACTIVE |
PW_STREAM_FLAG_MAP_BUFFERS |
PW_STREAM_FLAG_RT_PROCESS, params, n_params);
if (res < 0) {
error_report("Failed to connect PW stream: %s", g_strerror(errno));
pw_stream_destroy(v->stream);
return -1;
}
return 0;
}
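qpw_stream_new() above requests a node latency of 75% of the audio timer period, converted to frames. Worked through with hypothetical numbers (a 10 ms timer period and a 48 kHz stream):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t timer_period = 10000;  /* microseconds, assumed value */
    uint32_t rate = 48000;          /* frames per second, assumed */

    /* Same computation as qpw_stream_new():
     * 75% of the timer period, expressed in frames. */
    uint64_t buf_samples = timer_period * rate * 3 / 4 / 1000000;

    /* 360/48000 frames = 7.5 ms, i.e. 75% of 10 ms. */
    printf("node latency: %llu/%u\n",
           (unsigned long long)buf_samples, rate);
    return 0;
}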
static void
qpw_set_position(uint32_t channels, uint32_t position[SPA_AUDIO_MAX_CHANNELS])
{
memcpy(position, (uint32_t[SPA_AUDIO_MAX_CHANNELS]) { SPA_AUDIO_CHANNEL_UNKNOWN, },
sizeof(uint32_t) * SPA_AUDIO_MAX_CHANNELS);
/*
* TODO: This currently assumes that the only frontend supporting more
* than two channels is usb-audio. We will need some means to set channel
* order when a new frontend gains multi-channel support.
*/
switch (channels) {
case 8:
position[6] = SPA_AUDIO_CHANNEL_SL;
position[7] = SPA_AUDIO_CHANNEL_SR;
/* fallthrough */
case 6:
position[2] = SPA_AUDIO_CHANNEL_FC;
position[3] = SPA_AUDIO_CHANNEL_LFE;
position[4] = SPA_AUDIO_CHANNEL_RL;
position[5] = SPA_AUDIO_CHANNEL_RR;
/* fallthrough */
case 2:
position[0] = SPA_AUDIO_CHANNEL_FL;
position[1] = SPA_AUDIO_CHANNEL_FR;
break;
case 1:
position[0] = SPA_AUDIO_CHANNEL_MONO;
break;
default:
dolog("Internal error: unsupported channel count %d\n", channels);
}
}
static int
qpw_init_out(HWVoiceOut *hw, struct audsettings *as, void *drv_opaque)
{
PWVoiceOut *pw = (PWVoiceOut *) hw;
PWVoice *v = &pw->v;
struct audsettings obt_as = *as;
pwaudio *c = v->g = drv_opaque;
AudiodevPipewireOptions *popts = &c->dev->u.pipewire;
AudiodevPipewirePerDirectionOptions *ppdo = popts->out;
int r;
pw_thread_loop_lock(c->thread_loop);
v->info.format = audfmt_to_pw(as->fmt, as->endianness);
v->info.channels = as->nchannels;
qpw_set_position(as->nchannels, v->info.position);
v->info.rate = as->freq;
obt_as.fmt =
pw_to_audfmt(v->info.format, &obt_as.endianness, &v->frame_size);
v->frame_size *= as->nchannels;
v->req = (uint64_t)c->dev->timer_period * v->info.rate
* 1 / 2 / 1000000 * v->frame_size;
/* create a new stream for playback */
r = qpw_stream_new(c, v, ppdo->stream_name ? : c->dev->id,
ppdo->name, SPA_DIRECTION_OUTPUT);
if (r < 0) {
pw_thread_loop_unlock(c->thread_loop);
return -1;
}
/* report the audio format we support */
audio_pcm_init_info(&hw->info, &obt_as);
/* report the buffer size to qemu */
hw->samples = audio_buffer_frames(
qapi_AudiodevPipewirePerDirectionOptions_base(ppdo), &obt_as, 46440);
v->highwater_mark = MIN(RINGBUFFER_SIZE,
(ppdo->has_latency ? ppdo->latency : 46440)
* (uint64_t)v->info.rate / 1000000 * v->frame_size);
pw_thread_loop_unlock(c->thread_loop);
return 0;
}
static int
qpw_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
{
PWVoiceIn *pw = (PWVoiceIn *) hw;
PWVoice *v = &pw->v;
struct audsettings obt_as = *as;
pwaudio *c = v->g = drv_opaque;
AudiodevPipewireOptions *popts = &c->dev->u.pipewire;
AudiodevPipewirePerDirectionOptions *ppdo = popts->in;
int r;
pw_thread_loop_lock(c->thread_loop);
v->info.format = audfmt_to_pw(as->fmt, as->endianness);
v->info.channels = as->nchannels;
qpw_set_position(as->nchannels, v->info.position);
v->info.rate = as->freq;
obt_as.fmt =
pw_to_audfmt(v->info.format, &obt_as.endianness, &v->frame_size);
v->frame_size *= as->nchannels;
/* create a new stream for recording */
r = qpw_stream_new(c, v, ppdo->stream_name ? : c->dev->id,
ppdo->name, SPA_DIRECTION_INPUT);
if (r < 0) {
pw_thread_loop_unlock(c->thread_loop);
return -1;
}
/* report the audio format we support */
audio_pcm_init_info(&hw->info, &obt_as);
/* report the buffer size to qemu */
hw->samples = audio_buffer_frames(
qapi_AudiodevPipewirePerDirectionOptions_base(ppdo), &obt_as, 46440);
pw_thread_loop_unlock(c->thread_loop);
return 0;
}
static void
qpw_voice_fini(PWVoice *v)
{
pwaudio *c = v->g;
if (!v->stream) {
return;
}
pw_thread_loop_lock(c->thread_loop);
pw_stream_destroy(v->stream);
v->stream = NULL;
pw_thread_loop_unlock(c->thread_loop);
}
static void
qpw_fini_out(HWVoiceOut *hw)
{
qpw_voice_fini(&PW_VOICE_OUT(hw)->v);
}
static void
qpw_fini_in(HWVoiceIn *hw)
{
qpw_voice_fini(&PW_VOICE_IN(hw)->v);
}
static void
qpw_voice_set_enabled(PWVoice *v, bool enable)
{
pwaudio *c = v->g;
pw_thread_loop_lock(c->thread_loop);
pw_stream_set_active(v->stream, enable);
pw_thread_loop_unlock(c->thread_loop);
}
static void
qpw_enable_out(HWVoiceOut *hw, bool enable)
{
qpw_voice_set_enabled(&PW_VOICE_OUT(hw)->v, enable);
}
static void
qpw_enable_in(HWVoiceIn *hw, bool enable)
{
qpw_voice_set_enabled(&PW_VOICE_IN(hw)->v, enable);
}
static void
qpw_voice_set_volume(PWVoice *v, Volume *vol)
{
pwaudio *c = v->g;
int i, ret;
pw_thread_loop_lock(c->thread_loop);
v->volume.channels = vol->channels;
for (i = 0; i < vol->channels; ++i) {
v->volume.values[i] = (float)vol->vol[i] / 255;
}
ret = pw_stream_set_control(v->stream,
SPA_PROP_channelVolumes, v->volume.channels, v->volume.values, 0);
trace_pw_vol(ret == 0 ? "success" : "failed");
v->muted = vol->mute;
float val = v->muted ? 1.f : 0.f;
ret = pw_stream_set_control(v->stream, SPA_PROP_mute, 1, &val, 0);
pw_thread_loop_unlock(c->thread_loop);
}
static void
qpw_volume_out(HWVoiceOut *hw, Volume *vol)
{
qpw_voice_set_volume(&PW_VOICE_OUT(hw)->v, vol);
}
static void
qpw_volume_in(HWVoiceIn *hw, Volume *vol)
{
qpw_voice_set_volume(&PW_VOICE_IN(hw)->v, vol);
}
static int wait_resync(pwaudio *pw)
{
int res;
pw->pending_seq = pw_core_sync(pw->core, PW_ID_CORE, pw->pending_seq);
while (true) {
pw_thread_loop_wait(pw->thread_loop);
res = pw->error;
if (res < 0) {
pw->error = 0;
return res;
}
if (pw->pending_seq == pw->last_seq) {
break;
}
}
return 0;
}
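wait_resync() is the classic "bump a sequence number, then sleep until the peer echoes it back" handshake: pw_core_sync() advances pending_seq, and on_core_done() records last_seq and wakes the loop. The same bookkeeping with a plain mutex and condition variable instead of the PipeWire thread loop (names hypothetical):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int pending_seq, last_seq;

/* Stands in for on_core_done(): the "server" echoes the sequence. */
static void *server(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&lock);
    last_seq = pending_seq;
    pthread_cond_signal(&cond);
    pthread_mutex_unlock(&lock);
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_mutex_lock(&lock);
    pending_seq++;                      /* like pw_core_sync() */
    pthread_create(&t, NULL, server, NULL);
    while (last_seq != pending_seq) {   /* like wait_resync() */
        pthread_cond_wait(&cond, &lock);
    }
    pthread_mutex_unlock(&lock);
    pthread_join(t, NULL);
    puts("resynced");
    return 0;
}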
static void
on_core_error(void *data, uint32_t id, int seq, int res, const char *message)
{
pwaudio *pw = data;
error_report("error id:%u seq:%d res:%d (%s): %s",
id, seq, res, spa_strerror(res), message);
/* stop and exit the thread loop */
pw_thread_loop_signal(pw->thread_loop, FALSE);
}
static void
on_core_done(void *data, uint32_t id, int seq)
{
pwaudio *pw = data;
assert(id == PW_ID_CORE);
pw->last_seq = seq;
if (pw->pending_seq == seq) {
/* stop and exit the thread loop */
pw_thread_loop_signal(pw->thread_loop, FALSE);
}
}
static const struct pw_core_events core_events = {
PW_VERSION_CORE_EVENTS,
.done = on_core_done,
.error = on_core_error,
};
static void *
qpw_audio_init(Audiodev *dev)
{
g_autofree pwaudio *pw = g_new0(pwaudio, 1);
assert(dev->driver == AUDIODEV_DRIVER_PIPEWIRE);
trace_pw_audio_init();
pw_init(NULL, NULL);
pw->dev = dev;
pw->thread_loop = pw_thread_loop_new("PipeWire thread loop", NULL);
if (pw->thread_loop == NULL) {
error_report("Could not create PipeWire loop: %s", g_strerror(errno));
goto fail;
}
pw->context =
pw_context_new(pw_thread_loop_get_loop(pw->thread_loop), NULL, 0);
if (pw->context == NULL) {
error_report("Could not create PipeWire context: %s", g_strerror(errno));
goto fail;
}
if (pw_thread_loop_start(pw->thread_loop) < 0) {
error_report("Could not start PipeWire loop: %s", g_strerror(errno));
goto fail;
}
pw_thread_loop_lock(pw->thread_loop);
pw->core = pw_context_connect(pw->context, NULL, 0);
if (pw->core == NULL) {
pw_thread_loop_unlock(pw->thread_loop);
goto fail;
}
if (pw_core_add_listener(pw->core, &pw->core_listener,
&core_events, pw) < 0) {
pw_thread_loop_unlock(pw->thread_loop);
goto fail;
}
if (wait_resync(pw) < 0) {
pw_thread_loop_unlock(pw->thread_loop);
}
pw_thread_loop_unlock(pw->thread_loop);
return g_steal_pointer(&pw);
fail:
AUD_log(AUDIO_CAP, "Failed to initialize PW context");
if (pw->thread_loop) {
pw_thread_loop_stop(pw->thread_loop);
}
g_clear_pointer(&pw->context, pw_context_destroy);
g_clear_pointer(&pw->thread_loop, pw_thread_loop_destroy);
return NULL;
}
static void
qpw_audio_fini(void *opaque)
{
pwaudio *pw = opaque;
if (pw->thread_loop) {
pw_thread_loop_stop(pw->thread_loop);
}
if (pw->core) {
spa_hook_remove(&pw->core_listener);
spa_zero(pw->core_listener);
pw_core_disconnect(pw->core);
}
if (pw->context) {
pw_context_destroy(pw->context);
}
pw_thread_loop_destroy(pw->thread_loop);
g_free(pw);
}
static struct audio_pcm_ops qpw_pcm_ops = {
.init_out = qpw_init_out,
.fini_out = qpw_fini_out,
.write = qpw_write,
.buffer_get_free = qpw_buffer_get_free,
.run_buffer_out = audio_generic_run_buffer_out,
.enable_out = qpw_enable_out,
.volume_out = qpw_volume_out,
.volume_in = qpw_volume_in,
.init_in = qpw_init_in,
.fini_in = qpw_fini_in,
.read = qpw_read,
.run_buffer_in = audio_generic_run_buffer_in,
.enable_in = qpw_enable_in
};
static struct audio_driver pw_audio_driver = {
.name = "pipewire",
.descr = "http://www.pipewire.org/",
.init = qpw_audio_init,
.fini = qpw_audio_fini,
.pcm_ops = &qpw_pcm_ops,
.can_be_default = 1,
.max_voices_out = INT_MAX,
.max_voices_in = INT_MAX,
.voice_size_out = sizeof(PWVoiceOut),
.voice_size_in = sizeof(PWVoiceIn),
};
static void
register_audio_pw(void)
{
audio_driver_register(&pw_audio_driver);
}
type_init(register_audio_pw);


@@ -18,14 +18,6 @@ dbus_audio_register(const char *s, const char *dir) "sender = %s, dir = %s"
dbus_audio_put_buffer_out(size_t len) "len = %zu"
dbus_audio_read(size_t len) "len = %zu"
# pwaudio.c
pw_state_changed(int nodeid, const char *s) "node id: %d stream state: %s"
pw_read(int32_t avail, uint32_t index, size_t len) "avail=%d index=%u len=%zu"
pw_write(int32_t filled, int32_t avail, uint32_t index, size_t len) "filled=%d avail=%d index=%u len=%zu"
pw_vol(const char *ret) "set volume: %s"
pw_period(uint64_t quantum, uint32_t rate) "period =%" PRIu64 "/%u"
pw_audio_init(void) "Initialize PipeWire context"
# audio.c
audio_timer_start(int interval) "interval %d ms"
audio_timer_stop(void) ""


@@ -232,9 +232,9 @@ static void cryptodev_vhost_user_init(
backend->conf.max_auth_key_len = VHOST_USER_MAX_AUTH_KEY_LEN;
}
static int64_t cryptodev_vhost_user_crypto_create_session(
static int64_t cryptodev_vhost_user_sym_create_session(
CryptoDevBackend *backend,
CryptoDevBackendSessionInfo *sess_info,
CryptoDevBackendSymSessionInfo *sess_info,
uint32_t queue_index, Error **errp)
{
CryptoDevBackendClient *cc =
@@ -266,17 +266,18 @@ static int cryptodev_vhost_user_create_session(
void *opaque)
{
uint32_t op_code = sess_info->op_code;
CryptoDevBackendSymSessionInfo *sym_sess_info;
int64_t ret;
Error *local_error = NULL;
int status;
switch (op_code) {
case VIRTIO_CRYPTO_CIPHER_CREATE_SESSION:
case VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION:
case VIRTIO_CRYPTO_HASH_CREATE_SESSION:
case VIRTIO_CRYPTO_MAC_CREATE_SESSION:
case VIRTIO_CRYPTO_AEAD_CREATE_SESSION:
ret = cryptodev_vhost_user_crypto_create_session(backend, sess_info,
sym_sess_info = &sess_info->u.sym_sess_info;
ret = cryptodev_vhost_user_sym_create_session(backend, sym_sess_info,
queue_index, &local_error);
break;


@@ -532,7 +532,7 @@ static int cryptodev_backend_stats_query(Object *obj, void *data)
entry = g_new0(StatsResult, 1);
entry->provider = STATS_PROVIDER_CRYPTODEV;
entry->qom_path = object_get_canonical_path(obj);
entry->qom_path = g_strdup(object_get_canonical_path(obj));
entry->stats = stats_list;
QAPI_LIST_PREPEND(*stats_results, entry);


@@ -27,7 +27,6 @@ struct HostMemoryBackendFile {
char *mem_path;
uint64_t align;
uint64_t offset;
bool discard_data;
bool is_pmem;
bool readonly;
@@ -57,11 +56,9 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
ram_flags = backend->share ? RAM_SHARED : 0;
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
ram_flags |= RAM_NAMED_FILE;
memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
backend->size, fb->align, ram_flags,
fb->mem_path, fb->offset, fb->readonly,
errp);
fb->mem_path, fb->readonly, errp);
g_free(name);
#endif
}
@@ -128,36 +125,6 @@ static void file_memory_backend_set_align(Object *o, Visitor *v,
fb->align = val;
}
static void file_memory_backend_get_offset(Object *o, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
uint64_t val = fb->offset;
visit_type_size(v, name, &val, errp);
}
static void file_memory_backend_set_offset(Object *o, Visitor *v,
const char *name, void *opaque,
Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(o);
HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
uint64_t val;
if (host_memory_backend_mr_inited(backend)) {
error_setg(errp, "cannot change property '%s' of %s", name,
object_get_typename(o));
return;
}
if (!visit_type_size(v, name, &val, errp)) {
return;
}
fb->offset = val;
}
#ifdef CONFIG_LIBPMEM
static bool file_memory_backend_get_pmem(Object *o, Error **errp)
{
@@ -230,12 +197,6 @@ file_backend_class_init(ObjectClass *oc, void *data)
file_memory_backend_get_align,
file_memory_backend_set_align,
NULL, NULL);
object_class_property_add(oc, "offset", "int",
file_memory_backend_get_offset,
file_memory_backend_set_offset,
NULL, NULL);
object_class_property_set_description(oc, "offset",
"Offset into the target file (ex: 1G)");
#ifdef CONFIG_LIBPMEM
object_class_property_add_bool(oc, "pmem",
file_memory_backend_get_pmem, file_memory_backend_set_pmem);


@@ -1,4 +1,4 @@
system_ss.add([files(
softmmu_ss.add([files(
'cryptodev-builtin.c',
'cryptodev-hmp-cmds.c',
'cryptodev.c',
@@ -10,20 +10,20 @@ system_ss.add([files(
'confidential-guest-support.c',
), numa])
system_ss.add(when: 'CONFIG_POSIX', if_true: files('rng-random.c'))
system_ss.add(when: 'CONFIG_POSIX', if_true: files('hostmem-file.c'))
system_ss.add(when: 'CONFIG_LINUX', if_true: files('hostmem-memfd.c'))
softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('rng-random.c'))
softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files('hostmem-file.c'))
softmmu_ss.add(when: 'CONFIG_LINUX', if_true: files('hostmem-memfd.c'))
if keyutils.found()
system_ss.add(keyutils, files('cryptodev-lkcf.c'))
softmmu_ss.add(keyutils, files('cryptodev-lkcf.c'))
endif
if have_vhost_user
system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c'))
softmmu_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c'))
endif
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c'))
softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c'))
if have_vhost_user_crypto
system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c'))
softmmu_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c'))
endif
system_ss.add(when: gio, if_true: files('dbus-vmstate.c'))
system_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c'))
softmmu_ss.add(when: gio, if_true: files('dbus-vmstate.c'))
softmmu_ss.add(when: 'CONFIG_SGX', if_true: files('hostmem-epc.c'))
subdir('tpm')


@@ -1,6 +1,6 @@
if have_tpm
system_ss.add(files('tpm_backend.c'))
system_ss.add(files('tpm_util.c'))
system_ss.add(when: 'CONFIG_TPM_PASSTHROUGH', if_true: files('tpm_passthrough.c'))
system_ss.add(when: 'CONFIG_TPM_EMULATOR', if_true: files('tpm_emulator.c'))
softmmu_ss.add(files('tpm_backend.c'))
softmmu_ss.add(files('tpm_util.c'))
softmmu_ss.add(when: 'CONFIG_TPM_PASSTHROUGH', if_true: files('tpm_passthrough.c'))
softmmu_ss.add(when: 'CONFIG_TPM_EMULATOR', if_true: files('tpm_emulator.c'))
endif


@@ -100,6 +100,8 @@ bool tpm_backend_had_startup_error(TPMBackend *s)
void tpm_backend_deliver_request(TPMBackend *s, TPMBackendCmd *cmd)
{
ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
if (s->cmd != NULL) {
error_report("There is a TPM request pending");
return;
@@ -107,7 +109,7 @@ void tpm_backend_deliver_request(TPMBackend *s, TPMBackendCmd *cmd)
s->cmd = cmd;
object_ref(OBJECT(s));
thread_pool_submit_aio(tpm_backend_worker_thread, s,
thread_pool_submit_aio(pool, tpm_backend_worker_thread, s,
tpm_backend_request_completed, s);
}
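A hedged sketch of the stable-branch ThreadPool API this hunk restores (the explicit pool argument was later dropped upstream); example_work and example_tpm_done are hypothetical names:

static int example_work(void *opaque)
{
    /* runs in a worker thread; the return value becomes "ret" below */
    return 0;
}

static void example_tpm_done(void *opaque, int ret)
{
    /* completion callback, runs back in the submitting AioContext */
}

static void example_submit(void)
{
    ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
    thread_pool_submit_aio(pool, example_work, NULL, example_tpm_done, NULL);
}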

block.c

@@ -533,6 +533,7 @@ int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
int ret;
GLOBAL_STATE_CODE();
ERRP_GUARD();
assert_bdrv_graph_readable();
if (!drv->bdrv_co_create_opts) {
error_setg(errp, "Driver '%s' does not support image creation",
@@ -555,9 +556,8 @@ int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
* On success, return @blk's actual length.
* Otherwise, return -errno.
*/
static int64_t coroutine_fn GRAPH_UNLOCKED
create_file_fallback_truncate(BlockBackend *blk, int64_t minimum_size,
Error **errp)
static int64_t create_file_fallback_truncate(BlockBackend *blk,
int64_t minimum_size, Error **errp)
{
Error *local_err = NULL;
int64_t size;
@@ -565,14 +565,14 @@ create_file_fallback_truncate(BlockBackend *blk, int64_t minimum_size,
GLOBAL_STATE_CODE();
ret = blk_co_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
&local_err);
ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
&local_err);
if (ret < 0 && ret != -ENOTSUP) {
error_propagate(errp, local_err);
return ret;
}
size = blk_co_getlength(blk);
size = blk_getlength(blk);
if (size < 0) {
error_free(local_err);
error_setg_errno(errp, -size,
@@ -1610,11 +1610,10 @@ out:
* bdrv_refresh_total_sectors() which polls when called from non-coroutine
* context.
*/
static int no_coroutine_fn GRAPH_UNLOCKED
bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
QDict *options, int open_flags, Error **errp)
static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
const char *node_name, QDict *options,
int open_flags, Error **errp)
{
AioContext *ctx;
Error *local_err = NULL;
int i, ret;
GLOBAL_STATE_CODE();
@@ -1662,22 +1661,13 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF;
bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF;
/* Get the context after .bdrv_open, it can change the context */
ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not refresh total sector count");
aio_context_release(ctx);
return ret;
}
bdrv_graph_rdlock_main_loop();
bdrv_refresh_limits(bs, NULL, &local_err);
bdrv_graph_rdunlock_main_loop();
aio_context_release(ctx);
if (local_err) {
error_propagate(errp, local_err);
return -EINVAL;
@@ -2855,7 +2845,7 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
* Replaces the node that a BdrvChild points to without updating permissions.
*
* If @new_bs is non-NULL, the parent of @child must already be drained through
* @child and the caller must hold the AioContext lock for @new_bs.
* @child.
*/
static void bdrv_replace_child_noperm(BdrvChild *child,
BlockDriverState *new_bs)
@@ -2894,7 +2884,7 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
}
/* TODO Pull this up into the callers to avoid polling here */
bdrv_graph_wrlock(new_bs);
bdrv_graph_wrlock();
if (old_bs) {
if (child->klass->detach) {
child->klass->detach(child);
@@ -2990,10 +2980,6 @@ static TransactionActionDrv bdrv_attach_child_common_drv = {
* Function doesn't update permissions, caller is responsible for this.
*
* Returns new created child.
*
* The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
* @child_bs can move to a different AioContext in this function. Callers must
* make sure that their AioContext locking is still correct after this.
*/
static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
const char *child_name,
@@ -3004,7 +2990,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
Transaction *tran, Error **errp)
{
BdrvChild *new_child;
AioContext *parent_ctx, *new_child_ctx;
AioContext *parent_ctx;
AioContext *child_ctx = bdrv_get_aio_context(child_bs);
assert(child_class->get_parent_desc);
@@ -3055,12 +3041,6 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
}
}
new_child_ctx = bdrv_get_aio_context(child_bs);
if (new_child_ctx != child_ctx) {
aio_context_release(child_ctx);
aio_context_acquire(new_child_ctx);
}
bdrv_ref(child_bs);
/*
* Let every new BdrvChild start with a drained parent. Inserting the child
@@ -3090,20 +3070,11 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
};
tran_add(tran, &bdrv_attach_child_common_drv, s);
if (new_child_ctx != child_ctx) {
aio_context_release(new_child_ctx);
aio_context_acquire(child_ctx);
}
return new_child;
}
/*
* Function doesn't update permissions, caller is responsible for this.
*
* The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
* @child_bs can move to a different AioContext in this function. Callers must
* make sure that their AioContext locking is still correct after this.
*/
static BdrvChild *bdrv_attach_child_noperm(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
@@ -3367,10 +3338,6 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
* callers which don't need their own reference any more must call bdrv_unref().
*
* Function doesn't update permissions, caller is responsible for this.
*
* The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
* @child_bs can move to a different AioContext in this function. Callers must
* make sure that their AioContext locking is still correct after this.
*/
static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
@@ -3452,18 +3419,11 @@ static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
}
out:
bdrv_graph_rdlock_main_loop();
bdrv_refresh_limits(parent_bs, tran, NULL);
bdrv_graph_rdunlock_main_loop();
return 0;
}
/*
* The caller must hold the AioContext lock for @backing_hd. Both @bs and
* @backing_hd can move to a different AioContext in this function. Callers must
* make sure that their AioContext locking is still correct after this.
*/
static int bdrv_set_backing_noperm(BlockDriverState *bs,
BlockDriverState *backing_hd,
Transaction *tran, Error **errp)
@@ -3514,8 +3474,6 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
* itself, all options starting with "${bdref_key}." are considered part of the
* BlockdevRef.
*
* The caller must hold the main AioContext lock.
*
* TODO Can this be unified with bdrv_open_image()?
*/
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
@@ -3527,7 +3485,6 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
int ret = 0;
bool implicit_backing = false;
BlockDriverState *backing_hd;
AioContext *backing_hd_ctx;
QDict *options;
QDict *tmp_parent_options = NULL;
Error *local_err = NULL;
@@ -3612,12 +3569,8 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
/* Hook up the backing file link; drop our reference, bs owns the
* backing_hd reference now */
backing_hd_ctx = bdrv_get_aio_context(backing_hd);
aio_context_acquire(backing_hd_ctx);
ret = bdrv_set_backing_hd(bs, backing_hd, errp);
bdrv_unref(backing_hd);
aio_context_release(backing_hd_ctx);
if (ret < 0) {
goto free_exit;
}
@@ -3687,10 +3640,6 @@ done:
* BlockdevRef.
*
* The BlockdevRef will be removed from the options QDict.
*
* The caller must hold the lock of the main AioContext and no other AioContext.
* @parent can move to a different AioContext in this function. Callers must
* make sure that their AioContext locking is still correct after this.
*/
BdrvChild *bdrv_open_child(const char *filename,
QDict *options, const char *bdref_key,
@@ -3700,8 +3649,6 @@ BdrvChild *bdrv_open_child(const char *filename,
bool allow_none, Error **errp)
{
BlockDriverState *bs;
BdrvChild *child;
AioContext *ctx;
GLOBAL_STATE_CODE();
@@ -3711,21 +3658,12 @@ BdrvChild *bdrv_open_child(const char *filename,
return NULL;
}
ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
errp);
aio_context_release(ctx);
return child;
return bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
errp);
}
/*
* Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
*
* The caller must hold the lock of the main AioContext and no other AioContext.
* @parent can move to a different AioContext in this function. Callers must
* make sure that their AioContext locking is still correct after this.
*/
int bdrv_open_file_child(const char *filename,
QDict *options, const char *bdref_key,
@@ -3800,7 +3738,6 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
int64_t total_size;
QemuOpts *opts = NULL;
BlockDriverState *bs_snapshot = NULL;
AioContext *ctx = bdrv_get_aio_context(bs);
int ret;
GLOBAL_STATE_CODE();
@@ -3809,10 +3746,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
instead of opening 'filename' directly */
/* Get the required size from the image */
aio_context_acquire(ctx);
total_size = bdrv_getlength(bs);
aio_context_release(ctx);
if (total_size < 0) {
error_setg_errno(errp, -total_size, "Could not get image size");
goto out;
@@ -3846,10 +3780,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
goto out;
}
aio_context_acquire(ctx);
ret = bdrv_append(bs_snapshot, bs, errp);
aio_context_release(ctx);
if (ret < 0) {
bs_snapshot = NULL;
goto out;
@@ -3875,7 +3806,9 @@ out:
* should be opened. If specified, neither options nor a filename may be given,
* nor can an existing BDS be reused (that is, *pbs has to be NULL).
*
* The caller must always hold the main AioContext lock.
* The caller must always hold @filename AioContext lock, because this
* function eventually calls bdrv_refresh_total_sectors() which polls
* when called from non-coroutine context.
*/
static BlockDriverState * no_coroutine_fn
bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
@@ -3893,7 +3826,6 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
Error *local_err = NULL;
QDict *snapshot_options = NULL;
int snapshot_flags = 0;
AioContext *ctx = qemu_get_aio_context();
assert(!child_class || !flags);
assert(!child_class == !parent);
@@ -4031,13 +3963,9 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
/* Not requesting BLK_PERM_CONSISTENT_READ because we're only
* looking at the header to guess the image format. This works even
* in cases where a guest would not see a consistent state. */
ctx = bdrv_get_aio_context(file_bs);
aio_context_acquire(ctx);
file = blk_new(ctx, 0, BLK_PERM_ALL);
file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL);
blk_insert_bs(file, file_bs, &local_err);
bdrv_unref(file_bs);
aio_context_release(ctx);
if (local_err) {
goto fail;
}
@@ -4083,13 +4011,8 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
goto fail;
}
/* The AioContext could have changed during bdrv_open_common() */
ctx = bdrv_get_aio_context(bs);
if (file) {
aio_context_acquire(ctx);
blk_unref(file);
aio_context_release(ctx);
file = NULL;
}
@@ -4147,16 +4070,13 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
* (snapshot_bs); thus, we have to drop the strong reference to bs
* (which we obtained by calling bdrv_new()). bs will not be deleted,
* though, because the overlay still has a reference to it. */
aio_context_acquire(ctx);
bdrv_unref(bs);
aio_context_release(ctx);
bs = snapshot_bs;
}
return bs;
fail:
aio_context_acquire(ctx);
blk_unref(file);
qobject_unref(snapshot_options);
qobject_unref(bs->explicit_options);
@@ -4165,21 +4085,22 @@ fail:
bs->options = NULL;
bs->explicit_options = NULL;
bdrv_unref(bs);
aio_context_release(ctx);
error_propagate(errp, local_err);
return NULL;
close_and_fail:
aio_context_acquire(ctx);
bdrv_unref(bs);
aio_context_release(ctx);
qobject_unref(snapshot_options);
qobject_unref(options);
error_propagate(errp, local_err);
return NULL;
}
/* The caller must always hold the main AioContext lock. */
/*
* The caller must always hold @filename AioContext lock, because this
* function eventually calls bdrv_refresh_total_sectors() which polls
* when called from non-coroutine context.
*/
BlockDriverState *bdrv_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp)
{
@@ -4644,11 +4565,6 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
* backing BlockDriverState (or NULL).
*
* Return 0 on success, otherwise return < 0 and set @errp.
*
* The caller must hold the AioContext lock of @reopen_state->bs.
* @reopen_state->bs can move to a different AioContext in this function.
* Callers must make sure that their AioContext locking is still correct after
* this.
*/
static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
bool is_backing, Transaction *tran,
@@ -4661,8 +4577,6 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
const char *child_name = is_backing ? "backing" : "file";
QObject *value;
const char *str;
AioContext *ctx, *old_ctx;
int ret;
GLOBAL_STATE_CODE();
@@ -4727,22 +4641,8 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
reopen_state->old_file_bs = old_child_bs;
}
old_ctx = bdrv_get_aio_context(bs);
ctx = bdrv_get_aio_context(new_child_bs);
if (old_ctx != ctx) {
aio_context_release(old_ctx);
aio_context_acquire(ctx);
}
ret = bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
tran, errp);
if (old_ctx != ctx) {
aio_context_release(ctx);
aio_context_acquire(old_ctx);
}
return ret;
return bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
tran, errp);
}
/*
@@ -4761,7 +4661,6 @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
* It is the responsibility of the caller to then call the abort() or
* commit() for any other BDS that have been left in a prepare() state
*
* The caller must hold the AioContext lock of @reopen_state->bs.
*/
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue,
@@ -5018,9 +4917,7 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
qdict_del(bs->explicit_options, "backing");
qdict_del(bs->options, "backing");
bdrv_graph_rdlock_main_loop();
bdrv_refresh_limits(bs, NULL, NULL);
bdrv_graph_rdunlock_main_loop();
bdrv_refresh_total_sectors(bs, bs->total_sectors);
}
@@ -5419,9 +5316,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
out:
tran_finalize(tran, ret);
bdrv_graph_rdlock_main_loop();
bdrv_refresh_limits(bs_top, NULL, NULL);
bdrv_graph_rdunlock_main_loop();
if (new_context && old_context != new_context) {
aio_context_release(new_context);
@@ -5487,17 +5382,12 @@ static void bdrv_delete(BlockDriverState *bs)
* empty set of options. The reference to the QDict belongs to the block layer
* after the call (even on failure), so if the caller intends to reuse the
* dictionary, it needs to use qobject_ref() before calling bdrv_open.
*
* The caller holds the AioContext lock for @bs. It must make sure that @bs
* stays in the same AioContext, i.e. @options must not refer to nodes in a
* different AioContext.
*/
BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
int flags, Error **errp)
{
ERRP_GUARD();
int ret;
AioContext *ctx = bdrv_get_aio_context(bs);
BlockDriverState *new_node_bs = NULL;
const char *drvname, *node_name;
BlockDriver *drv;
@@ -5518,14 +5408,8 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
GLOBAL_STATE_CODE();
aio_context_release(ctx);
aio_context_acquire(qemu_get_aio_context());
new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
errp);
aio_context_release(qemu_get_aio_context());
aio_context_acquire(ctx);
assert(bdrv_get_aio_context(bs) == ctx);
options = NULL; /* bdrv_new_open_driver() eats options */
if (!new_node_bs) {
error_prepend(errp, "Could not create node: ");
@@ -5866,8 +5750,7 @@ exit:
* sums the size of all data-bearing children. (This excludes backing
* children.)
*/
static int64_t coroutine_fn GRAPH_RDLOCK
bdrv_sum_allocated_file_size(BlockDriverState *bs)
static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
{
BdrvChild *child;
int64_t child_size, sum = 0;
@@ -5895,7 +5778,6 @@ int64_t coroutine_fn bdrv_co_get_allocated_file_size(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
IO_CODE();
assert_bdrv_graph_readable();
if (!drv) {
return -ENOMEDIUM;
@@ -6465,8 +6347,6 @@ int coroutine_fn bdrv_co_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
int ret;
BlockDriver *drv = bs->drv;
IO_CODE();
assert_bdrv_graph_readable();
/* if bs->drv == NULL, bs is closed, so there's nothing to do here */
if (!drv) {
return -ENOMEDIUM;
@@ -6515,8 +6395,6 @@ BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs)
void coroutine_fn bdrv_co_debug_event(BlockDriverState *bs, BlkdebugEvent event)
{
IO_CODE();
assert_bdrv_graph_readable();
if (!bs || !bs->drv || !bs->drv->bdrv_co_debug_event) {
return;
}
@@ -7151,8 +7029,6 @@ void bdrv_img_create(const char *filename, const char *fmt,
return;
}
aio_context_acquire(qemu_get_aio_context());
/* Create parameter list */
create_opts = qemu_opts_append(create_opts, drv->create_opts);
create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
@@ -7246,7 +7122,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
if (!backing_fmt) {
error_setg(&local_err,
"Backing file specified without backing format");
error_append_hint(&local_err, "Detected format of %s.\n",
error_append_hint(&local_err, "Detected format of %s.",
bs->drv->format_name);
goto out;
}
@@ -7302,7 +7178,6 @@ out:
qemu_opts_del(opts);
qemu_opts_free(create_opts);
error_propagate(errp, local_err);
aio_context_release(qemu_get_aio_context());
}
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
@@ -7393,6 +7268,9 @@ static void bdrv_detach_aio_context(BlockDriverState *bs)
bs->drv->bdrv_detach_aio_context(bs);
}
if (bs->quiesce_counter) {
aio_enable_external(bs->aio_context);
}
bs->aio_context = NULL;
}
@@ -7402,6 +7280,10 @@ static void bdrv_attach_aio_context(BlockDriverState *bs,
BdrvAioNotifier *ban, *ban_tmp;
GLOBAL_STATE_CODE();
if (bs->quiesce_counter) {
aio_disable_external(new_context);
}
bs->aio_context = new_context;
if (bs->drv && bs->drv->bdrv_attach_aio_context) {
@@ -8085,25 +7967,6 @@ void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
return;
}
/*
* Non-zoned block drivers do not follow zoned storage constraints
* (i.e. sequential writes to zones). Refuse mixing zoned and non-zoned
* drivers in a graph.
*/
if (!parent_bs->drv->supports_zoned_children &&
child_bs->bl.zoned == BLK_Z_HM) {
/*
* The host-aware model allows zoned storage constraints and random
* write. Allow mixing host-aware and non-zoned drivers. Using
* host-aware device as a regular device.
*/
error_setg(errp, "Cannot add a %s child to a %s parent",
child_bs->bl.zoned == BLK_Z_HM ? "zoned" : "non-zoned",
parent_bs->drv->supports_zoned_children ?
"support zoned children" : "not support zoned children");
return;
}
if (!QLIST_EMPTY(&child_bs->parents)) {
error_setg(errp, "The node %s already has a parent",
child_bs->node_name);

block/amend.c

@@ -46,7 +46,6 @@ static int coroutine_fn blockdev_amend_run(Job *job, Error **errp)
{
BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common);
int ret;
GRAPH_RDLOCK_GUARD();
job_progress_set_remaining(&s->common, 1);
ret = s->bs->drv->bdrv_co_amend(s->bs, s->opts, s->force, errp);
@@ -55,8 +54,7 @@ static int coroutine_fn blockdev_amend_run(Job *job, Error **errp)
return ret;
}
static int GRAPH_RDLOCK
blockdev_amend_pre_run(BlockdevAmendJob *s, Error **errp)
static int blockdev_amend_pre_run(BlockdevAmendJob *s, Error **errp)
{
if (s->bs->drv->bdrv_amend_pre_run) {
return s->bs->drv->bdrv_amend_pre_run(s->bs, errp);
@@ -69,11 +67,9 @@ static void blockdev_amend_free(Job *job)
{
BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common);
bdrv_graph_rdlock_main_loop();
if (s->bs->drv->bdrv_amend_clean) {
s->bs->drv->bdrv_amend_clean(s->bs);
}
bdrv_graph_rdunlock_main_loop();
bdrv_unref(s->bs);
}
@@ -97,8 +93,6 @@ void qmp_x_blockdev_amend(const char *job_id,
BlockDriver *drv = bdrv_find_format(fmt);
BlockDriverState *bs;
GRAPH_RDLOCK_GUARD_MAINLOOP();
bs = bdrv_lookup_bs(NULL, node_name, errp);
if (!bs) {
return;

block/blkdebug.c

@@ -583,8 +583,8 @@ out:
return ret;
}
static int coroutine_fn rule_check(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, BlkdebugIOType iotype)
static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
BlkdebugIOType iotype)
{
BDRVBlkdebugState *s = bs->opaque;
BlkdebugRule *rule = NULL;

block/blkio.c

@@ -17,11 +17,20 @@
#include "qemu/error-report.h"
#include "qapi/qmp/qdict.h"
#include "qemu/module.h"
#include "sysemu/block-backend.h"
#include "exec/memory.h" /* for ram_block_discard_disable() */
#include "block/block-io.h"
/*
* Keep the QEMU BlockDriver names identical to the libblkio driver names.
* Using macros instead of typing out the string literals avoids typos.
*/
#define DRIVER_IO_URING "io_uring"
#define DRIVER_NVME_IO_URING "nvme-io_uring"
#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci"
#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"
/*
* Allocated bounce buffers are kept in a list sorted by buffer address.
*/
@@ -297,42 +306,33 @@ static void blkio_attach_aio_context(BlockDriverState *bs,
{
BDRVBlkioState *s = bs->opaque;
aio_set_fd_handler(new_context, s->completion_fd,
blkio_completion_fd_read, NULL,
aio_set_fd_handler(new_context,
s->completion_fd,
false,
blkio_completion_fd_read,
NULL,
blkio_completion_fd_poll,
blkio_completion_fd_poll_ready, bs);
blkio_completion_fd_poll_ready,
bs);
}
static void blkio_detach_aio_context(BlockDriverState *bs)
{
BDRVBlkioState *s = bs->opaque;
aio_set_fd_handler(bdrv_get_aio_context(bs), s->completion_fd, NULL, NULL,
NULL, NULL, NULL);
aio_set_fd_handler(bdrv_get_aio_context(bs),
s->completion_fd,
false, NULL, NULL, NULL, NULL, NULL);
}
/*
* Called by blk_io_unplug() or immediately if not plugged. Called without
* blkio_lock.
*/
static void blkio_unplug_fn(void *opaque)
{
BDRVBlkioState *s = opaque;
WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
blkioq_do_io(s->blkioq, NULL, 0, 0, NULL);
}
}
/*
* Schedule I/O submission after enqueuing a new request. Called without
* blkio_lock.
*/
/* Call with s->blkio_lock held to submit I/O after enqueuing a new request */
static void blkio_submit_io(BlockDriverState *bs)
{
BDRVBlkioState *s = bs->opaque;
if (qatomic_read(&bs->io_plugged) == 0) {
BDRVBlkioState *s = bs->opaque;
blk_io_plug_call(blkio_unplug_fn, s);
blkioq_do_io(s->blkioq, NULL, 0, 0, NULL);
}
}
static int coroutine_fn
@@ -345,9 +345,9 @@ blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
blkioq_discard(s->blkioq, offset, bytes, &cod, 0);
blkio_submit_io(bs);
}
blkio_submit_io(bs);
qemu_coroutine_yield();
return cod.ret;
}
@@ -378,9 +378,9 @@ blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0);
blkio_submit_io(bs);
}
blkio_submit_io(bs);
qemu_coroutine_yield();
if (use_bounce_buffer) {
@@ -423,9 +423,9 @@ static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset,
WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
blkioq_writev(s->blkioq, offset, iov, iovcnt, &cod, blkio_flags);
blkio_submit_io(bs);
}
blkio_submit_io(bs);
qemu_coroutine_yield();
if (use_bounce_buffer) {
@@ -444,9 +444,9 @@ static int coroutine_fn blkio_co_flush(BlockDriverState *bs)
WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
blkioq_flush(s->blkioq, &cod, 0);
blkio_submit_io(bs);
}
blkio_submit_io(bs);
qemu_coroutine_yield();
return cod.ret;
}
@@ -472,13 +472,22 @@ static int coroutine_fn blkio_co_pwrite_zeroes(BlockDriverState *bs,
WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
blkioq_write_zeroes(s->blkioq, offset, bytes, &cod, blkio_flags);
blkio_submit_io(bs);
}
blkio_submit_io(bs);
qemu_coroutine_yield();
return cod.ret;
}
static void coroutine_fn blkio_co_io_unplug(BlockDriverState *bs)
{
BDRVBlkioState *s = bs->opaque;
WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
blkio_submit_io(bs);
}
}
typedef enum {
BMRR_OK,
BMRR_SKIP,
@@ -603,8 +612,8 @@ static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
}
}
static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options,
int flags, Error **errp)
static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
const char *filename = qdict_get_str(options, "filename");
BDRVBlkioState *s = bs->opaque;
@@ -627,18 +636,11 @@ static int blkio_io_uring_connect(BlockDriverState *bs, QDict *options,
}
}
ret = blkio_connect(s->blkio);
if (ret < 0) {
error_setg_errno(errp, -ret, "blkio_connect failed: %s",
blkio_get_error_msg());
return ret;
}
return 0;
}
static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options,
int flags, Error **errp)
static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
const char *path = qdict_get_try_str(options, "path");
BDRVBlkioState *s = bs->opaque;
@@ -662,128 +664,33 @@ static int blkio_nvme_io_uring_connect(BlockDriverState *bs, QDict *options,
return -EINVAL;
}
ret = blkio_connect(s->blkio);
if (ret < 0) {
error_setg_errno(errp, -ret, "blkio_connect failed: %s",
blkio_get_error_msg());
return ret;
}
return 0;
}
static int blkio_virtio_blk_connect(BlockDriverState *bs, QDict *options,
int flags, Error **errp)
static int blkio_virtio_blk_common_open(BlockDriverState *bs,
QDict *options, int flags, Error **errp)
{
const char *path = qdict_get_try_str(options, "path");
BDRVBlkioState *s = bs->opaque;
bool fd_supported = false;
int fd = -1, ret;
int ret;
if (!path) {
error_setg(errp, "missing 'path' option");
return -EINVAL;
}
if (!(flags & BDRV_O_NOCACHE)) {
error_setg(errp, "cache.direct=off is not supported");
return -EINVAL;
}
if (blkio_set_int(s->blkio, "fd", -1) == 0) {
fd_supported = true;
}
/*
* If the libblkio driver supports fd passing, let's always use qemu_open()
* to open the `path`, so we can handle fd passing from the management
* layer through the "/dev/fdset/N" special path.
*/
if (fd_supported) {
/*
* `path` can contain the path of a character device
* (e.g. /dev/vhost-vdpa-0 or /dev/vfio/vfio) or a unix socket.
*
* So, we should always open it with O_RDWR flag, also if BDRV_O_RDWR
* is not set in the open flags, because the exchange of IOCTL commands
* for example will fail.
*
* In order to open the device read-only, we are using the `read-only`
* property of the libblkio driver in blkio_file_open().
*/
fd = qemu_open(path, O_RDWR, NULL);
if (fd < 0) {
/*
* qemu_open() can fail if the user specifies a path that is not
* a file or device, for example in the case of Unix Domain Socket
* for the virtio-blk-vhost-user driver. In such cases let's have
* libblkio open the path directly.
*/
fd_supported = false;
} else {
ret = blkio_set_int(s->blkio, "fd", fd);
if (ret < 0) {
fd_supported = false;
qemu_close(fd);
fd = -1;
}
}
}
if (!fd_supported) {
ret = blkio_set_str(s->blkio, "path", path);
if (ret < 0) {
error_setg_errno(errp, -ret, "failed to set path: %s",
blkio_get_error_msg());
return ret;
}
}
ret = blkio_connect(s->blkio);
if (ret < 0 && fd >= 0) {
/* Failed to give the FD to libblkio, close it */
qemu_close(fd);
fd = -1;
}
/*
* Before https://gitlab.com/libblkio/libblkio/-/merge_requests/208
* (libblkio <= v1.3.0), setting the `fd` property is not enough to check
* whether the driver supports the `fd` property or not. In that case,
* blkio_connect() will fail with -EINVAL.
* So let's try calling blkio_connect() again by directly setting `path`
* to cover this scenario.
*/
if (fd_supported && ret == -EINVAL) {
/*
* We need to clear the `fd` property we set previously by setting
* it to -1.
*/
ret = blkio_set_int(s->blkio, "fd", -1);
if (ret < 0) {
error_setg_errno(errp, -ret, "failed to set fd: %s",
blkio_get_error_msg());
return ret;
}
ret = blkio_set_str(s->blkio, "path", path);
if (ret < 0) {
error_setg_errno(errp, -ret, "failed to set path: %s",
blkio_get_error_msg());
return ret;
}
ret = blkio_connect(s->blkio);
}
ret = blkio_set_str(s->blkio, "path", path);
qdict_del(options, "path");
if (ret < 0) {
error_setg_errno(errp, -ret, "blkio_connect failed: %s",
error_setg_errno(errp, -ret, "failed to set path: %s",
blkio_get_error_msg());
return ret;
}
qdict_del(options, "path");
if (!(flags & BDRV_O_NOCACHE)) {
error_setg(errp, "cache.direct=off is not supported");
return -EINVAL;
}
return 0;
}
@@ -801,6 +708,24 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) {
ret = blkio_io_uring_open(bs, options, flags, errp);
} else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) {
ret = blkio_nvme_io_uring(bs, options, flags, errp);
} else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) {
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
} else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) {
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
} else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) {
ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
} else {
g_assert_not_reached();
}
if (ret < 0) {
blkio_destroy(&s->blkio);
return ret;
}
if (!(flags & BDRV_O_RDWR)) {
ret = blkio_set_bool(s->blkio, "read-only", true);
if (ret < 0) {
@@ -811,20 +736,10 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
}
}
if (strcmp(blkio_driver, "io_uring") == 0) {
ret = blkio_io_uring_connect(bs, options, flags, errp);
} else if (strcmp(blkio_driver, "nvme-io_uring") == 0) {
ret = blkio_nvme_io_uring_connect(bs, options, flags, errp);
} else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) {
ret = blkio_virtio_blk_connect(bs, options, flags, errp);
} else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) {
ret = blkio_virtio_blk_connect(bs, options, flags, errp);
} else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) {
ret = blkio_virtio_blk_connect(bs, options, flags, errp);
} else {
g_assert_not_reached();
}
ret = blkio_connect(s->blkio);
if (ret < 0) {
error_setg_errno(errp, -ret, "blkio_connect failed: %s",
blkio_get_error_msg());
blkio_destroy(&s->blkio);
return ret;
}
@@ -904,7 +819,6 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
QLIST_INIT(&s->bounce_bufs);
s->blkioq = blkio_get_queue(s->blkio, 0);
s->completion_fd = blkioq_get_completion_fd(s->blkioq);
blkioq_set_completion_fd_enabled(s->blkioq, true);
blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
return 0;
@@ -1078,63 +992,50 @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
* - truncate
*/
/*
* Do not include .format_name and .protocol_name because module_block.py
* does not parse macros in the source code.
*/
#define BLKIO_DRIVER_COMMON \
.instance_size = sizeof(BDRVBlkioState), \
.bdrv_file_open = blkio_file_open, \
.bdrv_close = blkio_close, \
.bdrv_co_getlength = blkio_co_getlength, \
.bdrv_co_truncate = blkio_truncate, \
.bdrv_co_get_info = blkio_co_get_info, \
.bdrv_attach_aio_context = blkio_attach_aio_context, \
.bdrv_detach_aio_context = blkio_detach_aio_context, \
.bdrv_co_pdiscard = blkio_co_pdiscard, \
.bdrv_co_preadv = blkio_co_preadv, \
.bdrv_co_pwritev = blkio_co_pwritev, \
.bdrv_co_flush_to_disk = blkio_co_flush, \
.bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
.bdrv_refresh_limits = blkio_refresh_limits, \
.bdrv_register_buf = blkio_register_buf, \
.bdrv_unregister_buf = blkio_unregister_buf,
#define BLKIO_DRIVER(name, ...) \
{ \
.format_name = name, \
.protocol_name = name, \
.instance_size = sizeof(BDRVBlkioState), \
.bdrv_file_open = blkio_file_open, \
.bdrv_close = blkio_close, \
.bdrv_co_getlength = blkio_co_getlength, \
.bdrv_co_truncate = blkio_truncate, \
.bdrv_co_get_info = blkio_co_get_info, \
.bdrv_attach_aio_context = blkio_attach_aio_context, \
.bdrv_detach_aio_context = blkio_detach_aio_context, \
.bdrv_co_pdiscard = blkio_co_pdiscard, \
.bdrv_co_preadv = blkio_co_preadv, \
.bdrv_co_pwritev = blkio_co_pwritev, \
.bdrv_co_flush_to_disk = blkio_co_flush, \
.bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
.bdrv_co_io_unplug = blkio_co_io_unplug, \
.bdrv_refresh_limits = blkio_refresh_limits, \
.bdrv_register_buf = blkio_register_buf, \
.bdrv_unregister_buf = blkio_unregister_buf, \
__VA_ARGS__ \
}
/*
* Use the same .format_name and .protocol_name as the libblkio driver name for
* consistency.
*/
static BlockDriver bdrv_io_uring = {
.format_name = "io_uring",
.protocol_name = "io_uring",
static BlockDriver bdrv_io_uring = BLKIO_DRIVER(
DRIVER_IO_URING,
.bdrv_needs_filename = true,
BLKIO_DRIVER_COMMON
};
);
static BlockDriver bdrv_nvme_io_uring = {
.format_name = "nvme-io_uring",
.protocol_name = "nvme-io_uring",
BLKIO_DRIVER_COMMON
};
static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
DRIVER_NVME_IO_URING,
);
static BlockDriver bdrv_virtio_blk_vfio_pci = {
.format_name = "virtio-blk-vfio-pci",
.protocol_name = "virtio-blk-vfio-pci",
BLKIO_DRIVER_COMMON
};
static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER(
DRIVER_VIRTIO_BLK_VFIO_PCI
);
static BlockDriver bdrv_virtio_blk_vhost_user = {
.format_name = "virtio-blk-vhost-user",
.protocol_name = "virtio-blk-vhost-user",
BLKIO_DRIVER_COMMON
};
static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER(
DRIVER_VIRTIO_BLK_VHOST_USER
);
static BlockDriver bdrv_virtio_blk_vhost_vdpa = {
.format_name = "virtio-blk-vhost-vdpa",
.protocol_name = "virtio-blk-vhost-vdpa",
BLKIO_DRIVER_COMMON
};
static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER(
DRIVER_VIRTIO_BLK_VHOST_VDPA
);
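For clarity, a rough expansion of the stable-branch BLKIO_DRIVER() variadic macro used above (the other side of the diff switches to a plain BLKIO_DRIVER_COMMON field list because module_block.py cannot see .format_name through a macro); trimmed to a few fields:

/* BLKIO_DRIVER(DRIVER_IO_URING, .bdrv_needs_filename = true) expands to
 * roughly the following (illustrative, not a literal expansion): */
static BlockDriver example_expanded = {
    .format_name    = "io_uring",
    .protocol_name  = "io_uring",
    .instance_size  = sizeof(BDRVBlkioState),
    .bdrv_file_open = blkio_file_open,
    /* ...the remaining common callbacks... */
    .bdrv_needs_filename = true,
};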
static void bdrv_blkio_init(void)
{

block/blkverify.c

@@ -265,9 +265,8 @@ static int coroutine_fn GRAPH_RDLOCK blkverify_co_flush(BlockDriverState *bs)
return bdrv_co_flush(s->test_file->bs);
}
static bool GRAPH_RDLOCK
blkverify_recurse_can_replace(BlockDriverState *bs,
BlockDriverState *to_replace)
static bool blkverify_recurse_can_replace(BlockDriverState *bs,
BlockDriverState *to_replace)
{
BDRVBlkverifyState *s = bs->opaque;

block/block-backend.c

@@ -80,10 +80,9 @@ struct BlockBackend {
NotifierList remove_bs_notifiers, insert_bs_notifiers;
QLIST_HEAD(, BlockBackendAioNotifier) aio_notifiers;
int quiesce_counter; /* atomic: written under BQL, read by other threads */
QemuMutex queued_requests_lock; /* protects queued_requests */
int quiesce_counter;
CoQueue queued_requests;
bool disable_request_queuing; /* atomic */
bool disable_request_queuing;
VMChangeStateEntry *vmsh;
bool force_allow_inactivate;
@@ -369,7 +368,6 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
block_acct_init(&blk->stats);
qemu_mutex_init(&blk->queued_requests_lock);
qemu_co_queue_init(&blk->queued_requests);
notifier_list_init(&blk->remove_bs_notifiers);
notifier_list_init(&blk->insert_bs_notifiers);
@@ -389,8 +387,6 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
* Both sets of permissions can be changed later using blk_set_perm().
*
* Return the new BlockBackend on success, null on failure.
*
* Callers must hold the AioContext lock of @bs.
*/
BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
uint64_t shared_perm, Error **errp)
@@ -408,15 +404,11 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
/*
* Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
* By default, the new BlockBackend is in the main AioContext, but if the
* parameters connect it with any existing node in a different AioContext, it
* may end up there instead.
* The new BlockBackend is in the main AioContext.
*
* Just as with bdrv_open(), after having called this function the reference to
* @options belongs to the block layer (even on failure).
*
* Called without holding an AioContext lock.
*
* TODO: Remove @filename and @flags; it should be possible to specify a whole
* BDS tree just by specifying the @options QDict (or @reference,
* alternatively). At the time of adding this function, this is not possible,
@@ -428,7 +420,6 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
{
BlockBackend *blk;
BlockDriverState *bs;
AioContext *ctx;
uint64_t perm = 0;
uint64_t shared = BLK_PERM_ALL;
@@ -458,24 +449,16 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
}
aio_context_acquire(qemu_get_aio_context());
blk = blk_new(qemu_get_aio_context(), perm, shared);
bs = bdrv_open(filename, reference, options, flags, errp);
aio_context_release(qemu_get_aio_context());
if (!bs) {
blk_unref(blk);
return NULL;
}
/* bdrv_open() could have moved bs to a different AioContext */
ctx = bdrv_get_aio_context(bs);
blk = blk_new(bdrv_get_aio_context(bs), perm, shared);
blk->perm = perm;
blk->shared_perm = shared;
aio_context_acquire(ctx);
blk_insert_bs(blk, bs, errp);
bdrv_unref(bs);
aio_context_release(ctx);
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
perm, shared, blk, errp);
if (!blk->root) {
blk_unref(blk);
return NULL;
@@ -502,8 +485,6 @@ static void blk_delete(BlockBackend *blk)
assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->aio_notifiers));
assert(qemu_co_queue_empty(&blk->queued_requests));
qemu_mutex_destroy(&blk->queued_requests_lock);
QTAILQ_REMOVE(&block_backends, blk, link);
drive_info_del(blk->legacy_dinfo);
block_acct_cleanup(&blk->stats);
@@ -916,8 +897,6 @@ void blk_remove_bs(BlockBackend *blk)
/*
* Associates a new BlockDriverState with @blk.
*
* Callers must hold the AioContext lock of @bs.
*/
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
{
@@ -1078,7 +1057,7 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
blk->dev_opaque = opaque;
/* Are we currently quiesced? Should we enforce this right now? */
if (qatomic_read(&blk->quiesce_counter) && ops && ops->drained_begin) {
if (blk->quiesce_counter && ops && ops->drained_begin) {
ops->drained_begin(opaque);
}
}
@@ -1253,7 +1232,7 @@ void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow)
void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
{
IO_CODE();
qatomic_set(&blk->disable_request_queuing, disable);
blk->disable_request_queuing = disable;
}
static int coroutine_fn GRAPH_RDLOCK
@@ -1287,30 +1266,15 @@ blk_check_byte_request(BlockBackend *blk, int64_t offset, int64_t bytes)
return 0;
}
/* Are we currently in a drained section? */
bool blk_in_drain(BlockBackend *blk)
{
GLOBAL_STATE_CODE(); /* change to IO_OR_GS_CODE(), if necessary */
return qatomic_read(&blk->quiesce_counter);
}
/* To be called between exactly one pair of blk_inc/dec_in_flight() */
static void coroutine_fn blk_wait_while_drained(BlockBackend *blk)
{
assert(blk->in_flight > 0);
if (qatomic_read(&blk->quiesce_counter) &&
!qatomic_read(&blk->disable_request_queuing)) {
/*
* Take lock before decrementing in flight counter so main loop thread
* waits for us to enqueue ourselves before it can leave the drained
* section.
*/
qemu_mutex_lock(&blk->queued_requests_lock);
if (blk->quiesce_counter && !blk->disable_request_queuing) {
blk_dec_in_flight(blk);
qemu_co_queue_wait(&blk->queued_requests, &blk->queued_requests_lock);
qemu_co_queue_wait(&blk->queued_requests, NULL);
blk_inc_in_flight(blk);
qemu_mutex_unlock(&blk->queued_requests_lock);
}
}
@@ -1869,204 +1833,6 @@ int coroutine_fn blk_co_flush(BlockBackend *blk)
return ret;
}
static void coroutine_fn blk_aio_zone_report_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
rwco->ret = blk_co_zone_report(rwco->blk, rwco->offset,
(unsigned int*)(uintptr_t)acb->bytes,
rwco->iobuf);
blk_aio_complete(acb);
}
BlockAIOCB *blk_aio_zone_report(BlockBackend *blk, int64_t offset,
unsigned int *nr_zones,
BlockZoneDescriptor *zones,
BlockCompletionFunc *cb, void *opaque)
{
BlkAioEmAIOCB *acb;
Coroutine *co;
IO_CODE();
blk_inc_in_flight(blk);
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
acb->rwco = (BlkRwCo) {
.blk = blk,
.offset = offset,
.iobuf = zones,
.ret = NOT_DONE,
};
acb->bytes = (int64_t)(uintptr_t)nr_zones,
acb->has_returned = false;
co = qemu_coroutine_create(blk_aio_zone_report_entry, acb);
aio_co_enter(blk_get_aio_context(blk), co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
blk_aio_complete_bh, acb);
}
return &acb->common;
}
static void coroutine_fn blk_aio_zone_mgmt_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
rwco->ret = blk_co_zone_mgmt(rwco->blk,
(BlockZoneOp)(uintptr_t)rwco->iobuf,
rwco->offset, acb->bytes);
blk_aio_complete(acb);
}
BlockAIOCB *blk_aio_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
int64_t offset, int64_t len,
BlockCompletionFunc *cb, void *opaque) {
BlkAioEmAIOCB *acb;
Coroutine *co;
IO_CODE();
blk_inc_in_flight(blk);
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
acb->rwco = (BlkRwCo) {
.blk = blk,
.offset = offset,
.iobuf = (void *)(uintptr_t)op,
.ret = NOT_DONE,
};
acb->bytes = len;
acb->has_returned = false;
co = qemu_coroutine_create(blk_aio_zone_mgmt_entry, acb);
aio_co_enter(blk_get_aio_context(blk), co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
blk_aio_complete_bh, acb);
}
return &acb->common;
}
static void coroutine_fn blk_aio_zone_append_entry(void *opaque)
{
BlkAioEmAIOCB *acb = opaque;
BlkRwCo *rwco = &acb->rwco;
rwco->ret = blk_co_zone_append(rwco->blk, (int64_t *)(uintptr_t)acb->bytes,
rwco->iobuf, rwco->flags);
blk_aio_complete(acb);
}
BlockAIOCB *blk_aio_zone_append(BlockBackend *blk, int64_t *offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque) {
BlkAioEmAIOCB *acb;
Coroutine *co;
IO_CODE();
blk_inc_in_flight(blk);
acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
acb->rwco = (BlkRwCo) {
.blk = blk,
.ret = NOT_DONE,
.flags = flags,
.iobuf = qiov,
};
acb->bytes = (int64_t)(uintptr_t)offset;
acb->has_returned = false;
co = qemu_coroutine_create(blk_aio_zone_append_entry, acb);
aio_co_enter(blk_get_aio_context(blk), co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
replay_bh_schedule_oneshot_event(blk_get_aio_context(blk),
blk_aio_complete_bh, acb);
}
return &acb->common;
}
/*
* Send a zone_report command.
* offset is a byte offset from the start of the device. No alignment
* required for offset.
* nr_zones represents IN maximum and OUT actual.
*/
int coroutine_fn blk_co_zone_report(BlockBackend *blk, int64_t offset,
unsigned int *nr_zones,
BlockZoneDescriptor *zones)
{
int ret;
IO_CODE();
blk_inc_in_flight(blk); /* increase before waiting */
blk_wait_while_drained(blk);
GRAPH_RDLOCK_GUARD();
if (!blk_is_available(blk)) {
blk_dec_in_flight(blk);
return -ENOMEDIUM;
}
ret = bdrv_co_zone_report(blk_bs(blk), offset, nr_zones, zones);
blk_dec_in_flight(blk);
return ret;
}
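A hypothetical caller sketch (not from this patch) showing the IN/OUT contract of nr_zones described in the comment above:

static int coroutine_fn example_report_zones(BlockBackend *blk)
{
    BlockZoneDescriptor zones[16];
    unsigned int nr_zones = ARRAY_SIZE(zones);   /* IN: array capacity */
    int ret = blk_co_zone_report(blk, 0 /* byte offset */, &nr_zones, zones);
    if (ret < 0) {
        return ret;
    }
    /* OUT: nr_zones now holds how many entries of zones[] were filled */
    return 0;
}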
/*
* Send a zone_management command.
* op is the zone operation;
* offset is the byte offset from the start of the zoned device;
* len is the maximum number of bytes the command should operate on. It
* should be aligned with the device zone size.
*/
int coroutine_fn blk_co_zone_mgmt(BlockBackend *blk, BlockZoneOp op,
int64_t offset, int64_t len)
{
int ret;
IO_CODE();
blk_inc_in_flight(blk);
blk_wait_while_drained(blk);
GRAPH_RDLOCK_GUARD();
ret = blk_check_byte_request(blk, offset, len);
if (ret < 0) {
blk_dec_in_flight(blk);
return ret;
}
ret = bdrv_co_zone_mgmt(blk_bs(blk), op, offset, len);
blk_dec_in_flight(blk);
return ret;
}
/*
* Send a zone_append command.
*/
int coroutine_fn blk_co_zone_append(BlockBackend *blk, int64_t *offset,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
int ret;
IO_CODE();
blk_inc_in_flight(blk);
blk_wait_while_drained(blk);
GRAPH_RDLOCK_GUARD();
if (!blk_is_available(blk)) {
blk_dec_in_flight(blk);
return -ENOMEDIUM;
}
ret = bdrv_co_zone_append(blk_bs(blk), offset, qiov, flags);
blk_dec_in_flight(blk);
return ret;
}
void blk_drain(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
@@ -2079,7 +1845,7 @@ void blk_drain(BlockBackend *blk)
/* We may have -ENOMEDIUM completions in flight */
AIO_WAIT_WHILE(blk_get_aio_context(blk),
qatomic_read(&blk->in_flight) > 0);
qatomic_mb_read(&blk->in_flight) > 0);
if (bs) {
bdrv_drained_end(bs);
@@ -2096,8 +1862,14 @@ void blk_drain_all(void)
bdrv_drain_all_begin();
while ((blk = blk_all_next(blk)) != NULL) {
AioContext *ctx = blk_get_aio_context(blk);
aio_context_acquire(ctx);
/* We may have -ENOMEDIUM completions in flight */
AIO_WAIT_WHILE_UNLOCKED(NULL, qatomic_read(&blk->in_flight) > 0);
AIO_WAIT_WHILE(ctx, qatomic_mb_read(&blk->in_flight) > 0);
aio_context_release(ctx);
}
bdrv_drain_all_end();
@@ -2418,14 +2190,9 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
AioContext *blk_get_aio_context(BlockBackend *blk)
{
BlockDriverState *bs;
BlockDriverState *bs = blk_bs(blk);
IO_CODE();
if (!blk) {
return qemu_get_aio_context();
}
bs = blk_bs(blk);
if (bs) {
AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
assert(ctx == blk->ctx);
@@ -2440,31 +2207,52 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
return blk_get_aio_context(blk_acb->blk);
}
static int blk_do_set_aio_context(BlockBackend *blk, AioContext *new_context,
bool update_root_node, Error **errp)
{
BlockDriverState *bs = blk_bs(blk);
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
int ret;
if (bs) {
bdrv_ref(bs);
if (update_root_node) {
/*
* update_root_node MUST be false for blk_root_set_aio_ctx_commit(),
* as we are already in the commit function of a transaction.
*/
ret = bdrv_try_change_aio_context(bs, new_context, blk->root, errp);
if (ret < 0) {
bdrv_unref(bs);
return ret;
}
}
/*
* Make blk->ctx consistent with the root node before we invoke any
* other operations like drain that might inquire blk->ctx
*/
blk->ctx = new_context;
if (tgm->throttle_state) {
bdrv_drained_begin(bs);
throttle_group_detach_aio_context(tgm);
throttle_group_attach_aio_context(tgm, new_context);
bdrv_drained_end(bs);
}
bdrv_unref(bs);
} else {
blk->ctx = new_context;
}
return 0;
}
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
Error **errp)
{
bool old_allow_change;
BlockDriverState *bs = blk_bs(blk);
int ret;
GLOBAL_STATE_CODE();
if (!bs) {
blk->ctx = new_context;
return 0;
}
bdrv_ref(bs);
old_allow_change = blk->allow_aio_context_change;
blk->allow_aio_context_change = true;
ret = bdrv_try_change_aio_context(bs, new_context, NULL, errp);
blk->allow_aio_context_change = old_allow_change;
bdrv_unref(bs);
return ret;
return blk_do_set_aio_context(blk, new_context, true, errp);
}
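A hedged usage sketch (helper name hypothetical): the typical caller is device code running under the BQL that moves its backend into an IOThread's AioContext:

static int example_move_to_iothread(BlockBackend *blk, IOThread *iothread,
                                    Error **errp)
{
    AioContext *ctx = iothread_get_aio_context(iothread);

    /* May fail, e.g. if another parent of the root node forbids the move */
    return blk_set_aio_context(blk, ctx, errp);
}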
typedef struct BdrvStateBlkRootContext {
@@ -2476,14 +2264,8 @@ static void blk_root_set_aio_ctx_commit(void *opaque)
{
BdrvStateBlkRootContext *s = opaque;
BlockBackend *blk = s->blk;
AioContext *new_context = s->new_ctx;
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
blk->ctx = new_context;
if (tgm->throttle_state) {
throttle_group_detach_aio_context(tgm);
throttle_group_attach_aio_context(tgm, new_context);
}
blk_do_set_aio_context(blk, s->new_ctx, false, &error_abort);
}
static TransactionActionDrv set_blk_root_context = {
@@ -2582,6 +2364,28 @@ void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
notifier_list_add(&blk->insert_bs_notifiers, notify);
}
void coroutine_fn blk_co_io_plug(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
IO_CODE();
GRAPH_RDLOCK_GUARD();
if (bs) {
bdrv_co_io_plug(bs);
}
}
void coroutine_fn blk_co_io_unplug(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
IO_CODE();
GRAPH_RDLOCK_GUARD();
if (bs) {
bdrv_co_io_unplug(bs);
}
}
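A minimal sketch of the batching these wrappers enable, loosely modeled on how virtio-blk drives the non-coroutine blk_io_plug()/blk_io_unplug() pair (helper names hypothetical):

static void example_write_done(void *opaque, int ret)
{
    /* per-request completion callback */
}

static void example_submit_batch(BlockBackend *blk, QEMUIOVector **qiovs,
                                 int64_t *offsets, int n)
{
    blk_io_plug(blk);                  /* defer actual submission */
    for (int i = 0; i < n; i++) {
        blk_aio_pwritev(blk, offsets[i], qiovs[i], 0,
                        example_write_done, NULL);
    }
    blk_io_unplug(blk);                /* kick the device once for the batch */
}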
BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
IO_CODE();
@@ -2799,7 +2603,7 @@ static void blk_root_drained_begin(BdrvChild *child)
BlockBackend *blk = child->opaque;
ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
if (qatomic_fetch_inc(&blk->quiesce_counter) == 0) {
if (++blk->quiesce_counter == 1) {
if (blk->dev_ops && blk->dev_ops->drained_begin) {
blk->dev_ops->drained_begin(blk->dev_opaque);
}
@@ -2817,7 +2621,7 @@ static bool blk_root_drained_poll(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
bool busy = false;
assert(qatomic_read(&blk->quiesce_counter));
assert(blk->quiesce_counter);
if (blk->dev_ops && blk->dev_ops->drained_poll) {
busy = blk->dev_ops->drained_poll(blk->dev_opaque);
@@ -2828,21 +2632,18 @@ static bool blk_root_drained_poll(BdrvChild *child)
static void blk_root_drained_end(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
assert(qatomic_read(&blk->quiesce_counter));
assert(blk->quiesce_counter);
assert(blk->public.throttle_group_member.io_limits_disabled);
qatomic_dec(&blk->public.throttle_group_member.io_limits_disabled);
if (qatomic_fetch_dec(&blk->quiesce_counter) == 1) {
if (--blk->quiesce_counter == 0) {
if (blk->dev_ops && blk->dev_ops->drained_end) {
blk->dev_ops->drained_end(blk->dev_opaque);
}
qemu_mutex_lock(&blk->queued_requests_lock);
while (qemu_co_enter_next(&blk->queued_requests,
&blk->queued_requests_lock)) {
while (qemu_co_enter_next(&blk->queued_requests, NULL)) {
/* Resume all queued requests */
}
qemu_mutex_unlock(&blk->queued_requests_lock);
}
}

block/bochs.c

@@ -203,8 +203,7 @@ static void bochs_refresh_limits(BlockDriverState *bs, Error **errp)
bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
}
static int64_t coroutine_fn GRAPH_RDLOCK
seek_to_sector(BlockDriverState *bs, int64_t sector_num)
static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
{
BDRVBochsState *s = bs->opaque;
uint64_t offset = sector_num * 512;
@@ -225,8 +224,8 @@ seek_to_sector(BlockDriverState *bs, int64_t sector_num)
(s->extent_blocks + s->bitmap_blocks));
/* read in bitmap for current extent */
ret = bdrv_co_pread(bs->file, bitmap_offset + (extent_offset / 8), 1,
&bitmap_entry, 0);
ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8), 1,
&bitmap_entry, 0);
if (ret < 0) {
return ret;
}
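An aside on the bdrv_co_pread()/bdrv_pread() swaps in this and the following hunks: inside a coroutine_fn the _co_ variant calls the driver directly, while plain bdrv_pread() is a generated co_wrapper for mixed contexts. A minimal sketch:

static int coroutine_fn example_read_first_byte(BlockDriverState *bs,
                                                uint8_t *out)
{
    /* read one byte at offset 0 from the protocol child */
    return bdrv_co_pread(bs->file, 0, 1, out, 0);
}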

block/cloop.c

@@ -212,8 +212,7 @@ static void cloop_refresh_limits(BlockDriverState *bs, Error **errp)
bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
}
static int coroutine_fn GRAPH_RDLOCK
cloop_read_block(BlockDriverState *bs, int block_num)
static inline int cloop_read_block(BlockDriverState *bs, int block_num)
{
BDRVCloopState *s = bs->opaque;
@@ -221,8 +220,8 @@ cloop_read_block(BlockDriverState *bs, int block_num)
int ret;
uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];
ret = bdrv_co_pread(bs->file, s->offsets[block_num], bytes,
s->compressed_block, 0);
ret = bdrv_pread(bs->file, s->offsets[block_num], bytes,
s->compressed_block, 0);
if (ret < 0) {
return -1;
}
@@ -245,7 +244,7 @@ cloop_read_block(BlockDriverState *bs, int block_num)
return 0;
}
static int coroutine_fn GRAPH_RDLOCK
static int coroutine_fn
cloop_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{

block/commit.c

@@ -116,6 +116,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
{
CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
int64_t offset;
uint64_t delay_ns = 0;
int ret = 0;
int64_t n = 0; /* bytes */
QEMU_AUTO_VFREE void *buf = NULL;
@@ -148,7 +149,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
block_job_ratelimit_sleep(&s->common);
job_sleep_ns(&s->common.job, delay_ns);
if (job_is_cancelled(&s->common.job)) {
break;
}
@@ -183,7 +184,9 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
job_progress_update(&s->common.job, n);
if (copy) {
block_job_ratelimit_processed_bytes(&s->common, n);
delay_ns = block_job_ratelimit_get_delay(&s->common, n);
} else {
delay_ns = 0;
}
}
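Pieced together, the rate-limit pattern this hunk restores looks roughly like the following condensed sketch (the 1024/512 sizes are placeholders):

static void coroutine_fn example_throttled_loop(CommitBlockJob *s)
{
    uint64_t delay_ns = 0;
    for (int64_t offset = 0; offset < 1024; offset += 512) {
        int64_t n = 512;    /* bytes processed this iteration */
        /* yield even with no delay so bdrv_drain_all() can return */
        job_sleep_ns(&s->common.job, delay_ns);
        if (job_is_cancelled(&s->common.job)) {
            return;
        }
        delay_ns = block_job_ratelimit_get_delay(&s->common, n);
    }
}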

block/copy-before-write.c

@@ -412,7 +412,6 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
int64_t cluster_size;
g_autoptr(BlockdevOptions) full_opts = NULL;
BlockdevOptionsCbw *opts;
AioContext *ctx;
int ret;
full_opts = cbw_parse_options(options, errp);
@@ -433,15 +432,11 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
if (opts->bitmap) {
bitmap = block_dirty_bitmap_lookup(opts->bitmap->node,
opts->bitmap->name, NULL, errp);
if (!bitmap) {
ret = -EINVAL;
goto out;
return -EINVAL;
}
}
s->on_cbw_error = opts->has_on_cbw_error ? opts->on_cbw_error :
@@ -459,24 +454,21 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
if (!s->bcs) {
error_prepend(errp, "Cannot create block-copy-state: ");
ret = -EINVAL;
goto out;
return -EINVAL;
}
cluster_size = block_copy_cluster_size(s->bcs);
s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
if (!s->done_bitmap) {
ret = -EINVAL;
goto out;
return -EINVAL;
}
bdrv_disable_dirty_bitmap(s->done_bitmap);
/* s->access_bitmap starts equal to bcs bitmap */
s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
if (!s->access_bitmap) {
ret = -EINVAL;
goto out;
return -EINVAL;
}
bdrv_disable_dirty_bitmap(s->access_bitmap);
bdrv_dirty_bitmap_merge_internal(s->access_bitmap,
@@ -486,10 +478,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
qemu_co_mutex_init(&s->lock);
QLIST_INIT(&s->frozen_read_reqs);
ret = 0;
out:
aio_context_release(ctx);
return ret;
return 0;
}
static void cbw_close(BlockDriverState *bs)

block/coroutines.h

@@ -61,7 +61,7 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int coroutine_fn GRAPH_RDLOCK
bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int coroutine_fn GRAPH_RDLOCK
int coroutine_fn
nbd_co_do_establish_connection(BlockDriverState *bs, bool blocking,
Error **errp);
@@ -85,8 +85,7 @@ bdrv_common_block_status_above(BlockDriverState *bs,
int64_t *map,
BlockDriverState **file,
int *depth);
int co_wrapper_mixed_bdrv_rdlock
int co_wrapper_mixed
nbd_do_establish_connection(BlockDriverState *bs, bool blocking, Error **errp);
#endif /* BLOCK_COROUTINES_H */
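Background for the GRAPH_RDLOCK churn in this header (a sketch, assuming the block/graph-lock.h macros): GRAPH_RDLOCK-annotated functions must run with the block graph read-locked, and coroutine callers can take the lock for a scope with GRAPH_RDLOCK_GUARD():

static int coroutine_fn GRAPH_RDLOCK
example_check_driver(BlockDriverState *bs)
{
    /* graph edges may be dereferenced here: the rdlock is held */
    return bs->drv ? 0 : -ENOMEDIUM;
}

static int coroutine_fn example_caller(BlockDriverState *bs)
{
    GRAPH_RDLOCK_GUARD();   /* holds the graph rdlock until scope exit */
    return example_check_driver(bs);
}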

block/create.c

@@ -43,6 +43,7 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp)
int ret;
GLOBAL_STATE_CODE();
GRAPH_RDLOCK_GUARD();
job_progress_set_remaining(&s->common, 1);
ret = s->drv->bdrv_co_create(s->opts, errp);

block/crypto.c

@@ -99,10 +99,12 @@ struct BlockCryptoCreateData {
};
static int coroutine_fn GRAPH_UNLOCKED
block_crypto_create_write_func(QCryptoBlock *block, size_t offset,
const uint8_t *buf, size_t buflen, void *opaque,
Error **errp)
static int block_crypto_create_write_func(QCryptoBlock *block,
size_t offset,
const uint8_t *buf,
size_t buflen,
void *opaque,
Error **errp)
{
struct BlockCryptoCreateData *data = opaque;
ssize_t ret;
@@ -115,9 +117,10 @@ block_crypto_create_write_func(QCryptoBlock *block, size_t offset,
return 0;
}
static int coroutine_fn GRAPH_UNLOCKED
block_crypto_create_init_func(QCryptoBlock *block, size_t headerlen,
void *opaque, Error **errp)
static int block_crypto_create_init_func(QCryptoBlock *block,
size_t headerlen,
void *opaque,
Error **errp)
{
struct BlockCryptoCreateData *data = opaque;
Error *local_error = NULL;
@@ -311,7 +314,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
}
static int coroutine_fn GRAPH_UNLOCKED
static int coroutine_fn
block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
QCryptoBlockCreateOptions *opts,
PreallocMode prealloc, Error **errp)
@@ -624,7 +627,7 @@ static int block_crypto_open_luks(BlockDriverState *bs,
bs, options, flags, errp);
}
static int coroutine_fn GRAPH_UNLOCKED
static int coroutine_fn
block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
{
BlockdevCreateOptionsLUKS *luks_opts;
@@ -662,7 +665,7 @@ fail:
return ret;
}
static int coroutine_fn GRAPH_UNLOCKED
static int coroutine_fn GRAPH_RDLOCK
block_crypto_co_create_opts_luks(BlockDriver *drv, const char *filename,
QemuOpts *opts, Error **errp)
{
@@ -724,9 +727,7 @@ fail:
* beforehand, it has been truncated and corrupted in the process.
*/
if (ret) {
bdrv_graph_co_rdlock();
bdrv_co_delete_file_noerr(bs);
bdrv_graph_co_rdunlock();
}
bdrv_co_unref(bs);
@@ -735,7 +736,7 @@ fail:
return ret;
}
static int coroutine_fn GRAPH_RDLOCK
static int coroutine_fn
block_crypto_co_get_info_luks(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BlockDriverInfo subbdi;


@@ -132,7 +132,7 @@ static gboolean curl_drop_socket(void *key, void *value, void *opaque)
CURLSocket *socket = value;
BDRVCURLState *s = socket->s;
aio_set_fd_handler(s->aio_context, socket->fd,
aio_set_fd_handler(s->aio_context, socket->fd, false,
NULL, NULL, NULL, NULL, NULL);
return true;
}
@@ -180,20 +180,20 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
trace_curl_sock_cb(action, (int)fd);
switch (action) {
case CURL_POLL_IN:
aio_set_fd_handler(s->aio_context, fd,
aio_set_fd_handler(s->aio_context, fd, false,
curl_multi_do, NULL, NULL, NULL, socket);
break;
case CURL_POLL_OUT:
aio_set_fd_handler(s->aio_context, fd,
aio_set_fd_handler(s->aio_context, fd, false,
NULL, curl_multi_do, NULL, NULL, socket);
break;
case CURL_POLL_INOUT:
aio_set_fd_handler(s->aio_context, fd,
aio_set_fd_handler(s->aio_context, fd, false,
curl_multi_do, curl_multi_do,
NULL, NULL, socket);
break;
case CURL_POLL_REMOVE:
aio_set_fd_handler(s->aio_context, fd,
aio_set_fd_handler(s->aio_context, fd, false,
NULL, NULL, NULL, NULL, NULL);
break;
}
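The recurring pattern in this file (and in several files below) is the changed aio_set_fd_handler() signature: this stable branch still passes the is_external flag, while the newer code dropped it. A sketch of the two prototypes as I understand them (verify against include/block/aio.h of the exact versions):

    /* newer API: the is_external flag is gone */
    void aio_set_fd_handler(AioContext *ctx, int fd,
                            IOHandler *io_read, IOHandler *io_write,
                            AioPollFn *io_poll, IOHandler *io_poll_ready,
                            void *opaque);

    /* QEMU 8.0 API, as used on this branch */
    void aio_set_fd_handler(AioContext *ctx, int fd, bool is_external,
                            IOHandler *io_read, IOHandler *io_write,
                            AioPollFn *io_poll, IOHandler *io_poll_ready,
                            void *opaque);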


@@ -31,8 +31,11 @@
#include "qemu/memalign.h"
#include "dmg.h"
BdrvDmgUncompressFunc *dmg_uncompress_bz2;
BdrvDmgUncompressFunc *dmg_uncompress_lzfse;
int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
char *next_out, unsigned int avail_out);
int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,
char *next_out, unsigned int avail_out);
enum {
/* Limit chunk sizes to prevent unreasonable amounts of memory being used
@@ -616,8 +619,7 @@ err:
return s->n_chunks; /* error */
}
static int coroutine_fn GRAPH_RDLOCK
dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
{
BDRVDMGState *s = bs->opaque;
@@ -634,8 +636,8 @@ dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
case UDZO: { /* zlib compressed */
/* we need to buffer, because only the chunk as a whole can be
* inflated. */
ret = bdrv_co_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
s->compressed_chunk, 0);
ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
s->compressed_chunk, 0);
if (ret < 0) {
return -1;
}
@@ -660,8 +662,8 @@ dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
}
/* we need to buffer, because only the chunk as a whole can be
* inflated. */
ret = bdrv_co_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
s->compressed_chunk, 0);
ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
s->compressed_chunk, 0);
if (ret < 0) {
return -1;
}
@@ -681,8 +683,8 @@ dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
}
/* we need to buffer, because only the chunk as a whole can be
* inflated. */
ret = bdrv_co_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
s->compressed_chunk, 0);
ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
s->compressed_chunk, 0);
if (ret < 0) {
return -1;
}
@@ -697,8 +699,8 @@ dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
}
break;
case UDRW: /* copy */
ret = bdrv_co_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
s->uncompressed_chunk, 0);
ret = bdrv_pread(bs->file, s->offsets[chunk], s->lengths[chunk],
s->uncompressed_chunk, 0);
if (ret < 0) {
return -1;
}
@@ -714,7 +716,7 @@ dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
return 0;
}
static int coroutine_fn GRAPH_RDLOCK
static int coroutine_fn
dmg_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags)
{
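The "chunk as a whole" comments above are the reason for the bounce buffer: zlib cannot inflate a partial DMG chunk on demand, so the compressed bytes are read into s->compressed_chunk first and decompressed in one go. A minimal standalone sketch of that pattern with zlib (not QEMU code; dmg.c reuses a persistent z_stream with inflateReset() rather than initializing one per chunk):

    #include <zlib.h>

    /* Inflate one fully-buffered chunk; returns 0 on success, -1 on error. */
    static int inflate_whole_chunk(const uint8_t *in, size_t in_len,
                                   uint8_t *out, size_t out_len)
    {
        z_stream zs = {0};
        int ret;

        if (inflateInit(&zs) != Z_OK) {
            return -1;
        }
        zs.next_in = (Bytef *)in;
        zs.avail_in = in_len;
        zs.next_out = out;
        zs.avail_out = out_len;
        ret = inflate(&zs, Z_FINISH);   /* the whole chunk in one call */
        inflateEnd(&zs);
        return ret == Z_STREAM_END ? 0 : -1;
    }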


@@ -51,10 +51,10 @@ typedef struct BDRVDMGState {
z_stream zstream;
} BDRVDMGState;
typedef int BdrvDmgUncompressFunc(char *next_in, unsigned int avail_in,
char *next_out, unsigned int avail_out);
extern int (*dmg_uncompress_bz2)(char *next_in, unsigned int avail_in,
char *next_out, unsigned int avail_out);
extern BdrvDmgUncompressFunc *dmg_uncompress_bz2;
extern BdrvDmgUncompressFunc *dmg_uncompress_lzfse;
extern int (*dmg_uncompress_lzfse)(char *next_in, unsigned int avail_in,
char *next_out, unsigned int avail_out);
#endif
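The BdrvDmgUncompressFunc typedef replaces two copies of the same function-pointer signature. The pointers stay NULL unless an optional module fills them in; a hypothetical registration, mirroring what QEMU's dmg-bz2 module does (names here are illustrative, not the real ones):

    static int my_bz2_uncompress(char *next_in, unsigned int avail_in,
                                 char *next_out, unsigned int avail_out)
    {
        /* ... decompress avail_in bytes with libbz2 into next_out ... */
        return 0;
    }

    static void my_dmg_bz2_init(void)
    {
        dmg_uncompress_bz2 = my_bz2_uncompress;  /* enables UDBZ chunks */
    }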


@@ -204,10 +204,11 @@ fail:
return NULL;
}
/* Callers must hold exp->ctx lock */
void blk_exp_ref(BlockExport *exp)
{
assert(qatomic_read(&exp->refcount) > 0);
qatomic_inc(&exp->refcount);
assert(exp->refcount > 0);
exp->refcount++;
}
/* Runs in the main thread */
@@ -230,10 +231,11 @@ static void blk_exp_delete_bh(void *opaque)
aio_context_release(aio_context);
}
/* Callers must hold exp->ctx lock */
void blk_exp_unref(BlockExport *exp)
{
assert(qatomic_read(&exp->refcount) > 0);
if (qatomic_fetch_dec(&exp->refcount) == 1) {
assert(exp->refcount > 0);
if (--exp->refcount == 0) {
/* Touch the block_exports list only in the main thread */
aio_bh_schedule_oneshot(qemu_get_aio_context(), blk_exp_delete_bh,
exp);
@@ -308,7 +310,7 @@ void blk_exp_close_all_type(BlockExportType type)
blk_exp_request_shutdown(exp);
}
AIO_WAIT_WHILE_UNLOCKED(NULL, blk_exp_has_type(type));
AIO_WAIT_WHILE(NULL, blk_exp_has_type(type));
}
void blk_exp_close_all(void)
@@ -341,8 +343,7 @@ void qmp_block_export_del(const char *id,
if (!has_mode) {
mode = BLOCK_EXPORT_REMOVE_MODE_SAFE;
}
if (mode == BLOCK_EXPORT_REMOVE_MODE_SAFE &&
qatomic_read(&exp->refcount) > 1) {
if (mode == BLOCK_EXPORT_REMOVE_MODE_SAFE && exp->refcount > 1) {
error_setg(errp, "export '%s' still in use", exp->id);
error_append_hint(errp, "Use mode='hard' to force client "
"disconnect\n");

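The export.c hunks swap plain refcount++/-- (valid only under the exp->ctx lock) for qatomic_* operations that are safe from any thread; a fetch-and-decrement returning 1 identifies the last reference exactly once. A self-contained sketch of the same pattern in portable C11 atomics (illustrative, not QEMU code):

    #include <assert.h>
    #include <stdatomic.h>
    #include <stdbool.h>

    typedef struct { atomic_int refcount; } Export;

    static void export_ref(Export *e)
    {
        int old = atomic_fetch_add(&e->refcount, 1);
        assert(old > 0);            /* caller must already hold a reference */
    }

    /* Returns true exactly once, for the thread that dropped the last ref. */
    static bool export_unref(Export *e)
    {
        int old = atomic_fetch_sub(&e->refcount, 1);
        assert(old > 0);
        return old == 1;            /* schedule the actual deletion elsewhere */
    }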

@@ -50,7 +50,6 @@ typedef struct FuseExport {
struct fuse_session *fuse_session;
struct fuse_buf fuse_buf;
unsigned int in_flight; /* atomic */
bool mounted, fd_handler_set_up;
char *mountpoint;
@@ -79,42 +78,6 @@ static void read_from_fuse_export(void *opaque);
static bool is_regular_file(const char *path, Error **errp);
static void fuse_export_drained_begin(void *opaque)
{
FuseExport *exp = opaque;
aio_set_fd_handler(exp->common.ctx,
fuse_session_fd(exp->fuse_session),
NULL, NULL, NULL, NULL, NULL);
exp->fd_handler_set_up = false;
}
static void fuse_export_drained_end(void *opaque)
{
FuseExport *exp = opaque;
/* Refresh AioContext in case it changed */
exp->common.ctx = blk_get_aio_context(exp->common.blk);
aio_set_fd_handler(exp->common.ctx,
fuse_session_fd(exp->fuse_session),
read_from_fuse_export, NULL, NULL, NULL, exp);
exp->fd_handler_set_up = true;
}
static bool fuse_export_drained_poll(void *opaque)
{
FuseExport *exp = opaque;
return qatomic_read(&exp->in_flight) > 0;
}
static const BlockDevOps fuse_export_blk_dev_ops = {
.drained_begin = fuse_export_drained_begin,
.drained_end = fuse_export_drained_end,
.drained_poll = fuse_export_drained_poll,
};
static int fuse_export_create(BlockExport *blk_exp,
BlockExportOptions *blk_exp_args,
Error **errp)
@@ -138,15 +101,6 @@ static int fuse_export_create(BlockExport *blk_exp,
}
}
blk_set_dev_ops(exp->common.blk, &fuse_export_blk_dev_ops, exp);
/*
* We handle draining ourselves using an in-flight counter and by disabling
* the FUSE fd handler. Do not queue BlockBackend requests; they need to
* complete so the in-flight counter reaches zero.
*/
blk_set_disable_request_queuing(exp->common.blk, true);
init_exports_table();
/*
@@ -270,7 +224,7 @@ static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
aio_set_fd_handler(exp->common.ctx,
fuse_session_fd(exp->fuse_session),
fuse_session_fd(exp->fuse_session), true,
read_from_fuse_export, NULL, NULL, NULL, exp);
exp->fd_handler_set_up = true;
@@ -292,8 +246,6 @@ static void read_from_fuse_export(void *opaque)
blk_exp_ref(&exp->common);
qatomic_inc(&exp->in_flight);
do {
ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
} while (ret == -EINTR);
@@ -304,10 +256,6 @@ static void read_from_fuse_export(void *opaque)
fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
out:
if (qatomic_fetch_dec(&exp->in_flight) == 1) {
aio_wait_kick(); /* wake AIO_WAIT_WHILE() */
}
blk_exp_unref(&exp->common);
}
@@ -320,7 +268,7 @@ static void fuse_export_shutdown(BlockExport *blk_exp)
if (exp->fd_handler_set_up) {
aio_set_fd_handler(exp->common.ctx,
fuse_session_fd(exp->fuse_session),
fuse_session_fd(exp->fuse_session), true,
NULL, NULL, NULL, NULL, NULL);
exp->fd_handler_set_up = false;
}
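The FUSE export's hand-rolled draining hinges on three pieces visible above: an atomic in-flight counter, aio_wait_kick() on the last decrement, and a .drained_poll callback reporting whether requests remain. A condensed sketch of how the pieces fit together (QEMU primitives, simplified wiring):

    /* request path */
    qatomic_inc(&exp->in_flight);
    /* ... receive and process one FUSE request ... */
    if (qatomic_fetch_dec(&exp->in_flight) == 1) {
        aio_wait_kick();            /* wake pollers in AIO_WAIT_WHILE() */
    }

    /* drain side: bdrv_drained_begin() keeps polling while this is true */
    static bool drained_poll(void *opaque)
    {
        FuseExport *exp = opaque;
        return qatomic_read(&exp->in_flight) > 0;
    }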


@@ -31,8 +31,7 @@ typedef struct VduseBlkExport {
VduseDev *dev;
uint16_t num_queues;
char *recon_file;
unsigned int inflight; /* atomic */
bool vqs_started;
unsigned int inflight;
} VduseBlkExport;
typedef struct VduseBlkReq {
@@ -42,20 +41,13 @@ typedef struct VduseBlkReq {
static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
{
if (qatomic_fetch_inc(&vblk_exp->inflight) == 0) {
/* Prevent export from being deleted */
blk_exp_ref(&vblk_exp->export);
}
vblk_exp->inflight++;
}
static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
{
if (qatomic_fetch_dec(&vblk_exp->inflight) == 1) {
/* Wake AIO_WAIT_WHILE() */
if (--vblk_exp->inflight == 0) {
aio_wait_kick();
/* Now the export can be deleted */
blk_exp_unref(&vblk_exp->export);
}
}
@@ -132,12 +124,8 @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
{
VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
if (!vblk_exp->vqs_started) {
return; /* vduse_blk_drained_end() will start vqs later */
}
aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
on_vduse_vq_kick, NULL, NULL, NULL, vq);
true, on_vduse_vq_kick, NULL, NULL, NULL, vq);
/* Make sure we don't miss any kick after reconnecting */
eventfd_write(vduse_queue_get_fd(vq), 1);
}
@@ -145,14 +133,9 @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
{
VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
int fd = vduse_queue_get_fd(vq);
if (fd < 0) {
return;
}
aio_set_fd_handler(vblk_exp->export.ctx, fd,
NULL, NULL, NULL, NULL, NULL);
aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
true, NULL, NULL, NULL, NULL, NULL);
}
static const VduseOps vduse_blk_ops = {
@@ -169,19 +152,42 @@ static void on_vduse_dev_kick(void *opaque)
static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
{
int i;
aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
on_vduse_dev_kick, NULL, NULL, NULL,
true, on_vduse_dev_kick, NULL, NULL, NULL,
vblk_exp->dev);
/* Virtqueues are handled by vduse_blk_drained_end() */
for (i = 0; i < vblk_exp->num_queues; i++) {
VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
int fd = vduse_queue_get_fd(vq);
if (fd < 0) {
continue;
}
aio_set_fd_handler(vblk_exp->export.ctx, fd, true,
on_vduse_vq_kick, NULL, NULL, NULL, vq);
}
}
static void vduse_blk_detach_ctx(VduseBlkExport *vblk_exp)
{
aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
NULL, NULL, NULL, NULL, NULL);
int i;
/* Virtqueues are handled by vduse_blk_drained_begin() */
for (i = 0; i < vblk_exp->num_queues; i++) {
VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
int fd = vduse_queue_get_fd(vq);
if (fd < 0) {
continue;
}
aio_set_fd_handler(vblk_exp->export.ctx, fd,
true, NULL, NULL, NULL, NULL, NULL);
}
aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
true, NULL, NULL, NULL, NULL, NULL);
AIO_WAIT_WHILE(vblk_exp->export.ctx, vblk_exp->inflight > 0);
}
@@ -214,55 +220,8 @@ static void vduse_blk_resize(void *opaque)
(char *)&config.capacity);
}
static void vduse_blk_stop_virtqueues(VduseBlkExport *vblk_exp)
{
for (uint16_t i = 0; i < vblk_exp->num_queues; i++) {
VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
vduse_blk_disable_queue(vblk_exp->dev, vq);
}
vblk_exp->vqs_started = false;
}
static void vduse_blk_start_virtqueues(VduseBlkExport *vblk_exp)
{
vblk_exp->vqs_started = true;
for (uint16_t i = 0; i < vblk_exp->num_queues; i++) {
VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
vduse_blk_enable_queue(vblk_exp->dev, vq);
}
}
static void vduse_blk_drained_begin(void *opaque)
{
BlockExport *exp = opaque;
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
vduse_blk_stop_virtqueues(vblk_exp);
}
static void vduse_blk_drained_end(void *opaque)
{
BlockExport *exp = opaque;
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
vduse_blk_start_virtqueues(vblk_exp);
}
static bool vduse_blk_drained_poll(void *opaque)
{
BlockExport *exp = opaque;
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
return qatomic_read(&vblk_exp->inflight) > 0;
}
static const BlockDevOps vduse_block_ops = {
.resize_cb = vduse_blk_resize,
.drained_begin = vduse_blk_drained_begin,
.drained_end = vduse_blk_drained_end,
.drained_poll = vduse_blk_drained_poll,
.resize_cb = vduse_blk_resize,
};
static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
@@ -309,7 +268,6 @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
vblk_exp->handler.serial = g_strdup(vblk_opts->serial ?: "");
vblk_exp->handler.logical_block_size = logical_block_size;
vblk_exp->handler.writable = opts->writable;
vblk_exp->vqs_started = true;
config.capacity =
cpu_to_le64(blk_getlength(exp->blk) >> VIRTIO_BLK_SECTOR_BITS);
@@ -364,19 +322,13 @@ static int vduse_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
vduse_dev_setup_queue(vblk_exp->dev, i, queue_size);
}
aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev),
aio_set_fd_handler(exp->ctx, vduse_dev_get_fd(vblk_exp->dev), true,
on_vduse_dev_kick, NULL, NULL, NULL, vblk_exp->dev);
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
vblk_exp);
blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
/*
* We handle draining ourselves using an in-flight counter and by disabling
* virtqueue fd handlers. Do not queue BlockBackend requests; they need to
* complete so the in-flight counter reaches zero.
*/
blk_set_disable_request_queuing(exp->blk, true);
blk_set_dev_ops(exp->blk, &vduse_block_ops, exp);
return 0;
err:
@@ -392,9 +344,6 @@ static void vduse_blk_exp_delete(BlockExport *exp)
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
int ret;
assert(qatomic_read(&vblk_exp->inflight) == 0);
vduse_blk_detach_ctx(vblk_exp);
blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
vblk_exp);
ret = vduse_dev_destroy(vblk_exp->dev);
@@ -405,12 +354,13 @@ static void vduse_blk_exp_delete(BlockExport *exp)
g_free(vblk_exp->handler.serial);
}
/* Called with exp->ctx acquired */
static void vduse_blk_exp_request_shutdown(BlockExport *exp)
{
VduseBlkExport *vblk_exp = container_of(exp, VduseBlkExport, export);
vduse_blk_stop_virtqueues(vblk_exp);
aio_context_acquire(vblk_exp->export.ctx);
vduse_blk_detach_ctx(vblk_exp);
aio_context_acquire(vblk_exp->export.ctx);
}
const BlockExportDriver blk_exp_vduse_blk = {


@@ -10,7 +10,6 @@
* later. See the COPYING file in the top-level directory.
*/
#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "block/block.h"
#include "subprojects/libvhost-user/libvhost-user.h" /* only for the type definitions */
#include "standard-headers/linux/virtio_blk.h"
@@ -50,10 +49,7 @@ static void vu_blk_req_complete(VuBlkReq *req, size_t in_len)
free(req);
}
/*
* Called with server in_flight counter increased, must decrease before
* returning.
*/
/* Called with server refcount increased, must decrease before returning */
static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
{
VuBlkReq *req = opaque;
@@ -71,12 +67,12 @@ static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
in_num, out_num);
if (in_len < 0) {
free(req);
vhost_user_server_dec_in_flight(server);
vhost_user_server_unref(server);
return;
}
vu_blk_req_complete(req, in_len);
vhost_user_server_dec_in_flight(server);
vhost_user_server_unref(server);
}
static void vu_blk_process_vq(VuDev *vu_dev, int idx)
@@ -98,7 +94,7 @@ static void vu_blk_process_vq(VuDev *vu_dev, int idx)
Coroutine *co =
qemu_coroutine_create(vu_blk_virtio_process_req, req);
vhost_user_server_inc_in_flight(server);
vhost_user_server_ref(server);
qemu_coroutine_enter(co);
}
}
@@ -167,7 +163,7 @@ vu_blk_set_config(VuDev *vu_dev, const uint8_t *data,
uint8_t wce;
/* don't support live migration */
if (flags != VHOST_SET_CONFIG_TYPE_FRONTEND) {
if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
return -EINVAL;
}
@@ -212,21 +208,15 @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
{
VuBlkExport *vexp = opaque;
/*
* The actual attach will happen in vu_blk_drained_end() and we just
* restore ctx here.
*/
vexp->export.ctx = ctx;
vhost_user_server_attach_aio_context(&vexp->vu_server, ctx);
}
static void blk_aio_detach(void *opaque)
{
VuBlkExport *vexp = opaque;
/*
* The actual detach already happened in vu_blk_drained_begin() but from
* this point on we must not access ctx anymore.
*/
vhost_user_server_detach_aio_context(&vexp->vu_server);
vexp->export.ctx = NULL;
}
@@ -261,58 +251,6 @@ static void vu_blk_exp_request_shutdown(BlockExport *exp)
vhost_user_server_stop(&vexp->vu_server);
}
static void vu_blk_exp_resize(void *opaque)
{
VuBlkExport *vexp = opaque;
BlockDriverState *bs = blk_bs(vexp->handler.blk);
int64_t new_size = bdrv_getlength(bs);
if (new_size < 0) {
error_printf("Failed to get length of block node '%s'",
bdrv_get_node_name(bs));
return;
}
vexp->blkcfg.capacity = cpu_to_le64(new_size >> VIRTIO_BLK_SECTOR_BITS);
vu_config_change_msg(&vexp->vu_server.vu_dev);
}
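The resize callback converts a byte length into virtio-blk's capacity unit, which is 512-byte sectors per the virtio spec (hence VIRTIO_BLK_SECTOR_BITS, a shift by 9). A quick worked example:

    int64_t new_size = 10LL << 30;        /* 10 GiB = 10737418240 bytes */
    uint64_t capacity = new_size >> 9;    /* 20971520 sectors */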
/* Called with vexp->export.ctx acquired */
static void vu_blk_drained_begin(void *opaque)
{
VuBlkExport *vexp = opaque;
vhost_user_server_detach_aio_context(&vexp->vu_server);
}
/* Called with vexp->export.blk AioContext acquired */
static void vu_blk_drained_end(void *opaque)
{
VuBlkExport *vexp = opaque;
vhost_user_server_attach_aio_context(&vexp->vu_server, vexp->export.ctx);
}
/*
* Ensures that bdrv_drained_begin() waits until in-flight requests complete.
*
* Called with vexp->export.ctx acquired.
*/
static bool vu_blk_drained_poll(void *opaque)
{
VuBlkExport *vexp = opaque;
return vhost_user_server_has_in_flight(&vexp->vu_server);
}
static const BlockDevOps vu_blk_dev_ops = {
.drained_begin = vu_blk_drained_begin,
.drained_end = vu_blk_drained_end,
.drained_poll = vu_blk_drained_poll,
.resize_cb = vu_blk_exp_resize,
};
static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
Error **errp)
{
@@ -354,8 +292,6 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
vexp);
blk_set_dev_ops(exp->blk, &vu_blk_dev_ops, vexp);
if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
num_queues, &vu_blk_iface, errp)) {
blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,

File diff suppressed because it is too large


@@ -153,6 +153,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
BlockCompletionFunc *cb, void *opaque, int type)
{
RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
ThreadPool *pool;
acb->bs = bs;
acb->hfile = hfile;
@@ -167,7 +168,8 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
acb->aio_offset = offset;
trace_file_paio_submit(acb, opaque, offset, count, type);
return thread_pool_submit_aio(aio_worker, acb, cb, opaque);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
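Same theme as the aio_set_fd_handler() change: newer QEMU infers the thread pool from the current AioContext, while 8.0 passes it explicitly, as paio_submit() does above. The two prototypes, roughly (verify against include/block/thread-pool.h of each version):

    /* newer API: pool taken from the current AioContext */
    BlockAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg,
                                       BlockCompletionFunc *cb, void *opaque);

    /* QEMU 8.0 API, as used on this branch */
    BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
                                       ThreadPoolFunc *func, void *arg,
                                       BlockCompletionFunc *cb, void *opaque);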
int qemu_ftruncate64(int fd, int64_t length)


@@ -424,7 +424,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
int ret;
int old_errno;
SocketAddressList *server;
uint64_t port;
unsigned long long port;
glfs = glfs_find_preopened(gconf->volume);
if (glfs) {
@@ -445,7 +445,7 @@ static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
server->value->u.q_unix.path, 0);
break;
case SOCKET_ADDRESS_TYPE_INET:
if (parse_uint_full(server->value->u.inet.port, 10, &port) < 0 ||
if (parse_uint_full(server->value->u.inet.port, &port, 10) < 0 ||
port > 65535) {
error_setg(errp, "'%s' is not a valid port number",
server->value->u.inet.port);
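Note the argument-order swap in parse_uint_full(): the newer version takes the base before the out-pointer and writes a uint64_t, while the 8.0 version takes the out-pointer first as unsigned long long. Roughly (verify against util/cutils.c of each version):

    int parse_uint_full(const char *s, int base, uint64_t *value);           /* newer */
    int parse_uint_full(const char *s, unsigned long long *value, int base); /* 8.0 */

Either way the caller still has to range-check the result, as the port > 65535 test above does.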


@@ -30,8 +30,10 @@ BdrvGraphLock graph_lock;
/* Protects the list of aiocontext and orphaned_reader_count */
static QemuMutex aio_context_list_lock;
#if 0
/* Written and read with atomic operations. */
static int has_writer;
#endif
/*
* A reader coroutine could move from an AioContext to another.
@@ -88,6 +90,7 @@ void unregister_aiocontext(AioContext *ctx)
g_free(ctx->bdrv_graph);
}
#if 0
static uint32_t reader_count(void)
{
BdrvGraphRWlock *brdv_graph;
@@ -105,26 +108,18 @@ static uint32_t reader_count(void)
assert((int32_t)rd >= 0);
return rd;
}
#endif
void bdrv_graph_wrlock(BlockDriverState *bs)
void bdrv_graph_wrlock(void)
{
AioContext *ctx = NULL;
GLOBAL_STATE_CODE();
assert(!qatomic_read(&has_writer));
/*
* Release only non-mainloop AioContext. The mainloop often relies on the
* BQL and doesn't lock the main AioContext before doing things.
* TODO Some callers hold an AioContext lock when this is called, which
* causes deadlocks. Reenable once the AioContext locking is cleaned up (or
* AioContext locks are gone).
*/
if (bs) {
ctx = bdrv_get_aio_context(bs);
if (ctx != qemu_get_aio_context()) {
aio_context_release(ctx);
} else {
ctx = NULL;
}
}
#if 0
assert(!qatomic_read(&has_writer));
/* Make sure that constantly arriving new I/O doesn't cause starvation */
bdrv_drain_all_begin_nopoll();
@@ -142,7 +137,7 @@ void bdrv_graph_wrlock(BlockDriverState *bs)
* reader lock.
*/
qatomic_set(&has_writer, 0);
AIO_WAIT_WHILE_UNLOCKED(NULL, reader_count() >= 1);
AIO_WAIT_WHILE(qemu_get_aio_context(), reader_count() >= 1);
qatomic_set(&has_writer, 1);
/*
@@ -154,15 +149,13 @@ void bdrv_graph_wrlock(BlockDriverState *bs)
} while (reader_count() >= 1);
bdrv_drain_all_end();
if (ctx) {
aio_context_acquire(bdrv_get_aio_context(bs));
}
#endif
}
void bdrv_graph_wrunlock(void)
{
GLOBAL_STATE_CODE();
#if 0
QEMU_LOCK_GUARD(&aio_context_list_lock);
assert(qatomic_read(&has_writer));
@@ -174,13 +167,21 @@ void bdrv_graph_wrunlock(void)
/* Wake up all coroutine that are waiting to read the graph */
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
#endif
}
void coroutine_fn bdrv_graph_co_rdlock(void)
{
/* TODO Reenable when wrlock is reenabled */
#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
/* Do not lock if in main thread */
if (qemu_in_main_thread()) {
return;
}
for (;;) {
qatomic_set(&bdrv_graph->reader_count,
bdrv_graph->reader_count + 1);
@@ -237,13 +238,20 @@ void coroutine_fn bdrv_graph_co_rdlock(void)
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
}
}
#endif
}
void coroutine_fn bdrv_graph_co_rdunlock(void)
{
#if 0
BdrvGraphRWlock *bdrv_graph;
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
/* Do not lock if in main thread */
if (qemu_in_main_thread()) {
return;
}
qatomic_store_release(&bdrv_graph->reader_count,
bdrv_graph->reader_count - 1);
/* make sure writer sees reader_count before we check has_writer */
@@ -258,6 +266,7 @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
if (qatomic_read(&has_writer)) {
aio_wait_kick();
}
#endif
}
void bdrv_graph_rdlock_main_loop(void)
@@ -275,13 +284,19 @@ void bdrv_graph_rdunlock_main_loop(void)
void assert_bdrv_graph_readable(void)
{
/* reader_count() is slow due to contention on aio_context_list_lock */
/* TODO Reenable when wrlock is reenabled */
#if 0
#ifdef CONFIG_DEBUG_GRAPH_LOCK
assert(qemu_in_main_thread() || reader_count());
#endif
#endif
}
void assert_bdrv_graph_writable(void)
{
assert(qemu_in_main_thread());
/* TODO Reenable when wrlock is reenabled */
#if 0
assert(qatomic_read(&has_writer));
#endif
}
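The protocol disabled behind #if 0 on this branch is a reader/writer scheme that favors the writer: the writer publishes has_writer, drains I/O, and polls the event loop until every per-AioContext reader count drops to zero, while readers optimistically bump their counter and back off if a writer is pending. A simplified single-counter model (illustrative only; the real code keeps one reader count per AioContext plus a coroutine wait queue):

    static int has_writer;   /* accessed with atomics */
    static int readers;

    static void wrlock(void)
    {
        qatomic_set(&has_writer, 1);
        while (qatomic_read(&readers) > 0) {
            /* poll the event loop, as AIO_WAIT_WHILE() does */
        }
    }

    static bool try_rdlock(void)
    {
        qatomic_inc(&readers);
        smp_mb();                     /* counter must be visible before the check */
        if (qatomic_read(&has_writer)) {
            qatomic_dec(&readers);    /* back off; wait on the reader queue */
            return false;
        }
        return true;
    }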


@@ -60,7 +60,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
void bdrv_parent_drained_end_single(BdrvChild *c)
{
GLOBAL_STATE_CODE();
IO_OR_GS_CODE();
assert(c->quiesced_parent);
c->quiesced_parent = false;
@@ -108,7 +108,7 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
void bdrv_parent_drained_begin_single(BdrvChild *c)
{
GLOBAL_STATE_CODE();
IO_OR_GS_CODE();
assert(!c->quiesced_parent);
c->quiesced_parent = true;
@@ -160,6 +160,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
bool have_limits;
GLOBAL_STATE_CODE();
assume_graph_lock(); /* FIXME */
if (tran) {
BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
@@ -247,7 +248,7 @@ typedef struct {
bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
bool ignore_bds_parents)
{
GLOBAL_STATE_CODE();
IO_OR_GS_CODE();
if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
return true;
@@ -334,8 +335,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
if (ctx != co_ctx) {
aio_context_release(ctx);
}
replay_bh_schedule_oneshot_event(qemu_get_aio_context(),
bdrv_co_drain_bh_cb, &data);
replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);
qemu_coroutine_yield();
/* If we are resumed from some other event (such as an aio completion or a
@@ -358,10 +358,9 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
return;
}
GLOBAL_STATE_CODE();
/* Stop things in parent-to-child order */
if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
aio_disable_external(bdrv_get_aio_context(bs));
bdrv_parent_drained_begin(bs, parent);
if (bs->drv && bs->drv->bdrv_drain_begin) {
bs->drv->bdrv_drain_begin(bs);
@@ -401,14 +400,11 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
{
int old_quiesce_counter;
IO_OR_GS_CODE();
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(bs, false, parent, false);
return;
}
assert(bs->quiesce_counter > 0);
GLOBAL_STATE_CODE();
/* Re-enable things in child-to-parent order */
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
@@ -417,6 +413,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
bs->drv->bdrv_drain_end(bs);
}
bdrv_parent_drained_end(bs, parent);
aio_enable_external(bdrv_get_aio_context(bs));
}
}
@@ -527,7 +524,7 @@ void bdrv_drain_all_begin(void)
bdrv_drain_all_begin_nopoll();
/* Now poll the in-flight requests */
AIO_WAIT_WHILE_UNLOCKED(NULL, bdrv_drain_all_poll());
AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
while ((bs = bdrv_next_all_states(bs))) {
bdrv_drain_assert_idle(bs);
@@ -730,9 +727,10 @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
/**
* Round a region to cluster boundaries
*/
void coroutine_fn GRAPH_RDLOCK
bdrv_round_to_clusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *cluster_offset, int64_t *cluster_bytes)
void coroutine_fn bdrv_round_to_clusters(BlockDriverState *bs,
int64_t offset, int64_t bytes,
int64_t *cluster_offset,
int64_t *cluster_bytes)
{
BlockDriverInfo bdi;
IO_CODE();
@@ -746,7 +744,7 @@ bdrv_round_to_clusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
}
}
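The rounding itself is plain alignment arithmetic on the cluster size reported by the driver. A sketch using QEMU's alignment macros, with a worked example: for 64 KiB clusters, offset 0x11000 and bytes 0x3000 round out to cluster_offset 0x10000 and cluster_bytes 0x10000:

    static void round_to_clusters(int64_t offset, int64_t bytes,
                                  int64_t cluster_size,
                                  int64_t *cluster_offset, int64_t *cluster_bytes)
    {
        *cluster_offset = QEMU_ALIGN_DOWN(offset, cluster_size);
        *cluster_bytes = QEMU_ALIGN_UP(offset + bytes, cluster_size)
                         - *cluster_offset;
    }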
static int coroutine_fn GRAPH_RDLOCK bdrv_get_cluster_size(BlockDriverState *bs)
static coroutine_fn int bdrv_get_cluster_size(BlockDriverState *bs)
{
BlockDriverInfo bdi;
int ret;
@@ -1379,7 +1377,7 @@ bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req,
}
/* Forward the request to the BlockDriver, possibly fragmenting it */
total_bytes = bdrv_co_getlength(bs);
total_bytes = bdrv_getlength(bs);
if (total_bytes < 0) {
ret = total_bytes;
goto out;
@@ -1441,14 +1439,6 @@ out:
* @merge_reads is true for small requests,
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
* head and tail exist but @buf_len == align and @tail_buf == @buf.
*
* @write is true for write requests, false for read requests.
*
* If padding makes the vector too long (exceeding IOV_MAX), then we need to
* merge existing vector elements into a single one. @collapse_bounce_buf acts
* as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse
* I/O vector elements so for read requests, the data can be copied back after
* the read is done.
*/
typedef struct BdrvRequestPadding {
uint8_t *buf;
@@ -1457,17 +1447,11 @@ typedef struct BdrvRequestPadding {
size_t head;
size_t tail;
bool merge_reads;
bool write;
QEMUIOVector local_qiov;
uint8_t *collapse_bounce_buf;
size_t collapse_len;
QEMUIOVector pre_collapse_qiov;
} BdrvRequestPadding;
static bool bdrv_init_padding(BlockDriverState *bs,
int64_t offset, int64_t bytes,
bool write,
BdrvRequestPadding *pad)
{
int64_t align = bs->bl.request_alignment;
@@ -1499,8 +1483,6 @@ static bool bdrv_init_padding(BlockDriverState *bs,
pad->tail_buf = pad->buf + pad->buf_len - align;
}
pad->write = write;
return true;
}
@@ -1565,23 +1547,8 @@ zero_mem:
return 0;
}
/**
* Free *pad's associated buffers, and perform any necessary finalization steps.
*/
static void bdrv_padding_finalize(BdrvRequestPadding *pad)
static void bdrv_padding_destroy(BdrvRequestPadding *pad)
{
if (pad->collapse_bounce_buf) {
if (!pad->write) {
/*
* If padding required elements in the vector to be collapsed into a
* bounce buffer, copy the bounce buffer content back
*/
qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
pad->collapse_bounce_buf, pad->collapse_len);
}
qemu_vfree(pad->collapse_bounce_buf);
qemu_iovec_destroy(&pad->pre_collapse_qiov);
}
if (pad->buf) {
qemu_vfree(pad->buf);
qemu_iovec_destroy(&pad->local_qiov);
@@ -1589,101 +1556,6 @@ static void bdrv_padding_finalize(BdrvRequestPadding *pad)
memset(pad, 0, sizeof(*pad));
}
/*
* Create pad->local_qiov by wrapping @iov in the padding head and tail, while
* ensuring that the resulting vector will not exceed IOV_MAX elements.
*
* To ensure this, when necessary, the first two or three elements of @iov are
* merged into pad->collapse_bounce_buf and replaced by a reference to that
* bounce buffer in pad->local_qiov.
*
* After performing a read request, the data from the bounce buffer must be
* copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
*/
static int bdrv_create_padded_qiov(BlockDriverState *bs,
BdrvRequestPadding *pad,
struct iovec *iov, int niov,
size_t iov_offset, size_t bytes)
{
int padded_niov, surplus_count, collapse_count;
/* Assert this invariant */
assert(niov <= IOV_MAX);
/*
* Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
* to the guest is not ideal, but there is little else we can do. At least
* this will practically never happen on 64-bit systems.
*/
if (SIZE_MAX - pad->head < bytes ||
SIZE_MAX - pad->head - bytes < pad->tail)
{
return -EINVAL;
}
/* Length of the resulting IOV if we just concatenated everything */
padded_niov = !!pad->head + niov + !!pad->tail;
qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
if (pad->head) {
qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
}
/*
* If padded_niov > IOV_MAX, we cannot just concatenate everything.
* Instead, merge the first two or three elements of @iov to reduce the
* number of vector elements as necessary.
*/
if (padded_niov > IOV_MAX) {
/*
* Only head and tail can have led to the number of entries exceeding
* IOV_MAX, so we can exceed it by the head and tail at most. We need
* to reduce the number of elements by `surplus_count`, so we merge that
* many elements plus one into one element.
*/
surplus_count = padded_niov - IOV_MAX;
assert(surplus_count <= !!pad->head + !!pad->tail);
collapse_count = surplus_count + 1;
/*
* Move the elements to collapse into `pad->pre_collapse_qiov`, then
* advance `iov` (and associated variables) by those elements.
*/
qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
collapse_count, iov_offset, SIZE_MAX);
iov += collapse_count;
iov_offset = 0;
niov -= collapse_count;
bytes -= pad->pre_collapse_qiov.size;
/*
* Construct the bounce buffer to match the length of the to-collapse
* vector elements, and for write requests, initialize it with the data
* from those elements. Then add it to `pad->local_qiov`.
*/
pad->collapse_len = pad->pre_collapse_qiov.size;
pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
if (pad->write) {
qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
pad->collapse_bounce_buf, pad->collapse_len);
}
qemu_iovec_add(&pad->local_qiov,
pad->collapse_bounce_buf, pad->collapse_len);
}
qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
if (pad->tail) {
qemu_iovec_add(&pad->local_qiov,
pad->buf + pad->buf_len - pad->tail, pad->tail);
}
assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
return 0;
}
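A worked example of the surplus arithmetic above, assuming Linux's IOV_MAX of 1024: a request arrives with niov = 1024 and needs both head and tail padding, so padded_niov = 1026 and surplus_count = 2. Then collapse_count = 3 guest elements are merged into one bounce buffer, leaving 1021 originals, and the final vector is 1 (head) + 1 (bounce) + 1021 + 1 (tail) = 1024 = IOV_MAX.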
/*
* bdrv_pad_request
*
@@ -1691,8 +1563,6 @@ static int bdrv_create_padded_qiov(BlockDriverState *bs,
* read of padding, bdrv_padding_rmw_read() should be called separately if
* needed.
*
* @write is true for write requests, false for read requests.
*
* Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out:
* - on function start they represent original request
* - on failure or when padding is not needed they are unchanged
@@ -1701,38 +1571,26 @@ static int bdrv_create_padded_qiov(BlockDriverState *bs,
static int bdrv_pad_request(BlockDriverState *bs,
QEMUIOVector **qiov, size_t *qiov_offset,
int64_t *offset, int64_t *bytes,
bool write,
BdrvRequestPadding *pad, bool *padded,
BdrvRequestFlags *flags)
{
int ret;
struct iovec *sliced_iov;
int sliced_niov;
size_t sliced_head, sliced_tail;
/* Should have been checked by the caller already */
ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
if (ret < 0) {
return ret;
}
bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
if (padded) {
*padded = false;
}
return 0;
}
sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
&sliced_head, &sliced_tail,
&sliced_niov);
/* Guaranteed by bdrv_check_request32() */
assert(*bytes <= SIZE_MAX);
ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
sliced_head, *bytes);
ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
*qiov, *qiov_offset, *bytes,
pad->buf + pad->buf_len - pad->tail,
pad->tail);
if (ret < 0) {
bdrv_padding_finalize(pad);
bdrv_padding_destroy(pad);
return ret;
}
*bytes += pad->head + pad->tail;
@@ -1799,8 +1657,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
flags |= BDRV_REQ_COPY_ON_READ;
}
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
&pad, NULL, &flags);
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
NULL, &flags);
if (ret < 0) {
goto fail;
}
@@ -1810,7 +1668,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
bs->bl.request_alignment,
qiov, qiov_offset, flags);
tracked_request_end(&req);
bdrv_padding_finalize(&pad);
bdrv_padding_destroy(&pad);
fail:
bdrv_dec_in_flight(bs);
@@ -1942,7 +1800,7 @@ fail:
return ret;
}
static inline int coroutine_fn GRAPH_RDLOCK
static inline int coroutine_fn
bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
BdrvTrackedRequest *req, int flags)
{
@@ -2142,7 +2000,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
/* This flag doesn't make sense for padding or zero writes */
flags &= ~BDRV_REQ_REGISTERED_BUF;
padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
padding = bdrv_init_padding(bs, offset, bytes, &pad);
if (padding) {
assert(!(flags & BDRV_REQ_NO_WAIT));
bdrv_make_request_serialising(req, align);
@@ -2190,7 +2048,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
}
out:
bdrv_padding_finalize(&pad);
bdrv_padding_destroy(&pad);
return ret;
}
@@ -2258,8 +2116,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
* alignment only if there is no ZERO flag.
*/
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
&pad, &padded, &flags);
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
&padded, &flags);
if (ret < 0) {
return ret;
}
@@ -2289,7 +2147,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
qiov, qiov_offset, flags);
bdrv_padding_finalize(&pad);
bdrv_padding_destroy(&pad);
out:
tracked_request_end(&req);
@@ -2392,7 +2250,7 @@ bdrv_co_block_status(BlockDriverState *bs, bool want_zero,
assert(pnum);
assert_bdrv_graph_readable();
*pnum = 0;
total_size = bdrv_co_getlength(bs);
total_size = bdrv_getlength(bs);
if (total_size < 0) {
ret = total_size;
goto early_out;
@@ -2412,7 +2270,7 @@ bdrv_co_block_status(BlockDriverState *bs, bool want_zero,
bytes = n;
}
/* Must be non-NULL or bdrv_co_getlength() would have failed */
/* Must be non-NULL or bdrv_getlength() would have failed */
assert(bs->drv);
has_filtered_child = bdrv_filter_child(bs);
if (!bs->drv->bdrv_co_block_status && !has_filtered_child) {
@@ -2550,7 +2408,7 @@ bdrv_co_block_status(BlockDriverState *bs, bool want_zero,
if (!cow_bs) {
ret |= BDRV_BLOCK_ZERO;
} else if (want_zero) {
int64_t size2 = bdrv_co_getlength(cow_bs);
int64_t size2 = bdrv_getlength(cow_bs);
if (size2 >= 0 && offset >= size2) {
ret |= BDRV_BLOCK_ZERO;
@@ -3015,7 +2873,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
}
/* Write back cached data to the OS even with cache=unsafe */
BLKDBG_CO_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
if (bs->drv->bdrv_co_flush_to_os) {
ret = bs->drv->bdrv_co_flush_to_os(bs);
if (ret < 0) {
@@ -3033,7 +2891,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
goto flush_children;
}
BLKDBG_CO_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
if (!bs->drv) {
/* bs->drv->bdrv_co_flush() might have ejected the BDS
* (even in case of apparent success) */
@@ -3257,74 +3115,6 @@ out:
return co.ret;
}
int coroutine_fn bdrv_co_zone_report(BlockDriverState *bs, int64_t offset,
unsigned int *nr_zones,
BlockZoneDescriptor *zones)
{
BlockDriver *drv = bs->drv;
CoroutineIOCompletion co = {
.coroutine = qemu_coroutine_self(),
};
IO_CODE();
bdrv_inc_in_flight(bs);
if (!drv || !drv->bdrv_co_zone_report || bs->bl.zoned == BLK_Z_NONE) {
co.ret = -ENOTSUP;
goto out;
}
co.ret = drv->bdrv_co_zone_report(bs, offset, nr_zones, zones);
out:
bdrv_dec_in_flight(bs);
return co.ret;
}
int coroutine_fn bdrv_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
int64_t offset, int64_t len)
{
BlockDriver *drv = bs->drv;
CoroutineIOCompletion co = {
.coroutine = qemu_coroutine_self(),
};
IO_CODE();
bdrv_inc_in_flight(bs);
if (!drv || !drv->bdrv_co_zone_mgmt || bs->bl.zoned == BLK_Z_NONE) {
co.ret = -ENOTSUP;
goto out;
}
co.ret = drv->bdrv_co_zone_mgmt(bs, op, offset, len);
out:
bdrv_dec_in_flight(bs);
return co.ret;
}
int coroutine_fn bdrv_co_zone_append(BlockDriverState *bs, int64_t *offset,
QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
int ret;
BlockDriver *drv = bs->drv;
CoroutineIOCompletion co = {
.coroutine = qemu_coroutine_self(),
};
IO_CODE();
ret = bdrv_check_qiov_request(*offset, qiov->size, qiov, 0, NULL);
if (ret < 0) {
return ret;
}
bdrv_inc_in_flight(bs);
if (!drv || !drv->bdrv_co_zone_append || bs->bl.zoned == BLK_Z_NONE) {
co.ret = -ENOTSUP;
goto out;
}
co.ret = drv->bdrv_co_zone_append(bs, offset, qiov, flags);
out:
bdrv_dec_in_flight(bs);
return co.ret;
}
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
IO_CODE();
@@ -3363,6 +3153,43 @@ void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
return mem;
}
void coroutine_fn bdrv_co_io_plug(BlockDriverState *bs)
{
BdrvChild *child;
IO_CODE();
assert_bdrv_graph_readable();
QLIST_FOREACH(child, &bs->children, next) {
bdrv_co_io_plug(child->bs);
}
if (qatomic_fetch_inc(&bs->io_plugged) == 0) {
BlockDriver *drv = bs->drv;
if (drv && drv->bdrv_co_io_plug) {
drv->bdrv_co_io_plug(bs);
}
}
}
void coroutine_fn bdrv_co_io_unplug(BlockDriverState *bs)
{
BdrvChild *child;
IO_CODE();
assert_bdrv_graph_readable();
assert(bs->io_plugged);
if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
BlockDriver *drv = bs->drv;
if (drv && drv->bdrv_co_io_unplug) {
drv->bdrv_co_io_unplug(bs);
}
}
QLIST_FOREACH(child, &bs->children, next) {
bdrv_co_io_unplug(child->bs);
}
}
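The plug/unplug API shown here exists purely for batching: while bs->io_plugged is raised, drivers may defer actual submission, and the final unplug flushes everything at once. Typical use from a coroutine, sketched:

    bdrv_co_io_plug(bs);
    for (i = 0; i < n; i++) {
        /* issue request i; a plugged driver only queues it */
    }
    bdrv_co_io_unplug(bs);   /* counter hits zero: driver submits the batch */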
/* Helper that undoes bdrv_register_buf() when it fails partway through */
static void GRAPH_RDLOCK
bdrv_register_buf_rollback(BlockDriverState *bs, void *host, size_t size,
@@ -3596,7 +3423,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
return ret;
}
old_size = bdrv_co_getlength(bs);
old_size = bdrv_getlength(bs);
if (old_size < 0) {
error_setg_errno(errp, -old_size, "Failed to get old image size");
return old_size;


@@ -16,12 +16,8 @@
#include "block/raw-aio.h"
#include "qemu/coroutine.h"
#include "qapi/error.h"
#include "sysemu/block-backend.h"
#include "trace.h"
/* Only used for assertions. */
#include "qemu/coroutine_int.h"
/* io_uring ring size */
#define MAX_ENTRIES 128
@@ -42,6 +38,7 @@ typedef struct LuringAIOCB {
} LuringAIOCB;
typedef struct LuringQueue {
int plugged;
unsigned int in_queue;
unsigned int in_flight;
bool blocked;
@@ -53,9 +50,10 @@ typedef struct LuringState {
struct io_uring ring;
/* No locking required, only accessed from AioContext home thread */
/* I/O queue for batched submission. Protected by the AioContext lock. */
LuringQueue io_q;
/* I/O completion processing. Only runs in I/O thread. */
QEMUBH *completion_bh;
} LuringState;
@@ -211,7 +209,6 @@ end:
* eventually runs later. Coroutines cannot be entered recursively
* so avoid doing that!
*/
assert(luringcb->co->ctx == s->aio_context);
if (!qemu_coroutine_entered(luringcb->co)) {
aio_co_wake(luringcb->co);
}
@@ -265,11 +262,13 @@ static int ioq_submit(LuringState *s)
static void luring_process_completions_and_submit(LuringState *s)
{
aio_context_acquire(s->aio_context);
luring_process_completions(s);
if (s->io_q.in_queue > 0) {
if (!s->io_q.plugged && s->io_q.in_queue > 0) {
ioq_submit(s);
}
aio_context_release(s->aio_context);
}
static void qemu_luring_completion_bh(void *opaque)
@@ -301,17 +300,25 @@ static void qemu_luring_poll_ready(void *opaque)
static void ioq_init(LuringQueue *io_q)
{
QSIMPLEQ_INIT(&io_q->submit_queue);
io_q->plugged = 0;
io_q->in_queue = 0;
io_q->in_flight = 0;
io_q->blocked = false;
}
static void luring_unplug_fn(void *opaque)
void luring_io_plug(BlockDriverState *bs, LuringState *s)
{
LuringState *s = opaque;
trace_luring_unplug_fn(s, s->io_q.blocked, s->io_q.in_queue,
s->io_q.in_flight);
if (!s->io_q.blocked && s->io_q.in_queue > 0) {
trace_luring_io_plug(s);
s->io_q.plugged++;
}
void luring_io_unplug(BlockDriverState *bs, LuringState *s)
{
assert(s->io_q.plugged);
trace_luring_io_unplug(s, s->io_q.blocked, s->io_q.plugged,
s->io_q.in_queue, s->io_q.in_flight);
if (--s->io_q.plugged == 0 &&
!s->io_q.blocked && s->io_q.in_queue > 0) {
ioq_submit(s);
}
}
@@ -338,10 +345,6 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
luringcb->qiov->niov, offset);
break;
case QEMU_AIO_ZONE_APPEND:
io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
luringcb->qiov->niov, offset);
break;
case QEMU_AIO_READ:
io_uring_prep_readv(sqes, fd, luringcb->qiov->iov,
luringcb->qiov->niov, offset);
@@ -358,26 +361,22 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next);
s->io_q.in_queue++;
trace_luring_do_submit(s, s->io_q.blocked, s->io_q.in_queue,
s->io_q.in_flight);
if (!s->io_q.blocked) {
if (s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES) {
ret = ioq_submit(s);
trace_luring_do_submit_done(s, ret);
return ret;
}
blk_io_plug_call(luring_unplug_fn, s);
trace_luring_do_submit(s, s->io_q.blocked, s->io_q.plugged,
s->io_q.in_queue, s->io_q.in_flight);
if (!s->io_q.blocked &&
(!s->io_q.plugged ||
s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES)) {
ret = ioq_submit(s);
trace_luring_do_submit_done(s, ret);
return ret;
}
return 0;
}
int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
QEMUIOVector *qiov, int type)
int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
uint64_t offset, QEMUIOVector *qiov, int type)
{
int ret;
AioContext *ctx = qemu_get_current_aio_context();
LuringState *s = aio_get_linux_io_uring(ctx);
LuringAIOCB luringcb = {
.co = qemu_coroutine_self(),
.ret = -EINPROGRESS,
@@ -400,7 +399,7 @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
void luring_detach_aio_context(LuringState *s, AioContext *old_context)
{
aio_set_fd_handler(old_context, s->ring.ring_fd,
aio_set_fd_handler(old_context, s->ring.ring_fd, false,
NULL, NULL, NULL, NULL, s);
qemu_bh_delete(s->completion_bh);
s->aio_context = NULL;
@@ -410,7 +409,7 @@ void luring_attach_aio_context(LuringState *s, AioContext *new_context)
{
s->aio_context = new_context;
s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s);
aio_set_fd_handler(s->aio_context, s->ring.ring_fd,
aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false,
qemu_luring_completion_cb, NULL,
qemu_luring_poll_cb, qemu_luring_poll_ready, s);
}
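What ioq_submit() buys is amortization: many SQEs are queued, then flushed with a single io_uring_enter(2). A standalone liburing sketch of the same batching idea (not QEMU code):

    #include <liburing.h>

    /* Queue n reads, then submit them with a single system call. */
    static int submit_read_batch(struct io_uring *ring, int fd,
                                 struct iovec *iov, int n)
    {
        for (int i = 0; i < n; i++) {
            struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
            if (!sqe) {                   /* SQ ring full: flush and retry */
                io_uring_submit(ring);
                sqe = io_uring_get_sqe(ring);
            }
            io_uring_prep_readv(sqe, fd, &iov[i], 1, (off_t)i * 4096);
        }
        return io_uring_submit(ring);     /* one syscall for the whole batch */
    }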


@@ -363,6 +363,7 @@ iscsi_set_events(IscsiLun *iscsilun)
if (ev != iscsilun->events) {
aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
false,
(ev & POLLIN) ? iscsi_process_read : NULL,
(ev & POLLOUT) ? iscsi_process_write : NULL,
NULL, NULL,
@@ -1539,7 +1540,7 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
IscsiLun *iscsilun = bs->opaque;
aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
NULL, NULL, NULL, NULL, NULL);
false, NULL, NULL, NULL, NULL, NULL);
iscsilun->events = 0;
if (iscsilun->nop_timer) {


@@ -15,10 +15,6 @@
#include "qemu/event_notifier.h"
#include "qemu/coroutine.h"
#include "qapi/error.h"
#include "sysemu/block-backend.h"
/* Only used for assertions. */
#include "qemu/coroutine_int.h"
#include <libaio.h>
@@ -47,6 +43,7 @@ struct qemu_laiocb {
};
typedef struct {
int plugged;
unsigned int in_queue;
unsigned int in_flight;
bool blocked;
@@ -59,8 +56,10 @@ struct LinuxAioState {
io_context_t ctx;
EventNotifier e;
/* No locking required, only accessed from AioContext home thread */
/* I/O queue for batched submission. Protected by the AioContext lock. */
LaioQueue io_q;
/* I/O completion processing. Only runs in I/O thread. */
QEMUBH *completion_bh;
int event_idx;
int event_max;
@@ -103,7 +102,6 @@ static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
* later. Coroutines cannot be entered recursively so avoid doing
* that!
*/
assert(laiocb->co->ctx == laiocb->ctx->aio_context);
if (!qemu_coroutine_entered(laiocb->co)) {
aio_co_wake(laiocb->co);
}
@@ -234,11 +232,13 @@ static void qemu_laio_process_completions(LinuxAioState *s)
static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
{
aio_context_acquire(s->aio_context);
qemu_laio_process_completions(s);
if (!QSIMPLEQ_EMPTY(&s->io_q.pending)) {
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
aio_context_release(s->aio_context);
}
static void qemu_laio_completion_bh(void *opaque)
@@ -277,6 +277,7 @@ static void qemu_laio_poll_ready(EventNotifier *opaque)
static void ioq_init(LaioQueue *io_q)
{
QSIMPLEQ_INIT(&io_q->pending);
io_q->plugged = 0;
io_q->in_queue = 0;
io_q->in_flight = 0;
io_q->blocked = false;
@@ -353,11 +354,26 @@ static uint64_t laio_max_batch(LinuxAioState *s, uint64_t dev_max_batch)
return max_batch;
}
static void laio_unplug_fn(void *opaque)
void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
{
LinuxAioState *s = opaque;
s->io_q.plugged++;
}
if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
uint64_t dev_max_batch)
{
assert(s->io_q.plugged);
s->io_q.plugged--;
/*
* Why max batch checking is performed here:
* Another BDS may have queued requests with a higher dev_max_batch and
* therefore in_queue could now exceed our dev_max_batch. Re-check the max
* batch so we can honor our device's dev_max_batch.
*/
if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
(!s->io_q.plugged &&
!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
ioq_submit(s);
}
}
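The same batching rationale applies here, with io_submit(2) as the single flush point for everything queued in io_q.pending. A standalone libaio sketch (not QEMU code):

    #include <libaio.h>

    /* Prepare n reads and hand them to the kernel in one io_submit() call. */
    static int submit_read_batch(io_context_t ctx, int fd,
                                 struct iovec *iov, int n)
    {
        struct iocb iocbs[n];
        struct iocb *ptrs[n];

        for (int i = 0; i < n; i++) {
            io_prep_preadv(&iocbs[i], fd, &iov[i], 1, (long long)i * 4096);
            ptrs[i] = &iocbs[i];
        }
        return io_submit(ctx, n, ptrs);   /* number submitted, or -errno */
    }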
@@ -373,9 +389,6 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
case QEMU_AIO_WRITE:
io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
break;
case QEMU_AIO_ZONE_APPEND:
io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
break;
case QEMU_AIO_READ:
io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset);
break;
@@ -389,26 +402,24 @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
s->io_q.in_queue++;
if (!s->io_q.blocked) {
if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch)) {
ioq_submit(s);
} else {
blk_io_plug_call(laio_unplug_fn, s);
}
if (!s->io_q.blocked &&
(!s->io_q.plugged ||
s->io_q.in_queue >= laio_max_batch(s, dev_max_batch))) {
ioq_submit(s);
}
return 0;
}
int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
int type, uint64_t dev_max_batch)
int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
uint64_t offset, QEMUIOVector *qiov, int type,
uint64_t dev_max_batch)
{
int ret;
AioContext *ctx = qemu_get_current_aio_context();
struct qemu_laiocb laiocb = {
.co = qemu_coroutine_self(),
.nbytes = qiov->size,
.ctx = aio_get_linux_aio(ctx),
.ctx = s,
.ret = -EINPROGRESS,
.is_read = (type == QEMU_AIO_READ),
.qiov = qiov,
@@ -427,7 +438,7 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
{
aio_set_event_notifier(old_context, &s->e, NULL, NULL, NULL);
aio_set_event_notifier(old_context, &s->e, false, NULL, NULL, NULL);
qemu_bh_delete(s->completion_bh);
s->aio_context = NULL;
}
@@ -436,7 +447,7 @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
{
s->aio_context = new_context;
s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
aio_set_event_notifier(new_context, &s->e,
aio_set_event_notifier(new_context, &s->e, false,
qemu_laio_completion_cb,
qemu_laio_poll_cb,
qemu_laio_poll_ready);

Some files were not shown because too many files have changed in this diff