Compare commits

..

39 Commits

Author SHA1 Message Date
Fabiano Rosas
9f1a8f4e85 tests/qtest/migration: Use the new migration_test_add
Replace the test registration with the new function that prints test
names.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:16 -03:00
Fabiano Rosas
47b2c3d4f6 tests/qtest/migration: Add a wrapper to print test names
Our usage of gtest results in us losing the very basic functionality
of "knowing which test failed". The issue is that gtest only prints
test names ("paths" in gtest parlance) once the test has finished, but
we use asserts in the tests and crash gtest itself before it can print
anything. We also use a final abort when the result of g_test_run is
not 0.

Depending on how the test failed/broke, we can see the function that
triggered the abort, which may be representative of the test, but it
could also just be some generic function.

We have been relying on the primitive method of looking at the name of
the previous successful test and then looking at the code to figure
out which test should have come next.

Add a wrapper to the test registration that does the job of printing
the test name before running.
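A minimal sketch of what such a wrapper can look like with glib's
gtest API (the struct and function names here are illustrative, not
necessarily the ones in the patch):

typedef void (*MigrationTestFn)(void);

typedef struct {
    const char *path;
    MigrationTestFn fn;
} MigrationTest;

static void migration_test_wrapper(const void *data)
{
    const MigrationTest *test = data;

    /* print the gtest path before the test runs, so a crash
     * still tells us which test was executing */
    g_test_message("Running %s", test->path);
    test->fn();
}

void migration_test_add(const char *path, MigrationTestFn fn)
{
    MigrationTest *test = g_new0(MigrationTest, 1);

    test->path = path;
    test->fn = fn;
    g_test_add_data_func(path, test, migration_test_wrapper);
}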

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:16 -03:00
Fabiano Rosas
086d8dc142 tests/qtest: Add a test for fixed-ram with passing of fds
Add a multifd test for fixed-ram with passing of fds into QEMU. This
is how libvirt will consume the feature.

There are a couple of details to the fdset mechanism:

- multifd needs two distinct file descriptors (not duplicated with
  dup()) on the outgoing side so it can enable O_DIRECT only on the
  channels that write with alignment. The dup() system call creates
  file descriptors that share status flags, of which O_DIRECT is one
  (see the demonstration after this list).

  The incoming side doesn't set O_DIRECT, so it can dup() fds and
  therefore needs to receive only one in the fdset.

- the open() access mode flags used for the fds passed into QEMU need
  to match the flags QEMU uses to open the file. Currently O_WRONLY
  for src and O_RDONLY for dst.
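The flag sharing can be demonstrated outside QEMU with a few lines of
C (a standalone illustration, not QEMU code):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    int fd1 = open("migfile", O_WRONLY | O_CREAT, 0600);
    int fd2 = dup(fd1);

    /* setting O_DIRECT on one fd... */
    fcntl(fd1, F_SETFL, fcntl(fd1, F_GETFL) | O_DIRECT);

    /* ...is visible through the other: both lines print 1 */
    printf("%d\n", !!(fcntl(fd1, F_GETFL) & O_DIRECT));
    printf("%d\n", !!(fcntl(fd2, F_GETFL) & O_DIRECT));

    close(fd1);
    close(fd2);
    return 0;
}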

O_DIRECT is not supported on all systems/filesystems, so run the fdset
test without O_DIRECT if that's the case. The migration code should
still work in that scenario.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:16 -03:00
Fabiano Rosas
4b6eeb2335 migration: Add support for fdset with multifd + file
Allow multifd to use an fdset when migrating to a file. This is useful
for the scenario where the management layer wants to have control over
the migration file.

By receiving the file descriptors directly, QEMU can delegate some
high level operating system operations to the management layer (such
as mandatory access control).

The management layer might also want to add its own headers before the
migration stream.

Enable the "file:/dev/fdset/#" syntax for the multifd migration with
fixed-ram. The fdset should contain two fds on the source side of
migration and 1 fd on the destination side. The two fds should not be
duplicates between themselves.

Multifd enables O_DIRECT on the source side using one of the fds and
keeps the other without the flag. None of the fds should have the
O_DIRECT flag already set.

The fdset mechanism also requires that the open() access mode flags be
the same as what QEMU uses internally: O_WRONLY for the source fds and
O_RDONLY for the destination fds.
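For illustration, a plausible QMP sequence on the source side, in the
spirit of the HMP examples elsewhere in this series (each add-fd call
carries one O_WRONLY fd over the monitor socket via SCM_RIGHTS; this
is a sketch, not a verbatim transcript):

{"execute": "add-fd", "arguments": {"fdset-id": 1}}
{"execute": "add-fd", "arguments": {"fdset-id": 1}}
{"execute": "migrate", "arguments": {"uri": "file:/dev/fdset/1"}}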

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:16 -03:00
Fabiano Rosas
2cb56cb34e monitor: fdset: Match against O_DIRECT
We're about to enable the use of O_DIRECT in the migration code and
due to the alignment restrictions imposed by filesystems we need to
make sure the flag is only used when doing aligned IO.

The migration will do parallel IO to different regions of a file, so
we need to use more than one file descriptor. Those cannot be obtained
by duplicating (dup()) since duplicated file descriptors share the
file status flags, including O_DIRECT. If one migration channel does
unaligned IO while another sets O_DIRECT to do aligned IO, the
filesystem would fail the unaligned operation.

The add-fd QMP command along with the fdset code are specifically
designed to allow the user to pass a set of file descriptors with
different access flags into QEMU to be later fetched by code that
needs to alternate between those flags when doing IO.

Extend the fdset matching function to also require that the O_DIRECT
flag matches.
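A sketch of the resulting comparison (the helper name is an
assumption; see the extraction in the previous patch of this series):

static bool fdset_flags_match(int flags, int fd_flags)
{
    bool match = (flags & O_ACCMODE) == (fd_flags & O_ACCMODE);

#ifdef O_DIRECT
    /* only hand out an fd whose O_DIRECT setting matches the request */
    match &= !((flags ^ fd_flags) & O_DIRECT);
#endif

    return match;
}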

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:16 -03:00
Fabiano Rosas
5deb383d1a monitor: Extract fdset fd flags comparison into a function
We're about to add one more condition to the flags comparison that
requires an ifdef. Move the code into a separate function now to make
it cleaner after the next patch.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:16 -03:00
Fabiano Rosas
4478aef543 monitor: Honor QMP request for fd removal immediately
We're currently only removing an fd from the fdset if the VM is
running. This causes a QMP call to "remove-fd" to not actually remove
the fd if the VM happens to be stopped.

While the fd would eventually be removed when monitor_fdset_cleanup()
is called again, the user request should be honored and the fd
actually removed. Calling remove-fd + query-fdset shows a recently
removed fd still present.

The runstate_is_running() check was introduced by commit ebe52b592d
("monitor: Prevent removing fd from set during init"), whose
shortlog indicates that it was trying to avoid removing a
yet-unduplicated fd too early.

I don't see why an fd explicitly removed with qmp_remove_fd() should
be gated on runstate_is_running(). I'm assuming this was a mistake
when adding the parentheses around the expression.

Move the runstate_is_running() check to apply only to the
QLIST_EMPTY(dup_fds) side of the expression and ignore it when
mon_fdset_fd->removed has been explicitly set.
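In other words, a sketch of the resulting check (field and function
names as referenced above; simplified):

if (mon_fdset_fd->removed ||
    (QLIST_EMPTY(&mon_fdset->dup_fds) && runstate_is_running())) {
    /* actually drop the fd from the set */
}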

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:15 -03:00
Fabiano Rosas
a8adda79e7 tests/qtest: Add a test for migration with direct-io and multifd
The test is only allowed to run on systems that know about O_DIRECT
and on filesystems which support it.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:15 -03:00
Fabiano Rosas
cf07faa04a migration: Add direct-io parameter
Add the direct-io migration parameter that tells the migration code to
use O_DIRECT when opening the migration stream file whenever possible.

This is currently only used for the secondary channels of fixed-ram
migration, which can guarantee that writes are page aligned.

However, the parameter could be made to affect other types of
file-based migrations in the future.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:15 -03:00
Fabiano Rosas
b0839f1600 tests/qtest: Add a multifd + fixed-ram migration test
Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:15 -03:00
Fabiano Rosas
db401f5302 migration/multifd: Support incoming fixed-ram stream format
For the incoming fixed-ram migration we need to read the ramblock
headers, get the pages bitmap and send the host address of each
non-zero page to the multifd channel thread for writing.

To read from the migration file we need a preadv function that can
read into the iovs in segments of contiguous pages because (as in the
writing case) the file offset applies to the entire iovec.

Usage on HMP is:

(qemu) migrate_set_capability multifd on
(qemu) migrate_set_capability fixed-ram on
(qemu) migrate_set_parameter max-bandwidth 0
(qemu) migrate_set_parameter multifd-channels 8
(qemu) migrate_incoming file:migfile
(qemu) info status
(qemu) c

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:15 -03:00
Fabiano Rosas
ed95cd0446 migration/multifd: Support outgoing fixed-ram stream format
The new fixed-ram stream format uses a file transport and puts ram
pages in the migration file at their respective offsets. The writes
can be done in parallel by using the pwritev system call, which takes
iovecs and an offset.

Add support for enabling the new format along with multifd, to make
use of the threading and page handling already in place.

This requires multifd to stop sending headers and leave the stream
format to the fixed-ram code. When it comes time to write the data, we
need to call a version of qio_channel_write that can take an offset.

Usage on HMP is:

(qemu) stop
(qemu) migrate_set_capability multifd on
(qemu) migrate_set_capability fixed-ram on
(qemu) migrate_set_parameter max-bandwidth 0
(qemu) migrate_set_parameter multifd-channels 8
(qemu) migrate file:migfile

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:15 -03:00
Fabiano Rosas
d5bce67e17 migration/ram: Ignore multifd flush when doing fixed-ram migration
Some functionalities of multifd are incompatible with the 'fixed-ram'
migration format.

The MULTIFD_FLUSH flag in particular is not used because in fixed-ram
there is no synchronization between migration source and destination,
so there is no need for a sync packet. In fact, fixed-ram disables
packets in multifd as a whole.

Make sure RAM_SAVE_FLAG_MULTIFD_FLUSH is never emitted when fixed-ram
is enabled.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:14 -03:00
Fabiano Rosas
b00e0415ed migration/ram: Add a wrapper for fixed-ram shadow bitmap
We'll need to set the shadow_bmap bits from outside ram.c soon, and
TARGET_PAGE_BITS is poisoned there, so add a wrapper for it.
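A sketch of what the wrapper can look like inside ram.c, where
TARGET_PAGE_BITS is still usable (the function name is an assumption):

void ramblock_set_shadow_bmap(RAMBlock *block, ram_addr_t offset)
{
    /* mark the page containing 'offset' as written */
    set_bit_atomic(offset >> TARGET_PAGE_BITS, block->shadow_bmap);
}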

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:14 -03:00
Fabiano Rosas
6822813ae9 migration/multifd: Allow receiving pages without packets
Currently multifd does not need to have knowledge of pages on the
receiving side because all the information needed is within the
packets that come in the stream.

We're about to add support to fixed-ram migration, which cannot use
packets because it expects the ramblock section in the migration file
to contain only the guest pages data.

Add a data structure to transfer pages between the ram migration code
and the multifd receiving threads.

We don't want to reuse MultiFDPages_t for two reasons:

a) multifd threads don't really need to know about the data they're
   receiving.

b) the receiving side has to be stopped to load the pages, which means
   we can experiment with larger granularities than page size when
   transferring data.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:14 -03:00
Fabiano Rosas
23e7e3fc41 migration/multifd: Decouple recv method from pages
The next patch will abstract the type of data being received by the
channels, so do some cleanup now to remove references to pages and the
dependency on 'normal_num'.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:14 -03:00
Fabiano Rosas
a071d2f34e multifd: Rename MultiFDSendParams::data to compress_data
Use a more specific name for the compression data so we can use the
generic name for the multifd core code.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:14 -03:00
Fabiano Rosas
ab194ba308 io: Add a pwritev/preadv version that takes a discontiguous iovec
For the upcoming support for fixed-ram migration with multifd, we need
to be able to accept an iovec array with non-contiguous data.

Add pwritev and preadv versions that split the array into contiguous
segments before writing. With that we can have the ram code continue
to add pages in any order and the multifd code continue to send large
arrays for reading and writing.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
Since iovs can be non-contiguous, we'd need a separate array on the
side to carry an extra file offset for each of them, so I'm relying on
the fact that iovs are all within the same host page and passing in an
encoded offset that takes the host page into account.
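A simplified standalone sketch of the splitting logic (ignoring short
writes and using a plain base address rather than the encoded offset
described above):

#define _GNU_SOURCE
#include <stdint.h>
#include <sys/uio.h>

static ssize_t pwritev_discontiguous(int fd, const struct iovec *iov,
                                     int niov, off_t base_offset,
                                     uintptr_t base_addr)
{
    ssize_t total = 0;
    int i = 0;

    while (i < niov) {
        int j = i + 1;

        /* grow the segment while the next iov is contiguous in memory */
        while (j < niov &&
               (char *)iov[j - 1].iov_base + iov[j - 1].iov_len ==
               (char *)iov[j].iov_base) {
            j++;
        }

        /* each contiguous segment gets its own file offset */
        off_t off = base_offset + ((uintptr_t)iov[i].iov_base - base_addr);
        ssize_t ret = pwritev(fd, &iov[i], j - i, off);

        if (ret < 0) {
            return ret;
        }
        total += ret;
        i = j;
    }

    return total;
}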
2023-11-14 13:30:14 -03:00
Fabiano Rosas
9954a41782 migration/multifd: Add incoming QIOChannelFile support
On the receiving side we don't need to differentiate between main
channel and threads, so whichever channel is defined first gets to be
the main one. And since there are no packets, use the atomic channel
count to index into the params array.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:13 -03:00
Fabiano Rosas
76798336e4 migration/multifd: Add outgoing QIOChannelFile support
Allow multifd to open file-backed channels. This will be used when
enabling the fixed-ram migration stream format which expects a
seekable transport.

The QIOChannel read and write methods will use the preadv/pwritev
versions, which don't update the file offset at each call, so we can
reuse the fd without re-opening it for every channel.

Note that this is just setup code and multifd cannot yet make use of
the file channels.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:13 -03:00
Fabiano Rosas
ffd3e56398 migration/multifd: Allow multifd without packets
For the upcoming support for the new 'fixed-ram' migration stream
format, we cannot use multifd packets, because each write into the
ramblock section in the migration file is expected to contain only the
guest pages. They are written at their respective offsets relative to
the ramblock section header.

There is no space for the packet information and the expected gains
from the new approach come partly from being able to write the pages
sequentially without extraneous data in between.

The new format also doesn't need the packets and all necessary
information can be taken from the standard migration headers with some
(future) changes to multifd code.

Use the presence of the fixed-ram capability to decide whether to send
packets. For now this has no effect as fixed-ram cannot yet be enabled
with multifd.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 13:30:13 -03:00
Nikolay Borisov
1c036fa56a tests/qtest: migration-test: Add tests for fixed-ram file-based migration
Add basic tests for 'fixed-ram' migration.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 09:19:24 -03:00
Nikolay Borisov
3eb7f2ab75 migration/ram: Add support for 'fixed-ram' migration restore
Add the necessary code to parse the format changes for the 'fixed-ram'
capability.

One of the more notable changes in behavior is that in the 'fixed-ram'
case ram pages are restored in one go rather than constantly looping
through the migration stream.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 09:19:24 -03:00
Nikolay Borisov
43ad5422c9 migration/ram: Add support for 'fixed-ram' outgoing migration
Implement the outgoing migration side for the 'fixed-ram' capability.

A bitmap is introduced to track which pages have been written in the
migration file. Pages are written at a fixed location for every
ramblock. Zero pages are ignored as they'd be zero on the destination
as well.

The migration stream is altered to put the dirty pages for a ramblock
after its header instead of having a sequential stream of pages that
follow the ramblock headers. Since all pages have a fixed location,
RAM_SAVE_FLAG_EOS is no longer generated on every migration iteration.

Without fixed-ram (current):        With fixed-ram (new):

 ---------------------               --------------------------------
 | ramblock 1 header |               | ramblock 1 header            |
 ---------------------               --------------------------------
 | ramblock 2 header |               | ramblock 1 fixed-ram header  |
 ---------------------               --------------------------------
 | ...               |               | padding to next 1MB boundary |
 ---------------------               | ...                          |
 | ramblock n header |               --------------------------------
 ---------------------               | ramblock 1 pages             |
 | RAM_SAVE_FLAG_EOS |               | ...                          |
 ---------------------               --------------------------------
 | stream of pages   |               | ramblock 2 header            |
 | (iter 1)          |               --------------------------------
 | ...               |               | ramblock 2 fixed-ram header  |
 ---------------------               --------------------------------
 | RAM_SAVE_FLAG_EOS |               | padding to next 1MB boundary |
 ---------------------               | ...                          |
 | stream of pages   |               --------------------------------
 | (iter 2)          |               | ramblock 2 pages             |
 | ...               |               | ...                          |
 ---------------------               --------------------------------
 | ...               |               | ...                          |
 ---------------------               --------------------------------
                                     | RAM_SAVE_FLAG_EOS            |
                                     --------------------------------
                                     | ...                          |
                                     --------------------------------

where:
 - ramblock header: the generic information for a ramblock, such as
   idstr, used_len, etc.

 - ramblock fixed-ram header: the new information added by this
   feature: the bitmap of pages written, the bitmap size and the
   offset of the pages in the migration file.
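Schematically, the fixed-ram header can be pictured as the following
struct (field names and layout are illustrative, not the exact on-disk
format):

struct FixedRamHeader {
    uint32_t version;
    uint64_t page_size;
    uint64_t bitmap_offset;  /* where the written-pages bitmap lives */
    uint64_t pages_offset;   /* where the page data starts (aligned) */
    /* followed by the bitmap itself */
};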

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 09:19:23 -03:00
Fabiano Rosas
909f4a40f6 migration: Add fixed-ram URI compatibility check
The fixed-ram migration format needs a channel that supports seeking
to be able to write each page to an arbitrary offset in the migration
stream.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
2023-11-14 09:19:23 -03:00
Fabiano Rosas
07acf019b1 migration/ram: Introduce 'fixed-ram' migration capability
Add a new migration capability 'fixed-ram'.

The core of the feature is to ensure that each RAM page has a specific
offset in the resulting migration stream. The reasons why we'd want
such behavior are:

 - The resulting file will have a bounded size, since pages which are
   dirtied multiple times will always go to a fixed location in the
   file, rather than constantly being added to a sequential
   stream. This eliminates cases where a VM with, say, 1G of RAM can
   result in a migration file that's tens of GBs, when the workload
   constantly redirties memory.

 - It paves the way to implement O_DIRECT-enabled save/restore of the
   migration stream, as the pages are guaranteed to be written at
   aligned offsets.

 - It allows the usage of multifd so we can write RAM pages to the
   migration file in parallel.

For now, enabling the capability has no effect. The next couple of
patches implement the core functionality.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 09:19:23 -03:00
Fabiano Rosas
b018f49a5b migration/ram: Initialize bitmap with used_length
We don't allow changing the size of the ramblock during migration. Use
used_length instead of max_length when initializing the bitmap.

Suggested-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 09:19:23 -03:00
Nikolay Borisov
f9c6197b58 migration/qemu-file: add utility methods for working with seekable channels
Add utility methods that will be needed when implementing 'fixed-ram'
migration capability.

qemu_file_is_seekable
qemu_put_buffer_at
qemu_get_buffer_at
qemu_set_offset
qemu_get_offset
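Plausible prototypes for these helpers (a sketch; the actual
signatures in the patch may differ):

bool qemu_file_is_seekable(QEMUFile *f);
void qemu_put_buffer_at(QEMUFile *f, const uint8_t *buf,
                        size_t buflen, off_t pos);
size_t qemu_get_buffer_at(QEMUFile *f, uint8_t *buf,
                          size_t buflen, off_t pos);
void qemu_set_offset(QEMUFile *f, off_t off, int whence);
off_t qemu_get_offset(QEMUFile *f);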

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
2023-11-14 09:19:23 -03:00
Nikolay Borisov
1b1da54c69 io: implement io_pwritev/preadv for QIOChannelFile
The upcoming 'fixed-ram' feature will require QEMU to write data to
(and restore from) specific offsets of the migration file.

Add a minimal implementation of pwritev/preadv and expose them via the
io_pwritev and io_preadv interfaces.
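A minimal sketch of the read side (assuming the existing
QIOChannelFile layout; error paths trimmed):

static ssize_t qio_channel_file_preadv(QIOChannel *ioc,
                                       const struct iovec *iov,
                                       size_t niov,
                                       off_t offset,
                                       Error **errp)
{
    QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
    ssize_t ret;

 retry:
    ret = preadv(fioc->fd, iov, niov, offset);
    if (ret < 0) {
        if (errno == EINTR) {
            goto retry;
        }
        error_setg_errno(errp, errno, "Unable to read from file");
        return -1;
    }

    return ret;
}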

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
2023-11-14 09:19:23 -03:00
Nikolay Borisov
d636220e69 io: Add generic pwritev/preadv interface
Introduce basic pwritev/preadv support in the generic channel layer.
Specific implementation will follow for the file channel as this is
required in order to support migration streams with fixed location of
each ram page.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 09:19:23 -03:00
Nikolay Borisov
5e25355c4b io: add and implement QIO_CHANNEL_FEATURE_SEEKABLE for channel file
Add a generic QIOChannel feature SEEKABLE which will be used by the
qemu_file* APIs. For the time being this will only be implemented for
file channels.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
2023-11-14 09:19:22 -03:00
Fabiano Rosas
f978a45734 tests/qtest: Re-enable multifd cancel test
We've found the source of flakiness in this test, so re-enable it.

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 09:19:22 -03:00
Fabiano Rosas
d969e2d0ff migration: Report error in incoming migration
We're not currently reporting the errors set with migrate_set_error()
when incoming migration fails.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-14 09:19:22 -03:00
Fabiano Rosas
87dcefce00 migration/multifd: Allow QIOTask error reporting without an object
The only way for the channel backend to report an error to the multifd
core during creation is by setting the QIOTask error. We must allow
the channel backend to set the error even if the QIOChannel has failed
to be created, which means the QIOTask source object would be NULL.

In multifd_new_send_channel_async(), delay the QOM casting of the
channel until after we have checked for the QIOTask error.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
context: When doing multifd + file, it's possible that we fail to open
the file. I'll use the empty QIOTask to report the error back to
multifd.
2023-11-14 09:19:22 -03:00
Fabiano Rosas
2b12bbcfed migration/multifd: Stop setting p->ioc before connecting
This is being shadowed by the assignments at
multifd_channel_connect() and multifd_tls_channel_connect().

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-13 10:59:23 -03:00
Fabiano Rosas
fd92544b1a migration/multifd: Fix multifd_pages_init argument
The 'size' argument is the number of pages that fit in a multifd
packet. Change it to uint32_t and rename.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-13 10:54:11 -03:00
Fabiano Rosas
19b0f579aa migration/multifd: Remove QEMUFile from where it is not needed
Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-13 10:54:11 -03:00
Fabiano Rosas
ae1ea5b13e migration/multifd: Remove MultiFDPages_t::packet_num
This was introduced by commit 34c55a94b1 ("migration: Create multipage
support") and never used.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-13 10:54:11 -03:00
Fabiano Rosas
f6a85fa7a4 tests/qtest/migration: Print migration incoming errors
We're currently just asserting when incoming migration fails. Let's
print the error message from QMP as well.

Signed-off-by: Fabiano Rosas <farosas@suse.de>
2023-11-13 10:54:10 -03:00
2431 changed files with 38286 additions and 71340 deletions

View File

@@ -41,10 +41,6 @@ variables:
   - if: '$CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_COMMIT_TAG'
     when: never
 
-  # Scheduled runs on mainline don't get pipelines except for the special Coverity job
-  - if: '$CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_PIPELINE_SOURCE == "schedule"'
-    when: never
-
   # Cirrus jobs can't run unless the creds / target repo are set
   - if: '$QEMU_JOB_CIRRUS && ($CIRRUS_GITHUB_REPO == null || $CIRRUS_API_TOKEN == null)'
     when: never

View File

@@ -61,7 +61,7 @@ avocado-system-ubuntu:
   variables:
     IMAGE: ubuntu2204
     MAKE_CHECK_ARGS: check-avocado
-    AVOCADO_TAGS: arch:alpha arch:microblazeel arch:mips64el
+    AVOCADO_TAGS: arch:alpha arch:microblaze arch:mips64el
 
 build-system-debian:
   extends:
@@ -70,7 +70,7 @@ build-system-debian:
   needs:
     job: amd64-debian-container
   variables:
-    IMAGE: debian
+    IMAGE: debian-amd64
     CONFIGURE_ARGS: --with-coroutine=sigaltstack
     TARGETS: arm-softmmu i386-softmmu riscv64-softmmu sh4eb-softmmu
       sparc-softmmu xtensa-softmmu
@@ -82,7 +82,7 @@ check-system-debian:
     - job: build-system-debian
       artifacts: true
   variables:
-    IMAGE: debian
+    IMAGE: debian-amd64
     MAKE_CHECK_ARGS: check
 
 avocado-system-debian:
@@ -91,7 +91,7 @@ avocado-system-debian:
     - job: build-system-debian
       artifacts: true
   variables:
-    IMAGE: debian
+    IMAGE: debian-amd64
     MAKE_CHECK_ARGS: check-avocado
     AVOCADO_TAGS: arch:arm arch:i386 arch:riscv64 arch:sh4 arch:sparc arch:xtensa
@@ -101,7 +101,7 @@ crash-test-debian:
     - job: build-system-debian
       artifacts: true
   variables:
-    IMAGE: debian
+    IMAGE: debian-amd64
   script:
     - cd build
     - make NINJA=":" check-venv
@@ -167,73 +167,6 @@ build-system-centos:
     x86_64-softmmu rx-softmmu sh4-softmmu nios2-softmmu
     MAKE_CHECK_ARGS: check-build
 
-# Previous QEMU release. Used for cross-version migration tests.
-build-previous-qemu:
-  extends: .native_build_job_template
-  artifacts:
-    when: on_success
-    expire_in: 2 days
-    paths:
-      - build-previous
-    exclude:
-      - build-previous/**/*.p
-      - build-previous/**/*.a.p
-      - build-previous/**/*.fa.p
-      - build-previous/**/*.c.o
-      - build-previous/**/*.c.o.d
-      - build-previous/**/*.fa
-  needs:
-    job: amd64-opensuse-leap-container
-  variables:
-    IMAGE: opensuse-leap
-    TARGETS: x86_64-softmmu aarch64-softmmu
-  before_script:
-    - export QEMU_PREV_VERSION="$(sed 's/\([0-9.]*\)\.[0-9]*/v\1.0/' VERSION)"
-    - git remote add upstream https://gitlab.com/qemu-project/qemu
-    - git fetch upstream refs/tags/$QEMU_PREV_VERSION:refs/tags/$QEMU_PREV_VERSION
-    - git checkout $QEMU_PREV_VERSION
-  after_script:
-    - mv build build-previous
-
-.migration-compat-common:
-  extends: .common_test_job_template
-  needs:
-    - job: build-previous-qemu
-    - job: build-system-opensuse
-  # The old QEMU could have bugs unrelated to migration that are
-  # already fixed in the current development branch, so this test
-  # might fail.
-  allow_failure: true
-  variables:
-    IMAGE: opensuse-leap
-    MAKE_CHECK_ARGS: check-build
-  script:
-    # Use the migration-tests from the older QEMU tree. This avoids
-    # testing an old QEMU against new features/tests that it is not
-    # compatible with.
-    - cd build-previous
-    # old to new
-    - QTEST_QEMU_BINARY_SRC=./qemu-system-${TARGET}
-      QTEST_QEMU_BINARY=../build/qemu-system-${TARGET} ./tests/qtest/migration-test
-    # new to old
-    - QTEST_QEMU_BINARY_DST=./qemu-system-${TARGET}
-      QTEST_QEMU_BINARY=../build/qemu-system-${TARGET} ./tests/qtest/migration-test
-
-# This job needs to be disabled until we can have an aarch64 CPU model that
-# will both (1) support both KVM and TCG, and (2) provide a stable ABI.
-# Currently only "-cpu max" can provide (1), however it doesn't guarantee
-# (2). Mark this test skipped until later.
-migration-compat-aarch64:
-  extends: .migration-compat-common
-  variables:
-    TARGET: aarch64
-    QEMU_JOB_SKIPPED: 1
-
-migration-compat-x86_64:
-  extends: .migration-compat-common
-  variables:
-    TARGET: x86_64
-
 check-system-centos:
   extends: .native_test_job_template
   needs:
@@ -251,7 +184,7 @@ avocado-system-centos:
   variables:
     IMAGE: centos8
     MAKE_CHECK_ARGS: check-avocado
-    AVOCADO_TAGS: arch:ppc64 arch:or1k arch:s390x arch:x86_64 arch:rx
+    AVOCADO_TAGS: arch:ppc64 arch:or1k arch:390x arch:x86_64 arch:rx
       arch:sh4 arch:nios2
 
 build-system-opensuse:
@@ -284,36 +217,6 @@ avocado-system-opensuse:
     MAKE_CHECK_ARGS: check-avocado
     AVOCADO_TAGS: arch:s390x arch:x86_64 arch:aarch64
 
-#
-# Flaky tests. We don't run these by default and they are allow fail
-# but often the CI system is the only way to trigger the failures.
-#
-build-system-flaky:
-  extends:
-    - .native_build_job_template
-    - .native_build_artifact_template
-  needs:
-    job: amd64-debian-container
-  variables:
-    IMAGE: debian
-    QEMU_JOB_OPTIONAL: 1
-    TARGETS: aarch64-softmmu arm-softmmu mips64el-softmmu
-      ppc64-softmmu rx-softmmu s390x-softmmu sh4-softmmu x86_64-softmmu
-    MAKE_CHECK_ARGS: check-build
-
-avocado-system-flaky:
-  extends: .avocado_test_job_template
-  needs:
-    - job: build-system-flaky
-      artifacts: true
-  allow_failure: true
-  variables:
-    IMAGE: debian
-    MAKE_CHECK_ARGS: check-avocado
-    QEMU_JOB_OPTIONAL: 1
-    QEMU_TEST_FLAKY_TESTS: 1
-    AVOCADO_TAGS: flaky
-
 # This jobs explicitly disable TCG (--disable-tcg), KVM is detected by
 # the configure script. The container doesn't contain Xen headers so
@@ -659,7 +562,7 @@ build-without-defaults:
       --disable-pie
       --disable-qom-cast-debug
       --disable-strip
-    TARGETS: avr-softmmu s390x-softmmu sh4-softmmu
+    TARGETS: avr-softmmu mips64-softmmu s390x-softmmu sh4-softmmu
       sparc64-softmmu hexagon-linux-user i386-linux-user s390x-linux-user
     MAKE_CHECK_ARGS: check
@@ -686,7 +589,7 @@ build-tools-and-docs-debian:
   # when running on 'master' we use pre-existing container
   optional: true
   variables:
-    IMAGE: debian
+    IMAGE: debian-amd64
     MAKE_CHECK_ARGS: check-unit ctags TAGS cscope
     CONFIGURE_ARGS: --disable-system --disable-user --enable-docs --enable-tools
     QEMU_JOB_PUBLISH: 1
@@ -706,7 +609,7 @@ build-tools-and-docs-debian:
 # of what topic branch they're currently using
 pages:
   extends: .base_job_template
-  image: $CI_REGISTRY_IMAGE/qemu/debian:$QEMU_CI_CONTAINER_TAG
+  image: $CI_REGISTRY_IMAGE/qemu/debian-amd64:$QEMU_CI_CONTAINER_TAG
   stage: test
   needs:
     - job: build-tools-and-docs-debian
@@ -714,10 +617,7 @@ pages:
     - mkdir -p public
     # HTML-ised source tree
     - make gtags
-    # We unset variables to work around a bug in some htags versions
-    # which causes it to fail when the environment is large
-    - CI_COMMIT_MESSAGE= CI_COMMIT_TAG_MESSAGE= htags
-      -anT --tree-view=filetree -m qemu_init
+    - htags -anT --tree-view=filetree -m qemu_init
       -t "Welcome to the QEMU sourcecode"
     - mv HTML public/src
     # Project documentation
@@ -729,40 +629,3 @@ pages:
     - public
   variables:
     QEMU_JOB_PUBLISH: 1
-
-coverity:
-  image: $CI_REGISTRY_IMAGE/qemu/fedora:$QEMU_CI_CONTAINER_TAG
-  stage: build
-  allow_failure: true
-  timeout: 3h
-  needs:
-    - job: amd64-fedora-container
-      optional: true
-  before_script:
-    - dnf install -y curl wget
-  script:
-    # would be nice to cancel the job if over quota (https://gitlab.com/gitlab-org/gitlab/-/issues/256089)
-    # for example:
-    # curl --request POST --header "PRIVATE-TOKEN: $CI_JOB_TOKEN" "${CI_SERVER_URL}/api/v4/projects/${CI_PROJECT_ID}/jobs/${CI_JOB_ID}/cancel
-    - 'scripts/coverity-scan/run-coverity-scan --check-upload-only || { exitcode=$?; if test $exitcode = 1; then
-          exit 0;
-        else
-          exit $exitcode;
-        fi; };
-      scripts/coverity-scan/run-coverity-scan --update-tools-only > update-tools.log 2>&1 || { cat update-tools.log; exit 1; };
-      scripts/coverity-scan/run-coverity-scan --no-update-tools'
-  rules:
-    - if: '$COVERITY_TOKEN == null'
-      when: never
-    - if: '$COVERITY_EMAIL == null'
-      when: never
-    # Never included on upstream pipelines, except for schedules
-    - if: '$CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_PIPELINE_SOURCE == "schedule"'
-      when: on_success
-    - if: '$CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM'
-      when: never
-    # Forks don't get any pipeline unless QEMU_CI=1 or QEMU_CI=2 is set
-    - if: '$QEMU_CI != "1" && $QEMU_CI != "2"'
-      when: never
-    # Always manual on forks even if $QEMU_CI == "2"
-    - when: manual

View File

@@ -52,20 +52,20 @@ x64-freebsd-13-build:
     NAME: freebsd-13
     CIRRUS_VM_INSTANCE_TYPE: freebsd_instance
     CIRRUS_VM_IMAGE_SELECTOR: image_family
-    CIRRUS_VM_IMAGE_NAME: freebsd-13-3
+    CIRRUS_VM_IMAGE_NAME: freebsd-13-2
     CIRRUS_VM_CPUS: 8
     CIRRUS_VM_RAM: 8G
     UPDATE_COMMAND: pkg update; pkg upgrade -y
     INSTALL_COMMAND: pkg install -y
     TEST_TARGETS: check
 
-aarch64-macos-13-base-build:
+aarch64-macos-12-base-build:
   extends: .cirrus_build_job
   variables:
-    NAME: macos-13
+    NAME: macos-12
     CIRRUS_VM_INSTANCE_TYPE: macos_instance
     CIRRUS_VM_IMAGE_SELECTOR: image
-    CIRRUS_VM_IMAGE_NAME: ghcr.io/cirruslabs/macos-ventura-base:latest
+    CIRRUS_VM_IMAGE_NAME: ghcr.io/cirruslabs/macos-monterey-base:latest
    CIRRUS_VM_CPUS: 12
    CIRRUS_VM_RAM: 24G
    UPDATE_COMMAND: brew update
@@ -74,22 +74,6 @@ aarch64-macos-13-base-build:
     PKG_CONFIG_PATH: /opt/homebrew/curl/lib/pkgconfig:/opt/homebrew/ncurses/lib/pkgconfig:/opt/homebrew/readline/lib/pkgconfig
     TEST_TARGETS: check-unit check-block check-qapi-schema check-softfloat check-qtest-x86_64
 
-aarch64-macos-14-base-build:
-  extends: .cirrus_build_job
-  variables:
-    NAME: macos-14
-    CIRRUS_VM_INSTANCE_TYPE: macos_instance
-    CIRRUS_VM_IMAGE_SELECTOR: image
-    CIRRUS_VM_IMAGE_NAME: ghcr.io/cirruslabs/macos-sonoma-base:latest
-    CIRRUS_VM_CPUS: 12
-    CIRRUS_VM_RAM: 24G
-    UPDATE_COMMAND: brew update
-    INSTALL_COMMAND: brew install
-    PATH_EXTRA: /opt/homebrew/ccache/libexec:/opt/homebrew/gettext/bin
-    PKG_CONFIG_PATH: /opt/homebrew/curl/lib/pkgconfig:/opt/homebrew/ncurses/lib/pkgconfig:/opt/homebrew/readline/lib/pkgconfig
-    TEST_TARGETS: check-unit check-block check-qapi-schema check-softfloat check-qtest-x86_64
-    QEMU_JOB_OPTIONAL: 1
-
 # The following jobs run VM-based tests via KVM on a Linux-based Cirrus-CI job
 .cirrus_kvm_job:

View File

@@ -21,7 +21,7 @@ build_task:
   install_script:
     - @UPDATE_COMMAND@
     - @INSTALL_COMMAND@ @PKGS@
-    - if test -n "@PYPI_PKGS@" ; then PYLIB=$(@PYTHON@ -c 'import sysconfig; print(sysconfig.get_path("stdlib"))'); rm -f $PYLIB/EXTERNALLY-MANAGED; @PIP3@ install @PYPI_PKGS@ ; fi
+    - if test -n "@PYPI_PKGS@" ; then @PIP3@ install @PYPI_PKGS@ ; fi
   clone_script:
     - git clone --depth 100 "$CI_REPOSITORY_URL" .
     - git fetch origin "$CI_COMMIT_REF_NAME"

View File

@@ -1,6 +1,6 @@
 # THIS FILE WAS AUTO-GENERATED
 #
-#  $ lcitool variables macos-13 qemu
+#  $ lcitool variables macos-12 qemu
 #
 # https://gitlab.com/libvirt/libvirt-ci

View File

@@ -1,16 +0,0 @@
-# THIS FILE WAS AUTO-GENERATED
-#
-#  $ lcitool variables macos-14 qemu
-#
-# https://gitlab.com/libvirt/libvirt-ci
-
-CCACHE='/opt/homebrew/bin/ccache'
-CPAN_PKGS=''
-CROSS_PKGS=''
-MAKE='/opt/homebrew/bin/gmake'
-NINJA='/opt/homebrew/bin/ninja'
-PACKAGING_COMMAND='brew'
-PIP3='/opt/homebrew/bin/pip3'
-PKGS='bash bc bison bzip2 capstone ccache cmocka ctags curl dbus diffutils dtc flex gcovr gettext git glib gnu-sed gnutls gtk+3 jemalloc jpeg-turbo json-c libepoxy libffi libgcrypt libiscsi libnfs libpng libslirp libssh libtasn1 libusb llvm lzo make meson mtools ncurses nettle ninja pixman pkg-config python3 rpm2cpio sdl2 sdl2_image snappy socat sparse spice-protocol swtpm tesseract usbredir vde vte3 xorriso zlib zstd'
-PYPI_PKGS='PyYAML numpy pillow sphinx sphinx-rtd-theme tomli'
-PYTHON='/opt/homebrew/bin/python3'

View File

@@ -46,12 +46,6 @@ loongarch-debian-cross-container:
   variables:
     NAME: debian-loongarch-cross
 
-i686-debian-cross-container:
-  extends: .container_job_template
-  stage: containers
-  variables:
-    NAME: debian-i686-cross
-
 mips64el-debian-cross-container:
   extends: .container_job_template
   stage: containers
@@ -101,6 +95,16 @@ cris-fedora-cross-container:
   variables:
     NAME: fedora-cris-cross
 
+i386-fedora-cross-container:
+  extends: .container_job_template
+  variables:
+    NAME: fedora-i386-cross
+
+win32-fedora-cross-container:
+  extends: .container_job_template
+  variables:
+    NAME: fedora-win32-cross
+
 win64-fedora-cross-container:
   extends: .container_job_template
   variables:

View File

@@ -11,7 +11,7 @@ amd64-debian-container:
   extends: .container_job_template
   stage: containers
   variables:
-    NAME: debian
+    NAME: debian-amd64
 
 amd64-ubuntu2204-container:
   extends: .container_job_template

View File

@@ -37,25 +37,25 @@ cross-arm64-kvm-only:
     IMAGE: debian-arm64-cross
     EXTRA_CONFIGURE_OPTS: --disable-tcg --without-default-features
 
-cross-i686-user:
+cross-i386-user:
   extends:
     - .cross_user_build_job
     - .cross_test_artifacts
   needs:
-    job: i686-debian-cross-container
+    job: i386-fedora-cross-container
   variables:
-    IMAGE: debian-i686-cross
+    IMAGE: fedora-i386-cross
     MAKE_CHECK_ARGS: check
 
-cross-i686-tci:
+cross-i386-tci:
   extends:
     - .cross_accel_build_job
     - .cross_test_artifacts
   timeout: 60m
   needs:
-    job: i686-debian-cross-container
+    job: i386-fedora-cross-container
   variables:
-    IMAGE: debian-i686-cross
+    IMAGE: fedora-i386-cross
     ACCEL: tcg-interpreter
     EXTRA_CONFIGURE_OPTS: --target-list=i386-softmmu,i386-linux-user,aarch64-softmmu,aarch64-linux-user,ppc-softmmu,ppc-linux-user --disable-plugins
     MAKE_CHECK_ARGS: check check-tcg
@@ -159,6 +159,20 @@ cross-mips64el-kvm-only:
     IMAGE: debian-mips64el-cross
     EXTRA_CONFIGURE_OPTS: --disable-tcg --target-list=mips64el-softmmu
 
+cross-win32-system:
+  extends: .cross_system_build_job
+  needs:
+    job: win32-fedora-cross-container
+  variables:
+    IMAGE: fedora-win32-cross
+    EXTRA_CONFIGURE_OPTS: --enable-fdt=internal --disable-plugins
+    CROSS_SKIP_TARGETS: alpha-softmmu avr-softmmu hppa-softmmu m68k-softmmu
+      microblazeel-softmmu mips64el-softmmu nios2-softmmu
+  artifacts:
+    when: on_success
+    paths:
+      - build/qemu-setup*.exe
+
 cross-win64-system:
   extends: .cross_system_build_job
   needs:

View File

@@ -24,10 +24,6 @@
   - if: '$QEMU_CI == "1" && $CI_PROJECT_NAMESPACE != "qemu-project" && $CI_COMMIT_MESSAGE =~ /opensbi/i'
     when: manual
 
-  # Scheduled runs on mainline don't get pipelines except for the special Coverity job
-  - if: '$CI_PROJECT_NAMESPACE == $QEMU_CI_UPSTREAM && $CI_PIPELINE_SOURCE == "schedule"'
-    when: never
-
   # Run if any files affecting the build output are touched
   - changes:
     - .gitlab-ci.d/opensbi.yml

View File

@@ -1,4 +1,4 @@
-msys2-64bit:
+.shared_msys2_builder:
   extends: .base_job_template
   tags:
     - shared-windows
@@ -14,20 +14,9 @@ msys2-64bit:
   stage: build
   timeout: 100m
   variables:
-    # Select the "64 bit, gcc and MSVCRT" MSYS2 environment
-    MSYSTEM: MINGW64
     # This feature doesn't (currently) work with PowerShell, it stops
     # the echo'ing of commands being run and doesn't show any timing
     FF_SCRIPT_SECTIONS: 0
-    # do not remove "--without-default-devices"!
-    # commit 9f8e6cad65a6 ("gitlab-ci: Speed up the msys2-64bit job by using --without-default-devices"
-    # changed to compile QEMU with the --without-default-devices switch
-    # for this job, because otherwise the build could not complete within
-    # the project timeout.
-    CONFIGURE_ARGS: --target-list=x86_64-softmmu --without-default-devices -Ddebug=false -Doptimization=0
-    # qTests don't run successfully with "--without-default-devices",
-    # so let's exclude the qtests from CI for now.
-    TEST_ARGS: --no-suite qtest
   artifacts:
     name: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
     expire_in: 7 days
@@ -83,35 +72,35 @@ msys2-64bit:
   - .\msys64\usr\bin\bash -lc "pacman -Sy --noconfirm --needed
       bison diffutils flex
       git grep make sed
-      mingw-w64-x86_64-binutils
-      mingw-w64-x86_64-capstone
-      mingw-w64-x86_64-ccache
-      mingw-w64-x86_64-curl
-      mingw-w64-x86_64-cyrus-sasl
-      mingw-w64-x86_64-dtc
-      mingw-w64-x86_64-gcc
-      mingw-w64-x86_64-glib2
-      mingw-w64-x86_64-gnutls
-      mingw-w64-x86_64-gtk3
-      mingw-w64-x86_64-libgcrypt
-      mingw-w64-x86_64-libjpeg-turbo
-      mingw-w64-x86_64-libnfs
-      mingw-w64-x86_64-libpng
-      mingw-w64-x86_64-libssh
-      mingw-w64-x86_64-libtasn1
-      mingw-w64-x86_64-libusb
-      mingw-w64-x86_64-lzo2
-      mingw-w64-x86_64-nettle
-      mingw-w64-x86_64-ninja
-      mingw-w64-x86_64-pixman
-      mingw-w64-x86_64-pkgconf
-      mingw-w64-x86_64-python
-      mingw-w64-x86_64-SDL2
-      mingw-w64-x86_64-SDL2_image
-      mingw-w64-x86_64-snappy
-      mingw-w64-x86_64-spice
-      mingw-w64-x86_64-usbredir
-      mingw-w64-x86_64-zstd"
+      $MINGW_TARGET-binutils
+      $MINGW_TARGET-capstone
+      $MINGW_TARGET-ccache
+      $MINGW_TARGET-curl
+      $MINGW_TARGET-cyrus-sasl
+      $MINGW_TARGET-dtc
+      $MINGW_TARGET-gcc
+      $MINGW_TARGET-glib2
+      $MINGW_TARGET-gnutls
+      $MINGW_TARGET-gtk3
+      $MINGW_TARGET-libgcrypt
+      $MINGW_TARGET-libjpeg-turbo
+      $MINGW_TARGET-libnfs
+      $MINGW_TARGET-libpng
+      $MINGW_TARGET-libssh
+      $MINGW_TARGET-libtasn1
+      $MINGW_TARGET-libusb
+      $MINGW_TARGET-lzo2
+      $MINGW_TARGET-nettle
+      $MINGW_TARGET-ninja
+      $MINGW_TARGET-pixman
+      $MINGW_TARGET-pkgconf
+      $MINGW_TARGET-python
+      $MINGW_TARGET-SDL2
+      $MINGW_TARGET-SDL2_image
+      $MINGW_TARGET-snappy
+      $MINGW_TARGET-spice
+      $MINGW_TARGET-usbredir
+      $MINGW_TARGET-zstd "
   - Write-Output "Running build at $(Get-Date -Format u)"
   - $env:CHERE_INVOKING = 'yes'  # Preserve the current working directory
   - $env:MSYS = 'winsymlinks:native'  # Enable native Windows symlink
@@ -128,3 +117,25 @@ msys2-64bit:
   - ..\msys64\usr\bin\bash -lc "make check MTESTARGS='$TEST_ARGS' || { cat meson-logs/testlog.txt; exit 1; } ;"
   - ..\msys64\usr\bin\bash -lc "ccache --show-stats"
   - Write-Output "Finished build at $(Get-Date -Format u)"
+
+msys2-64bit:
+  extends: .shared_msys2_builder
+  variables:
+    MINGW_TARGET: mingw-w64-x86_64
+    MSYSTEM: MINGW64
+    # do not remove "--without-default-devices"!
+    # commit 9f8e6cad65a6 ("gitlab-ci: Speed up the msys2-64bit job by using --without-default-devices"
+    # changed to compile QEMU with the --without-default-devices switch
+    # for the msys2 64-bit job, due to the build could not complete within
+    CONFIGURE_ARGS: --target-list=x86_64-softmmu --without-default-devices -Ddebug=false -Doptimization=0
+    # qTests don't run successfully with "--without-default-devices",
+    # so let's exclude the qtests from CI for now.
+    TEST_ARGS: --no-suite qtest
+
+msys2-32bit:
+  extends: .shared_msys2_builder
+  variables:
+    MINGW_TARGET: mingw-w64-i686
+    MSYSTEM: MINGW32
+    CONFIGURE_ARGS: --target-list=ppc64-softmmu -Ddebug=false -Doptimization=0
+    TEST_ARGS: --no-suite qtest

View File

@@ -36,8 +36,6 @@ Marek Dolata <mkdolata@us.ibm.com> mkdolata@us.ibm.com <mkdolata@us.ibm.com>
 Michael Ellerman <mpe@ellerman.id.au> michael@ozlabs.org <michael@ozlabs.org>
 Nick Hudson <hnick@vmware.com> hnick@vmware.com <hnick@vmware.com>
 Timothée Cocault <timothee.cocault@gmail.com> timothee.cocault@gmail.com <timothee.cocault@gmail.com>
-Stefan Weil <sw@weilnetz.de> <weil@mail.berlios.de>
-Stefan Weil <sw@weilnetz.de> Stefan Weil <stefan@kiwi.(none)>
 
 # There is also a:
 #     (no author) <(no author)@c046a42c-6fe2-441c-8c8c-71466251a162>
@@ -62,7 +60,6 @@ Ian McKellar <ianloic@google.com> Ian McKellar via Qemu-devel <qemu-devel@nongnu
 Julia Suvorova <jusual@mail.ru> Julia Suvorova via Qemu-devel <qemu-devel@nongnu.org>
 Justin Terry (VM) <juterry@microsoft.com> Justin Terry (VM) via Qemu-devel <qemu-devel@nongnu.org>
 Stefan Weil <sw@weilnetz.de> Stefan Weil via <qemu-devel@nongnu.org>
-Stefan Weil <sw@weilnetz.de> Stefan Weil via <qemu-trivial@nongnu.org>
 Andrey Drobyshev <andrey.drobyshev@virtuozzo.com> Andrey Drobyshev via <qemu-block@nongnu.org>
 BALATON Zoltan <balaton@eik.bme.hu> BALATON Zoltan via <qemu-ppc@nongnu.org>
@@ -84,7 +81,6 @@ Greg Kurz <groug@kaod.org> <gkurz@linux.vnet.ibm.com>
 Huacai Chen <chenhuacai@kernel.org> <chenhc@lemote.com>
 Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn>
 James Hogan <jhogan@kernel.org> <james.hogan@imgtec.com>
-Juan Quintela <quintela@trasno.org> <quintela@redhat.com>
 Leif Lindholm <quic_llindhol@quicinc.com> <leif.lindholm@linaro.org>
 Leif Lindholm <quic_llindhol@quicinc.com> <leif@nuviainc.com>
 Luc Michel <luc@lmichel.fr> <luc.michel@git.antfield.fr>
@@ -101,7 +97,6 @@ Philippe Mathieu-Daudé <philmd@linaro.org> <philmd@redhat.com>
 Philippe Mathieu-Daudé <philmd@linaro.org> <philmd@fungible.com>
 Roman Bolshakov <rbolshakov@ddn.com> <r.bolshakov@yadro.com>
 Stefan Brankovic <stefan.brankovic@syrmia.com> <stefan.brankovic@rt-rk.com.com>
-Stefan Weil <sw@weilnetz.de> Stefan Weil <stefan@weilnetz.de>
 Taylor Simpson <ltaylorsimpson@gmail.com> <tsimpson@quicinc.com>
 Yongbok Kim <yongbok.kim@mips.com> <yongbok.kim@imgtec.com>

View File

@@ -5,21 +5,16 @@
 # Required
 version: 2
 
-# Set the version of Python and other tools you might need
-build:
-  os: ubuntu-22.04
-  tools:
-    python: "3.11"
-
 # Build documentation in the docs/ directory with Sphinx
 sphinx:
   configuration: docs/conf.py
 
-# We recommend specifying your dependencies to enable reproducible builds:
-# https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
-python:
-  install:
-    - requirements: docs/requirements.txt
-
 # We want all the document formats
 formats: all
+
+# For consistency, we require that QEMU's Sphinx extensions
+# run with at least the same minimum version of Python that
+# we require for other Python in our codebase (our conf.py
+# enforces this, and some code needs it.)
+python:
+  version: 3.6

View File

@@ -70,6 +70,7 @@ R: Daniel P. Berrangé <berrange@redhat.com>
 R: Thomas Huth <thuth@redhat.com>
 R: Markus Armbruster <armbru@redhat.com>
 R: Philippe Mathieu-Daudé <philmd@linaro.org>
+R: Juan Quintela <quintela@redhat.com>
 W: https://www.qemu.org/docs/master/devel/index.html
 S: Odd Fixes
 F: docs/devel/style.rst
@@ -130,17 +131,6 @@ K: ^Subject:.*(?i)mips
 F: docs/system/target-mips.rst
 F: configs/targets/mips*
 
-X86 general architecture support
-M: Paolo Bonzini <pbonzini@redhat.com>
-S: Maintained
-F: configs/devices/i386-softmmu/default.mak
-F: configs/targets/i386-softmmu.mak
-F: configs/targets/x86_64-softmmu.mak
-F: docs/system/target-i386*
-F: target/i386/*.[ch]
-F: target/i386/Kconfig
-F: target/i386/meson.build
-
 Guest CPU cores (TCG)
 ---------------------
 Overall TCG CPUs
@@ -173,7 +163,6 @@ F: include/hw/core/tcg-cpu-ops.h
 F: host/include/*/host/cpuinfo.h
 F: util/cpuinfo-*.c
 F: include/tcg/
-F: tests/decode/
 
 FPU emulation
 M: Aurelien Jarno <aurelien@aurel32.net>
@@ -642,7 +631,6 @@ R: Strahinja Jankovic <strahinja.p.jankovic@gmail.com>
 L: qemu-arm@nongnu.org
 S: Odd Fixes
 F: hw/*/allwinner*
-F: hw/ide/ahci-allwinner.c
 F: include/hw/*/allwinner*
 F: hw/arm/cubieboard.c
 F: docs/system/arm/cubieboard.rst
@@ -669,7 +657,6 @@ F: include/hw/dma/pl080.h
 F: hw/dma/pl330.c
 F: hw/gpio/pl061.c
 F: hw/input/pl050.c
-F: include/hw/input/pl050.h
 F: hw/intc/pl190.c
 F: hw/sd/pl181.c
 F: hw/ssi/pl022.c
@@ -820,13 +807,12 @@ F: include/hw/misc/imx7_*.h
 F: hw/pci-host/designware.c
 F: include/hw/pci-host/designware.h
 
-MPS2 / MPS3
+MPS2
 M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/mps2.c
 F: hw/arm/mps2-tz.c
-F: hw/arm/mps3r.c
 F: hw/misc/mps2-*.c
 F: include/hw/misc/mps2-*.h
 F: hw/arm/armsse.c
@@ -941,7 +927,6 @@ F: hw/*/pxa2xx*
 F: hw/display/tc6393xb.c
 F: hw/gpio/max7310.c
 F: hw/gpio/zaurus.c
-F: hw/input/ads7846.c
 F: hw/misc/mst_fpga.c
 F: hw/adc/max111x.c
 F: include/hw/adc/max111x.h
@@ -994,9 +979,7 @@ M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/*/stellaris*
-F: hw/display/ssd03*
 F: include/hw/input/gamepad.h
-F: include/hw/timer/stellaris-gptm.h
 F: docs/system/arm/stellaris.rst
 
 STM32VLDISCOVERY
@@ -1011,7 +994,6 @@ M: Peter Maydell <peter.maydell@linaro.org>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/vexpress.c
-F: hw/display/sii9022.c
 F: docs/system/arm/vexpress.rst
 
 Versatile PB
@@ -1124,25 +1106,6 @@ L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/arm/olimex-stm32-h405.c
 
-STM32L4x5 SoC Family
-M: Arnaud Minier <arnaud.minier@telecom-paris.fr>
-M: Inès Varhol <ines.varhol@telecom-paris.fr>
-L: qemu-arm@nongnu.org
-S: Maintained
-F: hw/arm/stm32l4x5_soc.c
-F: hw/misc/stm32l4x5_exti.c
-F: hw/misc/stm32l4x5_syscfg.c
-F: hw/misc/stm32l4x5_rcc.c
-F: hw/gpio/stm32l4x5_gpio.c
-F: include/hw/*/stm32l4x5_*.h
-
-B-L475E-IOT01A IoT Node
-M: Arnaud Minier <arnaud.minier@telecom-paris.fr>
-M: Inès Varhol <ines.varhol@telecom-paris.fr>
-L: qemu-arm@nongnu.org
-S: Maintained
-F: hw/arm/b-l475e-iot01a.c
-
 SmartFusion2
 M: Subbaraya Sundeep <sundeep.lkml@gmail.com>
 M: Peter Maydell <peter.maydell@linaro.org>
@@ -1175,7 +1138,9 @@ R: Joel Stanley <joel@jms.id.au>
 L: qemu-arm@nongnu.org
 S: Maintained
 F: hw/*/*aspeed*
+F: hw/misc/pca9552.c
 F: include/hw/*/*aspeed*
+F: include/hw/misc/pca9552*.h
 F: hw/net/ftgmac100.c
 F: include/hw/net/ftgmac100.h
 F: docs/system/arm/aspeed.rst
@@ -1361,7 +1326,6 @@ M: Philippe Mathieu-Daudé <philmd@linaro.org>
 R: Aurelien Jarno <aurelien@aurel32.net>
 S: Odd Fixes
 F: hw/isa/piix.c
-F: hw/isa/fdc37m81x-superio.c
 F: hw/acpi/piix4.c
 F: hw/mips/malta.c
 F: hw/pci-host/gt64120.c
@@ -1426,7 +1390,6 @@ Bamboo
 L: qemu-ppc@nongnu.org
 S: Orphan
 F: hw/ppc/ppc440_bamboo.c
-F: hw/pci-host/ppc4xx_pci.c
 F: tests/avocado/ppc_bamboo.py
 
 e500
@@ -1529,7 +1492,6 @@ F: tests/qtest/libqos/*spapr*
 F: tests/qtest/rtas*
 F: tests/qtest/libqos/rtas*
 F: tests/avocado/ppc_pseries.py
-F: tests/avocado/ppc_hv_tests.py
 
 PowerNV (Non-Virtualized)
 M: Cédric Le Goater <clg@kaod.org>
@@ -1547,14 +1509,6 @@ F: include/hw/pci-host/pnv*
 F: pc-bios/skiboot.lid
 F: tests/qtest/pnv*
 
-pca955x
-M: Glenn Miles <milesg@linux.vnet.ibm.com>
-L: qemu-ppc@nongnu.org
-L: qemu-arm@nongnu.org
-S: Odd Fixes
-F: hw/misc/pca955*.c
-F: include/hw/misc/pca955*.h
-
 virtex_ml507
 M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
 L: qemu-ppc@nongnu.org
@@ -1568,7 +1522,7 @@ L: qemu-ppc@nongnu.org
 S: Maintained
 F: hw/ppc/sam460ex.c
 F: hw/ppc/ppc440_uc.c
-F: hw/pci-host/ppc440_pcix.c
+F: hw/ppc/ppc440_pcix.c
 F: hw/display/sm501*
 F: hw/ide/sii3112.c
 F: hw/rtc/m41t80.c
@@ -1723,7 +1677,7 @@ F: hw/rtc/sun4v-rtc.c
 F: include/hw/rtc/sun4v-rtc.h
 
 Leon3
-M: Clément Chigot <chigot@adacore.com>
+M: Fabien Chouteau <chouteau@adacore.com>
 M: Frederic Konrad <konrad.frederic@yahoo.fr>
 S: Maintained
 F: hw/sparc/leon3.c
@@ -1882,8 +1836,7 @@ M: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
 R: Philippe Mathieu-Daudé <philmd@linaro.org>
 R: Yanan Wang <wangyanan55@huawei.com>
 S: Supported
-F: hw/core/cpu-common.c
-F: hw/core/cpu-sysemu.c
+F: hw/core/cpu.c
 F: hw/core/machine-qmp-cmds.c
 F: hw/core/machine.c
 F: hw/core/machine-smp.c
@@ -1949,6 +1902,7 @@ IDE
 M: John Snow <jsnow@redhat.com>
 L: qemu-block@nongnu.org
 S: Odd Fixes
+F: include/hw/ide.h
 F: include/hw/ide/
 F: hw/ide/
 F: hw/block/block.c
@@ -2082,7 +2036,6 @@ F: hw/ppc/ppc4xx*.c
 F: hw/ppc/ppc440_uc.c
 F: hw/ppc/ppc440.h
 F: hw/i2c/ppc4xx_i2c.c
-F: include/hw/pci-host/ppc4xx.h
 F: include/hw/ppc/ppc4xx.h
 F: include/hw/i2c/ppc4xx_i2c.h
 F: hw/intc/ppc-uic.c
@@ -2197,17 +2150,6 @@ F: hw/vfio/ap.c
 F: docs/system/s390x/vfio-ap.rst
 L: qemu-s390x@nongnu.org
 
-iommufd
-M: Yi Liu <yi.l.liu@intel.com>
-M: Eric Auger <eric.auger@redhat.com>
-M: Zhenzhong Duan <zhenzhong.duan@intel.com>
-S: Supported
-F: backends/iommufd.c
-F: include/sysemu/iommufd.h
-F: include/qemu/chardev_open.h
-F: util/chardev_open.c
-F: docs/devel/vfio-iommufd.rst
-
 vhost
 M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
@@ -2299,14 +2241,13 @@ M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Supported
 F: hw/virtio/vhost-user-fs*
 F: include/hw/virtio/vhost-user-fs.h
-L: virtio-fs@lists.linux.dev
+L: virtio-fs@redhat.com
 
 virtio-input
 M: Gerd Hoffmann <kraxel@redhat.com>
 S: Odd Fixes
-F: docs/system/devices/vhost-user-input.rst
+F: hw/input/vhost-user-input.c
 F: hw/input/virtio-input*.c
-F: hw/virtio/vhost-user-input.c
 F: include/hw/virtio/virtio-input.h
 F: contrib/vhost-user-input/*
@@ -2335,12 +2276,6 @@ F: include/sysemu/rng*.h
 F: backends/rng*.c
 F: tests/qtest/virtio-rng-test.c
 
-vhost-user-stubs
-M: Alex Bennée <alex.bennee@linaro.org>
-S: Maintained
-F: hw/virtio/vhost-user-base.c
-F: hw/virtio/vhost-user-device*
-
 vhost-user-rng
 M: Mathieu Poirier <mathieu.poirier@linaro.org>
 S: Supported
@@ -2358,13 +2293,6 @@ F: hw/virtio/vhost-user-gpio*
 F: include/hw/virtio/vhost-user-gpio.h
F: tests/qtest/libqos/virtio-gpio.* F: tests/qtest/libqos/virtio-gpio.*
vhost-user-snd
M: Alex Bennée <alex.bennee@linaro.org>
R: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
S: Maintained
F: hw/virtio/vhost-user-snd*
F: include/hw/virtio/vhost-user-snd.h
vhost-user-scmi vhost-user-scmi
R: mzamazal@redhat.com R: mzamazal@redhat.com
S: Supported S: Supported
@@ -2443,13 +2371,8 @@ F: hw/net/net_tx_pkt*
Vmware Vmware
M: Dmitry Fleytman <dmitry.fleytman@gmail.com> M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
S: Maintained S: Maintained
F: docs/specs/vmw_pvscsi-spec.txt
F: hw/display/vmware_vga.c
F: hw/net/vmxnet* F: hw/net/vmxnet*
F: hw/scsi/vmw_pvscsi* F: hw/scsi/vmw_pvscsi*
F: pc-bios/efi-vmxnet3.rom
F: pc-bios/vgabios-vmware.bin
F: roms/config.vga-vmware
F: tests/qtest/vmxnet3-test.c F: tests/qtest/vmxnet3-test.c
F: docs/specs/vwm_pvscsi-spec.rst F: docs/specs/vwm_pvscsi-spec.rst
@@ -2503,12 +2426,6 @@ S: Maintained
F: hw/i2c/i2c_mux_pca954x.c F: hw/i2c/i2c_mux_pca954x.c
F: include/hw/i2c/i2c_mux_pca954x.h F: include/hw/i2c/i2c_mux_pca954x.h
pcf8574
M: Dmitrii Sharikhin <d.sharikhin@yadro.com>
S: Maintained
F: hw/gpio/pcf8574.c
F: include/gpio/pcf8574.h
Generic Loader Generic Loader
M: Alistair Francis <alistair@alistair23.me> M: Alistair Francis <alistair@alistair23.me>
S: Maintained S: Maintained
@@ -2591,7 +2508,7 @@ F: include/hw/virtio/virtio-gpu.h
F: docs/system/devices/virtio-gpu.rst F: docs/system/devices/virtio-gpu.rst
vhost-user-blk vhost-user-blk
M: Raphael Norwitz <raphael@enfabrica.net> M: Raphael Norwitz <raphael.norwitz@nutanix.com>
S: Maintained S: Maintained
F: contrib/vhost-user-blk/ F: contrib/vhost-user-blk/
F: contrib/vhost-user-scsi/ F: contrib/vhost-user-scsi/
@@ -2929,7 +2846,6 @@ S: Supported
F: hw/cxl/ F: hw/cxl/
F: hw/mem/cxl_type3.c F: hw/mem/cxl_type3.c
F: include/hw/cxl/ F: include/hw/cxl/
F: qapi/cxl.json
Dirty Bitmaps Dirty Bitmaps
M: Eric Blake <eblake@redhat.com> M: Eric Blake <eblake@redhat.com>
@@ -3356,7 +3272,6 @@ Stats
S: Orphan S: Orphan
F: include/sysemu/stats.h F: include/sysemu/stats.h
F: stats/ F: stats/
F: qapi/stats.json
Streams Streams
M: Edgar E. Iglesias <edgar.iglesias@gmail.com> M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
@@ -3407,8 +3322,10 @@ S: Odd Fixes
F: scripts/checkpatch.pl F: scripts/checkpatch.pl
Migration Migration
M: Juan Quintela <quintela@redhat.com>
M: Peter Xu <peterx@redhat.com> M: Peter Xu <peterx@redhat.com>
M: Fabiano Rosas <farosas@suse.de> M: Fabiano Rosas <farosas@suse.de>
R: Leonardo Bras <leobras@redhat.com>
S: Maintained S: Maintained
F: hw/core/vmstate-if.c F: hw/core/vmstate-if.c
F: include/hw/vmstate-if.h F: include/hw/vmstate-if.h
@@ -3425,8 +3342,10 @@ F: util/userfaultfd.c
X: migration/rdma* X: migration/rdma*
RDMA Migration RDMA Migration
M: Juan Quintela <quintela@redhat.com>
R: Li Zhijian <lizhijian@fujitsu.com> R: Li Zhijian <lizhijian@fujitsu.com>
R: Peter Xu <peterx@redhat.com> R: Peter Xu <peterx@redhat.com>
R: Leonardo Bras <leobras@redhat.com>
S: Odd Fixes S: Odd Fixes
F: migration/rdma* F: migration/rdma*
@@ -3439,11 +3358,6 @@ F: migration/dirtyrate.c
F: migration/dirtyrate.h F: migration/dirtyrate.h
F: include/sysemu/dirtyrate.h F: include/sysemu/dirtyrate.h
Detached LUKS header
M: Hyman Huang <yong.huang@smartx.com>
S: Maintained
F: tests/qemu-iotests/tests/luks-detached-header
D-Bus D-Bus
M: Marc-André Lureau <marcandre.lureau@redhat.com> M: Marc-André Lureau <marcandre.lureau@redhat.com>
S: Maintained S: Maintained
@@ -3593,7 +3507,6 @@ F: util/iova-tree.c
elf2dmp elf2dmp
M: Viktor Prutyanov <viktor.prutyanov@phystech.edu> M: Viktor Prutyanov <viktor.prutyanov@phystech.edu>
R: Akihiko Odaki <akihiko.odaki@daynix.com>
S: Maintained S: Maintained
F: contrib/elf2dmp/ F: contrib/elf2dmp/
@@ -3628,15 +3541,6 @@ F: tests/qtest/adm1272-test.c
F: tests/qtest/max34451-test.c F: tests/qtest/max34451-test.c
F: tests/qtest/isl_pmbus_vr-test.c F: tests/qtest/isl_pmbus_vr-test.c
FSI
M: Ninad Palsule <ninad@linux.ibm.com>
R: Cédric Le Goater <clg@kaod.org>
S: Maintained
F: hw/fsi/*
F: include/hw/fsi/*
F: docs/specs/fsi.rst
F: tests/qtest/aspeed_fsi-test.c
Firmware schema specifications Firmware schema specifications
M: Philippe Mathieu-Daudé <philmd@linaro.org> M: Philippe Mathieu-Daudé <philmd@linaro.org>
R: Daniel P. Berrange <berrange@redhat.com> R: Daniel P. Berrange <berrange@redhat.com>
@@ -3659,6 +3563,7 @@ F: tests/uefi-test-tools/
VT-d Emulation VT-d Emulation
M: Michael S. Tsirkin <mst@redhat.com> M: Michael S. Tsirkin <mst@redhat.com>
M: Peter Xu <peterx@redhat.com>
R: Jason Wang <jasowang@redhat.com> R: Jason Wang <jasowang@redhat.com>
S: Supported S: Supported
F: hw/i386/intel_iommu.c F: hw/i386/intel_iommu.c
@@ -3687,16 +3592,6 @@ F: hw/core/clock-vmstate.c
F: hw/core/qdev-clock.c F: hw/core/qdev-clock.c
F: docs/devel/clocks.rst F: docs/devel/clocks.rst
Reset framework
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: include/hw/resettable.h
F: include/hw/core/resetcontainer.h
F: include/sysemu/reset.h
F: hw/core/reset.c
F: hw/core/resettable.c
F: hw/core/resetcontainer.c
Usermode Emulation Usermode Emulation
------------------ ------------------
Overall usermode emulation Overall usermode emulation
@@ -3737,7 +3632,6 @@ TCG Plugins
M: Alex Bennée <alex.bennee@linaro.org> M: Alex Bennée <alex.bennee@linaro.org>
R: Alexandre Iooss <erdnaxe@crans.org> R: Alexandre Iooss <erdnaxe@crans.org>
R: Mahmoud Mandour <ma.mandourr@gmail.com> R: Mahmoud Mandour <ma.mandourr@gmail.com>
R: Pierrick Bouvier <pierrick.bouvier@linaro.org>
S: Maintained S: Maintained
F: docs/devel/tcg-plugins.rst F: docs/devel/tcg-plugins.rst
F: plugins/ F: plugins/
@@ -4238,7 +4132,6 @@ F: docs/conf.py
F: docs/*/conf.py F: docs/*/conf.py
F: docs/sphinx/ F: docs/sphinx/
F: docs/_templates/ F: docs/_templates/
F: docs/devel/docs.rst
Miscellaneous Miscellaneous
------------- -------------


@@ -202,7 +202,6 @@ clean: recurse-clean
! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-arm.a \ ! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-arm.a \
-exec rm {} + -exec rm {} +
rm -f TAGS cscope.* *~ */*~ rm -f TAGS cscope.* *~ */*~
@$(MAKE) -Ctests/qemu-iotests clean
VERSION = $(shell cat $(SRC_PATH)/VERSION) VERSION = $(shell cat $(SRC_PATH)/VERSION)


@@ -1 +1 @@
8.2.50 8.1.50


@@ -16,4 +16,3 @@ config KVM
config XEN config XEN
bool bool
select FSDEV_9P if VIRTFS select FSDEV_9P if VIRTFS
select XEN_BUS


@@ -41,7 +41,7 @@ void accel_blocker_init(void)
void accel_ioctl_begin(void) void accel_ioctl_begin(void)
{ {
if (likely(bql_locked())) { if (likely(qemu_mutex_iothread_locked())) {
return; return;
} }
@@ -51,7 +51,7 @@ void accel_ioctl_begin(void)
void accel_ioctl_end(void) void accel_ioctl_end(void)
{ {
if (likely(bql_locked())) { if (likely(qemu_mutex_iothread_locked())) {
return; return;
} }
@@ -62,7 +62,7 @@ void accel_ioctl_end(void)
void accel_cpu_ioctl_begin(CPUState *cpu) void accel_cpu_ioctl_begin(CPUState *cpu)
{ {
if (unlikely(bql_locked())) { if (unlikely(qemu_mutex_iothread_locked())) {
return; return;
} }
@@ -72,7 +72,7 @@ void accel_cpu_ioctl_begin(CPUState *cpu)
void accel_cpu_ioctl_end(CPUState *cpu) void accel_cpu_ioctl_end(CPUState *cpu)
{ {
if (unlikely(bql_locked())) { if (unlikely(qemu_mutex_iothread_locked())) {
return; return;
} }
@@ -105,7 +105,7 @@ void accel_ioctl_inhibit_begin(void)
* We allow to inhibit only when holding the BQL, so we can identify * We allow to inhibit only when holding the BQL, so we can identify
* when an inhibitor wants to issue an ioctl easily. * when an inhibitor wants to issue an ioctl easily.
*/ */
g_assert(bql_locked()); g_assert(qemu_mutex_iothread_locked());
/* Block further invocations of the ioctls outside the BQL. */ /* Block further invocations of the ioctls outside the BQL. */
CPU_FOREACH(cpu) { CPU_FOREACH(cpu) {
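
The accel-blocker hunks above are a mechanical rename: the base side uses the bql_lock()/bql_unlock()/bql_locked() names for the Big QEMU Lock, while this branch still carries the older qemu_mutex_*_iothread() spellings. A rename like this is typically staged with thin compatibility wrappers so both spellings compile during the transition; a minimal sketch, where the wrapper arrangement is illustrative rather than taken from the tree:

/* Illustrative compatibility shim for a lock-API rename; not from this diff. */
#include <stdbool.h>

bool bql_locked(void);        /* new canonical API */
void bql_lock(void);
void bql_unlock(void);

/* Old spellings forward to the new ones until all callers are converted. */
static inline bool qemu_mutex_iothread_locked(void) { return bql_locked(); }
static inline void qemu_mutex_lock_iothread(void)   { bql_lock(); }
static inline void qemu_mutex_unlock_iothread(void) { bql_unlock(); }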


@@ -62,7 +62,7 @@ void accel_setup_post(MachineState *ms)
} }
/* initialize the arch-independent accel operation interfaces */ /* initialize the arch-independent accel operation interfaces */
void accel_system_init_ops_interfaces(AccelClass *ac) void accel_init_ops_interfaces(AccelClass *ac)
{ {
const char *ac_name; const char *ac_name;
char *ops_name; char *ops_name;


@@ -10,6 +10,6 @@
#ifndef ACCEL_SYSTEM_H #ifndef ACCEL_SYSTEM_H
#define ACCEL_SYSTEM_H #define ACCEL_SYSTEM_H
void accel_system_init_ops_interfaces(AccelClass *ac); void accel_init_ops_interfaces(AccelClass *ac);
#endif /* ACCEL_SYSTEM_H */ #endif /* ACCEL_SYSTEM_H */


@@ -104,7 +104,7 @@ static void accel_init_cpu_interfaces(AccelClass *ac)
void accel_init_interfaces(AccelClass *ac) void accel_init_interfaces(AccelClass *ac)
{ {
#ifndef CONFIG_USER_ONLY #ifndef CONFIG_USER_ONLY
accel_system_init_ops_interfaces(ac); accel_init_ops_interfaces(ac);
#endif /* !CONFIG_USER_ONLY */ #endif /* !CONFIG_USER_ONLY */
accel_init_cpu_interfaces(ac); accel_init_cpu_interfaces(ac);


@@ -24,9 +24,10 @@ static void *dummy_cpu_thread_fn(void *arg)
rcu_register_thread(); rcu_register_thread();
bql_lock(); qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread); qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id(); cpu->thread_id = qemu_get_thread_id();
cpu->neg.can_do_io = true;
current_cpu = cpu; current_cpu = cpu;
#ifndef _WIN32 #ifndef _WIN32
@@ -42,7 +43,7 @@ static void *dummy_cpu_thread_fn(void *arg)
qemu_guest_random_seed_thread_part2(cpu->random_seed); qemu_guest_random_seed_thread_part2(cpu->random_seed);
do { do {
bql_unlock(); qemu_mutex_unlock_iothread();
#ifndef _WIN32 #ifndef _WIN32
do { do {
int sig; int sig;
@@ -55,11 +56,11 @@ static void *dummy_cpu_thread_fn(void *arg)
#else #else
qemu_sem_wait(&cpu->sem); qemu_sem_wait(&cpu->sem);
#endif #endif
bql_lock(); qemu_mutex_lock_iothread();
qemu_wait_io_event(cpu); qemu_wait_io_event(cpu);
} while (!cpu->unplug); } while (!cpu->unplug);
bql_unlock(); qemu_mutex_unlock_iothread();
rcu_unregister_thread(); rcu_unregister_thread();
return NULL; return NULL;
} }


@@ -424,10 +424,11 @@ static void *hvf_cpu_thread_fn(void *arg)
rcu_register_thread(); rcu_register_thread();
bql_lock(); qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread); qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id(); cpu->thread_id = qemu_get_thread_id();
cpu->neg.can_do_io = true;
current_cpu = cpu; current_cpu = cpu;
hvf_init_vcpu(cpu); hvf_init_vcpu(cpu);
@@ -448,7 +449,7 @@ static void *hvf_cpu_thread_fn(void *arg)
hvf_vcpu_destroy(cpu); hvf_vcpu_destroy(cpu);
cpu_thread_signal_destroyed(cpu); cpu_thread_signal_destroyed(cpu);
bql_unlock(); qemu_mutex_unlock_iothread();
rcu_unregister_thread(); rcu_unregister_thread();
return NULL; return NULL;
} }


@@ -33,9 +33,10 @@ static void *kvm_vcpu_thread_fn(void *arg)
rcu_register_thread(); rcu_register_thread();
bql_lock(); qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread); qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id(); cpu->thread_id = qemu_get_thread_id();
cpu->neg.can_do_io = true;
current_cpu = cpu; current_cpu = cpu;
r = kvm_init_vcpu(cpu, &error_fatal); r = kvm_init_vcpu(cpu, &error_fatal);
@@ -57,7 +58,7 @@ static void *kvm_vcpu_thread_fn(void *arg)
kvm_destroy_vcpu(cpu); kvm_destroy_vcpu(cpu);
cpu_thread_signal_destroyed(cpu); cpu_thread_signal_destroyed(cpu);
bql_unlock(); qemu_mutex_unlock_iothread();
rcu_unregister_thread(); rcu_unregister_thread();
return NULL; return NULL;
} }
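
The three *_cpu_thread_fn hunks above (dummy, HVF, KVM) all follow the same per-vCPU thread skeleton: register with RCU, take the BQL for setup, publish the thread identity, then loop dropping the lock while waiting or running. A condensed sketch of that shape, with the accelerator-specific work reduced to a placeholder (wait_for_work() is a stand-in for sigwait()/qemu_sem_wait()/kvm_cpu_exec(), not a real QEMU function); the base side spells the lock calls bql_lock()/bql_unlock():

#include "qemu/osdep.h"
#include "qemu/rcu.h"
#include "qemu/main-loop.h"
#include "hw/core/cpu.h"

static void *vcpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();            /* make RCU aware of this thread */
    qemu_mutex_lock_iothread();       /* all setup runs under the BQL */
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    current_cpu = cpu;

    do {
        qemu_mutex_unlock_iothread(); /* run/wait without holding the BQL */
        wait_for_work(cpu);           /* stand-in for the accel-specific body */
        qemu_mutex_lock_iothread();
        qemu_wait_io_event(cpu);
    } while (!cpu->unplug);

    qemu_mutex_unlock_iothread();
    rcu_unregister_thread();
    return NULL;
}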


@@ -69,6 +69,16 @@
#define KVM_GUESTDBG_BLOCKIRQ 0 #define KVM_GUESTDBG_BLOCKIRQ 0
#endif #endif
//#define DEBUG_KVM
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
do { } while (0)
#endif
struct KVMParkedVcpu { struct KVMParkedVcpu {
unsigned long vcpu_id; unsigned long vcpu_id;
int kvm_fd; int kvm_fd;
@@ -88,7 +98,7 @@ bool kvm_allowed;
bool kvm_readonly_mem_allowed; bool kvm_readonly_mem_allowed;
bool kvm_vm_attributes_allowed; bool kvm_vm_attributes_allowed;
bool kvm_msi_use_devid; bool kvm_msi_use_devid;
static bool kvm_has_guest_debug; bool kvm_has_guest_debug;
static int kvm_sstep_flags; static int kvm_sstep_flags;
static bool kvm_immediate_exit; static bool kvm_immediate_exit;
static hwaddr kvm_max_slot_size = ~0; static hwaddr kvm_max_slot_size = ~0;
@@ -321,7 +331,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
struct KVMParkedVcpu *vcpu = NULL; struct KVMParkedVcpu *vcpu = NULL;
int ret = 0; int ret = 0;
trace_kvm_destroy_vcpu(); DPRINTF("kvm_destroy_vcpu\n");
ret = kvm_arch_destroy_vcpu(cpu); ret = kvm_arch_destroy_vcpu(cpu);
if (ret < 0) { if (ret < 0) {
@@ -331,7 +341,7 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
if (mmap_size < 0) { if (mmap_size < 0) {
ret = mmap_size; ret = mmap_size;
trace_kvm_failed_get_vcpu_mmap_size(); DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
goto err; goto err;
} }
@@ -433,6 +443,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET); PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET);
if (cpu->kvm_dirty_gfns == MAP_FAILED) { if (cpu->kvm_dirty_gfns == MAP_FAILED) {
ret = -errno; ret = -errno;
DPRINTF("mmap'ing vcpu dirty gfns failed: %d\n", ret);
goto err; goto err;
} }
} }
@@ -806,7 +817,7 @@ static void kvm_dirty_ring_flush(void)
* should always be with BQL held, serialization is guaranteed. * should always be with BQL held, serialization is guaranteed.
* However, let's be sure of it. * However, let's be sure of it.
*/ */
assert(bql_locked()); assert(qemu_mutex_iothread_locked());
/* /*
* First make sure to flush the hardware buffers by kicking all * First make sure to flush the hardware buffers by kicking all
* vcpus out in a synchronous way. * vcpus out in a synchronous way.
@@ -1119,11 +1130,6 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension)
return ret; return ret;
} }
/*
* We track the poisoned pages to be able to:
* - replace them on VM reset
* - block a migration for a VM with a poisoned page
*/
typedef struct HWPoisonPage { typedef struct HWPoisonPage {
ram_addr_t ram_addr; ram_addr_t ram_addr;
QLIST_ENTRY(HWPoisonPage) list; QLIST_ENTRY(HWPoisonPage) list;
@@ -1157,11 +1163,6 @@ void kvm_hwpoison_page_add(ram_addr_t ram_addr)
QLIST_INSERT_HEAD(&hwpoison_page_list, page, list); QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
} }
bool kvm_hwpoisoned_mem(void)
{
return !QLIST_EMPTY(&hwpoison_page_list);
}
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size) static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
{ {
#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN #if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
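
The poisoned-page hunks revolve around a simple intrusive QLIST of HWPoisonPage records: pages are appended in kvm_hwpoison_page_add() and the base side's kvm_hwpoisoned_mem() just asks whether the list is empty. A self-contained sketch of that pattern using the same queue macros; the field names mirror the diff, but the add function body is a simplified guess (the real one also avoids duplicates):

#include "qemu/osdep.h"
#include "qemu/queue.h"
#include "exec/cpu-common.h"   /* ram_addr_t */

typedef struct HWPoisonPage {
    ram_addr_t ram_addr;
    QLIST_ENTRY(HWPoisonPage) list;   /* linkage for the intrusive list */
} HWPoisonPage;

static QLIST_HEAD(, HWPoisonPage) hwpoison_page_list =
    QLIST_HEAD_INITIALIZER(hwpoison_page_list);

static void hwpoison_page_add(ram_addr_t ram_addr)
{
    HWPoisonPage *page = g_new0(HWPoisonPage, 1);
    page->ram_addr = ram_addr;
    QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
}

static bool hwpoisoned_mem(void)
{
    return !QLIST_EMPTY(&hwpoison_page_list); /* any page recorded? */
}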
@@ -1401,9 +1402,9 @@ static void *kvm_dirty_ring_reaper_thread(void *data)
trace_kvm_dirty_ring_reaper("wakeup"); trace_kvm_dirty_ring_reaper("wakeup");
r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING; r->reaper_state = KVM_DIRTY_RING_REAPER_REAPING;
bql_lock(); qemu_mutex_lock_iothread();
kvm_dirty_ring_reap(s, NULL); kvm_dirty_ring_reap(s, NULL);
bql_unlock(); qemu_mutex_unlock_iothread();
r->reaper_iteration++; r->reaper_iteration++;
} }
@@ -2359,7 +2360,7 @@ static int kvm_init(MachineState *ms)
QTAILQ_INIT(&s->kvm_sw_breakpoints); QTAILQ_INIT(&s->kvm_sw_breakpoints);
#endif #endif
QLIST_INIT(&s->kvm_parked_vcpus); QLIST_INIT(&s->kvm_parked_vcpus);
s->fd = qemu_open_old(s->device ?: "/dev/kvm", O_RDWR); s->fd = qemu_open_old("/dev/kvm", O_RDWR);
if (s->fd == -1) { if (s->fd == -1) {
fprintf(stderr, "Could not access KVM kernel module: %m\n"); fprintf(stderr, "Could not access KVM kernel module: %m\n");
ret = -errno; ret = -errno;
@@ -2820,14 +2821,14 @@ int kvm_cpu_exec(CPUState *cpu)
struct kvm_run *run = cpu->kvm_run; struct kvm_run *run = cpu->kvm_run;
int ret, run_ret; int ret, run_ret;
trace_kvm_cpu_exec(); DPRINTF("kvm_cpu_exec()\n");
if (kvm_arch_process_async_events(cpu)) { if (kvm_arch_process_async_events(cpu)) {
qatomic_set(&cpu->exit_request, 0); qatomic_set(&cpu->exit_request, 0);
return EXCP_HLT; return EXCP_HLT;
} }
bql_unlock(); qemu_mutex_unlock_iothread();
cpu_exec_start(cpu); cpu_exec_start(cpu);
do { do {
@@ -2847,7 +2848,7 @@ int kvm_cpu_exec(CPUState *cpu)
kvm_arch_pre_run(cpu, run); kvm_arch_pre_run(cpu, run);
if (qatomic_read(&cpu->exit_request)) { if (qatomic_read(&cpu->exit_request)) {
trace_kvm_interrupt_exit_request(); DPRINTF("interrupt exit requested\n");
/* /*
* KVM requires us to reenter the kernel after IO exits to complete * KVM requires us to reenter the kernel after IO exits to complete
* instruction emulation. This self-signal will ensure that we * instruction emulation. This self-signal will ensure that we
@@ -2867,17 +2868,17 @@ int kvm_cpu_exec(CPUState *cpu)
#ifdef KVM_HAVE_MCE_INJECTION #ifdef KVM_HAVE_MCE_INJECTION
if (unlikely(have_sigbus_pending)) { if (unlikely(have_sigbus_pending)) {
bql_lock(); qemu_mutex_lock_iothread();
kvm_arch_on_sigbus_vcpu(cpu, pending_sigbus_code, kvm_arch_on_sigbus_vcpu(cpu, pending_sigbus_code,
pending_sigbus_addr); pending_sigbus_addr);
have_sigbus_pending = false; have_sigbus_pending = false;
bql_unlock(); qemu_mutex_unlock_iothread();
} }
#endif #endif
if (run_ret < 0) { if (run_ret < 0) {
if (run_ret == -EINTR || run_ret == -EAGAIN) { if (run_ret == -EINTR || run_ret == -EAGAIN) {
trace_kvm_io_window_exit(); DPRINTF("io window exit\n");
kvm_eat_signals(cpu); kvm_eat_signals(cpu);
ret = EXCP_INTERRUPT; ret = EXCP_INTERRUPT;
break; break;
@@ -2899,6 +2900,7 @@ int kvm_cpu_exec(CPUState *cpu)
trace_kvm_run_exit(cpu->cpu_index, run->exit_reason); trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
switch (run->exit_reason) { switch (run->exit_reason) {
case KVM_EXIT_IO: case KVM_EXIT_IO:
DPRINTF("handle_io\n");
/* Called outside BQL */ /* Called outside BQL */
kvm_handle_io(run->io.port, attrs, kvm_handle_io(run->io.port, attrs,
(uint8_t *)run + run->io.data_offset, (uint8_t *)run + run->io.data_offset,
@@ -2908,6 +2910,7 @@ int kvm_cpu_exec(CPUState *cpu)
ret = 0; ret = 0;
break; break;
case KVM_EXIT_MMIO: case KVM_EXIT_MMIO:
DPRINTF("handle_mmio\n");
/* Called outside BQL */ /* Called outside BQL */
address_space_rw(&address_space_memory, address_space_rw(&address_space_memory,
run->mmio.phys_addr, attrs, run->mmio.phys_addr, attrs,
@@ -2917,9 +2920,11 @@ int kvm_cpu_exec(CPUState *cpu)
ret = 0; ret = 0;
break; break;
case KVM_EXIT_IRQ_WINDOW_OPEN: case KVM_EXIT_IRQ_WINDOW_OPEN:
DPRINTF("irq_window_open\n");
ret = EXCP_INTERRUPT; ret = EXCP_INTERRUPT;
break; break;
case KVM_EXIT_SHUTDOWN: case KVM_EXIT_SHUTDOWN:
DPRINTF("shutdown\n");
qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
ret = EXCP_INTERRUPT; ret = EXCP_INTERRUPT;
break; break;
@@ -2937,7 +2942,7 @@ int kvm_cpu_exec(CPUState *cpu)
* still full. Got kicked by KVM_RESET_DIRTY_RINGS. * still full. Got kicked by KVM_RESET_DIRTY_RINGS.
*/ */
trace_kvm_dirty_ring_full(cpu->cpu_index); trace_kvm_dirty_ring_full(cpu->cpu_index);
bql_lock(); qemu_mutex_lock_iothread();
/* /*
* We throttle vCPU by making it sleep once it exit from kernel * We throttle vCPU by making it sleep once it exit from kernel
* due to dirty ring full. In the dirtylimit scenario, reaping * due to dirty ring full. In the dirtylimit scenario, reaping
@@ -2949,12 +2954,11 @@ int kvm_cpu_exec(CPUState *cpu)
} else { } else {
kvm_dirty_ring_reap(kvm_state, NULL); kvm_dirty_ring_reap(kvm_state, NULL);
} }
bql_unlock(); qemu_mutex_unlock_iothread();
dirtylimit_vcpu_execute(cpu); dirtylimit_vcpu_execute(cpu);
ret = 0; ret = 0;
break; break;
case KVM_EXIT_SYSTEM_EVENT: case KVM_EXIT_SYSTEM_EVENT:
trace_kvm_run_exit_system_event(cpu->cpu_index, run->system_event.type);
switch (run->system_event.type) { switch (run->system_event.type) {
case KVM_SYSTEM_EVENT_SHUTDOWN: case KVM_SYSTEM_EVENT_SHUTDOWN:
qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
@@ -2966,24 +2970,26 @@ int kvm_cpu_exec(CPUState *cpu)
break; break;
case KVM_SYSTEM_EVENT_CRASH: case KVM_SYSTEM_EVENT_CRASH:
kvm_cpu_synchronize_state(cpu); kvm_cpu_synchronize_state(cpu);
bql_lock(); qemu_mutex_lock_iothread();
qemu_system_guest_panicked(cpu_get_crash_info(cpu)); qemu_system_guest_panicked(cpu_get_crash_info(cpu));
bql_unlock(); qemu_mutex_unlock_iothread();
ret = 0; ret = 0;
break; break;
default: default:
DPRINTF("kvm_arch_handle_exit\n");
ret = kvm_arch_handle_exit(cpu, run); ret = kvm_arch_handle_exit(cpu, run);
break; break;
} }
break; break;
default: default:
DPRINTF("kvm_arch_handle_exit\n");
ret = kvm_arch_handle_exit(cpu, run); ret = kvm_arch_handle_exit(cpu, run);
break; break;
} }
} while (ret == 0); } while (ret == 0);
cpu_exec_end(cpu); cpu_exec_end(cpu);
bql_lock(); qemu_mutex_lock_iothread();
if (ret < 0) { if (ret < 0) {
cpu_dump_state(cpu, stderr, CPU_DUMP_CODE); cpu_dump_state(cpu, stderr, CPU_DUMP_CODE);
@@ -3595,24 +3601,6 @@ static void kvm_set_dirty_ring_size(Object *obj, Visitor *v,
s->kvm_dirty_ring_size = value; s->kvm_dirty_ring_size = value;
} }
static char *kvm_get_device(Object *obj,
Error **errp G_GNUC_UNUSED)
{
KVMState *s = KVM_STATE(obj);
return g_strdup(s->device);
}
static void kvm_set_device(Object *obj,
const char *value,
Error **errp G_GNUC_UNUSED)
{
KVMState *s = KVM_STATE(obj);
g_free(s->device);
s->device = g_strdup(value);
}
static void kvm_accel_instance_init(Object *obj) static void kvm_accel_instance_init(Object *obj)
{ {
KVMState *s = KVM_STATE(obj); KVMState *s = KVM_STATE(obj);
@@ -3631,7 +3619,6 @@ static void kvm_accel_instance_init(Object *obj)
s->xen_version = 0; s->xen_version = 0;
s->xen_gnttab_max_frames = 64; s->xen_gnttab_max_frames = 64;
s->xen_evtchn_max_pirq = 256; s->xen_evtchn_max_pirq = 256;
s->device = NULL;
} }
/** /**
@@ -3672,10 +3659,6 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data)
object_class_property_set_description(oc, "dirty-ring-size", object_class_property_set_description(oc, "dirty-ring-size",
"Size of KVM dirty page ring buffer (default: 0, i.e. use bitmap)"); "Size of KVM dirty page ring buffer (default: 0, i.e. use bitmap)");
object_class_property_add_str(oc, "device", kvm_get_device, kvm_set_device);
object_class_property_set_description(oc, "device",
"Path to the device node to use (default: /dev/kvm)");
kvm_arch_accel_class_init(oc); kvm_arch_accel_class_init(oc);
} }
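
The "device" property hunks just above show the standard QOM pattern for a string property: a getter that g_strdup()s the stored value, a setter that frees and replaces it, and a class_init registration with a description. A minimal sketch of the same pattern on a hypothetical object (MyState, MY_STATE and the "path" property are placeholders, not names from the tree):

static char *my_get_path(Object *obj, Error **errp)
{
    MyState *s = MY_STATE(obj);
    return g_strdup(s->path);          /* caller owns the returned copy */
}

static void my_set_path(Object *obj, const char *value, Error **errp)
{
    MyState *s = MY_STATE(obj);
    g_free(s->path);                   /* drop the previous value */
    s->path = g_strdup(value);
}

static void my_class_init(ObjectClass *oc, void *data)
{
    object_class_property_add_str(oc, "path", my_get_path, my_set_path);
    object_class_property_set_description(oc, "path",
                                          "Path to the device node");
}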


@@ -25,9 +25,4 @@ kvm_dirty_ring_reaper(const char *s) "%s"
kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)" kvm_dirty_ring_reap(uint64_t count, int64_t t) "reaped %"PRIu64" pages (took %"PRIi64" us)"
kvm_dirty_ring_reaper_kick(const char *reason) "%s" kvm_dirty_ring_reaper_kick(const char *reason) "%s"
kvm_dirty_ring_flush(int finished) "%d" kvm_dirty_ring_flush(int finished) "%d"
kvm_destroy_vcpu(void) ""
kvm_failed_get_vcpu_mmap_size(void) ""
kvm_cpu_exec(void) ""
kvm_interrupt_exit_request(void) ""
kvm_io_window_exit(void) ""
kvm_run_exit_system_event(int cpu_index, uint32_t event_type) "cpu_index %d, system_even_type %"PRIu32
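
Each base-side line in this trace-events hunk declares a trace event: name, typed arguments, and a printf-style format string. The build generates a trace_<name>() helper for every declaration, and those helpers are what replace the DPRINTF() calls removed elsewhere in kvm-all.c in this compare. A sketch of the round trip, using a made-up event name:

/*
 * Declared in accel/kvm/trace-events (hypothetical example event):
 *   kvm_example_exit(int cpu_index, uint32_t reason) "cpu %d reason %" PRIu32
 * The build then generates a trace_kvm_example_exit() helper:
 */
#include "trace.h"

static void report_exit(CPUState *cpu, uint32_t reason)
{
    trace_kvm_example_exit(cpu->cpu_index, reason); /* no-op unless enabled */
}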


@@ -124,8 +124,3 @@ uint32_t kvm_dirty_ring_size(void)
{ {
return 0; return 0;
} }
bool kvm_hwpoisoned_mem(void)
{
return false;
}


@@ -30,6 +30,9 @@
#include "qemu/rcu.h" #include "qemu/rcu.h"
#include "exec/log.h" #include "exec/log.h"
#include "qemu/main-loop.h" #include "qemu/main-loop.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
#include "sysemu/cpus.h" #include "sysemu/cpus.h"
#include "exec/cpu-all.h" #include "exec/cpu-all.h"
#include "sysemu/cpu-timers.h" #include "sysemu/cpu-timers.h"
@@ -250,29 +253,43 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc,
hash = tb_jmp_cache_hash_func(pc); hash = tb_jmp_cache_hash_func(pc);
jc = cpu->tb_jmp_cache; jc = cpu->tb_jmp_cache;
Base side (-):

tb = qatomic_read(&jc->array[hash].tb);
if (likely(tb &&
           jc->array[hash].pc == pc &&
           tb->cs_base == cs_base &&
           tb->flags == flags &&
           tb_cflags(tb) == cflags)) {
    goto hit;
}

tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
if (tb == NULL) {
    return NULL;
}
jc->array[hash].pc = pc;
qatomic_set(&jc->array[hash].tb, tb);

hit:
/*
 * As long as tb is not NULL, the contents are consistent. Therefore,
 * the virtual PC has to match for non-CF_PCREL translations.
 */
assert((tb_cflags(tb) & CF_PCREL) || tb->pc == pc);
return tb;
}

Head side (+):

if (cflags & CF_PCREL) {
    /* Use acquire to ensure current load of pc from jc. */
    tb = qatomic_load_acquire(&jc->array[hash].tb);

    if (likely(tb &&
               jc->array[hash].pc == pc &&
               tb->cs_base == cs_base &&
               tb->flags == flags &&
               tb_cflags(tb) == cflags)) {
        return tb;
    }
    tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
    if (tb == NULL) {
        return NULL;
    }
    jc->array[hash].pc = pc;
    /* Ensure pc is written first. */
    qatomic_store_release(&jc->array[hash].tb, tb);
} else {
    /* Use rcu_read to ensure current load of pc from *tb. */
    tb = qatomic_rcu_read(&jc->array[hash].tb);

    if (likely(tb &&
               tb->pc == pc &&
               tb->cs_base == cs_base &&
               tb->flags == flags &&
               tb_cflags(tb) == cflags)) {
        return tb;
    }
    tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
    if (tb == NULL) {
        return NULL;
    }
    /* Use the pc value already stored in tb->pc. */
    qatomic_set(&jc->array[hash].tb, tb);
}

return tb;
}
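
The CF_PCREL arm of this lookup is a classic publication pattern: the writer stores the key (pc) first and then publishes the value (tb) with a release store, so a reader that load-acquires a non-NULL tb is guaranteed to see the matching pc. A stripped-down sketch of the same idiom; the qatomic_* names follow the diff, while Entry and the two helpers are illustrative:

#include "qemu/atomic.h"

typedef struct Entry {
    uint64_t pc;                    /* key, written before publication */
    TranslationBlock *tb;           /* value, published with release */
} Entry;

static void publish(Entry *e, uint64_t pc, TranslationBlock *tb)
{
    e->pc = pc;                          /* 1: write the key */
    qatomic_store_release(&e->tb, tb);   /* 2: publish; orders the pc store */
}

static TranslationBlock *lookup(Entry *e, uint64_t pc)
{
    /* Pairs with the release store above: a non-NULL tb implies pc is valid. */
    TranslationBlock *tb = qatomic_load_acquire(&e->tb);
    return (tb && e->pc == pc) ? tb : NULL;
}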
@@ -340,9 +357,9 @@ static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc,
#ifdef CONFIG_USER_ONLY #ifdef CONFIG_USER_ONLY
g_assert_not_reached(); g_assert_not_reached();
#else #else
const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops; CPUClass *cc = CPU_GET_CLASS(cpu);
assert(tcg_ops->debug_check_breakpoint); assert(cc->tcg_ops->debug_check_breakpoint);
match_bp = tcg_ops->debug_check_breakpoint(cpu); match_bp = cc->tcg_ops->debug_check_breakpoint(cpu);
#endif #endif
} }
@@ -396,14 +413,6 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
uint64_t cs_base; uint64_t cs_base;
uint32_t flags, cflags; uint32_t flags, cflags;
/*
* By definition we've just finished a TB, so I/O is OK.
* Avoid the possibility of calling cpu_io_recompile() if
* a page table walk triggered by tb_lookup() calling
* probe_access_internal() happens to touch an MMIO device.
* The next TB, if we chain to it, will clear the flag again.
*/
cpu->neg.can_do_io = true;
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
cflags = curr_cflags(cpu); cflags = curr_cflags(cpu);
@@ -467,11 +476,10 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
* counter hit zero); we must restore the guest PC to the address * counter hit zero); we must restore the guest PC to the address
* of the start of the TB. * of the start of the TB.
*/ */
CPUClass *cc = cpu->cc; CPUClass *cc = CPU_GET_CLASS(cpu);
const TCGCPUOps *tcg_ops = cc->tcg_ops;
if (tcg_ops->synchronize_from_tb) { if (cc->tcg_ops->synchronize_from_tb) {
tcg_ops->synchronize_from_tb(cpu, last_tb); cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
} else { } else {
tcg_debug_assert(!(tb_cflags(last_tb) & CF_PCREL)); tcg_debug_assert(!(tb_cflags(last_tb) & CF_PCREL));
assert(cc->set_pc); assert(cc->set_pc);
@@ -503,19 +511,19 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
static void cpu_exec_enter(CPUState *cpu) static void cpu_exec_enter(CPUState *cpu)
{ {
const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops; CPUClass *cc = CPU_GET_CLASS(cpu);
if (tcg_ops->cpu_exec_enter) { if (cc->tcg_ops->cpu_exec_enter) {
tcg_ops->cpu_exec_enter(cpu); cc->tcg_ops->cpu_exec_enter(cpu);
} }
} }
static void cpu_exec_exit(CPUState *cpu) static void cpu_exec_exit(CPUState *cpu)
{ {
const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops; CPUClass *cc = CPU_GET_CLASS(cpu);
if (tcg_ops->cpu_exec_exit) { if (cc->tcg_ops->cpu_exec_exit) {
tcg_ops->cpu_exec_exit(cpu); cc->tcg_ops->cpu_exec_exit(cpu);
} }
} }
@@ -550,8 +558,8 @@ static void cpu_exec_longjmp_cleanup(CPUState *cpu)
tcg_ctx->gen_tb = NULL; tcg_ctx->gen_tb = NULL;
} }
#endif #endif
if (bql_locked()) { if (qemu_mutex_iothread_locked()) {
bql_unlock(); qemu_mutex_unlock_iothread();
} }
assert_no_pages_locked(); assert_no_pages_locked();
} }
@@ -669,11 +677,15 @@ static inline bool cpu_handle_halt(CPUState *cpu)
{ {
#ifndef CONFIG_USER_ONLY #ifndef CONFIG_USER_ONLY
if (cpu->halted) { if (cpu->halted) {
const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops; #if defined(TARGET_I386)
if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
if (tcg_ops->cpu_exec_halt) { X86CPU *x86_cpu = X86_CPU(cpu);
tcg_ops->cpu_exec_halt(cpu); qemu_mutex_lock_iothread();
apic_poll_irq(x86_cpu->apic_state);
cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
qemu_mutex_unlock_iothread();
} }
#endif /* TARGET_I386 */
if (!cpu_has_work(cpu)) { if (!cpu_has_work(cpu)) {
return true; return true;
} }
@@ -687,7 +699,7 @@ static inline bool cpu_handle_halt(CPUState *cpu)
static inline void cpu_handle_debug_exception(CPUState *cpu) static inline void cpu_handle_debug_exception(CPUState *cpu)
{ {
const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops; CPUClass *cc = CPU_GET_CLASS(cpu);
CPUWatchpoint *wp; CPUWatchpoint *wp;
if (!cpu->watchpoint_hit) { if (!cpu->watchpoint_hit) {
@@ -696,8 +708,8 @@ static inline void cpu_handle_debug_exception(CPUState *cpu)
} }
} }
if (tcg_ops->debug_excp_handler) { if (cc->tcg_ops->debug_excp_handler) {
tcg_ops->debug_excp_handler(cpu); cc->tcg_ops->debug_excp_handler(cpu);
} }
} }
@@ -709,12 +721,11 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
&& cpu->neg.icount_decr.u16.low + cpu->icount_extra == 0) { && cpu->neg.icount_decr.u16.low + cpu->icount_extra == 0) {
/* Execute just one insn to trigger exception pending in the log */ /* Execute just one insn to trigger exception pending in the log */
cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT)
| CF_NOIRQ | 1; | CF_LAST_IO | CF_NOIRQ | 1;
} }
#endif #endif
return false; return false;
} }
if (cpu->exception_index >= EXCP_INTERRUPT) { if (cpu->exception_index >= EXCP_INTERRUPT) {
/* exit request from the cpu execution loop */ /* exit request from the cpu execution loop */
*ret = cpu->exception_index; *ret = cpu->exception_index;
@@ -723,59 +734,62 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
} }
cpu->exception_index = -1; cpu->exception_index = -1;
return true; return true;
Base side (-):

}
#if defined(CONFIG_USER_ONLY)
/*
 * If user mode only, we simulate a fake exception which will be
 * handled outside the cpu execution loop.
 */
#if defined(TARGET_I386)
const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;
tcg_ops->fake_user_interrupt(cpu);
#endif /* TARGET_I386 */
*ret = cpu->exception_index;
cpu->exception_index = -1;
return true;
#else
if (replay_exception()) {
    const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops;

    bql_lock();
    tcg_ops->do_interrupt(cpu);
    bql_unlock();
    cpu->exception_index = -1;

    if (unlikely(cpu->singlestep_enabled)) {
        /*
         * After processing the exception, ensure an EXCP_DEBUG is
         * raised when single-stepping so that GDB doesn't miss the
         * next instruction.
         */
        *ret = EXCP_DEBUG;
        cpu_handle_debug_exception(cpu);
        return true;
    }
} else if (!replay_has_interrupt()) {
    /* give a chance to iothread in replay mode */
    *ret = EXCP_INTERRUPT;
    return true;
}
#endif

return false;
}

static inline bool icount_exit_request(CPUState *cpu)
{
    if (!icount_enabled()) {
        return false;
    }
    if (cpu->cflags_next_tb != -1 && !(cpu->cflags_next_tb & CF_USE_ICOUNT)) {
        return false;
    }
    return cpu->neg.icount_decr.u16.low + cpu->icount_extra == 0;
}

Head side (+):

} else {
#if defined(CONFIG_USER_ONLY)
    /* if user mode only, we simulate a fake exception
       which will be handled outside the cpu execution
       loop */
#if defined(TARGET_I386)
    CPUClass *cc = CPU_GET_CLASS(cpu);
    cc->tcg_ops->fake_user_interrupt(cpu);
#endif /* TARGET_I386 */
    *ret = cpu->exception_index;
    cpu->exception_index = -1;
    return true;
#else
    if (replay_exception()) {
        CPUClass *cc = CPU_GET_CLASS(cpu);

        qemu_mutex_lock_iothread();
        cc->tcg_ops->do_interrupt(cpu);
        qemu_mutex_unlock_iothread();
        cpu->exception_index = -1;

        if (unlikely(cpu->singlestep_enabled)) {
            /*
             * After processing the exception, ensure an EXCP_DEBUG is
             * raised when single-stepping so that GDB doesn't miss the
             * next instruction.
             */
            *ret = EXCP_DEBUG;
            cpu_handle_debug_exception(cpu);
            return true;
        }
    } else if (!replay_has_interrupt()) {
        /* give a chance to iothread in replay mode */
        *ret = EXCP_INTERRUPT;
        return true;
    }
#endif
}

return false;
}

#ifndef CONFIG_USER_ONLY
/*
 * CPU_INTERRUPT_POLL is a virtual event which gets converted into a
 * "real" interrupt event later. It does not need to be recorded for
 * replay purposes.
 */
static inline bool need_replay_interrupt(int interrupt_request)
{
#if defined(TARGET_I386)
    return !(interrupt_request & CPU_INTERRUPT_POLL);
#else
    return true;
#endif
}
#endif /* !CONFIG_USER_ONLY */
static inline bool cpu_handle_interrupt(CPUState *cpu, static inline bool cpu_handle_interrupt(CPUState *cpu,
TranslationBlock **last_tb) TranslationBlock **last_tb)
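
On the base side the TARGET_I386 #ifdef above is gone: the replay policy moved into an optional need_replay_interrupt hook on TCGCPUOps, with "no hook installed" meaning "always record". That optional-hook-with-default idiom looks roughly like the sketch below; the struct and field names follow the diff, while the condensed caller is illustrative:

struct TCGCPUOps {
    /* Optional: return false to skip recording this interrupt for replay. */
    bool (*need_replay_interrupt)(int interrupt_request);
    /* ... other hooks elided ... */
};

static void maybe_record_interrupt(const struct TCGCPUOps *ops, int ireq)
{
    if (!ops->need_replay_interrupt || ops->need_replay_interrupt(ireq)) {
        replay_interrupt();     /* default when no hook is set: record it */
    }
}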
@@ -798,7 +812,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
if (unlikely(qatomic_read(&cpu->interrupt_request))) { if (unlikely(qatomic_read(&cpu->interrupt_request))) {
int interrupt_request; int interrupt_request;
bql_lock(); qemu_mutex_lock_iothread();
interrupt_request = cpu->interrupt_request; interrupt_request = cpu->interrupt_request;
if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) { if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
/* Mask out external interrupts for this step. */ /* Mask out external interrupts for this step. */
@@ -807,7 +821,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
if (interrupt_request & CPU_INTERRUPT_DEBUG) { if (interrupt_request & CPU_INTERRUPT_DEBUG) {
cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG; cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
cpu->exception_index = EXCP_DEBUG; cpu->exception_index = EXCP_DEBUG;
bql_unlock(); qemu_mutex_unlock_iothread();
return true; return true;
} }
#if !defined(CONFIG_USER_ONLY) #if !defined(CONFIG_USER_ONLY)
@@ -818,7 +832,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
cpu->interrupt_request &= ~CPU_INTERRUPT_HALT; cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
cpu->halted = 1; cpu->halted = 1;
cpu->exception_index = EXCP_HLT; cpu->exception_index = EXCP_HLT;
bql_unlock(); qemu_mutex_unlock_iothread();
return true; return true;
} }
#if defined(TARGET_I386) #if defined(TARGET_I386)
@@ -829,14 +843,14 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0); cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
do_cpu_init(x86_cpu); do_cpu_init(x86_cpu);
cpu->exception_index = EXCP_HALTED; cpu->exception_index = EXCP_HALTED;
bql_unlock(); qemu_mutex_unlock_iothread();
return true; return true;
} }
#else #else
else if (interrupt_request & CPU_INTERRUPT_RESET) { else if (interrupt_request & CPU_INTERRUPT_RESET) {
replay_interrupt(); replay_interrupt();
cpu_reset(cpu); cpu_reset(cpu);
bql_unlock(); qemu_mutex_unlock_iothread();
return true; return true;
} }
#endif /* !TARGET_I386 */ #endif /* !TARGET_I386 */
@@ -845,12 +859,11 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
True when it is, and we should restart on a new TB, True when it is, and we should restart on a new TB,
and via longjmp via cpu_loop_exit. */ and via longjmp via cpu_loop_exit. */
else { else {
const TCGCPUOps *tcg_ops = cpu->cc->tcg_ops; CPUClass *cc = CPU_GET_CLASS(cpu);
if (tcg_ops->cpu_exec_interrupt && if (cc->tcg_ops->cpu_exec_interrupt &&
tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) { cc->tcg_ops->cpu_exec_interrupt(cpu, interrupt_request)) {
if (!tcg_ops->need_replay_interrupt || if (need_replay_interrupt(interrupt_request)) {
tcg_ops->need_replay_interrupt(interrupt_request)) {
replay_interrupt(); replay_interrupt();
} }
/* /*
@@ -860,7 +873,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
*/ */
if (unlikely(cpu->singlestep_enabled)) { if (unlikely(cpu->singlestep_enabled)) {
cpu->exception_index = EXCP_DEBUG; cpu->exception_index = EXCP_DEBUG;
bql_unlock(); qemu_mutex_unlock_iothread();
return true; return true;
} }
cpu->exception_index = -1; cpu->exception_index = -1;
@@ -879,11 +892,14 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
} }
/* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */ /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
bql_unlock(); qemu_mutex_unlock_iothread();
} }
/* Finally, check if we need to exit to the main loop. */ /* Finally, check if we need to exit to the main loop. */
if (unlikely(qatomic_read(&cpu->exit_request)) || icount_exit_request(cpu)) { if (unlikely(qatomic_read(&cpu->exit_request))
|| (icount_enabled()
&& (cpu->cflags_next_tb == -1 || cpu->cflags_next_tb & CF_USE_ICOUNT)
&& cpu->neg.icount_decr.u16.low + cpu->icount_extra == 0)) {
qatomic_set(&cpu->exit_request, 0); qatomic_set(&cpu->exit_request, 0);
if (cpu->exception_index == -1) { if (cpu->exception_index == -1) {
cpu->exception_index = EXCP_INTERRUPT; cpu->exception_index = EXCP_INTERRUPT;
@@ -996,8 +1012,14 @@ cpu_exec_loop(CPUState *cpu, SyncClocks *sc)
*/ */
h = tb_jmp_cache_hash_func(pc); h = tb_jmp_cache_hash_func(pc);
jc = cpu->tb_jmp_cache; jc = cpu->tb_jmp_cache;
jc->array[h].pc = pc; if (cflags & CF_PCREL) {
qatomic_set(&jc->array[h].tb, tb); jc->array[h].pc = pc;
/* Ensure pc is written first. */
qatomic_store_release(&jc->array[h].tb, tb);
} else {
/* Use the pc value already stored in tb->pc. */
qatomic_set(&jc->array[h].tb, tb);
}
} }
#ifndef CONFIG_USER_ONLY #ifndef CONFIG_USER_ONLY
@@ -1048,7 +1070,7 @@ int cpu_exec(CPUState *cpu)
return EXCP_HALTED; return EXCP_HALTED;
} }
RCU_READ_LOCK_GUARD(); rcu_read_lock();
cpu_exec_enter(cpu); cpu_exec_enter(cpu);
/* /*
@@ -1062,15 +1084,18 @@ int cpu_exec(CPUState *cpu)
ret = cpu_exec_setjmp(cpu, &sc); ret = cpu_exec_setjmp(cpu, &sc);
cpu_exec_exit(cpu); cpu_exec_exit(cpu);
rcu_read_unlock();
return ret; return ret;
} }
bool tcg_exec_realizefn(CPUState *cpu, Error **errp) bool tcg_exec_realizefn(CPUState *cpu, Error **errp)
{ {
static bool tcg_target_initialized; static bool tcg_target_initialized;
CPUClass *cc = CPU_GET_CLASS(cpu);
if (!tcg_target_initialized) { if (!tcg_target_initialized) {
cpu->cc->tcg_ops->initialize(); cc->tcg_ops->initialize();
tcg_target_initialized = true; tcg_target_initialized = true;
} }


@@ -1145,11 +1145,14 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
" prot=%x idx=%d\n", " prot=%x idx=%d\n",
addr, full->phys_addr, prot, mmu_idx); addr, full->phys_addr, prot, mmu_idx);
read_flags = full->tlb_fill_flags; read_flags = 0;
if (full->lg_page_size < TARGET_PAGE_BITS) { if (full->lg_page_size < TARGET_PAGE_BITS) {
/* Repeat the MMU check and TLB fill on every access. */ /* Repeat the MMU check and TLB fill on every access. */
read_flags |= TLB_INVALID_MASK; read_flags |= TLB_INVALID_MASK;
} }
if (full->attrs.byte_swap) {
read_flags |= TLB_BSWAP;
}
is_ram = memory_region_is_ram(section->mr); is_ram = memory_region_is_ram(section->mr);
is_romd = memory_region_is_romd(section->mr); is_romd = memory_region_is_romd(section->mr);
@@ -1453,8 +1456,9 @@ static int probe_access_internal(CPUState *cpu, vaddr addr,
flags |= full->slow_flags[access_type]; flags |= full->slow_flags[access_type];
/* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */ /* Fold all "mmio-like" bits into TLB_MMIO. This is not RAM. */
if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY | TLB_CHECK_ALIGNED)) if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))
|| (access_type != MMU_INST_FETCH && force_mmio)) { ||
(access_type != MMU_INST_FETCH && force_mmio)) {
*phost = NULL; *phost = NULL;
return TLB_MMIO; return TLB_MMIO;
} }
@@ -1475,8 +1479,7 @@ int probe_access_full(CPUArchState *env, vaddr addr, int size,
/* Handle clean RAM pages. */ /* Handle clean RAM pages. */
if (unlikely(flags & TLB_NOTDIRTY)) { if (unlikely(flags & TLB_NOTDIRTY)) {
int dirtysize = size == 0 ? 1 : size; notdirty_write(env_cpu(env), addr, 1, *pfull, retaddr);
notdirty_write(env_cpu(env), addr, dirtysize, *pfull, retaddr);
flags &= ~TLB_NOTDIRTY; flags &= ~TLB_NOTDIRTY;
} }
@@ -1499,8 +1502,7 @@ int probe_access_full_mmu(CPUArchState *env, vaddr addr, int size,
/* Handle clean RAM pages. */ /* Handle clean RAM pages. */
if (unlikely(flags & TLB_NOTDIRTY)) { if (unlikely(flags & TLB_NOTDIRTY)) {
int dirtysize = size == 0 ? 1 : size; notdirty_write(env_cpu(env), addr, 1, *pfull, 0);
notdirty_write(env_cpu(env), addr, dirtysize, *pfull, 0);
flags &= ~TLB_NOTDIRTY; flags &= ~TLB_NOTDIRTY;
} }
@@ -1522,8 +1524,7 @@ int probe_access_flags(CPUArchState *env, vaddr addr, int size,
/* Handle clean RAM pages. */ /* Handle clean RAM pages. */
if (unlikely(flags & TLB_NOTDIRTY)) { if (unlikely(flags & TLB_NOTDIRTY)) {
int dirtysize = size == 0 ? 1 : size; notdirty_write(env_cpu(env), addr, 1, full, retaddr);
notdirty_write(env_cpu(env), addr, dirtysize, full, retaddr);
flags &= ~TLB_NOTDIRTY; flags &= ~TLB_NOTDIRTY;
} }
@@ -1559,7 +1560,7 @@ void *probe_access(CPUArchState *env, vaddr addr, int size,
/* Handle clean RAM pages. */ /* Handle clean RAM pages. */
if (flags & TLB_NOTDIRTY) { if (flags & TLB_NOTDIRTY) {
notdirty_write(env_cpu(env), addr, size, full, retaddr); notdirty_write(env_cpu(env), addr, 1, full, retaddr);
} }
} }
@@ -1597,7 +1598,7 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
void *p; void *p;
(void)probe_access_internal(env_cpu(env), addr, 1, MMU_INST_FETCH, (void)probe_access_internal(env_cpu(env), addr, 1, MMU_INST_FETCH,
cpu_mmu_index(env_cpu(env), true), false, cpu_mmu_index(env, true), false,
&p, &full, 0, false); &p, &full, 0, false);
if (p == NULL) { if (p == NULL) {
return -1; return -1;
@@ -1835,31 +1836,6 @@ static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
tcg_debug_assert((flags & TLB_BSWAP) == 0); tcg_debug_assert((flags & TLB_BSWAP) == 0);
} }
/*
* This alignment check differs from the one above, in that this is
* based on the atomicity of the operation. The intended use case is
* the ARM memory type field of each PTE, where access to pages with
* Device memory type require alignment.
*/
if (unlikely(flags & TLB_CHECK_ALIGNED)) {
MemOp size = l->memop & MO_SIZE;
switch (l->memop & MO_ATOM_MASK) {
case MO_ATOM_NONE:
size = MO_8;
break;
case MO_ATOM_IFALIGN_PAIR:
case MO_ATOM_WITHIN16_PAIR:
size = size ? size - 1 : 0;
break;
default:
break;
}
if (addr & ((1 << size) - 1)) {
cpu_unaligned_access(cpu, addr, type, l->mmu_idx, ra);
}
}
return crosspage; return crosspage;
} }
@@ -1996,7 +1972,7 @@ static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
* @size: number of bytes * @size: number of bytes
* @mmu_idx: virtual address context * @mmu_idx: virtual address context
* @ra: return address into tcg generated code, or 0 * @ra: return address into tcg generated code, or 0
* Context: BQL held * Context: iothread lock held
* *
* Load @size bytes from @addr, which is memory-mapped i/o. * Load @size bytes from @addr, which is memory-mapped i/o.
* The bytes are concatenated in big-endian order with @ret_be. * The bytes are concatenated in big-endian order with @ret_be.
@@ -2043,6 +2019,7 @@ static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
MemoryRegion *mr; MemoryRegion *mr;
hwaddr mr_offset; hwaddr mr_offset;
MemTxAttrs attrs; MemTxAttrs attrs;
uint64_t ret;
tcg_debug_assert(size > 0 && size <= 8); tcg_debug_assert(size > 0 && size <= 8);
@@ -2050,9 +2027,12 @@ static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
mr = section->mr; mr = section->mr;
BQL_LOCK_GUARD(); qemu_mutex_lock_iothread();
return int_ld_mmio_beN(cpu, full, ret_be, addr, size, mmu_idx, ret = int_ld_mmio_beN(cpu, full, ret_be, addr, size, mmu_idx,
type, ra, mr, mr_offset); type, ra, mr, mr_offset);
qemu_mutex_unlock_iothread();
return ret;
} }
static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full, static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
@@ -2071,11 +2051,13 @@ static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
mr = section->mr; mr = section->mr;
BQL_LOCK_GUARD(); qemu_mutex_lock_iothread();
a = int_ld_mmio_beN(cpu, full, ret_be, addr, size - 8, mmu_idx, a = int_ld_mmio_beN(cpu, full, ret_be, addr, size - 8, mmu_idx,
MMU_DATA_LOAD, ra, mr, mr_offset); MMU_DATA_LOAD, ra, mr, mr_offset);
b = int_ld_mmio_beN(cpu, full, ret_be, addr + size - 8, 8, mmu_idx, b = int_ld_mmio_beN(cpu, full, ret_be, addr + size - 8, 8, mmu_idx,
MMU_DATA_LOAD, ra, mr, mr_offset + size - 8); MMU_DATA_LOAD, ra, mr, mr_offset + size - 8);
qemu_mutex_unlock_iothread();
return int128_make128(b, a); return int128_make128(b, a);
} }
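
The MMIO load hunks contrast two lockings of the same critical section: explicit lock/unlock forces a ret temporary so the unlock can run before the return, while a scope-based guard releases the lock automatically on function exit and allows a direct return. A sketch of both shapes; BQL_LOCK_GUARD is the macro named in the diff, and do_locked_work() is a placeholder:

static uint64_t read_mmio(CPUState *cpu, vaddr addr)
{
    BQL_LOCK_GUARD();                   /* lock now; auto-unlock at scope exit */
    return do_locked_work(cpu, addr);   /* early return is safe */
}

static uint64_t read_mmio_explicit(CPUState *cpu, vaddr addr)
{
    uint64_t ret;

    qemu_mutex_lock_iothread();
    ret = do_locked_work(cpu, addr);    /* must stash the result... */
    qemu_mutex_unlock_iothread();       /* ...so the unlock can happen first */
    return ret;
}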
@@ -2536,7 +2518,7 @@ static Int128 do_ld16_mmu(CPUState *cpu, vaddr addr,
* @size: number of bytes * @size: number of bytes
* @mmu_idx: virtual address context * @mmu_idx: virtual address context
* @ra: return address into tcg generated code, or 0 * @ra: return address into tcg generated code, or 0
* Context: BQL held * Context: iothread lock held
* *
* Store @size bytes at @addr, which is memory-mapped i/o. * Store @size bytes at @addr, which is memory-mapped i/o.
* The bytes to store are extracted in little-endian order from @val_le; * The bytes to store are extracted in little-endian order from @val_le;
@@ -2584,6 +2566,7 @@ static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
hwaddr mr_offset; hwaddr mr_offset;
MemoryRegion *mr; MemoryRegion *mr;
MemTxAttrs attrs; MemTxAttrs attrs;
uint64_t ret;
tcg_debug_assert(size > 0 && size <= 8); tcg_debug_assert(size > 0 && size <= 8);
@@ -2591,9 +2574,12 @@ static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
mr = section->mr; mr = section->mr;
BQL_LOCK_GUARD(); qemu_mutex_lock_iothread();
return int_st_mmio_leN(cpu, full, val_le, addr, size, mmu_idx, ret = int_st_mmio_leN(cpu, full, val_le, addr, size, mmu_idx,
ra, mr, mr_offset); ra, mr, mr_offset);
qemu_mutex_unlock_iothread();
return ret;
} }
static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full, static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
@@ -2604,6 +2590,7 @@ static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
MemoryRegion *mr; MemoryRegion *mr;
hwaddr mr_offset; hwaddr mr_offset;
MemTxAttrs attrs; MemTxAttrs attrs;
uint64_t ret;
tcg_debug_assert(size > 8 && size <= 16); tcg_debug_assert(size > 8 && size <= 16);
@@ -2611,11 +2598,14 @@ static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra); section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
mr = section->mr; mr = section->mr;
BQL_LOCK_GUARD(); qemu_mutex_lock_iothread();
int_st_mmio_leN(cpu, full, int128_getlo(val_le), addr, 8, int_st_mmio_leN(cpu, full, int128_getlo(val_le), addr, 8,
mmu_idx, ra, mr, mr_offset); mmu_idx, ra, mr, mr_offset);
return int_st_mmio_leN(cpu, full, int128_gethi(val_le), addr + 8, ret = int_st_mmio_leN(cpu, full, int128_gethi(val_le), addr + 8,
size - 8, mmu_idx, ra, mr, mr_offset + 8); size - 8, mmu_idx, ra, mr, mr_offset + 8);
qemu_mutex_unlock_iothread();
return ret;
} }
/* /*
@@ -2966,30 +2956,26 @@ static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr) uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr)
{ {
CPUState *cs = env_cpu(env); MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(env, true));
MemOpIdx oi = make_memop_idx(MO_UB, cpu_mmu_index(cs, true)); return do_ld1_mmu(env_cpu(env), addr, oi, 0, MMU_INST_FETCH);
return do_ld1_mmu(cs, addr, oi, 0, MMU_INST_FETCH);
} }
uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr) uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr)
{ {
CPUState *cs = env_cpu(env); MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(env, true));
MemOpIdx oi = make_memop_idx(MO_TEUW, cpu_mmu_index(cs, true)); return do_ld2_mmu(env_cpu(env), addr, oi, 0, MMU_INST_FETCH);
return do_ld2_mmu(cs, addr, oi, 0, MMU_INST_FETCH);
} }
uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr) uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr)
{ {
CPUState *cs = env_cpu(env); MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(env, true));
MemOpIdx oi = make_memop_idx(MO_TEUL, cpu_mmu_index(cs, true)); return do_ld4_mmu(env_cpu(env), addr, oi, 0, MMU_INST_FETCH);
return do_ld4_mmu(cs, addr, oi, 0, MMU_INST_FETCH);
} }
uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr) uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
{ {
CPUState *cs = env_cpu(env); MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(cs, true)); return do_ld8_mmu(env_cpu(env), addr, oi, 0, MMU_INST_FETCH);
return do_ld8_mmu(cs, addr, oi, 0, MMU_INST_FETCH);
} }
uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr, uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,


@@ -6,10 +6,11 @@
#include "qemu/osdep.h" #include "qemu/osdep.h"
#include "qemu/lockable.h" #include "qemu/lockable.h"
#include "tcg/debuginfo.h"
#include <elfutils/libdwfl.h> #include <elfutils/libdwfl.h>
#include "debuginfo.h"
static QemuMutex lock; static QemuMutex lock;
static Dwfl *dwfl; static Dwfl *dwfl;
static const Dwfl_Callbacks dwfl_callbacks = { static const Dwfl_Callbacks dwfl_callbacks = {


@@ -4,8 +4,8 @@
* SPDX-License-Identifier: GPL-2.0-or-later * SPDX-License-Identifier: GPL-2.0-or-later
*/ */
#ifndef TCG_DEBUGINFO_H #ifndef ACCEL_TCG_DEBUGINFO_H
#define TCG_DEBUGINFO_H #define ACCEL_TCG_DEBUGINFO_H
#include "qemu/bitops.h" #include "qemu/bitops.h"


@@ -49,19 +49,21 @@ static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed. */ /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
#define MAX_ICOUNT_SHIFT 10 #define MAX_ICOUNT_SHIFT 10
Base side (-):

/* Do not count executed instructions */
ICountMode use_icount = ICOUNT_DISABLED;

static void icount_enable_precise(void)
{
    /* Fixed conversion of insn to ns via "shift" option */
    use_icount = ICOUNT_PRECISE;
}

static void icount_enable_adaptive(void)
{
    /* Runtime adaptive algorithm to compute shift */
    use_icount = ICOUNT_ADAPTATIVE;
}

Head side (+):

/*
 * 0 = Do not count executed instructions.
 * 1 = Fixed conversion of insn to ns via "shift" option
 * 2 = Runtime adaptive algorithm to compute shift
 */
int use_icount;

static void icount_enable_precise(void)
{
    use_icount = 1;
}

static void icount_enable_adaptive(void)
{
    use_icount = 2;
}
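
The int-vs-enum change above replaces the magic values 0/1/2 with a named ICountMode, so later code can compare against ICOUNT_ADAPTATIVE instead of a bare 2. A sketch of the enum side as implied by the diff; the enum definition itself lives in a header not shown here, so the exact spelling is inferred:

typedef enum {
    ICOUNT_DISABLED = 0,   /* do not count executed instructions */
    ICOUNT_PRECISE,        /* fixed insn->ns conversion via "shift" option */
    ICOUNT_ADAPTATIVE,     /* runtime adaptive algorithm computes shift */
} ICountMode;

ICountMode use_icount = ICOUNT_DISABLED;

static bool icount_is_adaptive(void)
{
    return use_icount == ICOUNT_ADAPTATIVE;   /* no bare integer compares */
}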
/* /*
@@ -254,7 +256,7 @@ static void icount_warp_rt(void)
int64_t warp_delta; int64_t warp_delta;
warp_delta = clock - timers_state.vm_clock_warp_start; warp_delta = clock - timers_state.vm_clock_warp_start;
if (icount_enabled() == ICOUNT_ADAPTATIVE) { if (icount_enabled() == 2) {
/* /*
* In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far
* ahead of real time (it might already be ahead so careful not * ahead of real time (it might already be ahead so careful not
@@ -417,7 +419,7 @@ void icount_account_warp_timer(void)
icount_warp_rt(); icount_warp_rt();
} }
bool icount_configure(QemuOpts *opts, Error **errp) void icount_configure(QemuOpts *opts, Error **errp)
{ {
const char *option = qemu_opt_get(opts, "shift"); const char *option = qemu_opt_get(opts, "shift");
bool sleep = qemu_opt_get_bool(opts, "sleep", true); bool sleep = qemu_opt_get_bool(opts, "sleep", true);
@@ -427,28 +429,27 @@ bool icount_configure(QemuOpts *opts, Error **errp)
if (!option) { if (!option) {
if (qemu_opt_get(opts, "align") != NULL) { if (qemu_opt_get(opts, "align") != NULL) {
error_setg(errp, "Please specify shift option when using align"); error_setg(errp, "Please specify shift option when using align");
return false;
} }
return true; return;
} }
if (align && !sleep) { if (align && !sleep) {
error_setg(errp, "align=on and sleep=off are incompatible"); error_setg(errp, "align=on and sleep=off are incompatible");
return false; return;
} }
if (strcmp(option, "auto") != 0) { if (strcmp(option, "auto") != 0) {
if (qemu_strtol(option, NULL, 0, &time_shift) < 0 if (qemu_strtol(option, NULL, 0, &time_shift) < 0
|| time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) { || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) {
error_setg(errp, "icount: Invalid shift value"); error_setg(errp, "icount: Invalid shift value");
return false; return;
} }
} else if (icount_align_option) { } else if (icount_align_option) {
error_setg(errp, "shift=auto and align=on are incompatible"); error_setg(errp, "shift=auto and align=on are incompatible");
return false; return;
} else if (!icount_sleep) { } else if (!icount_sleep) {
error_setg(errp, "shift=auto and sleep=off are incompatible"); error_setg(errp, "shift=auto and sleep=off are incompatible");
return false; return;
} }
icount_sleep = sleep; icount_sleep = sleep;
@@ -462,7 +463,7 @@ bool icount_configure(QemuOpts *opts, Error **errp)
if (time_shift >= 0) { if (time_shift >= 0) {
timers_state.icount_time_shift = time_shift; timers_state.icount_time_shift = time_shift;
icount_enable_precise(); icount_enable_precise();
return true; return;
} }
icount_enable_adaptive(); icount_enable_adaptive();
@@ -490,14 +491,11 @@ bool icount_configure(QemuOpts *opts, Error **errp)
timer_mod(timers_state.icount_vm_timer, timer_mod(timers_state.icount_vm_timer,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
NANOSECONDS_PER_SECOND / 10); NANOSECONDS_PER_SECOND / 10);
return true;
} }
void icount_notify_exit(void) void icount_notify_exit(void)
{ {
assert(icount_enabled()); if (icount_enabled() && current_cpu) {
if (current_cpu) {
qemu_cpu_kick(current_cpu); qemu_cpu_kick(current_cpu);
qemu_clock_notify(QEMU_CLOCK_VIRTUAL); qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
} }
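Aside on the icount hunks above: the left column retires the bare `int use_icount` tri-state in favour of a named mode, and icount_configure() now reports success through a bool instead of returning void. A minimal sketch of the enum implied by the identifiers in this diff (an illustration, not the verbatim QEMU header):

    /* Sketch only; inferred from ICOUNT_DISABLED/ICOUNT_PRECISE/ICOUNT_ADAPTATIVE above. */
    typedef enum {
        ICOUNT_DISABLED = 0,   /* do not count executed instructions */
        ICOUNT_PRECISE,        /* fixed insn-to-ns conversion via the "shift" option */
        ICOUNT_ADAPTATIVE,     /* shift recomputed at runtime */
    } ICountMode;

With that, callers compare against a mode rather than a magic number, as in the icount_warp_rt() hunk: if (icount_enabled() == ICOUNT_ADAPTATIVE).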

View File

@@ -76,7 +76,7 @@ static int required_atomicity(CPUState *cpu, uintptr_t p, MemOp memop)
/* /*
* Examine the alignment of p to determine if there are subobjects * Examine the alignment of p to determine if there are subobjects
* that must be aligned. Note that we only really need ctz4() -- * that must be aligned. Note that we only really need ctz4() --
* any more significant bits are discarded by the immediately * any more sigificant bits are discarded by the immediately
* following comparison. * following comparison.
*/ */
tmp = ctz32(p); tmp = ctz32(p);

View File

@@ -354,8 +354,7 @@ void cpu_stq_le_mmuidx_ra(CPUArchState *env, abi_ptr addr, uint64_t val,
uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) uint32_t cpu_ldub_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); return cpu_ldub_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
return cpu_ldub_mmuidx_ra(env, addr, mmu_index, ra);
} }
int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
@@ -365,8 +364,7 @@ int cpu_ldsb_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) uint32_t cpu_lduw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); return cpu_lduw_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
return cpu_lduw_be_mmuidx_ra(env, addr, mmu_index, ra);
} }
int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
@@ -376,20 +374,17 @@ int cpu_ldsw_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) uint32_t cpu_ldl_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); return cpu_ldl_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
return cpu_ldl_be_mmuidx_ra(env, addr, mmu_index, ra);
} }
uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) uint64_t cpu_ldq_be_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); return cpu_ldq_be_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
return cpu_ldq_be_mmuidx_ra(env, addr, mmu_index, ra);
} }
uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) uint32_t cpu_lduw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); return cpu_lduw_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
return cpu_lduw_le_mmuidx_ra(env, addr, mmu_index, ra);
} }
int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
@@ -399,63 +394,54 @@ int cpu_ldsw_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) uint32_t cpu_ldl_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); return cpu_ldl_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
return cpu_ldl_le_mmuidx_ra(env, addr, mmu_index, ra);
} }
uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra) uint64_t cpu_ldq_le_data_ra(CPUArchState *env, abi_ptr addr, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); return cpu_ldq_le_mmuidx_ra(env, addr, cpu_mmu_index(env, false), ra);
return cpu_ldq_le_mmuidx_ra(env, addr, mmu_index, ra);
} }
void cpu_stb_data_ra(CPUArchState *env, abi_ptr addr, void cpu_stb_data_ra(CPUArchState *env, abi_ptr addr,
uint32_t val, uintptr_t ra) uint32_t val, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); cpu_stb_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
cpu_stb_mmuidx_ra(env, addr, val, mmu_index, ra);
} }
void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr addr, void cpu_stw_be_data_ra(CPUArchState *env, abi_ptr addr,
uint32_t val, uintptr_t ra) uint32_t val, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); cpu_stw_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
cpu_stw_be_mmuidx_ra(env, addr, val, mmu_index, ra);
} }
void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr addr, void cpu_stl_be_data_ra(CPUArchState *env, abi_ptr addr,
uint32_t val, uintptr_t ra) uint32_t val, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); cpu_stl_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
cpu_stl_be_mmuidx_ra(env, addr, val, mmu_index, ra);
} }
void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr addr, void cpu_stq_be_data_ra(CPUArchState *env, abi_ptr addr,
uint64_t val, uintptr_t ra) uint64_t val, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); cpu_stq_be_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
cpu_stq_be_mmuidx_ra(env, addr, val, mmu_index, ra);
} }
void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr addr, void cpu_stw_le_data_ra(CPUArchState *env, abi_ptr addr,
uint32_t val, uintptr_t ra) uint32_t val, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); cpu_stw_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
cpu_stw_le_mmuidx_ra(env, addr, val, mmu_index, ra);
} }
void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr addr, void cpu_stl_le_data_ra(CPUArchState *env, abi_ptr addr,
uint32_t val, uintptr_t ra) uint32_t val, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); cpu_stl_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
cpu_stl_le_mmuidx_ra(env, addr, val, mmu_index, ra);
} }
void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr addr, void cpu_stq_le_data_ra(CPUArchState *env, abi_ptr addr,
uint64_t val, uintptr_t ra) uint64_t val, uintptr_t ra)
{ {
int mmu_index = cpu_mmu_index(env_cpu(env), false); cpu_stq_le_mmuidx_ra(env, addr, val, cpu_mmu_index(env, false), ra);
cpu_stq_le_mmuidx_ra(env, addr, val, mmu_index, ra);
} }
/*--------------------------*/ /*--------------------------*/
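Every hunk in this file is the same mechanical change: cpu_mmu_index() is now fed the CPUState (via env_cpu(env)) instead of the CPUArchState, and its result is hoisted into a local before the *_mmuidx_ra call. Reduced to the two signatures (a sketch; only the first parameter differs):

    /* left column (new):  */ int cpu_mmu_index(CPUState *cs, bool ifetch);
    /* right column (old): */ int cpu_mmu_index(CPUArchState *env, bool ifetch);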

View File

@@ -1,8 +1,8 @@
tcg_ss = ss.source_set()
common_ss.add(when: 'CONFIG_TCG', if_true: files( common_ss.add(when: 'CONFIG_TCG', if_true: files(
'cpu-exec-common.c', 'cpu-exec-common.c',
)) ))
tcg_specific_ss = ss.source_set() tcg_ss.add(files(
tcg_specific_ss.add(files(
'tcg-all.c', 'tcg-all.c',
'cpu-exec.c', 'cpu-exec.c',
'tb-maint.c', 'tb-maint.c',
@@ -11,16 +11,17 @@ tcg_specific_ss.add(files(
'translate-all.c', 'translate-all.c',
'translator.c', 'translator.c',
)) ))
tcg_specific_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c')) tcg_ss.add(when: 'CONFIG_USER_ONLY', if_true: files('user-exec.c'))
tcg_specific_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_false: files('user-exec-stub.c')) tcg_ss.add(when: 'CONFIG_SYSTEM_ONLY', if_false: files('user-exec-stub.c'))
if get_option('plugins') if get_option('plugins')
tcg_specific_ss.add(files('plugin-gen.c')) tcg_ss.add(files('plugin-gen.c'))
endif endif
specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_specific_ss) tcg_ss.add(when: libdw, if_true: files('debuginfo.c'))
tcg_ss.add(when: 'CONFIG_LINUX', if_true: files('perf.c'))
specific_ss.add_all(when: 'CONFIG_TCG', if_true: tcg_ss)
specific_ss.add(when: ['CONFIG_SYSTEM_ONLY', 'CONFIG_TCG'], if_true: files( specific_ss.add(when: ['CONFIG_SYSTEM_ONLY', 'CONFIG_TCG'], if_true: files(
'cputlb.c', 'cputlb.c',
'watchpoint.c',
)) ))
system_ss.add(when: ['CONFIG_TCG'], if_true: files( system_ss.add(when: ['CONFIG_TCG'], if_true: files(

View File

@@ -10,13 +10,13 @@
#include "qemu/osdep.h" #include "qemu/osdep.h"
#include "elf.h" #include "elf.h"
#include "exec/target_page.h" #include "exec/exec-all.h"
#include "exec/translation-block.h"
#include "qemu/timer.h" #include "qemu/timer.h"
#include "tcg/debuginfo.h"
#include "tcg/perf.h"
#include "tcg/tcg.h" #include "tcg/tcg.h"
#include "debuginfo.h"
#include "perf.h"
static FILE *safe_fopen_w(const char *path) static FILE *safe_fopen_w(const char *path)
{ {
int saved_errno; int saved_errno;
@@ -335,7 +335,11 @@ void perf_report_code(uint64_t guest_pc, TranslationBlock *tb,
/* FIXME: This replicates the restore_state_to_opc() logic. */ /* FIXME: This replicates the restore_state_to_opc() logic. */
q[insn].address = gen_insn_data[insn * start_words + 0]; q[insn].address = gen_insn_data[insn * start_words + 0];
if (tb_cflags(tb) & CF_PCREL) { if (tb_cflags(tb) & CF_PCREL) {
q[insn].address |= (guest_pc & qemu_target_page_mask()); q[insn].address |= (guest_pc & TARGET_PAGE_MASK);
} else {
#if defined(TARGET_I386)
q[insn].address -= tb->cs_base;
#endif
} }
q[insn].flags = DEBUGINFO_SYMBOL | (jitdump ? DEBUGINFO_LINE : 0); q[insn].flags = DEBUGINFO_SYMBOL | (jitdump ? DEBUGINFO_LINE : 0);
} }

View File

@@ -4,8 +4,8 @@
* SPDX-License-Identifier: GPL-2.0-or-later * SPDX-License-Identifier: GPL-2.0-or-later
*/ */
#ifndef TCG_PERF_H #ifndef ACCEL_TCG_PERF_H
#define TCG_PERF_H #define ACCEL_TCG_PERF_H
#if defined(CONFIG_TCG) && defined(CONFIG_LINUX) #if defined(CONFIG_TCG) && defined(CONFIG_LINUX)
/* Start writing perf-<pid>.map. */ /* Start writing perf-<pid>.map. */

View File

@@ -43,7 +43,6 @@
* CPU's index into a TCG temp, since the first callback did it already. * CPU's index into a TCG temp, since the first callback did it already.
*/ */
#include "qemu/osdep.h" #include "qemu/osdep.h"
#include "qemu/plugin.h"
#include "cpu.h" #include "cpu.h"
#include "tcg/tcg.h" #include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h" #include "tcg/tcg-temp-internal.h"
@@ -80,7 +79,6 @@ enum plugin_gen_from {
enum plugin_gen_cb { enum plugin_gen_cb {
PLUGIN_GEN_CB_UDATA, PLUGIN_GEN_CB_UDATA,
PLUGIN_GEN_CB_UDATA_R,
PLUGIN_GEN_CB_INLINE, PLUGIN_GEN_CB_INLINE,
PLUGIN_GEN_CB_MEM, PLUGIN_GEN_CB_MEM,
PLUGIN_GEN_ENABLE_MEM_HELPER, PLUGIN_GEN_ENABLE_MEM_HELPER,
@@ -92,10 +90,7 @@ enum plugin_gen_cb {
* These helpers are stubs that get dynamically switched out for calls * These helpers are stubs that get dynamically switched out for calls
* direct to the plugin if they are subscribed to. * direct to the plugin if they are subscribed to.
*/ */
void HELPER(plugin_vcpu_udata_cb_no_wg)(uint32_t cpu_index, void *udata) void HELPER(plugin_vcpu_udata_cb)(uint32_t cpu_index, void *udata)
{ }
void HELPER(plugin_vcpu_udata_cb_no_rwg)(uint32_t cpu_index, void *udata)
{ } { }
void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index, void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index,
@@ -103,7 +98,7 @@ void HELPER(plugin_vcpu_mem_cb)(unsigned int vcpu_index,
void *userdata) void *userdata)
{ } { }
static void gen_empty_udata_cb(void (*gen_helper)(TCGv_i32, TCGv_ptr)) static void gen_empty_udata_cb(void)
{ {
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32(); TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
TCGv_ptr udata = tcg_temp_ebb_new_ptr(); TCGv_ptr udata = tcg_temp_ebb_new_ptr();
@@ -111,50 +106,28 @@ static void gen_empty_udata_cb(void (*gen_helper)(TCGv_i32, TCGv_ptr))
tcg_gen_movi_ptr(udata, 0); tcg_gen_movi_ptr(udata, 0);
tcg_gen_ld_i32(cpu_index, tcg_env, tcg_gen_ld_i32(cpu_index, tcg_env,
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index)); -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
gen_helper(cpu_index, udata); gen_helper_plugin_vcpu_udata_cb(cpu_index, udata);
tcg_temp_free_ptr(udata); tcg_temp_free_ptr(udata);
tcg_temp_free_i32(cpu_index); tcg_temp_free_i32(cpu_index);
} }
static void gen_empty_udata_cb_no_wg(void)
{
gen_empty_udata_cb(gen_helper_plugin_vcpu_udata_cb_no_wg);
}
static void gen_empty_udata_cb_no_rwg(void)
{
gen_empty_udata_cb(gen_helper_plugin_vcpu_udata_cb_no_rwg);
}
/* /*
* For now we only support addi_i64. * For now we only support addi_i64.
* When we support more ops, we can generate one empty inline cb for each. * When we support more ops, we can generate one empty inline cb for each.
*/ */
static void gen_empty_inline_cb(void) static void gen_empty_inline_cb(void)
{ {
TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
TCGv_ptr cpu_index_as_ptr = tcg_temp_ebb_new_ptr();
TCGv_i64 val = tcg_temp_ebb_new_i64(); TCGv_i64 val = tcg_temp_ebb_new_i64();
TCGv_ptr ptr = tcg_temp_ebb_new_ptr(); TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
tcg_gen_ld_i32(cpu_index, tcg_env,
-offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
/* second operand will be replaced by immediate value */
tcg_gen_mul_i32(cpu_index, cpu_index, cpu_index);
tcg_gen_ext_i32_ptr(cpu_index_as_ptr, cpu_index);
tcg_gen_movi_ptr(ptr, 0); tcg_gen_movi_ptr(ptr, 0);
tcg_gen_add_ptr(ptr, ptr, cpu_index_as_ptr);
tcg_gen_ld_i64(val, ptr, 0); tcg_gen_ld_i64(val, ptr, 0);
/* second operand will be replaced by immediate value */ /* pass an immediate != 0 so that it doesn't get optimized away */
tcg_gen_add_i64(val, val, val); tcg_gen_addi_i64(val, val, 0xdeadface);
tcg_gen_st_i64(val, ptr, 0); tcg_gen_st_i64(val, ptr, 0);
tcg_temp_free_ptr(ptr); tcg_temp_free_ptr(ptr);
tcg_temp_free_i64(val); tcg_temp_free_i64(val);
tcg_temp_free_ptr(cpu_index_as_ptr);
tcg_temp_free_i32(cpu_index);
} }
static void gen_empty_mem_cb(TCGv_i64 addr, uint32_t info) static void gen_empty_mem_cb(TCGv_i64 addr, uint32_t info)
@@ -219,8 +192,7 @@ static void plugin_gen_empty_callback(enum plugin_gen_from from)
gen_empty_mem_helper); gen_empty_mem_helper);
/* fall through */ /* fall through */
case PLUGIN_GEN_FROM_TB: case PLUGIN_GEN_FROM_TB:
gen_wrapped(from, PLUGIN_GEN_CB_UDATA, gen_empty_udata_cb_no_rwg); gen_wrapped(from, PLUGIN_GEN_CB_UDATA, gen_empty_udata_cb);
gen_wrapped(from, PLUGIN_GEN_CB_UDATA_R, gen_empty_udata_cb_no_wg);
gen_wrapped(from, PLUGIN_GEN_CB_INLINE, gen_empty_inline_cb); gen_wrapped(from, PLUGIN_GEN_CB_INLINE, gen_empty_inline_cb);
break; break;
default: default:
@@ -302,37 +274,12 @@ static TCGOp *copy_const_ptr(TCGOp **begin_op, TCGOp *op, void *ptr)
return op; return op;
} }
static TCGOp *copy_ld_i32(TCGOp **begin_op, TCGOp *op)
{
return copy_op(begin_op, op, INDEX_op_ld_i32);
}
static TCGOp *copy_ext_i32_ptr(TCGOp **begin_op, TCGOp *op)
{
if (UINTPTR_MAX == UINT32_MAX) {
op = copy_op(begin_op, op, INDEX_op_mov_i32);
} else {
op = copy_op(begin_op, op, INDEX_op_ext_i32_i64);
}
return op;
}
static TCGOp *copy_add_ptr(TCGOp **begin_op, TCGOp *op)
{
if (UINTPTR_MAX == UINT32_MAX) {
op = copy_op(begin_op, op, INDEX_op_add_i32);
} else {
op = copy_op(begin_op, op, INDEX_op_add_i64);
}
return op;
}
static TCGOp *copy_ld_i64(TCGOp **begin_op, TCGOp *op) static TCGOp *copy_ld_i64(TCGOp **begin_op, TCGOp *op)
{ {
if (TCG_TARGET_REG_BITS == 32) { if (TCG_TARGET_REG_BITS == 32) {
/* 2x ld_i32 */ /* 2x ld_i32 */
op = copy_ld_i32(begin_op, op); op = copy_op(begin_op, op, INDEX_op_ld_i32);
op = copy_ld_i32(begin_op, op); op = copy_op(begin_op, op, INDEX_op_ld_i32);
} else { } else {
/* ld_i64 */ /* ld_i64 */
op = copy_op(begin_op, op, INDEX_op_ld_i64); op = copy_op(begin_op, op, INDEX_op_ld_i64);
@@ -368,13 +315,6 @@ static TCGOp *copy_add_i64(TCGOp **begin_op, TCGOp *op, uint64_t v)
return op; return op;
} }
static TCGOp *copy_mul_i32(TCGOp **begin_op, TCGOp *op, uint32_t v)
{
op = copy_op(begin_op, op, INDEX_op_mul_i32);
op->args[2] = tcgv_i32_arg(tcg_constant_i32(v));
return op;
}
static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op) static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op)
{ {
if (UINTPTR_MAX == UINT32_MAX) { if (UINTPTR_MAX == UINT32_MAX) {
@@ -440,19 +380,18 @@ static TCGOp *append_inline_cb(const struct qemu_plugin_dyn_cb *cb,
TCGOp *begin_op, TCGOp *op, TCGOp *begin_op, TCGOp *op,
int *unused) int *unused)
{ {
char *ptr = cb->inline_insn.entry.score->data->data; /* const_ptr */
size_t elem_size = g_array_get_element_size( op = copy_const_ptr(&begin_op, op, cb->userp);
cb->inline_insn.entry.score->data);
size_t offset = cb->inline_insn.entry.offset;
op = copy_ld_i32(&begin_op, op); /* ld_i64 */
op = copy_mul_i32(&begin_op, op, elem_size);
op = copy_ext_i32_ptr(&begin_op, op);
op = copy_const_ptr(&begin_op, op, ptr + offset);
op = copy_add_ptr(&begin_op, op);
op = copy_ld_i64(&begin_op, op); op = copy_ld_i64(&begin_op, op);
/* add_i64 */
op = copy_add_i64(&begin_op, op, cb->inline_insn.imm); op = copy_add_i64(&begin_op, op, cb->inline_insn.imm);
/* st_i64 */
op = copy_st_i64(&begin_op, op); op = copy_st_i64(&begin_op, op);
return op; return op;
} }
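The rewritten append_inline_cb (left column) stops patching a single constant pointer and instead indexes a per-vCPU scoreboard: gen_empty_inline_cb's placeholder ops, including the mul whose second operand is "replaced by immediate value", are rewritten with the element size, base pointer and immediate. In plain C, the address the patched ops compute is roughly (a sketch built from the fields visible above):

    char *base     = cb->inline_insn.entry.score->data->data;
    size_t elem_sz = g_array_get_element_size(cb->inline_insn.entry.score->data);
    size_t offset  = cb->inline_insn.entry.offset;
    uint64_t *slot = (uint64_t *)(base + offset + (size_t)cpu_index * elem_sz);
    *slot += cb->inline_insn.imm;   /* the ld_i64 / add_i64 / st_i64 triple */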
@@ -649,12 +588,6 @@ static void plugin_gen_tb_udata(const struct qemu_plugin_tb *ptb,
inject_udata_cb(ptb->cbs[PLUGIN_CB_REGULAR], begin_op); inject_udata_cb(ptb->cbs[PLUGIN_CB_REGULAR], begin_op);
} }
static void plugin_gen_tb_udata_r(const struct qemu_plugin_tb *ptb,
TCGOp *begin_op)
{
inject_udata_cb(ptb->cbs[PLUGIN_CB_REGULAR_R], begin_op);
}
static void plugin_gen_tb_inline(const struct qemu_plugin_tb *ptb, static void plugin_gen_tb_inline(const struct qemu_plugin_tb *ptb,
TCGOp *begin_op) TCGOp *begin_op)
{ {
@@ -669,14 +602,6 @@ static void plugin_gen_insn_udata(const struct qemu_plugin_tb *ptb,
inject_udata_cb(insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR], begin_op); inject_udata_cb(insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR], begin_op);
} }
static void plugin_gen_insn_udata_r(const struct qemu_plugin_tb *ptb,
TCGOp *begin_op, int insn_idx)
{
struct qemu_plugin_insn *insn = g_ptr_array_index(ptb->insns, insn_idx);
inject_udata_cb(insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR_R], begin_op);
}
static void plugin_gen_insn_inline(const struct qemu_plugin_tb *ptb, static void plugin_gen_insn_inline(const struct qemu_plugin_tb *ptb,
TCGOp *begin_op, int insn_idx) TCGOp *begin_op, int insn_idx)
{ {
@@ -796,9 +721,6 @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb)
case PLUGIN_GEN_CB_UDATA: case PLUGIN_GEN_CB_UDATA:
plugin_gen_tb_udata(plugin_tb, op); plugin_gen_tb_udata(plugin_tb, op);
break; break;
case PLUGIN_GEN_CB_UDATA_R:
plugin_gen_tb_udata_r(plugin_tb, op);
break;
case PLUGIN_GEN_CB_INLINE: case PLUGIN_GEN_CB_INLINE:
plugin_gen_tb_inline(plugin_tb, op); plugin_gen_tb_inline(plugin_tb, op);
break; break;
@@ -815,9 +737,6 @@ static void plugin_gen_inject(struct qemu_plugin_tb *plugin_tb)
case PLUGIN_GEN_CB_UDATA: case PLUGIN_GEN_CB_UDATA:
plugin_gen_insn_udata(plugin_tb, op, insn_idx); plugin_gen_insn_udata(plugin_tb, op, insn_idx);
break; break;
case PLUGIN_GEN_CB_UDATA_R:
plugin_gen_insn_udata_r(plugin_tb, op, insn_idx);
break;
case PLUGIN_GEN_CB_INLINE: case PLUGIN_GEN_CB_INLINE:
plugin_gen_insn_inline(plugin_tb, op, insn_idx); plugin_gen_insn_inline(plugin_tb, op, insn_idx);
break; break;
@@ -877,7 +796,7 @@ bool plugin_gen_tb_start(CPUState *cpu, const DisasContextBase *db,
{ {
bool ret = false; bool ret = false;
if (test_bit(QEMU_PLUGIN_EV_VCPU_TB_TRANS, cpu->plugin_state->event_mask)) { if (test_bit(QEMU_PLUGIN_EV_VCPU_TB_TRANS, cpu->plugin_mask)) {
struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb; struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
int i; int i;

View File

@@ -1,5 +1,4 @@
#ifdef CONFIG_PLUGIN #ifdef CONFIG_PLUGIN
DEF_HELPER_FLAGS_2(plugin_vcpu_udata_cb_no_wg, TCG_CALL_NO_WG | TCG_CALL_PLUGIN, void, i32, ptr) DEF_HELPER_FLAGS_2(plugin_vcpu_udata_cb, TCG_CALL_NO_RWG | TCG_CALL_PLUGIN, void, i32, ptr)
DEF_HELPER_FLAGS_2(plugin_vcpu_udata_cb_no_rwg, TCG_CALL_NO_RWG | TCG_CALL_PLUGIN, void, i32, ptr)
DEF_HELPER_FLAGS_4(plugin_vcpu_mem_cb, TCG_CALL_NO_RWG | TCG_CALL_PLUGIN, void, i32, i32, i64, ptr) DEF_HELPER_FLAGS_4(plugin_vcpu_mem_cb, TCG_CALL_NO_RWG | TCG_CALL_PLUGIN, void, i32, i32, i64, ptr)
#endif #endif

View File

@@ -13,11 +13,9 @@
#define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS) #define TB_JMP_CACHE_SIZE (1 << TB_JMP_CACHE_BITS)
/* /*
* Invalidated in parallel; all accesses to 'tb' must be atomic. * Accessed in parallel; all accesses to 'tb' must be atomic.
* A valid entry is read/written by a single CPU, therefore there is * For CF_PCREL, accesses to 'pc' must be protected by a
* no need for qatomic_rcu_read() and pc is always consistent with a * load_acquire/store_release to 'tb'.
* non-NULL value of 'tb'. Strictly speaking pc is only needed for
* CF_PCREL, but it's used always for simplicity.
*/ */
struct CPUJumpCache { struct CPUJumpCache {
struct rcu_head rcu; struct rcu_head rcu;
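The load_acquire/store_release discipline named in the right-hand comment pairs like this (a sketch using QEMU's qatomic helpers; the array/field layout beyond 'tb' and 'pc' is an assumption):

    /* writer: publish pc before tb becomes visible */
    jc->array[hash].pc = pc;
    qatomic_store_release(&jc->array[hash].tb, tb);

    /* reader: the acquire on tb orders the following pc load */
    TranslationBlock *tb = qatomic_load_acquire(&jc->array[hash].tb);
    if (tb != NULL) {
        vaddr pc = jc->array[hash].pc;
    }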

View File

@@ -1021,7 +1021,7 @@ void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
* Called with mmap_lock held for user-mode emulation * Called with mmap_lock held for user-mode emulation
* NOTE: this function must not be called while a TB is running. * NOTE: this function must not be called while a TB is running.
*/ */
static void tb_invalidate_phys_page(tb_page_addr_t addr) void tb_invalidate_phys_page(tb_page_addr_t addr)
{ {
tb_page_addr_t start, last; tb_page_addr_t start, last;
@@ -1083,7 +1083,8 @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
if (current_tb_modified) { if (current_tb_modified) {
/* Force execution of one insn next time. */ /* Force execution of one insn next time. */
CPUState *cpu = current_cpu; CPUState *cpu = current_cpu;
cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu); cpu->cflags_next_tb =
1 | CF_LAST_IO | CF_NOIRQ | curr_cflags(current_cpu);
return true; return true;
} }
return false; return false;
@@ -1153,13 +1154,36 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages,
if (current_tb_modified) { if (current_tb_modified) {
page_collection_unlock(pages); page_collection_unlock(pages);
/* Force execution of one insn next time. */ /* Force execution of one insn next time. */
current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu); current_cpu->cflags_next_tb =
1 | CF_LAST_IO | CF_NOIRQ | curr_cflags(current_cpu);
mmap_unlock(); mmap_unlock();
cpu_loop_exit_noexc(current_cpu); cpu_loop_exit_noexc(current_cpu);
} }
#endif #endif
} }
/*
* Invalidate all TBs which intersect with the target physical
* address page @addr.
*/
void tb_invalidate_phys_page(tb_page_addr_t addr)
{
struct page_collection *pages;
tb_page_addr_t start, last;
PageDesc *p;
p = page_find(addr >> TARGET_PAGE_BITS);
if (p == NULL) {
return;
}
start = addr & TARGET_PAGE_MASK;
last = addr | ~TARGET_PAGE_MASK;
pages = page_collection_lock(start, last);
tb_invalidate_phys_page_range__locked(pages, p, start, last, 0);
page_collection_unlock(pages);
}
/* /*
* Invalidate all TBs which intersect with the target physical address range * Invalidate all TBs which intersect with the target physical address range
* [start;last]. NOTE: start and end may refer to *different* physical pages. * [start;last]. NOTE: start and end may refer to *different* physical pages.

View File

@@ -123,12 +123,12 @@ void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget)
if (cpu->icount_budget == 0) { if (cpu->icount_budget == 0) {
/* /*
* We're called without the BQL, so must take it while * We're called without the iothread lock, so must take it while
* we're calling timer handlers. * we're calling timer handlers.
*/ */
bql_lock(); qemu_mutex_lock_iothread();
icount_notify_aio_contexts(); icount_notify_aio_contexts();
bql_unlock(); qemu_mutex_unlock_iothread();
} }
} }

View File

@@ -76,7 +76,7 @@ static void *mttcg_cpu_thread_fn(void *arg)
rcu_add_force_rcu_notifier(&force_rcu.notifier); rcu_add_force_rcu_notifier(&force_rcu.notifier);
tcg_register_thread(); tcg_register_thread();
bql_lock(); qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread); qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id(); cpu->thread_id = qemu_get_thread_id();
@@ -91,9 +91,9 @@ static void *mttcg_cpu_thread_fn(void *arg)
do { do {
if (cpu_can_run(cpu)) { if (cpu_can_run(cpu)) {
int r; int r;
bql_unlock(); qemu_mutex_unlock_iothread();
r = tcg_cpu_exec(cpu); r = tcg_cpus_exec(cpu);
bql_lock(); qemu_mutex_lock_iothread();
switch (r) { switch (r) {
case EXCP_DEBUG: case EXCP_DEBUG:
cpu_handle_guest_debug(cpu); cpu_handle_guest_debug(cpu);
@@ -105,9 +105,9 @@ static void *mttcg_cpu_thread_fn(void *arg)
*/ */
break; break;
case EXCP_ATOMIC: case EXCP_ATOMIC:
bql_unlock(); qemu_mutex_unlock_iothread();
cpu_exec_step_atomic(cpu); cpu_exec_step_atomic(cpu);
bql_lock(); qemu_mutex_lock_iothread();
default: default:
/* Ignore everything else? */ /* Ignore everything else? */
break; break;
@@ -118,8 +118,8 @@ static void *mttcg_cpu_thread_fn(void *arg)
qemu_wait_io_event(cpu); qemu_wait_io_event(cpu);
} while (!cpu->unplug || cpu_can_run(cpu)); } while (!cpu->unplug || cpu_can_run(cpu));
tcg_cpu_destroy(cpu); tcg_cpus_destroy(cpu);
bql_unlock(); qemu_mutex_unlock_iothread();
rcu_remove_force_rcu_notifier(&force_rcu.notifier); rcu_remove_force_rcu_notifier(&force_rcu.notifier);
rcu_unregister_thread(); rcu_unregister_thread();
return NULL; return NULL;

View File

@@ -109,9 +109,9 @@ static void rr_wait_io_event(void)
{ {
CPUState *cpu; CPUState *cpu;
while (all_cpu_threads_idle() && replay_can_wait()) { while (all_cpu_threads_idle()) {
rr_stop_kick_timer(); rr_stop_kick_timer();
qemu_cond_wait_bql(first_cpu->halt_cond); qemu_cond_wait_iothread(first_cpu->halt_cond);
} }
rr_start_kick_timer(); rr_start_kick_timer();
@@ -131,7 +131,7 @@ static void rr_deal_with_unplugged_cpus(void)
CPU_FOREACH(cpu) { CPU_FOREACH(cpu) {
if (cpu->unplug && !cpu_can_run(cpu)) { if (cpu->unplug && !cpu_can_run(cpu)) {
tcg_cpu_destroy(cpu); tcg_cpus_destroy(cpu);
break; break;
} }
} }
@@ -188,7 +188,7 @@ static void *rr_cpu_thread_fn(void *arg)
rcu_add_force_rcu_notifier(&force_rcu); rcu_add_force_rcu_notifier(&force_rcu);
tcg_register_thread(); tcg_register_thread();
bql_lock(); qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread); qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id(); cpu->thread_id = qemu_get_thread_id();
@@ -198,7 +198,7 @@ static void *rr_cpu_thread_fn(void *arg)
/* wait for initial kick-off after machine start */ /* wait for initial kick-off after machine start */
while (first_cpu->stopped) { while (first_cpu->stopped) {
qemu_cond_wait_bql(first_cpu->halt_cond); qemu_cond_wait_iothread(first_cpu->halt_cond);
/* process any pending work */ /* process any pending work */
CPU_FOREACH(cpu) { CPU_FOREACH(cpu) {
@@ -218,9 +218,9 @@ static void *rr_cpu_thread_fn(void *arg)
/* Only used for icount_enabled() */ /* Only used for icount_enabled() */
int64_t cpu_budget = 0; int64_t cpu_budget = 0;
bql_unlock(); qemu_mutex_unlock_iothread();
replay_mutex_lock(); replay_mutex_lock();
bql_lock(); qemu_mutex_lock_iothread();
if (icount_enabled()) { if (icount_enabled()) {
int cpu_count = rr_cpu_count(); int cpu_count = rr_cpu_count();
@@ -254,23 +254,23 @@ static void *rr_cpu_thread_fn(void *arg)
if (cpu_can_run(cpu)) { if (cpu_can_run(cpu)) {
int r; int r;
bql_unlock(); qemu_mutex_unlock_iothread();
if (icount_enabled()) { if (icount_enabled()) {
icount_prepare_for_run(cpu, cpu_budget); icount_prepare_for_run(cpu, cpu_budget);
} }
r = tcg_cpu_exec(cpu); r = tcg_cpus_exec(cpu);
if (icount_enabled()) { if (icount_enabled()) {
icount_process_data(cpu); icount_process_data(cpu);
} }
bql_lock(); qemu_mutex_lock_iothread();
if (r == EXCP_DEBUG) { if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu); cpu_handle_guest_debug(cpu);
break; break;
} else if (r == EXCP_ATOMIC) { } else if (r == EXCP_ATOMIC) {
bql_unlock(); qemu_mutex_unlock_iothread();
cpu_exec_step_atomic(cpu); cpu_exec_step_atomic(cpu);
bql_lock(); qemu_mutex_lock_iothread();
break; break;
} }
} else if (cpu->stop) { } else if (cpu->stop) {

View File

@@ -63,12 +63,12 @@ void tcg_cpu_init_cflags(CPUState *cpu, bool parallel)
cpu->tcg_cflags |= cflags; cpu->tcg_cflags |= cflags;
} }
void tcg_cpu_destroy(CPUState *cpu) void tcg_cpus_destroy(CPUState *cpu)
{ {
cpu_thread_signal_destroyed(cpu); cpu_thread_signal_destroyed(cpu);
} }
int tcg_cpu_exec(CPUState *cpu) int tcg_cpus_exec(CPUState *cpu)
{ {
int ret; int ret;
assert(tcg_enabled()); assert(tcg_enabled());
@@ -88,7 +88,7 @@ static void tcg_cpu_reset_hold(CPUState *cpu)
/* mask must never be zero, except for A20 change call */ /* mask must never be zero, except for A20 change call */
void tcg_handle_interrupt(CPUState *cpu, int mask) void tcg_handle_interrupt(CPUState *cpu, int mask)
{ {
g_assert(bql_locked()); g_assert(qemu_mutex_iothread_locked());
cpu->interrupt_request |= mask; cpu->interrupt_request |= mask;

View File

@@ -14,8 +14,8 @@
#include "sysemu/cpus.h" #include "sysemu/cpus.h"
void tcg_cpu_destroy(CPUState *cpu); void tcg_cpus_destroy(CPUState *cpu);
int tcg_cpu_exec(CPUState *cpu); int tcg_cpus_exec(CPUState *cpu);
void tcg_handle_interrupt(CPUState *cpu, int mask); void tcg_handle_interrupt(CPUState *cpu, int mask);
void tcg_cpu_init_cflags(CPUState *cpu, bool parallel); void tcg_cpu_init_cflags(CPUState *cpu, bool parallel);

View File

@@ -63,7 +63,7 @@
#include "tb-context.h" #include "tb-context.h"
#include "internal-common.h" #include "internal-common.h"
#include "internal-target.h" #include "internal-target.h"
#include "tcg/perf.h" #include "perf.h"
#include "tcg/insn-start-words.h" #include "tcg/insn-start-words.h"
TBContext tb_ctx; TBContext tb_ctx;
@@ -256,6 +256,7 @@ bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
void page_init(void) void page_init(void)
{ {
page_size_init();
page_table_config_init(); page_table_config_init();
} }
@@ -303,7 +304,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
if (phys_pc == -1) { if (phys_pc == -1) {
/* Generate a one-shot TB with 1 insn in it */ /* Generate a one-shot TB with 1 insn in it */
cflags = (cflags & ~CF_COUNT_MASK) | 1; cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
} }
max_insns = cflags & CF_COUNT_MASK; max_insns = cflags & CF_COUNT_MASK;
@@ -631,7 +632,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
* operations only (which execute after completion) so we don't * operations only (which execute after completion) so we don't
* double instrument the instruction. * double instrument the instruction.
*/ */
cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | n; cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
if (qemu_loglevel_mask(CPU_LOG_EXEC)) { if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
vaddr pc = log_pc(cpu, tb); vaddr pc = log_pc(cpu, tb);
@@ -648,7 +649,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
void cpu_interrupt(CPUState *cpu, int mask) void cpu_interrupt(CPUState *cpu, int mask)
{ {
g_assert(bql_locked()); g_assert(qemu_mutex_iothread_locked());
cpu->interrupt_request |= mask; cpu->interrupt_request |= mask;
qatomic_set(&cpu->neg.icount_decr.u16.high, -1); qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
} }

View File

@@ -89,7 +89,7 @@ static TCGOp *gen_tb_start(DisasContextBase *db, uint32_t cflags)
* each translation block. The cost is minimal, plus it would be * each translation block. The cost is minimal, plus it would be
* very easy to forget doing it in the translator. * very easy to forget doing it in the translator.
*/ */
set_can_do_io(db, db->max_insns == 1); set_can_do_io(db, db->max_insns == 1 && (cflags & CF_LAST_IO));
return icount_start_insn; return icount_start_insn;
} }
@@ -151,7 +151,13 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
ops->tb_start(db, cpu); ops->tb_start(db, cpu);
tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
plugin_enabled = plugin_gen_tb_start(cpu, db, cflags & CF_MEMI_ONLY); if (cflags & CF_MEMI_ONLY) {
/* We should only see CF_MEMI_ONLY for io_recompile. */
assert(cflags & CF_LAST_IO);
plugin_enabled = plugin_gen_tb_start(cpu, db, true);
} else {
plugin_enabled = plugin_gen_tb_start(cpu, db, false);
}
db->plugin_enabled = plugin_enabled; db->plugin_enabled = plugin_enabled;
while (true) { while (true) {
@@ -163,13 +169,11 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
plugin_gen_insn_start(cpu, db); plugin_gen_insn_start(cpu, db);
} }
/* /* Disassemble one instruction. The translate_insn hook should
* Disassemble one instruction. The translate_insn hook should update db->pc_next and db->is_jmp to indicate what should be
* update db->pc_next and db->is_jmp to indicate what should be done next -- either exiting this loop or locate the start of
* done next -- either exiting this loop or locate the start of the next instruction. */
* the next instruction. if (db->num_insns == db->max_insns && (cflags & CF_LAST_IO)) {
*/
if (db->num_insns == db->max_insns) {
/* Accept I/O on the last instruction. */ /* Accept I/O on the last instruction. */
set_can_do_io(db, true); set_can_do_io(db, true);
} }
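Several hunks above belong to one logical change: CF_LAST_IO disappears from the new side, so whether an instruction may perform I/O is derived from its position in the TB alone rather than from a cflags bit. The gating that remains on the left column, shown in isolation:

    /* single-insn TB: I/O allowed from the start */
    set_can_do_io(db, db->max_insns == 1);

    /* otherwise, accept I/O only once we reach the last instruction */
    if (db->num_insns == db->max_insns) {
        set_can_do_io(db, true);
    }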

View File

@@ -651,17 +651,16 @@ void page_protect(tb_page_addr_t address)
{ {
PageFlagsNode *p; PageFlagsNode *p;
target_ulong start, last; target_ulong start, last;
int host_page_size = qemu_real_host_page_size();
int prot; int prot;
assert_memory_lock(); assert_memory_lock();
if (host_page_size <= TARGET_PAGE_SIZE) { if (qemu_host_page_size <= TARGET_PAGE_SIZE) {
start = address & TARGET_PAGE_MASK; start = address & TARGET_PAGE_MASK;
last = start + TARGET_PAGE_SIZE - 1; last = start + TARGET_PAGE_SIZE - 1;
} else { } else {
start = address & -host_page_size; start = address & qemu_host_page_mask;
last = start + host_page_size - 1; last = start + qemu_host_page_size - 1;
} }
p = pageflags_find(start, last); p = pageflags_find(start, last);
@@ -672,7 +671,7 @@ void page_protect(tb_page_addr_t address)
if (unlikely(p->itree.last < last)) { if (unlikely(p->itree.last < last)) {
/* More than one protection region covers the one host page. */ /* More than one protection region covers the one host page. */
assert(TARGET_PAGE_SIZE < host_page_size); assert(TARGET_PAGE_SIZE < qemu_host_page_size);
while ((p = pageflags_next(p, start, last)) != NULL) { while ((p = pageflags_next(p, start, last)) != NULL) {
prot |= p->flags; prot |= p->flags;
} }
@@ -680,7 +679,7 @@ void page_protect(tb_page_addr_t address)
if (prot & PAGE_WRITE) { if (prot & PAGE_WRITE) {
pageflags_set_clear(start, last, 0, PAGE_WRITE); pageflags_set_clear(start, last, 0, PAGE_WRITE);
mprotect(g2h_untagged(start), last - start + 1, mprotect(g2h_untagged(start), qemu_host_page_size,
prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE); prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
} }
} }
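Note how the new side sizes everything from the runtime value: both the rounded range and the mprotect() length come from qemu_real_host_page_size() instead of the retired qemu_host_page_size globals. A worked example with a 16 KiB host page (an assumed value for illustration):

    int host_page_size = qemu_real_host_page_size();   /* say 16384 */
    start = address & -host_page_size;                 /* round down to host page */
    last  = start + host_page_size - 1;
    /* mprotect length: last - start + 1 == host_page_size */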
@@ -726,19 +725,18 @@ int page_unprotect(target_ulong address, uintptr_t pc)
} }
#endif #endif
} else { } else {
int host_page_size = qemu_real_host_page_size();
target_ulong start, len, i; target_ulong start, len, i;
int prot; int prot;
if (host_page_size <= TARGET_PAGE_SIZE) { if (qemu_host_page_size <= TARGET_PAGE_SIZE) {
start = address & TARGET_PAGE_MASK; start = address & TARGET_PAGE_MASK;
len = TARGET_PAGE_SIZE; len = TARGET_PAGE_SIZE;
prot = p->flags | PAGE_WRITE; prot = p->flags | PAGE_WRITE;
pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0); pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc); current_tb_invalidated = tb_invalidate_phys_page_unwind(start, pc);
} else { } else {
start = address & -host_page_size; start = address & qemu_host_page_mask;
len = host_page_size; len = qemu_host_page_size;
prot = 0; prot = 0;
for (i = 0; i < len; i += TARGET_PAGE_SIZE) { for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
@@ -864,7 +862,7 @@ tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
typedef struct TargetPageDataNode { typedef struct TargetPageDataNode {
struct rcu_head rcu; struct rcu_head rcu;
IntervalTreeNode itree; IntervalTreeNode itree;
char data[] __attribute__((aligned)); char data[TPD_PAGES][TARGET_PAGE_DATA_SIZE] __attribute__((aligned));
} TargetPageDataNode; } TargetPageDataNode;
static IntervalTreeRoot targetdata_root; static IntervalTreeRoot targetdata_root;
@@ -902,8 +900,7 @@ void page_reset_target_data(target_ulong start, target_ulong last)
n_last = MIN(last, n->last); n_last = MIN(last, n->last);
p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS; p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
memset(t->data + p_ofs * TARGET_PAGE_DATA_SIZE, 0, memset(t->data[p_ofs], 0, p_len * TARGET_PAGE_DATA_SIZE);
p_len * TARGET_PAGE_DATA_SIZE);
} }
} }
@@ -911,7 +908,7 @@ void *page_get_target_data(target_ulong address)
{ {
IntervalTreeNode *n; IntervalTreeNode *n;
TargetPageDataNode *t; TargetPageDataNode *t;
target_ulong page, region, p_ofs; target_ulong page, region;
page = address & TARGET_PAGE_MASK; page = address & TARGET_PAGE_MASK;
region = address & TBD_MASK; region = address & TBD_MASK;
@@ -927,8 +924,7 @@ void *page_get_target_data(target_ulong address)
mmap_lock(); mmap_lock();
n = interval_tree_iter_first(&targetdata_root, page, page); n = interval_tree_iter_first(&targetdata_root, page, page);
if (!n) { if (!n) {
t = g_malloc0(sizeof(TargetPageDataNode) t = g_new0(TargetPageDataNode, 1);
+ TPD_PAGES * TARGET_PAGE_DATA_SIZE);
n = &t->itree; n = &t->itree;
n->start = region; n->start = region;
n->last = region | ~TBD_MASK; n->last = region | ~TBD_MASK;
@@ -938,8 +934,7 @@ void *page_get_target_data(target_ulong address)
} }
t = container_of(n, TargetPageDataNode, itree); t = container_of(n, TargetPageDataNode, itree);
p_ofs = (page - region) >> TARGET_PAGE_BITS; return t->data[(page - region) >> TARGET_PAGE_BITS];
return t->data + p_ofs * TARGET_PAGE_DATA_SIZE;
} }
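The left column trades the fixed data[TPD_PAGES][TARGET_PAGE_DATA_SIZE] array for a C99 flexible array member, sized at allocation and indexed by hand; both forms address the same bytes. Side by side (a sketch):

    /* old (right column): */ char data[TPD_PAGES][TARGET_PAGE_DATA_SIZE];  /* slot: t->data[p_ofs] */
    /* new (left column):  */ char data[] __attribute__((aligned));
    t = g_malloc0(sizeof(TargetPageDataNode) + TPD_PAGES * TARGET_PAGE_DATA_SIZE);
    void *slot = t->data + p_ofs * TARGET_PAGE_DATA_SIZE;   /* same bytes */

The flexible member removes the need for TARGET_PAGE_DATA_SIZE to be a compile-time constant expression.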
#else #else
void page_reset_target_data(target_ulong start, target_ulong last) { } void page_reset_target_data(target_ulong start, target_ulong last) { }

View File

@@ -1,143 +0,0 @@
/*
* CPU watchpoints
*
* Copyright (c) 2003 Fabrice Bellard
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "exec/exec-all.h"
#include "exec/translate-all.h"
#include "sysemu/tcg.h"
#include "sysemu/replay.h"
#include "hw/core/tcg-cpu-ops.h"
#include "hw/core/cpu.h"
/*
* Return true if this watchpoint address matches the specified
* access (ie the address range covered by the watchpoint overlaps
* partially or completely with the address range covered by the
* access).
*/
static inline bool watchpoint_address_matches(CPUWatchpoint *wp,
vaddr addr, vaddr len)
{
/*
* We know the lengths are non-zero, but a little caution is
* required to avoid errors in the case where the range ends
* exactly at the top of the address space and so addr + len
* wraps round to zero.
*/
vaddr wpend = wp->vaddr + wp->len - 1;
vaddr addrend = addr + len - 1;
return !(addr > wpend || wp->vaddr > addrend);
}
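The caution in the comment is worth a concrete number: the endpoints are computed with len - 1 precisely so a watchpoint ending at the top of the address space does not wrap. For example, with a 64-bit vaddr:

    /* wp->vaddr = 0xfffffffffffffffc, wp->len = 4 */
    vaddr wpend = 0xfffffffffffffffcULL + 4 - 1;   /* 0xffffffffffffffff: no wrap */
    /* wp->vaddr + wp->len would wrap to 0, defeating the overlap test */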
/* Return flags for watchpoints that match addr + prot. */
int cpu_watchpoint_address_matches(CPUState *cpu, vaddr addr, vaddr len)
{
CPUWatchpoint *wp;
int ret = 0;
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
if (watchpoint_address_matches(wp, addr, len)) {
ret |= wp->flags;
}
}
return ret;
}
/* Generate a debug exception if a watchpoint has been hit. */
void cpu_check_watchpoint(CPUState *cpu, vaddr addr, vaddr len,
MemTxAttrs attrs, int flags, uintptr_t ra)
{
CPUClass *cc = CPU_GET_CLASS(cpu);
CPUWatchpoint *wp;
assert(tcg_enabled());
if (cpu->watchpoint_hit) {
/*
* We re-entered the check after replacing the TB.
* Now raise the debug interrupt so that it will
* trigger after the current instruction.
*/
bql_lock();
cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
bql_unlock();
return;
}
if (cc->tcg_ops->adjust_watchpoint_address) {
/* this is currently used only by ARM BE32 */
addr = cc->tcg_ops->adjust_watchpoint_address(cpu, addr, len);
}
assert((flags & ~BP_MEM_ACCESS) == 0);
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
int hit_flags = wp->flags & flags;
if (hit_flags && watchpoint_address_matches(wp, addr, len)) {
if (replay_running_debug()) {
/*
* replay_breakpoint reads icount.
* Force recompile to succeed, because icount may
* be read only at the end of the block.
*/
if (!cpu->neg.can_do_io) {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
cpu_loop_exit_restore(cpu, ra);
}
/*
* Don't process the watchpoints when we are
* in a reverse debugging operation.
*/
replay_breakpoint();
return;
}
wp->flags |= hit_flags << BP_HIT_SHIFT;
wp->hitaddr = MAX(addr, wp->vaddr);
wp->hitattrs = attrs;
if (wp->flags & BP_CPU
&& cc->tcg_ops->debug_check_watchpoint
&& !cc->tcg_ops->debug_check_watchpoint(cpu, wp)) {
wp->flags &= ~BP_WATCHPOINT_HIT;
continue;
}
cpu->watchpoint_hit = wp;
mmap_lock();
/* This call also restores vCPU state */
tb_check_watchpoint(cpu, ra);
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
cpu->exception_index = EXCP_DEBUG;
mmap_unlock();
cpu_loop_exit(cpu);
} else {
/* Force execution of one insn next time. */
cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(cpu);
mmap_unlock();
cpu_loop_exit_noexc(cpu);
}
} else {
wp->flags &= ~BP_WATCHPOINT_HIT;
}
}
}

View File

@@ -15,7 +15,6 @@
#include "hw/xen/xen_native.h" #include "hw/xen/xen_native.h"
#include "hw/xen/xen-legacy-backend.h" #include "hw/xen/xen-legacy-backend.h"
#include "hw/xen/xen_pt.h" #include "hw/xen/xen_pt.h"
#include "hw/xen/xen_igd.h"
#include "chardev/char.h" #include "chardev/char.h"
#include "qemu/accel.h" #include "qemu/accel.h"
#include "sysemu/cpus.h" #include "sysemu/cpus.h"

View File

@@ -1683,7 +1683,7 @@ static const VMStateDescription vmstate_audio = {
.version_id = 1, .version_id = 1,
.minimum_version_id = 1, .minimum_version_id = 1,
.needed = vmstate_audio_needed, .needed = vmstate_audio_needed,
.fields = (const VMStateField[]) { .fields = (VMStateField[]) {
VMSTATE_END_OF_LIST() VMSTATE_END_OF_LIST()
} }
}; };
@@ -1744,7 +1744,7 @@ static AudioState *audio_init(Audiodev *dev, Error **errp)
if (driver) { if (driver) {
done = !audio_driver_init(s, driver, dev, errp); done = !audio_driver_init(s, driver, dev, errp);
} else { } else {
error_setg(errp, "Unknown audio driver `%s'", drvname); error_setg(errp, "Unknown audio driver `%s'\n", drvname);
} }
if (!done) { if (!done) {
goto out; goto out;
@@ -1758,15 +1758,12 @@ static AudioState *audio_init(Audiodev *dev, Error **errp)
goto out; goto out;
} }
s->dev = dev = e->dev; s->dev = dev = e->dev;
QSIMPLEQ_REMOVE_HEAD(&default_audiodevs, next);
g_free(e);
drvname = AudiodevDriver_str(dev->driver); drvname = AudiodevDriver_str(dev->driver);
driver = audio_driver_lookup(drvname); driver = audio_driver_lookup(drvname);
if (!audio_driver_init(s, driver, dev, NULL)) { if (!audio_driver_init(s, driver, dev, NULL)) {
break; break;
} }
qapi_free_Audiodev(dev); QSIMPLEQ_REMOVE_HEAD(&default_audiodevs, next);
s->dev = NULL;
} }
} }

View File

@@ -299,7 +299,7 @@ COREAUDIO_WRAPPER_FUNC(write, size_t, (HWVoiceOut *hw, void *buf, size_t size),
#undef COREAUDIO_WRAPPER_FUNC #undef COREAUDIO_WRAPPER_FUNC
/* /*
* callback to feed audio output buffer. called without BQL. * callback to feed audio output buffer. called without iothread lock.
* allowed to lock "buf_mutex", but disallowed to have any other locks. * allowed to lock "buf_mutex", but disallowed to have any other locks.
*/ */
static OSStatus audioDeviceIOProc( static OSStatus audioDeviceIOProc(
@@ -538,7 +538,7 @@ static void update_device_playback_state(coreaudioVoiceOut *core)
} }
} }
/* called without BQL. */ /* called without iothread lock. */
static OSStatus handle_voice_change( static OSStatus handle_voice_change(
AudioObjectID in_object_id, AudioObjectID in_object_id,
UInt32 in_number_addresses, UInt32 in_number_addresses,
@@ -547,7 +547,7 @@ static OSStatus handle_voice_change(
{ {
coreaudioVoiceOut *core = in_client_data; coreaudioVoiceOut *core = in_client_data;
bql_lock(); qemu_mutex_lock_iothread();
if (core->outputDeviceID) { if (core->outputDeviceID) {
fini_out_device(core); fini_out_device(core);
@@ -557,7 +557,7 @@ static OSStatus handle_voice_change(
update_device_playback_state(core); update_device_playback_state(core);
} }
bql_unlock(); qemu_mutex_unlock_iothread();
return 0; return 0;
} }

View File

@@ -30,8 +30,7 @@ endforeach
if dbus_display if dbus_display
module_ss = ss.source_set() module_ss = ss.source_set()
module_ss.add(when: [gio, dbus_display1_dep, pixman], module_ss.add(when: [gio, pixman], if_true: files('dbusaudio.c'))
if_true: files('dbusaudio.c'))
audio_modules += {'dbus': module_ss} audio_modules += {'dbus': module_ss}
endif endif

View File

@@ -11,6 +11,7 @@
#include "qemu/osdep.h" #include "qemu/osdep.h"
#include "qemu/module.h" #include "qemu/module.h"
#include "audio.h" #include "audio.h"
#include <errno.h>
#include "qemu/error-report.h" #include "qemu/error-report.h"
#include "qapi/error.h" #include "qapi/error.h"
#include <spa/param/audio/format-utils.h> #include <spa/param/audio/format-utils.h>

View File

@@ -1,5 +1 @@
source tpm/Kconfig source tpm/Kconfig
config IOMMUFD
bool
depends on VFIO

View File

@@ -398,7 +398,6 @@ static void cryptodev_backend_set_ops(Object *obj, Visitor *v,
static void static void
cryptodev_backend_complete(UserCreatable *uc, Error **errp) cryptodev_backend_complete(UserCreatable *uc, Error **errp)
{ {
ERRP_GUARD();
CryptoDevBackend *backend = CRYPTODEV_BACKEND(uc); CryptoDevBackend *backend = CRYPTODEV_BACKEND(uc);
CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_GET_CLASS(uc); CryptoDevBackendClass *bc = CRYPTODEV_BACKEND_GET_CLASS(uc);
uint32_t services; uint32_t services;
@@ -407,20 +406,11 @@ cryptodev_backend_complete(UserCreatable *uc, Error **errp)
QTAILQ_INIT(&backend->opinfos); QTAILQ_INIT(&backend->opinfos);
value = backend->tc.buckets[THROTTLE_OPS_TOTAL].avg; value = backend->tc.buckets[THROTTLE_OPS_TOTAL].avg;
cryptodev_backend_set_throttle(backend, THROTTLE_OPS_TOTAL, value, errp); cryptodev_backend_set_throttle(backend, THROTTLE_OPS_TOTAL, value, errp);
if (*errp) {
return;
}
value = backend->tc.buckets[THROTTLE_BPS_TOTAL].avg; value = backend->tc.buckets[THROTTLE_BPS_TOTAL].avg;
cryptodev_backend_set_throttle(backend, THROTTLE_BPS_TOTAL, value, errp); cryptodev_backend_set_throttle(backend, THROTTLE_BPS_TOTAL, value, errp);
if (*errp) {
return;
}
if (bc->init) { if (bc->init) {
bc->init(backend, errp); bc->init(backend, errp);
if (*errp) {
return;
}
} }
services = backend->conf.crypto_services; services = backend->conf.crypto_services;

View File

@@ -393,7 +393,7 @@ static const VMStateDescription dbus_vmstate = {
.version_id = 0, .version_id = 0,
.pre_save = dbus_vmstate_pre_save, .pre_save = dbus_vmstate_pre_save,
.post_load = dbus_vmstate_post_load, .post_load = dbus_vmstate_post_load,
.fields = (const VMStateField[]) { .fields = (VMStateField[]) {
VMSTATE_UINT32(data_size, DBusVMState), VMSTATE_UINT32(data_size, DBusVMState),
VMSTATE_VBUFFER_ALLOC_UINT32(data, DBusVMState, 0, 0, data_size), VMSTATE_VBUFFER_ALLOC_UINT32(data, DBusVMState, 0, 0, data_size),
VMSTATE_END_OF_LIST() VMSTATE_END_OF_LIST()

View File

@@ -17,29 +17,31 @@
#include "sysemu/hostmem.h" #include "sysemu/hostmem.h"
#include "hw/i386/hostmem-epc.h" #include "hw/i386/hostmem-epc.h"
static bool static void
sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
{ {
g_autofree char *name = NULL;
uint32_t ram_flags; uint32_t ram_flags;
char *name;
int fd; int fd;
if (!backend->size) { if (!backend->size) {
error_setg(errp, "can't create backend with size 0"); error_setg(errp, "can't create backend with size 0");
return false; return;
} }
fd = qemu_open_old("/dev/sgx_vepc", O_RDWR); fd = qemu_open_old("/dev/sgx_vepc", O_RDWR);
if (fd < 0) { if (fd < 0) {
error_setg_errno(errp, errno, error_setg_errno(errp, errno,
"failed to open /dev/sgx_vepc to alloc SGX EPC"); "failed to open /dev/sgx_vepc to alloc SGX EPC");
return false; return;
} }
name = object_get_canonical_path(OBJECT(backend)); name = object_get_canonical_path(OBJECT(backend));
ram_flags = (backend->share ? RAM_SHARED : 0) | RAM_PROTECTED; ram_flags = (backend->share ? RAM_SHARED : 0) | RAM_PROTECTED;
return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend),
backend->size, ram_flags, fd, 0, errp); name, backend->size, ram_flags,
fd, 0, errp);
g_free(name);
} }
static void sgx_epc_backend_instance_init(Object *obj) static void sgx_epc_backend_instance_init(Object *obj)

View File

@@ -36,25 +36,24 @@ struct HostMemoryBackendFile {
OnOffAuto rom; OnOffAuto rom;
}; };
static bool static void
file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
{ {
#ifndef CONFIG_POSIX #ifndef CONFIG_POSIX
error_setg(errp, "backend '%s' not supported on this host", error_setg(errp, "backend '%s' not supported on this host",
object_get_typename(OBJECT(backend))); object_get_typename(OBJECT(backend)));
return false;
#else #else
HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(backend); HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(backend);
g_autofree gchar *name = NULL;
uint32_t ram_flags; uint32_t ram_flags;
gchar *name;
if (!backend->size) { if (!backend->size) {
error_setg(errp, "can't create backend with size 0"); error_setg(errp, "can't create backend with size 0");
return false; return;
} }
if (!fb->mem_path) { if (!fb->mem_path) {
error_setg(errp, "mem-path property not set"); error_setg(errp, "mem-path property not set");
return false; return;
} }
switch (fb->rom) { switch (fb->rom) {
@@ -66,18 +65,18 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
if (!fb->readonly) { if (!fb->readonly) {
error_setg(errp, "property 'rom' = 'on' is not supported with" error_setg(errp, "property 'rom' = 'on' is not supported with"
" 'readonly' = 'off'"); " 'readonly' = 'off'");
return false; return;
} }
break; break;
case ON_OFF_AUTO_OFF: case ON_OFF_AUTO_OFF:
if (fb->readonly && backend->share) { if (fb->readonly && backend->share) {
error_setg(errp, "property 'rom' = 'off' is incompatible with" error_setg(errp, "property 'rom' = 'off' is incompatible with"
" 'readonly' = 'on' and 'share' = 'on'"); " 'readonly' = 'on' and 'share' = 'on'");
return false; return;
} }
break; break;
default: default:
g_assert_not_reached(); assert(false);
} }
name = host_memory_backend_get_name(backend); name = host_memory_backend_get_name(backend);
@@ -87,9 +86,10 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
ram_flags |= fb->is_pmem ? RAM_PMEM : 0; ram_flags |= fb->is_pmem ? RAM_PMEM : 0;
ram_flags |= RAM_NAMED_FILE; ram_flags |= RAM_NAMED_FILE;
return memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name, memory_region_init_ram_from_file(&backend->mr, OBJECT(backend), name,
backend->size, fb->align, ram_flags, backend->size, fb->align, ram_flags,
fb->mem_path, fb->offset, errp); fb->mem_path, fb->offset, errp);
g_free(name);
#endif #endif
} }

View File

@@ -31,17 +31,17 @@ struct HostMemoryBackendMemfd {
bool seal; bool seal;
}; };
static bool static void
memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
{ {
HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(backend); HostMemoryBackendMemfd *m = MEMORY_BACKEND_MEMFD(backend);
g_autofree char *name = NULL;
uint32_t ram_flags; uint32_t ram_flags;
char *name;
int fd; int fd;
if (!backend->size) { if (!backend->size) {
error_setg(errp, "can't create backend with size 0"); error_setg(errp, "can't create backend with size 0");
return false; return;
} }
fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size, fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size,
@@ -49,14 +49,15 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0, F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0,
errp); errp);
if (fd == -1) { if (fd == -1) {
return false; return;
} }
name = host_memory_backend_get_name(backend); name = host_memory_backend_get_name(backend);
ram_flags = backend->share ? RAM_SHARED : 0; ram_flags = backend->share ? RAM_SHARED : 0;
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name,
backend->size, ram_flags, fd, 0, errp); backend->size, ram_flags, fd, 0, errp);
g_free(name);
} }
static bool static bool

View File

@@ -16,23 +16,23 @@
#include "qemu/module.h" #include "qemu/module.h"
#include "qom/object_interfaces.h" #include "qom/object_interfaces.h"
static bool static void
ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) ram_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
{ {
g_autofree char *name = NULL;
uint32_t ram_flags; uint32_t ram_flags;
char *name;
if (!backend->size) { if (!backend->size) {
error_setg(errp, "can't create backend with size 0"); error_setg(errp, "can't create backend with size 0");
return false; return;
} }
name = host_memory_backend_get_name(backend); name = host_memory_backend_get_name(backend);
ram_flags = backend->share ? RAM_SHARED : 0; ram_flags = backend->share ? RAM_SHARED : 0;
ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE;
return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend), name,
name, backend->size, backend->size, ram_flags, errp);
ram_flags, errp); g_free(name);
} }
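The four backends above share one conversion: each alloc hook now returns bool (true on success) rather than void, success is propagated straight from the memory_region_init_* call, and g_autofree replaces the manual g_free(name). Condensed to a sketch (hypothetical backend name; ram_flags handling elided):

    static bool demo_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
    {
        g_autofree char *name = NULL;

        if (!backend->size) {
            error_setg(errp, "can't create backend with size 0");
            return false;                    /* failure is now the return value */
        }
        name = host_memory_backend_get_name(backend);
        return memory_region_init_ram_flags_nomigrate(&backend->mr, OBJECT(backend),
                                                      name, backend->size, 0, errp);
    }

The caller side appears in host_memory_backend_memory_complete below: if (!bc->alloc(backend, errp)) { return; } replaces the local_err dance.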
static void static void

View File

@@ -20,7 +20,6 @@
#include "qom/object_interfaces.h" #include "qom/object_interfaces.h"
#include "qemu/mmap-alloc.h" #include "qemu/mmap-alloc.h"
#include "qemu/madvise.h" #include "qemu/madvise.h"
#include "hw/qdev-core.h"
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
#include <numaif.h> #include <numaif.h>
@@ -220,6 +219,7 @@ static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
static void host_memory_backend_set_prealloc(Object *obj, bool value, static void host_memory_backend_set_prealloc(Object *obj, bool value,
Error **errp) Error **errp)
{ {
Error *local_err = NULL;
HostMemoryBackend *backend = MEMORY_BACKEND(obj); HostMemoryBackend *backend = MEMORY_BACKEND(obj);
if (!backend->reserve && value) { if (!backend->reserve && value) {
@@ -237,8 +237,10 @@ static void host_memory_backend_set_prealloc(Object *obj, bool value,
void *ptr = memory_region_get_ram_ptr(&backend->mr); void *ptr = memory_region_get_ram_ptr(&backend->mr);
uint64_t sz = memory_region_size(&backend->mr); uint64_t sz = memory_region_size(&backend->mr);
if (!qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads, qemu_prealloc_mem(fd, ptr, sz, backend->prealloc_threads,
backend->prealloc_context, false, errp)) { backend->prealloc_context, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return; return;
} }
backend->prealloc = true; backend->prealloc = true;
@@ -322,92 +324,91 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
 {
     HostMemoryBackend *backend = MEMORY_BACKEND(uc);
     HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
+    Error *local_err = NULL;
     void *ptr;
     uint64_t sz;
-    bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED);

-    if (!bc->alloc) {
-        return;
-    }
-    if (!bc->alloc(backend, errp)) {
-        return;
-    }
+    if (bc->alloc) {
+        bc->alloc(backend, &local_err);
+        if (local_err) {
+            goto out;
+        }

     ptr = memory_region_get_ram_ptr(&backend->mr);
     sz = memory_region_size(&backend->mr);

     if (backend->merge) {
         qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
     }
     if (!backend->dump) {
         qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
     }
 #ifdef CONFIG_NUMA
     unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
     /* lastbit == MAX_NODES means maxnode = 0 */
     unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
-    /*
-     * Ensure policy won't be ignored in case memory is preallocated
-     * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
-     * this doesn't catch hugepage case.
-     */
+    /* ensure policy won't be ignored in case memory is preallocated
+     * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
+     * this doesn't catch hugepage case. */
     unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
     int mode = backend->policy;

     /* check for invalid host-nodes and policies and give more verbose
      * error messages than mbind(). */
     if (maxnode && backend->policy == MPOL_DEFAULT) {
         error_setg(errp, "host-nodes must be empty for policy default,"
                    " or you should explicitly specify a policy other"
                    " than default");
         return;
     } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
         error_setg(errp, "host-nodes must be set for policy %s",
                    HostMemPolicy_str(backend->policy));
         return;
     }

-    /*
-     * We can have up to MAX_NODES nodes, but we need to pass maxnode+1
-     * as argument to mbind() due to an old Linux bug (feature?) which
-     * cuts off the last specified node. This means backend->host_nodes
-     * must have MAX_NODES+1 bits available.
-     */
+    /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
+     * as argument to mbind() due to an old Linux bug (feature?) which
+     * cuts off the last specified node. This means backend->host_nodes
+     * must have MAX_NODES+1 bits available.
+     */
     assert(sizeof(backend->host_nodes) >=
            BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
     assert(maxnode <= MAX_NODES);

 #ifdef HAVE_NUMA_HAS_PREFERRED_MANY
     if (mode == MPOL_PREFERRED && numa_has_preferred_many() > 0) {
         /*
          * Replace with MPOL_PREFERRED_MANY otherwise the mbind() below
          * silently picks the first node.
          */
         mode = MPOL_PREFERRED_MANY;
     }
 #endif

     if (maxnode &&
         mbind(ptr, sz, mode, backend->host_nodes, maxnode + 1, flags)) {
         if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
             error_setg_errno(errp, errno,
                              "cannot bind memory to host NUMA nodes");
             return;
         }
     }
 #endif
-    /*
-     * Preallocate memory after the NUMA policy has been instantiated.
-     * This is necessary to guarantee memory is allocated with
-     * specified NUMA policy in place.
-     */
-    if (backend->prealloc && !qemu_prealloc_mem(memory_region_get_fd(&backend->mr),
-                                                ptr, sz,
-                                                backend->prealloc_threads,
-                                                backend->prealloc_context,
-                                                async, errp)) {
-        return;
-    }
+    /* Preallocate memory after the NUMA policy has been instantiated.
+     * This is necessary to guarantee memory is allocated with
+     * specified NUMA policy in place.
+     */
+    if (backend->prealloc) {
+        qemu_prealloc_mem(memory_region_get_fd(&backend->mr), ptr, sz,
+                          backend->prealloc_threads,
+                          backend->prealloc_context, &local_err);
+        if (local_err) {
+            goto out;
+        }
+    }
+    }
+out:
+    error_propagate(errp, local_err);
 }

 static bool
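The maxnode arithmetic in the function above is easy to misread: find_last_bit() returns MAX_NODES when the mask is empty, the modulo folds that sentinel into maxnode == 0, and mbind() is later handed maxnode + 1 to survive the old kernel quirk the comment mentions. A stand-alone sketch of just that arithmetic, with a simplified bit scan standing in for QEMU's find_last_bit():

    #include <stdio.h>

    #define MAX_NODES 128

    static unsigned long find_last_set(const unsigned long *bits,
                                       unsigned long nbits)
    {
        for (unsigned long i = nbits; i-- > 0; ) {
            if (bits[i / (8 * sizeof(unsigned long))] &
                (1UL << (i % (8 * sizeof(unsigned long))))) {
                return i;
            }
        }
        return nbits;                        /* sentinel: no bit set */
    }

    int main(void)
    {
        unsigned long nodes[MAX_NODES / (8 * sizeof(unsigned long)) + 1] = { 0 };

        unsigned long lastbit = find_last_set(nodes, MAX_NODES);
        unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
        /* empty mask: lastbit == 128 (the sentinel), maxnode == 0 */
        printf("empty mask: lastbit=%lu maxnode=%lu\n", lastbit, maxnode);

        nodes[0] = 1UL << 3;                 /* select host node 3 */
        lastbit = find_last_set(nodes, MAX_NODES);
        maxnode = (lastbit + 1) % (MAX_NODES + 1);
        /* mbind() would then be passed maxnode + 1, i.e. 5, so the kernel
         * does not cut off the last specified node. */
        printf("node 3 set: lastbit=%lu maxnode=%lu\n", lastbit, maxnode);
        return 0;
    }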


@@ -1,233 +0,0 @@
/*
* iommufd container backend
*
* Copyright (C) 2023 Intel Corporation.
* Copyright Red Hat, Inc. 2023
*
* Authors: Yi Liu <yi.l.liu@intel.com>
* Eric Auger <eric.auger@redhat.com>
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "qemu/osdep.h"
#include "sysemu/iommufd.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/module.h"
#include "qom/object_interfaces.h"
#include "qemu/error-report.h"
#include "monitor/monitor.h"
#include "trace.h"
#include <sys/ioctl.h>
#include <linux/iommufd.h>
static void iommufd_backend_init(Object *obj)
{
IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
be->fd = -1;
be->users = 0;
be->owned = true;
}
static void iommufd_backend_finalize(Object *obj)
{
IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
if (be->owned) {
close(be->fd);
be->fd = -1;
}
}
static void iommufd_backend_set_fd(Object *obj, const char *str, Error **errp)
{
IOMMUFDBackend *be = IOMMUFD_BACKEND(obj);
int fd = -1;
fd = monitor_fd_param(monitor_cur(), str, errp);
if (fd == -1) {
error_prepend(errp, "Could not parse remote object fd %s:", str);
return;
}
be->fd = fd;
be->owned = false;
trace_iommu_backend_set_fd(be->fd);
}
static bool iommufd_backend_can_be_deleted(UserCreatable *uc)
{
IOMMUFDBackend *be = IOMMUFD_BACKEND(uc);
return !be->users;
}
static void iommufd_backend_class_init(ObjectClass *oc, void *data)
{
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
ucc->can_be_deleted = iommufd_backend_can_be_deleted;
object_class_property_add_str(oc, "fd", NULL, iommufd_backend_set_fd);
}
int iommufd_backend_connect(IOMMUFDBackend *be, Error **errp)
{
int fd, ret = 0;
if (be->owned && !be->users) {
fd = qemu_open_old("/dev/iommu", O_RDWR);
if (fd < 0) {
error_setg_errno(errp, errno, "/dev/iommu opening failed");
ret = fd;
goto out;
}
be->fd = fd;
}
be->users++;
out:
trace_iommufd_backend_connect(be->fd, be->owned,
be->users, ret);
return ret;
}
void iommufd_backend_disconnect(IOMMUFDBackend *be)
{
if (!be->users) {
goto out;
}
be->users--;
if (!be->users && be->owned) {
close(be->fd);
be->fd = -1;
}
out:
trace_iommufd_backend_disconnect(be->fd, be->users);
}
int iommufd_backend_alloc_ioas(IOMMUFDBackend *be, uint32_t *ioas_id,
Error **errp)
{
int ret, fd = be->fd;
struct iommu_ioas_alloc alloc_data = {
.size = sizeof(alloc_data),
.flags = 0,
};
ret = ioctl(fd, IOMMU_IOAS_ALLOC, &alloc_data);
if (ret) {
error_setg_errno(errp, errno, "Failed to allocate ioas");
return ret;
}
*ioas_id = alloc_data.out_ioas_id;
trace_iommufd_backend_alloc_ioas(fd, *ioas_id, ret);
return ret;
}
void iommufd_backend_free_id(IOMMUFDBackend *be, uint32_t id)
{
int ret, fd = be->fd;
struct iommu_destroy des = {
.size = sizeof(des),
.id = id,
};
ret = ioctl(fd, IOMMU_DESTROY, &des);
trace_iommufd_backend_free_id(fd, id, ret);
if (ret) {
error_report("Failed to free id: %u %m", id);
}
}
int iommufd_backend_map_dma(IOMMUFDBackend *be, uint32_t ioas_id, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly)
{
int ret, fd = be->fd;
struct iommu_ioas_map map = {
.size = sizeof(map),
.flags = IOMMU_IOAS_MAP_READABLE |
IOMMU_IOAS_MAP_FIXED_IOVA,
.ioas_id = ioas_id,
.__reserved = 0,
.user_va = (uintptr_t)vaddr,
.iova = iova,
.length = size,
};
if (!readonly) {
map.flags |= IOMMU_IOAS_MAP_WRITEABLE;
}
ret = ioctl(fd, IOMMU_IOAS_MAP, &map);
trace_iommufd_backend_map_dma(fd, ioas_id, iova, size,
vaddr, readonly, ret);
if (ret) {
ret = -errno;
/* TODO: Mapping hardware PCI BAR regions is not supported for now. */
if (errno == EFAULT) {
warn_report("IOMMU_IOAS_MAP failed: %m, PCI BAR?");
} else {
error_report("IOMMU_IOAS_MAP failed: %m");
}
}
return ret;
}
int iommufd_backend_unmap_dma(IOMMUFDBackend *be, uint32_t ioas_id,
hwaddr iova, ram_addr_t size)
{
int ret, fd = be->fd;
struct iommu_ioas_unmap unmap = {
.size = sizeof(unmap),
.ioas_id = ioas_id,
.iova = iova,
.length = size,
};
ret = ioctl(fd, IOMMU_IOAS_UNMAP, &unmap);
/*
 * IOMMUFD treats a mapping as an object: unmapping a nonexistent
 * mapping is treated as deleting a nonexistent object and returns
 * ENOENT. This is different from the legacy backend, which allows
 * it. A vIOMMU may trigger a lot of redundant unmappings; to avoid
 * flooding the log, treat them as success for IOMMUFD just like the
 * legacy backend.
 */
if (ret && errno == ENOENT) {
trace_iommufd_backend_unmap_dma_non_exist(fd, ioas_id, iova, size, ret);
ret = 0;
} else {
trace_iommufd_backend_unmap_dma(fd, ioas_id, iova, size, ret);
}
if (ret) {
ret = -errno;
error_report("IOMMU_IOAS_UNMAP failed: %m");
}
return ret;
}
static const TypeInfo iommufd_backend_info = {
.name = TYPE_IOMMUFD_BACKEND,
.parent = TYPE_OBJECT,
.instance_size = sizeof(IOMMUFDBackend),
.instance_init = iommufd_backend_init,
.instance_finalize = iommufd_backend_finalize,
.class_size = sizeof(IOMMUFDBackendClass),
.class_init = iommufd_backend_class_init,
.interfaces = (InterfaceInfo[]) {
{ TYPE_USER_CREATABLE },
{ }
}
};
static void register_types(void)
{
type_register_static(&iommufd_backend_info);
}
type_init(register_types);
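Every iommufd ioctl argument in the deleted file above starts with ".size = sizeof(...)". That is the extensible-struct convention: userspace records the size of the struct it was compiled against, so the kernel can tell struct versions apart as fields are appended. A stand-alone sketch of the idea, with a made-up argument struct and a toy handler standing in for the kernel:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    struct demo_ioctl_arg {
        uint32_t size;      /* always sizeof(*this), filled in by the caller */
        uint32_t flags;
        uint64_t out_id;    /* later versions extend the struct at the end */
    };

    static int demo_kernel_handler(void *argp, size_t copied)
    {
        struct demo_ioctl_arg arg = { 0 };

        /* Copy min(caller size, our size) and leave the rest zeroed, so old
         * callers work against new handlers and vice versa. */
        memcpy(&arg, argp, copied < sizeof(arg) ? copied : sizeof(arg));
        arg.out_id = 42;
        memcpy(argp, &arg, copied < sizeof(arg) ? copied : sizeof(arg));
        return 0;
    }

    int main(void)
    {
        struct demo_ioctl_arg alloc_arg = {
            .size = sizeof(alloc_arg),   /* same pattern as iommu_ioas_alloc */
            .flags = 0,
        };

        demo_kernel_handler(&alloc_arg, alloc_arg.size);
        printf("allocated id %llu\n", (unsigned long long)alloc_arg.out_id);
        return 0;
    }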


@@ -10,13 +10,9 @@ system_ss.add([files(
     'confidential-guest-support.c',
 ), numa])

-if host_os != 'windows'
-  system_ss.add(files('rng-random.c'))
-  system_ss.add(files('hostmem-file.c'))
-endif
-if host_os == 'linux'
-  system_ss.add(files('hostmem-memfd.c'))
-endif
+system_ss.add(when: 'CONFIG_POSIX', if_true: files('rng-random.c'))
+system_ss.add(when: 'CONFIG_POSIX', if_true: files('hostmem-file.c'))
+system_ss.add(when: 'CONFIG_LINUX', if_true: files('hostmem-memfd.c'))
 if keyutils.found()
   system_ss.add(keyutils, files('cryptodev-lkcf.c'))
 endif
@@ -24,7 +20,6 @@ if have_vhost_user
   system_ss.add(when: 'CONFIG_VIRTIO', if_true: files('vhost-user.c'))
 endif
 system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost.c'))
-system_ss.add(when: 'CONFIG_IOMMUFD', if_true: files('iommufd.c'))
 if have_vhost_user_crypto
   system_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('cryptodev-vhost-user.c'))
 endif


@@ -904,7 +904,7 @@ static void tpm_emulator_vm_state_change(void *opaque, bool running,
     trace_tpm_emulator_vm_state_change(running, state);

-    if (!running || !tpm_emu->relock_storage) {
+    if (!running || state != RUN_STATE_RUNNING || !tpm_emu->relock_storage) {
         return;
     }
@@ -939,7 +939,7 @@ static const VMStateDescription vmstate_tpm_emulator = {
     .version_id = 0,
     .pre_save = tpm_emulator_pre_save,
     .post_load = tpm_emulator_post_load,
-    .fields = (const VMStateField[]) {
+    .fields = (VMStateField[]) {
         VMSTATE_UINT32(state_blobs.permanent_flags, TPMEmulator),
         VMSTATE_UINT32(state_blobs.permanent.size, TPMEmulator),
         VMSTATE_VBUFFER_ALLOC_UINT32(state_blobs.permanent.buffer,
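The one-character change to .fields above hinges on the fact that C compound literals accept qualifiers: writing (const T[]){...} lets the descriptor table live in read-only storage. A stand-alone sketch with simplified stand-in types (not the real VMStateField/VMStateDescription):

    #include <stddef.h>
    #include <stdio.h>

    typedef struct Field {
        const char *name;
        size_t offset;
    } Field;

    typedef struct Description {
        const char *name;
        const Field *fields;   /* pointing at const data documents immutability */
    } Description;

    struct State { unsigned flags; unsigned size; };

    static const Description demo_desc = {
        .name = "demo",
        .fields = (const Field[]) {     /* const compound literal (C99) */
            { "flags", offsetof(struct State, flags) },
            { "size",  offsetof(struct State, size)  },
            { NULL, 0 },
        },
    };

    int main(void)
    {
        for (const Field *f = demo_desc.fields; f->name; f++) {
            printf("%s at offset %zu\n", f->name, f->offset);
        }
        return 0;
    }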


@@ -5,13 +5,3 @@ dbus_vmstate_pre_save(void)
 dbus_vmstate_post_load(int version_id) "version_id: %d"
 dbus_vmstate_loading(const char *id) "id: %s"
 dbus_vmstate_saving(const char *id) "id: %s"
-
-# iommufd.c
-iommufd_backend_connect(int fd, bool owned, uint32_t users, int ret) "fd=%d owned=%d users=%d (%d)"
-iommufd_backend_disconnect(int fd, uint32_t users) "fd=%d users=%d"
-iommu_backend_set_fd(int fd) "pre-opened /dev/iommu fd=%d"
-iommufd_backend_map_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, void *vaddr, bool readonly, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" addr=%p readonly=%d (%d)"
-iommufd_backend_unmap_dma_non_exist(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " Unmap nonexistent mapping: iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
-iommufd_backend_unmap_dma(int iommufd, uint32_t ioas, uint64_t iova, uint64_t size, int ret) " iommufd=%d ioas=%d iova=0x%"PRIx64" size=0x%"PRIx64" (%d)"
-iommufd_backend_alloc_ioas(int iommufd, uint32_t ioas, int ret) " iommufd=%d ioas=%d (%d)"
-iommufd_backend_free_id(int iommufd, uint32_t id, int ret) " iommufd=%d id=%d (%d)"

block.c

@@ -1309,14 +1309,11 @@ static void bdrv_backing_detach(BdrvChild *c)
 }

 static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
-                                        const char *filename,
-                                        bool backing_mask_protocol,
-                                        Error **errp)
+                                        const char *filename, Error **errp)
 {
     BlockDriverState *parent = c->opaque;
     bool read_only = bdrv_is_read_only(parent);
     int ret;
-    const char *format_name;
     GLOBAL_STATE_CODE();

     if (read_only) {
@@ -1326,23 +1323,9 @@ static int bdrv_backing_update_filename(BdrvChild *c, BlockDriverState *base,
         }
     }

-    if (base->drv) {
-        /*
-         * If the new base image doesn't have a format driver layer, which we
-         * detect by the fact that @base is a protocol driver, we record
-         * 'raw' as the format instead of putting the protocol name as the
-         * backing format
-         */
-        if (backing_mask_protocol && base->drv->protocol_name) {
-            format_name = "raw";
-        } else {
-            format_name = base->drv->format_name;
-        }
-    } else {
-        format_name = "";
-    }
-
-    ret = bdrv_change_backing_file(parent, filename, format_name, false);
+    ret = bdrv_change_backing_file(parent, filename,
+                                   base->drv ? base->drv->format_name : "",
+                                   false);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not update backing file link");
     }
@@ -1496,14 +1479,10 @@ static void GRAPH_WRLOCK bdrv_child_cb_detach(BdrvChild *child)
 }

 static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
-                                         const char *filename,
-                                         bool backing_mask_protocol,
-                                         Error **errp)
+                                         const char *filename, Error **errp)
 {
     if (c->role & BDRV_CHILD_COW) {
-        return bdrv_backing_update_filename(c, base, filename,
-                                            backing_mask_protocol,
-                                            errp);
+        return bdrv_backing_update_filename(c, base, filename, errp);
     }
     return 0;
 }
@@ -1637,10 +1616,16 @@ out:
     g_free(gen_node_name);
 }

+/*
+ * The caller must always hold @bs AioContext lock, because this function calls
+ * bdrv_refresh_total_sectors() which polls when called from non-coroutine
+ * context.
+ */
 static int no_coroutine_fn GRAPH_UNLOCKED
 bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
                  QDict *options, int open_flags, Error **errp)
 {
+    AioContext *ctx;
     Error *local_err = NULL;
     int i, ret;
     GLOBAL_STATE_CODE();
@@ -1688,15 +1673,21 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
     bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF;
     bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF;

+    /* Get the context after .bdrv_open, it can change the context */
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
+
     ret = bdrv_refresh_total_sectors(bs, bs->total_sectors);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not refresh total sector count");
+        aio_context_release(ctx);
         return ret;
     }

     bdrv_graph_rdlock_main_loop();
     bdrv_refresh_limits(bs, NULL, &local_err);
     bdrv_graph_rdunlock_main_loop();
+    aio_context_release(ctx);

     if (local_err) {
         error_propagate(errp, local_err);
@@ -1717,7 +1708,7 @@ bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, const char *node_name,
 open_failed:
     bs->drv = NULL;

-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(NULL);
     if (bs->file != NULL) {
         bdrv_unref_child(bs, bs->file);
         assert(!bs->file);
@@ -2917,7 +2908,7 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
  * Replaces the node that a BdrvChild points to without updating permissions.
  *
  * If @new_bs is non-NULL, the parent of @child must already be drained through
- * @child.
+ * @child and the caller must hold the AioContext lock for @new_bs.
  */
 static void GRAPH_WRLOCK
 bdrv_replace_child_noperm(BdrvChild *child, BlockDriverState *new_bs)
@@ -3057,8 +3048,9 @@ static TransactionActionDrv bdrv_attach_child_common_drv = {
  *
  * Returns new created child.
  *
- * Both @parent_bs and @child_bs can move to a different AioContext in this
- * function.
+ * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
+ * @child_bs can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
  */
 static BdrvChild * GRAPH_WRLOCK
 bdrv_attach_child_common(BlockDriverState *child_bs,
@@ -3070,7 +3062,7 @@ bdrv_attach_child_common(BlockDriverState *child_bs,
                          Transaction *tran, Error **errp)
 {
     BdrvChild *new_child;
-    AioContext *parent_ctx;
+    AioContext *parent_ctx, *new_child_ctx;
     AioContext *child_ctx = bdrv_get_aio_context(child_bs);

     assert(child_class->get_parent_desc);
@@ -3122,6 +3114,12 @@ bdrv_attach_child_common(BlockDriverState *child_bs,
         }
     }

+    new_child_ctx = bdrv_get_aio_context(child_bs);
+    if (new_child_ctx != child_ctx) {
+        aio_context_release(child_ctx);
+        aio_context_acquire(new_child_ctx);
+    }
+
     bdrv_ref(child_bs);
     /*
      * Let every new BdrvChild start with a drained parent. Inserting the child
@@ -3151,14 +3149,20 @@ bdrv_attach_child_common(BlockDriverState *child_bs,
     };
     tran_add(tran, &bdrv_attach_child_common_drv, s);

+    if (new_child_ctx != child_ctx) {
+        aio_context_release(new_child_ctx);
+        aio_context_acquire(child_ctx);
+    }
+
     return new_child;
 }

 /*
  * Function doesn't update permissions, caller is responsible for this.
  *
- * Both @parent_bs and @child_bs can move to a different AioContext in this
- * function.
+ * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
+ * @child_bs can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
  *
  * After calling this function, the transaction @tran may only be completed
  * while holding a writer lock for the graph.
@@ -3198,6 +3202,9 @@ bdrv_attach_child_noperm(BlockDriverState *parent_bs,
  *
  * On failure NULL is returned, errp is set and the reference to
  * child_bs is also dropped.
+ *
+ * The caller must hold the AioContext lock @child_bs, but not that of @ctx
+ * (unless @child_bs is already in @ctx).
  */
 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                   const char *child_name,
@@ -3237,6 +3244,9 @@ out:
  *
  * On failure NULL is returned, errp is set and the reference to
  * child_bs is also dropped.
+ *
+ * If @parent_bs and @child_bs are in different AioContexts, the caller must
+ * hold the AioContext lock for @child_bs, but not for @parent_bs.
  */
 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
                              BlockDriverState *child_bs,
@@ -3426,8 +3436,9 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
  *
  * Function doesn't update permissions, caller is responsible for this.
  *
- * Both @parent_bs and @child_bs can move to a different AioContext in this
- * function.
+ * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
+ * @child_bs can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
  *
  * After calling this function, the transaction @tran may only be completed
  * while holding a writer lock for the graph.
@@ -3520,8 +3531,9 @@ out:
 }

 /*
- * Both @bs and @backing_hd can move to a different AioContext in this
- * function.
+ * The caller must hold the AioContext lock for @backing_hd. Both @bs and
+ * @backing_hd can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
  *
  * If a backing child is already present (i.e. we're detaching a node), that
  * child node must be drained.
@@ -3563,7 +3575,7 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
     bdrv_ref(drain_bs);
     bdrv_drained_begin(drain_bs);
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(backing_hd);
     ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp);
     bdrv_graph_wrunlock();
     bdrv_drained_end(drain_bs);
@@ -3580,6 +3592,8 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
  * itself, all options starting with "${bdref_key}." are considered part of the
  * BlockdevRef.
  *
+ * The caller must hold the main AioContext lock.
+ *
  * TODO Can this be unified with bdrv_open_image()?
  */
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
@@ -3591,6 +3605,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
     int ret = 0;
     bool implicit_backing = false;
     BlockDriverState *backing_hd;
+    AioContext *backing_hd_ctx;
     QDict *options;
     QDict *tmp_parent_options = NULL;
     Error *local_err = NULL;
@@ -3676,8 +3691,11 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
     /* Hook up the backing file link; drop our reference, bs owns the
      * backing_hd reference now */
+    backing_hd_ctx = bdrv_get_aio_context(backing_hd);
+    aio_context_acquire(backing_hd_ctx);
     ret = bdrv_set_backing_hd(bs, backing_hd, errp);
     bdrv_unref(backing_hd);
+    aio_context_release(backing_hd_ctx);

     if (ret < 0) {
         goto free_exit;
@@ -3749,7 +3767,9 @@ done:
  *
  * The BlockdevRef will be removed from the options QDict.
  *
- * @parent can move to a different AioContext in this function.
+ * The caller must hold the lock of the main AioContext and no other AioContext.
+ * @parent can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
  */
 BdrvChild *bdrv_open_child(const char *filename,
                            QDict *options, const char *bdref_key,
@@ -3760,6 +3780,7 @@ BdrvChild *bdrv_open_child(const char *filename,
 {
     BlockDriverState *bs;
     BdrvChild *child;
+    AioContext *ctx;

     GLOBAL_STATE_CODE();
@@ -3769,9 +3790,12 @@ BdrvChild *bdrv_open_child(const char *filename,
         return NULL;
     }

-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(NULL);
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
     child = bdrv_attach_child(parent, bs, bdref_key, child_class, child_role,
                               errp);
+    aio_context_release(ctx);
     bdrv_graph_wrunlock();

     return child;
@@ -3780,7 +3804,9 @@ BdrvChild *bdrv_open_child(const char *filename,
 /*
  * Wrapper on bdrv_open_child() for most popular case: open primary child of bs.
  *
- * @parent can move to a different AioContext in this function.
+ * The caller must hold the lock of the main AioContext and no other AioContext.
+ * @parent can move to a different AioContext in this function. Callers must
+ * make sure that their AioContext locking is still correct after this.
  */
 int bdrv_open_file_child(const char *filename,
                          QDict *options, const char *bdref_key,
@@ -3855,6 +3881,7 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
     int64_t total_size;
     QemuOpts *opts = NULL;
     BlockDriverState *bs_snapshot = NULL;
+    AioContext *ctx = bdrv_get_aio_context(bs);
     int ret;

     GLOBAL_STATE_CODE();
@@ -3863,7 +3890,9 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
        instead of opening 'filename' directly */

     /* Get the required size from the image */
+    aio_context_acquire(ctx);
     total_size = bdrv_getlength(bs);
+    aio_context_release(ctx);

     if (total_size < 0) {
         error_setg_errno(errp, -total_size, "Could not get image size");
@@ -3898,7 +3927,10 @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
         goto out;
     }

+    aio_context_acquire(ctx);
     ret = bdrv_append(bs_snapshot, bs, errp);
+    aio_context_release(ctx);

     if (ret < 0) {
         bs_snapshot = NULL;
         goto out;
@@ -3923,6 +3955,8 @@ out:
  * The reference parameter may be used to specify an existing block device which
  * should be opened. If specified, neither options nor a filename may be given,
  * nor can an existing BDS be reused (that is, *pbs has to be NULL).
+ *
+ * The caller must always hold the main AioContext lock.
  */
 static BlockDriverState * no_coroutine_fn
 bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
@@ -3940,6 +3974,7 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
     Error *local_err = NULL;
     QDict *snapshot_options = NULL;
     int snapshot_flags = 0;
+    AioContext *ctx = qemu_get_aio_context();

     assert(!child_class || !flags);
     assert(!child_class == !parent);
@@ -4080,10 +4115,12 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
         /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
          * looking at the header to guess the image format. This works even
          * in cases where a guest would not see a consistent state. */
-        AioContext *ctx = bdrv_get_aio_context(file_bs);
+        ctx = bdrv_get_aio_context(file_bs);
+        aio_context_acquire(ctx);
         file = blk_new(ctx, 0, BLK_PERM_ALL);
         blk_insert_bs(file, file_bs, &local_err);
         bdrv_unref(file_bs);
+        aio_context_release(ctx);

         if (local_err) {
             goto fail;
@@ -4130,8 +4167,13 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
         goto fail;
     }

+    /* The AioContext could have changed during bdrv_open_common() */
+    ctx = bdrv_get_aio_context(bs);
+
     if (file) {
+        aio_context_acquire(ctx);
         blk_unref(file);
+        aio_context_release(ctx);
         file = NULL;
     }
@@ -4189,13 +4231,16 @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
          * (snapshot_bs); thus, we have to drop the strong reference to bs
          * (which we obtained by calling bdrv_new()). bs will not be deleted,
          * though, because the overlay still has a reference to it. */
+        aio_context_acquire(ctx);
         bdrv_unref(bs);
+        aio_context_release(ctx);
         bs = snapshot_bs;
     }

     return bs;

 fail:
+    aio_context_acquire(ctx);
     blk_unref(file);
     qobject_unref(snapshot_options);
     qobject_unref(bs->explicit_options);
@@ -4204,17 +4249,21 @@ fail:
     bs->options = NULL;
     bs->explicit_options = NULL;
     bdrv_unref(bs);
+    aio_context_release(ctx);
     error_propagate(errp, local_err);
     return NULL;

 close_and_fail:
+    aio_context_acquire(ctx);
     bdrv_unref(bs);
+    aio_context_release(ctx);
     qobject_unref(snapshot_options);
     qobject_unref(options);
     error_propagate(errp, local_err);
     return NULL;
 }

+/* The caller must always hold the main AioContext lock. */
 BlockDriverState *bdrv_open(const char *filename, const char *reference,
                             QDict *options, int flags, Error **errp)
 {
@@ -4491,7 +4540,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
     if (bs_queue) {
         BlockReopenQueueEntry *bs_entry, *next;
         QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+            AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs);
+
+            aio_context_acquire(ctx);
             bdrv_drained_end(bs_entry->state.bs);
+            aio_context_release(ctx);
             qobject_unref(bs_entry->state.explicit_options);
             qobject_unref(bs_entry->state.options);
             g_free(bs_entry);
@@ -4523,6 +4577,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
 {
     int ret = -1;
     BlockReopenQueueEntry *bs_entry, *next;
+    AioContext *ctx;
     Transaction *tran = tran_new();
     g_autoptr(GSList) refresh_list = NULL;
@@ -4531,7 +4586,10 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
     GLOBAL_STATE_CODE();

     QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
+        ctx = bdrv_get_aio_context(bs_entry->state.bs);
+        aio_context_acquire(ctx);
         ret = bdrv_flush(bs_entry->state.bs);
+        aio_context_release(ctx);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Error flushing drive");
             goto abort;
@@ -4540,7 +4598,10 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
     QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
         assert(bs_entry->state.bs->quiesce_counter > 0);
+        ctx = bdrv_get_aio_context(bs_entry->state.bs);
+        aio_context_acquire(ctx);
         ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp);
+        aio_context_release(ctx);
         if (ret < 0) {
             goto abort;
         }
@@ -4583,10 +4644,13 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
      * to first element.
      */
     QTAILQ_FOREACH_REVERSE(bs_entry, bs_queue, entry) {
+        ctx = bdrv_get_aio_context(bs_entry->state.bs);
+        aio_context_acquire(ctx);
         bdrv_reopen_commit(&bs_entry->state);
+        aio_context_release(ctx);
     }

-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(NULL);
     tran_commit(tran);
     bdrv_graph_wrunlock();
@@ -4594,7 +4658,10 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
         BlockDriverState *bs = bs_entry->state.bs;

         if (bs->drv->bdrv_reopen_commit_post) {
+            ctx = bdrv_get_aio_context(bs);
+            aio_context_acquire(ctx);
             bs->drv->bdrv_reopen_commit_post(&bs_entry->state);
+            aio_context_release(ctx);
         }
     }
@@ -4602,13 +4669,16 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
     goto cleanup;

 abort:
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(NULL);
     tran_abort(tran);
     bdrv_graph_wrunlock();

     QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
         if (bs_entry->prepared) {
+            ctx = bdrv_get_aio_context(bs_entry->state.bs);
+            aio_context_acquire(ctx);
             bdrv_reopen_abort(&bs_entry->state);
+            aio_context_release(ctx);
         }
     }
@@ -4621,13 +4691,24 @@ cleanup:
 int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
                 Error **errp)
 {
+    AioContext *ctx = bdrv_get_aio_context(bs);
     BlockReopenQueue *queue;
+    int ret;

     GLOBAL_STATE_CODE();

     queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);

-    return bdrv_reopen_multiple(queue, errp);
+    if (ctx != qemu_get_aio_context()) {
+        aio_context_release(ctx);
+    }
+    ret = bdrv_reopen_multiple(queue, errp);
+    if (ctx != qemu_get_aio_context()) {
+        aio_context_acquire(ctx);
+    }
+
+    return ret;
 }
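In the hunk above, bdrv_reopen() has to drop the AioContext lock it was called with before invoking bdrv_reopen_multiple(), which takes per-node locks on its own. A stand-alone pthread sketch of that discipline (illustrative stand-ins, not QEMU's AioContext API):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;

    /* The callee manages the lock itself, like bdrv_reopen_multiple(). */
    static void reopen_multiple(void)
    {
        pthread_mutex_lock(&ctx_lock);
        puts("reopening under the lock");
        pthread_mutex_unlock(&ctx_lock);
    }

    static void reopen(int lock_already_held)
    {
        if (lock_already_held) {
            pthread_mutex_unlock(&ctx_lock);  /* avoid recursive locking */
        }
        reopen_multiple();
        if (lock_already_held) {
            pthread_mutex_lock(&ctx_lock);    /* restore caller's invariant */
        }
    }

    int main(void)
    {
        pthread_mutex_lock(&ctx_lock);   /* caller convention: lock held */
        reopen(1);
        pthread_mutex_unlock(&ctx_lock);
        return 0;
    }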
 int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
@@ -4662,7 +4743,10 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
  *
  * Return 0 on success, otherwise return < 0 and set @errp.
  *
+ * The caller must hold the AioContext lock of @reopen_state->bs.
  * @reopen_state->bs can move to a different AioContext in this function.
+ * Callers must make sure that their AioContext locking is still correct after
+ * this.
  */
 static int GRAPH_UNLOCKED
 bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
@@ -4676,6 +4760,7 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
     const char *child_name = is_backing ? "backing" : "file";
     QObject *value;
     const char *str;
+    AioContext *ctx, *old_ctx;
     bool has_child;
     int ret;
@@ -4759,14 +4844,26 @@ bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
         bdrv_drained_begin(old_child_bs);
     }

+    old_ctx = bdrv_get_aio_context(bs);
+    ctx = bdrv_get_aio_context(new_child_bs);
+    if (old_ctx != ctx) {
+        aio_context_release(old_ctx);
+        aio_context_acquire(ctx);
+    }
+
     bdrv_graph_rdunlock_main_loop();
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(new_child_bs);

     ret = bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
                                           tran, errp);

     bdrv_graph_wrunlock();

+    if (old_ctx != ctx) {
+        aio_context_release(ctx);
+        aio_context_acquire(old_ctx);
+    }
+
     if (old_child_bs) {
         bdrv_drained_end(old_child_bs);
         bdrv_unref(old_child_bs);
@@ -4795,6 +4892,8 @@ out_rdlock:
  * It is the responsibility of the caller to then call the abort() or
  * commit() for any other BDS that have been left in a prepare() state
  *
+ * The caller must hold the AioContext lock of @reopen_state->bs.
+ *
  * After calling this function, the transaction @change_child_tran may only be
  * completed while holding a writer lock for the graph.
  */
@@ -5110,7 +5209,7 @@ static void bdrv_close(BlockDriverState *bs)
         bs->drv = NULL;
     }

-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(bs);
     QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
         bdrv_unref_child(bs, child);
     }
@@ -5410,7 +5509,7 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
     bdrv_graph_rdunlock_main_loop();

     bdrv_drained_begin(child_bs);
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(bs);
     ret = bdrv_replace_node_common(bs, child_bs, true, true, errp);
     bdrv_graph_wrunlock();
     bdrv_drained_end(child_bs);
@@ -5429,6 +5528,8 @@ int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
  * child.
  *
  * This function does not create any image files.
+ *
+ * The caller must hold the AioContext lock for @bs_top.
  */
 int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
                 Error **errp)
@@ -5436,6 +5537,7 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
     int ret;
     BdrvChild *child;
     Transaction *tran = tran_new();
+    AioContext *old_context, *new_context = NULL;

     GLOBAL_STATE_CODE();
@@ -5443,10 +5545,23 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
     assert(!bs_new->backing);
     bdrv_graph_rdunlock_main_loop();

+    old_context = bdrv_get_aio_context(bs_top);
     bdrv_drained_begin(bs_top);
-    bdrv_drained_begin(bs_new);

-    bdrv_graph_wrlock();
+    /*
+     * bdrv_drained_begin() requires that only the AioContext of the drained
+     * node is locked, and at this point it can still differ from the AioContext
+     * of bs_top.
+     */
+    new_context = bdrv_get_aio_context(bs_new);
+    aio_context_release(old_context);
+    aio_context_acquire(new_context);
+    bdrv_drained_begin(bs_new);
+    aio_context_release(new_context);
+    aio_context_acquire(old_context);
+    new_context = NULL;
+
+    bdrv_graph_wrlock(bs_top);
@@ -5456,6 +5571,18 @@ int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
     child = bdrv_attach_child_noperm(bs_new, bs_top, "backing",
                                      &child_of_bds, bdrv_backing_role(bs_new),
         goto out;
     }

+    /*
+     * bdrv_attach_child_noperm could change the AioContext of bs_top and
+     * bs_new, but at least they are in the same AioContext now. This is the
+     * AioContext that we need to lock for the rest of the function.
+     */
+    new_context = bdrv_get_aio_context(bs_top);
+
+    if (old_context != new_context) {
+        aio_context_release(old_context);
+        aio_context_acquire(new_context);
+    }
+
     ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
     if (ret < 0) {
         goto out;
@@ -5471,6 +5598,11 @@ out:
     bdrv_drained_end(bs_top);
     bdrv_drained_end(bs_new);

+    if (new_context && old_context != new_context) {
+        aio_context_release(new_context);
+        aio_context_acquire(old_context);
+    }
+
     return ret;
 }
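bdrv_append() above repeatedly trades the lock it currently holds for another node's lock and back. The same hand-over pattern, reduced to two pthread mutexes (illustrative only, not QEMU code):

    #include <pthread.h>
    #include <stdio.h>

    static void swap_locks(pthread_mutex_t *held, pthread_mutex_t *wanted)
    {
        if (held != wanted) {
            pthread_mutex_unlock(held);
            pthread_mutex_lock(wanted);
        }
    }

    int main(void)
    {
        pthread_mutex_t old_ctx = PTHREAD_MUTEX_INITIALIZER;
        pthread_mutex_t new_ctx = PTHREAD_MUTEX_INITIALIZER;

        pthread_mutex_lock(&old_ctx);     /* caller holds bs_top's context */

        swap_locks(&old_ctx, &new_ctx);   /* e.g. to drain the other node */
        puts("working under new_ctx");
        swap_locks(&new_ctx, &old_ctx);   /* restore the caller's lock */

        pthread_mutex_unlock(&old_ctx);
        return 0;
    }

The guard against held == wanted matters: both nodes may already share a context, and unlocking/relocking the same mutex would be wasted work at best and a lost-invariant bug at worst.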
@@ -5488,7 +5620,7 @@ int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
     bdrv_ref(old_bs);
     bdrv_drained_begin(old_bs);
     bdrv_drained_begin(new_bs);
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(new_bs);

     bdrv_replace_child_tran(child, new_bs, tran);
@@ -5535,8 +5667,9 @@ static void bdrv_delete(BlockDriverState *bs)
  * after the call (even on failure), so if the caller intends to reuse the
  * dictionary, it needs to use qobject_ref() before calling bdrv_open.
  *
- * The caller must make sure that @bs stays in the same AioContext, i.e.
- * @options must not refer to nodes in a different AioContext.
+ * The caller holds the AioContext lock for @bs. It must make sure that @bs
+ * stays in the same AioContext, i.e. @options must not refer to nodes in a
+ * different AioContext.
  */
 BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
                                    int flags, Error **errp)
@@ -5564,8 +5697,12 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
     GLOBAL_STATE_CODE();

+    aio_context_release(ctx);
+    aio_context_acquire(qemu_get_aio_context());
     new_node_bs = bdrv_new_open_driver_opts(drv, node_name, options, flags,
                                             errp);
+    aio_context_release(qemu_get_aio_context());
+    aio_context_acquire(ctx);
     assert(bdrv_get_aio_context(bs) == ctx);

     options = NULL; /* bdrv_new_open_driver() eats options */
@@ -5581,7 +5718,7 @@ BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *options,
     bdrv_ref(bs);
     bdrv_drained_begin(bs);
     bdrv_drained_begin(new_node_bs);
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(new_node_bs);
     ret = bdrv_replace_node(bs, new_node_bs, errp);
     bdrv_graph_wrunlock();
     bdrv_drained_end(new_node_bs);
@@ -5824,8 +5961,7 @@ void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
  *
  */
 int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
-                           const char *backing_file_str,
-                           bool backing_mask_protocol)
+                           const char *backing_file_str)
 {
     BlockDriverState *explicit_top = top;
     bool update_inherits_from;
@@ -5839,7 +5975,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
     bdrv_ref(top);
     bdrv_drained_begin(base);
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(base);

     if (!top->drv || !base->drv) {
         goto exit_wrlock;
@@ -5891,7 +6027,6 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
         if (c->klass->update_filename) {
             ret = c->klass->update_filename(c, base, backing_file_str,
-                                            backing_mask_protocol,
                                             &local_err);
             if (ret < 0) {
                 /*
@@ -6902,9 +7037,12 @@ void bdrv_activate_all(Error **errp)
     GRAPH_RDLOCK_GUARD_MAINLOOP();

     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
         int ret;

+        aio_context_acquire(aio_context);
         ret = bdrv_activate(bs, errp);
+        aio_context_release(aio_context);
         if (ret < 0) {
             bdrv_next_cleanup(&it);
             return;
@@ -6999,10 +7137,20 @@ int bdrv_inactivate_all(void)
     BlockDriverState *bs = NULL;
     BdrvNextIterator it;
     int ret = 0;
+    GSList *aio_ctxs = NULL, *ctx;

     GLOBAL_STATE_CODE();
     GRAPH_RDLOCK_GUARD_MAINLOOP();

+    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        if (!g_slist_find(aio_ctxs, aio_context)) {
+            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
+            aio_context_acquire(aio_context);
+        }
+    }
+
     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
         /* Nodes with BDS parents are covered by recursion from the last
          * parent that gets inactivated. Don't inactivate them a second
@@ -7013,10 +7161,17 @@ int bdrv_inactivate_all(void)
         ret = bdrv_inactivate_recurse(bs);
         if (ret < 0) {
             bdrv_next_cleanup(&it);
-            break;
+            goto out;
         }
     }

+out:
+    for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
+        AioContext *aio_context = ctx->data;
+        aio_context_release(aio_context);
+    }
+    g_slist_free(aio_ctxs);
+
     return ret;
 }
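The loop added to bdrv_inactivate_all() above must lock each distinct AioContext exactly once even though many nodes can share one context. A stand-alone sketch of that deduplicated acquisition, with plain arrays standing in for GSList and BlockDriverState:

    #include <pthread.h>
    #include <stdio.h>

    #define N 4

    int main(void)
    {
        pthread_mutex_t ctx_a = PTHREAD_MUTEX_INITIALIZER;
        pthread_mutex_t ctx_b = PTHREAD_MUTEX_INITIALIZER;
        /* four nodes, sharing two contexts between them */
        pthread_mutex_t *node_ctx[N] = { &ctx_a, &ctx_b, &ctx_a, &ctx_b };

        pthread_mutex_t *acquired[N];
        int n_acquired = 0;

        for (int i = 0; i < N; i++) {
            int seen = 0;
            for (int j = 0; j < n_acquired; j++) {
                seen |= (acquired[j] == node_ctx[i]);
            }
            if (!seen) {
                pthread_mutex_lock(node_ctx[i]);  /* first sighting: lock it */
                acquired[n_acquired++] = node_ctx[i];
            }
        }

        puts("every context held exactly once; run the inactivate pass here");

        for (int j = 0; j < n_acquired; j++) {
            pthread_mutex_unlock(acquired[j]);
        }
        return 0;
    }

Locking a context twice would deadlock on a non-recursive mutex, which is why the membership test has to come before each acquire.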
@@ -7099,13 +7254,6 @@ void bdrv_unref(BlockDriverState *bs)
     }
 }

-static void bdrv_schedule_unref_bh(void *opaque)
-{
-    BlockDriverState *bs = opaque;
-
-    bdrv_unref(bs);
-}
-
 /*
  * Release a BlockDriverState reference while holding the graph write lock.
  *
@@ -7119,7 +7267,8 @@ void bdrv_schedule_unref(BlockDriverState *bs)
     if (!bs) {
         return;
     }
-    aio_bh_schedule_oneshot(qemu_get_aio_context(), bdrv_schedule_unref_bh, bs);
+    aio_bh_schedule_oneshot(qemu_get_aio_context(),
+                            (QEMUBHFunc *) bdrv_unref, bs);
 }

 struct BdrvOpBlocker {
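The left-hand side above replaces a (QEMUBHFunc *) cast with a dedicated one-line wrapper. Calling a function through a pointer of an incompatible type is undefined behavior in C even when it happens to work on common ABIs, and the thunk is the portable fix. A stand-alone sketch:

    #include <stdio.h>

    typedef void (*bh_func)(void *opaque);

    struct state { const char *name; };

    static void object_unref(struct state *s)
    {
        printf("unref %s\n", s->name);
    }

    /* Thunk with the callback's exact type; the cast happens inside. */
    static void object_unref_bh(void *opaque)
    {
        object_unref(opaque);
    }

    static void schedule_oneshot(bh_func fn, void *opaque)
    {
        fn(opaque);   /* a real event loop would defer this call */
    }

    int main(void)
    {
        struct state s = { "node0" };
        schedule_oneshot(object_unref_bh, &s);           /* well-defined */
        /* schedule_oneshot((bh_func)object_unref, &s);     UB: type mismatch */
        return 0;
    }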
@@ -7240,6 +7389,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
         return;
     }

+    aio_context_acquire(qemu_get_aio_context());
+
     /* Create parameter list */
     create_opts = qemu_opts_append(create_opts, drv->create_opts);
     create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
@@ -7357,10 +7508,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
         goto out;
     }

-    /* Parameter 'size' is not needed for detached LUKS header */
-    if (size == -1 &&
-        !(!strcmp(fmt, "luks") &&
-          qemu_opt_get_bool(opts, "detached-header", false))) {
+    if (size == -1) {
         error_setg(errp, "Image creation needs a size parameter");
         goto out;
     }
@@ -7392,6 +7540,7 @@ out:
     qemu_opts_del(opts);
     qemu_opts_free(create_opts);
     error_propagate(errp, local_err);
+    aio_context_release(qemu_get_aio_context());
 }

 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
@@ -7425,6 +7574,33 @@ void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx)
     bdrv_dec_in_flight(bs);
 }

+void coroutine_fn bdrv_co_lock(BlockDriverState *bs)
+{
+    AioContext *ctx = bdrv_get_aio_context(bs);
+
+    /* In the main thread, bs->aio_context won't change concurrently */
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+
+    /*
+     * We're in coroutine context, so we already hold the lock of the main
+     * loop AioContext. Don't lock it twice to avoid deadlocks.
+     */
+    assert(qemu_in_coroutine());
+    if (ctx != qemu_get_aio_context()) {
+        aio_context_acquire(ctx);
+    }
+}
+
+void coroutine_fn bdrv_co_unlock(BlockDriverState *bs)
+{
+    AioContext *ctx = bdrv_get_aio_context(bs);
+
+    assert(qemu_in_coroutine());
+    if (ctx != qemu_get_aio_context()) {
+        aio_context_release(ctx);
+    }
+}
+
 static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
 {
     GLOBAL_STATE_CODE();
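bdrv_co_lock()/bdrv_co_unlock() above only take the lock when the node lives outside the already-held main context, since acquiring a non-recursive lock twice would deadlock. The guard, reduced to pthread mutexes (stand-ins for AioContext, illustration only):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t main_ctx = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t io_ctx = PTHREAD_MUTEX_INITIALIZER;

    static void ctx_lock(pthread_mutex_t *ctx)
    {
        if (ctx != &main_ctx) {      /* the main context is already held */
            pthread_mutex_lock(ctx);
        }
    }

    static void ctx_unlock(pthread_mutex_t *ctx)
    {
        if (ctx != &main_ctx) {
            pthread_mutex_unlock(ctx);
        }
    }

    int main(void)
    {
        pthread_mutex_lock(&main_ctx);   /* invariant: main lock held here */

        ctx_lock(&io_ctx);               /* node in an iothread: really locks */
        puts("accessing node in io_ctx");
        ctx_unlock(&io_ctx);

        ctx_lock(&main_ctx);             /* node in main context: no-op */
        puts("accessing node in main_ctx");
        ctx_unlock(&main_ctx);

        pthread_mutex_unlock(&main_ctx);
        return 0;
    }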
@@ -7543,8 +7719,21 @@ static void bdrv_set_aio_context_commit(void *opaque)
     BdrvStateSetAioContext *state = (BdrvStateSetAioContext *) opaque;
     BlockDriverState *bs = (BlockDriverState *) state->bs;
     AioContext *new_context = state->new_ctx;
+    AioContext *old_context = bdrv_get_aio_context(bs);

+    /*
+     * Take the old AioContext when detaching it from bs.
+     * At this point, new_context lock is already acquired, and we are now
+     * also taking old_context. This is safe as long as bdrv_detach_aio_context
+     * does not call AIO_POLL_WHILE().
+     */
+    if (old_context != qemu_get_aio_context()) {
+        aio_context_acquire(old_context);
+    }
     bdrv_detach_aio_context(bs);
+    if (old_context != qemu_get_aio_context()) {
+        aio_context_release(old_context);
+    }
     bdrv_attach_aio_context(bs, new_context);
 }
@@ -7559,6 +7748,10 @@ static TransactionActionDrv set_aio_context = {
  *
  * Must be called from the main AioContext.
  *
+ * The caller must own the AioContext lock for the old AioContext of bs, but it
+ * must not own the AioContext lock for new_context (unless new_context is the
+ * same as the current context of bs).
+ *
  * @visited will accumulate all visited BdrvChild objects. The caller is
  * responsible for freeing the list afterwards.
  */
@@ -7611,6 +7804,13 @@ static bool bdrv_change_aio_context(BlockDriverState *bs, AioContext *ctx,
  *
  * If ignore_child is not NULL, that child (and its subgraph) will not
  * be touched.
+ *
+ * This function still requires the caller to take the bs current
+ * AioContext lock, otherwise draining will fail since AIO_WAIT_WHILE
+ * assumes the lock is always held if bs is in another AioContext.
+ * For the same reason, it temporarily also holds the new AioContext, since
+ * bdrv_drained_end calls BDRV_POLL_WHILE that assumes the lock is taken too.
+ * Therefore the new AioContext lock must not be taken by the caller.
  */
 int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
                                 BdrvChild *ignore_child, Error **errp)
@@ -7618,6 +7818,7 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
     Transaction *tran;
     GHashTable *visited;
     int ret;
+    AioContext *old_context = bdrv_get_aio_context(bs);
     GLOBAL_STATE_CODE();

     /*
@@ -7636,8 +7837,8 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
     /*
      * Linear phase: go through all callbacks collected in the transaction.
-     * Run all callbacks collected in the recursion to switch every node's
-     * AioContext (transaction commit), or undo all changes done in the
+     * Run all callbacks collected in the recursion to switch all nodes
+     * AioContext lock (transaction commit), or undo all changes done in the
      * recursion (transaction abort).
      */
@@ -7647,7 +7848,34 @@ int bdrv_try_change_aio_context(BlockDriverState *bs, AioContext *ctx,
         return -EPERM;
     }

+    /*
+     * Release old AioContext, it won't be needed anymore, as all
+     * bdrv_drained_begin() have been called already.
+     */
+    if (qemu_get_aio_context() != old_context) {
+        aio_context_release(old_context);
+    }
+
+    /*
+     * Acquire new AioContext since bdrv_drained_end() is going to be called
+     * after we switched all nodes in the new AioContext, and the function
+     * assumes that the lock of the bs is always taken.
+     */
+    if (qemu_get_aio_context() != ctx) {
+        aio_context_acquire(ctx);
+    }
+
     tran_commit(tran);
+
+    if (qemu_get_aio_context() != ctx) {
+        aio_context_release(ctx);
+    }
+
+    /* Re-acquire the old AioContext, since the caller takes and releases it. */
+    if (qemu_get_aio_context() != old_context) {
+        aio_context_acquire(old_context);
+    }
+
     return 0;
 }
@@ -7769,6 +7997,7 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
                              const char *node_name, Error **errp)
 {
     BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
+    AioContext *aio_context;

     GLOBAL_STATE_CODE();
@@ -7777,8 +8006,12 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
         return NULL;
     }

+    aio_context = bdrv_get_aio_context(to_replace_bs);
+    aio_context_acquire(aio_context);
+
     if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
-        return NULL;
+        to_replace_bs = NULL;
+        goto out;
     }

     /* We don't want arbitrary node of the BDS chain to be replaced only the top
@@ -7791,9 +8024,12 @@ BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
                "because it cannot be guaranteed that doing so would not "
                "lead to an abrupt change of visible data",
                node_name, parent_bs->node_name);
-        return NULL;
+        to_replace_bs = NULL;
+        goto out;
     }

+out:
+    aio_context_release(aio_context);
     return to_replace_bs;
 }


@@ -496,7 +496,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
     block_copy_set_speed(bcs, speed);

     /* Required permissions are taken by copy-before-write filter target */
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(target);
     block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
                        &error_abort);
     bdrv_graph_wrunlock();


@@ -68,7 +68,7 @@ typedef struct {
     CoQueue bounce_available;

     /* The value of the "mem-region-alignment" property */
-    uint64_t mem_region_alignment;
+    size_t mem_region_alignment;

     /* Can we skip adding/deleting blkio_mem_regions? */
     bool needs_mem_regions;
@@ -89,9 +89,6 @@ static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes)
     /* Pad size to reduce frequency of resize calls */
     bytes += 128 * 1024;

-    /* Align the pool size to avoid blkio_alloc_mem_region() failure */
-    bytes = QEMU_ALIGN_UP(bytes, s->mem_region_alignment);
-
     WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
         int ret;

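For reference, QEMU_ALIGN_UP() rounds its first argument up to the next multiple of the second, so the line dropped above is what guaranteed the bounce pool size was an exact multiple of the device's memory region alignment. A small standalone illustration (4 KiB alignment assumed):

    size_t alignment = 4096;             /* assumed mem-region-alignment */
    int64_t bytes = 130 * 1024 + 1;      /* padded request size */
    bytes = QEMU_ALIGN_UP(bytes, alignment);
    /* bytes == 135168, i.e. 33 * 4096: the next 4 KiB boundary */
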

@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2017 Tuomas Tynkkynen <tuomas@tuxera.com>
  * Copyright (c) 2018 Aapo Vienamo <aapo@tuxera.com>
- * Copyright (c) 2018-2024 Ari Sundholm <ari@tuxera.com>
+ * Copyright (c) 2018 Ari Sundholm <ari@tuxera.com>
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -55,34 +55,9 @@ typedef struct {
     BdrvChild *log_file;
     uint32_t sectorsize;
     uint32_t sectorbits;
-    uint64_t update_interval;
-
-    /*
-     * The mutable state of the driver, consisting of the current log sector
-     * and the number of log entries.
-     *
-     * May be read and/or written from multiple threads, and the mutex must be
-     * held when accessing these fields.
-     */
     uint64_t cur_log_sector;
     uint64_t nr_entries;
-    QemuMutex mutex;
-
-    /*
-     * The super block sequence number. Non-zero if a super block update is in
-     * progress.
-     *
-     * The mutex must be held when accessing this field.
-     */
-    uint64_t super_update_seq;
-
-    /*
-     * A coroutine-aware queue to serialize super block updates.
-     *
-     * Used with the mutex to ensure that only one thread is updating the super
-     * block at a time.
-     */
-    CoQueue super_update_queue;
+    uint64_t update_interval;
 } BDRVBlkLogWritesState;

 static QemuOptsList runtime_opts = {
@@ -194,9 +169,6 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }

-    qemu_mutex_init(&s->mutex);
-    qemu_co_queue_init(&s->super_update_queue);
-
     log_append = qemu_opt_get_bool(opts, "log-append", false);

     if (log_append) {
@@ -259,8 +231,6 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
         s->nr_entries = 0;
     }

-    s->super_update_seq = 0;
-
     if (!blk_log_writes_sector_size_valid(log_sector_size)) {
         ret = -EINVAL;
         error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size);
@@ -281,11 +251,10 @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
     ret = 0;
 fail_log:
     if (ret < 0) {
-        bdrv_graph_wrlock();
+        bdrv_graph_wrlock(NULL);
         bdrv_unref_child(bs, s->log_file);
         bdrv_graph_wrunlock();
         s->log_file = NULL;
-        qemu_mutex_destroy(&s->mutex);
     }
 fail:
     qemu_opts_del(opts);
@@ -296,11 +265,10 @@ static void blk_log_writes_close(BlockDriverState *bs)
 {
     BDRVBlkLogWritesState *s = bs->opaque;

-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(NULL);
     bdrv_unref_child(bs, s->log_file);
     s->log_file = NULL;
     bdrv_graph_wrunlock();
-    qemu_mutex_destroy(&s->mutex);
 }

 static int64_t coroutine_fn GRAPH_RDLOCK
@@ -327,7 +295,7 @@ static void blk_log_writes_child_perm(BlockDriverState *bs, BdrvChild *c,
 static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp)
 {
-    const BDRVBlkLogWritesState *s = bs->opaque;
+    BDRVBlkLogWritesState *s = bs->opaque;
     bs->bl.request_alignment = s->sectorsize;
 }

@@ -360,85 +328,38 @@
 blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
 {
     BDRVBlkLogWritesState *s = lr->bs->opaque;
-
-    /*
-     * Determine the offsets and sizes of different parts of the entry, and
-     * update the state of the driver.
-     *
-     * This needs to be done in one go, before any actual I/O is done, as the
-     * log entry may have to be written in two parts, and the state of the
-     * driver may be modified by other driver operations while waiting for the
-     * I/O to complete.
-     */
-    qemu_mutex_lock(&s->mutex);
-    const uint64_t entry_start_sector = s->cur_log_sector;
-    const uint64_t entry_offset = entry_start_sector << s->sectorbits;
-    const uint64_t qiov_aligned_size = ROUND_UP(lr->qiov->size, s->sectorsize);
-    const uint64_t entry_aligned_size = qiov_aligned_size +
-        ROUND_UP(lr->zero_size, s->sectorsize);
-    const uint64_t entry_nr_sectors = entry_aligned_size >> s->sectorbits;
-    const uint64_t entry_seq = s->nr_entries + 1;
-
-    s->nr_entries = entry_seq;
-    s->cur_log_sector += entry_nr_sectors;
-    qemu_mutex_unlock(&s->mutex);
-
-    /*
-     * Write the log entry. Note that if this is a "write zeroes" operation,
-     * only the entry header is written here, with the zeroing being done
-     * separately below.
-     */
-    lr->log_ret = bdrv_co_pwritev(s->log_file, entry_offset, lr->qiov->size,
+    uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits;
+
+    s->nr_entries++;
+    s->cur_log_sector +=
+            ROUND_UP(lr->qiov->size, s->sectorsize) >> s->sectorbits;
+
+    lr->log_ret = bdrv_co_pwritev(s->log_file, cur_log_offset, lr->qiov->size,
                                   lr->qiov, 0);

     /* Logging for the "write zeroes" operation */
     if (lr->log_ret == 0 && lr->zero_size) {
-        const uint64_t zeroes_offset = entry_offset + qiov_aligned_size;
+        cur_log_offset = s->cur_log_sector << s->sectorbits;
+        s->cur_log_sector +=
+                ROUND_UP(lr->zero_size, s->sectorsize) >> s->sectorbits;

-        lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, zeroes_offset,
+        lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, cur_log_offset,
                                             lr->zero_size, 0);
     }

     /* Update super block on flush or every update interval */
     if (lr->log_ret == 0 && ((lr->entry.flags & LOG_FLUSH_FLAG)
-        || (entry_seq % s->update_interval == 0)))
+        || (s->nr_entries % s->update_interval == 0)))
     {
         struct log_write_super super = {
             .magic = cpu_to_le64(WRITE_LOG_MAGIC),
             .version = cpu_to_le64(WRITE_LOG_VERSION),
-            .nr_entries = 0, /* updated below */
+            .nr_entries = cpu_to_le64(s->nr_entries),
             .sectorsize = cpu_to_le32(s->sectorsize),
         };
-        void *zeroes;
+        void *zeroes = g_malloc0(s->sectorsize - sizeof(super));
         QEMUIOVector qiov;

-        /*
-         * Wait if a super block update is already in progress.
-         * Bail out if a newer update got its turn before us.
-         */
-        WITH_QEMU_LOCK_GUARD(&s->mutex) {
-            CoQueueWaitFlags wait_flags = 0;
-            while (s->super_update_seq) {
-                if (entry_seq < s->super_update_seq) {
-                    return;
-                }
-                qemu_co_queue_wait_flags(&s->super_update_queue,
-                                         &s->mutex, wait_flags);
-
-                /*
-                 * In case the wait condition remains true after wakeup,
-                 * to avoid starvation, make sure that this request is
-                 * scheduled to rerun next by pushing it to the front of the
-                 * queue.
-                 */
-                wait_flags = CO_QUEUE_WAIT_FRONT;
-            }
-            s->super_update_seq = entry_seq;
-            super.nr_entries = cpu_to_le64(s->nr_entries);
-        }
-
-        zeroes = g_malloc0(s->sectorsize - sizeof(super));
-
         qemu_iovec_init(&qiov, 2);
         qemu_iovec_add(&qiov, &super, sizeof(super));
         qemu_iovec_add(&qiov, zeroes, s->sectorsize - sizeof(super));
@@ -448,13 +369,6 @@ blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
         if (lr->log_ret == 0) {
             lr->log_ret = bdrv_co_flush(s->log_file->bs);
         }
-
-        /* The super block has been updated. Let another request have a go. */
-        qemu_mutex_lock(&s->mutex);
-        s->super_update_seq = 0;
-        (void) qemu_co_queue_next(&s->super_update_queue);
-        qemu_mutex_unlock(&s->mutex);
-
         qemu_iovec_destroy(&qiov);
         g_free(zeroes);
     }
@@ -474,7 +388,7 @@ blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
 {
     QEMUIOVector log_qiov;
     size_t niov = qiov ? qiov->niov : 0;
-    const BDRVBlkLogWritesState *s = bs->opaque;
+    BDRVBlkLogWritesState *s = bs->opaque;
     BlkLogWritesFileReq fr = {
         .bs = bs,
         .offset = offset,

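The left-hand (newer) version of blk_log_writes_co_do_log() removed here serializes super block writes with a sequence number guarded by the mutex plus a CoQueue. Condensed into a sketch (same field names as above; the helper itself is hypothetical), the gate works like this:

    static void coroutine_fn super_update_gate(BDRVBlkLogWritesState *s,
                                               uint64_t entry_seq)
    {
        WITH_QEMU_LOCK_GUARD(&s->mutex) {
            CoQueueWaitFlags wait_flags = 0;
            while (s->super_update_seq) {
                if (entry_seq < s->super_update_seq) {
                    return;   /* a newer entry already covers this update */
                }
                qemu_co_queue_wait_flags(&s->super_update_queue,
                                         &s->mutex, wait_flags);
                /* Re-queue at the front after wakeup to avoid starvation. */
                wait_flags = CO_QUEUE_WAIT_FRONT;
            }
            s->super_update_seq = entry_seq;   /* we own the update now */
        }

        /* ...write and flush the super block outside the mutex... */

        qemu_mutex_lock(&s->mutex);
        s->super_update_seq = 0;
        qemu_co_queue_next(&s->super_update_queue);   /* wake one waiter */
        qemu_mutex_unlock(&s->mutex);
    }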

@@ -151,7 +151,7 @@ static void blkverify_close(BlockDriverState *bs)
 {
     BDRVBlkverifyState *s = bs->opaque;

-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(NULL);
     bdrv_unref_child(bs, s->test_file);
     s->test_file = NULL;
     bdrv_graph_wrunlock();


@@ -44,7 +44,7 @@ struct BlockBackend {
     char *name;
     int refcnt;
     BdrvChild *root;
-    AioContext *ctx; /* access with atomic operations only */
+    AioContext *ctx;
     DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
     QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
     QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
@@ -390,6 +390,8 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm)
  * Both sets of permissions can be changed later using blk_set_perm().
  *
  * Return the new BlockBackend on success, null on failure.
+ *
+ * Callers must hold the AioContext lock of @bs.
  */
 BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
                               uint64_t shared_perm, Error **errp)
@@ -414,6 +416,8 @@ BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
  * Just as with bdrv_open(), after having called this function the reference to
  * @options belongs to the block layer (even on failure).
  *
+ * Called without holding an AioContext lock.
+ *
  * TODO: Remove @filename and @flags; it should be possible to specify a whole
  * BDS tree just by specifying the @options QDict (or @reference,
  * alternatively). At the time of adding this function, this is not possible,
@@ -425,6 +429,7 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
 {
     BlockBackend *blk;
     BlockDriverState *bs;
+    AioContext *ctx;
     uint64_t perm = 0;
     uint64_t shared = BLK_PERM_ALL;
@@ -454,18 +459,23 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
         shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
     }

+    aio_context_acquire(qemu_get_aio_context());
     bs = bdrv_open(filename, reference, options, flags, errp);
+    aio_context_release(qemu_get_aio_context());
     if (!bs) {
         return NULL;
     }

     /* bdrv_open() could have moved bs to a different AioContext */
+    ctx = bdrv_get_aio_context(bs);
     blk = blk_new(bdrv_get_aio_context(bs), perm, shared);
     blk->perm = perm;
     blk->shared_perm = shared;

+    aio_context_acquire(ctx);
     blk_insert_bs(blk, bs, errp);
     bdrv_unref(bs);
+    aio_context_release(ctx);

     if (!blk->root) {
         blk_unref(blk);
@@ -567,9 +577,13 @@ void blk_remove_all_bs(void)
     GLOBAL_STATE_CODE();

     while ((blk = blk_all_next(blk)) != NULL) {
+        AioContext *ctx = blk_get_aio_context(blk);
+
+        aio_context_acquire(ctx);
         if (blk->root) {
             blk_remove_bs(blk);
         }
+        aio_context_release(ctx);
     }
 }
@@ -902,21 +916,22 @@ void blk_remove_bs(BlockBackend *blk)
     root = blk->root;
     blk->root = NULL;

-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(NULL);
     bdrv_root_unref_child(root);
     bdrv_graph_wrunlock();
 }

 /*
  * Associates a new BlockDriverState with @blk.
+ *
+ * Callers must hold the AioContext lock of @bs.
  */
 int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
 {
     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;
     GLOBAL_STATE_CODE();
     bdrv_ref(bs);
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(bs);
     blk->root = bdrv_root_attach_child(bs, "root", &child_root,
                                        BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
                                        blk->perm, blk->shared_perm,
@@ -2414,22 +2429,22 @@ void blk_op_unblock_all(BlockBackend *blk, Error *reason)
     }
 }

-/**
- * Return BB's current AioContext. Note that this context may change
- * concurrently at any time, with one exception: If the BB has a root node
- * attached, its context will only change through bdrv_try_change_aio_context(),
- * which creates a drained section. Therefore, incrementing such a BB's
- * in-flight counter will prevent its context from changing.
- */
 AioContext *blk_get_aio_context(BlockBackend *blk)
 {
+    BlockDriverState *bs;
     IO_CODE();

     if (!blk) {
         return qemu_get_aio_context();
     }

-    return qatomic_read(&blk->ctx);
+    bs = blk_bs(blk);
+    if (bs) {
+        AioContext *ctx = bdrv_get_aio_context(blk_bs(blk));
+        assert(ctx == blk->ctx);
+    }
+
+    return blk->ctx;
 }

 int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
@@ -2442,7 +2457,7 @@ int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
     GLOBAL_STATE_CODE();

     if (!bs) {
-        qatomic_set(&blk->ctx, new_context);
+        blk->ctx = new_context;
         return 0;
     }

@@ -2471,7 +2486,7 @@ static void blk_root_set_aio_ctx_commit(void *opaque)
     AioContext *new_context = s->new_ctx;
     ThrottleGroupMember *tgm = &blk->public.throttle_group_member;

-    qatomic_set(&blk->ctx, new_context);
+    blk->ctx = new_context;
     if (tgm->throttle_state) {
         throttle_group_detach_aio_context(tgm);
         throttle_group_attach_aio_context(tgm, new_context);
@@ -2718,16 +2733,20 @@ int blk_commit_all(void)
     GRAPH_RDLOCK_GUARD_MAINLOOP();

     while ((blk = blk_all_next(blk)) != NULL) {
+        AioContext *aio_context = blk_get_aio_context(blk);
         BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk));

+        aio_context_acquire(aio_context);
         if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) {
             int ret;

             ret = bdrv_commit(unfiltered_bs);
             if (ret < 0) {
+                aio_context_release(aio_context);
                 return ret;
             }
         }
+        aio_context_release(aio_context);
     }
     return 0;
 }

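The doc comment removed from blk_get_aio_context() states when the returned context is actually stable: with a root node attached, the context only changes inside a drained section, so an in-flight request pins it. The pattern it describes, as a sketch:

    blk_inc_in_flight(blk);                      /* blocks a drain... */
    AioContext *ctx = blk_get_aio_context(blk);  /* ...so ctx stays valid */
    /* ... submit work bound to ctx ... */
    blk_dec_in_flight(blk);
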

@@ -42,7 +42,6 @@ typedef struct CommitBlockJob {
     bool base_read_only;
     bool chain_frozen;
     char *backing_file_str;
-    bool backing_mask_protocol;
 } CommitBlockJob;

 static int commit_prepare(Job *job)
@@ -62,8 +61,7 @@ static int commit_prepare(Job *job)
     /* FIXME: bdrv_drop_intermediate treats total failures and partial failures
      * identically. Further work is needed to disambiguate these cases. */
     return bdrv_drop_intermediate(s->commit_top_bs, s->base_bs,
-                                  s->backing_file_str,
-                                  s->backing_mask_protocol);
+                                  s->backing_file_str);
 }

 static void commit_abort(Job *job)
@@ -102,7 +100,7 @@ static void commit_abort(Job *job)
     bdrv_graph_rdunlock_main_loop();

     bdrv_drained_begin(commit_top_backing_bs);
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(commit_top_backing_bs);
     bdrv_replace_node(s->commit_top_bs, commit_top_backing_bs, &error_abort);
     bdrv_graph_wrunlock();
     bdrv_drained_end(commit_top_backing_bs);
@@ -256,7 +254,6 @@ void commit_start(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *base, BlockDriverState *top,
                   int creation_flags, int64_t speed,
                   BlockdevOnError on_error, const char *backing_file_str,
-                  bool backing_mask_protocol,
                   const char *filter_node_name, Error **errp)
 {
     CommitBlockJob *s;
@@ -342,7 +339,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
      * this is the responsibility of the interface (i.e. whoever calls
      * commit_start()).
      */
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(top);
     s->base_overlay = bdrv_find_overlay(top, base);
     assert(s->base_overlay);
@@ -411,7 +408,6 @@ void commit_start(const char *job_id, BlockDriverState *bs,
     blk_set_disable_request_queuing(s->top, true);

     s->backing_file_str = g_strdup(backing_file_str);
-    s->backing_mask_protocol = backing_mask_protocol;
     s->on_error = on_error;

     trace_commit_start(bs, base, top, s);
@@ -438,7 +434,7 @@ fail:
      * otherwise this would fail because of lack of permissions. */
     if (commit_top_bs) {
         bdrv_drained_begin(top);
-        bdrv_graph_wrlock();
+        bdrv_graph_wrlock(top);
         bdrv_replace_node(commit_top_bs, top, &error_abort);
         bdrv_graph_wrunlock();
         bdrv_drained_end(top);


@@ -412,6 +412,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
     int64_t cluster_size;
     g_autoptr(BlockdevOptions) full_opts = NULL;
     BlockdevOptionsCbw *opts;
+    AioContext *ctx;
     int ret;

     full_opts = cbw_parse_options(options, errp);
@@ -434,11 +435,15 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,

     GRAPH_RDLOCK_GUARD_MAINLOOP();

+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
+
     if (opts->bitmap) {
         bitmap = block_dirty_bitmap_lookup(opts->bitmap->node,
                                            opts->bitmap->name, NULL, errp);
         if (!bitmap) {
-            return -EINVAL;
+            ret = -EINVAL;
+            goto out;
         }
     }
     s->on_cbw_error = opts->has_on_cbw_error ? opts->on_cbw_error :
@@ -456,21 +461,24 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
     s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
     if (!s->bcs) {
         error_prepend(errp, "Cannot create block-copy-state: ");
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
     }

     cluster_size = block_copy_cluster_size(s->bcs);

     s->done_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
     if (!s->done_bitmap) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
     }
     bdrv_disable_dirty_bitmap(s->done_bitmap);

     /* s->access_bitmap starts equal to bcs bitmap */
     s->access_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
     if (!s->access_bitmap) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
     }
     bdrv_disable_dirty_bitmap(s->access_bitmap);
     bdrv_dirty_bitmap_merge_internal(s->access_bitmap,
@@ -479,7 +487,11 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
     qemu_co_mutex_init(&s->lock);
     QLIST_INIT(&s->frozen_read_reqs);

-    return 0;
+    ret = 0;
+out:
+    aio_context_release(ctx);
+    return ret;
 }

 static void cbw_close(BlockDriverState *bs)


@@ -39,7 +39,6 @@ typedef struct BlockCrypto BlockCrypto;
 struct BlockCrypto {
     QCryptoBlock *block;
     bool updating_keys;
-    BdrvChild *header;  /* Reference to the detached LUKS header */
 };

@@ -64,14 +63,12 @@ static int block_crypto_read_func(QCryptoBlock *block,
                                   Error **errp)
 {
     BlockDriverState *bs = opaque;
-    BlockCrypto *crypto = bs->opaque;
     ssize_t ret;

     GLOBAL_STATE_CODE();
     GRAPH_RDLOCK_GUARD_MAINLOOP();

-    ret = bdrv_pread(crypto->header ? crypto->header : bs->file,
-                     offset, buflen, buf, 0);
+    ret = bdrv_pread(bs->file, offset, buflen, buf, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not read encryption header");
         return ret;
@@ -87,14 +84,12 @@ static int block_crypto_write_func(QCryptoBlock *block,
                                    Error **errp)
 {
     BlockDriverState *bs = opaque;
-    BlockCrypto *crypto = bs->opaque;
     ssize_t ret;

     GLOBAL_STATE_CODE();
     GRAPH_RDLOCK_GUARD_MAINLOOP();

-    ret = bdrv_pwrite(crypto->header ? crypto->header : bs->file,
-                      offset, buflen, buf, 0);
+    ret = bdrv_pwrite(bs->file, offset, buflen, buf, 0);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not write encryption header");
         return ret;
@@ -162,48 +157,6 @@ error:
     return ret;
 }

-static int coroutine_fn GRAPH_UNLOCKED
-block_crypto_co_format_luks_payload(BlockdevCreateOptionsLUKS *luks_opts,
-                                    Error **errp)
-{
-    BlockDriverState *bs = NULL;
-    BlockBackend *blk = NULL;
-    Error *local_error = NULL;
-    int ret;
-
-    if (luks_opts->size > INT64_MAX) {
-        return -EFBIG;
-    }
-
-    bs = bdrv_co_open_blockdev_ref(luks_opts->file, errp);
-    if (bs == NULL) {
-        return -EIO;
-    }
-
-    blk = blk_co_new_with_bs(bs, BLK_PERM_WRITE | BLK_PERM_RESIZE,
-                             BLK_PERM_ALL, errp);
-    if (!blk) {
-        ret = -EPERM;
-        goto fail;
-    }
-
-    ret = blk_truncate(blk, luks_opts->size, true,
-                       luks_opts->preallocation, 0, &local_error);
-    if (ret < 0) {
-        if (ret == -EFBIG) {
-            /* Replace the error message with a better one */
-            error_free(local_error);
-            error_setg(errp, "The requested file size is too large");
-        }
-        goto fail;
-    }
-
-    ret = 0;
-
-fail:
-    bdrv_co_unref(bs);
-    return ret;
-}

 static QemuOptsList block_crypto_runtime_opts_luks = {
     .name = "crypto",
@@ -231,7 +184,6 @@ static QemuOptsList block_crypto_create_opts_luks = {
         BLOCK_CRYPTO_OPT_DEF_LUKS_IVGEN_HASH_ALG(""),
         BLOCK_CRYPTO_OPT_DEF_LUKS_HASH_ALG(""),
         BLOCK_CRYPTO_OPT_DEF_LUKS_ITER_TIME(""),
-        BLOCK_CRYPTO_OPT_DEF_LUKS_DETACHED_HEADER(""),
         { /* end of list */ }
     },
 };
@@ -310,8 +262,6 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
                                      int flags,
                                      Error **errp)
 {
-    ERRP_GUARD();
-
     BlockCrypto *crypto = bs->opaque;
     QemuOpts *opts = NULL;
     int ret;
@@ -326,13 +276,6 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
         return ret;
     }

-    crypto->header = bdrv_open_child(NULL, options, "header", bs,
-                                     &child_of_bds, BDRV_CHILD_METADATA,
-                                     true, errp);
-    if (*errp != NULL) {
-        return -EINVAL;
-    }
-
     GRAPH_RDLOCK_GUARD_MAINLOOP();

     bs->supported_write_flags = BDRV_REQ_FUA &
@@ -356,9 +299,6 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
     if (flags & BDRV_O_NO_IO) {
         cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
     }
-    if (crypto->header != NULL) {
-        cflags |= QCRYPTO_BLOCK_OPEN_DETACHED;
-    }
     crypto->block = qcrypto_block_open(open_opts, NULL,
                                        block_crypto_read_func,
                                        bs,
@@ -384,9 +324,7 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
 static int coroutine_fn GRAPH_UNLOCKED
 block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
                                QCryptoBlockCreateOptions *opts,
-                               PreallocMode prealloc,
-                               unsigned int flags,
-                               Error **errp)
+                               PreallocMode prealloc, Error **errp)
 {
     int ret;
     BlockBackend *blk;
@@ -406,7 +344,7 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
     data = (struct BlockCryptoCreateData) {
         .blk = blk,
-        .size = flags & QCRYPTO_BLOCK_CREATE_DETACHED ? 0 : size,
+        .size = size,
         .prealloc = prealloc,
     };

@@ -414,7 +352,6 @@ block_crypto_co_create_generic(BlockDriverState *bs, int64_t size,
                                 block_crypto_create_init_func,
                                 block_crypto_create_write_func,
                                 &data,
-                                flags,
                                 errp);

     if (!crypto) {
@@ -701,27 +638,17 @@
 block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
 {
     BlockdevCreateOptionsLUKS *luks_opts;
-    BlockDriverState *hdr_bs = NULL;
     BlockDriverState *bs = NULL;
     QCryptoBlockCreateOptions create_opts;
     PreallocMode preallocation = PREALLOC_MODE_OFF;
-    unsigned int cflags = 0;
     int ret;

     assert(create_options->driver == BLOCKDEV_DRIVER_LUKS);
     luks_opts = &create_options->u.luks;

-    if (luks_opts->header == NULL && luks_opts->file == NULL) {
-        error_setg(errp, "Either the parameter 'header' or 'file' must "
-                   "be specified");
-        return -EINVAL;
-    }
-    if ((luks_opts->preallocation != PREALLOC_MODE_OFF) &&
-        (luks_opts->file == NULL)) {
-        error_setg(errp, "Parameter 'preallocation' requires 'file' to be "
-                   "specified for formatting LUKS disk");
-        return -EINVAL;
+    bs = bdrv_co_open_blockdev_ref(luks_opts->file, errp);
+    if (bs == NULL) {
+        return -EIO;
     }

     create_opts = (QCryptoBlockCreateOptions) {
@@ -733,52 +660,15 @@ block_crypto_co_create_luks(BlockdevCreateOptions *create_options, Error **errp)
         preallocation = luks_opts->preallocation;
     }

-    if (luks_opts->header) {
-        /* LUKS volume with detached header */
-        hdr_bs = bdrv_co_open_blockdev_ref(luks_opts->header, errp);
-        if (hdr_bs == NULL) {
-            return -EIO;
-        }
-
-        cflags |= QCRYPTO_BLOCK_CREATE_DETACHED;
-
-        /* Format the LUKS header node */
-        ret = block_crypto_co_create_generic(hdr_bs, 0, &create_opts,
-                                             PREALLOC_MODE_OFF, cflags, errp);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        /* Format the LUKS payload node */
-        if (luks_opts->file) {
-            ret = block_crypto_co_format_luks_payload(luks_opts, errp);
-            if (ret < 0) {
-                goto fail;
-            }
-        }
-    } else if (luks_opts->file) {
-        /* LUKS volume with non-detached header */
-        bs = bdrv_co_open_blockdev_ref(luks_opts->file, errp);
-        if (bs == NULL) {
-            return -EIO;
-        }
-
-        ret = block_crypto_co_create_generic(bs, luks_opts->size, &create_opts,
-                                             preallocation, cflags, errp);
-        if (ret < 0) {
-            goto fail;
-        }
+    ret = block_crypto_co_create_generic(bs, luks_opts->size, &create_opts,
+                                         preallocation, errp);
+    if (ret < 0) {
+        goto fail;
     }

     ret = 0;

 fail:
-    if (hdr_bs != NULL) {
-        bdrv_co_unref(hdr_bs);
-    }
-
-    if (bs != NULL) {
-        bdrv_co_unref(bs);
-    }
+    bdrv_co_unref(bs);
     return ret;
 }

@@ -792,9 +682,6 @@ block_crypto_co_create_opts_luks(BlockDriver *drv, const char *filename,
     PreallocMode prealloc;
     char *buf = NULL;
     int64_t size;
-    bool detached_hdr =
-        qemu_opt_get_bool(opts, "detached-header", false);
-    unsigned int cflags = 0;
     int ret;
     Error *local_err = NULL;

@@ -834,13 +721,8 @@ block_crypto_co_create_opts_luks(BlockDriver *drv, const char *filename,
         goto fail;
     }

-    if (detached_hdr) {
-        cflags |= QCRYPTO_BLOCK_CREATE_DETACHED;
-    }
-
     /* Create format layer */
-    ret = block_crypto_co_create_generic(bs, size, create_opts,
-                                         prealloc, cflags, errp);
+    ret = block_crypto_co_create_generic(bs, size, create_opts, prealloc, errp);
     if (ret < 0) {
         goto fail;
     }

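The hunks above back the detached-LUKS-header feature out of this tree. On the I/O path the whole feature reduces to one dispatch rule, visible in the removed lines of block_crypto_read_func()/block_crypto_write_func(): header I/O goes to the optional "header" child when it exists, otherwise to bs->file. As a sketch:

    /* crypto->header is NULL unless a detached header node was given */
    BdrvChild *hdr = crypto->header ? crypto->header : bs->file;
    ret = bdrv_pread(hdr, offset, buflen, buf, 0);
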

@@ -41,7 +41,6 @@
 #define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
 #define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
 #define BLOCK_CRYPTO_OPT_LUKS_ITER_TIME "iter-time"
-#define BLOCK_CRYPTO_OPT_LUKS_DETACHED_HEADER "detached-header"
 #define BLOCK_CRYPTO_OPT_LUKS_KEYSLOT "keyslot"
 #define BLOCK_CRYPTO_OPT_LUKS_STATE "state"
 #define BLOCK_CRYPTO_OPT_LUKS_OLD_SECRET "old-secret"
@@ -101,13 +100,6 @@
         .help = "Select new state of affected keyslots (active/inactive)",\
     }

-#define BLOCK_CRYPTO_OPT_DEF_LUKS_DETACHED_HEADER(prefix) \
-    { \
-        .name = prefix BLOCK_CRYPTO_OPT_LUKS_DETACHED_HEADER, \
-        .type = QEMU_OPT_BOOL, \
-        .help = "Create a detached LUKS header", \
-    }
-
 #define BLOCK_CRYPTO_OPT_DEF_LUKS_KEYSLOT(prefix) \
     { \
         .name = prefix BLOCK_CRYPTO_OPT_LUKS_KEYSLOT, \


@@ -114,6 +114,7 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
     }

     ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);

     if (export->iothread) {
         IOThread *iothread;
@@ -132,6 +133,8 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
         set_context_errp = fixed_iothread ? errp : NULL;
         ret = bdrv_try_change_aio_context(bs, new_ctx, NULL, set_context_errp);
         if (ret == 0) {
+            aio_context_release(ctx);
+            aio_context_acquire(new_ctx);
             ctx = new_ctx;
         } else if (fixed_iothread) {
             goto fail;
@@ -188,6 +191,8 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
     assert(exp->blk != NULL);

     QLIST_INSERT_HEAD(&block_exports, exp, next);
+
+    aio_context_release(ctx);
     return exp;

 fail:
@@ -195,6 +200,7 @@ fail:
         blk_set_dev_ops(blk, NULL, NULL);
         blk_unref(blk);
     }
+    aio_context_release(ctx);
     if (exp) {
         g_free(exp->id);
         g_free(exp);
@@ -212,6 +218,9 @@ void blk_exp_ref(BlockExport *exp)
 static void blk_exp_delete_bh(void *opaque)
 {
     BlockExport *exp = opaque;
+    AioContext *aio_context = exp->ctx;
+
+    aio_context_acquire(aio_context);

     assert(exp->refcount == 0);
     QLIST_REMOVE(exp, next);
@@ -221,6 +230,8 @@ static void blk_exp_delete_bh(void *opaque)
     qapi_event_send_block_export_deleted(exp->id);
     g_free(exp->id);
     g_free(exp);
+
+    aio_context_release(aio_context);
 }

 void blk_exp_unref(BlockExport *exp)
@@ -238,16 +249,22 @@ void blk_exp_unref(BlockExport *exp)
  * connections and other internally held references start to shut down. When
  * the function returns, there may still be active references while the export
  * is in the process of shutting down.
+ *
+ * Acquires exp->ctx internally. Callers must *not* hold the lock.
  */
 void blk_exp_request_shutdown(BlockExport *exp)
 {
+    AioContext *aio_context = exp->ctx;
+
+    aio_context_acquire(aio_context);
+
     /*
      * If the user doesn't own the export any more, it is already shutting
      * down. We must not call .request_shutdown and decrease the refcount a
      * second time.
      */
     if (!exp->user_owned) {
-        return;
+        goto out;
     }

     exp->drv->request_shutdown(exp);
@@ -255,6 +272,9 @@ void blk_exp_request_shutdown(BlockExport *exp)
     assert(exp->user_owned);
     exp->user_owned = false;
     blk_exp_unref(exp);
+
+out:
+    aio_context_release(aio_context);
 }

 /*

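Per the doc comment added above, blk_exp_request_shutdown() takes exp->ctx itself, and the drain it can trigger assumes the context is acquired exactly once. A caller sketch of the resulting rule (illustrative; holding the lock across the call risks hanging the internal AIO_WAIT_WHILE-style polling):

    /* Correct: call with no AioContext lock held. */
    blk_exp_request_shutdown(exp);

    /* Wrong under these rules: the helper re-acquires exp->ctx, so the
     * drain inside would poll with the lock held once too often. */
    aio_context_acquire(exp->ctx);
    blk_exp_request_shutdown(exp);
    aio_context_release(exp->ctx);
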

@@ -278,33 +278,32 @@ static void vu_blk_exp_resize(void *opaque)
     vu_config_change_msg(&vexp->vu_server.vu_dev);
 }

+/* Called with vexp->export.ctx acquired */
 static void vu_blk_drained_begin(void *opaque)
 {
     VuBlkExport *vexp = opaque;

-    vexp->vu_server.quiescing = true;
     vhost_user_server_detach_aio_context(&vexp->vu_server);
 }

+/* Called with vexp->export.blk AioContext acquired */
 static void vu_blk_drained_end(void *opaque)
 {
     VuBlkExport *vexp = opaque;

-    vexp->vu_server.quiescing = false;
     vhost_user_server_attach_aio_context(&vexp->vu_server, vexp->export.ctx);
 }

 /*
- * Ensures that bdrv_drained_begin() waits until in-flight requests complete
- * and the server->co_trip coroutine has terminated. It will be restarted in
- * vhost_user_server_attach_aio_context().
+ * Ensures that bdrv_drained_begin() waits until in-flight requests complete.
+ *
+ * Called with vexp->export.ctx acquired.
  */
 static bool vu_blk_drained_poll(void *opaque)
 {
     VuBlkExport *vexp = opaque;
-    VuServer *server = &vexp->vu_server;

-    return server->co_trip || vhost_user_server_has_in_flight(server);
+    return vhost_user_server_has_in_flight(&vexp->vu_server);
 }

 static const BlockDevOps vu_blk_dev_ops = {


@@ -712,11 +712,17 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
 #ifdef CONFIG_LINUX_AIO
     /* Currently Linux does AIO only for files opened with O_DIRECT */
-    if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
-        error_setg(errp, "aio=native was specified, but it requires "
-                   "cache.direct=on, which was not specified.");
-        ret = -EINVAL;
-        goto fail;
+    if (s->use_linux_aio) {
+        if (!(s->open_flags & O_DIRECT)) {
+            error_setg(errp, "aio=native was specified, but it requires "
+                       "cache.direct=on, which was not specified.");
+            ret = -EINVAL;
+            goto fail;
+        }
+        if (!aio_setup_linux_aio(bdrv_get_aio_context(bs), errp)) {
+            error_prepend(errp, "Unable to use native AIO: ");
+            goto fail;
+        }
     }
 #else
     if (s->use_linux_aio) {
@@ -727,7 +733,14 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
     }
 #endif /* !defined(CONFIG_LINUX_AIO) */

-#ifndef CONFIG_LINUX_IO_URING
+#ifdef CONFIG_LINUX_IO_URING
+    if (s->use_linux_io_uring) {
+        if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) {
+            error_prepend(errp, "Unable to use io_uring: ");
+            goto fail;
+        }
+    }
+#else
     if (s->use_linux_io_uring) {
         error_setg(errp, "aio=io_uring was specified, but is not supported "
                    "in this build.");
@@ -2431,48 +2444,6 @@ static bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
     return true;
 }

-#ifdef CONFIG_LINUX_IO_URING
-static inline bool raw_check_linux_io_uring(BDRVRawState *s)
-{
-    Error *local_err = NULL;
-    AioContext *ctx;
-
-    if (!s->use_linux_io_uring) {
-        return false;
-    }
-
-    ctx = qemu_get_current_aio_context();
-    if (unlikely(!aio_setup_linux_io_uring(ctx, &local_err))) {
-        error_reportf_err(local_err, "Unable to use linux io_uring, "
-                                     "falling back to thread pool: ");
-        s->use_linux_io_uring = false;
-        return false;
-    }
-
-    return true;
-}
-#endif
-
-#ifdef CONFIG_LINUX_AIO
-static inline bool raw_check_linux_aio(BDRVRawState *s)
-{
-    Error *local_err = NULL;
-    AioContext *ctx;
-
-    if (!s->use_linux_aio) {
-        return false;
-    }
-
-    ctx = qemu_get_current_aio_context();
-    if (unlikely(!aio_setup_linux_aio(ctx, &local_err))) {
-        error_reportf_err(local_err, "Unable to use Linux AIO, "
-                                     "falling back to thread pool: ");
-        s->use_linux_aio = false;
-        return false;
-    }
-
-    return true;
-}
-#endif
-
 static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
                                    uint64_t bytes, QEMUIOVector *qiov, int type)
 {
@@ -2503,13 +2474,13 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
     if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) {
         type |= QEMU_AIO_MISALIGNED;
 #ifdef CONFIG_LINUX_IO_URING
-    } else if (raw_check_linux_io_uring(s)) {
+    } else if (s->use_linux_io_uring) {
         assert(qiov->size == bytes);
         ret = luring_co_submit(bs, s->fd, offset, qiov, type);
         goto out;
 #endif
 #ifdef CONFIG_LINUX_AIO
-    } else if (raw_check_linux_aio(s)) {
+    } else if (s->use_linux_aio) {
         assert(qiov->size == bytes);
         ret = laio_co_submit(s->fd, offset, qiov, type,
                              s->aio_max_batch);
@@ -2596,13 +2567,39 @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
     };

 #ifdef CONFIG_LINUX_IO_URING
-    if (raw_check_linux_io_uring(s)) {
+    if (s->use_linux_io_uring) {
         return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
     }
 #endif
     return raw_thread_pool_submit(handle_aiocb_flush, &acb);
 }

+static void raw_aio_attach_aio_context(BlockDriverState *bs,
+                                       AioContext *new_context)
+{
+    BDRVRawState __attribute__((unused)) *s = bs->opaque;
+
+#ifdef CONFIG_LINUX_AIO
+    if (s->use_linux_aio) {
+        Error *local_err = NULL;
+        if (!aio_setup_linux_aio(new_context, &local_err)) {
+            error_reportf_err(local_err, "Unable to use native AIO, "
+                                         "falling back to thread pool: ");
+            s->use_linux_aio = false;
+        }
+    }
+#endif
+#ifdef CONFIG_LINUX_IO_URING
+    if (s->use_linux_io_uring) {
+        Error *local_err = NULL;
+        if (!aio_setup_linux_io_uring(new_context, &local_err)) {
+            error_reportf_err(local_err, "Unable to use linux io_uring, "
+                                         "falling back to thread pool: ");
+            s->use_linux_io_uring = false;
+        }
+    }
+#endif
+}
+
 static void raw_close(BlockDriverState *bs)
 {
     BDRVRawState *s = bs->opaque;
@@ -3899,6 +3896,7 @@ BlockDriver bdrv_file = {
     .bdrv_co_copy_range_from = raw_co_copy_range_from,
     .bdrv_co_copy_range_to  = raw_co_copy_range_to,
     .bdrv_refresh_limits = raw_refresh_limits,
+    .bdrv_attach_aio_context = raw_aio_attach_aio_context,
     .bdrv_co_truncate = raw_co_truncate,
     .bdrv_co_getlength = raw_co_getlength,
@@ -4268,6 +4266,7 @@ static BlockDriver bdrv_host_device = {
     .bdrv_co_copy_range_from = raw_co_copy_range_from,
     .bdrv_co_copy_range_to  = raw_co_copy_range_to,
     .bdrv_refresh_limits = raw_refresh_limits,
+    .bdrv_attach_aio_context = raw_aio_attach_aio_context,
     .bdrv_co_truncate = raw_co_truncate,
     .bdrv_co_getlength = raw_co_getlength,
@@ -4403,6 +4402,7 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_co_pwritev = raw_co_pwritev,
     .bdrv_co_flush_to_disk = raw_co_flush_to_disk,
     .bdrv_refresh_limits = cdrom_refresh_limits,
+    .bdrv_attach_aio_context = raw_aio_attach_aio_context,
     .bdrv_co_truncate = raw_co_truncate,
     .bdrv_co_getlength = raw_co_getlength,
@@ -4528,6 +4528,7 @@ static BlockDriver bdrv_host_cdrom = {
     .bdrv_co_pwritev = raw_co_pwritev,
     .bdrv_co_flush_to_disk = raw_co_flush_to_disk,
     .bdrv_refresh_limits = cdrom_refresh_limits,
+    .bdrv_attach_aio_context = raw_aio_attach_aio_context,
     .bdrv_co_truncate = raw_co_truncate,
     .bdrv_co_getlength = raw_co_getlength,

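The two columns take opposite approaches to setting up the native AIO engines. The right-hand (this tree's) code is eager: it probes the node's AioContext at open time, fails the open on error, and re-probes from .bdrv_attach_aio_context whenever the node moves. The left-hand raw_check_* helpers are lazy: they probe the submitting thread's context on each I/O and degrade silently. The two idioms side by side (sketch, error paths trimmed):

    /* Eager: refuse the open if the engine can't be set up. */
    if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) {
        error_prepend(errp, "Unable to use io_uring: ");
        return -EINVAL;
    }

    /* Lazy: probe at submission time, fall back to the thread pool. */
    if (!aio_setup_linux_io_uring(qemu_get_current_aio_context(),
                                  &local_err)) {
        error_reportf_err(local_err, "falling back to thread pool: ");
        s->use_linux_io_uring = false;
    }
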

@@ -106,12 +106,27 @@ static uint32_t reader_count(void)
     return rd;
 }

-void no_coroutine_fn bdrv_graph_wrlock(void)
+void no_coroutine_fn bdrv_graph_wrlock(BlockDriverState *bs)
 {
+    AioContext *ctx = NULL;
+
     GLOBAL_STATE_CODE();
     assert(!qatomic_read(&has_writer));
     assert(!qemu_in_coroutine());

+    /*
+     * Release only non-mainloop AioContext. The mainloop often relies on the
+     * BQL and doesn't lock the main AioContext before doing things.
+     */
+    if (bs) {
+        ctx = bdrv_get_aio_context(bs);
+        if (ctx != qemu_get_aio_context()) {
+            aio_context_release(ctx);
+        } else {
+            ctx = NULL;
+        }
+    }
+
     /* Make sure that constantly arriving new I/O doesn't cause starvation */
     bdrv_drain_all_begin_nopoll();
@@ -140,9 +155,13 @@ void no_coroutine_fn bdrv_graph_wrlock(void)
     } while (reader_count() >= 1);

     bdrv_drain_all_end();
+
+    if (ctx) {
+        aio_context_acquire(bdrv_get_aio_context(bs));
+    }
 }

-void no_coroutine_fn bdrv_graph_wrunlock(void)
+void bdrv_graph_wrunlock(void)
 {
     GLOBAL_STATE_CODE();
     assert(qatomic_read(&has_writer));

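In the right-hand API, callers enter bdrv_graph_wrlock() holding the node's AioContext lock; the function drops it around the drained section so that lock-based polling in other threads cannot deadlock, then re-takes it before returning. A typical writer section under that convention (sketch; `from`/`to` are placeholder nodes):

    /* Caller already holds bdrv_get_aio_context(bs) exactly once. */
    bdrv_graph_wrlock(bs);              /* may drop and re-take bs's ctx */
    bdrv_replace_node(from, to, &error_abort);
    bdrv_graph_wrunlock();
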

@@ -294,6 +294,8 @@ static void bdrv_co_drain_bh_cb(void *opaque)
     BlockDriverState *bs = data->bs;

     if (bs) {
+        AioContext *ctx = bdrv_get_aio_context(bs);
+        aio_context_acquire(ctx);
         bdrv_dec_in_flight(bs);
         if (data->begin) {
             bdrv_do_drained_begin(bs, data->parent, data->poll);
@@ -301,6 +303,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
             assert(!data->poll);
             bdrv_do_drained_end(bs, data->parent);
         }
+        aio_context_release(ctx);
     } else {
         assert(data->begin);
         bdrv_drain_all_begin();
@@ -317,6 +320,8 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
 {
     BdrvCoDrainData data;
     Coroutine *self = qemu_coroutine_self();
+    AioContext *ctx = bdrv_get_aio_context(bs);
+    AioContext *co_ctx = qemu_coroutine_get_aio_context(self);

     /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
      * other coroutines run if they were queued by aio_co_enter(). */
@@ -335,6 +340,17 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
         bdrv_inc_in_flight(bs);
     }

+    /*
+     * Temporarily drop the lock across yield or we would get deadlocks.
+     * bdrv_co_drain_bh_cb() reacquires the lock as needed.
+     *
+     * When we yield below, the lock for the current context will be
+     * released, so if this is actually the lock that protects bs, don't drop
+     * it a second time.
+     */
+    if (ctx != co_ctx) {
+        aio_context_release(ctx);
+    }
+
     replay_bh_schedule_oneshot_event(qemu_get_aio_context(),
                                      bdrv_co_drain_bh_cb, &data);
@@ -342,6 +358,11 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
     /* If we are resumed from some other event (such as an aio completion or a
      * timer callback), it is a bug in the caller that should be fixed. */
     assert(data.done);
+
+    /* Reacquire the AioContext of bs if we dropped it */
+    if (ctx != co_ctx) {
+        aio_context_acquire(ctx);
+    }
 }

 static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
@@ -457,12 +478,13 @@ static bool bdrv_drain_all_poll(void)
     GLOBAL_STATE_CODE();
     GRAPH_RDLOCK_GUARD_MAINLOOP();

-    /*
-     * bdrv_drain_poll() can't make changes to the graph and we hold the BQL,
-     * so iterating bdrv_next_all_states() is safe.
-     */
+    /* bdrv_drain_poll() can't make changes to the graph and we are holding the
+     * main AioContext lock, so iterating bdrv_next_all_states() is safe. */
     while ((bs = bdrv_next_all_states(bs))) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+        aio_context_acquire(aio_context);
         result |= bdrv_drain_poll(bs, NULL, true);
+        aio_context_release(aio_context);
     }

     return result;
@@ -503,7 +525,11 @@ void bdrv_drain_all_begin_nopoll(void)
     /* Quiesce all nodes, without polling in-flight requests yet. The graph
      * cannot change during this loop. */
     while ((bs = bdrv_next_all_states(bs))) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
         bdrv_do_drained_begin(bs, NULL, false);
+        aio_context_release(aio_context);
     }
 }
@@ -562,7 +588,11 @@ void bdrv_drain_all_end(void)
     }

     while ((bs = bdrv_next_all_states(bs))) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+
+        aio_context_acquire(aio_context);
         bdrv_do_drained_end(bs, NULL);
+        aio_context_release(aio_context);
     }

     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
@@ -2338,10 +2368,15 @@ int bdrv_flush_all(void)
     }

     for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
-        int ret = bdrv_flush(bs);
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+        int ret;
+
+        aio_context_acquire(aio_context);
+        ret = bdrv_flush(bs);
         if (ret < 0 && !result) {
             result = ret;
         }
+        aio_context_release(aio_context);
     }

     return result;
@@ -2584,16 +2619,6 @@ bdrv_co_do_block_status(BlockDriverState *bs, bool want_zero,
                 ret |= (ret2 & BDRV_BLOCK_ZERO);
             }
         }
-
-        /*
-         * Now that the recursive search was done, clear the flag. Otherwise,
-         * with more complicated block graphs like snapshot-access ->
-         * copy-before-write -> qcow2, where the return value will be propagated
-         * further up to a parent bdrv_co_do_block_status() call, both the
-         * BDRV_BLOCK_RECURSE and BDRV_BLOCK_ZERO flags would be set, which is
-         * not allowed.
-         */
-        ret &= ~BDRV_BLOCK_RECURSE;
     }

 out:

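All of the acquire/release churn above maintains one invariant: AIO_WAIT_WHILE(ctx, cond) must be entered with ctx locked exactly once, because it drops that lock each time it polls and re-takes it afterwards. Stated as a sketch:

    aio_context_acquire(ctx);
    /* OK: the lock is held once; AIO_WAIT_WHILE() may drop it to poll. */
    AIO_WAIT_WHILE(ctx, bdrv_drain_poll(bs, NULL, true));
    aio_context_release(ctx);
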

@@ -49,7 +49,7 @@ typedef struct LuringQueue {
     QSIMPLEQ_HEAD(, LuringAIOCB) submit_queue;
 } LuringQueue;

-struct LuringState {
+typedef struct LuringState {
     AioContext *aio_context;

     struct io_uring ring;
@@ -58,7 +58,7 @@ struct LuringState {
     LuringQueue io_q;

     QEMUBH *completion_bh;
-};
+} LuringState;

 /**
  * luring_resubmit:
@@ -102,7 +102,7 @@ static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
     /* Update sqe */
     luringcb->sqeq.off += nread;
-    luringcb->sqeq.addr = (uintptr_t)luringcb->resubmit_qiov.iov;
+    luringcb->sqeq.addr = (__u64)(uintptr_t)luringcb->resubmit_qiov.iov;
     luringcb->sqeq.len = luringcb->resubmit_qiov.niov;

     luring_resubmit(s, luringcb);
@@ -432,7 +432,7 @@ LuringState *luring_init(Error **errp)
     rc = io_uring_queue_init(MAX_ENTRIES, ring, 0);
     if (rc < 0) {
-        error_setg_errno(errp, -rc, "failed to init linux io_uring ring");
+        error_setg_errno(errp, errno, "failed to init linux io_uring ring");
         g_free(s);
         return NULL;
     }

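The left-hand column has the more robust error report: io_uring_queue_init() returns a negative errno value and does not promise to set the errno variable, so negating the return code is what yields the real cause. A standalone illustration:

    struct io_uring ring;
    int rc = io_uring_queue_init(128, &ring, 0);
    if (rc < 0) {
        /* rc is -errno; the errno variable may be stale here */
        error_setg_errno(errp, -rc, "failed to init linux io_uring ring");
    }
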

@@ -88,15 +88,10 @@ if get_option('parallels').allowed()
   block_ss.add(files('parallels.c', 'parallels-ext.c'))
 endif

-if host_os == 'windows'
-  block_ss.add(files('file-win32.c', 'win32-aio.c'))
-else
-  block_ss.add(files('file-posix.c'), coref, iokit)
-endif
+block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c'))
+block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit])
 block_ss.add(when: libiscsi, if_true: files('iscsi-opts.c'))
-if host_os == 'linux'
-  block_ss.add(files('nvme.c'))
-endif
+block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c'))
 if get_option('replication').allowed()
   block_ss.add(files('replication.c'))
 endif


@@ -662,6 +662,7 @@ static int mirror_exit_common(Job *job)
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
     BlockJob *bjob = &s->common;
     MirrorBDSOpaque *bs_opaque;
+    AioContext *replace_aio_context = NULL;
     BlockDriverState *src;
     BlockDriverState *target_bs;
     BlockDriverState *mirror_top_bs;
@@ -676,6 +677,7 @@ static int mirror_exit_common(Job *job)
     }
     s->prepared = true;
 
+    aio_context_acquire(qemu_get_aio_context());
     bdrv_graph_rdlock_main_loop();
 
     mirror_top_bs = s->mirror_top_bs;
@@ -740,6 +742,11 @@ static int mirror_exit_common(Job *job)
     }
     bdrv_graph_rdunlock_main_loop();
 
+    if (s->to_replace) {
+        replace_aio_context = bdrv_get_aio_context(s->to_replace);
+        aio_context_acquire(replace_aio_context);
+    }
+
     if (s->should_complete && !abort) {
         BlockDriverState *to_replace = s->to_replace ?: src;
         bool ro = bdrv_is_read_only(to_replace);
@@ -757,7 +764,7 @@ static int mirror_exit_common(Job *job)
          * check for an op blocker on @to_replace, and we have our own
          * there.
          */
-        bdrv_graph_wrlock();
+        bdrv_graph_wrlock(target_bs);
         if (bdrv_recurse_can_replace(src, to_replace)) {
             bdrv_replace_node(to_replace, target_bs, &local_err);
         } else {
@@ -778,6 +785,9 @@ static int mirror_exit_common(Job *job)
         error_free(s->replace_blocker);
         bdrv_unref(s->to_replace);
     }
+    if (replace_aio_context) {
+        aio_context_release(replace_aio_context);
+    }
     g_free(s->replaces);
 
     /*
@@ -786,7 +796,7 @@ static int mirror_exit_common(Job *job)
      * valid.
      */
     block_job_remove_all_bdrv(bjob);
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(mirror_top_bs);
     bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
     bdrv_graph_wrunlock();
 
@@ -801,6 +811,8 @@ static int mirror_exit_common(Job *job)
     bdrv_unref(mirror_top_bs);
     bdrv_unref(src);
 
+    aio_context_release(qemu_get_aio_context());
+
     return ret;
 }
 
@@ -1179,17 +1191,24 @@ static void mirror_complete(Job *job, Error **errp)
 
     /* block all operations on to_replace bs */
     if (s->replaces) {
+        AioContext *replace_aio_context;
+
         s->to_replace = bdrv_find_node(s->replaces);
         if (!s->to_replace) {
             error_setg(errp, "Node name '%s' not found", s->replaces);
             return;
         }
 
+        replace_aio_context = bdrv_get_aio_context(s->to_replace);
+        aio_context_acquire(replace_aio_context);
+
         /* TODO Translate this into child freeze system. */
         error_setg(&s->replace_blocker,
                    "block device is in use by block-job-complete");
         bdrv_op_block_all(s->to_replace, s->replace_blocker);
         bdrv_ref(s->to_replace);
+
+        aio_context_release(replace_aio_context);
     }
 
     s->should_complete = true;
@@ -1895,7 +1914,7 @@ static BlockJob *mirror_start_job(
      */
     bdrv_disable_dirty_bitmap(s->dirty_bitmap);
 
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(bs);
     ret = block_job_add_bdrv(&s->common, "source", bs, 0,
                              BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE |
                              BLK_PERM_CONSISTENT_READ,
@@ -1982,7 +2001,7 @@ fail:
         bs_opaque->stop = true;
 
         bdrv_drained_begin(bs);
-        bdrv_graph_wrlock();
+        bdrv_graph_wrlock(bs);
         assert(mirror_top_bs->backing->bs == bs);
         bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
                                  &error_abort);
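The bdrv_graph_wrlock() changes scattered through this file reflect a signature difference between the two sides of the comparison: one side passes a BlockDriverState whose AioContext lock is temporarily dropped while the write lock drains in-flight I/O, the other takes no argument because per-AioContext locking is gone. A hedged sketch of the two call shapes (QEMU-internal API, shown for orientation only):

    /* With AioContext locking: the caller holds bs's context lock and
     * bdrv_graph_wrlock(bs) releases it internally while draining, to
     * avoid deadlocking against I/O running in that context. */
    bdrv_graph_wrlock(bs);
    bdrv_replace_node(from, to, &error_abort);
    bdrv_graph_wrunlock();

    /* Without AioContext locking: there is no lock left to drop, so
     * the argument disappears from the call. */
    bdrv_graph_wrlock();
    bdrv_replace_node(from, to, &error_abort);
    bdrv_graph_wrunlock();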

block/monitor/bitmap-qmp-cmds.c

@@ -95,6 +95,7 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
 {
     BlockDriverState *bs;
     BdrvDirtyBitmap *bitmap;
+    AioContext *aio_context;
 
     if (!name || name[0] == '\0') {
         error_setg(errp, "Bitmap name cannot be empty");
@@ -106,11 +107,14 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
         return;
     }
 
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
     if (has_granularity) {
         if (granularity < 512 || !is_power_of_2(granularity)) {
             error_setg(errp, "Granularity must be power of 2 "
                              "and at least 512");
-            return;
+            goto out;
         }
     } else {
         /* Default to cluster size, if available: */
@@ -128,12 +132,12 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
     if (persistent &&
         !bdrv_can_store_new_dirty_bitmap(bs, name, granularity, errp))
     {
-        return;
+        goto out;
     }
 
     bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp);
     if (bitmap == NULL) {
-        return;
+        goto out;
     }
 
     if (disabled) {
@@ -141,6 +145,9 @@ void qmp_block_dirty_bitmap_add(const char *node, const char *name,
     }
 
     bdrv_dirty_bitmap_set_persistence(bitmap, persistent);
+
+out:
+    aio_context_release(aio_context);
 }
 
 BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
@@ -150,6 +157,7 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
 {
     BlockDriverState *bs;
     BdrvDirtyBitmap *bitmap;
+    AioContext *aio_context;
 
     GLOBAL_STATE_CODE();
 
@@ -158,14 +166,19 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
         return NULL;
     }
 
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
     if (bdrv_dirty_bitmap_check(bitmap, BDRV_BITMAP_BUSY | BDRV_BITMAP_RO,
                                 errp)) {
+        aio_context_release(aio_context);
         return NULL;
     }
 
     if (bdrv_dirty_bitmap_get_persistence(bitmap) &&
         bdrv_remove_persistent_dirty_bitmap(bs, name, errp) < 0)
     {
+        aio_context_release(aio_context);
         return NULL;
     }
 
@@ -177,6 +190,7 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
         *bitmap_bs = bs;
     }
 
+    aio_context_release(aio_context);
     return release ? NULL : bitmap;
 }
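The head-side additions above follow the classic dataplane idiom: take the node's AioContext lock on entry and funnel every early exit through a single release point so the lock cannot leak. A minimal sketch of the shape (precondition() and do_work() are placeholders, not QEMU functions):

    void example_op(BlockDriverState *bs, Error **errp)
    {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);

        if (!precondition(bs)) {
            error_setg(errp, "precondition failed");
            goto out;          /* never return with the lock held */
        }

        do_work(bs);

    out:
        aio_context_release(aio_context);
    }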

block/monitor/block-hmp-cmds.c

@@ -141,6 +141,7 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
     const char *id = qdict_get_str(qdict, "id");
     BlockBackend *blk;
     BlockDriverState *bs;
+    AioContext *aio_context;
     Error *local_err = NULL;
 
     GLOBAL_STATE_CODE();
@@ -167,10 +168,14 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
         return;
     }
 
+    aio_context = blk_get_aio_context(blk);
+    aio_context_acquire(aio_context);
+
     bs = blk_bs(blk);
     if (bs) {
         if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
             error_report_err(local_err);
+            aio_context_release(aio_context);
             return;
         }
 
@@ -191,6 +196,8 @@ void hmp_drive_del(Monitor *mon, const QDict *qdict)
     } else {
         blk_unref(blk);
     }
+
+    aio_context_release(aio_context);
 }
 
 void hmp_commit(Monitor *mon, const QDict *qdict)
@@ -206,6 +213,7 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
         ret = blk_commit_all();
     } else {
         BlockDriverState *bs;
+        AioContext *aio_context;
 
         blk = blk_by_name(device);
         if (!blk) {
@@ -214,13 +222,18 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
         }
 
         bs = bdrv_skip_implicit_filters(blk_bs(blk));
+        aio_context = bdrv_get_aio_context(bs);
+        aio_context_acquire(aio_context);
+
         if (!blk_is_available(blk)) {
             error_report("Device '%s' has no medium", device);
+            aio_context_release(aio_context);
             return;
         }
 
         ret = bdrv_commit(bs);
+
+        aio_context_release(aio_context);
     }
     if (ret < 0) {
         error_report("'commit' error for '%s': %s", device, strerror(-ret));
@@ -496,7 +509,7 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict)
     const char *base = qdict_get_try_str(qdict, "base");
     int64_t speed = qdict_get_try_int(qdict, "speed", 0);
 
-    qmp_block_stream(device, device, base, NULL, NULL, false, false, NULL,
+    qmp_block_stream(device, device, base, NULL, NULL, NULL,
                      qdict_haskey(qdict, "speed"), speed,
                      true, BLOCKDEV_ON_ERROR_REPORT, NULL,
                      false, false, false, false, &error);
@@ -547,6 +560,7 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
     BlockBackend *blk = NULL;
     BlockDriverState *bs = NULL;
     BlockBackend *local_blk = NULL;
+    AioContext *ctx = NULL;
     bool qdev = qdict_get_try_bool(qdict, "qdev", false);
     const char *device = qdict_get_str(qdict, "device");
     const char *command = qdict_get_str(qdict, "command");
@@ -568,6 +582,9 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
         }
     }
 
+    ctx = blk ? blk_get_aio_context(blk) : bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
+
     if (bs) {
         blk = local_blk = blk_new(bdrv_get_aio_context(bs), 0, BLK_PERM_ALL);
         ret = blk_insert_bs(blk, bs, &err);
@@ -605,6 +622,11 @@ void hmp_qemu_io(Monitor *mon, const QDict *qdict)
 
 fail:
     blk_unref(local_blk);
+
+    if (ctx) {
+        aio_context_release(ctx);
+    }
+
     hmp_handle_error(mon, err);
 }
 
@@ -860,6 +882,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
     int nb_sns, i;
     int total;
     int *global_snapshots;
+    AioContext *aio_context;
 
     typedef struct SnapshotEntry {
         QEMUSnapshotInfo sn;
@@ -886,8 +909,11 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
         error_report_err(err);
         return;
     }
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
     nb_sns = bdrv_snapshot_list(bs, &sn_tab);
+    aio_context_release(aio_context);
 
     if (nb_sns < 0) {
         monitor_printf(mon, "bdrv_snapshot_list: error %d\n", nb_sns);
@@ -898,7 +924,9 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
         int bs1_nb_sns = 0;
         ImageEntry *ie;
         SnapshotEntry *se;
+        AioContext *ctx = bdrv_get_aio_context(bs1);
 
+        aio_context_acquire(ctx);
         if (bdrv_can_snapshot(bs1)) {
             sn = NULL;
             bs1_nb_sns = bdrv_snapshot_list(bs1, &sn);
@@ -916,6 +944,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict)
             }
             g_free(sn);
         }
+        aio_context_release(ctx);
     }
 
     if (no_snapshot) {

block/qapi-sysemu.c

@@ -174,6 +174,7 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp)
 {
     BlockBackend *blk;
     BlockDriverState *bs;
+    AioContext *aio_context;
     bool has_attached_device;
 
     GLOBAL_STATE_CODE();
@@ -203,10 +204,13 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp)
         return;
     }
 
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
     bdrv_graph_rdlock_main_loop();
     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
         bdrv_graph_rdunlock_main_loop();
-        return;
+        goto out;
     }
     bdrv_graph_rdunlock_main_loop();
 
@@ -219,6 +223,9 @@ blockdev_remove_medium(const char *device, const char *id, Error **errp)
          * value passed here (i.e. false). */
         blk_dev_change_media_cb(blk, false, &error_abort);
     }
+
+out:
+    aio_context_release(aio_context);
 }
 
 void qmp_blockdev_remove_medium(const char *id, Error **errp)
@@ -230,6 +237,7 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
                                             BlockDriverState *bs, Error **errp)
 {
     Error *local_err = NULL;
+    AioContext *ctx;
     bool has_device;
     int ret;
 
@@ -251,7 +259,11 @@ static void qmp_blockdev_insert_anon_medium(BlockBackend *blk,
         return;
     }
 
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
     ret = blk_insert_bs(blk, bs, errp);
+    aio_context_release(ctx);
+
     if (ret < 0) {
         return;
     }
@@ -362,7 +374,9 @@ void qmp_blockdev_change_medium(const char *device,
         qdict_put_str(options, "driver", format);
     }
 
+    aio_context_acquire(qemu_get_aio_context());
     medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp);
+    aio_context_release(qemu_get_aio_context());
 
     if (!medium_bs) {
         goto fail;
@@ -423,16 +437,20 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp)
     ThrottleConfig cfg;
     BlockDriverState *bs;
     BlockBackend *blk;
+    AioContext *aio_context;
 
     blk = qmp_get_blk(arg->device, arg->id, errp);
     if (!blk) {
         return;
     }
 
+    aio_context = blk_get_aio_context(blk);
+    aio_context_acquire(aio_context);
+
     bs = blk_bs(blk);
     if (!bs) {
         error_setg(errp, "Device has no medium");
-        return;
+        goto out;
     }
 
     throttle_config_init(&cfg);
@@ -487,7 +505,7 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp)
     }
 
     if (!throttle_is_valid(&cfg, errp)) {
-        return;
+        goto out;
     }
 
     if (throttle_enabled(&cfg)) {
@@ -504,6 +522,9 @@ void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp)
         /* If all throttling settings are set to 0, disable I/O limits */
         blk_io_limits_disable(blk);
     }
+
+out:
+    aio_context_release(aio_context);
 }
 
 void qmp_block_latency_histogram_set(

block/qapi.c

@@ -234,11 +234,13 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp)
     int ret;
     Error *err = NULL;
 
+    aio_context_acquire(bdrv_get_aio_context(bs));
+
     size = bdrv_getlength(bs);
     if (size < 0) {
         error_setg_errno(errp, -size, "Can't get image size '%s'",
                          bs->exact_filename);
-        return;
+        goto out;
     }
 
     bdrv_refresh_filename(bs);
@@ -263,7 +265,7 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp)
     info->format_specific = bdrv_get_specific_info(bs, &err);
     if (err) {
         error_propagate(errp, err);
-        return;
+        goto out;
     }
     backing_filename = bs->backing_file;
     if (backing_filename[0] != '\0') {
@@ -298,8 +300,11 @@ bdrv_do_query_node_info(BlockDriverState *bs, BlockNodeInfo *info, Error **errp)
         break;
     default:
         error_propagate(errp, err);
-        return;
+        goto out;
     }
+
+out:
+    aio_context_release(bdrv_get_aio_context(bs));
 }
 
 /**
@@ -704,10 +709,15 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
     /* Just to be safe if query_nodes is not always initialized */
     if (has_query_nodes && query_nodes) {
         for (bs = bdrv_next_node(NULL); bs; bs = bdrv_next_node(bs)) {
+            AioContext *ctx = bdrv_get_aio_context(bs);
+
+            aio_context_acquire(ctx);
             QAPI_LIST_APPEND(tail, bdrv_query_bds_stats(bs, false));
+            aio_context_release(ctx);
         }
     } else {
         for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
+            AioContext *ctx = blk_get_aio_context(blk);
             BlockStats *s;
             char *qdev;
 
@@ -715,6 +725,7 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
                 continue;
             }
 
+            aio_context_acquire(ctx);
             s = bdrv_query_bds_stats(blk_bs(blk), true);
             s->device = g_strdup(blk_name(blk));
 
@@ -726,6 +737,7 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
             }
 
             bdrv_query_blk_stats(s->stats, blk);
+            aio_context_release(ctx);
 
             QAPI_LIST_APPEND(tail, s);
         }

block/qcow.c

@@ -885,7 +885,7 @@ qcow_co_create(BlockdevCreateOptions *opts, Error **errp)
         header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
 
         crypto = qcrypto_block_create(qcow_opts->encrypt, "encrypt.",
-                                      NULL, NULL, NULL, 0, errp);
+                                      NULL, NULL, NULL, errp);
         if (!crypto) {
             ret = -EINVAL;
             goto exit;

block/qcow2.c

@@ -2807,7 +2807,7 @@ qcow2_do_close(BlockDriverState *bs, bool close_data_file)
     if (close_data_file && has_data_file(bs)) {
         GLOBAL_STATE_CODE();
         bdrv_graph_rdunlock_main_loop();
-        bdrv_graph_wrlock();
+        bdrv_graph_wrlock(NULL);
         bdrv_unref_child(bs, s->data_file);
         bdrv_graph_wrunlock();
         s->data_file = NULL;
@@ -3216,7 +3216,7 @@ qcow2_set_up_encryption(BlockDriverState *bs,
     crypto = qcrypto_block_create(cryptoopts, "encrypt.",
                                   qcow2_crypto_hdr_init_func,
                                   qcow2_crypto_hdr_write_func,
-                                  bs, 0, errp);
+                                  bs, errp);
     if (!crypto) {
         return -EINVAL;
     }

block/quorum.c

@@ -1037,7 +1037,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
 
 close_exit:
     /* cleanup on error */
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(NULL);
     for (i = 0; i < s->num_children; i++) {
         if (!opened[i]) {
             continue;
@@ -1057,7 +1057,7 @@ static void quorum_close(BlockDriverState *bs)
     BDRVQuorumState *s = bs->opaque;
     int i;
 
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(NULL);
     for (i = 0; i < s->num_children; i++) {
         bdrv_unref_child(bs, s->children[i]);
     }

block/raw-format.c

@@ -470,6 +470,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
     BDRVRawState *s = bs->opaque;
+    AioContext *ctx;
     bool has_size;
     uint64_t offset, size;
     BdrvChildRole file_role;
@@ -521,7 +522,11 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
                 bs->file->bs->filename);
     }
 
+    ctx = bdrv_get_aio_context(bs);
+    aio_context_acquire(ctx);
     ret = raw_apply_options(bs, s, offset, has_size, size, errp);
+    aio_context_release(ctx);
+
     if (ret < 0) {
         return ret;
     }

block/replication.c

@@ -394,7 +394,14 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
     }
 
     if (reopen_queue) {
+        AioContext *ctx = bdrv_get_aio_context(bs);
+        if (ctx != qemu_get_aio_context()) {
+            aio_context_release(ctx);
+        }
         bdrv_reopen_multiple(reopen_queue, errp);
+        if (ctx != qemu_get_aio_context()) {
+            aio_context_acquire(ctx);
+        }
     }
 }
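This dance exists because bdrv_reopen_multiple() manages the relevant AioContext locks itself, so a caller already holding a non-main context lock has to drop it first or risk a deadlock; the main-loop context is exempt because it may be held recursively. Condensed, the idiom looks like this (same names as in the hunk, comments added):

    AioContext *ctx = bdrv_get_aio_context(bs);

    /* Only a non-main context must be dropped; the main loop's
     * context can be taken recursively. */
    if (ctx != qemu_get_aio_context()) {
        aio_context_release(ctx);
    }
    bdrv_reopen_multiple(reopen_queue, errp);
    if (ctx != qemu_get_aio_context()) {
        aio_context_acquire(ctx);
    }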
@@ -455,11 +462,14 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     BlockDriverState *top_bs;
     BdrvChild *active_disk, *hidden_disk, *secondary_disk;
     int64_t active_length, hidden_length, disk_length;
+    AioContext *aio_context;
     Error *local_err = NULL;
     BackupPerf perf = { .use_copy_range = true, .max_workers = 1 };
 
     GLOBAL_STATE_CODE();
 
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
     s = bs->opaque;
 
     if (s->stage == BLOCK_REPLICATION_DONE ||
@@ -469,17 +479,20 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
          * Ignore the request because the secondary side of replication
          * doesn't have to do anything anymore.
          */
+        aio_context_release(aio_context);
         return;
     }
 
     if (s->stage != BLOCK_REPLICATION_NONE) {
         error_setg(errp, "Block replication is running or done");
+        aio_context_release(aio_context);
         return;
     }
 
     if (s->mode != mode) {
         error_setg(errp, "The parameter mode's value is invalid, needs %d,"
                    " but got %d", s->mode, mode);
+        aio_context_release(aio_context);
         return;
     }
 
@@ -492,6 +505,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     if (!active_disk || !active_disk->bs || !active_disk->bs->backing) {
         error_setg(errp, "Active disk doesn't have backing file");
         bdrv_graph_rdunlock_main_loop();
+        aio_context_release(aio_context);
         return;
     }
 
@@ -499,6 +513,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     if (!hidden_disk->bs || !hidden_disk->bs->backing) {
         error_setg(errp, "Hidden disk doesn't have backing file");
         bdrv_graph_rdunlock_main_loop();
+        aio_context_release(aio_context);
         return;
     }
 
@@ -506,6 +521,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     if (!secondary_disk->bs || !bdrv_has_blk(secondary_disk->bs)) {
         error_setg(errp, "The secondary disk doesn't have block backend");
         bdrv_graph_rdunlock_main_loop();
+        aio_context_release(aio_context);
         return;
     }
     bdrv_graph_rdunlock_main_loop();
@@ -518,6 +534,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
         active_length != hidden_length || hidden_length != disk_length) {
         error_setg(errp, "Active disk, hidden disk, secondary disk's length"
                    " are not the same");
+        aio_context_release(aio_context);
         return;
     }
 
@@ -529,6 +546,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
         !hidden_disk->bs->drv->bdrv_make_empty) {
         error_setg(errp,
                    "Active disk or hidden disk doesn't support make_empty");
+        aio_context_release(aio_context);
         bdrv_graph_rdunlock_main_loop();
         return;
     }
@@ -538,10 +556,11 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     reopen_backing_file(bs, true, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
+        aio_context_release(aio_context);
         return;
     }
 
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(bs);
 
     bdrv_ref(hidden_disk->bs);
     s->hidden_disk = bdrv_attach_child(bs, hidden_disk->bs, "hidden disk",
@@ -550,6 +569,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     if (local_err) {
         error_propagate(errp, local_err);
         bdrv_graph_wrunlock();
+        aio_context_release(aio_context);
         return;
     }
 
@@ -560,6 +580,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     if (local_err) {
         error_propagate(errp, local_err);
         bdrv_graph_wrunlock();
+        aio_context_release(aio_context);
         return;
     }
 
@@ -573,6 +594,7 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
         error_setg(errp, "No top_bs or it is invalid");
         bdrv_graph_wrunlock();
         reopen_backing_file(bs, false, NULL);
+        aio_context_release(aio_context);
         return;
     }
     bdrv_op_block_all(top_bs, s->blocker);
@@ -590,11 +612,13 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
         if (local_err) {
             error_propagate(errp, local_err);
             backup_job_cleanup(bs);
+            aio_context_release(aio_context);
             return;
         }
         job_start(&s->backup_job->job);
         break;
     default:
+        aio_context_release(aio_context);
         abort();
     }
 
@@ -605,12 +629,18 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
     }
 
     s->error = 0;
+    aio_context_release(aio_context);
 }
 
 static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
 {
     BlockDriverState *bs = rs->opaque;
-    BDRVReplicationState *s = bs->opaque;
+    BDRVReplicationState *s;
+    AioContext *aio_context;
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+    s = bs->opaque;
 
     if (s->stage == BLOCK_REPLICATION_DONE ||
         s->stage == BLOCK_REPLICATION_FAILOVER) {
@@ -619,28 +649,38 @@ static void replication_do_checkpoint(ReplicationState *rs, Error **errp)
          * Ignore the request because the secondary side of replication
          * doesn't have to do anything anymore.
          */
+        aio_context_release(aio_context);
         return;
     }
 
     if (s->mode == REPLICATION_MODE_SECONDARY) {
         secondary_do_checkpoint(bs, errp);
     }
+    aio_context_release(aio_context);
 }
 
 static void replication_get_error(ReplicationState *rs, Error **errp)
 {
     BlockDriverState *bs = rs->opaque;
-    BDRVReplicationState *s = bs->opaque;
+    BDRVReplicationState *s;
+    AioContext *aio_context;
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+    s = bs->opaque;
 
     if (s->stage == BLOCK_REPLICATION_NONE) {
         error_setg(errp, "Block replication is not running");
+        aio_context_release(aio_context);
         return;
     }
 
     if (s->error) {
         error_setg(errp, "I/O error occurred");
+        aio_context_release(aio_context);
         return;
     }
+    aio_context_release(aio_context);
 }
 
 static void replication_done(void *opaque, int ret)
@@ -651,7 +691,7 @@ static void replication_done(void *opaque, int ret)
 
     if (ret == 0) {
         s->stage = BLOCK_REPLICATION_DONE;
-        bdrv_graph_wrlock();
+        bdrv_graph_wrlock(NULL);
         bdrv_unref_child(bs, s->secondary_disk);
         s->secondary_disk = NULL;
         bdrv_unref_child(bs, s->hidden_disk);
@@ -668,7 +708,12 @@ static void replication_done(void *opaque, int ret)
 static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
 {
     BlockDriverState *bs = rs->opaque;
-    BDRVReplicationState *s = bs->opaque;
+    BDRVReplicationState *s;
+    AioContext *aio_context;
+
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+    s = bs->opaque;
 
     if (s->stage == BLOCK_REPLICATION_DONE ||
         s->stage == BLOCK_REPLICATION_FAILOVER) {
@@ -677,11 +722,13 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
          * Ignore the request because the secondary side of replication
          * doesn't have to do anything anymore.
          */
+        aio_context_release(aio_context);
         return;
     }
 
     if (s->stage != BLOCK_REPLICATION_RUNNING) {
         error_setg(errp, "Block replication is not running");
+        aio_context_release(aio_context);
         return;
     }
 
@@ -697,12 +744,15 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
          * disk, secondary disk in backup_job_completed().
          */
         if (s->backup_job) {
+            aio_context_release(aio_context);
             job_cancel_sync(&s->backup_job->job, true);
+            aio_context_acquire(aio_context);
         }
 
         if (!failover) {
             secondary_do_checkpoint(bs, errp);
             s->stage = BLOCK_REPLICATION_DONE;
+            aio_context_release(aio_context);
             return;
         }
 
@@ -715,8 +765,10 @@ static void replication_stop(ReplicationState *rs, bool failover, Error **errp)
         bdrv_graph_rdunlock_main_loop();
         break;
     default:
+        aio_context_release(aio_context);
         abort();
     }
+    aio_context_release(aio_context);
 }
 
 static const char *const replication_strong_runtime_opts[] = {

block/snapshot.c

@@ -196,10 +196,8 @@ bdrv_snapshot_fallback(BlockDriverState *bs)
 int bdrv_can_snapshot(BlockDriverState *bs)
 {
     BlockDriver *drv = bs->drv;
-
     GLOBAL_STATE_CODE();
-
-    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+    if (!drv || !bdrv_is_inserted(bs) || !bdrv_is_writable(bs)) {
         return 0;
     }
@@ -292,7 +290,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
     }
 
     /* .bdrv_open() will re-attach it */
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(NULL);
     bdrv_unref_child(bs, fallback);
     bdrv_graph_wrunlock();
 
@@ -527,7 +525,9 @@ static bool GRAPH_RDLOCK bdrv_all_snapshots_includes_bs(BlockDriverState *bs)
     return bdrv_has_blk(bs) || QLIST_EMPTY(&bs->parents);
 }
 
-/* Group operations. All block drivers are involved. */
+/* Group operations. All block drivers are involved.
+ * These functions will properly handle dataplane (take aio_context_acquire
+ * when appropriate for appropriate block drivers) */
 
 bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
                            Error **errp)
@@ -545,11 +545,14 @@ bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
     iterbdrvs = bdrvs;
     while (iterbdrvs) {
         BlockDriverState *bs = iterbdrvs->data;
+        AioContext *ctx = bdrv_get_aio_context(bs);
         bool ok = true;
 
+        aio_context_acquire(ctx);
         if (devices || bdrv_all_snapshots_includes_bs(bs)) {
             ok = bdrv_can_snapshot(bs);
         }
+        aio_context_release(ctx);
         if (!ok) {
             error_setg(errp, "Device '%s' is writable but does not support "
                        "snapshots", bdrv_get_device_or_node_name(bs));
@@ -579,15 +582,18 @@ int bdrv_all_delete_snapshot(const char *name,
     iterbdrvs = bdrvs;
     while (iterbdrvs) {
         BlockDriverState *bs = iterbdrvs->data;
+        AioContext *ctx = bdrv_get_aio_context(bs);
         QEMUSnapshotInfo sn1, *snapshot = &sn1;
         int ret = 0;
 
+        aio_context_acquire(ctx);
         if ((devices || bdrv_all_snapshots_includes_bs(bs)) &&
             bdrv_snapshot_find(bs, snapshot, name) >= 0)
         {
             ret = bdrv_snapshot_delete(bs, snapshot->id_str,
                                        snapshot->name, errp);
         }
+        aio_context_release(ctx);
         if (ret < 0) {
             error_prepend(errp, "Could not delete snapshot '%s' on '%s': ",
                           name, bdrv_get_device_or_node_name(bs));
@@ -622,14 +628,19 @@ int bdrv_all_goto_snapshot(const char *name,
     iterbdrvs = bdrvs;
     while (iterbdrvs) {
         BlockDriverState *bs = iterbdrvs->data;
+        AioContext *ctx = bdrv_get_aio_context(bs);
+        int ret = 0;
         bool all_snapshots_includes_bs;
 
+        aio_context_acquire(ctx);
         bdrv_graph_rdlock_main_loop();
         all_snapshots_includes_bs = bdrv_all_snapshots_includes_bs(bs);
         bdrv_graph_rdunlock_main_loop();
 
-        ret = (devices || all_snapshots_includes_bs) ?
-              bdrv_snapshot_goto(bs, name, errp) : 0;
+        if (devices || all_snapshots_includes_bs) {
+            ret = bdrv_snapshot_goto(bs, name, errp);
+        }
+        aio_context_release(ctx);
         if (ret < 0) {
             bdrv_graph_rdlock_main_loop();
             error_prepend(errp, "Could not load snapshot '%s' on '%s': ",
@@ -661,12 +672,15 @@ int bdrv_all_has_snapshot(const char *name,
     iterbdrvs = bdrvs;
     while (iterbdrvs) {
         BlockDriverState *bs = iterbdrvs->data;
+        AioContext *ctx = bdrv_get_aio_context(bs);
         QEMUSnapshotInfo sn;
         int ret = 0;
 
+        aio_context_acquire(ctx);
         if (devices || bdrv_all_snapshots_includes_bs(bs)) {
             ret = bdrv_snapshot_find(bs, &sn, name);
         }
+        aio_context_release(ctx);
         if (ret < 0) {
             if (ret == -ENOENT) {
                 return 0;
@@ -703,8 +717,10 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
     iterbdrvs = bdrvs;
     while (iterbdrvs) {
         BlockDriverState *bs = iterbdrvs->data;
+        AioContext *ctx = bdrv_get_aio_context(bs);
         int ret = 0;
 
+        aio_context_acquire(ctx);
         if (bs == vm_state_bs) {
             sn->vm_state_size = vm_state_size;
             ret = bdrv_snapshot_create(bs, sn);
@@ -712,6 +728,7 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
             sn->vm_state_size = 0;
             ret = bdrv_snapshot_create(bs, sn);
         }
+        aio_context_release(ctx);
         if (ret < 0) {
             error_setg(errp, "Could not create snapshot '%s' on '%s'",
                        sn->name, bdrv_get_device_or_node_name(bs));
@@ -742,10 +759,13 @@ BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs,
     iterbdrvs = bdrvs;
     while (iterbdrvs) {
         BlockDriverState *bs = iterbdrvs->data;
+        AioContext *ctx = bdrv_get_aio_context(bs);
         bool found = false;
 
+        aio_context_acquire(ctx);
         found = (devices || bdrv_all_snapshots_includes_bs(bs)) &&
             bdrv_can_snapshot(bs);
+        aio_context_release(ctx);
 
         if (vmstate_bs) {
             if (g_str_equal(vmstate_bs,
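All of the bdrv_all_*() hunks above share one structure: walk the list of block nodes and, on the locked side, bracket each per-node call with that node's own context lock, since every node may live in a different iothread. A sketch of the loop skeleton (per_node_op() is a placeholder, not a QEMU function):

    GList *iterbdrvs = bdrvs;

    while (iterbdrvs) {
        BlockDriverState *bs = iterbdrvs->data;
        AioContext *ctx = bdrv_get_aio_context(bs);
        int ret;

        /* Lock per node, not once per walk: nodes can belong to
         * different iothread AioContexts. */
        aio_context_acquire(ctx);
        ret = per_node_op(bs);
        aio_context_release(ctx);

        if (ret < 0) {
            return ret;
        }
        iterbdrvs = iterbdrvs->next;
    }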

block/stream.c

@@ -39,7 +39,6 @@ typedef struct StreamBlockJob {
     BlockDriverState *target_bs;
     BlockdevOnError on_error;
     char *backing_file_str;
-    bool backing_mask_protocol;
     bool bs_read_only;
 } StreamBlockJob;
 
@@ -96,16 +95,11 @@ static int stream_prepare(Job *job)
         if (unfiltered_base) {
             base_id = s->backing_file_str ?: unfiltered_base->filename;
             if (unfiltered_base->drv) {
-                if (s->backing_mask_protocol &&
-                    unfiltered_base->drv->protocol_name) {
-                    base_fmt = "raw";
-                } else {
-                    base_fmt = unfiltered_base->drv->format_name;
-                }
+                base_fmt = unfiltered_base->drv->format_name;
             }
         }
 
-        bdrv_graph_wrlock();
+        bdrv_graph_wrlock(base);
         bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err);
         bdrv_graph_wrunlock();
@@ -253,7 +247,6 @@ static const BlockJobDriver stream_job_driver = {
 
 void stream_start(const char *job_id, BlockDriverState *bs,
                   BlockDriverState *base, const char *backing_file_str,
-                  bool backing_mask_protocol,
                   BlockDriverState *bottom,
                   int creation_flags, int64_t speed,
                   BlockdevOnError on_error,
@@ -373,7 +366,7 @@ void stream_start(const char *job_id, BlockDriverState *bs,
      * already have our own plans. Also don't allow resize as the image size is
      * queried only at the job start and then cached.
      */
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(bs);
     if (block_job_add_bdrv(&s->common, "active node", bs, 0,
                            basic_flags | BLK_PERM_WRITE, errp)) {
         bdrv_graph_wrunlock();
@@ -405,7 +398,6 @@ void stream_start(const char *job_id, BlockDriverState *bs,
     s->base_overlay = base_overlay;
     s->above_base = above_base;
     s->backing_file_str = g_strdup(backing_file_str);
-    s->backing_mask_protocol = backing_mask_protocol;
     s->cor_filter_bs = cor_filter_bs;
    s->target_bs = bs;
     s->bs_read_only = bs_read_only;
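For orientation on the backing_mask_protocol lines that only the base side has: when the new backing node is a protocol driver (say, a plain file), writing its driver name into the image's backing-format field would leak a protocol name into image metadata, so the option substitutes "raw" instead. Roughly, the base-side selection logic is:

    const char *base_fmt = NULL;

    if (unfiltered_base && unfiltered_base->drv) {
        if (s->backing_mask_protocol && unfiltered_base->drv->protocol_name) {
            /* Hide protocol drivers behind "raw" in the image header. */
            base_fmt = "raw";
        } else {
            base_fmt = unfiltered_base->drv->format_name;
        }
    }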

block/vmdk.c

@@ -272,7 +272,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *e;
 
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(NULL);
     for (i = 0; i < s->num_extents; i++) {
         e = &s->extents[i];
         g_free(e->l1_table);
@@ -351,41 +351,29 @@ vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
     BDRVVmdkState *s = bs->opaque;
     int ret = 0;
 
-    size_t desc_buf_size;
-
-    if (s->desc_offset == 0) {
-        desc_buf_size = bdrv_getlength(bs->file->bs);
-        if (desc_buf_size > 16ULL << 20) {
-            error_report("VMDK description file too big");
-            return -EFBIG;
-        }
-    } else {
-        desc_buf_size = DESC_SIZE;
-    }
-
-    desc = g_malloc0(desc_buf_size);
-    tmp_desc = g_malloc0(desc_buf_size);
-    ret = bdrv_co_pread(bs->file, s->desc_offset, desc_buf_size, desc, 0);
+    desc = g_malloc0(DESC_SIZE);
+    tmp_desc = g_malloc0(DESC_SIZE);
+    ret = bdrv_co_pread(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
     if (ret < 0) {
         goto out;
     }
 
-    desc[desc_buf_size - 1] = '\0';
+    desc[DESC_SIZE - 1] = '\0';
     tmp_str = strstr(desc, "parentCID");
     if (tmp_str == NULL) {
         ret = -EINVAL;
         goto out;
     }
 
-    pstrcpy(tmp_desc, desc_buf_size, tmp_str);
+    pstrcpy(tmp_desc, DESC_SIZE, tmp_str);
     p_name = strstr(desc, "CID");
     if (p_name != NULL) {
         p_name += sizeof("CID");
-        snprintf(p_name, desc_buf_size - (p_name - desc), "%" PRIx32 "\n", cid);
-        pstrcat(desc, desc_buf_size, tmp_desc);
+        snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid);
+        pstrcat(desc, DESC_SIZE, tmp_desc);
     }
 
-    ret = bdrv_co_pwrite_sync(bs->file, s->desc_offset, desc_buf_size, desc, 0);
+    ret = bdrv_co_pwrite_sync(bs->file, s->desc_offset, DESC_SIZE, desc, 0);
 
 out:
     g_free(desc);
@@ -1247,7 +1235,7 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
                              0, 0, 0, 0, 0, &extent, errp);
         if (ret < 0) {
             bdrv_graph_rdunlock_main_loop();
-            bdrv_graph_wrlock();
+            bdrv_graph_wrlock(NULL);
             bdrv_unref_child(bs, extent_file);
             bdrv_graph_wrunlock();
             bdrv_graph_rdlock_main_loop();
@@ -1266,7 +1254,7 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
             g_free(buf);
             if (ret) {
                 bdrv_graph_rdunlock_main_loop();
-                bdrv_graph_wrlock();
+                bdrv_graph_wrlock(NULL);
                 bdrv_unref_child(bs, extent_file);
                 bdrv_graph_wrunlock();
                 bdrv_graph_rdlock_main_loop();
@@ -1277,7 +1265,7 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
             ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
             if (ret) {
                 bdrv_graph_rdunlock_main_loop();
-                bdrv_graph_wrlock();
+                bdrv_graph_wrlock(NULL);
                 bdrv_unref_child(bs, extent_file);
                 bdrv_graph_wrunlock();
                 bdrv_graph_rdlock_main_loop();
@@ -1287,7 +1275,7 @@ vmdk_parse_extents(const char *desc, BlockDriverState *bs, QDict *options,
         } else {
             error_setg(errp, "Unsupported extent type '%s'", type);
             bdrv_graph_rdunlock_main_loop();
-            bdrv_graph_wrlock();
+            bdrv_graph_wrlock(NULL);
             bdrv_unref_child(bs, extent_file);
             bdrv_graph_wrunlock();
             bdrv_graph_rdlock_main_loop();
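The vmdk_write_cid() rework above sizes the descriptor buffer from the real file length but caps it, so a corrupt image cannot force a huge allocation. A standalone sketch of that bounds-checked read (plain POSIX stdio; names are illustrative, not QEMU's):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define MAX_DESC_SIZE (16ULL << 20)   /* 16 MiB cap, as in the hunk */

    static char *read_descriptor(FILE *f, size_t *len_out)
    {
        long len;

        if (fseek(f, 0, SEEK_END) != 0 || (len = ftell(f)) < 0) {
            return NULL;
        }
        if ((uint64_t)len > MAX_DESC_SIZE) {
            fprintf(stderr, "description file too big\n");
            return NULL;
        }

        char *buf = calloc(1, (size_t)len + 1);  /* NUL-terminated */
        if (!buf) {
            return NULL;
        }
        rewind(f);
        if (fread(buf, 1, (size_t)len, f) != (size_t)len) {
            free(buf);
            return NULL;
        }
        *len_out = (size_t)len;
        return buf;
    }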

block/write-threshold.c

@@ -33,6 +33,7 @@ void qmp_block_set_write_threshold(const char *node_name,
                                    Error **errp)
 {
     BlockDriverState *bs;
+    AioContext *aio_context;
 
     bs = bdrv_find_node(node_name);
     if (!bs) {
@@ -40,7 +41,12 @@ void qmp_block_set_write_threshold(const char *node_name,
         return;
     }
 
+    aio_context = bdrv_get_aio_context(bs);
+    aio_context_acquire(aio_context);
+
     bdrv_write_threshold_set(bs, threshold_bytes);
+
+    aio_context_release(aio_context);
 }
 
 void bdrv_write_threshold_check_write(BlockDriverState *bs, int64_t offset,

File diff suppressed because it is too large

blockjob.c

@@ -198,7 +198,9 @@ void block_job_remove_all_bdrv(BlockJob *job)
      * one to make sure that such a concurrent access does not attempt
      * to process an already freed BdrvChild.
      */
-    bdrv_graph_wrlock();
+    aio_context_release(job->job.aio_context);
+    bdrv_graph_wrlock(NULL);
+    aio_context_acquire(job->job.aio_context);
     while (job->nodes) {
         GSList *l = job->nodes;
         BdrvChild *c = l->data;
@@ -232,12 +234,28 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
                        uint64_t perm, uint64_t shared_perm, Error **errp)
 {
     BdrvChild *c;
+    AioContext *ctx = bdrv_get_aio_context(bs);
+    bool need_context_ops;
 
     GLOBAL_STATE_CODE();
 
     bdrv_ref(bs);
 
+    need_context_ops = ctx != job->job.aio_context;
+
+    if (need_context_ops) {
+        if (job->job.aio_context != qemu_get_aio_context()) {
+            aio_context_release(job->job.aio_context);
+        }
+        aio_context_acquire(ctx);
+    }
     c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job,
                                errp);
+    if (need_context_ops) {
+        aio_context_release(ctx);
+        if (job->job.aio_context != qemu_get_aio_context()) {
+            aio_context_acquire(job->job.aio_context);
+        }
+    }
     if (c == NULL) {
         return -EPERM;
     }
@@ -496,7 +514,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
     int ret;
     GLOBAL_STATE_CODE();
 
-    bdrv_graph_wrlock();
+    bdrv_graph_wrlock(bs);
 
     if (job_id == NULL && !(flags & JOB_INTERNAL)) {
         job_id = bdrv_get_device_name(bs);
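The block_job_add_bdrv() hunk above is another context-switch dance: when the node lives in a different AioContext than the job, the job's context is dropped (unless it is the main context, which may be held recursively) and the node's context is held across bdrv_root_attach_child(). Reduced to its skeleton, with comments:

    AioContext *ctx = bdrv_get_aio_context(bs);
    bool need_context_ops = ctx != job->job.aio_context;

    if (need_context_ops) {
        /* Drop the job's context only if it is not the main loop's,
         * which the caller may hold recursively. */
        if (job->job.aio_context != qemu_get_aio_context()) {
            aio_context_release(job->job.aio_context);
        }
        aio_context_acquire(ctx);
    }

    c = bdrv_root_attach_child(bs, name, &child_job, 0, perm,
                               shared_perm, job, errp);

    if (need_context_ops) {
        aio_context_release(ctx);
        if (job->job.aio_context != qemu_get_aio_context()) {
            aio_context_acquire(job->job.aio_context);
        }
    }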

Some files were not shown because too many files have changed in this diff